gRPC directly with Audio2Face-3D Authoring#
To interact with Audio2Face-3D Authoring directly, you need to create a client that sends the data and implement a server that receives the data.
Client side#
This is the gRPC server prototype that you need to send the data to:
// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
syntax = "proto3";
import "nvidia_ace.a2f_authoring.v1.proto";
// This API allows retrieving individual frames from Audio2Face inference
// (also known as authoring).
service A2FAuthoringService {
  // Upload the audio clip to the AuthoringService to be processed.
  // Returns a handle for the uploaded clip.
  rpc UploadAudioClip(nvidia_ace.a2f_authoring.v1.AudioClip)
      returns (nvidia_ace.a2f_authoring.v1.AudioClipHandle);

  // Request a single animation frame at the specified timecode.
  rpc GetAvatarFacePose(nvidia_ace.a2f_authoring.v1.FacePoseRequest)
      returns (nvidia_ace.a2f_authoring.v1.BlendShapeData);
}
Protobuf data#
// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
syntax = "proto3";
package nvidia_ace.a2f_authoring.v1;
import "nvidia_ace.a2f.v1.proto";
import "nvidia_ace.audio.v1.proto";
// An audio clip submitted to the service through UploadAudioClip.
message AudioClip {
  // Audio header with the appropriate metadata such as sample rate, etc.
  // NOTE(review): the type of this field was lost in this copy of the file;
  // nvidia_ace.audio.v1.AudioHeader is the presumed original -- confirm
  // against the published nvidia_ace.audio.v1.proto.
  nvidia_ace.audio.v1.AudioHeader audio_header = 1;

  // Content of the audio file.
  bytes content = 2;
}
// Handle returned by UploadAudioClip for an uploaded audio clip.
message AudioClipHandle {
  // SHA-256 hash of the audio clip.
  string audio_clip_id = 1;

  // Names of the blendshapes that will be sent back to the client.
  repeated string blendshape_names = 2;
}
// Request message of GetAvatarFacePose: selects a single frame of an audio
// clip and carries the parameters used to generate it.
message FacePoseRequest {
  // Identifies the audio clip to use.
  // NOTE(review): presumably the audio_clip_id (SHA-256 hash) returned in
  // AudioClipHandle -- confirm.
  string audio_hash = 1;

  // Preferred emotions, keyed by emotion name.
  // NOTE(review): valid names and value range are not stated in this file --
  // confirm against the service documentation.
  map<string, float> preferred_emotions = 2;

  // Timecode of the requested frame.
  // NOTE(review): unit is presumably seconds -- confirm.
  float time_stamp = 3;

  // Parameters for updating the facial characteristics of an avatar.
  // See the documentation for more information.
  nvidia_ace.a2f.v1.FaceParameters face_params = 4;

  // Parameters relative to the emotion blending and processing
  // before using it to generate blendshapes.
  // See the documentation for more information.
  nvidia_ace.a2f.v1.EmotionPostProcessingParameters emotion_pp_params = 5;

  // Multipliers and offsets to apply to the generated blendshape values.
  nvidia_ace.a2f.v1.BlendShapeParameters blendshape_params = 6;
}
// Response message of GetAvatarFacePose: a single animation frame.
message BlendShapeData {
  // Blendshape values for the frame.
  // NOTE(review): presumably ordered like AudioClipHandle.blendshape_names --
  // confirm.
  repeated float blendshapes = 1;

  // Echo of the time_code for the frame.
  float time_code = 2;

  // Emotion output from A2E after post-processing (applied emotions).
  map<string, float> emotions = 3;
}