gRPC directly with Audio2Face
To interact with Audio2Face directly, you need to create a gRPC client that sends the audio data and implement a gRPC server that receives the resulting animation data.
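The sketches later on this page use Python. They assume that bindings have been generated from the .proto files listed (or imported) below, for example with grpcio-tools. A minimal sketch of that generation step follows; the file names match the listings on this page, while the output layout and the resulting module names depend on how you invoke protoc.

# Minimal sketch: generate Python gRPC bindings for the protos referenced on this page.
# Assumes all .proto files (including those only referenced via imports) are saved locally.
from grpc_tools import protoc  # pip install grpcio-tools

PROTO_FILES = [
    "nvidia_ace.services.a2f.v1.proto",
    "nvidia_ace.a2f.v1.proto",
    "nvidia_ace.services.animation_data.v1.proto",
    "nvidia_ace.animation_data.v1.proto",
    "nvidia_ace.animation_id.v1.proto",
    "nvidia_ace.audio.v1.proto",
    "nvidia_ace.status.v1.proto",
    "nvidia_ace.emotion_with_timecode.v1.proto",
]

# Equivalent to: python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. <files>
protoc.main(["protoc", "-I.", "--python_out=.", "--grpc_python_out=.", *PROTO_FILES])

Because the proto file names contain dots, the generated Python files also contain dots in their names; the client and server sketches below therefore use simplified, hypothetical module names that you should adapt to however you organize the generated code.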
Client side
This is the gRPC service definition of the server that you need to send the audio data to:
Service
nvidia_ace.services.a2f.v1.proto
syntax = "proto3";
package nvidia_ace.services.a2f.v1;
import "nvidia_ace.a2f.v1.proto";
import "nvidia_ace.status.v1.proto";
service A2FService {
// RPC to implement to send audio data to Audio2Face Microservice
// An example use for this RPC is a client pushing audio buffers to
// Audio2Face Microservice (server)
rpc PushAudioStream(stream nvidia_ace.a2f.v1.AudioStream)
returns (nvidia_ace.status.v1.Status) {}
}
//nvidia_ace.services.a2f.v1
//v0.1.0
Protobuf data
nvidia_ace.a2f.v1.proto
syntax = "proto3";
package nvidia_ace.a2f.v1;
import "nvidia_ace.animation_id.v1.proto";
import "nvidia_ace.status.v1.proto";
import "nvidia_ace.audio.v1.proto";
import "nvidia_ace.emotion_with_timecode.v1.proto";
message AudioStream {
// The header must be sent as the first message.
// One or more audio with emotion messages must be sent thereafter.
// The end of audio will happen when the client closes the connection
oneof stream_part {
AudioStreamHeader audio_stream_header = 1;
nvidia_ace.a2f.v1.AudioWithEmotion audio_with_emotion = 2;
}
}
// IMPORTANT NOTE: this is an AudioStreamHeader WITH ID
// A similar AudioStreamHeader exist in nvidia_ace.controller.v1.proto
// but that one does NOT contain IDs
message AudioStreamHeader {
// IDs of the current stream
nvidia_ace.animation_id.v1.AnimationIds animation_ids = 1;
nvidia_ace.audio.v1.AudioHeader audio_header = 2;
// Parameters for updating the facial characteristics of an avatar
// See the documentation for more information
FaceParameters face_params = 3;
// Parameters relative to the emotion blending and processing
// before using it to generate blendshapes
// See the documentation for more information
EmotionPostProcessingParameters emotion_post_processing_params = 4;
// Multipliers and offsets to apply to the generated blendshape values
BlendShapeParameters blendshape_params = 5;
}
message FloatArray { repeated float values = 1; }
// For which parameters to set here, refer to the documentation
message FaceParameters {
map<string, float> float_params = 1;
map<string, int32> integer_params = 2;
map<string, FloatArray> float_array_params = 3;
}
// For the Blendshape names, refer to the documentation
message BlendShapeParameters {
map<string, float> bs_weight_multipliers = 1;
map<string, float> bs_weight_offsets = 2;
}
// For more information refer to the documentation
message EmotionPostProcessingParameters {
// Increases the spread between emotion values by pushing them higher or lower.
// Default value: 1
// Min: 0.3
// Max: 3
optional float emotion_contrast = 1;
// Coefficient for smoothing emotions over time
// 0 means no smoothing at all (can be jittery)
// 1 means extreme smoothing (emotion values not updated over time)
// Default value: 0.7
// Min: 0
// Max: 1
optional float live_blend_coef = 2;
// Activate blending between the preferred emotions (passed as input) and the emotions detected by A2E.
// Default: True
optional bool enable_preferred_emotion = 3;
// Sets the strength of the preferred emotions (passed as input) relative to emotions detected by A2E.
// 0 means only A2E output will be used for emotion rendering.
// 1 means only the preferred emotions will be used for emotion rendering.
// Default value: 0.5
// Min: 0
// Max: 1
optional float preferred_emotion_strength = 4;
// Sets the strength of generated emotions relative to neutral emotion.
// This multiplier is applied globally after the mix of emotion is done.
// If set to 0, emotion will be neutral.
// If set to 1, the blend of emotion will be fully used. (can be too intense)
// Default value: 0.6
// Min: 0
// Max: 1
optional float emotion_strength = 5;
// Sets a firm limit on the quantity of emotion sliders engaged by A2E
// emotions with highest weight will be prioritized
// Default value: 3
// Min: 1
// Max: 6
optional int32 max_emotions = 6;
}
message AudioWithEmotion {
// audio buffer in bytes to interpret depending on the audio header
bytes audio_buffer = 1;
// The time codes are relative to the beginning of the audio clip.
repeated nvidia_ace.emotion_with_timecode.v1.EmotionWithTimeCode emotions = 2;
}
//nvidia_ace.a2f.v1
//v0.1.0
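As a concrete illustration of the message ordering described above (the AudioStreamHeader first, then one or more AudioWithEmotion messages, with end of audio signalled by closing the stream), here is a minimal Python client sketch. The module names (a2f_pb2, a2f_service_pb2_grpc, animation_id_pb2, audio_pb2), the field names of AnimationIds and AudioHeader (whose protos are not reproduced on this page), and the target address localhost:52000 are assumptions to adapt to your setup.

# Minimal client sketch: pushes a 16-bit mono PCM WAV file to the A2FService.
# Module names, field names of messages not shown on this page, and the target
# address are assumptions; adapt them to your generated bindings and deployment.
import uuid
import wave

import grpc

import a2f_pb2                # nvidia_ace.a2f.v1 (hypothetical module name)
import a2f_service_pb2_grpc   # nvidia_ace.services.a2f.v1 (hypothetical module name)
import animation_id_pb2       # nvidia_ace.animation_id.v1 (hypothetical module name)
import audio_pb2              # nvidia_ace.audio.v1 (hypothetical module name)


def stream_audio(wav_path: str, target: str = "localhost:52000") -> None:
    with wave.open(wav_path, "rb") as wav:
        sample_rate = wav.getframerate()
        samples = wav.readframes(wav.getnframes())

    def request_iterator():
        # 1) The header must be the first message of the stream.
        yield a2f_pb2.AudioStream(
            audio_stream_header=a2f_pb2.AudioStreamHeader(
                # AnimationIds and AudioHeader are defined in protos not reproduced
                # on this page; verify the field names against your copies of them.
                animation_ids=animation_id_pb2.AnimationIds(
                    request_id=str(uuid.uuid4()),
                    stream_id=str(uuid.uuid4()),
                ),
                audio_header=audio_pb2.AudioHeader(
                    samples_per_second=sample_rate,
                    bits_per_sample=16,
                    channel_count=1,
                ),
                # Optional tuning blocks; omit them to use the server defaults.
                emotion_post_processing_params=a2f_pb2.EmotionPostProcessingParameters(
                    emotion_contrast=1.0,
                    live_blend_coef=0.7,
                ),
            )
        )
        # 2) Then one or more audio buffers, optionally carrying emotion timecodes.
        chunk_size = sample_rate * 2  # roughly one second of 16-bit mono audio
        for i in range(0, len(samples), chunk_size):
            yield a2f_pb2.AudioStream(
                audio_with_emotion=a2f_pb2.AudioWithEmotion(
                    audio_buffer=samples[i:i + chunk_size],
                )
            )
        # 3) End of audio is signalled by ending this iterator, which closes the stream.

    with grpc.insecure_channel(target) as channel:
        stub = a2f_service_pb2_grpc.A2FServiceStub(channel)
        status = stub.PushAudioStream(request_iterator())
        print(status)


if __name__ == "__main__":
    stream_audio("speech.wav")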
Server side
Service
Implementing the PushAnimationDataStream RPC in your own gRPC server makes it possible to receive animation data from Audio2Face.
nvidia_ace.services.animation_data.v1.proto
syntax = "proto3";
package nvidia_ace.services.animation_data.v1;
import "nvidia_ace.animation_data.v1.proto";
import "nvidia_ace.animation_id.v1.proto";
import "nvidia_ace.status.v1.proto";
// 2 RPC exist to provide a stream of animation data
// The RPC to implement depends on if the part of the service
// is a client or a server.
// E.g.: In the case of Animation Graph Microservice, we implement both RPCs.
// One to receive and one to send.
service AnimationDataService {
// When the service creating the animation data is a client from the service receiving them
// This push RPC must be used.
// An example for that is Audio2Face Microservice creating animation data and sending them
// to Animation Graph Microservice
rpc PushAnimationDataStream(stream nvidia_ace.animation_data.v1.AnimationDataStream)
returns (nvidia_ace.status.v1.Status) {}
// When the service creating the animation data is a server from the service receiving them
// This pull RPC must be used.
// An example for that is the Omniverse Renderer Microservice requesting animation data to the
// Animation Graph Microservice.
rpc PullAnimationDataStream(nvidia_ace.animation_id.v1.AnimationIds)
returns (stream nvidia_ace.animation_data.v1.AnimationDataStream) {}
}
//nvidia_ace.services.animation_data.v1
//v0.1.0
Protobuf data
The animation data messages referenced by this service (nvidia_ace.animation_data.v1.AnimationDataStream) are defined in nvidia_ace.animation_data.v1.proto, which the service definition above imports; refer to that file for the message contents.
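As a sketch of the receiving side, the following Python server implements PushAnimationDataStream so that Audio2Face, acting as the client, can push AnimationDataStream messages to it; Audio2Face must then be configured to connect to this endpoint. The module names (animation_data_service_pb2_grpc, status_pb2) and the listening port are assumptions, and the contents of each received message are defined in nvidia_ace.animation_data.v1.proto, which is not reproduced on this page.

# Minimal server sketch: receives the animation data stream pushed by Audio2Face.
# Module names and the listening port are assumptions; adapt them to your bindings.
from concurrent import futures

import grpc

import animation_data_service_pb2_grpc  # nvidia_ace.services.animation_data.v1 (hypothetical module name)
import status_pb2                        # nvidia_ace.status.v1 (hypothetical module name)


class AnimationDataService(animation_data_service_pb2_grpc.AnimationDataServiceServicer):
    def PushAnimationDataStream(self, request_iterator, context):
        # Audio2Face streams AnimationDataStream messages; iterate until the stream ends.
        count = 0
        for message in request_iterator:
            # Each message is an nvidia_ace.animation_data.v1.AnimationDataStream;
            # see that proto (not reproduced on this page) for its exact fields.
            count += 1
        print(f"received {count} animation data messages")
        # The Status fields (e.g. a code and a message) are defined in nvidia_ace.status.v1.proto.
        return status_pb2.Status()


def serve(port: int = 51000) -> None:
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    animation_data_service_pb2_grpc.add_AnimationDataServiceServicer_to_server(
        AnimationDataService(), server
    )
    server.add_insecure_port(f"[::]:{port}")
    server.start()
    server.wait_for_termination()


if __name__ == "__main__":
    serve()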