Action Specs - NVIDIA Docs

classification

evaluate

parameter	display_name	value_type	description	default_value	examples	required	popular
train_config		collection
train_config.train_dataset_path		hidden
train_config.val_dataset_path		hidden
train_config.pretrained_model_path		hidden
train_config.optimizer		collection
train_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr		float		0.01
train_config.optimizer.sgd.decay		float		0
train_config.optimizer.sgd.momentum		float		0.9
train_config.optimizer.sgd.nesterov		bool		FALSE
train_config.optimizer.adam		collection
train_config.optimizer.adam.lr		float
train_config.optimizer.adam.beta_1		float
train_config.optimizer.adam.beta_2		float
train_config.optimizer.adam.epsilon		float
train_config.optimizer.adam.decay		float
train_config.optimizer.rmsprop		collection
train_config.optimizer.rmsprop.lr		float
train_config.optimizer.rmsprop.rho		float
train_config.optimizer.rmsprop.epsilon		float
train_config.optimizer.rmsprop.decay		float
train_config.batch_size_per_gpu		integer		256
train_config.n_epochs		integer		80
train_config.n_workers		integer		2
train_config.reg_config		collection
train_config.reg_config.type		string		L2
train_config.reg_config.scope		string		Conv2D,Dense
train_config.reg_config.weight_decay		float		0.00005
train_config.lr_config		collection	ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step		collection
train_config.lr_config.step.learning_rate		float
train_config.lr_config.step.step_size		integer
train_config.lr_config.step.gamma		float
train_config.lr_config.soft_anneal		collection
train_config.lr_config.soft_anneal.learning_rate		float		0.05
train_config.lr_config.soft_anneal.soft_start		float		0.056
train_config.lr_config.soft_anneal.annealing_divider		float		10
train_config.lr_config.soft_anneal.annealing_points		list	List of float	[0.3,0.6,0.8]
train_config.lr_config.cosine		collection
train_config.lr_config.cosine.learning_rate		float
train_config.lr_config.cosine.min_lr_ratio		float
train_config.lr_config.cosine.soft_start		float
train_config.random_seed		integer		42
train_config.enable_random_crop		bool
train_config.enable_center_crop		bool
train_config.enable_color_augmentation		bool
train_config.label_smoothing		float
train_config.preprocess_mode		string		torch
train_config.mixup_alpha		float
train_config.model_parallelism		list
train_config.image_mean		collection
train_config.image_mean.key		string
train_config.image_mean.value		float
train_config.disable_horizontal_flip		bool
train_config.visualizer_config		collection
train_config.visualizer	Visualizer	collection
train_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
train_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
eval_config		collection
eval_config.top_k		integer		3
eval_config.eval_dataset_path		hidden
eval_config.model_path		hidden
eval_config.batch_size		integer		256
eval_config.n_workers		integer		2
eval_config.enable_center_crop		bool
model_config		collection
model_config.arch		string		squeezenet
model_config.input_image_size		string		3,224,224		yes	yes
model_config.resize_interpolation_method		string			__BILINEAR__, __BICUBIC__
model_config.n_layers		integer
model_config.retain_head		bool		FALSE
model_config.use_batch_norm		bool
model_config.use_bias		bool
model_config.use_pooling		bool
model_config.all_projections		bool
model_config.freeze_bn		bool
model_config.freeze_blocks		integer
model_config.dropout		float		1.00E-03
model_config.batch_norm_config		collection
model_config.batch_norm_config.momentum		float
model_config.batch_norm_config.epsilon		float
model_config.activation		collection
model_config.activation.activation_type		string
model_config.activation.activation_parameters		collection
model_config.activation.activation_parameters.key		string
model_config.activation.activation_parameters.value		float

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	FALSE			no
cal_image_dir		hidden
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
strict_type_constraints		bool		FALSE
gen_ds_config		bool		FALSE
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
min_batch_size		integer			1
opt_batch_size		integer			1
experiment_spec	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
static_batch_size		integer			-1
results_dir		hidden
verbose		hidden		TRUE
classmap_json		hidden
is_byom		bool		FALSE

inference

parameter	display_name	value_type	description	default_value	examples	required	popular	cli
batch_size	Batch Size	integer	Batch size CLI parameter	1				yes
train_config.train_dataset_path		hidden
train_config.val_dataset_path		hidden
train_config.pretrained_model_path		hidden
train_config.optimizer		collection
train_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr		float		0.01
train_config.optimizer.sgd.decay		float		0
train_config.optimizer.sgd.momentum		float		0.9
train_config.optimizer.sgd.nesterov		bool		FALSE
train_config		collection
train_config.optimizer.adam		collection
train_config.optimizer.adam.lr		float
train_config.optimizer.adam.beta_1		float
train_config.optimizer.adam.beta_2		float
train_config.optimizer.adam.epsilon		float
train_config.optimizer.adam.decay		float
train_config.optimizer.rmsprop		collection
train_config.optimizer.rmsprop.lr		float
train_config.optimizer.rmsprop.rho		float
train_config.optimizer.rmsprop.epsilon		float
train_config.optimizer.rmsprop.decay		float
train_config.batch_size_per_gpu		integer		256
train_config.n_epochs		integer		80
train_config.n_workers		integer		2
train_config.reg_config		collection
train_config.reg_config.type		string		L2
train_config.reg_config.scope		string		Conv2D,Dense
train_config.reg_config.weight_decay		float		0.00005
train_config.lr_config		collection	ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step		collection
train_config.lr_config.step.learning_rate		float
train_config.lr_config.step.step_size		integer
train_config.lr_config.step.gamma		float
train_config.lr_config.soft_anneal		collection
train_config.lr_config.soft_anneal.learning_rate		float		0.05
train_config.lr_config.soft_anneal.soft_start		float		0.056
train_config.lr_config.soft_anneal.annealing_divider		float		10
train_config.lr_config.soft_anneal.annealing_points		list	List of float	[0.3,0.6,0.8]
train_config.lr_config.cosine		collection
train_config.lr_config.cosine.learning_rate		float
train_config.lr_config.cosine.min_lr_ratio		float
train_config.lr_config.cosine.soft_start		float
train_config.random_seed		integer		42
train_config.enable_random_crop		bool
train_config.enable_center_crop		bool
train_config.enable_color_augmentation		bool
train_config.label_smoothing		float
train_config.preprocess_mode		string		torch
train_config.mixup_alpha		float
train_config.model_parallelism		list
train_config.image_mean		collection
train_config.image_mean.key		string
train_config.image_mean.value		float
train_config.disable_horizontal_flip		bool
train_config.visualizer_config		collection
train_config.visualizer	Visualizer	collection
train_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
train_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
eval_config		collection
eval_config.top_k		integer		3
eval_config.eval_dataset_path		hidden
eval_config.model_path		hidden
eval_config.batch_size		integer		256
eval_config.n_workers		integer		2
eval_config.enable_center_crop		bool
model_config		collection
model_config.arch		string		squeezenet
model_config.input_image_size		string		3,224,224		yes	yes
model_config.resize_interpolation_method		string			__BILINEAR__, __BICUBIC__
model_config.n_layers		integer
model_config.retain_head		bool		FALSE
model_config.use_batch_norm		bool
model_config.use_bias		bool
model_config.use_pooling		bool
model_config.all_projections		bool
model_config.freeze_bn		bool
model_config.freeze_blocks		integer
model_config.dropout		float		1.00E-03
model_config.batch_norm_config		collection
model_config.batch_norm_config.momentum		float
model_config.batch_norm_config.epsilon		float
model_config.activation		collection
model_config.activation.activation_type		string
model_config.activation.activation_parameters		collection
model_config.activation.activation_parameters.key		string
model_config.activation.activation_parameters.value		float

train

parameter	display_name	value_type	description	default_value	examples	required	popular
init_epoch		integer	CLI Parameter initial epoch	1
train_config		collection
train_config.train_dataset_path		hidden
train_config.val_dataset_path		hidden
train_config.pretrained_model_path		hidden
train_config.optimizer		collection
train_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr		float		0.01
train_config.optimizer.sgd.decay		float		0
train_config.optimizer.sgd.momentum		float		0.9
train_config.optimizer.sgd.nesterov		bool		FALSE
train_config.optimizer.adam		collection
train_config.optimizer.adam.lr		float
train_config.optimizer.adam.beta_1		float
train_config.optimizer.adam.beta_2		float
train_config.optimizer.adam.epsilon		float
train_config.optimizer.adam.decay		float
train_config.optimizer.rmsprop		collection
train_config.optimizer.rmsprop.lr		float
train_config.optimizer.rmsprop.rho		float
train_config.optimizer.rmsprop.epsilon		float
train_config.optimizer.rmsprop.decay		float
train_config.batch_size_per_gpu		integer		256
train_config.n_epochs		integer		80
train_config.n_workers		integer		2
train_config.reg_config		collection
train_config.reg_config.type		string		L2
train_config.reg_config.scope		string		Conv2D,Dense
train_config.reg_config.weight_decay		float		0.00005
train_config.lr_config		collection	ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step		collection
train_config.lr_config.step.learning_rate		float
train_config.lr_config.step.step_size		integer
train_config.lr_config.step.gamma		float
train_config.lr_config.soft_anneal		collection
train_config.lr_config.soft_anneal.learning_rate		float		0.05
train_config.lr_config.soft_anneal.soft_start		float		0.056
train_config.lr_config.soft_anneal.annealing_divider		float		10
train_config.lr_config.soft_anneal.annealing_points		list	List of float	[0.3,0.6,0.8]
train_config.lr_config.cosine		collection
train_config.lr_config.cosine.learning_rate		float
train_config.lr_config.cosine.min_lr_ratio		float
train_config.lr_config.cosine.soft_start		float
train_config.random_seed		integer		42
train_config.enable_random_crop		bool
train_config.enable_center_crop		bool
train_config.enable_color_augmentation		bool
train_config.label_smoothing		float
train_config.preprocess_mode		string		torch
train_config.mixup_alpha		float
train_config.model_parallelism		list
train_config.image_mean		collection
train_config.image_mean.key		string
train_config.image_mean.value		float
train_config.disable_horizontal_flip		bool
train_config.visualizer_config		collection
train_config.visualizer	Visualizer	collection
train_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
train_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
eval_config		collection
eval_config.top_k		integer		3
eval_config.eval_dataset_path		hidden
eval_config.model_path		hidden
eval_config.batch_size		integer		256
eval_config.n_workers		integer		2
eval_config.enable_center_crop		bool
model_config		collection
model_config.arch		string		squeezenet
model_config.input_image_size		string		3,224,224		yes	yes
model_config.resize_interpolation_method		string			__BILINEAR__, __BICUBIC__
model_config.n_layers		integer
model_config.retain_head		bool		FALSE
model_config.use_batch_norm		bool
model_config.use_bias		bool
model_config.use_pooling		bool
model_config.all_projections		bool
model_config.freeze_bn		bool
model_config.freeze_blocks		integer
model_config.dropout		float		1.00E-03
model_config.batch_norm_config		collection
model_config.batch_norm_config.momentum		float
model_config.batch_norm_config.epsilon		float
model_config.activation		collection
model_config.activation.activation_type		string
model_config.activation.activation_parameters		collection
model_config.activation.activation_parameters.key		string
model_config.activation.activation_parameters.value		float

detectnet_v2

convert

parameter	display_name	value_type	description	default_value	valid_options	required	popular
e	engine file path	hidden
k	encode key	hidden
c	cache_file	hidden
o	outputs	string	comma separated list of output node names
d	input_dims	string	comma separated list of input dimensions (not required for TLT 3.0 new models).			yes	yes
b	batch_size	integer	calibration batch size	8		yes
m	max_batch_size	integer	maximum TensorRT engine batch size (default 16). If meet with out-of-memory issue, please decrease the batch size accordingly.	16			yes
w	max_workspace_size	integer	maximum workspace size of TensorRT engine (default 1<<30). If meet with out-of-memory issue, please increase the workspace size accordingly.
t	data_type	string	TensorRT data type	fp32	fp32, fp16, int8		yes
i	input_order	string	input dimension ordering	nchw	nchw, nhwc, nc
s	strict_type_constraints	bool	TensorRT strict_type_constraints flag for INT8 mode	FALSE
u	dla_core	int	Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback).	-1
p	parse_profile_shapes	list	comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g.,NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
platform	platform	string	platform label			yes	yes
model	etlt model from export	hidden

evaluate

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular	regex
version	Schema Version	const	The version of this schema	1
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.image_extension	Image Extension	string	Extension of the images to be used.	png				png, jpg, jpeg	yes
dataset_config.data_sources.tfrecords_path	TFRecord Path	hidden			/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.data_sources.image_directory_path	Image Path	hidden			/shared/users/1234/datasets/5678/training
dataset_config.validation_data_source.tfrecords_path	Validation TFRecord Path	hidden			/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.validation_data_source.image_directory_path	Validation Image Path	hidden			/shared/users/1234/datasets/5678/training
dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person						^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person						^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold	Validation Fold	integer	In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold.			0
augmentation_config	Data Augmentation	collection	Collection of parameters to configure the preprocessing and on the fly data augmentation							Yes
augmentation_config.preprocessing.output_image_width	Image Width	integer	The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16.	1248		480			yes	Yes
augmentation_config.preprocessing.output_image_height	Image Height	integer	The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16.	384		272			yes	Yes
augmentation_config.preprocessing.min_bbox_width	Bounding Box Width	float	The minimum width of the object labels to be considered for training.	1		0			yes
augmentation_config.preprocessing.min_bbox_height	Bounding Box Height	float	The minimum height of the object labels to be considered for training.	1		0			yes
augmentation_config.preprocessing.output_image_channel	Image Channel	integer	The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported.	3				1, 3	yes
augmentation_config.preprocessing.crop_right	Crop Right	integer	The right boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_left	Crop Left	integer	The left boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_top	Crop Top	integer	The top boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_bottom	Crop Bottom	integer	The bottom boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.scale_height	Scale Height	float	The floating point factor to scale the height of the cropped images.			0			yes
augmentation_config.preprocessing.scale_width	Scale Width	float	The floating point factor to scale the width of the cropped images.			0			yes
augmentation_config.spatial_augmentation.hflip_probability	Horizontal-Flip Probability	float	The probability to flip an input image horizontally.	0.5		0	1
augmentation_config.spatial_augmentation.vflip_probability	Vertical-Flip Probability	float	The probability to flip an input image vertically.			0	1
augmentation_config.spatial_augmentation.zoom_min	Minimum Zoom Scale	float	The minimum zoom scale of the input image.	1		0
augmentation_config.spatial_augmentation.zoom_max	Maximum Zoom Scale	float	The maximum zoom scale of the input image.	1		0
augmentation_config.spatial_augmentation.translate_max_x	X-Axis Maximum Traslation	float	The maximum translation to be added across the x axis.	8		0
augmentation_config.spatial_augmentation.translate_max_y	Y-Axis Maximum Translation	float	The maximum translation to be added across the y axis.	8		0
augmentation_config.spatial_augmentation.rotate_rad_max	Image Rotation	float	The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max].			0
augmentation_config.color_augmentation.color_shift_stddev	Color Shift Standard Deviation	float	The standard devidation value for the color shift.			0	1
augmentation_config.color_augmentation.hue_rotation_max	Hue Maximum Rotation	float	The maximum rotation angle for the hue rotation matrix.	25		0	360
augmentation_config.color_augmentation.saturation_shift_max	Saturation Maximum Shift	float	The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift.	0.2		0	1
augmentation_config.color_augmentation.contrast_scale_max	Contrast Maximum Scale	float	The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged.	0.1		0	1
augmentation_config.color_augmentation.contrast_center	Contrast Center	float	The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5.	0.5				0.5
bbox_rasterizer_config	Bounding box rasterizer	collection	Collection of parameters to configure the bounding box rasterizer
bbox_rasterizer_config.deadzone_radius	Bounding box rasterizer deadzone radius	float		0.4		0	1		yes
model_config	Model	collection
model_config.arch	BackBone Architecture	string	The architecture of the backbone feature extractor to be used for training.	resnet				resnet	yes
model_config.pretrained_model_file	PTM File Path	hidden	This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file is to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains are supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer.		/shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5
model_config.load_graph	PTM Load Graph	bool	A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.	FALSE
model_config.freeze_blocks	Freeze Blocks	integer	This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates.			0	3
model_config.freeze_bn	Freeze Batch Normalization	bool	A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections	All Projections	bool	For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.
model_config.num_layers	Number of Layers	integer	The depth of the feature extractor for scalable templates.	18				10, 18, 34, 50, 101	yes
model_config.use_pooling	Use Pooling	bool	Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm	Use Batch Normalization	bool	A flag to determine whether to use Batch Normalization layers or not.	TRUE
model_config.dropout_rate	Dropout Rate	float	Probability for drop out			0	1
model_config.training_precision.backend_floatx	Backend Training Precision	string	A nested parameter that sets the precision of the backend training framework.					__FLOAT32__	yes
model_config.objective_set.cov	Objective COV	collection	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	{}					yes
model_config.objective_set.bbox.scale	Objective Bounding Box Scale	float	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	35					yes
model_config.objective_set.bbox.offset	Objective Bounding Box Offset	float	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	0.5					yes
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	4		1			yes
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	120		1			yes	Yes
training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-06					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	5.00E-04					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.100000001		0	1		yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.699999988		0	1		yes	Yes
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__NO_REG__, __L1__, __L2__	yes
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-09					yes
training_config.optimizer.adam.epsilon	Optimizer Adam Epsilon	float	A very small number to prevent any division by zero in the implementation.	1.00E-08					yes
training_config.optimizer.adam.beta1	Optimizer Adam Beta1	float		0.899999976					yes
training_config.optimizer.adam.beta2	Optimizer Adam Beta2	float		0.999000013					yes
training_config.cost_scaling.enabled	Enable Cost Scaling	bool	Enables cost scaling during training.	FALSE					yes
training_config.cost_scaling.initial_exponent	Cost Scaling Initial Exponent	float		20					yes
training_config.cost_scaling.increment	Cost Scaling Increment	float		0.005					yes
training_config.cost_scaling.decrement	Cost Scaling Decrement	float		1					yes
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10		0			yes
evaluation_config	Evaluation	collection							yes
evaluation_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				__SAMPLE__, __INTEGRATE__
evaluation_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1			yes
evaluation_config.first_validation_epoch	First Validation Epoch	integer	The first epoch to start running validation. Ideally it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation, since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down.	30		1			yes
cost_function_config	Cost function	collection
cost_function_config.enable_autoweighting	Auto-Weighting	bool		TRUE					yes
cost_function_config.max_objective_weight	Maximum Objective Weight	float		0.999899983
cost_function_config.min_objective_weight	Minimum Objective Weight	float		1.00E-04
classwise_config	Class-wise organized parameters	list
classwise_config.key	Class Key	string	Name of class for the classwise parameters		person
classwise_config.value.evaluation_config	Evaluation config elements per class	collection
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap	Minimum Detection Ground Truth Overlaps	float	Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class.	0.5		0	1		yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height	Minimum Height	integer	Minimum height in pixels for a valid ground truth and prediction bbox.	20		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height	Maximum Height	integer	Maximum height in pixels for a valid ground truth and prediction bbox.	9999		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width	Minimum Width	integer	Minimum width in pixels for a valid ground truth and prediction bbox.	10		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width	Maximum Width	integer	Maximum width in pixels for a valid ground truth and prediction bbox.	9999		0			yes
classwise_config.value.cost_function_config	Class-wise cost fuction config per class	collection							yes
classwise_config.value.cost_function_config.class_weight	Class Weight	float		4					yes
classwise_config.value.cost_function_config.coverage_foreground_weight	Coverage Forground Weight	float		0.050000001					yes
classwise_config.value.cost_function_config.objectives	Objectives	list		[{“name”: “cov”, “initial_weight”: 1.0, “weight_target”: 1.0}, {“name”: “bbox”, “initial_weight”: 10.0, “weight_target”: 10.0}]					yes
classwise_config.value.cost_function_config.objectives.name	Objective Name	string	Objective name such as cov or bbox.	cov					yes
classwise_config.value.cost_function_config.objectives.initial_weight	Initial Weight	float	Initial weight for named objective.	1					yes
classwise_config.value.cost_function_config.objectives.weight_target	Weight Target	float	Target weight for named objective.	1					yes
classwise_config.value.bbox_rasterizer_config	Rasterization	collection							yes
classwise_config.value.bbox_rasterizer_config.cov_center_x	Center of Object X-Coordinate	float	x-coordinate of the center of the object	0.5		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_center_y	Center of Object Y-Coordinate	float	y-coordinate of the center of the object	0.5		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_radius_x	Center of Object X-Radius	float	x-radius of the coverage ellipse	1		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_radius_y	Center of Object Y-Radius	float	y-radius of the coverage ellipse	1		0	1		yes
classwise_config.value.bbox_rasterizer_config.bbox_min_radius	Bounding Box Minimum Radius	float	The minimum radius of the coverage region to be drawn for boxes	1		0	1		yes
classwise_config.postprocessing_config	Post-Processing	collection
classwise_config.postprocessing_config.clustering_config.coverage_threshold	Coverage Threshold	float	The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering.	0.0075		0	1		yes
classwise_config.postprocessing_config.clustering_config.dbscan_eps	DBSCAN Samples Distance	float	The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together.	0.230000004		0	1		yes
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples	DBSCAN Minimum Samples	float	The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself.	0.050000001		0	1		yes
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height	Minimum Bounding Box Height	integer	The minimum height in pixels to consider as a valid detection post clustering.	20		0	10000		yes
classwise_config.postprocessing_config.clustering_config.clustering_algorithm	Clustering Algorithm	string	Defines the post-processing algorithm to cluter raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined.	__DBSCAN__				__DBSCAN__, __NMS__, __HYBRID__	yes
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold	DBSCAN Confidence Threshold	float	The confidence threshold used to filter out the clustered bounding box output from DBSCAN.	0.1		0.1			yes
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold	NMS IOU Threshold	float	The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs.	0.2		0	1
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold	NMS Confidence Threshold	float	The confidence threshold to filter out clustered bounding boxes from NMS.	0		0	1

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	FALSE			no
cal_image_dir		hidden
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
strict_type_constraints		bool		FALSE
gen_ds_config		bool		FALSE
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
min_batch_size		integer			1
opt_batch_size		integer			1
experiment_spec	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
static_batch_size		integer			-1
results_dir		hidden
verbose		hidden		TRUE

inference

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular
inferencer_config		collection
inferencer_config.tlt_config		collection
inferencer_config.tlt_config.model		hidden
inferencer_config.tensorrt_config		collection
inferencer_config.tensorrt_config.parser		integer						0,1,2
inferencer_config.tensorrt_config.backend_data_type		integer						0,1,2
inferencer_config.tensorrt_config.save_engine		bool
inferencer_config.tensorrt_config.trt_engine		hidden
inferencer_config.tensorrt_config.calibrator_config		collection
inferencer_config.input_nodes		list	list of string
inferencer_config.output_nodes		list	list of string
inferencer_config.batch_size		integer		16
inferencer_config.image_height		integer		384
inferencer_config.image_width		integer		1248
inferencer_config.image_channels		integer		3
inferencer_config.gpu_index		integer		0
inferencer_config.target_classes		list	list of string	[“car”]					yes	yes
inferencer_config.stride		integer
bbox_handler_config		collection
bbox_handler_config.kitti_dump		bool		TRUE
bbox_handler_config.disable_overlay		bool		FALSE
bbox_handler_config.overlay_linewidth		integer		2
bbox_handler_config.classwise_bbox_handler_config		list							yes	yes
bbox_handler_config.classwise_bbox_handler_config.key		string		default
bbox_handler_config.classwise_bbox_handler_config.value		collection
bbox_handler_config.classwise_bbox_handler_config.value.output_map		string
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config		collection
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.coverage_threshold	Coverage Threshold	float	The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering.	0.005		0	1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_eps	DBSCAN Samples Distance	float	The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together.	0.3		0	1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_min_samples	DBSCAN Minimum Samples	float	The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself.	0.05		0	1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.minimum_bounding_box_height	Minimum Bounding Box Height	integer	The minimum height in pixels to consider as a valid detection post clustering.	4		0	10000
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.clustering_algorithm	Clustering Algorithm	string	Defines the post-processing algorithm to cluter raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined.	__DBSCAN__				__DBSCAN__, __NMS__, __HYBRID__
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_confidence_threshold	DBSCAN Confidence Threshold	float	The confidence threshold used to filter out the clustered bounding box output from DBSCAN.	0.9		0.1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_iou_threshold	NMS IOU Threshold	float	The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs.			0	1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_confidence_threshold	NMS Confidence Threshold	float	The confidence threshold to filter out clustered bounding boxes from NMS.			0	1
bbox_handler_config.classwise_bbox_handler_config.value.confidence_model		string		aggregate_cov
bbox_handler_config.classwise_bbox_handler_config.value.output_map		string
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color		collection	0		0,1,2
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.R		integer		255
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.G		integer		0
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.B		integer		0
bbox_handler_config.postproc_classes		list	list of string

prune

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
model	Model path	hidden	UNIX path to where the input model is located.					yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.					yes
results_dir	Results directory	hidden
key	Encode key	hidden
normalizer	Normalizer	string	How to normalize	max			max, L2
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union			union, intersection, arithmetic_mean,geometric_mean	no
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8				no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1	0	1		yes	yes
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16				no
excluded_layers	Excluded layers	string	string of list: List of excluded_layers. Examples: -i item1 item2
verbose	verbosity	hidden		TRUE

train

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular	regex
version	Schema Version	const	The version of this schema	1
enable_determinism	Enable determinism	bool	Flag to enable deterministic training	FALSE				FALSE, TRUE
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.image_extension	Image Extension	string	Extension of the images to be used.	png				png, jpg, jpeg	yes
dataset_config.data_sources.tfrecords_path	TFRecord Path	hidden			/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.data_sources.image_directory_path	Image Path	hidden			/shared/users/1234/datasets/5678/training
dataset_config.validation_data_source.tfrecords_path	Validation TFRecord Path	hidden			/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.validation_data_source.image_directory_path	Validation Image Path	hidden			/shared/users/1234/datasets/5678/training
dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person						^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person						^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold	Validation Fold	integer	In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold.			0
augmentation_config	Data Augmentation	collection	Collection of parameters to configure the preprocessing and on the fly data augmentation							Yes
augmentation_config.preprocessing.output_image_width	Image Width	integer	The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16.	1248		480			yes	Yes
augmentation_config.preprocessing.output_image_height	Image Height	integer	The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16.	384		272			yes	Yes
augmentation_config.preprocessing.min_bbox_width	Bounding Box Width	float	The minimum width of the object labels to be considered for training.	1		0			yes
augmentation_config.preprocessing.min_bbox_height	Bounding Box Height	float	The minimum height of the object labels to be considered for training.	1		0			yes
augmentation_config.preprocessing.output_image_channel	Image Channel	integer	The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported.	3				1, 3	yes
augmentation_config.preprocessing.crop_right	Crop Right	integer	The right boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_left	Crop Left	integer	The left boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_top	Crop Top	integer	The top boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.crop_bottom	Crop Bottom	integer	The bottom boundary of the crop to be extracted from the original image.			0			yes
augmentation_config.preprocessing.scale_height	Scale Height	float	The floating point factor to scale the height of the cropped images.			0			yes
augmentation_config.preprocessing.scale_width	Scale Width	float	The floating point factor to scale the width of the cropped images.			0			yes
augmentation_config.spatial_augmentation.hflip_probability	Horizontal-Flip Probability	float	The probability to flip an input image horizontally.	0.5		0	1
augmentation_config.spatial_augmentation.vflip_probability	Vertical-Flip Probability	float	The probability to flip an input image vertically.			0	1
augmentation_config.spatial_augmentation.zoom_min	Minimum Zoom Scale	float	The minimum zoom scale of the input image.	1		0
augmentation_config.spatial_augmentation.zoom_max	Maximum Zoom Scale	float	The maximum zoom scale of the input image.	1		0
augmentation_config.spatial_augmentation.translate_max_x	X-Axis Maximum Traslation	float	The maximum translation to be added across the x axis.	8		0
augmentation_config.spatial_augmentation.translate_max_y	Y-Axis Maximum Translation	float	The maximum translation to be added across the y axis.	8		0
augmentation_config.spatial_augmentation.rotate_rad_max	Image Rotation	float	The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max].			0
augmentation_config.color_augmentation.color_shift_stddev	Color Shift Standard Deviation	float	The standard devidation value for the color shift.			0	1
augmentation_config.color_augmentation.hue_rotation_max	Hue Maximum Rotation	float	The maximum rotation angle for the hue rotation matrix.	25		0	360
augmentation_config.color_augmentation.saturation_shift_max	Saturation Maximum Shift	float	The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift.	0.2		0	1
augmentation_config.color_augmentation.contrast_scale_max	Contrast Maximum Scale	float	The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged.	0.1		0	1
augmentation_config.color_augmentation.contrast_center	Contrast Center	float	The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5.	0.5				0.5
bbox_rasterizer_config	Bounding box rasterizer	collection	Collection of parameters to configure the bounding box rasterizer
bbox_rasterizer_config.deadzone_radius	Bounding box rasterizer deadzone radius	float		0.4		0	1		yes
model_config	Model	collection
model_config.arch	BackBone Architecture	string	The architecture of the backbone feature extractor to be used for training.	resnet				resnet	yes
model_config.pretrained_model_file	PTM File Path	hidden	This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file is to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains are supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer.		/shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5
model_config.load_graph	PTM Load Graph	bool	A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.	FALSE
model_config.freeze_blocks	Freeze Blocks	integer	This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates.			0	3
model_config.freeze_bn	Freeze Batch Normalization	bool	A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections	All Projections	bool	For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.
model_config.num_layers	Number of Layers	integer	The depth of the feature extractor for scalable templates.	18				10, 18, 34, 50, 101	yes
model_config.use_pooling	Use Pooling	bool	Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm	Use Batch Normalization	bool	A flag to determine whether to use Batch Normalization layers or not.	TRUE
model_config.dropout_rate	Dropout Rate	float	Probability for drop out			0	1
model_config.training_precision.backend_floatx	Backend Training Precision	string	A nested parameter that sets the precision of the backend training framework.					__FLOAT32__	yes
model_config.objective_set.cov	Objective COV	collection	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	{}					yes
model_config.objective_set.bbox.scale	Objective Bounding Box Scale	float	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	35					yes
model_config.objective_set.bbox.offset	Objective Bounding Box Offset	float	The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline.	0.5					yes
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	4		1			yes
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	120		1			yes	Yes
training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-06					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	5.00E-04					yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.100000001		0	1		yes	Yes
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.699999988		0	1		yes	Yes
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__NO_REG__, __L1__, __L2__	yes
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-09					yes
training_config.optimizer.adam.epsilon	Optimizer Adam Epsilon	float	A very small number to prevent any division by zero in the implementation.	1.00E-08					yes
training_config.optimizer.adam.beta1	Optimizer Adam Beta1	float		0.899999976					yes
training_config.optimizer.adam.beta2	Optimizer Adam Beta2	float		0.999000013					yes
training_config.cost_scaling.enabled	Enable Cost Scaling	bool	Enables cost scaling during training.	FALSE					yes
training_config.cost_scaling.initial_exponent	Cost Scaling Initial Exponent	float		20					yes
training_config.cost_scaling.increment	Cost Scaling Increment	float		0.005					yes
training_config.cost_scaling.decrement	Cost Scaling Decrement	float		1					yes
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10		0			yes
evaluation_config	Evaluation	collection							yes
evaluation_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				__SAMPLE__, __INTEGRATE__
evaluation_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1			yes
evaluation_config.first_validation_epoch	First Validation Epoch	integer	The first epoch to start running validation. Ideally it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation, since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down.	30		1			yes
cost_function_config	Cost function	collection
cost_function_config.enable_autoweighting	Auto-Weighting	bool		TRUE					yes
cost_function_config.max_objective_weight	Maximum Objective Weight	float		0.999899983
cost_function_config.min_objective_weight	Minimum Objective Weight	float		1.00E-04
classwise_config	Class-wise organized parameters	list
classwise_config.key	Class Key	string	Name of class for the classwise parameters		person
classwise_config.value.evaluation_config	Evaluation config elements per class	collection
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap	Minimum Detection Ground Truth Overlaps	float	Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class.	0.5		0	1		yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height	Minimum Height	integer	Minimum height in pixels for a valid ground truth and prediction bbox.	20		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height	Maximum Height	integer	Maximum height in pixels for a valid ground truth and prediction bbox.	9999		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width	Minimum Width	integer	Minimum width in pixels for a valid ground truth and prediction bbox.	10		0			yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width	Maximum Width	integer	Maximum width in pixels for a valid ground truth and prediction bbox.	9999		0			yes
classwise_config.value.cost_function_config	Class-wise cost fuction config per class	collection							yes
classwise_config.value.cost_function_config.class_weight	Class Weight	float		4					yes
classwise_config.value.cost_function_config.coverage_foreground_weight	Coverage Forground Weight	float		0.050000001					yes
classwise_config.value.cost_function_config.objectives	Objectives	list		[{“name”: “cov”, “initial_weight”: 1.0, “weight_target”: 1.0}, {“name”: “bbox”, “initial_weight”: 10.0, “weight_target”: 10.0}]					yes
classwise_config.value.cost_function_config.objectives.name	Objective Name	string	Objective name such as cov or bbox.	cov					yes
classwise_config.value.cost_function_config.objectives.initial_weight	Initial Weight	float	Initial weight for named objective.	1					yes
classwise_config.value.cost_function_config.objectives.weight_target	Weight Target	float	Target weight for named objective.	1					yes
classwise_config.value.bbox_rasterizer_config	Rasterization	collection							yes
classwise_config.value.bbox_rasterizer_config.cov_center_x	Center of Object X-Coordinate	float	x-coordinate of the center of the object	0.5		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_center_y	Center of Object Y-Coordinate	float	y-coordinate of the center of the object	0.5		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_radius_x	Center of Object X-Radius	float	x-radius of the coverage ellipse	1		0	1		yes
classwise_config.value.bbox_rasterizer_config.cov_radius_y	Center of Object Y-Radius	float	y-radius of the coverage ellipse	1		0	1		yes
classwise_config.value.bbox_rasterizer_config.bbox_min_radius	Bounding Box Minimum Radius	float	The minimum radius of the coverage region to be drawn for boxes	1		0	1		yes
classwise_config.postprocessing_config	Post-Processing	collection
classwise_config.postprocessing_config.clustering_config.coverage_threshold	Coverage Threshold	float	The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering.	0.0075		0	1		yes
classwise_config.postprocessing_config.clustering_config.dbscan_eps	DBSCAN Samples Distance	float	The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together.	0.230000004		0	1		yes
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples	DBSCAN Minimum Samples	float	The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself.	0.050000001		0	1		yes
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height	Minimum Bounding Box Height	integer	The minimum height in pixels to consider as a valid detection post clustering.	20		0	10000		yes
classwise_config.postprocessing_config.clustering_config.clustering_algorithm	Clustering Algorithm	string	Defines the post-processing algorithm to cluter raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined.	__DBSCAN__				__DBSCAN__, __NMS__, __HYBRID__	yes
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold	DBSCAN Confidence Threshold	float	The confidence threshold used to filter out the clustered bounding box output from DBSCAN.	0.1		0.1			yes
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold	NMS IOU Threshold	float	The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs.	0.2		0	1
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold	NMS Confidence Threshold	float	The confidence threshold to filter out clustered bounding boxes from NMS.	0		0	1

dssd

evaluate

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
version	Schema Version	const	The version of this schema	1							internal
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
dataset_config.data_sources.image_directory_path	Image path	hidden
dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
dataset_config.validation_data_sources.image_directory_path	Image path	hidden
dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
training_config.learning_rate		collection
training_config.learning_rate.soft_start_annealing_schedule		collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
eval_config	Evaluation	collection
eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		300						yes
augmentation_config.output_height	Model Input height	integer		300						yes
augmentation_config.output_channel	Model Input channel	integer		3						yes
augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios	Aspect Ratio	srting	The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	TRUE
dssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
dssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
dssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch	Arch	string	The backbone for feature extraction	resnet
dssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
dssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
dssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels	Prediction Layer Channel	integer	The number of channel of the DSSD prediction layer	512		1

inference

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
version	Schema Version	const	The version of this schema	1							internal
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
threshold	Threshold	float		0.3
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
dataset_config.data_sources.image_directory_path	Image path	hidden
dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
dataset_config.validation_data_sources.image_directory_path	Image path	hidden
dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
training_config.learning_rate		collection
training_config.learning_rate.soft_start_annealing_schedule		collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
eval_config	Evaluation	collection
eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		300						yes
augmentation_config.output_height	Model Input height	integer		300						yes
augmentation_config.output_channel	Model Input channel	integer		3						yes
augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios	Aspect Ratio	srting	The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	TRUE
dssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
dssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
dssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch	Arch	string	The backbone for feature extraction	resnet
dssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
dssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
dssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels	Prediction Layer Channel	integer	The number of channel of the DSSD prediction layer	512		1

train

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
version	Schema Version	const	The version of this schema	1							internal
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
initial_epoch	Initial epoch cli	integer		1
use_multiprocessing	CLI parameter	bool		FALSE
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
dataset_config.data_sources.image_directory_path	Image path	hidden
dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
dataset_config.validation_data_sources.image_directory_path	Image path	hidden
dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
training_config.learning_rate		collection
training_config.learning_rate.soft_start_annealing_schedule		collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
eval_config	Evaluation	collection
eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		300						yes
augmentation_config.output_height	Model Input height	integer		300						yes
augmentation_config.output_channel	Model Input channel	integer		3						yes
augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios	Aspect Ratio	srting	The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	TRUE
dssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
dssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
dssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch	Arch	string	The backbone for feature extraction	resnet
dssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
dssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
dssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels	Prediction Layer Channel	integer	The number of channel of the DSSD prediction layer	512		1

efficientdet

convert

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
b	batch_size	integer	calibration batch size	8			yes		CLI argument
c	cache_file	path	calibration cache file (default cal.bin)						CLI argument
d	input_dims	list	comma separated list of input dimensions (not required for TLT 3.0 new models).						CLI argument
i	input_order	enum	input dimension ordering	nchw		nchw, nhwc, nc			CLI argument
m	max_batch_size	integer	maximum TensorRT engine batch size (default 16). If meet with out-of-memory issue, please decrease the batch size accordingly.	16				yes	CLI argument
o	outputs	list	comma separated list of output node names						CLI argument
p	parse_profile_shapes	list	comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g.,NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.						CLI argument
s	strict_type_constraints	bool	TensorRT strict_type_constraints flag for INT8 mode	FALSE					CLI argument
t	data_type	enum	TensorRT data type	fp32		fp32, fp16, int8		yes	CLI argument
u	dla_core	int	Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback).	-1					CLI argument
w	max_workspace_size	int	maximum workspace size of TensorRT engine (default 1<<30). If meet with out-of-memory issue, please increase the workspace size accordingly.		1<<30, 2<<30				CLI argument
platform	platform	enum	platform label	rtx			yes	yes

evaluate

parameter	display_name	value_type	description	default_value	valid_min	required
version	Schema Version	const	The version of this schema	1
training_config	Training config	collection	Parameters to configure the training process
training_config.train_batch_size	training batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus.	16	0
training_config.iterations_per_loop		integer		10
training_config.num_epochs	number of epochs	integer	The number of epochs to train the network	6	0
training_config.num_examples_per_epoch	number of images per epoch per gpu	integer	Total number of images in the training set divided by the number of GPUs	118288	0
training_config.checkpoint	path to pretrained model	hidden	The path to the pretrained model, if any
training_config.pruned_model_path	path to pruned model	hidden	The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period	checkpoint period	integer	The number of training epochs that should run per model checkpoint/validation	2	0
training_config.amp	AMP	bool	Whether to use mixed precision training	TRUE
training_config.moving_average_decay	moving average decay	float	Moving average decay	0.9999
training_config.l2_weight_decay	L2 weight decay	float	L2 weight decay	0.00004
training_config.l1_weight_decay	L1 weight decay	float	L1 weight decay	0
training_config.lr_warmup_epoch	learning rate warmup epoch	integer	The number of warmup epochs in the learning rate schedule	3	0
training_config.lr_warmup_init	initial learning rate during warmup	float	The initial learning rate in the warmup period	0.002
training_config.learning_rate	maximum learning rate	float	The maximum learning rate	0.02
training_config.tf_random_seed	random seed	integer	The random seed	42	0
training_config.clip_gradients_norm	clip gradient by norm	float	Clip gradients by the norm value	5.00E+00
training_config.skip_checkpoint_variables	skip checkpoint variables	string	If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.	-predict*
eval_config	evaluation config	collection	Parameters to configure evaluation
eval_config.eval_epoch_cycle	evaluation epoch cycle	integer	The number of training epochs that should run per validation	2	0
eval_config.max_detections_per_image	maximum detections per image	integer	The maximum number of detections to visualize	100	0
eval_config.min_score_thresh	minimum confidence threshold	float	The lowest confidence of the predicted box and ground truth box that can be considered a match	0.4
eval_config.eval_batch_size	evaluation batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus	16	0
eval_config.eval_samples	number of samples for evaluation	integer	The number of samples for evaluation	500
dataset_config	dataset config	collection	Parameters to configure dataset
dataset_config.image_size	image size	string	The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input.	512,512		yes
dataset_config.training_file_pattern	training file pattern	hidden	The TFRecord path for training
dataset_config.validation_file_pattern	validation file pattern	hidden	The TFRecord path for validation
dataset_config.validation_json_file	validation json file	hidden	The annotation file path for validation
dataset_config.num_classes	number of classes	integer	The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class)	91		yes
dataset_config.max_instances_per_image	maximum instances per image	integer	The maximum number of object instances to parse (default: 100)	100
dataset_config.skip_crowd_during_training	skip crowd during training	bool	Specifies whether to skip crowd during training	TRUE
model_config	model config	collection	Parameters to configure model
model_config.model_name	model name	string	Model name	efficientdet-d0
model_config.min_level	minimum level	integer	The minimum level of the output feature pyramid	3
model_config.max_level	maximum level	integer	The maximum level of the output feature pyramid	7
model_config.num_scales	number of scales	integer	The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)])	3
model_config.aspect_ratios	aspect ratios	string	A list of tuples representing the aspect ratios of anchors on each pyramid level	[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale	anchor scale	integer	Scale of the base-anchor size to the feature-pyramid stride	4
augmentation_config	augmentation config	collection	Parameters to configure model
augmentation_config.rand_hflip	random horizontal flip	bool	Whether to perform random horizontal flip	TRUE
augmentation_config.random_crop_min_scale	minimum scale of random crop	float	The minimum scale of RandomCrop augmentation.	0.1
augmentation_config.random_crop_max_scale	maximum scale of random crop	float	The maximum scale of RandomCrop augmentation.	2

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec_file	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
model_path	Model	hidden	UNIX path to the model file	0.1			yes
output_path	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
cal_image_dir		hidden
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
max_batch_size		integer			1
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
verbose		hidden		TRUE

inference

parameter	display_name	value_type	description	default_value	valid_min	required
version	Schema Version	const	The version of this schema	1
training_config	Training config	collection	Parameters to configure the training process
training_config.train_batch_size	training batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus.	16	0
training_config.iterations_per_loop		integer		10
training_config.num_epochs	number of epochs	integer	The number of epochs to train the network	6	0
training_config.num_examples_per_epoch	number of images per epoch per gpu	integer	Total number of images in the training set divided by the number of GPUs	118288	0
training_config.checkpoint	path to pretrained model	hidden	The path to the pretrained model, if any
training_config.pruned_model_path	path to pruned model	hidden	The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period	checkpoint period	integer	The number of training epochs that should run per model checkpoint/validation	2	0
training_config.amp	AMP	bool	Whether to use mixed precision training	TRUE
training_config.moving_average_decay	moving average decay	float	Moving average decay	0.9999
training_config.l2_weight_decay	L2 weight decay	float	L2 weight decay	0.00004
training_config.l1_weight_decay	L1 weight decay	float	L1 weight decay	0
training_config.lr_warmup_epoch	learning rate warmup epoch	integer	The number of warmup epochs in the learning rate schedule	3	0
training_config.lr_warmup_init	initial learning rate during warmup	float	The initial learning rate in the warmup period	0.002
training_config.learning_rate	maximum learning rate	float	The maximum learning rate	0.02
training_config.tf_random_seed	random seed	integer	The random seed	42	0
training_config.clip_gradients_norm	clip gradient by norm	float	Clip gradients by the norm value	5.00E+00
training_config.skip_checkpoint_variables	skip checkpoint variables	string	If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.	-predict*
eval_config	evaluation config	collection	Parameters to configure evaluation
eval_config.eval_epoch_cycle	evaluation epoch cycle	integer	The number of training epochs that should run per validation	2	0
eval_config.max_detections_per_image	maximum detections per image	integer	The maximum number of detections to visualize	100	0
eval_config.min_score_thresh	minimum confidence threshold	float	The lowest confidence of the predicted box and ground truth box that can be considered a match	0.4
eval_config.eval_batch_size	evaluation batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus	16	0
eval_config.eval_samples	number of samples for evaluation	integer	The number of samples for evaluation	500
dataset_config	dataset config	collection	Parameters to configure dataset
dataset_config.image_size	image size	string	The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input.	512,512		yes
dataset_config.training_file_pattern	training file pattern	hidden	The TFRecord path for training
dataset_config.validation_file_pattern	validation file pattern	hidden	The TFRecord path for validation
dataset_config.validation_json_file	validation json file	hidden	The annotation file path for validation
dataset_config.num_classes	number of classes	integer	The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class)	91		yes
dataset_config.max_instances_per_image	maximum instances per image	integer	The maximum number of object instances to parse (default: 100)	100
dataset_config.skip_crowd_during_training	skip crowd during training	bool	Specifies whether to skip crowd during training	TRUE
model_config	model config	collection	Parameters to configure model
model_config.model_name	model name	string	Model name	efficientdet-d0
model_config.min_level	minimum level	integer	The minimum level of the output feature pyramid	3
model_config.max_level	maximum level	integer	The maximum level of the output feature pyramid	7
model_config.num_scales	number of scales	integer	The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)])	3
model_config.aspect_ratios	aspect ratios	string	A list of tuples representing the aspect ratios of anchors on each pyramid level	[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale	anchor scale	integer	Scale of the base-anchor size to the feature-pyramid stride	4
augmentation_config	augmentation config	collection	Parameters to configure model
augmentation_config.rand_hflip	random horizontal flip	bool	Whether to perform random horizontal flip	TRUE
augmentation_config.random_crop_min_scale	minimum scale of random crop	float	The minimum scale of RandomCrop augmentation.	0.1
augmentation_config.random_crop_max_scale	maximum scale of random crop	float	The maximum scale of RandomCrop augmentation.	2

prune

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
model	Model path	hidden	UNIX path to where the input model is located.					yes
output_dir	Output Directory	hidden	UNIX path to where the pruned model will be saved.					yes
key	Encode key	hidden
normalizer	Normalizer	string	How to normalize	max			max, L2
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union			union, intersection, arithmetic_mean,geometric_mean	no
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8				no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1	0	1		yes	yes
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16				no
excluded_layers	Excluded layers	string	string of list: List of excluded_layers. Examples: -i item1 item2
verbose	verbosity	hidden		TRUE

train

parameter	display_name	value_type	description	default_value	valid_min	required
version	Schema Version	const	The version of this schema	1
training_config	Training config	collection	Parameters to configure the training process
training_config.train_batch_size	training batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus.	16	0
training_config.iterations_per_loop		integer		10
training_config.num_epochs	number of epochs	integer	The number of epochs to train the network	6	0
training_config.num_examples_per_epoch	number of images per epoch per gpu	integer	Total number of images in the training set divided by the number of GPUs	118288	0
training_config.checkpoint	path to pretrained model	hidden	The path to the pretrained model, if any
training_config.pruned_model_path	path to pruned model	hidden	The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period	checkpoint period	integer	The number of training epochs that should run per model checkpoint/validation	2	0
training_config.amp	AMP	bool	Whether to use mixed precision training	TRUE
training_config.moving_average_decay	moving average decay	float	Moving average decay	0.9999
training_config.l2_weight_decay	L2 weight decay	float	L2 weight decay	0.00004
training_config.l1_weight_decay	L1 weight decay	float	L1 weight decay	0
training_config.lr_warmup_epoch	learning rate warmup epoch	integer	The number of warmup epochs in the learning rate schedule	3	0
training_config.lr_warmup_init	initial learning rate during warmup	float	The initial learning rate in the warmup period	0.002
training_config.learning_rate	maximum learning rate	float	The maximum learning rate	0.02
training_config.tf_random_seed	random seed	integer	The random seed	42	0
training_config.clip_gradients_norm	clip gradient by norm	float	Clip gradients by the norm value	5.00E+00
training_config.skip_checkpoint_variables	skip checkpoint variables	string	If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.	-predict*
eval_config	evaluation config	collection	Parameters to configure evaluation
eval_config.eval_epoch_cycle	evaluation epoch cycle	integer	The number of training epochs that should run per validation	2	0
eval_config.max_detections_per_image	maximum detections per image	integer	The maximum number of detections to visualize	100	0
eval_config.min_score_thresh	minimum confidence threshold	float	The lowest confidence of the predicted box and ground truth box that can be considered a match	0.4
eval_config.eval_batch_size	evaluation batch size	integer	The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus	16	0
eval_config.eval_samples	number of samples for evaluation	integer	The number of samples for evaluation	500
dataset_config	dataset config	collection	Parameters to configure dataset
dataset_config.image_size	image size	string	The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input.	512,512		yes
dataset_config.training_file_pattern	training file pattern	hidden	The TFRecord path for training
dataset_config.validation_file_pattern	validation file pattern	hidden	The TFRecord path for validation
dataset_config.validation_json_file	validation json file	hidden	The annotation file path for validation
dataset_config.num_classes	number of classes	integer	The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class)	91		yes
dataset_config.max_instances_per_image	maximum instances per image	integer	The maximum number of object instances to parse (default: 100)	100
dataset_config.skip_crowd_during_training	skip crowd during training	bool	Specifies whether to skip crowd during training	TRUE
model_config	model config	collection	Parameters to configure model
model_config.model_name	model name	string	Model name	efficientdet-d0
model_config.min_level	minimum level	integer	The minimum level of the output feature pyramid	3
model_config.max_level	maximum level	integer	The maximum level of the output feature pyramid	7
model_config.num_scales	number of scales	integer	The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)])	3
model_config.aspect_ratios	aspect ratios	string	A list of tuples representing the aspect ratios of anchors on each pyramid level	[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale	anchor scale	integer	Scale of the base-anchor size to the feature-pyramid stride	4
augmentation_config	augmentation config	collection	Parameters to configure model
augmentation_config.rand_hflip	random horizontal flip	bool	Whether to perform random horizontal flip	TRUE
augmentation_config.random_crop_min_scale	minimum scale of random crop	float	The minimum scale of RandomCrop augmentation.	0.1
augmentation_config.random_crop_max_scale	maximum scale of random crop	float	The maximum scale of RandomCrop augmentation.	2

faster_rcnn

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
data_type	Pruning Granularity	enum	Number of filters to remove at a time.	int8		int8, fp32, fp16	yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100			no
experiment_spec	Experiment Spec	string	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.		hidden from train expeirment		yes
model	Model path	hidden	UNIX path to where the input model is located.		hidden		yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden		yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	TRUE			no
engine-file	Engine File	hidden	UNIX path to the model engine file.		/export/input_model_file.<data_type>.trt		yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	16				yes
cal_cache_file	Calibration cache file	string	Unix PATH to the int8 calibration cache file		hidden		yes	yes

prune

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular
version	Schema Version	const	The version of this schema						no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1		0	1		yes	yes
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8					no
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16					no
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union				union, intersection, arithmetic_mean,geometric_mean	no
model	Model path	hidden	UNIX path to where the input model is located.		hidden				yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden				yes

train

comments	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	regex	popular	valid_options_description
	version	Schema Version	const	The version of this schema	1
Generates randomness around a point. Seed is where you begin try converging towards. Only required if needed to replicate a run. Does the log push out this value?	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42								>=0
	verbose	Verbose	bool	Flag of verbosity	TRUE								TRUE, FALSE
	dataset_config	Dataset	collection	Parameters to configure the dataset
JPG/PNG - auto pick this up	dataset_config.image_extension	Image Extension	hidden	Extension of the images to be used.	__jpg__				__png__, __jpg__, __jpeg__	yes			__png__, __jpg__, __jpeg__
Can be system generated - after conversion. This is the dataset preparation step.	dataset_config.data_sources.tfrecords_path	TFRecord Path	hidden			/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
Where the dataset is - where the images are. Will it figure it out from the parent directory?	dataset_config.data_sources.image_directory_path	Image Path	hidden			/shared/users/1234/datasets/5678/training
Read all labels in the label file (car, truck, suv, person). Ask the user to map it to Vehicle/Person.	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person					^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person					^[-a-zA-Z0-9_]{1,40}$
Default - 0	dataset_config.validation_fold	Validation Fold	integer	In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold.	0
Dataset specific config - augmentation	augmentation_config	Data Augmentation	collection	Collection of parameters to configure the preprocessing and on the fly data augmentation								Yes
The resolution at which the network should be trained for. Get the max dimesnion of images in the dataset and set the as the default behind the scenes - has to be multiple of 16.	augmentation_config.preprocessing.output_image_width	Image Width	integer	The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16.	1248		480			yes		Yes
Get the max dimesnion of images in the dataset and set the as the default behind the scenes - has to be multiple of 16	augmentation_config.preprocessing.output_image_height	Image Height	integer	The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16.	384		272			yes		Yes
Smaller side of image(height or width)	augmentation_config.preprocessing.output_image_min	Image smaller side’s size	integer	The smaller side of image size. This is used for resize and keep aspect ratio in FasterRCNN. If this value is postive, preprocessor will resize the image and keep aspect ratio, such that the smaller side’s size is this value. The other side will scale accordingly by aspect ratio. This value has to be a multiple of 16.	0
Limit of larger side’s size of an image when resize and keep aspect ratio	augmentation_config.preprocessing.output_image_max	Limit of larger side’s size when resize and keep aspect ratio	integer	The maximum size of image’s larger side. If after resize and keeping aspect ratio, the larger side is exceeds this limit, the image will be resized such that the larger side’s size is this value, and hence the smaller side’s size is smaller than output_image_min. This value has to be a multiple of 16.	0
Flag to enable automatic image scaling	augmentation_config.preprocessing.enable_auto_resize	Flag to enable or disable automatic image scaling	bool	If True, automatic image scaling will be enabled. Otherwise, disabled.	FALSE								TRUE, FALSE
Limit of what min dimension you DONT want to train for. Default 10x10	augmentation_config.preprocessing.min_bbox_width	Bounding Box Width	float	The minimum width of the object labels to be considered for training.	1		0			yes			>=0
Limit of what min dimension you DONT want to train for. Default 10x10	augmentation_config.preprocessing.min_bbox_height	Bounding Box Height	float	The minimum height of the object labels to be considered for training.	1		0			yes			>=0
3 channel default	augmentation_config.preprocessing.output_image_channel	Image Channel	integer	The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported.	3				1, 3	yes			3, 1
0	augmentation_config.preprocessing.crop_right	Crop Right	integer	The right boundary of the crop to be extracted from the original image.	0		0			yes			>=0
0	augmentation_config.preprocessing.crop_left	Crop Left	integer	The left boundary of the crop to be extracted from the original image.	0		0			yes			>=0
0	augmentation_config.preprocessing.crop_top	Crop Top	integer	The top boundary of the crop to be extracted from the original image.	0		0			yes			>=0
0	augmentation_config.preprocessing.crop_bottom	Crop Bottom	integer	The bottom boundary of the crop to be extracted from the original image.	0		0			yes			>=0
0	augmentation_config.preprocessing.scale_height	Scale Height	float	The floating point factor to scale the height of the cropped images.	0		0			yes			>=0
0	augmentation_config.preprocessing.scale_width	Scale Width	float	The floating point factor to scale the width of the cropped images.	0		0			yes			>=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg.	augmentation_config.spatial_augmentation.hflip_probability	Horizontal-Flip Probability	float	The probability to flip an input image horizontally.	0.5		0	1					[0, 1)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg.	augmentation_config.spatial_augmentation.vflip_probability	Vertical-Flip Probability	float	The probability to flip an input image vertically.	0		0	1					[0, 1)
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg.	augmentation_config.spatial_augmentation.zoom_min	Minimum Zoom Scale	float	The minimum zoom scale of the input image.	1		0						(0, 1]
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg.	augmentation_config.spatial_augmentation.zoom_max	Maximum Zoom Scale	float	The maximum zoom scale of the input image.	1		0						[1, 2)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg which will disable vs enable.	augmentation_config.spatial_augmentation.translate_max_x	X-Axis Maximum Traslation	float	The maximum translation to be added across the x axis.	8		0						>=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg.	augmentation_config.spatial_augmentation.translate_max_y	Y-Axis Maximum Translation	float	The maximum translation to be added across the y axis.	8		0						>=0
Enable go tyo default, disable - 0	augmentation_config.spatial_augmentation.rotate_rad_max	Image Rotation	float	The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max].	0.69		0						>=0
	augmentation_config.spatial_augmentation.rotate_probability	Image Rotation	float	The probability of image rotation. The range is [0, 1]									[0, 1)
	augmentation_config.color_augmentation.color_shift_stddev	Color Shift Standard Deviation	float	The standard devidation value for the color shift.	0		0	1					[0, 1)
	augmentation_config.color_augmentation.hue_rotation_max	Hue Maximum Rotation	float	The maximum rotation angle for the hue rotation matrix.	25		0	360					[0, 360)
	augmentation_config.color_augmentation.saturation_shift_max	Saturation Maximum Shift	float	The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift.	0.2		0	1					[0, 1)
	augmentation_config.color_augmentation.contrast_scale_max	Contrast Maximum Scale	float	The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged.	0.1		0	1					[0, 1)
	augmentation_config.color_augmentation.contrast_center	Contrast Center	float	The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5.	0.5				0.5				0.5
Might need different defaults based on task/scenario	model_config	Model	collection
	model_config.arch	BackBone Architecture	string	The architecture of the backbone feature extractor to be used for training.	resnet:18				resnet:18	yes			resnet:10’, ‘resnet:18’, ‘resnet:34’, ‘resnet:50’, ‘resnet:101’, ‘vgg16’, ‘vgg:16’, ‘vgg:19’, ‘googlenet’, ‘mobilenet_v1’, ‘mobilenet_v2’, ‘darknet:19’, ‘darknet:53’, ‘resnet101’, ‘efficientnet:b0’, ‘efficientnet:b1’,
Confirm correct default values	model_config.freeze_blocks	Freeze Blocks	integer	This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates.			0	3					depends on arch
Default values. Verify with TAO Toolkit. 2 sets of defaults required.	model_config.freeze_bn	Freeze Batch Normalization	bool	A flag to determine whether to freeze the Batch Normalization layers in the model during training.	FALSE								TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required.	model_config.all_projections	All Projections	bool	For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.	TRUE								TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required.	model_config.use_pooling	Use Pooling	bool	Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.	FALSE								TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required.	model_config.dropout_rate	Dropout Rate	float	Probability for drop out	0		0	0.1					[0, 1)
	model_config.input_image_config	Input Image	collection	Configuration for input images
	model_config.input_image_config.size_height_width		collection
	model_config.input_image_config.size_height_width.height		integer		384
	model_config.input_image_config.size_height_width.width		integer		1248
	model_config.input_image_config.image_type	Image Type	enum	The type of images, either RGB or GRAYSCALE	RGB								__RGB__, __GRAYSCALE__
	model_config.input_image_config.size_min	Image smaller side’s size	integer	The size of an image’s smaller side, should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio									>=0
	model_config.input_image_config.size_height_width	Image size by height and width	collection	The size of images by specifying height and width.
	model_config.input_image_config.size_height_width.height	Image Height	integer	The height of images									>=0
	model_config.input_image_config.size_height_width.width	Image Width	integer	The width of images									>=0
	model_config.input_image_config.image_channel_order	Image Channel Order	string	The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images	rgb								rgb’, ‘bgr’, ‘l’
	model_config.input_image_config.image_channel_mean	Image Channel Means	list	A dict from ‘r’, ‘g’, ‘b’ or ‘l’(for GRAYSCALE images) to per-channel mean values.	[{“key”:”r”,”value”:103.0}, {“key”:”g”,”value”:103.0}, {“key”:”b”,”value”:103.0}]
	model_config.input_image_config.image_channel_mean.key	channel means key	string	string => one of r,g,b									r’, ‘g’, ‘b’, ‘l’
	model_config.input_image_config.image_channel_mean.value	channel means value	float	value in float									(0, 255)
	model_config.input_image_config.image_scaling_factor	Image Scaling Factor	float	A scalar to normalize the images after mean subtraction.	1								>0
	model_config.input_image_config.max_objects_num_per_image	Max Objects Num	integer	The maximum number of objects in an image. This is used for padding in data loader as different images can have different number of objects in its labels.	100								>=1
	model_config.anchor_box_config	Anchor Boxes	Collection
	model_config.anchor_box_config.scale	Anchor Scales	list	The list of anchor sizes(scales).	[64.0,128.0,256.0]								>0
	model_config.anchor_box_config.ratio	Anchor Ratios	list	The list of anchor aspect ratios.	[1.0,0.5,2.0]								>0
	model_config.roi_mini_batch	ROI Batch Size	integer	The batch size of ROIs for training the RCNN in the model	16								>0
	model_config.rpn_stride	RPN stride	integer	The stride of RPN feature map, compared to input resolutions. Currently only 16 is supported.	16								16
	model_config.drop_connect_rate	Drop Connect Rate	float	The rate of DropConnect. This is only useful for EfficientNet backbones.									(0, 1)
	model_config.rpn_cls_activation_type	RPN Classification Activation Type	string	Type of RPN classification head’s activation function. Currently only “sigmoid” is supported.									sigmoid
	model_config.use_bias	Use Bias	bool	Whether or not to use bias for convolutional layers									TRUE, FALSE
	model_config.roi_pooling_config	ROI Pooling	collection	Confiuration fo ROI Pooling layer
	model_config.roi_pooling_config.pool_size	Pool Size	integer	Pool size of the ROI Pooling operation.	7								>0
	model_config.roi_pooling_config.pool_size_2x	Pool Size Doubled	bool	Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling	FALSE								TRUE, FALSE
	model_config.activation	Activation	collection	Activation function for the model backbone. This is only useful for EfficientNet backbones.
	model_config.activation.activation_type	Activation Type	string	Type of the activation function of backbone.									relu, swish
	model_config.activation.activation_parameters	Activation Parameters	dict	A dict the maps name of a parameter to its value.
	training_config	Training	collection										>0
IMPORTANT. Open to user - default should smarty calculate. Check factors that influence.	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	8		1			yes			>0
Default - what is the optimal number of epcohs for each model. Smart feature in TAO Toolkit to auto stop once model converges	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	120		1			yes		Yes	TRUE, FALSE
Toggle for end user	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	TRUE					yes		Yes	>0
Default	training_config.learning_rate.soft_start .base_lr	Minimum Learning Rate	float		5.00E-06							Yes	>0
Default	training_config.learning_rate.soft_start .start_lr	Maximum Learning Rate	float		5.00E-04							Yes	(0, 1)
Default	training_config.learning_rate.soft_start .soft_start	Soft Start	float		0.100000001		0	1				Yes	>1
Default	training_config.learning_rate.soft_start .annealing_divider	Annealing	float		0.699999988		0	1				Yes	__NO_REG__, __L1__, __L2__
Default	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__NO_REG__, __L1__, __L2__	yes			>0
Default	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-09					yes			(0, 1)
Default	training_config.optimizer.adam.epsilon	Optimizer Adam Epsilon	float	A very small number to prevent any division by zero in the implementation.	1.00E-08					yes			(0, 1)
Default	training_config.optimizer.adam.beta_1	Optimizer Adam Beta1	float		0.899999976					yes			(0, 1)
Default	training_config.optimizer.adam.beta_2	Optimizer Adam Beta2	float		0.999000013					yes			>=1
Use default as 10. Provide last checpoint to user	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10		0			yes			TRUE, FALSE
	training_config.enable_augmentation	Enable Augmentation	bool	Whether or not to enable data augmentation	TRUE
	training_config.retrain_pruned_model	Pruned Model	hidden	The path of pruned model to be retrained
	training_config.pretrained_weights	Pretrained Weights	hidden	The path of the pretrained model(weights) used to initialize the model being trained
	training_config.resume_from_model	Resume Model	hidden	The path of the model used to resume a interrupted training									(0, 1)
	training_config.rpn_min_overlap	RPN Min Overlap	float	The lower IoU threshold used to match anchor boxes to groundtruth boxes.	0.1								(0, 1)
	training_config.rpn_max_overlap	RPN Max Overlap	float	The higher IoU threshold used to match anchor boxes to groundtruth boxes.	1								[0, 1)
	training_config.classifier_min_overlap	Classifier Min Overlap	float	The lower IoU threshold used to generate the proposal target.	0.1								(0, 1)
	training_config.classifier_max_overlap	Classifier Max Overlap	float	The higher IoU threshold used to generate the proposal target.	1								TRUE, FALSE
	training_config.gt_as_roi	Gt As ROI	bool	A flag to include groundtruth boxes in the positive ROIs for training the RCNN									>0
	training_config.std_scaling	RPN Regression Loss Scaling	float	A scaling factor (multiplier) for RPN regression loss	1
	training_config.classifier_regr_std	RCNN Regression Loss Scaling	list	Scaling factors (denominators) for the RCNN regression loss. A map from ¡®x¡¯, ¡®y¡¯, ¡®w¡¯, ¡®h¡¯ to its corresponding scaling factor, respectively	[{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}]
	training_config.classifier_regr_std.key	RCNN Regression Loss Scaling Key	string	one of x,y,h,w									>0
	training_config.classifier_regr_std.value	RCNN Regression Loss Scaling Value	float	float value for key
	training_config.output_model	Output Model Path	hidden	Path of the output model									>0
	training_config.rpn_pre_nms_top_N	RPN Pre-NMS Top N	integer	The number of boxes (ROIs) to be retained before the NMS in Proposal layer	12000								>=1
	training_config.rpn_mini_batch	RPN Mini Batch	integer	The batch size to train RPN	16								>0
	training_config.rpn_nms_max_boxes	RPN NMS Max Boxes	integer	The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer	2000								(0, 1)
	training_config.rpn_nms_overlap_threshold	RPN NMS IoU Threshold	float	The IoU threshold for NMS in Proposal layer	0.7								>0
	training_config.lambda_rpn_regr	RPN Regression Loss Weighting	float	Weighting factor for RPN regression loss	1								>0
	training_config.lambda_rpn_class	RPN classification Loss Weighting	float	Weighting factor for RPN classification loss.	1								>0
	training_config.lambda_cls_regr	RCNN Regression Loss Weighting	float	Weighting factor for RCNN regression loss	1								>0
	training_config.lambda_cls_class	RCNN Classification Loss Weighting	float	Weighting factor for RCNN classification loss	1								list of floats
	training_config.model_parallelism	Model Parallelism	list of floats	List of fractions for model parallelism
	training_config.early_stopping	Early Stopping	collection										“loss”
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping									>=0
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed									>0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training
	training_config.visualizer	Visualizer	collection										TRUE, False
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not									>=1
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
	evaluation_config	Evaluation	collection							yes
	evaluation_config.model	Model Path	string	The path to the model to run inference									>=1
	evaluation_config.rpn_pre_nms_top_N	RPN Pre-NMS Top N	integer	The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation	6000								(0, 1)
	evaluation_config.rpn_nms_overlap_threshold	RPN overlap threshold	float		0.7								>0
	evaluation_config.rpn_nms_max_boxes	RPN NMS Max Boxes	integer	The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer	300								>0
	evaluation_config.classifier_nms_max_boxes	Classifier NMS Max Boxes	integer	The maxinum numbere of boxes for RCNN NMS	100								(0, 1)
	evaluation_config.classifier_nms_overlap_threshold	Classifier NMS Overlap Threshold	float	The NMS overlap threshold in RCNN	0.3								(0, 1)
	evaluation_config.object_confidence_thres	Object Confidence Threshold	float	The objects confidence threshold	0.00001								TRUE, FALSE
	evaluation_config.use_voc07_11point_metric	Use VOC 11-point Metric	bool	Whether to use PASCAL-VOC 11-point metric									>=1
	evaluation_config.validation_period_during_training	Validation Period	integer	The period(number of epochs) to run validation during training									>=1
	evaluation_config.batch_size	Batch Size	integer	The batch size for evaluation									(0, 1)
	evaluation_config.trt_evaluation	TensorRT Evaluation	Collection	TensorRT evaluation
	evaluation_config.trt_evaluation.trt_engine	Trt Engine	String	TRT Engine									(0, 1)
	evaluation_config.gt_matching_iou_threshold	Gt Matching IoU Threshold	float	The IoU threshold to match groundtruth to detected objects. Only one of this collection or gt_matching_iou_threshold_range	0.5								(0, 1)
	evaluation_config.gt_matching_iou_threshold_range	Gt Matching IoU Threshold Range	collection	Only one of this collection or gt_matching_iou_threshold									(0, 1)
	evaluation_config.gt_matching_iou_threshold_range.start	Start	float	The starting value of the IoU range									TRUE, FALSE
	evaluation_config.gt_matching_iou_threshold_range.end	End	float	The end point of the IoU range(exclusive)
	evaluation_config.gt_matching_iou_threshold_range.step	Step	float	The step size of the IoU range
	evaluation_config.visualize_pr_curve	Visualize PR Curve	bool	Visualize precision-recall curve or not
	inference_config												>=1
	inference_config.images_dir	Images Directory	hidden	Path to the directory of images to run inference on									>0
	inference_config.model	Model Path	hidden	Path to the model to run inference on									>0
	inference_config.batch_size	Batch Size	integer	The batch size for inference									(0, 1)
	inference_config.rpn_pre_nms_top_N	RPN Pre-NMS Top N	integer	The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference	6000								(0, 1)
	inference_config.rpn_nms_max_boxes	RPN NMS Max Boxes	integer	The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer	300								(0, 1)
	inference_config.rpn_nms_overlap_threshold	RPN NMS IoU Threshold	float	The IoU threshold for NMS in Proposal layer	0.7								>0
	inference_config.bbox_visualize_threshold	Visualization Threshold	float	The confidence threshold for visualizing the bounding boxes	0.6								(0, 1)
	inference_config.object_confidence_thres	Object Confidence Threshold	float	The objects confidence threshold	0.00001
	inference_config.classifier_nms_max_boxes	Classifier NMS Max Boxes	integer	The maxinum numbere of boxes for RCNN NMS	100								True, False
	inference_config.classifier_nms_overlap_threshold	Classifier NMS Overlap Threshold	float	The NMS overlap threshold in RCNN	0.3
	inference_config.detection_image_output_dir	Image Output Directory	string	Path to the directory to save the output images during inference									0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
	inference_config.bbox_caption_on	Bbox Caption	bool	Enable text caption for bounding box or not
	inference_config.labels_dump_dir	Labels Ouptut Directory	hidden	Path to the directory to save the output labels
	inference_config.nms_score_bits	NMS Score Bits	integer	Number of score bits in optimized NMS
	inference_config.trt_inference	TensorRT Inference	Collection	TensorRT inference configurations
	inference_config.trt_inference.trt_engine	TensorRT Engine	hidden	Path to the TensorRT engine to run inference

instance_segmentation

convert

parameter	parameter	display_name	value_type	description	default_value
num_shards	num_shards	Num shards	integer	Number of shards.	256
include_masks	include_masks	Include masks	bool	Whether to include instance segmentation masks.	FALSE
tag	tag		string

lprnet

evaluate

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	popular
version	Schema Version	const	The version of this schema	1
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	Label Path	hidden
dataset_config.data_sources.image_directory_path	Image Path	hidden
dataset_config.validation_data_sources.label_directory_path	Label Path	hidden
dataset_config.validation_data_sources.image_directory_path	Image Path	hidden
dataset_config.characters_list_file	Characters List Path	string
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	32	1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	24	1
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	1.00E-06	0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.00E-05	0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.001	0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.5	0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L2__			__L1__, __L2__
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	5.00E-04	0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1	1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16	1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8	1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3	0
eval_config	Evaluation	collection
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	5	1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	1	1
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		96	1			yes
augmentation_config.output_height	Model Input height	integer		48	1			yes
augmentation_config.output_channel	Model Input channel	integer		3	1		1,3	yes
augmentation_config.max_rotate_degree	Max Rotation Degree	integer	The maximum rotation angle for augmentation	5	1
augmentation_config.keep_original_prob	Keep Original Probability	float	The probability for keeping original images. Only resized will be applied to am image with this probability	0.3	0	1
augmentation_config.rotate_prob	Rotation Probability	float	The probability for rotating the image	0.5	0	1
augmentation_config.gaussian_kernel_size	Gaussian Kernel Size	list	The kernel size of the Gaussian blur	[5,7,15]	1
augmentation_config.blur_prob	Gaussian Blur Probability	float	The probability for blurring the image with Gaussian blur	0.5	0	1
augmentation_config.reverse_color_prob	Reverse Color Probability	float	The probability for reversing the color of the image	0.5	0	1
lpr_config.hidden_units	Hidden Units	integer	The number of hidden units in the LSTM layers of LPRNet	512	1
lpr_config.max_label_length	Max Label Length	integer	The maximum length of license plates in the dataset	8
lpr_config.arch	Architecture	string	The architecture of LPRNet	baseline			baseline
lpr_config.nlayers	Number of Layers	integer	The number of convolution layers in LPRNet	18			10, 18

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
experiment_spec	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		fp32, fp16	yes	yes
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
verbose		hidden		TRUE
strict_type_constraints		bool		FALSE
results_dir		hidden

inference

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	popular
version	Schema Version	const	The version of this schema	1
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	Label Path	hidden
dataset_config.data_sources.image_directory_path	Image Path	hidden
dataset_config.validation_data_sources.label_directory_path	Label Path	hidden
dataset_config.validation_data_sources.image_directory_path	Image Path	hidden
dataset_config.characters_list_file	Characters List Path	string
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	32	1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	24	1
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	1.00E-06	0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.00E-05	0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.001	0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.5	0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L2__			__L1__, __L2__
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	5.00E-04	0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1	1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16	1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8	1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3	0
eval_config	Evaluation	collection
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	5	1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	1	1
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		96	1			yes
augmentation_config.output_height	Model Input height	integer		48	1			yes
augmentation_config.output_channel	Model Input channel	integer		3	1		1,3	yes
augmentation_config.max_rotate_degree	Max Rotation Degree	integer	The maximum rotation angle for augmentation	5	1
augmentation_config.keep_original_prob	Keep Original Probability	float	The probability for keeping original images. Only resized will be applied to am image with this probability	0.3	0	1
augmentation_config.rotate_prob	Rotation Probability	float	The probability for rotating the image	0.5	0	1
augmentation_config.gaussian_kernel_size	Gaussian Kernel Size	list	The kernel size of the Gaussian blur	[5,7,15]	1
augmentation_config.blur_prob	Gaussian Blur Probability	float	The probability for blurring the image with Gaussian blur	0.5	0	1
augmentation_config.reverse_color_prob	Reverse Color Probability	float	The probability for reversing the color of the image	0.5	0	1
lpr_config.hidden_units	Hidden Units	integer	The number of hidden units in the LSTM layers of LPRNet	512	1
lpr_config.max_label_length	Max Label Length	integer	The maximum length of license plates in the dataset	8
lpr_config.arch	Architecture	string	The architecture of LPRNet	baseline			baseline
lpr_config.nlayers	Number of Layers	integer	The number of convolution layers in LPRNet	18			10, 18

train

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	popular
version	Schema Version	const	The version of this schema	1
initial_epoch	Initial Epoch CLI	integer		1
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.data_sources.label_directory_path	Label Path	hidden
dataset_config.data_sources.image_directory_path	Image Path	hidden
dataset_config.validation_data_sources.label_directory_path	Label Path	hidden
dataset_config.validation_data_sources.image_directory_path	Image Path	hidden
dataset_config.characters_list_file	Characters List Path	string
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	32	1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	24	1
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	1.00E-06	0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.00E-05	0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.001	0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.5	0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L2__			__L1__, __L2__
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	5.00E-04	0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1	1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16	1
training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8	1
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				loss
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3	0
eval_config	Evaluation	collection
eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	5	1
eval_config.batch_size	Batch Size	integer	batch size for evaluation	1	1
augmentation_config	Augmentation config	collection
augmentation_config.output_width	Model Input width	integer		96	1			yes
augmentation_config.output_height	Model Input height	integer		48	1			yes
augmentation_config.output_channel	Model Input channel	integer		3	1		1,3	yes
augmentation_config.max_rotate_degree	Max Rotation Degree	integer	The maximum rotation angle for augmentation	5	1
augmentation_config.keep_original_prob	Keep Original Probability	float	The probability for keeping original images. Only resized will be applied to am image with this probability	0.3	0	1
augmentation_config.rotate_prob	Rotation Probability	float	The probability for rotating the image	0.5	0	1
augmentation_config.gaussian_kernel_size	Gaussian Kernel Size	list	The kernel size of the Gaussian blur	[5,7,15]	1
augmentation_config.blur_prob	Gaussian Blur Probability	float	The probability for blurring the image with Gaussian blur	0.5	0	1
augmentation_config.reverse_color_prob	Reverse Color Probability	float	The probability for reversing the color of the image	0.5	0	1
lpr_config.hidden_units	Hidden Units	integer	The number of hidden units in the LSTM layers of LPRNet	512	1
lpr_config.max_label_length	Max Label Length	integer	The maximum length of license plates in the dataset	8
lpr_config.arch	Architecture	string	The architecture of LPRNet	baseline			baseline
lpr_config.nlayers	Number of Layers	integer	The number of convolution layers in LPRNet	18			10, 18

mask_rcnn

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	FALSE			no
cal_image_dir		hidden
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
strict_type_constraints		bool		FALSE
gen_ds_config		bool		FALSE
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
min_batch_size		integer			1
opt_batch_size		integer			1
experiment_spec	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
static_batch_size		integer			-1
results_dir		hidden
verbose		hidden		TRUE

inference

parameter	display_name	value_type	description	default_value	param_type (internal / hidden / inferred)	CLI
version	Schema Version	const	The version of this schema	1	internal
threshold		float		0.3
include_mask		bool		TRUE
experiment_spec_file		hidden				CLI argument
model_dir		hidden				CLI argument
key		hidden				CLI argument
seed	Random Seed	integer	Seed value for the random number generator in the network	123
num_epochs		integer		10
use_amp	AMP	bool		FALSE
warmup_steps	Warmup steps	integer	The steps taken for learning rate to ramp up to the init_learning_rate	10000
learning_rate_steps	Learning rate steps	string	A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels	[100000, 150000, 200000]
learning_rate_decay_levels	learning rate decay steps	string	A list of decay factors. The length should match the length of learning_rate_steps.	[0.1, 0.02, 0.01]
total_steps	Total training steps	integer	The total number of training iterations	250000
train_batch_size	Training Batchsize	integer	The batch size during training	2
eval_batch_size	Evaluation Batchsize	integer	The batch size during validation or evaluation	4
num_steps_per_eval	Number of steps between each evaluation	integer	num_steps_per_eval	5000
momentum	SGD momentum	float	Momentum of the SGD optimizer	0.9
l1_weight_decay	L1 Weight decay	float	L1 regularizer weight
l2_weight_decay	L2 weight decay	float	L2 regularizer weight	0.00004
warmup_learning_rate		float		0.0001
init_learning_rate		float		0.005
num_examples_per_epoch		integer		118288
checkpoint	Path to Pretrained model	hidden	The path to a pretrained model
skip_checkpoint_variables	Name of skipped variables in the pretrained model	string	If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.
pruned_model_path	Path to pruned model	hidden	The path to a pruned MaskRCNN graph
maskrcnn_config	MaskRCNN configuration	collection
maskrcnn_config.nlayers	Number of layers in ResNet	integer	The number of layers in ResNet arch	50
maskrcnn_config.arch	Backbone name	string	The backbone feature extractor name	resnet
maskrcnn_config.freeze_bn	Freeze BN	bool	Whether to freeze all BatchNorm layers in the backbone
maskrcnn_config.freeze_blocks	Freeze Block	string	A list of conv blocks in the backbone to freeze
maskrcnn_config.gt_mask_size	Groundtruth Mask Size	integer	The groundtruth mask size	112
maskrcnn_config.rpn_positive_overlap	RPN positive overlap	float	The lower-bound threshold to assign positive labels for anchors	0.7
maskrcnn_config.rpn_negative_overlap	RPN negative overlap	float	The upper-bound threshold to assign negative labels for anchors	0.3
maskrcnn_config.rpn_batch_size_per_im	RPN batchsize per image	integer	The number of sampled anchors per image in RPN	256
maskrcnn_config.rpn_fg_fraction	RPN foreground fraction	float	The desired fraction of positive anchors in a batch	0.5
maskrcnn_config.rpn_min_size	RPN minimum size	float	The minimum proposal height and width	0
maskrcnn_config.batch_size_per_im	RoI batchsize per image	integer	The RoI minibatch size per image	512
maskrcnn_config.fg_fraction	Foreground fraction	float	The target fraction of RoI minibatch that is labeled as foreground	0.25
maskrcnn_config.fg_thresh		float		0.5
maskrcnn_config.bg_thresh_hi		float		0.5
maskrcnn_config.bg_thresh_lo		float		0
maskrcnn_config.fast_rcnn_mlp_head_dim	classification head dimension	integer	The Fast-RCNN classification head dimension	1024
maskrcnn_config.bbox_reg_weights	bounding-box regularization weights	string	The bounding-box regularization weights	(10., 10., 5., 5.)
maskrcnn_config.include_mask	Include mask head	bool	Specifies whether to include a mask head	TRUE
maskrcnn_config.mrcnn_resolution	Mask resolution	integer	The mask-head resolution	28
maskrcnn_config.train_rpn_pre_nms_topn	Top N RPN proposals pre NMS during training	integer	The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training	2000
maskrcnn_config.train_rpn_post_nms_topn	Top N RPN proposals post NMS during training	integer	The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training	1000
maskrcnn_config.train_rpn_nms_threshold	NMS threshold in RPN during training	float	The NMS IOU threshold in RPN during training	0.7
maskrcnn_config.test_detections_per_image	Number of bounding boxes after NMS	integer	The number of bounding box candidates after NMS	100
maskrcnn_config.test_nms	NMS threshold during test	float	The NMS IOU threshold during test	0.5
maskrcnn_config.test_rpn_pre_nms_topn	Top N RPN proposals pre NMS during test	integer	The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test	1000
maskrcnn_config.test_rpn_post_nms_topn	Top N RPN proposals post NMS during test	integer	The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test	1000
maskrcnn_config.test_rpn_nms_thresh	NMS threshold in RPN during test	float	The NMS IOU threshold in RPN during test	0.7
maskrcnn_config.min_level	Minimum FPN level	integer	The minimum level of the output feature pyramid	2
maskrcnn_config.max_level	Maximum FPN level	integer	The maximum level of the output feature pyramid	6
maskrcnn_config.num_scales	number of scales	integer	The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)])	1
maskrcnn_config.aspect_ratios	aspect ratios	string	A list of tuples representing the aspect ratios of anchors on each pyramid level	[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
maskrcnn_config.anchor_scale	anchor scale	integer	Scale of the base-anchor size to the feature-pyramid stride	8
maskrcnn_config.rpn_box_loss_weight	RPN box loss weight	float	The weight for adjusting RPN box loss in the total loss	1
maskrcnn_config.fast_rcnn_box_loss_weight	FastRCNN box regression weight	float	The weight for adjusting FastRCNN box regression loss in the total loss	1
maskrcnn_config.mrcnn_weight_loss_mask	Mask loss weight	float	The weight for adjusting mask loss in the total loss	1
data_config	Dataset configuration	collection
data_config.image_size	Image size	string	The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input.	(256, 256)
data_config.augment_input_data	augment input data	bool	Specifies whether to augment the data	TRUE
data_config.eval_samples	Number of evaluation samples	integer	The number of samples for evaluation	500
data_config.training_file_pattern	Train file pattern	hidden	The TFRecord path for training
data_config.validation_file_pattern	validation file pattern	hidden	The TFRecord path for validation
data_config.val_json_file	validation json path	hidden	The annotation file path for validation
data_config.num_classes	Number of classes	integer	The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class)	91
data_config.skip_crowd_during_training	skip crowd during training	bool	Specifies whether to skip crowd during training	TRUE
data_config.prefetch_buffer_size	prefetch buffer size	integer	The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE)
data_config.shuffle_buffer_size	shuffle buffer size	integer	The shuffle buffer size used by tf.data.Dataset (default: 4096)	4096
data_config.n_workers	Number of workers	integer	The number of workers to parse and preprocess data (default: 16)	16
data_config.max_num_instances	maximum number of instances	integer	The maximum number of object instances to parse (default: 200)	200

prune

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
model	Model path	hidden	UNIX path to where the input model is located.					yes
output_dir	Output Directory	hidden	UNIX path to where the pruned model will be saved.					yes
key	Encode key	hidden
normalizer	Normalizer	string	How to normalize	max			max, L2
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union			union, intersection, arithmetic_mean,geometric_mean	no
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8				no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1	0	1		yes	yes
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16				no
excluded_layers	Excluded layers	string	string of list: List of excluded_layers. Examples: -i item1 item2
verbose	verbosity	hidden		TRUE

train

parameter	display_name	value_type	description	default_value	param_type (internal / hidden / inferred)	CLI
version	Schema Version	const	The version of this schema	1	internal
experiment_spec_file		hidden				CLI argument
model_dir		hidden				CLI argument
key		hidden				CLI argument
seed	Random Seed	integer	Seed value for the random number generator in the network	123
num_epochs		integer		10
use_amp	AMP	bool		FALSE
warmup_steps	Warmup steps	integer	The steps taken for learning rate to ramp up to the init_learning_rate	10000
learning_rate_steps	Learning rate steps	string	A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels	[100000, 150000, 200000]
learning_rate_decay_levels	learning rate decay steps	string	A list of decay factors. The length should match the length of learning_rate_steps.	[0.1, 0.02, 0.01]
total_steps	Total training steps	integer	The total number of training iterations	250000
train_batch_size	Training Batchsize	integer	The batch size during training	2
eval_batch_size	Evaluation Batchsize	integer	The batch size during validation or evaluation	4
num_steps_per_eval	Number of steps between each evaluation	integer	num_steps_per_eval	5000
momentum	SGD momentum	float	Momentum of the SGD optimizer	0.9
l1_weight_decay	L1 Weight decay	float	L1 regularizer weight
l2_weight_decay	L2 weight decay	float	L2 regularizer weight	0.00004
warmup_learning_rate		float		0.0001
init_learning_rate		float		0.005
num_examples_per_epoch		integer		118288
checkpoint	Path to Pretrained model	hidden	The path to a pretrained model
skip_checkpoint_variables	Name of skipped variables in the pretrained model	string	If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.
pruned_model_path	Path to pruned model	hidden	The path to a pruned MaskRCNN graph
maskrcnn_config	MaskRCNN configuration	collection
maskrcnn_config.nlayers	Number of layers in ResNet	integer	The number of layers in ResNet arch	50
maskrcnn_config.arch	Backbone name	string	The backbone feature extractor name	resnet
maskrcnn_config.freeze_bn	Freeze BN	bool	Whether to freeze all BatchNorm layers in the backbone
maskrcnn_config.freeze_blocks	Freeze Block	string	A list of conv blocks in the backbone to freeze
maskrcnn_config.gt_mask_size	Groundtruth Mask Size	integer	The groundtruth mask size	112
maskrcnn_config.rpn_positive_overlap	RPN positive overlap	float	The lower-bound threshold to assign positive labels for anchors	0.7
maskrcnn_config.rpn_negative_overlap	RPN negative overlap	float	The upper-bound threshold to assign negative labels for anchors	0.3
maskrcnn_config.rpn_batch_size_per_im	RPN batchsize per image	integer	The number of sampled anchors per image in RPN	256
maskrcnn_config.rpn_fg_fraction	RPN foreground fraction	float	The desired fraction of positive anchors in a batch	0.5
maskrcnn_config.rpn_min_size	RPN minimum size	float	The minimum proposal height and width	0
maskrcnn_config.batch_size_per_im	RoI batchsize per image	integer	The RoI minibatch size per image	512
maskrcnn_config.fg_fraction	Foreground fraction	float	The target fraction of RoI minibatch that is labeled as foreground	0.25
maskrcnn_config.fg_thresh		float		0.5
maskrcnn_config.bg_thresh_hi		float		0.5
maskrcnn_config.bg_thresh_lo		float		0
maskrcnn_config.fast_rcnn_mlp_head_dim	classification head dimension	integer	The Fast-RCNN classification head dimension	1024
maskrcnn_config.bbox_reg_weights	bounding-box regularization weights	string	The bounding-box regularization weights	(10., 10., 5., 5.)
maskrcnn_config.include_mask	Include mask head	bool	Specifies whether to include a mask head	TRUE
maskrcnn_config.mrcnn_resolution	Mask resolution	integer	The mask-head resolution	28
maskrcnn_config.train_rpn_pre_nms_topn	Top N RPN proposals pre NMS during training	integer	The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training	2000
maskrcnn_config.train_rpn_post_nms_topn	Top N RPN proposals post NMS during training	integer	The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training	1000
maskrcnn_config.train_rpn_nms_threshold	NMS threshold in RPN during training	float	The NMS IOU threshold in RPN during training	0.7
maskrcnn_config.test_detections_per_image	Number of bounding boxes after NMS	integer	The number of bounding box candidates after NMS	100
maskrcnn_config.test_nms	NMS threshold during test	float	The NMS IOU threshold during test	0.5
maskrcnn_config.test_rpn_pre_nms_topn	Top N RPN proposals pre NMS during test	integer	The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test	1000
maskrcnn_config.test_rpn_post_nms_topn	Top N RPN proposals post NMS during test	integer	The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test	1000
maskrcnn_config.test_rpn_nms_thresh	NMS threshold in RPN during test	float	The NMS IOU threshold in RPN during test	0.7
maskrcnn_config.min_level	Minimum FPN level	integer	The minimum level of the output feature pyramid	2
maskrcnn_config.max_level	Maximum FPN level	integer	The maximum level of the output feature pyramid	6
maskrcnn_config.num_scales	number of scales	integer	The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)])	1
maskrcnn_config.aspect_ratios	aspect ratios	string	A list of tuples representing the aspect ratios of anchors on each pyramid level	[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
maskrcnn_config.anchor_scale	anchor scale	integer	Scale of the base-anchor size to the feature-pyramid stride	8
maskrcnn_config.rpn_box_loss_weight	RPN box loss weight	float	The weight for adjusting RPN box loss in the total loss	1
maskrcnn_config.fast_rcnn_box_loss_weight	FastRCNN box regression weight	float	The weight for adjusting FastRCNN box regression loss in the total loss	1
maskrcnn_config.mrcnn_weight_loss_mask	Mask loss weight	float	The weight for adjusting mask loss in the total loss	1
data_config	Dataset configuration	collection
data_config.image_size	Image size	string	The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input.	(256, 256)
data_config.augment_input_data	augment input data	bool	Specifies whether to augment the data	TRUE
data_config.eval_samples	Number of evaluation samples	integer	The number of samples for evaluation	500
data_config.training_file_pattern	Train file pattern	hidden	The TFRecord path for training
data_config.validation_file_pattern	validation file pattern	hidden	The TFRecord path for validation
data_config.val_json_file	validation json path	hidden	The annotation file path for validation
data_config.num_classes	Number of classes	integer	The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class)	91
data_config.skip_crowd_during_training	skip crowd during training	bool	Specifies whether to skip crowd during training	TRUE
data_config.prefetch_buffer_size	prefetch buffer size	integer	The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE)
data_config.shuffle_buffer_size	shuffle buffer size	integer	The shuffle buffer size used by tf.data.Dataset (default: 4096)	4096
data_config.n_workers	Number of workers	integer	The number of workers to parse and preprocess data (default: 16)	16
data_config.max_num_instances	maximum number of instances	integer	The maximum number of object instances to parse (default: 200)	200

multitask_classification

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	FALSE			no
cal_image_dir		hidden
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
strict_type_constraints		bool		FALSE
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
class_map		hidden
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
verbose		hidden		TRUE

train

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
random_seed		integer		42
model_config		collection
model_config.arch		string		resnet
model_config.input_image_size		string		3,80,60				yes	yes
model_config.resize_interpolation_method		string					__BILINEAR__, __BICUBIC__
model_config.n_layers		integer		10
model_config.use_imagenet_head		bool
model_config.use_batch_norm		bool		TRUE
model_config.use_bias		bool
model_config.use_pooling		bool
model_config.all_projections		bool		TRUE
model_config.freeze_bn		bool
model_config.freeze_blocks		integer
model_config.dropout		float
model_config.batch_norm_config		collection
model_config.batch_norm_config.momentum		float
model_config.batch_norm_config.epsilon		float
model_config.activation		collection
model_config.activation.activation_type		string
model_config.activation.activation_parameters		collection
model_config.activation.activation_parameters.key		string
model_config.activation.activation_parameters.value		float
dataset_config		collection
dataset_config.train_csv_path		hidden
dataset_config.image_directory_path		hidden
dataset_config.val_csv_path		hidden
training_config	Training	collection
training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	100	1
training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	10	1
training_config.enable_qat	Enable Quantization Aware Training	bool	bool
training_config.learning_rate		collection
training_config.learning_rate.soft_start_annealing_schedule		collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	1.00E-06	0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.00E-02	0
training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1	0	1
training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.7	0	1
training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__			__L1__, __L2__
training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	9.00E-05	0
training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1
training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset
training_config.n_workers	Workers	integer	Number of workers in sequence dataset
training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
training_config.early_stopping	Early Stopping	collection
training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training
training_config.visualizer	Visualizer	collection
training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
training_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum		float		0.9
training_config.optimizer.sgd.nesterov		bool		FALSE

object_detection

augment

parameter	value_type	description	default_value	examples	required	cli
batch_size	integer	CLI parameter	4			yes
spatial_config	collection
spatial_config.rotation_config	collection
spatial_config.rotation_config.angle	float		10
spatial_config.rotation_config.units	string		degrees
spatial_config.shear_config	collection
spatial_config.shear_config.shear_ratio_x	float
spatial_config.shear_config.shear_ratio_y	float
spatial_config.flip_config	collection
spatial_config.flip_config.flip_horizontal	bool
spatial_config.flip_config.flip_vertical	bool
spatial_config.translation_config	collection
spatial_config.translation_config.translate_x	integer
spatial_config.translation_config.translate_y	integer
color_config	collection
color_config.hue_saturation_config	collection
color_config.hue_saturation_config.hue_rotation_angle	float		5
color_config.hue_saturation_config.saturation_shift	float		1
color_config.contrast_config	collection
color_config.contrast_config.contrast	float
color_config.contrast_config.center	float
color_config.brightness_config	collection
color_config.brightness_config.offset	float
partition_config	collection
partition_config.partition_mode	string	Enum		__ID_WISE__, __RANDOM__
partition_config.dataset_percentage	float
blur_config	collection
blur_config.std	float
blur_config.size	float
output_image_width	integer		1248		yes
output_image_height	integer		384		yes
output_image_channel	integer		3		yes
image_extension	string		.png		yes
dataset_config	collection
dataset_config.image_path	const	hidden	images
dataset_config.label_path	const	hidden	labels

convert_coco

parameter	display_name	value_type	description	default_value	notes
coco_config		collection
coco_config.root_directory_path		hidden
coco_config.img_dir_names		list	List of image directories correspoding to each partition	[“images”]	The order of image directories must match annotation_files based on partitions
coco_config.annotation_files		list	List of JSON files with COCO dataset format	[“annotations.json”]
coco_config.num_partitions		integer	The number of partitions to use to split the data (N folds). The number of partitions must match the size of the img_dir_names and annotation_files	1
coco_config.num_shards		list	The number of shards per fold. If the size of num_shards is 1, then same number of shards will be applied to every partition	[256]
sample_modifier_config		collection
sample_modifier_config.filter_samples_containing_only		list	list of string
sample_modifier_config.dominant_target_classes		list	list of string
sample_modifier_config.minimum_target_class_imbalance		list	list of string
sample_modifier_config.minimum_target_class_imbalance.key		string
sample_modifier_config.minimum_target_class_imbalance.value		float
sample_modifier_config.num_duplicates		integer
sample_modifier_config.max_training_samples		integer
sample_modifier_config.source_to_target_class_mapping		list	list of string
sample_modifier_config.source_to_target_class_mapping.key		string
sample_modifier_config.source_to_target_class_mapping.value		string
image_directory_path		hidden
target_class_mapping		list	list of string
target_class_mapping.key	Class Key	string
target_class_mapping.value	Class Value	string

convert_efficientdet

parameter	display_name	value_type	description	default_value
num_shards	num_shards	integer	Number of shards	256
include_masks	include_masks	bool	Whether to include instance segmentation masks	FALSE
tag	tag	string	Tag

convert_kitti

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular	notes
kitti_config		collection
kitti_config.root_directory_path		hidden
kitti_config.image_dir_name		const		images
kitti_config.label_dir_name		const		labels
kitti_config.point_clouds_dir		string
kitti_config.calibrations_dir		string
kitti_config.kitti_sequence_to_frames_file		string	The name of the KITTI sequence to frame mapping file. This file must be present within the dataset root as mentioned in the root_directory_path. This file must be uploaded by the user along with images and labels. The name of that file must be filled in this field
kitti_config.image_extension		string	The extension of the images in the image_dir_name parameter.	.png	.jpg, .png, .jpeg				yes	yes
kitti_config.num_partitions		integer	The number of partitions to use to split the data (N folds). This field is ignored when the partition model is set to random, as by default only two partitions are generated: val and train. In sequence mode, the data is split into n-folds. The number of partitions is ideally fewer than the total number of sequences in the kitti_sequence_to_frames file. Valid options: n=2 for random partition, n< number of sequences in the kitti_sequence_to_frames_file	2
kitti_config.num_shards		integer	The number of shards per fold.	10		1	20
kitti_config.partition_mode		string	The method employed when partitioning the data to multiple folds. Two methods are supported: Random partitioning: The data is divided in to 2 folds, train and val. This mode requires that the val_split parameter be set. Sequence-wise partitioning: The data is divided into n partitions (defined by the num_partitions parameter) based on the number of sequences available.	random				random, sequence
kitti_config.val_split		float	The percentage of data to be separated for validation. This only works under “random” partition mode. This partition is available in fold 0 of the TFrecords generated. Set the validation fold to 0 in the dataset_config.	0		0	100				Must not be exposed from API since each dataset is its own and cannot be split into train, val, test, etc… through the API
sample_modifier_config		collection
sample_modifier_config.filter_samples_containing_only		list	list of string
sample_modifier_config.dominant_target_classes		list	list of string
sample_modifier_config.minimum_target_class_imbalance		list
sample_modifier_config.minimum_target_class_imbalance.key		string
sample_modifier_config.minimum_target_class_imbalance.value		float
sample_modifier_config.num_duplicates		integer
sample_modifier_config.max_training_samples		integer
sample_modifier_config.source_to_target_class_mapping		list
sample_modifier_config.source_to_target_class_mapping.key		string
sample_modifier_config.source_to_target_class_mapping.value		string
image_directory_path		hidden
target_class_mapping		list									Better not expose these on dataset convert and use the target_class_mapping in the train / eval / inference spec
target_class_mapping.key	Class Key	string
target_class_mapping.value	Class Value	string

kmeans

parameter	value_type	description	default_value	required
size_x	integer	Network input width		yes
size_y	integer	Network input height		yes
num_clusters	integer	Number of clusters needed.	9
max_steps	integer	Maximum kmeans steps. Kmeans will stop even if not converged at max_steps	10000
min_x	integer	Ignore boxes with width (as in network input-size image) not larger than this value.	0
min_y	integer	Ignore boxes with height (as in network input-size image) not larger than this value.	0

retinanet

evaluate

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)	CLI
	version	Schema Version	const	The version of this schema	1							internal
	experiment_spec_file		hidden										CLI argument
	results_dir		hidden										CLI argument
	key		hidden										CLI argument
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	string
	dataset_config.validation_data_sources.image_directory_path	Image path	string
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	string
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	8		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	100		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	4.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.50E-02		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.3		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__L1__, __L2__
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	2.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	2
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
	training_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
	training_config.optimizer.sgd.momentum		float		0.9
	training_config.optimizer.sgd.nesterov		bool		TRUE
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				__SAMPLE__, __INTEGRATE__
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		1248						yes
	augmentation_config.output_height	Model Input height	integer		384						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation
	augmentation_config.hue	Hue	float	Hue delta in color jittering augmentation
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	retinanet_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	retinanet_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different RetinaNet feature layers
	retinanet_config.aspect_ratios_global		string		[1.0, 2.0, 0.5]
	retinanet_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	FALSE
	retinanet_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
	retinanet_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
	retinanet_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
	retinanet_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	retinanet_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	retinanet_config.arch	Arch	string	The backbone for feature extraction	resnet
	retinanet_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
	retinanet_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
	retinanet_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
	retinanet_config.loss_loc_weight	Localization loss weight	float	This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss	0.8
	retinanet_config.focal_loss_alpha	Alpha (Focal loss)	float	Alpha in the focal loss equation	0.25
	retinanet_config.focal_loss_gamma	Gamma (Focal loss)	float	Gamma in the focal loss equation	2
	retinanet_config.n_kernels	Number of kernels	integer	This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train.	1
	retinanet_config.feature_size	Feature size	integer	This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ).	256
	retinanet_config.pos_iou_thresh	Postive IOU threshold	float	The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
	retinanet_config.neg_iou_thresh	Negative IOU threshold	float	The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
	retinanet_config.n_anchor_levels	Number of Anchor levels	integer	Number of anchor levels between two adjacent scales.	1

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes		CLI argument
data_type	Pruning Granularity	enum	Number of filters to remove at a time.	int8		int8, fp32, fp16	yes	yes	CLI argument
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100			no		CLI argument
experiment_spec	Experiment Spec	string	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.		hidden from train expeirment		yes		CLI argument
model	Model path	hidden	UNIX path to where the input model is located.		hidden		yes		CLI argument
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden		yes		CLI argument
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	TRUE			no		CLI argument
engine-file	Engine File	hidden	UNIX path to the model engine file.		/export/input_model_file.<data_type>.trt		yes		CLI argument
key	Encryption Key	hidden	Encryption key	tlt_encode			yes		CLI argument
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	16				yes	CLI argument
cal_cache_file	Calibration cache file	string	Unix PATH to the int8 calibration cache file		hidden		yes	yes	CLI argument

inference

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)	CLI
	version	Schema Version	const	The version of this schema	1							internal
	threshold	CLI parameter initial epoch	float		0.3
	experiment_spec_file		hidden										CLI argument
	results_dir		hidden										CLI argument
	key		hidden										CLI argument
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	string
	dataset_config.validation_data_sources.image_directory_path	Image path	string
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	string
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	8		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	100		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	4.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.50E-02		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.3		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__L1__, __L2__
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	2.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	2
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
	training_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
	training_config.optimizer.sgd.momentum		float		0.9
	training_config.optimizer.sgd.nesterov		bool		TRUE
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				__SAMPLE__, __INTEGRATE__
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		1248						yes
	augmentation_config.output_height	Model Input height	integer		384						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation
	augmentation_config.hue	Hue	float	Hue delta in color jittering augmentation
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	retinanet_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	retinanet_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different RetinaNet feature layers
	retinanet_config.aspect_ratios_global		string		[1.0, 2.0, 0.5]
	retinanet_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	FALSE
	retinanet_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
	retinanet_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
	retinanet_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
	retinanet_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	retinanet_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	retinanet_config.arch	Arch	string	The backbone for feature extraction	resnet
	retinanet_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
	retinanet_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
	retinanet_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
	retinanet_config.loss_loc_weight	Localization loss weight	float	This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss	0.8
	retinanet_config.focal_loss_alpha	Alpha (Focal loss)	float	Alpha in the focal loss equation	0.25
	retinanet_config.focal_loss_gamma	Gamma (Focal loss)	float	Gamma in the focal loss equation	2
	retinanet_config.n_kernels	Number of kernels	integer	This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train.	1
	retinanet_config.feature_size	Feature size	integer	This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ).	256
	retinanet_config.pos_iou_thresh	Postive IOU threshold	float	The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
	retinanet_config.neg_iou_thresh	Negative IOU threshold	float	The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
	retinanet_config.n_anchor_levels	Number of Anchor levels	integer	Number of anchor levels between two adjacent scales.	1

inference_seq

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	regex	popular	param_type (internal / hidden / inferred)
	version	Schema Version	const	The version of this schema	1								internal
	out_thres		float		0.3

model convert

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
b	batch_size	integer	calibration batch size	8			yes		CLI argument
c	cache_file	path	calibration cache file (default cal.bin)						CLI argument
d	input_dims	list	comma separated list of input dimensions (not required for TLT 3.0 new models).						CLI argument
i	input_order	enum	input dimension ordering	nchw		nchw, nhwc, nc			CLI argument
m	max_batch_size	integer	maximum TensorRT engine batch size (default 16). If meet with out-of-memory issue, please decrease the batch size accordingly.	16				yes	CLI argument
o	outputs	list	comma separated list of output node names						CLI argument
p	parse_profile_shapes	list	comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g.,NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.						CLI argument
s	strict_type_constraints	bool	TensorRT strict_type_constraints flag for INT8 mode	FALSE					CLI argument
t	data_type	enum	TensorRT data type	fp32		fp32, fp16, int8		yes	CLI argument
u	dla_core	int	Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback).	-1					CLI argument
w	max_workspace_size	int	maximum workspace size of TensorRT engine (default 1<<30). If meet with out-of-memory issue, please increase the workspace size accordingly.		1<<30, 2<<30				CLI argument
platform	platform	enum	platform label	rtx			yes	yes

prune

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular
version	Schema Version	const	The version of this schema						no		CLI argument
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1		0	1		yes	yes	CLI argument
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8					no		CLI argument
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16					no		CLI argument
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union				union, intersection, arithmetic_mean,geometric_mean	no		CLI argument
model	Model path	hidden	UNIX path to where the input model is located.		hidden				yes		CLI argument
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden				yes		CLI argument

train

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)	CLI
	version	Schema Version	const	The version of this schema	1							internal
	initial_epoch	CLI parameter initial epoch	integer		0
	experiment_spec_file		hidden										CLI argument
	results_dir		hidden										CLI argument
	key		hidden										CLI argument
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	string
	dataset_config.validation_data_sources.image_directory_path	Image path	string
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	string
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	8		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	100		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	4.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	1.50E-02		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.3		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				__L1__, __L2__
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	2.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	10
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	2
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard
	training_config.optimizer.sgd		collection	One of SGD / ADAM / RMSPROP
	training_config.optimizer.sgd.momentum		float		0.9
	training_config.optimizer.sgd.nesterov		bool		TRUE
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				__SAMPLE__, __INTEGRATE__
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		1248						yes
	augmentation_config.output_height	Model Input height	integer		384						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation
	augmentation_config.hue	Hue	float	Hue delta in color jittering augmentation
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	retinanet_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	[1.0, 2.0, 0.5]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	retinanet_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different RetinaNet feature layers
	retinanet_config.aspect_ratios_global		string		[1.0, 2.0, 0.5]
	retinanet_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.	FALSE
	retinanet_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.	FALSE
	retinanet_config.variances	Variance	string	A list of 4 positive floats to decode bboxes	[0.1, 0.1, 0.2, 0.2]
	retinanet_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer	[0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
	retinanet_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	retinanet_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	retinanet_config.arch	Arch	string	The backbone for feature extraction	resnet
	retinanet_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch	18
	retinanet_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.	FALSE
	retinanet_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training
	retinanet_config.loss_loc_weight	Localization loss weight	float	This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss	0.8
	retinanet_config.focal_loss_alpha	Alpha (Focal loss)	float	Alpha in the focal loss equation	0.25
	retinanet_config.focal_loss_gamma	Gamma (Focal loss)	float	Gamma in the focal loss equation	2
	retinanet_config.n_kernels	Number of kernels	integer	This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train.	1
	retinanet_config.feature_size	Feature size	integer	This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ).	256
	retinanet_config.pos_iou_thresh	Postive IOU threshold	float	The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
	retinanet_config.neg_iou_thresh	Negative IOU threshold	float	The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
	retinanet_config.n_anchor_levels	Number of Anchor levels	integer	Number of anchor levels between two adjacent scales.	1

spectro_gen

dataset_convert

parameter	display_name	value_type	description	default_value	required
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file		yes
result_dir	Results directory	hidden	Path to the output results directory and logs		yes
key	Save key	hidden	Key to save/load the model		yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
dataset_name	Name	string		merge	yes
original_json	Original json	hidden
finetune_json	Finetune json	hidden
original_minutes	Original minutes	integer		300
delimiter	Delimiter	string
save_path	Save Path	hidden

export

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	required
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file					yes
result_dir	Results directory	hidden	Path to the output results directory and logs					yes
key	Save key	hidden	Key to save/load the model					yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1		1	1
export_format	Export format	string		RIVA	RIVA, ONNX			yes
export_to	Export To	hidden

finetune

parameter	display_name	value_type	description	default_value	valid_options	required	popular	CLI
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file			yes		yes
result_dir	Results directory	hidden	Path to the output results directory and logs			yes		yes
key	Save key	hidden	Key to save the model			yes		yes
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model			yes	yes	yes
resume_model_weights	Pretrained model path	hidden	Path to the trained model					yes
sample_rate	Sample rate	int	The target sample rate to load the audio, in Hz	22050
train_dataset	Train Dataset	hidden	Path to the train dataset manifest json file
validation_dataset	Validation Dataset	hidden	Path to the validation dataset manifest json file
prior_folder		hidden
n_speakers	Number of speakers	int	Number of speakers in the dataset	1		yes
n_window_size	Window size	int	The size of the fft window in samples	1024		yes
n_window_stride	Window stride	int	The stride of the window in samples	256		yes
pitch_fmin	Pitch Fmin	float	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	64		yes
pitch_fmax	Pitch Fmin	float	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	512		yes
pitch_avg	Pitch Average	float	The average used to normalize the pitch			yes	yes
pitch_std	Pitch std. deviation	float	The standard deviation used to normalize the pitch			yes	yes
train_ds	Train Dataset	collection	Parameters to configure the training dataset
train_ds.dataset	Train Dataset	collection	Parameters to configure the training dataset
train_ds.dataset._target_	Target	const	The nemo class module to be imported	nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset
train_ds.dataset.manifest_filepath	Train manifest file	const	Path to the train dataset manifest json file	${train_dataset}
train_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped
train_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped	0.1
train_ds.dataset.int_values	Input as integer values	bool	Load samples as 32 bit integers or not	FALSE
train_ds.dataset.normalize	Normalize dataset	bool	The flag to determine whether or not to normalize the transcript text	TRUE
train_ds.dataset.sample_rate	Sample rate	const	The target sample rate to load the audio, in Hz.	${sample_rate}
train_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE
train_ds.dataset.sup_data_path	Prior folder	const	Path to the prior folder	${prior_folder}
train_ds.dataset.n_window_size	Window size	const	The size of the fft window in samples	${n_window_size}
train_ds.dataset.n_window_stride	Window stride	const	The stride of the window in samples	${n_window_stride}
train_ds.dataset.pitch_fmin	Pitch Fmin	const	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	${pitch_fmin}
train_ds.dataset.pitch_fmax	Pitch Fmin	const	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	${pitch_fmax}
train_ds.dataset.pitch_avg	Pitch Average	const	The average used to normalize the pitch	${pitch_avg}
train_ds.dataset.pitch_std	Pitch std. deviation	const	The standard deviation used to normalize the pitch	${pitch_std}
train_ds.dataset.vocab	Training data vocabulary	collection	Collection describing the vocabular component of the training dataset
train_ds.dataset.vocab.notation	Vocabulary Notation	str	Either chars or phonemes as general notation	phonemes
train_ds.dataset.vocab.punct	Punctuation	bool	Whether to reserve graphemes from basic punctuation	TRUE
train_ds.dataset.vocab.spaces	Spaces	bool	Whether to prepend spaces to every punctuation	TRUE
train_ds.dataset.vocab.stresses	Stresses	bool		TRUE
train_ds.dataset.vocab.add_blank_at	Add blank at	str	Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels	None	last, last_but_none, None
train_ds.dataset.vocab.pad_with_space	Pad with space	bool	Whether to pad text with spaces at the beginning and at the end.	TRUE
train_ds.dataset.vocab.chars	Chars	bool	Whether to additionaly use chars together with phonemes	TRUE
train_ds.dataset.vocab.improved_version_g2p	Imporved version G2P	bool	Whether to use the new version of g2p.	TRUE
train_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples
train_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE
train_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE
train_ds.dataloader_params.batch_size	Batch Size	int	Number of samples per batch of data.	32
train_ds.dataloader_params.num_workers	Number of workers	int	The number of worker threads for loading the dataset	12
validation_ds	Validation Dataset	collection	Parameters to configure the training dataset
validation_ds.dataset	Validation Dataset	collection	Parameters to configure the training dataset
validation_ds.dataset._target_	Target	const	The nemo class module to be imported	nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset
validation_ds.dataset.manifest_filepath	Validation manifest file	const	Path to the train dataset manifest json file	${validation_dataset}
validation_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped
validation_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped
validation_ds.dataset.int_values	Input as integer values	bool	Load samples as 32 bit integers or not	FALSE
validation_ds.dataset.normalize	Normalize dataset	bool	The flag to determine whether or not to normalize the transcript text	TRUE
validation_ds.dataset.sample_rate	Sample rate	const	The target sample rate to load the audio, in Hz.	${sample_rate}
validation_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE
validation_ds.dataset.sup_data_path	Prior folder	const	Path to the prior folder	${prior_folder}
validation_ds.dataset.n_window_size	Window size	const	The size of the fft window in samples	${n_window_size}
validation_ds.dataset.n_window_stride	Window stride	const	The stride of the window in samples	${n_window_stride}
validation_ds.dataset.pitch_fmin	Pitch Fmin	const	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	${pitch_fmin}
validation_ds.dataset.pitch_fmax	Pitch Fmin	const	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	${pitch_fmax}
validation_ds.dataset.pitch_avg	Pitch Average	const	The average used to normalize the pitch	${pitch_avg}
validation_ds.dataset.pitch_std	Pitch std. deviation	const	The standard deviation used to normalize the pitch	${pitch_std}
validation_ds.dataset.vocab	Validation data vocabulary	collection	Collection describing the vocabular component of the training dataset
validation_ds.dataset.vocab.notation	Vocabulary Notation	str	Either chars or phonemes as general notation	phonemes
validation_ds.dataset.vocab.punct	Punctuation	bool	Whether to reserve graphemes from basic punctuation	TRUE
validation_ds.dataset.vocab.spaces	Spaces	bool	Whether to prepend spaces to every punctuation	TRUE
validation_ds.dataset.vocab.stresses	Stresses	bool		TRUE
validation_ds.dataset.vocab.add_blank_at	Add blank at	str	Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels	None
validation_ds.dataset.vocab.pad_with_space	Pad with space	bool	Whether to pad text with spaces at the beginning and at the end.	TRUE
validation_ds.dataset.vocab.chars	Chars	bool	Whether to additionaly use chars together with phonemes	TRUE
validation_ds.dataset.vocab.improved_version_g2p	Imporved version G2P	bool	Whether to use the new version of g2p.	TRUE
validation_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples
validation_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE
validation_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE
validation_ds.dataloader_params.batch_size	Batch Size	int	Number of samples per batch of data.	32
validation_ds.dataloader_params.num_workers	Number of workers	int	The number of worker threads for loading the dataset	12
optim	Optimizer	collection
optim.name	Optimizer Name	str	Type of optimizer to be used during training	adam
optim.lr	Learning rate	float	Learning rate	0.0002
optim.betas	Optimizer betas	list	List of floats	[0.9, 0.98]
optim.weight_decay	Weight decay	float		0.000001

infer

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file					yes
result_dir	Results directory	hidden	Path to the output results directory and logs					yes
key	Save key	hidden	Key to save/load the model					yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1	1	1
input_batch	List of input texts	list	List of text sentences to render spectrograms. This only works in infer mode						yes
input_json	Input dataset to run inference	hidden	Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder						yes
speaker	Speaker ID	int	ID of the speaker to generate spectrograms	0
mode	Infer mode	string	Mode to run inference 1. Inferences on discrete text samples (infer) 2. Inference on a dataset (infer_hifigan_ft)	infer			infer, infer_hifigan_ft	yes

infer_onnx

parameter	display_name	value_type	description	default_value	valid_min	valid_max	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file				yes
result_dir	Results directory	hidden	Path to the output results directory and logs				yes
key	Save key	hidden	Key to save/load the model				yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1	1	1
input_batch	List of input texts	list	List of text sentences to render spectrograms. This only works in infer mode				yes	yes
speaker	Speaker ID	int	ID of the speaker to generate spectrograms	0

train

parameter	display_name	value_type	description	default_value	valid_options	required	popular	CLI
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file			yes		yes
result_dir	Results directory	hidden	Path to the output results directory and logs			yes		yes
key	Save key	hidden	Key to save the model			yes		yes
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model			yes	yes	yes
sample_rate	Sample rate	integer	The target sample rate to load the audio, in Hz	22050		yes	yes
train_dataset	Train Dataset	hidden	Path to the train dataset manifest json file			yes
validation_dataset	Validation Dataset	hidden	Path to the validation dataset manifest json file			yes
prior_folder		hidden				yes
model.learn_alignment	Learn alignment	bool		TRUE
model.n_speakers	N speakers	integer	Number of speakers in the dataset	1		yes
model.symbols_embedding_dim	Symbols Embedding dimension	integer	The dimension of the symbols embedding	384		yes
model.max_token_duration	Max token duration	integer	Maximum duration to clamp the tokens to	75
model.n_mel_channels	Number of channels in Mel Output	integer	Number of channels in the Mel output	80
model.pitch_embedding_kernel_size	Pitch embedding kernel size	integer	The kernel size of the Conv1D layer generating the pitch embeddings	3
model.n_window_size	Window size	integer	The size of the fft window in samples	1024		yes
model.n_window_stride	Window stride	integer	The stride of the window in samples	256		yes
model.pitch_fmin	Pitch Fmin	float	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	64		yes	yes
model.pitch_fmax	Pitch Fmin	float	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	512		yes	yes
model.pitch_avg	Pitch Average	float	The average used to normalize the pitch			yes	yes
model.pitch_std	Pitch std. deviation	float	The standard deviation used to normalize the pitch			yes	yes
model.train_ds	Train Dataset	collection	Parameters to configure the training dataset
model.train_ds.dataset	Train Dataset	collection	Parameters to configure the training dataset
model.train_ds.dataset._target_	Target	const	The nemo class module to be imported	nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset		yes
model.train_ds.dataset.manifest_filepath	Train manifest file	const	Path to the train dataset manifest json file	${train_dataset}		yes
model.train_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped			yes
model.train_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped	0.1		yes
model.train_ds.dataset.int_values	Input as integer values	bool	Load samples as 32 bit integers or not	FALSE		yes
model.train_ds.dataset.normalize	Normalize dataset	bool	The flag to determine whether or not to normalize the transcript text	TRUE		yes
model.train_ds.dataset.sample_rate	Sample rate	const	The target sample rate to load the audio, in Hz.	${sample_rate}		yes
model.train_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE		yes
model.train_ds.dataset.sup_data_path	Prior folder	const	Path to the prior folder	${prior_folder}		yes
model.train_ds.dataset.n_window_size	Window size	const	The size of the fft window in samples	${model.n_window_size}		yes
model.train_ds.dataset.n_window_stride	Window stride	const	The stride of the window in samples	${model.n_window_stride}		yes
model.train_ds.dataset.pitch_fmin	Pitch Fmin	const	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	${model.pitch_fmin}		yes
model.train_ds.dataset.pitch_fmax	Pitch Fmin	const	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	${model.pitch_fmax}		yes
model.train_ds.dataset.pitch_avg	Pitch Average	const	The average used to normalize the pitch	${model.pitch_avg}		yes
model.train_ds.dataset.pitch_std	Pitch std. deviation	const	The standard deviation used to normalize the pitch	${model.pitch_std}		yes
model.train_ds.dataset.vocab	Training data vocabulary	collection	Collection describing the vocabular component of the training dataset			yes
model.train_ds.dataset.vocab.notation	Vocabulary Notation	string	Either chars or phonemes as general notation	phonemes		yes
model.train_ds.dataset.vocab.punct	Punctuation	bool	Whether to reserve graphemes from basic punctuation	TRUE		yes
model.train_ds.dataset.vocab.spaces	Spaces	bool	Whether to prepend spaces to every punctuation	TRUE		yes
model.train_ds.dataset.vocab.stresses	Stresses	bool		TRUE		yes
model.train_ds.dataset.vocab.add_blank_at	Add blank at	string	Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels	None	last, last_but_none, None	yes
model.train_ds.dataset.vocab.pad_with_space	Pad with space	bool	Whether to pad text with spaces at the beginning and at the end.	TRUE		yes
model.train_ds.dataset.vocab.chars	Chars	bool	Whether to additionaly use chars together with phonemes	TRUE		yes
model.train_ds.dataset.vocab.improved_version_g2p	Imporved version G2P	bool	Whether to use the new version of g2p.	TRUE		yes
model.train_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples			yes
model.train_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE		yes
model.train_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE		yes
model.train_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	32		yes	yes
model.train_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	12		yes
model.validation_ds	Validation Dataset	collection	Parameters to configure the training dataset			yes
model.validation_ds.dataset	Validation Dataset	collection	Parameters to configure the training dataset			yes
model.validation_ds.dataset._target_	Target	const	The nemo class module to be imported	nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset		yes
model.validation_ds.dataset.manifest_filepath	Validation manifest file	const	Path to the train dataset manifest json file	${validation_dataset}		yes
model.validation_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped			yes
model.validation_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped			yes
model.validation_ds.dataset.int_values	Input as integer values	bool	Load samples as 32 bit integers or not	FALSE		yes
model.validation_ds.dataset.normalize	Normalize dataset	bool	The flag to determine whether or not to normalize the transcript text	TRUE		yes
model.validation_ds.dataset.sample_rate	Sample rate	const	The target sample rate to load the audio, in Hz.	${sample_rate}		yes
model.validation_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE		yes
model.validation_ds.dataset.sup_data_path	Prior folder	const	Path to the prior folder	${prior_folder}		yes
model.validation_ds.dataset.n_window_size	Window size	const	The size of the fft window in samples	${model.n_window_size}		yes
model.validation_ds.dataset.n_window_stride	Window stride	const	The stride of the window in samples	${model.n_window_stride}		yes
model.validation_ds.dataset.pitch_fmin	Pitch Fmin	const	The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”)	${model.pitch_fmin}		yes
model.validation_ds.dataset.pitch_fmax	Pitch Fmin	const	The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”)	${model.pitch_fmax}		yes
model.validation_ds.dataset.pitch_avg	Pitch Average	const	The average used to normalize the pitch	${model.pitch_avg}		yes
model.validation_ds.dataset.pitch_std	Pitch std. deviation	const	The standard deviation used to normalize the pitch	${model.pitch_std}		yes
model.validation_ds.dataset.vocab	Validation data vocabulary	collection	Collection describing the vocabular component of the training dataset			yes
model.validation_ds.dataset.vocab.notation	Vocabulary Notation	string	Either chars or phonemes as general notation	phonemes		yes
model.validation_ds.dataset.vocab.punct	Punctuation	bool	Whether to reserve graphemes from basic punctuation	TRUE		yes
model.validation_ds.dataset.vocab.spaces	Spaces	bool	Whether to prepend spaces to every punctuation	TRUE		yes
model.validation_ds.dataset.vocab.stresses	Stresses	bool		TRUE		yes
model.validation_ds.dataset.vocab.add_blank_at	Add blank at	string	Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels	None		yes
model.validation_ds.dataset.vocab.pad_with_space	Pad with space	bool	Whether to pad text with spaces at the beginning and at the end.	TRUE		yes
model.validation_ds.dataset.vocab.chars	Chars	bool	Whether to additionaly use chars together with phonemes	TRUE		yes
model.validation_ds.dataset.vocab.improved_version_g2p	Imporved version G2P	bool	Whether to use the new version of g2p.	TRUE		yes
model.validation_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples			yes
model.validation_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE		yes
model.validation_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE		yes
model.validation_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	32		yes	yes
model.validation_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	12		yes
model.optim	Optimizer	collection				yes
model.optim.name	Optimizer Name	string	Type of optimizer to be used during training	lamb		yes
model.optim.lr	Learning rate	float	Learning rate	0.1		yes	yes
model.optim.betas	Optimizer betas	list	Coefficients used to compute the running averages of the gradient and it’s square	[0.9, 0.98]		yes
model.optim.weight_decay	Weight decay	float	Weight decay (L2 penalty	0.000001		yes
model.optim.sched	Learning rate scheduler	collection	Parameters to configure the learning rate scheduler			yes
model.optim.sched.name	Scheduler Name	string	Type of learning rate scheduler to be used	NoamAnnealing		yes
model.optim.sched.warmup_steps	Warm up steps	integer	No. of steps to warm up the learning rate	1000		yes
model.optim.sched.last_epoch	Last epoch	integer		-1		yes
model.optim.sched.d_model	Disable scaling	integer	Flag to disable scaling based on model dim	1		yes
model.preprocessor	Preprocessor config	collection	Collection to configure the model preprocessor			yes
model.preprocessor._target_	Target class of the preprocessor instance	const	The Nemo class to instantiate.	nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor		yes
model.preprocessor.dither	Dither	float		0		yes
model.preprocessor.features	Number of channels in Mel Output	const	Number of channels in the Mel Output	${model.n_mel_channels}		yes
model.preprocessor.frame_splicing	Spectrogram Frames per step	integer	Number of spectrogram frames per step	1		yes
model.preprocessor.highfreq	High frequency bound in Hz	integer	Upper bound of the mel basis in Hz	8000		yes
model.preprocessor.log	Log Spectrograms	bool	Flags to enable logging spectrograms	TRUE		yes
model.preprocessor.log_zero_guard_type	Zero guard type	enum	Need to avoid taking the log of zero. There are two options: “add” or “clamp”.	add		yes
model.preprocessor.lowfreq	Low frequency bound in Hz	integer	Lower bound of the mel basis in Hz	0		yes
model.preprocessor.mag_power	Multiplication with mel basis	integer	Prior to multiplication with mel basis	1		yes
model.preprocessor.n_fft	FFT Window size	const	The size of the window for the FFT in samples.	${model.n_window_size}		yes
model.preprocessor.n_window_size	FFT Window size	const	The size of the window for the FFT in samples.	${model.n_window_size}		yes
model.preprocessor.n_window_stride	FFT Window stride	const	The stride of the window for FFT	${model.n_window_stride}		yes
model.preprocessor.normalize	Feature Normalization	string	Options disable feature normalization. all_features normalizes the entire spectrogram per channel/freq	null		yes
model.preprocessor.pad_to	Pad to	integer	A multiple pf pad_to	1		yes
model.preprocessor.pad_value	Pad Value	float	The value to that shorter mels are padded with	0		yes
model.preprocessor.preemph	Pre-emphasis value	float	Amount of pre-emphasis to be added to the audio. Can be disabled by passing None.			yes
model.preprocessor.sample_rate	Samping rate	const	The target sample rate to load the audio in Hz.	${sample_rate}		yes
model.preprocessor.window	Window type	string	The type of window to be used.	hann		yes
model.preprocessor.window_size	Window size	integer	The size of the window to be used			yes
model.preprocessor.window_stride	Window stride	integer	The stride of the window to be used			yes
model.input_fft	Input FFT	collection	Collection to configure the Input FFT			yes
model.input_fft._target_	Target class for the FFT Transformer Encoder	const	The Nemo FFTEncoder module to be instantiated	nemo.collections.tts.modules.transformer.FFTransformerEncoder		yes
model.input_fft.n_layer	input_fft n_layer	integer	Number of transformer layers	6		yes
model.input_fft.n_head	input_fft num heads	integer	Number of heads in the MultiHeadAttn	1		yes
model.input_fft.d_model	input_fft d_model	const	Hidden size of the input and output	${model.symbols_embedding_dim}		yes
model.input_fft.d_head	input_fft d_head	integer	Hidden size of the attention module	64		yes
model.input_fft.d_inner	Input fft d_inner	integer	Hidden size of the convolutional layers	1536		yes
model.input_fft.kernel_size	input_fft kernel_size	integer	Hidden size of the input and output	3		yes
model.input_fft.dropout	input_fft dropout	float	Dropout parameters	0.1		yes
model.input_fft.dropatt	input_fft dropatt	float	Dropout parameter for attention	0.1		yes
model.input_fft.dropemb	input_fft dropemb	integer	Dropout parameter for embedding	0		yes
model.input_fft.d_embed	input_fft d_embed	const	Hidden size of embeddings (input fft only)	${model.symbols_embedding_dim}		yes
model.output_fft	output_fft	collection	Collection to configure the Input FFT			yes
model.output_fft._target_	Target class for the FFT Transformer Encoder	const	The Nemo FFTEncoder module to be instantiated	nemo.collections.tts.modules.transformer.FFTransformerDecoder		yes
model.output_fft.n_layer	output_fft n_layer	integer	Number of transformer layers	6		yes
model.output_fft.n_head	output_fft num heads	integer	Number of heads in the MultiHeadAttn	1		yes
model.output_fft.d_model	output_fft d_model	const	Hidden size of the input and output	${model.symbols_embedding_dim}		yes
model.output_fft.d_head	output_fft d_head	integer	Hidden size of the attention module	64		yes
model.output_fft.d_inner	output_fft d_inner	integer	Hidden size of the convolutional layers	1536		yes
model.output_fft.kernel_size	output_fft kernel_size	integer	Hidden size of the input and output	3		yes
model.output_fft.dropout	output_fft dropout	float	Dropout parameters	0.1		yes
model.output_fft.dropatt	output_fft dropatt	float	Dropout parameter for attention	0.1		yes
model.output_fft.dropemb	output_fft dropemb	integer	Dropout parameter for embedding	0		yes
model.alignment_module	alignment_module	collection	Configuration element for the alignment module			yes
model.alignment_module._target_	alignment_module._target_	const	Module to be instantiated for alignment	nemo.collections.tts.modules.aligner.AlignmentEncoder		yes
model.alignment_module.n_text_channels	n_text_channels	const	The dimensionality of symbol embedding	${model.symbols_embedding_dim}		yes
model.duration_predictor	duration_predictor	collection	Configuration element for the duration predictor			yes
model.duration_predictor._target_	duration_predictor._target_	const	Module to be instantiated for duration predictor	nemo.collections.tts.modules.fastpitch.TemporalPredictor		yes
model.duration_predictor.input_size	duration_predictor.input_size	const	Hidden size of the input and output	${model.symbols_embedding_dim}		yes
model.duration_predictor.kernel_size	duration_predictor.kernel_size	integer	Kernel size for convolutional layers	3		yes
model.duration_predictor.filter_size	duration_predictor.filter_size	integer	Filter size for the convolutional layers	256		yes
model.duration_predictor.dropout	duration_predictor.dropout	float	Drop out parameter	0.1		yes
model.duration_predictor.n_layers	duration_predictor.n_layers	integer	Number of layers	2		yes
model.pitch_predictor	pitch_predictor	collection	Configuration element for the pitch predictor			yes
model.pitch_predictor._target_	pitch_predictor._target_	const	Module to be instantiated for pitch predictor	nemo.collections.tts.modules.fastpitch.TemporalPredictor		yes
model.pitch_predictor.input_size	pitch_predictor.input_size	const	Hidden size of the input and output	${model.symbols_embedding_dim}		yes
model.pitch_predictor.kernel_size	pitch_predictor.kernel_size	integer	Kernel size for convolutional layers	3		yes
model.pitch_predictor.filter_size	pitch_predictor.filter_size	integer	Filter size for the convolutional layers	256		yes
model.pitch_predictor.dropout	pitch_predictor.dropout	float	Drop out parameter	0.1		yes
model.pitch_predictor.n_layers	pitch_predictor.n_layers	integer	Number of layers	2		yes
trainer	Trainer Configurations	collection	Collection of parameters to configure the trainer			yes
trainer.max_epochs	Number of epochs	collection	Maximum number of epochs to train the model	100		yes	yes

speech

pitch_stats

parameter	display_name	value_type	description	default_value	valid_min	valid_max	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file				yes
result_dir	Results directory	hidden	Path to the output results directory and logs				yes
key	Save key	hidden	Key to save/load the model				yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1	1	1
num_files	Number of files	integer	List of text sentences to render spectrograms. This only works in infer mode	10			yes	yes
manifest_filepath	Manifest	hidden	Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder				yes	yes
output_path	Output	hidden	ID of the speaker to generate spectrograms	0
pitch_fmin	F min	float		64			yes
pitch_fmax	F max	float		512			yes
n_window_size	Window size	integer		1024
sample_rate	Sample rate	integer		22050
render_plots	Render plots	bool		TRUE
compute_stats	Compute stats	bool		TRUE

convert

parameter	display_name	value_type	description	default_value	required
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file		yes
result_dir	Results directory	hidden	Path to the output results directory and logs		yes
dataset_name	Name	string		ljs	yes
data_dir	Data dir	hidden

ssd

evaluate

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
	version	Schema Version	const	The version of this schema	1							internal
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
	dataset_config.validation_data_sources.image_directory_path	Image path	hidden
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate		collection
	training_config.learning_rate.soft_start_annealing_schedule		collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		300						yes
	augmentation_config.output_height	Model Input height	integer		300						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	ssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.		[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	ssd_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different SSD feature layers
	ssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.		TRUE
	ssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.		FALSE
	ssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes		[0.1, 0.1, 0.2, 0.2]
	ssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer		[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95]
	ssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	ssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	ssd_config.arch	Arch	string	The backbone for feature extraction		resnet
	ssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch		18
	ssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.		FALSE
	ssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training

inference

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
	version	Schema Version	const	The version of this schema	1							internal
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	threshold	Threshold	float		0.3
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
	dataset_config.validation_data_sources.image_directory_path	Image path	hidden
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate		collection
	training_config.learning_rate.soft_start_annealing_schedule		collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		300						yes
	augmentation_config.output_height	Model Input height	integer		300						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	ssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.		[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	ssd_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different SSD feature layers
	ssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.		TRUE
	ssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.		FALSE
	ssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes		[0.1, 0.1, 0.2, 0.2]
	ssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer		[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95]
	ssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	ssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	ssd_config.arch	Arch	string	The backbone for feature extraction		resnet
	ssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch		18
	ssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.		FALSE
	ssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training

train

	parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	regex	popular	param_type (internal / hidden / inferred)
	version	Schema Version	const	The version of this schema	1							internal
	random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
	initial_epoch	Initial epoch cli	integer		1
	use_multiprocessing	CLI parameter	bool		FALSE
	dataset_config	Dataset	collection	Parameters to configure the dataset
	dataset_config.data_sources.label_directory_path	KITTI label path	hidden									hidden
	dataset_config.data_sources.image_directory_path	Image path	hidden
	dataset_config.data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.target_class_mapping	Target Class Mappings	list	This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_data_sources.label_directory_path	KITTI label path	hidden
	dataset_config.validation_data_sources.image_directory_path	Image path	hidden
	dataset_config.validation_data_sources.tfrecords_directory_path	TFRecords path	hidden
	dataset_config.include_difficult_in_training	include difficult label in training	bool	Whether to use difficult objects in training	TRUE
	training_config	Training	collection
	training_config.batch_size_per_gpu	Batch Size Per GPU	integer	The number of images per batch per GPU.	10		1
	training_config.num_epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	80		1
	training_config.enable_qat	Enable Quantization Aware Training	bool	bool	FALSE
	training_config.learning_rate		collection
	training_config.learning_rate.soft_start_annealing_schedule		collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Minimum Learning Rate	float	The minimum learning rate in the learning rate schedule.	5.00E-05		0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Maximum Learning Rate	float	The maximum learning rate in the learning rate schedule.	9.00E-03		0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	The time to ramp up the learning rate from minimum learning rate to maximum learning rate.	0.1		0	1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start.	0.8		0	1
	training_config.regularizer.type	Regularizer Type	string	The type of the regularizer being used.	__L1__				L1, L2
	training_config.regularizer.weight	Regularizer Weight	float	The floating point weight of the regularizer.	3.00E-05		0
	training_config.checkpoint_interval	Checkpoint Interval	integer	The interval (in epochs) at which train saves intermediate models.	1		1
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	16		1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	8		1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping					loss, validation_loss, val_loss
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed			0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training			0
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard	3		0
	eval_config	Evaluation	collection
	eval_config.average_precision_mode	Average Precision Mode	string	The mode in which the average precision for each class is calculated.	__SAMPLE__				SAMPLE/INTEGRATE
	eval_config.validation_period_during_training	Validation Period During Training	integer	The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below.	10		1
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	16		1
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5		0	1
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.01		0	1
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.6		0	1
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200		0
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS			1	32
	augmentation_config	Augmentation config	collection
	augmentation_config.output_width	Model Input width	integer		300						yes
	augmentation_config.output_height	Model Input height	integer		300						yes
	augmentation_config.output_channel	Model Input channel	integer		3						yes
	augmentation_config.random_crop_min_scale	Random Crop Min Scale	float	the minimum random crop size	0.3		0	1
	augmentation_config.random_crop_max_scale	Random Crop Max Scale	float	the maximum random crop size	1		0	1
	augmentation_config.random_crop_min_ar	Random Crop Max Aspect Ratio	float	the minimum random crop aspect ratio	0.5
	augmentation_config.random_crop_max_ar	Random Crop MIin Aspect Ratio	float	the maximum random crop aspect ratio	2
	augmentation_config.zoom_out_min_scale	Zoom Out Min Scale	float	Minimum scale of ZoomOut augmentation	1		1
	augmentation_config.zoom_out_max_scale	Zoom Out Max Scale	float	Maximum scale of ZoomOut augmentation	4		1
	augmentation_config.brightness	Brightness	integer	Brightness delta in color jittering augmentation	32		0	255
	augmentation_config.contrast	Contrast	float	Contrast delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.saturation	Saturation	float	Saturation delta factor in color jitter augmentation	0.5		0	1
	augmentation_config.hue	Hue	integer	Hue delta in color jittering augmentation	18		0	180
	augmentation_config.random_flip	Random Flip	float	Probablity of performing random horizontal flip
	augmentation_config.image_mean	Image Mean	collection	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.key	Image Mean key	string	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	augmentation_config.image_mean.value	Image Mean value	float	A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
	ssd_config.aspect_ratios_global	Aspect Ratio Global	string	The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.		[1.0, 2.0, 0.5, 3.0, 1.0/3.0]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.	ssd_config.aspect_ratios	Aspect Ratio	string	The aspect ratio of anchor boxes for different SSD feature layers
	ssd_config.two_boxes_for_ar1	Two boxes for aspect-ratio=1	bool	If this parameter is True, two boxes will be generated with an aspect ratio of 1.		TRUE
	ssd_config.clip_boxes	Clip Boxes	bool	If true, all corner anchor boxes will be truncated so they are fully inside the feature images.		FALSE
	ssd_config.variances	Variance	string	A list of 4 positive floats to decode bboxes		[0.1, 0.1, 0.2, 0.2]
	ssd_config.scales	Scales	string	A list of positive floats containing scaling factors per convolutional predictor layer		[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95]
	ssd_config.steps	Steps	string	An optional list inside quotation marks with a length that is the number of feature layers for prediction.The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
	ssd_config.offsets	Offsets	string	An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
	ssd_config.arch	Arch	string	The backbone for feature extraction		resnet
	ssd_config.nlayers	Number of Layers	integer	The number of conv layers in a specific arch		18
	ssd_config.freeze_bn	Freeze BN	bool	Whether to freeze all batch normalization layers during training.		FALSE
	ssd_config.freeze_blocks	Freeze Blocks	list	The list of block IDs to be frozen in the model during training

unet

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
experiment_spec	Experiment Spec	hidden	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.				yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.				yes
data_type	Pruning Granularity	string	Number of filters to remove at a time.	fp32		int8, fp32, fp16	yes	yes
max_workspace_size		integer	Example: The integer value of 1<<30, 2<<30
max_batch_size		integer			1
min_batch_size		integer			1
opt_batch_size		integer			1
gen_ds_config		bool		FALSE
engine_file	Engine File	hidden	UNIX path to the model engine file.				yes
verbose		hidden		TRUE
strict_type_constraints		bool		FALSE
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	100				yes
cal_image_dir		hidden
cal_cache_file	Calibration cache file	hidden	Unix PATH to the int8 calibration cache file				yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100
results_dir		hidden

prune

parameter	display_name	value_type	description	default_value	valid_min	valid_max	valid_options	required	popular
model	Model path	hidden	UNIX path to where the input model is located.					yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.					yes
experiment_spec_path		hidden
key	Encode key	hidden
normalizer	Normalizer	string	How to normalize	max			max, L2
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union			union, intersection, arithmetic_mean,geometric_mean	no
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8				no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1	0	1		yes	yes
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16				no
excluded_layers	Excluded layers	string	string of list: List of excluded_layers. Examples: -i item1 item2
results_dir	Results directory	hidden
verbose	verbosity	hidden		TRUE

train

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	regex	popular
version	Schema Version	const	The version of this schema	1
random_seed	Random Seed	integer	Seed value for the random number generator in the network	42
dataset_config	Dataset	collection	Parameters to configure the dataset
dataset_config.dataset		string		custom
dataset_config.augment	Augment	bool	Boolean to augment the dataset or not	FALSE
dataset_config.buffer_size	buffer_size	integer	The is is the buffer for the number of images atmost to be used in an iteration. Total number of images in the dataset
dataset_config.filter_data	filter_data	bool	Set this to omit images or masks that are not present
dataset_config.resize_padding	Resize Padding	bool	If the image needs to be resized by preserving aspect ratio
dataset_config.resize_method	Resize Method	string						BILINEAR, NEAREST_NEIGHBOR, BICUBIC AREA
dataset_config.input_image_type	Input Image type	string	Gives information on if the input is RGB or grayscale	color				color, grayscale
dataset_config.data_sources.image_path	Image path	hidden
dataset_config.data_sources.masks_path	Masks path	hidden
dataset_config.data_class_config	Target Class Mappings	collection	Contains the parameters to configure the mappping of diferent classes						yes		yes
dataset_config.data_class_config.target_classes	Target Class Mappings list	list	Contains the parameters to configure the mappping of diferent classes						yes
dataset_config.data_class_config.target_classes.name	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person				yes	^[-a-zA-Z0-9_]{1,40}$	yes
dataset_config.data_class_config.target_classes.mapping_class	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person				yes	^[-a-zA-Z0-9_]{1,40}$	yes
dataset_config.data_class_config.target_classes.label_id	Class label ID	integer			1				yes		yes
augmentation_config	Data Augmentation	collection	Collection of parameters to configure augmentation								Yes
augmentation_config.spatial_augmentation		collection	Configure augmentation pertaining to spatial transformations
augmentation_config.spatial_augmentation.hflip_probability		float	probability for flipping image horizontally
augmentation_config.spatial_augmentation.vflip_probability		float	probability for flipping image vertically
augmentation_config.spatial_augmentation.crop_and_resize_prob		float	probability at which to crop and resize
model_config	Model	collection
model_config.arch	BackBone Architecture	string	The architecture of the backbone feature extractor to be used for training.	vanilla_unet_dynamic				resnet	yes
model_config.enable_qat	Enable Quantization aware training	bool	Set this to true, to enable quantization during re-training of pruned model	FALSE
model_config.byom_model	Model path to BYOM .tltb	hidden	Set the path to byom model when using byom arch
model_config.load_graph	Pruned model Load Graph	bool	For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.	FALSE
model_config.freeze_blocks	Freeze Blocks	integer	This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates.			0	3
model_config.freeze_bn	Freeze Batch Normalization	bool	A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections	All Projections	bool	For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.	TRUE
model_config.num_layers	Number of Layers	integer	The depth of the feature extractor for scalable templates.	18				10, 18, 34, 50, 101	yes
model_config.use_pooling	Use Pooling	bool	Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm	Use Batch Normalization	bool	A flag to determine whether to use Batch Normalization layers or not.	TRUE
model_config.enable_qat		bool		FALSE
model_config.dropout_rate	Dropout Rate	float	Probability for drop out			0	0.1
model_config.training_precision.backend_floatx	Backend Training Precision	string	A nested parameter that sets the precision of the backend training framework.	__FLOAT32__				__FLOAT32__	yes
model_config.initializer	Kernel Initializer	enum	The type of initializer for the kernels					__HE_UNIFORM__,__ HE_NORMAL__,__ GLOROT_UNIFORM__
model_config.model_input_height	Model Input height	int	The model input dimensions
model_config.model_input_height	Model Input width	int	The model input dimensions
model_config.model_input_channels	Model input channels	int	The model input dimensions
training_config	Training	collection
training_config.batch_size	Batch Size Per GPU	integer	The number of images per batch per GPU.	1		1			yes
training_config.epochs	Number of Epochs	integer	The total number of epochs to run the experiment.	120		1			yes		Yes
training_config.log_summary_steps		integer	Number of steps after which to display the log summary	200
training_config.checkpoint_interval	checkpoint interval	integer	Number of epochs after which to save the ceheckpoint	1
training_config.loss		string	Loss to be used	cross_entropy				cross_entropy, cross_dice_sum, dice
training_config.learning_rate		float	Learning rate	0.00008
training_config.lr_scheduler	learning rate scheduler	string
training_config.weights_monitor		bool	Bool to turn on tensorboard visualization of loss and gradients variations
training_config.regularizer		collection	Regularizer to use
training_config.regularizer.type		string		__L2__				__L1__, __L2__
training_config.regularizer.weight		float		1.00E-05
training_config.optimizer	Optimizer	collection
training_config.optimizer.adam.epsilon	Optimizer Adam Epsilon	float	A very small number to prevent any division by zero in the implementation.	1.00E-08					yes
training_config.optimizer.adam.beta1	Optimizer Adam Beta1	float		0.899999976					yes
training_config.optimizer.adam.beta2	Optimizer Adam Beta2	float		0.999000013					yes
training_config.visualizer		collection
training_config.visualizer.enabled		bool		FALSE
training_config.visualizer.save_summary_steps		integer	Steps at which to visualize loss on TB.
training_config.visualizer.infrequent_save_summary_steps		integer	Steps at which to visualize input images, ground truth and histograms.
training_config.data_options		bool		TRUE

vocoder

export

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	required
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file					yes
result_dir	Results directory	hidden	Path to the output results directory and logs					yes
key	Save key	hidden	Key to save/load the model					yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1		1	1
export_format	Export format	string		RIVA	RIVA, ONNX			yes
export_to	Export To	const

finetune

parameter	display_name	value_type	description	default_value	valid_min	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file			yes
resume_model_weights	Pretrained model path	hidden	Path to the pre-trained model
result_dir	Results directory	hidden	Path to the output results directory and logs			yes
key	Save key	hidden	Key to save the model			yes
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model			yes	yes
train_dataset	Train Dataset	hidden	Path to the train dataset manifest json file			yes
validation_dataset	Validation Dataset	hidden	Path to the validation dataset manifest json file			yes
training_ds	Train Dataset	collection	Parameters to configure the training dataset
training_ds.dataset	Train Dataset	collection	Parameters to configure the training dataset
training_ds.dataset._target_	Target dataset class	const	Nemo training ds class instance	nemo.collections.tts.data.datalayers.MelAudioDataset		yes
training_ds.dataset.manifest_filepath	Train manifest file	const	Path to the train dataset manifest json file	${train_dataset}		yes
training_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped	0.75		yes
training_ds.dataset.n_segments	Number of segments	int	The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio.	16384		yes
training_ds.dataset.mel_hop_size	Mel Hop Size	int	Mel hop size	256		yes
training_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples
training_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE		yes
training_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE		yes
training_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	16		yes	yes
training_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	4		yes
validation_ds	Validation Dataset	collection	Parameters to configure the validation dataset
validation_ds.dataset	Validation Dataset	collection	Parameters to configure the validation dataset
validation_ds.dataset._target_	Target dataset class	const	Nemo validation ds class instance	nemo.collections.tts.data.datalayers.MelAudioDataset		yes
validation_ds.dataset.manifest_filepath	Train manifest file	const	Path to the validation dataset manifest json file	${validation_dataset}		yes
validation_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped	0.75		yes
validation_ds.dataset.n_segments	Number of segments	int	The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio.	16384		yes
validation_ds.dataset.mel_hop_size	Mel Hop Size	int	Mel hop size	256		yes
validation_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples
validation_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE		yes
validation_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	FALSE		yes
validation_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	2		yes	yes
validation_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	1		yes
optim	Optimizer	collection				yes
optim._target_	Optimizer Class	const	The class of the Optimizer to be instantiated	torch.optim.AdamW		yes
optim.lr	Learning rate	float	Learning rate	0.0001		yes	yes
optim.betas	Optimizer betas	list	Coefficients used to compute the running averages of the gradient and it’s square	[0.8, 0.99]		yes
trainer	collection		Parameters to configure the trainer object
trainer.max_steps	Maximum Steps	int	Maximum number of steps to run training	1000	0	yes
trainer.max_epochs	Maximum number of epochs	int	Maximum number of epochs to run training. This parameter supercedes the trainer.max_steps parameter		0	yes	yes

infer

parameter	display_name	value_type	description	default_value	valid_min	valid_max	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file				yes
result_dir	Results directory	hidden	Path to the output results directory and logs				yes
key	Save key	hidden	Key to save/load the model				yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1	1	1
input_path	List of input texts	hidden	Path to the directory containing spectrogram outputs from FastPitch inference				yes	yes
output_path	Input dataset to run inference	hidden	Path to the output directory containing rendered audio clips				yes	yes
sample_rate	Speaker ID	int	Sampling rate of the output audio clip.	22050			yes	yes

infer_onnx

parameter	display_name	value_type	description	default_value	valid_min	valid_max	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file				yes
result_dir	Results directory	hidden	Path to the output results directory and logs				yes
key	Save key	hidden	Key to save/load the model				yes
resume_model_weights	Pretrained model path	hidden	Path to the trained/finetuned model
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1	1	1
input_path	List of input texts	hidden	Path to the directory containing spectrogram outputs from FastPitch inference				yes	yes
output_path	Input dataset to run inference	hidden	Path to the output directory containing rendered audio clips				yes	yes
sample_rate	Speaker ID	int	Sampling rate of the output audio clip.	22050			yes	yes

train

parameter	display_name	value_type	description	default_value	valid_min	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
experiment_spec	Experiment spec	hidden	Path to the training experiment spec file				yes
result_dir	Results directory	hidden	Path to the output results directory and logs				yes
key	Save key	hidden	Key to save the model				yes
gpus	Number of GPUs	hidden	Number of GPUs to be used to train the model	1			yes	yes
train_dataset	Train Dataset	hidden	Path to the train dataset manifest json file				yes
validation_dataset	Validation Dataset	hidden	Path to the validation dataset manifest json file				yes
training_ds	Train Dataset	collection	Parameters to configure the training dataset
training_ds.dataset	Train Dataset	collection	Parameters to configure the training dataset
training_ds.dataset._target_	Target dataset class	const	Nemo training ds class instance	nemo.collections.tts.data.datalayers.AudioDataset			yes
training_ds.dataset.manifest_filepath	Train manifest file	const	Path to the train dataset manifest json file	${train_dataset}			yes
training_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped
training_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped	0.1			yes
training_ds.dataset.n_segments	Number of segments	int	The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio.	8192			yes
training_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE			yes
training_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples				yes
training_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE			yes
training_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE			yes
training_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	16			yes	yes
training_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	4			yes
validation_ds	Validation Dataset	collection	Parameters to configure the validation dataset
validation_ds.dataset	Validation Dataset	collection	Parameters to configure the validation dataset
validation_ds.dataset._target_	Target dataset class	const	Nemo validation ds class instance	nemo.collections.tts.data.datalayers.AudioDataset			yes
validation_ds.dataset.manifest_filepath	Train manifest file	const	Path to the validation dataset manifest json file	${train_dataset}			yes
validation_ds.dataset.max_duration	Max clip duration	float	All files with a duration greater than the given value (in seconds) will be dropped
validation_ds.dataset.min_duration	Min clip duration	float	All files with a duration lesser than the given value (in seconds) will be dropped
validation_ds.dataset.n_segments	Number of segments	int	The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio.	-1			yes
validation_ds.dataset.trim	Trim	bool	Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim().	FALSE			yes
validation_ds.dataloader_params	Dataloader parameters	collection	Configuring the dataloader yielding the data samples
validation_ds.dataloader_params.drop_last	Drop last	bool	Whether to drop the last samples	FALSE			yes
validation_ds.dataloader_params.shuffle	Enable shuffle	bool	Whether to shuffle the data or not. We recommend True for training data, and false for validation	TRUE			yes
validation_ds.dataloader_params.batch_size	Batch Size	integer	Number of samples per batch of data.	16			yes	yes
validation_ds.dataloader_params.num_workers	Number of workers	integer	The number of worker threads for loading the dataset	1			yes
model	Model Config	collection	Collection to configure the HiFiGAN model element
model.preprocessor	Preprocessor config	collection	Collection to configure the model preprocessor
model.preprocessor._target_	Target class of the preprocessor instance	const	The Nemo class to instantiate.	nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures			yes
model.preprocessor.dither	Dither	float		0			yes
model.preprocessor.frame_splicing	Spectrogram Frames per step	integer	Number of spectrogram frames per step	1			yes
model.preprocessor.nfilt	Number of filter	integer	Number of filters in the conv layer	80
model.preprocessor.highfreq	High frequency bound in Hz	integer	Upper bound of the mel basis in Hz	8000			yes
model.preprocessor.log	Log Spectrograms	bool	Flags to enable logging spectrograms	TRUE			yes
model.preprocessor.log_zero_guard_type	Zero guard type	string	Need to avoid taking the log of zero. There are two options: “add” or “clamp”.	clamp			yes
model.preprocessor.log_zero_guard_value	Zero guard value	float	The value to be set so as to not take the log(zero).	0.00001
model.preprocessor.lowfreq	Low frequency bound in Hz	integer	Lower bound of the mel basis in Hz	0			yes
model.preprocessor.mag_power	Multiplication with mel basis	integer	Prior to multiplication with mel basis	1			yes
model.preprocessor.n_fft	FFT Window size	integer	The size of the window for the FFT in samples.	1024			yes
model.preprocessor.n_window_size	FFT Window size	integer	The size of the window for the FFT in samples.	1024			yes
model.preprocessor.n_window_stride	FFT Window stride	integer	The stride of the window for FFT	256			yes
model.preprocessor.normalize	Feature Normalization	string	Options disable feature normalization. all_features normalizes the entire spectrogram per channel/freq
model.preprocessor.pad_to	Pad to	integer	A multiple pf pad_to	0			yes
model.preprocessor.pad_value	Pad Value	float	The value to that shorter mels are padded with	-11.52			yes
model.preprocessor.preemph	Pre-emphasis value	float	Amount of pre-emphasis to be added to the audio. Can be disabled by passing None.
model.preprocessor.sample_rate	Samping rate	integer	The target sample rate to load the audio in Hz.	22050			yes
model.preprocessor.window	Window type	string	The type of window to be used.	hann			yes
model.preprocessor.exact_pad	Exact pad	bool		TRUE
model.preprocessor.use_grads	Use grads	bool		FALSE
model.optim	Optimizer	collection					yes
model.optim._target_	Optimizer Class	const	The class of the Optimizer to be instantiated	torch.optim.AdamW			yes
model.optim.lr	Learning rate	float	Learning rate	0.0002			yes	yes
model.optim.betas	Optimizer betas	list	Coefficients used to compute the running averages of the gradient and it’s square	[0.8, 0.99]			yes
model.sched	Learning rate scheduler	collection	Parameters to configure the learning rate scheduler
model.sched.name	Scheduler Name	string	Type of learning rate scheduler to be used	CosineAnnealing			yes
model.sched.warmup_ratio	Warm up steps	float	Ratio of steps to warm up the learning rate	0.02			yes
model.sched.min_lr	Minimum Learning Rate	float	Lower bound of the learning rate scheduler	1.00E-05			yes
model.max_steps	Maximum steps	const	Maximum number of steps to run training	${trainer.max_steps}			yes
model.l1_loss_factor	L1 Loss factor	int	The multiplicative factor for L1 loss used in training	45			yes
model.denoise_strength	Denoise stregth	float	The small desnoising factor, currently only used in validation	0.0025			yes
model.generator	Generator configuration	collection	Parameters to configure the generator.
model.generator._target_	Class for the HiFiGAN generator	const	Target Nemo Generator class to instantiate	nemo.collections.tts.modules.hifigan_modules.Generator			yes
model.generator.resblock	Resblock	int	Type of Residual Block to be used	1		1,2	yes
model.generator.upsample_rates	Upsample rate	list	List of upsample rate for the ConvTranspose1D layer	[8,8,2,2]	0		yes
model.generator.upsample_kernel_sizes	Upsample kernel size	list	List of kernel dimensions for the ConvTranspose1D layers. Note: This number of elements in this list must be equal to the number of elements in the model.generator.upsample_rates parameter.	[16, 16, 4, 4]	0		yes
model.generator.upsample_initial_channel	Upsample initial channel	int	Number of channels in the first upsample layer. The channel count of the subsequent layers are computer as upsample_initial_count/ (2 ** i), where i is range(len(upsample_kernel_sizes))	512	8		yes
model.generator.resblock_kernel_sizes	Resblock kernel sizes	list	Size of all the Conv1D kernels in a resblock	[3, 7, 11]			yes
model.generator.resblock_dilation_sizes	Resblock dilation sizes	list	Dilation factor per Conv1D layer in a resblock	[[1,3,5], [1,3,5], [1,3,5]]			yes
trainer	collection		Parameters to configure the trainer object
trainer.max_steps	Maximum Steps	int	Maximum number of steps to run training	25000	0		yes
trainer.max_epochs	Maximum number of epochs	int	Maximum number of epochs to run training	100	0		yes	yes

yolo_v3

convert

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
b	batch_size	integer	calibration batch size	8			yes
c	cache_file	path	calibration cache file (default cal.bin)
d	input_dims	list	comma separated list of input dimensions (not required for TLT 3.0 new models).
i	input_order	enum	input dimension ordering	nchw		nchw, nhwc, nc
m	max_batch_size	integer	maximum TensorRT engine batch size (default 16). If meet with out-of-memory issue, please decrease the batch size accordingly.	16				yes
o	outputs	list	comma separated list of output node names
p	parse_profile_shapes	list	comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g.,NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
s	strict_type_constraints	bool	TensorRT strict_type_constraints flag for INT8 mode	FALSE
t	data_type	enum	TensorRT data type	fp32		fp32, fp16, int8		yes
u	dla_core	int	Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback).	-1
w	max_workspace_size	int	maximum workspace size of TensorRT engine (default 1<<30). If meet with out-of-memory issue, please increase the workspace size accordingly.		1<<30, 2<<30
platform	platform	enum	platform label	rtx			yes	yes

evaluate

	parameter	display_name	value_type	description	default_value	examples	regex
	version	Schema Version	integer	The version of this schema	1
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-06
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.5
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE
	yolov3_config	YOLOv3	collection
	yolov3_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
	yolov3_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
	yolov3_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
	yolov3_config.matching_neutral_box_iou		float		0.7
	yolov3_config.arch	Arch	string	backbone(architecture)	resnet
	yolov3_config.nlayers	Number of Layers	integer	number of layers for this architecture	18
	yolov3_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2
	yolov3_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	0.8
	yolov3_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	100
	yolov3_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1
	yolov3_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training
	yolov3_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE
	yolov3_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
data_type	Pruning Granularity	enum	Number of filters to remove at a time.	int8		int8, fp32, fp16	yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100			no
experiment_spec	Experiment Spec	string	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.		hidden from train expeirment		yes
model	Model path	hidden	UNIX path to where the input model is located.		hidden		yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden		yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	TRUE			no
engine-file	Engine File	hidden	UNIX path to the model engine file.		/export/input_model_file.<data_type>.trt		yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	16				yes
cal_cache_file	Calibration cache file	string	Unix PATH to the int8 calibration cache file		hidden		yes	yes

inference

	parameter	display_name	value_type	description	default_value	examples	regex
	version	Schema Version	integer	The version of this schema	1
	threshold		float		0.3
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-06
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.5
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE
	yolov3_config	YOLOv3	collection
	yolov3_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
	yolov3_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
	yolov3_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
	yolov3_config.matching_neutral_box_iou		float		0.7
	yolov3_config.arch	Arch	string	backbone(architecture)	resnet
	yolov3_config.nlayers	Number of Layers	integer	number of layers for this architecture	18
	yolov3_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2
	yolov3_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	0.8
	yolov3_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	100
	yolov3_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1
	yolov3_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training
	yolov3_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE
	yolov3_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve

prune

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular
version	Schema Version	const	The version of this schema						no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1		0	1		yes	yes
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8					no
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16					no
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union				union, intersection, arithmetic_mean,geometric_mean	no
model	Model path	hidden	UNIX path to where the input model is located.		hidden				yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden				yes

train

	parameter	display_name	value_type	description	default_value	examples	regex
	version	Schema Version	integer	The version of this schema	1
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-06
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.1
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.5
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE
	yolov3_config	YOLOv3	collection
	yolov3_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
	yolov3_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
	yolov3_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
	yolov3_config.matching_neutral_box_iou		float		0.7
	yolov3_config.arch	Arch	string	backbone(architecture)	resnet
	yolov3_config.nlayers	Number of Layers	integer	number of layers for this architecture	18
	yolov3_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2
	yolov3_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	0.8
	yolov3_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	100
	yolov3_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1
	yolov3_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training
	yolov3_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE
	yolov3_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve

yolo_v4

convert

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
b	batch_size	integer	calibration batch size	8			yes
c	cache_file	path	calibration cache file (default cal.bin)
d	input_dims	list	comma separated list of input dimensions (not required for TLT 3.0 new models).
i	input_order	enum	input dimension ordering	nchw		nchw, nhwc, nc
m	max_batch_size	integer	maximum TensorRT engine batch size (default 16). If meet with out-of-memory issue, please decrease the batch size accordingly.	16				yes
o	outputs	list	comma separated list of output node names
p	parse_profile_shapes	list	comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g.,NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
s	strict_type_constraints	bool	TensorRT strict_type_constraints flag for INT8 mode	FALSE
t	data_type	enum	TensorRT data type	fp32		fp32, fp16, int8		yes
u	dla_core	int	Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback).	-1
w	max_workspace_size	int	maximum workspace size of TensorRT engine (default 1<<30). If meet with out-of-memory issue, please increase the workspace size accordingly.		1<<30, 2<<30
platform	platform	enum	platform label	rtx			yes	yes

evaluate

	parameter	display_name	value_type	description	default_value	examples	regex	valid_options_description
	version	Schema Version	integer	The version of this schema	1
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE			TRUE, False
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension				__png__, __jpg__, __jpeg__
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE			true, false
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3			1, 3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0			>=0
	augmentation_config.mosaic_prob		float		0.5			[0, 1)
	augmentation_config.mosaic_min_ratio	mosaic min ratio	float	mosaic min ratio	0.2
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string					r’, ‘g’, ‘b’
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8			>=1
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80			>=1
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-07			>0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04			>0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.3			(0, 1)
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.7			(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate				>0
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up				(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate				>0
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__			__L1__, __L2__, __NO_REG__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05			>=0
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07			(0, 1)
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9			(0, 1)
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999			(0, 1)
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE			TRUE, FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)				(0, 1)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)				TRUE, FALSE
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp				TRUE, FALSE
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10			>=1
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE			TRUE, FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3			>=1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4			>=1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE			TRUE, FALSE
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				“loss”
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed				>=0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training				>=1
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not				TRUE, FALSE
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard				>=1
	yolov4_config	YOLOv4	collection
	yolov4_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.matching_neutral_box_iou	Matching Neutral Box IoU	float	Neutral box matching IoU	0.5			(0, 1)
	yolov4_config.box_matching_iou	Box Matching IoU	float	box matching IoU	0.25			(0, 1)
	yolov4_config.arch	Arch	string	backbone(architecture)	resnet			cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
	yolov4_config.nlayers	Number of Layers	integer	number of layers for this architecture	18			depends on arch
	yolov4_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2			1
	yolov4_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	1			1
	yolov4_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	1			1
	yolov4_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1			list of integers
	yolov4_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training				TRUE, FALSE
	yolov4_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE			TRUE, FALSE
	yolov4_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE			relu, leaky_relu, mish
	yolov4_config.activation	Activation	string	Activation function				(0, 1)
	yolov4_config.label_smoothing	Label Smoothing	float	Label Smoothing	0			(0, 1)
	yolov4_config.big_grid_xy_extend	Big Grid XY Extend	float	Big anchors adjustment	0.05			(0, 1)
	yolov4_config.mid_grid_xy_extend	Middle Grid XY Extend	float	Middle anchors adjustment	0.1			(0, 1)
	yolov4_config.small_grid_xy_extend	Small Grid XY Extend	float	Small anchors adjustment	0.2			(0, 1)
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001			(0, 1)
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5			>0
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200			0, 1, 2,3, 4,5, 6, 7, 8, 9, 10
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS				__SAMPLE__, __INTEGRATE__
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE			>=1
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__			(0, 1)
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8			TRUE, FALSE
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve

export

parameter	display_name	value_type	description	default_value	examples	valid_options	required	popular
version	Schema Version	const	The version of this schema	1
model	Model	hidden	UNIX path to the model file	0.1			yes
data_type	Pruning Granularity	enum	Number of filters to remove at a time.	int8		int8, fp32, fp16	yes	yes
batches	Number of calibration batches	integer	Number of batches to calibrate the model when run in INT8 mode	100			no
experiment_spec	Experiment Spec	string	UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file.		hidden from train expeirment		yes
model	Model path	hidden	UNIX path to where the input model is located.		hidden		yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden		yes
force_ptq	Force Post-Training Quantization	bool	Force generating int8 engine using Post Training Quantization	TRUE			no
engine-file	Engine File	hidden	UNIX path to the model engine file.		/export/input_model_file.<data_type>.trt		yes
key	Encryption Key	hidden	Encryption key	tlt_encode			yes
batch_size	Batch size	integer	Number of images per batch when generating the TensorRT engine.	16				yes
cal_cache_file	Calibration cache file	string	Unix PATH to the int8 calibration cache file		hidden		yes	yes

inference

	parameter	display_name	value_type	description	default_value	examples	regex	valid_options_description
	version	Schema Version	integer	The version of this schema	1
	threshold		float		0.3
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE			TRUE, False
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension				__png__, __jpg__, __jpeg__
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE			true, false
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3			1, 3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0			>=0
	augmentation_config.mosaic_prob		float		0.5			[0, 1)
	augmentation_config.mosaic_min_ratio	mosaic min ratio	float	mosaic min ratio	0.2
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string					r’, ‘g’, ‘b’
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8			>=1
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80			>=1
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-07			>0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04			>0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.3			(0, 1)
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.7			(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate				>0
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up				(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate				>0
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__			__L1__, __L2__, __NO_REG__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05			>=0
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07			(0, 1)
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9			(0, 1)
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999			(0, 1)
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE			TRUE, FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)				(0, 1)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)				TRUE, FALSE
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp				TRUE, FALSE
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10			>=1
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE			TRUE, FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3			>=1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4			>=1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE			TRUE, FALSE
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				“loss”
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed				>=0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training				>=1
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not				TRUE, FALSE
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard				>=1
	yolov4_config	YOLOv4	collection
	yolov4_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.matching_neutral_box_iou	Matching Neutral Box IoU	float	Neutral box matching IoU	0.5			(0, 1)
	yolov4_config.box_matching_iou	Box Matching IoU	float	box matching IoU	0.25			(0, 1)
	yolov4_config.arch	Arch	string	backbone(architecture)	resnet			cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
	yolov4_config.nlayers	Number of Layers	integer	number of layers for this architecture	18			depends on arch
	yolov4_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2			1
	yolov4_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	1			1
	yolov4_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	1			1
	yolov4_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1			list of integers
	yolov4_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training				TRUE, FALSE
	yolov4_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE			TRUE, FALSE
	yolov4_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE			relu, leaky_relu, mish
	yolov4_config.activation	Activation	string	Activation function				(0, 1)
	yolov4_config.label_smoothing	Label Smoothing	float	Label Smoothing	0			(0, 1)
	yolov4_config.big_grid_xy_extend	Big Grid XY Extend	float	Big anchors adjustment	0.05			(0, 1)
	yolov4_config.mid_grid_xy_extend	Middle Grid XY Extend	float	Middle anchors adjustment	0.1			(0, 1)
	yolov4_config.small_grid_xy_extend	Small Grid XY Extend	float	Small anchors adjustment	0.2			(0, 1)
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001			(0, 1)
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5			>0
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200			0, 1, 2,3, 4,5, 6, 7, 8, 9, 10
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS				__SAMPLE__, __INTEGRATE__
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE			>=1
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__			(0, 1)
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8			TRUE, FALSE
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve

prune

parameter	display_name	value_type	description	default_value	examples	valid_min	valid_max	valid_options	required	popular
version	Schema Version	const	The version of this schema						no
pruning_threshold	Pruning Threshold	float	Threshold to compare normalized norm against.	0.1		0	1		yes	yes
pruning_granularity	Pruning Granularity	integer	Number of filters to remove at a time.	8					no
min_num_filters	Minimum number of filters	integer	Minimum number of filters to be kept per layer	16					no
equalization_criterion	Equalization Criterion	string	Criteria to equalize the stats of inputs to an element wise op layer.	union				union, intersection, arithmetic_mean,geometric_mean	no
model	Model path	hidden	UNIX path to where the input model is located.		hidden				yes
output_file	Output File	hidden	UNIX path to where the pruned model will be saved.		hidden				yes

train

	parameter	display_name	value_type	description	default_value	examples	regex	valid_options_description
	version	Schema Version	integer	The version of this schema	1
	random_seed	Random Seed	integer	Random seed	42
	dataset_config	Dataset	collection	Dataset configuration
	dataset_config.data_sources	Data Source	hidden	Data source
	dataset_config.data_sources.image_directory_path	Image Directory	hidden	Relative path to the directory of images for training
	dataset_config.data_sources.root_path	Root Path	hidden	The root path
	dataset_config.data_sources.source_weight	Source Weight	hidden	The weighting for the source
	dataset_config.data_sources.label_directory_path	Label Directory Path	hidden	The path to the directory of labels for training
	dataset_config.data_sources.tfrecords_path	TFRecords Path	hidden	The path to the TFRecords data for training
	dataset_config.target_class_mapping	Target Class Mapping	collection	The Mapping from source class names to target class names
Class you want to train for (vehicle)	dataset_config.target_class_mapping.key	Class Key	string	The “key” field is the value of the class name in the tfrecords file.		person	^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle)	dataset_config.target_class_mapping.value	Class Value	string	The “value” field corresponds to the value that the network is expected to learn.		masked-person	^[-a-zA-Z0-9_]{1,40}$
	dataset_config.validation_fold	Validation Fold	integer	The percentage of the entire dataset to be used as validation data	0
	dataset_config.validation_data_sources	Validation Data Sources	hidden	The definition is the same as training data sources
	dataset_config.include_difficult_in_training	Include Difficult Objects in Training	bool	Whether or not to include difficult objects in training	FALSE			TRUE, False
	dataset_config.type	Type	string	Dataset type, either kitti or coco	kitti
	dataset_config.image_extension	Image Extension	string	The image extension				__png__, __jpg__, __jpeg__
	dataset_config.is_monochrome	Is Monochrome	bool	Whether or not the images are monochrome(grayscale)	FALSE			true, false
	augmentation_config	Data Augmentation	collection	Data augmentation configuration
	augmentation_config.hue	Hue	float	Hue variance	0.1
	augmentation_config.saturation	Saturation	float	Saturation variance	1.5
	augmentation_config.exposure	Exposure	float	Exposure	1.5
	augmentation_config.vertical_flip	Vertical Flip Probability	float	Probability of vertical flip	0
	augmentation_config.horizontal_flip	Horizontal Flip	float	Probability of horizontal flip	0.5
	augmentation_config.jitter	Jitter	float	Jitter	0.3
	augmentation_config.output_width	Output Width	integer	Output Image Width	1248
	augmentation_config.output_height	Output Height	integer	Output Image Height	384
	augmentation_config.output_channel	Output Channel	integer	Output Image Channel	3			1, 3
	augmentation_config.randomize_input_shape_period	Randomize Input Shape Period	integer	Period(in number of epochs) to randomize input shape for multi-scale training	0			>=0
	augmentation_config.mosaic_prob		float		0.5			[0, 1)
	augmentation_config.mosaic_min_ratio	mosaic min ratio	float	mosaic min ratio	0.2
	augmentation_config.image_mean	Image Mean	collection	per-channel image mean values
	augmentation_config.image_mean.key		string					r’, ‘g’, ‘b’
	augmentation_config.image_mean.value		float
	training_config	Training	collection	Training configuration
	training_config.batch_size_per_gpu	Batch Size per GPU	integer	Batch size per GPU in training	8			>=1
	training_config.num_epochs	Number of Epochs	integer	Number of Epochs to run the training	80			>=1
	training_config.learning_rate.soft_start_annealing_schedule	Soft Start Annealing Schedule	collection
	training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate, example: 1e-7	1.00E-07			>0
	training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate	Max Learning Rate	float	Maximum learning rate. example: 1e-4	1.00E-04			>0
	training_config.learning_rate.soft_start_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up: example 0.3	0.3			(0, 1)
	training_config.learning_rate.soft_start_annealing_schedule.annealing	Annealing	float	progress(in percentage) for decreasing learning rate	0.7			(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate	Max Learning Rate	float	maximum learning rate				>0
	training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start	Soft Start	float	progress(in percentage) for warm up				(0, 1)
	training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate	Min Learning Rate	float	Minimum learning rate				>0
	training_config.regularizer	Regularizer	collection
	training_config.regularizer.type	Type	string	Type of regularizer, either NO_REG, L1 or L2	__L1__			__L1__, __L2__, __NO_REG__
	training_config.regularizer.weight	Weight	float	weight decay of regularizer	3.00E-05			>=0
	training_config.optimizer.adam	Adam	collection
	training_config.optimizer.adam.epsilon	Epsilon	float	Epsilon of Adam	1.00E-07			(0, 1)
	training_config.optimizer.adam.beta1	Beta1	float	beta1 of Adam	0.9			(0, 1)
	training_config.optimizer.adam.beta2	Beta 2	float	beta2 of Adam	0.999			(0, 1)
	training_config.optimizer.adam.amsgrad	AMSGrad	bool	AMSGrad of Adam	FALSE			TRUE, FALSE
	training_config.optimizer.sgd	SGD	collection
	training_config.optimizer.sgd.momentum	Momentum	float	momentum of sgd (example: 0.9)				(0, 1)
	training_config.optimizer.sgd.nesterov	Nesterov	bool	nesterov of sgd (example: FALSE)				TRUE, FALSE
	training_config.optimizer.rmsprop	RMSProp	collection
	training_config.optimizer.rmsprop.rho	Rho	float	rho of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.momentum	Momentum	float	momentum of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.epsilon	Epsilon	float	epsilon of RMSProp				(0, 1)
	training_config.optimizer.rmsprop.centered	Centered	bool	centered of RMSProp				TRUE, FALSE
	training_config.checkpoint_interval	Checkpoint Interval	integer	Period(in number of epochs) to save checkpoints	10			>=1
	training_config.enable_qat	QAT	bool	Enable QAT or not	FALSE			TRUE, FALSE
	training_config.resume_model_path	Resume Model Path	hidden	Path of the model to be resumed
	training_config.pretrain_model_path	Pretrained Model Path	hidden	Path of the pretrained model
	training_config.pruned_model_path	Pruned Model Path	hidden	Path of the pruned model
	training_config.max_queue_size	Max Queue Size	integer	Maximum Queue Size in Sequence Dataset	3			>=1
	training_config.n_workers	Workers	integer	Number of workers in sequence dataset	4			>=1
	training_config.use_multiprocessing	Use Multiprocessing	bool	Use multiprocessing or not	FALSE			TRUE, FALSE
	training_config.early_stopping	Early Stopping	collection
	training_config.early_stopping.monitor	Monitor	string	The name of the quantity to be monitored for early stopping				“loss”
	training_config.early_stopping.min_delta	Min Delta	float	Minimum delta of the quantity to be regarded as changed				>=0
	training_config.early_stopping.patience	Patience	integer	The number of epochs to be waited for before stopping the training				>=1
	training_config.visualizer	Visualizer	collection
	training_config.visualizer.enabled	Enable	bool	Enable the visualizer or not				TRUE, FALSE
	training_config.visualizer.num_images	Max Num Images	integer	Maximum number of images to be displayed in TensorBoard				>=1
	train_config.model_ema	ModelEMA	bool	Enable ModelEMA	FALSE
	yolov4_config	YOLOv4	collection
	yolov4_config.big_anchor_shape	Big Anchor Shape	string	Big anchor shapes in string	[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.mid_anchor_shape	Middle Anchor Shape	string	Middle anchor shapes in string	[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.small_anchor_shape	Small Anchor Shape	string	Small anchor shapes in string	[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]			numpy array of shape (3, 2) in string format. All elements should be positive float
	yolov4_config.matching_neutral_box_iou	Matching Neutral Box IoU	float	Neutral box matching IoU	0.5			(0, 1)
	yolov4_config.box_matching_iou	Box Matching IoU	float	box matching IoU	0.25			(0, 1)
	yolov4_config.arch	Arch	string	backbone(architecture)	resnet			cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
	yolov4_config.nlayers	Number of Layers	integer	number of layers for this architecture	18			depends on arch
	yolov4_config.arch_conv_blocks	Extra Convolution Blocks	integer	Number of extra convolution blocks	2			1
	yolov4_config.loss_loc_weight	weighting for location loss	float	weighting factor for location loss	1			1
	yolov4_config.loss_neg_obj_weights	weighting for loss of negative objects	float	weighting factor for loss of negative objects	1			1
	yolov4_config.loss_class_weights	weighting for classification loss	float	weighting factor for classification loss	1			list of integers
	yolov4_config.freeze_blocks	Freeze Blocks	list	ID of blocks to be frozen during training				TRUE, FALSE
	yolov4_config.freeze_bn	Freeze BN	bool	Whether or not to freeze BatchNormalization layers	FALSE			TRUE, FALSE
	yolov4_config.force_relu	Force ReLU	bool	Whether or not to force activation function to ReLU	FALSE			relu, leaky_relu, mish
	yolov4_config.activation	Activation	string	Activation function				(0, 1)
	yolov4_config.label_smoothing	Label Smoothing	float	Label Smoothing	0			(0, 1)
	yolov4_config.big_grid_xy_extend	Big Grid XY Extend	float	Big anchors adjustment	0.05			(0, 1)
	yolov4_config.mid_grid_xy_extend	Middle Grid XY Extend	float	Middle anchors adjustment	0.1			(0, 1)
	yolov4_config.small_grid_xy_extend	Small Grid XY Extend	float	Small anchors adjustment	0.2			(0, 1)
	nms_config.confidence_threshold	Confidence Threshold	float	Confidence threshold	0.001			(0, 1)
	nms_config.clustering_iou_threshold	IoU threshold	float	IoU threshold	0.5			>0
	nms_config.top_k	Top K	integer	Maximum number of objects after NMS	200			0, 1, 2,3, 4,5, 6, 7, 8, 9, 10
	nms_config.infer_nms_score_bits	NMS Score Bits	integer	Number of bits for scores for optimized NMS				__SAMPLE__, __INTEGRATE__
	nms_config.force_on_cpu	Force on CPU	bool	Force NMS to run on CPU in training	TRUE			>=1
	eval_config.average_precision_mode	AP Mode	enum	Average Precision mode, either __SAMPLE__ or __INTEGRATE__	__SAMPLE__			(0, 1)
	eval_config.batch_size	Batch Size	integer	batch size for evaluation	8			TRUE, FALSE
	eval_config.matching_iou_threshold	Matching IoU Threshold	float	IoU threshold	0.5
	eval_config.visualize_pr_curve	Visualize PR Curve	bool	Whether or not to visualize precision-recall curve