Action Specs

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

train_config collection
train_config.train_dataset_path hidden
train_config.val_dataset_path hidden
train_config.pretrained_model_path hidden
train_config.optimizer collection
train_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr float 0.01
train_config.optimizer.sgd.decay float 0
train_config.optimizer.sgd.momentum float 0.9
train_config.optimizer.sgd.nesterov bool FALSE
train_config.optimizer.adam collection
train_config.optimizer.adam.lr float
train_config.optimizer.adam.beta_1 float
train_config.optimizer.adam.beta_2 float
train_config.optimizer.adam.epsilon float
train_config.optimizer.adam.decay float
train_config.optimizer.rmsprop collection
train_config.optimizer.rmsprop.lr float
train_config.optimizer.rmsprop.rho float
train_config.optimizer.rmsprop.epsilon float
train_config.optimizer.rmsprop.decay float
train_config.batch_size_per_gpu integer 256
train_config.n_epochs integer 80
train_config.n_workers integer 2
train_config.reg_config collection
train_config.reg_config.type string L2
train_config.reg_config.scope string Conv2D,Dense
train_config.reg_config.weight_decay float 0.00005
train_config.lr_config collection ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step collection
train_config.lr_config.step.learning_rate float
train_config.lr_config.step.step_size integer
train_config.lr_config.step.gamma float
train_config.lr_config.soft_anneal collection
train_config.lr_config.soft_anneal.learning_rate float 0.05
train_config.lr_config.soft_anneal.soft_start float 0.056
train_config.lr_config.soft_anneal.annealing_divider float 10
train_config.lr_config.soft_anneal.annealing_points list List of float [0.3,0.6,0.8]
train_config.lr_config.cosine collection
train_config.lr_config.cosine.learning_rate float
train_config.lr_config.cosine.min_lr_ratio float
train_config.lr_config.cosine.soft_start float
train_config.random_seed integer 42
train_config.enable_random_crop bool
train_config.enable_center_crop bool
train_config.enable_color_augmentation bool
train_config.label_smoothing float
train_config.preprocess_mode string caffe
train_config.mixup_alpha float
train_config.model_parallelism list
train_config.image_mean collection
train_config.image_mean.key string
train_config.image_mean.value float
train_config.disable_horizontal_flip bool
train_config.visualizer_config collection
train_config.visualizer Visualizer collection
train_config.visualizer.enabled Enable bool Enable the visualizer or not
train_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
eval_config collection
eval_config.top_k integer 3
eval_config.eval_dataset_path hidden
eval_config.model_path hidden
eval_config.batch_size integer 256
eval_config.n_workers integer 2
eval_config.enable_center_crop bool TRUE
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,224,224 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 18
model_config.retain_head bool FALSE
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float 1.00E-03
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string TensorRT data type fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden UNIX path to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE
classmap_json hidden
is_byom bool FALSE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

cli

batch_size Batch Size integer Batch size CLI parameter 1 yes
train_config.train_dataset_path hidden
train_config.val_dataset_path hidden
train_config.pretrained_model_path hidden
train_config.optimizer collection
train_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr float 0.01
train_config.optimizer.sgd.decay float 0
train_config.optimizer.sgd.momentum float 0.9
train_config.optimizer.sgd.nesterov bool FALSE
train_config collection
train_config.optimizer.adam collection
train_config.optimizer.adam.lr float
train_config.optimizer.adam.beta_1 float
train_config.optimizer.adam.beta_2 float
train_config.optimizer.adam.epsilon float
train_config.optimizer.adam.decay float
train_config.optimizer.rmsprop collection
train_config.optimizer.rmsprop.lr float
train_config.optimizer.rmsprop.rho float
train_config.optimizer.rmsprop.epsilon float
train_config.optimizer.rmsprop.decay float
train_config.batch_size_per_gpu integer 256
train_config.n_epochs integer 80
train_config.n_workers integer 2
train_config.reg_config collection
train_config.reg_config.type string L2
train_config.reg_config.scope string Conv2D,Dense
train_config.reg_config.weight_decay float 0.00005
train_config.lr_config collection ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step collection
train_config.lr_config.step.learning_rate float
train_config.lr_config.step.step_size integer
train_config.lr_config.step.gamma float
train_config.lr_config.soft_anneal collection
train_config.lr_config.soft_anneal.learning_rate float 0.05
train_config.lr_config.soft_anneal.soft_start float 0.056
train_config.lr_config.soft_anneal.annealing_divider float 10
train_config.lr_config.soft_anneal.annealing_points list List of float [0.3,0.6,0.8]
train_config.lr_config.cosine collection
train_config.lr_config.cosine.learning_rate float
train_config.lr_config.cosine.min_lr_ratio float
train_config.lr_config.cosine.soft_start float
train_config.random_seed integer 42
train_config.enable_random_crop bool
train_config.enable_center_crop bool
train_config.enable_color_augmentation bool
train_config.label_smoothing float
train_config.preprocess_mode string caffe
train_config.mixup_alpha float
train_config.model_parallelism list
train_config.image_mean collection
train_config.image_mean.key string
train_config.image_mean.value float
train_config.disable_horizontal_flip bool
train_config.visualizer_config collection
train_config.visualizer Visualizer collection
train_config.visualizer.enabled Enable bool Enable the visualizer or not
train_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
eval_config collection
eval_config.top_k integer 3
eval_config.eval_dataset_path hidden
eval_config.model_path hidden
eval_config.batch_size integer 256
eval_config.n_workers integer 2
eval_config.enable_center_crop bool TRUE
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,224,224 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 18
model_config.retain_head bool FALSE
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float 1.00E-03
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

eval_config collection FALSE
eval_config.batch_size integer 256 1 inf
eval_config.enable_center_crop bool TRUE
eval_config.eval_dataset_path hidden FALSE
eval_config.model_path hidden FALSE
eval_config.n_workers integer 2 1 inf FALSE
eval_config.top_k integer 3 1 inf
init_epoch hidden CLI Parameter initial epoch 1 1 inf FALSE
model_config collection FALSE
model_config.activation collection FALSE
model_config.activation.activation_parameters collection FALSE
model_config.activation.activation_parameters.key string FALSE
model_config.activation.activation_parameters.value float FALSE
model_config.activation.activation_type ordered relu,swish
model_config.all_projections bool TRUE
model_config.arch ordered resnet resnet,efficientnet_b0,efficientnet_b1,efficientnet_b2,efficientnet_b3,efficientnet_b4,efficientnet_b5,efficientnet_b6,efficientnet_b7,mobilenet_v1,mobilenet_v2,googlenet,alexnet,darknet,cspdarknet,cspdarknet_tiny,vgg,squeezenet,byom FALSE
model_config.batch_norm_config collection FALSE
model_config.batch_norm_config.epsilon float 1.00E-05 1.00E-10 inf
model_config.batch_norm_config.momentum float 0.9 1.00E-05 inf
model_config.dropout float 1.00E-03 0 1
model_config.freeze_blocks integer FALSE
model_config.freeze_bn bool
model_config.input_image_size string 3,224,224 yes yes FALSE
model_config.n_layers integer 18 FALSE
model_config.resize_interpolation_method ordered __BILINEAR__,__BICUBIC__
model_config.retain_head bool FALSE
model_config.use_batch_norm bool TRUE
model_config.use_bias bool FALSE
model_config.use_pooling bool
train_config collection FALSE
train_config.batch_size_per_gpu integer 64 1 inf
train_config.disable_horizontal_flip bool
train_config.enable_center_crop bool TRUE
train_config.enable_color_augmentation bool TRUE
train_config.enable_random_crop bool TRUE
train_config.image_mean collection FALSE
train_config.image_mean.key string FALSE
train_config.image_mean.value float FALSE
train_config.label_smoothing float 0 0 1
train_config.lr_config collection ONE OF STEP / SOFT_ANNEAL / COSINE FALSE
train_config.lr_config.cosine collection FALSE
train_config.lr_config.cosine.learning_rate float 0 inf FALSE
train_config.lr_config.cosine.min_lr_ratio float 0 1
train_config.lr_config.cosine.soft_start float 0 1
train_config.lr_config.soft_anneal collection 0 1 FALSE
train_config.lr_config.soft_anneal.annealing_divider float 1 inf FALSE
train_config.lr_config.soft_anneal.annealing_points list List of float FALSE
train_config.lr_config.soft_anneal.learning_rate float 0 1
train_config.lr_config.soft_anneal.soft_start float 0 1
train_config.lr_config.step collection FALSE
train_config.lr_config.step.gamma float 0.1 1.00E-10 1
train_config.lr_config.step.learning_rate float 0.06 0 1 TRUE
train_config.lr_config.step.step_size integer 10 1 inf
train_config.mixup_alpha float 0.1 0 1
train_config.model_parallelism list FALSE
train_config.n_epochs integer 80 1 inf FALSE
train_config.n_workers integer 2 1 inf FALSE
train_config.optimizer collection One of SGD / ADAM / RMSPROP FALSE
train_config.optimizer.sgd collection FALSE
train_config.optimizer.sgd.decay float 0 FALSE
train_config.optimizer.sgd.lr float 0.01 0 inf TRUE
train_config.optimizer.sgd.momentum float 0.9 1.00E-10 0.99
train_config.optimizer.sgd.nesterov bool FALSE TRUE
train_config.preprocess_mode ordered caffe caffe,torch,tf
train_config.pretrained_model_path hidden FALSE
train_config.random_seed integer 42 1 inf FALSE
train_config.reg_config collection FALSE
train_config.reg_config.scope string Conv2D,Dense FALSE
train_config.reg_config.type ordered L2 L1,L2 TRUE
train_config.reg_config.weight_decay float 0.00005 3.00E-11 0.003 TRUE
train_config.train_dataset_path hidden FALSE
train_config.val_dataset_path hidden FALSE
train_config.visualizer Visualizer collection FALSE
train_config.visualizer_config collection FALSE
train_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
train_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard FALSE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core integer Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference). Note that GPU fallback is always allowed. -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

init_epoch hidden CLI Parameter initial epoch 1
train_config collection
train_config.train_dataset_path hidden
train_config.val_dataset_path hidden
train_config.pretrained_model_path hidden
train_config.optimizer collection
train_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
train_config.optimizer.sgd.lr float 0.01
train_config.optimizer.sgd.decay float 0
train_config.optimizer.sgd.momentum float 0.9
train_config.optimizer.sgd.nesterov bool FALSE
train_config.optimizer.adam collection
train_config.optimizer.adam.lr float
train_config.optimizer.adam.beta_1 float
train_config.optimizer.adam.beta_2 float
train_config.optimizer.adam.epsilon float
train_config.optimizer.adam.decay float
train_config.optimizer.rmsprop collection
train_config.optimizer.rmsprop.lr float
train_config.optimizer.rmsprop.rho float
train_config.optimizer.rmsprop.epsilon float
train_config.optimizer.rmsprop.decay float
train_config.batch_size_per_gpu integer 64
train_config.n_epochs integer 80
train_config.n_workers integer 2
train_config.reg_config collection
train_config.reg_config.type string L2
train_config.reg_config.scope string Conv2D,Dense
train_config.reg_config.weight_decay float 0.00005
train_config.lr_config collection ONE OF STEP / SOFT_ANNEAL / COSINE
train_config.lr_config.step collection
train_config.lr_config.step.learning_rate float 0.006
train_config.lr_config.step.step_size integer 10
train_config.lr_config.step.gamma float 0.1
train_config.lr_config.soft_anneal collection
train_config.lr_config.soft_anneal.learning_rate float
train_config.lr_config.soft_anneal.soft_start float
train_config.lr_config.soft_anneal.annealing_divider float
train_config.lr_config.soft_anneal.annealing_points list List of float
train_config.lr_config.cosine collection
train_config.lr_config.cosine.learning_rate float
train_config.lr_config.cosine.min_lr_ratio float
train_config.lr_config.cosine.soft_start float
train_config.random_seed integer 42
train_config.enable_random_crop bool TRUE
train_config.enable_center_crop bool TRUE
train_config.enable_color_augmentation bool
train_config.label_smoothing float 0
train_config.preprocess_mode string caffe
train_config.mixup_alpha float 0.1
train_config.model_parallelism list
train_config.image_mean collection
train_config.image_mean.key string
train_config.image_mean.value float
train_config.disable_horizontal_flip bool
train_config.visualizer_config collection
train_config.visualizer Visualizer collection
train_config.visualizer.enabled Enable bool Enable the visualizer or not
train_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
eval_config collection
eval_config.top_k integer 3
eval_config.eval_dataset_path hidden
eval_config.model_path hidden
eval_config.batch_size integer 256
eval_config.n_workers integer 2
eval_config.enable_center_crop bool TRUE
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,224,224 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 18
model_config.retain_head bool FALSE
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float 1.00E-03
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core integer Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference). Note that GPU fallback is always allowed. -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.image_extension Image Extension string Extension of the images to be used. png png, jpg, jpeg yes
dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training
dataset_config.validation_data_source.tfrecords_path Validation TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.validation_data_source.image_directory_path Validation Image Path hidden /shared/users/1234/datasets/5678/training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. 0
augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes
augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 yes Yes
augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 yes Yes
augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 yes
augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 yes
augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. 3 1, 3 yes
augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 no
augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 no
augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1
augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 1
augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0
augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0
augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0
augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0
augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. 0
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 1
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. 0.5 0.5
bbox_rasterizer_config Bounding box rasterizer collection Collection of parameters to configure the bounding box rasterizer
bbox_rasterizer_config.deadzone_radius Bounding box rasterizer deadzone radius float 0.4 0 1 yes
model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet yes
model_config.pretrained_model_file PTM File Path hidden This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file are to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains is supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer. /shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5
model_config.load_graph PTM Load Graph bool A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported. FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.
model_config.num_layers Number of Layers integer The depth of the feature extractor for scalable templates. 18 10, 18, 34, 50, 101 yes
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ no
model_config.objective_set.cov Objective COV collection The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. {} yes
model_config.objective_set.bbox.scale Objective Bounding Box Scale float The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 35 yes
model_config.objective_set.bbox.offset Objective Bounding Box Offset float The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 0.5 yes
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 4 1 yes
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 yes Yes
training_config.enable_qat Enable Quantization Aware Training bool A flag to enable quantization-aware training. FALSE yes Yes
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-06 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 5.00E-04 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.100000001 0 1 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.699999988 0 1 yes Yes
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __NO_REG__, __L1__, __L2__ yes
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 yes
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 yes
training_config.cost_scaling.enabled Enable Cost Scaling bool Enables cost scaling during training. FALSE yes
training_config.cost_scaling.initial_exponent Cost Scaling Initial Exponent float 20 yes
training_config.cost_scaling.increment Cost Scaling Increment float 0.005 yes
training_config.cost_scaling.decrement Cost Scaling Decrement float 1 yes
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 yes
evaluation_config Evaluation collection yes
evaluation_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
evaluation_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1 yes
evaluation_config.first_validation_epoch First Validation Epoch integer The first epoch to start running validation. Ideally it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation, since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down. 30 1 yes
cost_function_config Cost function collection
cost_function_config.enable_autoweighting Auto-Weighting bool TRUE yes
cost_function_config.max_objective_weight Maximum Objective Weight float 0.999899983
cost_function_config.min_objective_weight Minimum Objective Weight float 1.00E-04
classwise_config Class-wise organized parameters list
classwise_config.key Class Key string Name of class for the classwise parameters person
classwise_config.value.evaluation_config Evaluation config elements per class collection
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap Minimum Detection Ground Truth Overlaps float Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class. 0.5 0 1 yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height Minimum Height integer Minimum height in pixels for a valid ground truth and prediction bbox. 20 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height Maximum Height integer Maximum height in pixels for a valid ground truth and prediction bbox. 9999 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width Minimum Width integer Minimum width in pixels for a valid ground truth and prediction bbox. 10 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width Maximum Width integer Maximum width in pixels for a valid ground truth and prediction bbox. 9999 0 yes
classwise_config.value.cost_function_config Class-wise cost function config per class collection yes
classwise_config.value.cost_function_config.class_weight Class Weight float 4 yes
classwise_config.value.cost_function_config.coverage_foreground_weight Coverage Foreground Weight float 0.050000001 yes
classwise_config.value.cost_function_config.objectives Objectives list [{“name”: “cov”, “initial_weight”: 1.0, “weight_target”: 1.0}, {“name”: “bbox”, “initial_weight”: 10.0, “weight_target”: 10.0}] yes
classwise_config.value.cost_function_config.objectives.name Objective Name string Objective name such as cov or bbox. cov yes
classwise_config.value.cost_function_config.objectives.initial_weight Initial Weight float Initial weight for named objective. 1 yes
classwise_config.value.cost_function_config.objectives.weight_target Weight Target float Target weight for named objective. 1 yes
classwise_config.value.bbox_rasterizer_config Rasterization collection yes
classwise_config.value.bbox_rasterizer_config.cov_center_x Center of Object X-Coordinate float x-coordinate of the center of the object 0.5 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_center_y Center of Object Y-Coordinate float y-coordinate of the center of the object 0.5 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_x Center of Object X-Radius float x-radius of the coverage ellipse 1 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_y Center of Object Y-Radius float y-radius of the coverage ellipse 1 0 1 yes
classwise_config.value.bbox_rasterizer_config.bbox_min_radius Bounding Box Minimum Radius float The minimum radius of the coverage region to be drawn for boxes 1 0 1 yes
classwise_config.postprocessing_config Post-Processing collection
classwise_config.postprocessing_config.clustering_config.coverage_threshold Coverage Threshold float The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. 0.0075 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_eps DBSCAN Samples Distance float The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. 0.230000004 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples DBSCAN Minimum Samples float The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. 0.050000001 0 1 yes
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height Minimum Bounding Box Height integer The minimum height in pixels to consider as a valid detection post clustering. 20 0 10000 yes
classwise_config.postprocessing_config.clustering_config.clustering_algorithm Clustering Algorithm string Defines the post-processing algorithm to cluster raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. __DBSCAN__ __DBSCAN__, __NMS__, __HYBRID__ yes
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold DBSCAN Confidence Threshold float The confidence threshold used to filter out the clustered bounding box output from DBSCAN. 0.1 0.1 yes
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold NMS IOU Threshold float The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. 0.2 0 1
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold NMS Confidence Threshold float The confidence threshold to filter out clustered bounding boxes from NMS. 0 0 1

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

inferencer_config collection
inferencer_config.tlt_config collection
inferencer_config.tlt_config.model hidden
inferencer_config.tensorrt_config collection
inferencer_config.tensorrt_config.parser integer 0,1,2
inferencer_config.tensorrt_config.backend_data_type integer 0,1,2
inferencer_config.tensorrt_config.save_engine bool
inferencer_config.tensorrt_config.trt_engine hidden
inferencer_config.tensorrt_config.calibrator_config collection
inferencer_config.input_nodes list list of string
inferencer_config.output_nodes list list of string
inferencer_config.batch_size integer 16
inferencer_config.image_height integer 544
inferencer_config.image_width integer 960
inferencer_config.image_channels integer 3
inferencer_config.gpu_index integer 0
inferencer_config.target_classes list list of string [“car”] yes yes
inferencer_config.stride integer
bbox_handler_config collection
bbox_handler_config.kitti_dump bool TRUE
bbox_handler_config.disable_overlay bool FALSE
bbox_handler_config.overlay_linewidth integer 2
bbox_handler_config.classwise_bbox_handler_config list yes yes
bbox_handler_config.classwise_bbox_handler_config.key string default
bbox_handler_config.classwise_bbox_handler_config.value collection
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config collection
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.coverage_threshold Coverage Threshold float The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. 0.005 0 1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_eps DBSCAN Samples Distance float The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. 0.3 0 1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_min_samples DBSCAN Minimum Samples float The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. 0.05 0 1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.minimum_bounding_box_height Minimum Bounding Box Height integer The minimum height in pixels to consider as a valid detection post clustering. 4 0 10000
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.clustering_algorithm Clustering Algorithm string Defines the post-processing algorithm to cluster raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. __DBSCAN__ __DBSCAN__, __NMS__, __HYBRID__
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_confidence_threshold DBSCAN Confidence Threshold float The confidence threshold used to filter out the clustered bounding box output from DBSCAN. 0.9 0.1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_iou_threshold NMS IOU Threshold float The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. 0 1
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_confidence_threshold NMS Confidence Threshold float The confidence threshold to filter out clustered bounding boxes from NMS. 0 1
bbox_handler_config.classwise_bbox_handler_config.value.confidence_model string aggregate_cov
bbox_handler_config.classwise_bbox_handler_config.value.output_map string
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color collection 0 0,1,2
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.R integer 255
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.G integer 0
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.B integer 0
bbox_handler_config.postproc_classes list list of string

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 FALSE
random_seed Random Seed integer Seed value for the random number generator in the network 42 FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.image_extension Image Extension string Extension of the images to be used. png png,jpg,jpeg yes FALSE
dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* FALSE
dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training FALSE
dataset_config.validation_data_source.tfrecords_path Validation TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* FALSE
dataset_config.validation_data_source.image_directory_path Validation Image Path hidden /shared/users/1234/datasets/5678/training FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords,you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0,N-1]. For random split partitioning,force the validation fold index to 0 as the tfrecord is just 2-fold. 0 FALSE
augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes FALSE
augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 inf yes Yes / 32
augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 inf yes Yes / 32
augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 inf yes FALSE
augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 inf yes FALSE
augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently,1-channel input is not recommended for datasets with JPG images. For PNG images,both 3-channel RGB and 1-channel monochrome images are supported. 3 1,3 yes FALSE
augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 no FALSE
augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 no FALSE
augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 no FALSE
augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 no FALSE
augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 no FALSE
augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 no FALSE
augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1
augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 1
augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0 inf <= augmentation_config.spatial_augmentation.zoom_max
augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0 inf TRUE
augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0 FALSE
augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0 FALSE
augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max,rotate_rad_max]. 0 FALSE
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 1
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally,this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0,you can set this value to 0.5. 0.5 0 1 0.5
bbox_rasterizer_config Bounding box rasterizer collection Collection of parameters to configure the bounding box rasterizer FALSE
bbox_rasterizer_config.deadzone_radius Bounding box rasterizer deadzone radius float 0.4 0 1 yes
model_config Model collection FALSE
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet yes FALSE
model_config.pretrained_model_file PTM File Path hidden This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false,it is assumed that only the weights of the pretrained model file is to be used. In this case,TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus,transfer learning across different resolutions and domains are supported. For layers that may be absent in the pretrained model,the tool initializes them with random weights and skips the import for that layer. /shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5 FALSE
model_config.load_graph PTM Load Graph bool A flag to determine whether or not to load the graph from the pretrained model file,or just the weights. For a pruned model,set this parameter to True. Pruning modifies the original graph,so the pruned model graph and the weights need to be imported. FALSE FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template,and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections,this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers,irrespective of whether there is a change in stride across the input and output.
model_config.num_layers Number of Layers ordered_int The depth of the feature extractor for scalable templates. 18 10,18,34,50,101 yes FALSE
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True,MaxPooling is used to downsample; however,for the object-detection network,NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ no FALSE
model_config.objective_set.cov Objective COV collection The objectives for training the network. For object-detection networks,set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. {} yes FALSE
model_config.objective_set.bbox.scale Objective Bounding Box Scale float The objectives for training the network. For object-detection networks,set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 35 yes FALSE
model_config.objective_set.bbox.offset Objective Bounding Box Offset float The objectives for training the network. For object-detection networks,set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 0.5 yes FALSE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 4 1 32 yes
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 10 1 500 yes Yes FALSE
training_config.enable_qat Enable Quantization Aware Training bool A flag to enable quantization-aware training. FALSE yes Yes FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-06 0 1 yes Yes TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 5.00E-04 0 1 yes Yes TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.100000001 0 1 yes Yes TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.699999988 0 1 yes Yes TRUE TRUE
training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L1__ __NO_REG__,__L1__,__L2__ yes TRUE
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 3.00E-11 3.00E-03 yes
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes FALSE
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 0.5 0.95 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 0.5 0.95 yes
training_config.cost_scaling.enabled Enable Cost Scaling bool Enables cost scaling during training. FALSE yes FALSE
training_config.cost_scaling.initial_exponent Cost Scaling Initial Exponent float 20 yes FALSE
training_config.cost_scaling.increment Cost Scaling Increment float 0.005 yes FALSE
training_config.cost_scaling.decrement Cost Scaling Decrement float 1 yes FALSE
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 0 inf yes FALSE
evaluation_config Evaluation collection yes FALSE
evaluation_config.average_precision_mode Average Precision Mode ordered The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__,__INTEGRATE__ FALSE
evaluation_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 0 inf yes FALSE
evaluation_config.first_validation_epoch First Validation Epoch integer The first epoch to start running validation. Ideally it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation,since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down. 30 1 inf yes FALSE
cost_function_config Cost function collection FALSE
cost_function_config.enable_autoweighting Auto-Weighting bool TRUE yes FALSE
cost_function_config.max_objective_weight Maximum Objective Weight float 0.999899983 FALSE
cost_function_config.min_objective_weight Minimum Objective Weight float 1.00E-04 FALSE
classwise_config Class-wise organized parameters list FALSE
classwise_config.key Class Key string Name of class for the classwise parameters person FALSE
classwise_config.value.evaluation_config Evaluation config elements per class collection FALSE
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap Minimum Detection Ground Truth Overlaps float Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class. 0.5 0 1 yes FALSE
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height Minimum Height integer Minimum height in pixels for a valid ground truth and prediction bbox. 20 0 yes FALSE
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height Maximum Height integer Maximum height in pixels for a valid ground truth and prediction bbox. 9999 0 yes FALSE
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width Minimum Width integer Minimum width in pixels for a valid ground truth and prediction bbox. 10 0 yes FALSE
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width Maximum Width integer Maximum width in pixels for a valid ground truth and prediction bbox. 9999 0 yes FALSE
classwise_config.value.cost_function_config Class-wise cost function config per class collection yes FALSE
classwise_config.value.cost_function_config.class_weight Class Weight float 4 1 4 yes
classwise_config.value.cost_function_config.coverage_foreground_weight Coverage Foreground Weight float 0.050000001 yes FALSE
classwise_config.value.cost_function_config.objectives Objectives list [{“name”: “cov”,”initial_weight”: 1.0,”weight_target”: 1.0},{“name”: “bbox”,”initial_weight”: 10.0,”weight_target”: 10.0}] yes FALSE
classwise_config.value.cost_function_config.objectives.name Objective Name string Objective name such as cov or bbox. cov yes FALSE
classwise_config.value.cost_function_config.objectives.initial_weight Initial Weight float Initial weight for named objective. 1 yes FALSE
classwise_config.value.cost_function_config.objectives.weight_target Weight Target float Target weight for named objective. 1 yes FALSE
classwise_config.value.bbox_rasterizer_config Rasterization collection yes FALSE
classwise_config.value.bbox_rasterizer_config.cov_center_x Center of Object X-Coordinate float x-coordinate of the center of the object 0.5 0.3 0.7 yes
classwise_config.value.bbox_rasterizer_config.cov_center_y Center of Object Y-Coordinate float y-coordinate of the center of the object 0.5 0.3 0.7 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_x Center of Object X-Radius float x-radius of the coverage ellipse 1 0.7 1 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_y Center of Object Y-Radius float y-radius of the coverage ellipse 1 0.7 1 yes
classwise_config.value.bbox_rasterizer_config.bbox_min_radius Bounding Box Minimum Radius float The minimum radius of the coverage region to be drawn for boxes 1 0 1 yes
classwise_config.postprocessing_config Post-Processing collection FALSE
classwise_config.postprocessing_config.clustering_config.coverage_threshold Coverage Threshold float The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. 0.0075 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_eps DBSCAN Samples Distance float The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value,the more boxes are grouped together. 0.230000004 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples DBSCAN Minimum Samples float The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. 0.050000001 0 1 yes
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height Minimum Bounding Box Height integer The minimum height in pixels to consider as a valid detection post clustering. 20 0 10000 yes
classwise_config.postprocessing_config.clustering_config.clustering_algorithm Clustering Algorithm ordered Defines the post-processing algorithm to cluster raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. __DBSCAN__ __DBSCAN__,__NMS__,__HYBRID__ yes FALSE
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold DBSCAN Confidence Threshold float The confidence threshold used to filter out the clustered bounding box output from DBSCAN. 0.1 0.01 0.8 yes TRUE
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold NMS IOU Threshold float The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. 0.2 0 1
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold NMS Confidence Threshold float The confidence threshold to filter out clustered bounding boxes from NMS. 0 0 1

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.image_extension Image Extension string Extension of the images to be used. png png, jpg, jpeg yes
dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training
dataset_config.validation_data_source.tfrecords_path Validation TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
dataset_config.validation_data_source.image_directory_path Validation Image Path hidden /shared/users/1234/datasets/5678/training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. 0
augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes
augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 yes Yes
augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 yes Yes
augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 yes
augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 yes
augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. 3 1, 3 yes
augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 no
augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 no
augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 no
augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1
augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 1
augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0
augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0
augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0
augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0
augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. 0
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 1
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. 0.5 0.5
bbox_rasterizer_config Bounding box rasterizer collection Collection of parameters to configure the bounding box rasterizer
bbox_rasterizer_config.deadzone_radius Bounding box rasterizer deadzone radius float 0.4 0 1 yes
model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet yes
model_config.pretrained_model_file PTM File Path hidden This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file are to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains is supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer. /shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5
model_config.load_graph PTM Load Graph bool A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported. FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output.
model_config.num_layers Number of Layers integer The depth of the feature extractor for scalable templates. 18 10, 18, 34, 50, 101 yes
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ no
model_config.objective_set.cov Objective COV collection The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. {} yes
model_config.objective_set.bbox.scale Objective Bounding Box Scale float The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 35 yes
model_config.objective_set.bbox.offset Objective Bounding Box Offset float The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. 0.5 yes
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 4 1 yes
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 yes Yes
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE yes Yes
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-06 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 5.00E-04 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.100000001 0 1 yes Yes
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.699999988 0 1 yes Yes
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __NO_REG__, __L1__, __L2__ yes
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 yes
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 yes
training_config.cost_scaling.enabled Enable Cost Scaling bool Enables cost scaling during training. FALSE yes
training_config.cost_scaling.initial_exponent Cost Scaling Initial Exponent float 20 yes
training_config.cost_scaling.increment Cost Scaling Increment float 0.005 yes
training_config.cost_scaling.decrement Cost Scaling Decrement float 1 yes
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 yes
evaluation_config Evaluation collection yes
evaluation_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
evaluation_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1 yes
evaluation_config.first_validation_epoch First Validation Epoch integer The first epoch to start running validation. Ideally it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation, since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down. 30 1 yes
cost_function_config Cost function collection
cost_function_config.enable_autoweighting Auto-Weighting bool TRUE yes
cost_function_config.max_objective_weight Maximum Objective Weight float 0.999899983
cost_function_config.min_objective_weight Minimum Objective Weight float 1.00E-04
classwise_config Class-wise organized parameters list
classwise_config.key Class Key string Name of class for the classwise parameters person
classwise_config.value.evaluation_config Evaluation config elements per class collection
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap Minimum Detection Ground Truth Overlaps float Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class. 0.5 0 1 yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height Minimum Height integer Minimum height in pixels for a valid ground truth and prediction bbox. 20 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height Maximum Height integer Maximum height in pixels for a valid ground truth and prediction bbox. 9999 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width Minimum Width integer Minimum width in pixels for a valid ground truth and prediction bbox. 10 0 yes
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width Maximum Width integer Maximum width in pixels for a valid ground truth and prediction bbox. 9999 0 yes
classwise_config.value.cost_function_config Class-wise cost function config per class collection yes
classwise_config.value.cost_function_config.class_weight Class Weight float 4 yes
classwise_config.value.cost_function_config.coverage_foreground_weight Coverage Foreground Weight float 0.050000001 yes
classwise_config.value.cost_function_config.objectives Objectives list [{“name”: “cov”, “initial_weight”: 1.0, “weight_target”: 1.0}, {“name”: “bbox”, “initial_weight”: 10.0, “weight_target”: 10.0}] yes
classwise_config.value.cost_function_config.objectives.name Objective Name string Objective name such as cov or bbox. cov yes
classwise_config.value.cost_function_config.objectives.initial_weight Initial Weight float Initial weight for named objective. 1 yes
classwise_config.value.cost_function_config.objectives.weight_target Weight Target float Target weight for named objective. 1 yes
classwise_config.value.bbox_rasterizer_config Rasterization collection yes
classwise_config.value.bbox_rasterizer_config.cov_center_x Center of Object X-Coordinate float x-coordinate of the center of the object 0.5 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_center_y Center of Object Y-Coordinate float y-coordinate of the center of the object 0.5 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_x Center of Object X-Radius float x-radius of the coverage ellipse 1 0 1 yes
classwise_config.value.bbox_rasterizer_config.cov_radius_y Center of Object Y-Radius float y-radius of the coverage ellipse 1 0 1 yes
classwise_config.value.bbox_rasterizer_config.bbox_min_radius Bounding Box Minimum Radius float The minimum radius of the coverage region to be drawn for boxes 1 0 1 yes
classwise_config.postprocessing_config Post-Processing collection
classwise_config.postprocessing_config.clustering_config.coverage_threshold Coverage Threshold float The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. 0.0075 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_eps DBSCAN Samples Distance float The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. 0.230000004 0 1 yes
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples DBSCAN Minimum Samples float The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. 0.050000001 0 1 yes
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height Minimum Bounding Box Height integer The minimum height in pixels to consider as a valid detection post clustering. 20 0 10000 yes
classwise_config.postprocessing_config.clustering_config.clustering_algorithm Clustering Algorithm string Defines the post-processing algorithm to cluster raw detections to the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. __DBSCAN__ __DBSCAN__, __NMS__, __HYBRID__ yes
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold DBSCAN Confidence Threshold float The confidence threshold used to filter out the clustered bounding box output from DBSCAN. 0.1 0.1 yes
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold NMS IOU Threshold float The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. 0.2 0 1
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold NMS Confidence Threshold float The confidence threshold to filter out clustered bounding boxes from NMS. 0 0 1

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
dssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
dssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
dssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch Arch string The backbone for feature extraction resnet
dssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
dssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
dssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels Prediction Layer Channel integer The number of channel of the DSSD prediction layer 512 1

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
threshold Threshold float 0.3
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
dssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
dssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
dssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch Arch string The backbone for feature extraction resnet
dssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
dssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
dssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels Prediction Layer Channel integer The number of channel of the DSSD prediction layer 512 1

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 internal FALSE
random_seed Random Seed integer Seed value for the random number generator in the network 42 FALSE
initial_epoch Initial epoch cli hidden 1 CLI argument FALSE
use_multiprocessing CLI parameter hidden FALSE FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden FALSE
dataset_config.data_sources.image_directory_path Image path hidden FALSE
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden FALSE
dataset_config.validation_data_sources.image_directory_path Image path hidden FALSE
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden FALSE
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1 inf
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1 inf FALSE
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE FALSE
training_config.learning_rate collection FALSE
training_config.learning_rate.soft_start_annealing_schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0 inf TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0 inf TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1 TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1 TRUE TRUE
training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L1__ __L1__,__L2__ TRUE
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0 inf TRUE
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1 inf FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor ordered The name of the quantity to be monitored for early stopping loss,validation_loss,val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0 inf FALSE
eval_config Evaluation collection FALSE
eval_config.average_precision_mode Average Precision Mode ordered The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__,__INTEGRATE__ FALSE
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1 inf FALSE
eval_config.batch_size Batch Size integer batch size for evaluation 16 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1 FALSE
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0 inf
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection FALSE
augmentation_config.output_width Model Input width integer 960 1 inf yes
augmentation_config.output_height Model Input height integer 544 1 inf yes
augmentation_config.output_channel Model Input channel ordered_int 3 1,3 yes FALSE
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1 TRUE < augmentation_config.random_crop_max_scale
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1 TRUE TRUE
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5 0.1 10 < augmentation_config.random_crop_max_ar
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2 0.1 10 TRUE
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1 inf < augmentation_config.zoom_out_max_scale
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1 inf TRUE
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip 0 1
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. 0 255
dssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0,2.0,0.5,3.0,1.0/3.0] FALSE
dssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers FALSE
dssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True,two boxes will be generated with an aspect ratio of 1. TRUE
dssd_config.clip_boxes Clip Boxes bool If true,all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
dssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1,0.1,0.2,0.2] FALSE
dssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05,0.1,0.25,0.4,0.55,0.7,0.85] FALSE
dssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be FALSE
dssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided,0.5 will be used as default value. FALSE
dssd_config.arch Arch ordered The backbone for feature extraction resnet resnet FALSE
dssd_config.nlayers Number of Layers ordered_int The number of conv layers in a specific arch 18 10,18,34,50,101,152 FALSE
dssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
dssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training FALSE
dssd_config.pred_num_channels Prediction Layer Channel integer The number of channel of the DSSD prediction layer 512 1 512 FALSE

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
initial_epoch Initial epoch cli hidden 1 CLI argument
use_multiprocessing CLI parameter bool FALSE
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
dssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
dssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers
dssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
dssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
dssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
dssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
dssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
dssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
dssd_config.arch Arch string The backbone for feature extraction resnet
dssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
dssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
dssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
dssd_config.pred_num_channels Prediction Layer Channel integer The number of channel of the DSSD prediction layer 512 1

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

version Schema Version const The version of this schema 1
training_config Training config collection Parameters to configure the training process
training_config.train_batch_size training batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. 16 0
training_config.iterations_per_loop integer 10
training_config.num_epochs number of epochs integer The number of epochs to train the network 6 0
training_config.num_examples_per_epoch number of images per epoch per gpu integer Total number of images in the training set divided by the number of GPUs 118288 0
training_config.checkpoint path to pretrained model hidden The path to the pretrained model, if any
training_config.pruned_model_path path to pruned model hidden The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period checkpoint period integer The number of training epochs that should run per model checkpoint/validation 2 0
training_config.amp AMP bool Whether to use mixed precision training TRUE
training_config.moving_average_decay moving average decay float Moving average decay 0.9999
training_config.l2_weight_decay L2 weight decay float L2 weight decay 0.00004
training_config.l1_weight_decay L1 weight decay float L1 weight decay 0
training_config.lr_warmup_epoch learning rate warmup epoch integer The number of warmup epochs in the learning rate schedule 3 0
training_config.lr_warmup_init initial learning rate during warmup float The initial learning rate in the warmup period 0.002
training_config.learning_rate maximum learning rate float The maximum learning rate 0.02
training_config.tf_random_seed random seed integer The random seed 42 0
training_config.clip_gradients_norm clip gradient by norm float Clip gradients by the norm value 5.00E+00
training_config.skip_checkpoint_variables skip checkpoint variables string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. -predict*
eval_config evaluation config collection Parameters to configure evaluation
eval_config.eval_epoch_cycle evaluation epoch cycle integer The number of training epochs that should run per validation 2 0
eval_config.max_detections_per_image maximum detections per image integer The maximum number of detections to visualize 100 0
eval_config.min_score_thresh minimum confidence threshold float The lowest confidence of the predicted box and ground truth box that can be considered a match 0.4
eval_config.eval_batch_size evaluation batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus 16 0
eval_config.eval_samples number of samples for evaluation integer The number of samples for evaluation 500
dataset_config dataset config collection Parameters to configure dataset
dataset_config.image_size image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. 1024,1024 yes
dataset_config.training_file_pattern training file pattern hidden The TFRecord path for training
dataset_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
dataset_config.validation_json_file validation json file hidden The annotation file path for validation
dataset_config.num_classes number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91 yes
dataset_config.max_instances_per_image maximum instances per image integer The maximum number of object instances to parse (default: 100) 100
dataset_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
model_config model config collection Parameters to configure model
model_config.model_name model name string Model name efficientdet-d0
model_config.min_level minimum level integer The minimum level of the output feature pyramid 3
model_config.max_level maximum level integer The maximum level of the output feature pyramid 7
model_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 3
model_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 4
augmentation_config augmentation config collection Parameters to configure data augmentation
augmentation_config.rand_hflip random horizontal flip bool Whether to perform random horizontal flip TRUE
augmentation_config.random_crop_min_scale minimum scale of random crop float The minimum scale of RandomCrop augmentation. 0.1
augmentation_config.random_crop_max_scale maximum scale of random crop float The maximum scale of RandomCrop augmentation. 2

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
experiment_spec_file Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
model_path Model hidden UNIX path to the model file 0.1 yes
output_path Output File hidden UNIX path to where the exported model will be saved. yes
key Encryption Key hidden Encryption key tlt_encode yes
data_type Data Type string The data type of the exported TensorRT engine. fp32 int8, fp32, fp16 yes yes
cal_image_dir hidden
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
engine_file Engine File hidden UNIX path to the model engine file. yes
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

version Schema Version const The version of this schema 1
training_config Training config collection Parameters to configure the training process
training_config.train_batch_size training batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. 16 0
training_config.iterations_per_loop integer 10
training_config.num_epochs number of epochs integer The number of epochs to train the network 6 0
training_config.num_examples_per_epoch number of images per epoch per gpu integer Total number of images in the training set divided by the number of GPUs 118288 0
training_config.checkpoint path to pretrained model hidden The path to the pretrained model, if any
training_config.pruned_model_path path to pruned model hidden The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period checkpoint period integer The number of training epochs that should run per model checkpoint/validation 2 0
training_config.amp AMP bool Whether to use mixed precision training TRUE
training_config.moving_average_decay moving average decay float Moving average decay 0.9999
training_config.l2_weight_decay L2 weight decay float L2 weight decay 0.00004
training_config.l1_weight_decay L1 weight decay float L1 weight decay 0
training_config.lr_warmup_epoch learning rate warmup epoch integer The number of warmup epochs in the learning rate schedule 3 0
training_config.lr_warmup_init initial learning rate during warmup float The initial learning rate in the warmup period 0.002
training_config.learning_rate maximum learning rate float The maximum learning rate 0.02
training_config.tf_random_seed random seed integer The random seed 42 0
training_config.clip_gradients_norm clip gradient by norm float Clip gradients by the norm value 5.00E+00
training_config.skip_checkpoint_variables skip checkpoint variables string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. -predict*
eval_config evaluation config collection Parameters to configure evaluation
eval_config.eval_epoch_cycle evaluation epoch cycle integer The number of training epochs that should run per validation 2 0
eval_config.max_detections_per_image maximum detections per image integer The maximum number of detections to visualize 100 0
eval_config.min_score_thresh minimum confidence threshold float The lowest confidence of the predicted box and ground truth box that can be considered a match 0.4
eval_config.eval_batch_size evaluation batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus 16 0
eval_config.eval_samples number of samples for evaluation integer The number of samples for evaluation 500
dataset_config dataset config collection Parameters to configure dataset
dataset_config.image_size image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. 1024,1024 yes
dataset_config.training_file_pattern training file pattern hidden The TFRecord path for training
dataset_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
dataset_config.validation_json_file validation json file hidden The annotation file path for validation
dataset_config.num_classes number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91 yes
dataset_config.max_instances_per_image maximum instances per image integer The maximum number of object instances to parse (default: 100) 100
dataset_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
model_config model config collection Parameters to configure model
model_config.model_name model name string Model name efficientdet-d0
model_config.min_level minimum level integer The minimum level of the output feature pyramid 3
model_config.max_level maximum level integer The maximum level of the output feature pyramid 7
model_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 3
model_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 4
augmentation_config augmentation config collection Parameters to configure data augmentation
augmentation_config.rand_hflip random horizontal flip bool Whether to perform random horizontal flip TRUE
augmentation_config.random_crop_min_scale minimum scale of random crop float The minimum scale of RandomCrop augmentation. 0.1
augmentation_config.random_crop_max_scale maximum scale of random crop float The maximum scale of RandomCrop augmentation. 2

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_dir Output Directory hidden UNIX path to where the pruned model will be saved. yes
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

automl_enabled

math_cond

parent_param

depends_on

augmentation_config augmentation config collection Parameters to configure data augmentation FALSE
augmentation_config.rand_hflip random horizontal flip bool Whether to perform random horizontal flip TRUE
augmentation_config.random_crop_max_scale maximum scale of random crop float The maximum scale of RandomCrop augmentation. 2 1.00E-05 inf TRUE
augmentation_config.random_crop_min_scale minimum scale of random crop float The minimum scale of RandomCrop augmentation. 0.1 1.00E-05 inf TRUE < augmentation_config.random_crop_max_scale
dataset_config dataset config collection Parameters to configure dataset FALSE
dataset_config.image_size image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. 1024,1024 yes FALSE
dataset_config.max_instances_per_image maximum instances per image integer The maximum number of object instances to parse (default: 100) 100 1 inf
dataset_config.num_classes number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91 2 inf yes FALSE
dataset_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
dataset_config.training_file_pattern training file pattern hidden The TFRecord path for training FALSE
dataset_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation FALSE
dataset_config.validation_json_file validation json file hidden The annotation file path for validation FALSE
eval_config evaluation config collection Parameters to configure evaluation FALSE
eval_config.eval_batch_size evaluation batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus 16 1 inf FALSE
eval_config.eval_epoch_cycle evaluation epoch cycle integer The number of training epochs that should run per validation 2 1 inf FALSE
eval_config.eval_samples number of samples for evaluation integer The number of samples for evaluation 500 1 inf FALSE
eval_config.max_detections_per_image maximum detections per image integer The maximum number of detections to visualize 100 1 inf
eval_config.min_score_thresh minimum confidence threshold float The lowest confidence of the predicted box and ground truth box that can be considered a match 0.4 0 inf
model_config model config collection Parameters to configure model FALSE
model_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 4 1 inf TRUE
model_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] FALSE
model_config.max_level maximum level integer The maximum level of the output feature pyramid 7 7 FALSE
model_config.min_level minimum level integer The minimum level of the output feature pyramid 3 3 FALSE
model_config.model_name model name ordered Model name efficientdet-d0 efficientdet-d0, efficientdet-d1, efficientdet-d2, efficientdet-d3, efficientdet-d4, efficientdet-d5 FALSE
model_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 3 1 inf
training_config Training config collection Parameters to configure the training process FALSE
training_config.amp AMP bool Whether to use mixed precision training TRUE
training_config.checkpoint path to pretrained model hidden The path to the pretrained model, if any FALSE
training_config.checkpoint_period checkpoint period integer The number of training epochs that should run per model checkpoint/validation 2 1 inf FALSE
training_config.clip_gradients_norm clip gradient by norm float Clip gradients by the norm value 5.00E+00 0 inf
training_config.iterations_per_loop integer 10 1 inf FALSE
training_config.l1_weight_decay L1 weight decay float L1 weight decay 0 0 1
training_config.l2_weight_decay L2 weight decay float L2 weight decay 0.00004 0 inf TRUE
training_config.learning_rate maximum learning rate float The maximum learning rate 0.02 0 inf TRUE
training_config.lr_warmup_epoch learning rate warmup epoch integer The number of warmup epochs in the learning rate schedule 3 0 inf FALSE <= training_config.num_epochs
training_config.lr_warmup_init initial learning rate during warmup float The initial learning rate in the warmup period 0.002 0 inf TRUE
training_config.moving_average_decay moving average decay float Moving average decay 0.9999 0 1 TRUE
training_config.num_epochs number of epochs integer The number of epochs to train the network 6 1 inf FALSE
training_config.num_examples_per_epoch number of images per epoch per gpu integer Total number of images in the training set divided by the number of GPUs 118288 1 inf FALSE
training_config.pruned_model_path path to pruned model hidden The path to a TAO pruned model for re-training, if any FALSE
training_config.skip_checkpoint_variables skip checkpoint variables string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. -predict* FALSE
training_config.tf_random_seed random seed integer The random seed 42 1 inf FALSE
training_config.train_batch_size training batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. 16 1 inf
version Schema Version const The version of this schema 1 FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

version Schema Version const The version of this schema 1
training_config Training config collection Parameters to configure the training process
training_config.train_batch_size training batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. 16 0
training_config.iterations_per_loop integer 10
training_config.num_epochs number of epochs integer The number of epochs to train the network 6 0
training_config.num_examples_per_epoch number of images per epoch per gpu integer Total number of images in the training set divided by the number of GPUs 118288 0
training_config.checkpoint path to pretrained model hidden The path to the pretrained model, if any
training_config.pruned_model_path path to pruned model hidden The path to a TAO pruned model for re-training, if any
training_config.checkpoint_period checkpoint period integer The number of training epochs that should run per model checkpoint/validation 2 0
training_config.amp AMP bool Whether to use mixed precision training TRUE
training_config.moving_average_decay moving average decay float Moving average decay 0.9999
training_config.l2_weight_decay L2 weight decay float L2 weight decay 0.00004
training_config.l1_weight_decay L1 weight decay float L1 weight decay 0
training_config.lr_warmup_epoch learning rate warmup epoch integer The number of warmup epochs in the learning rate schedule 3 0
training_config.lr_warmup_init initial learning rate during warmup float The initial learning rate in the warmup period 0.002
training_config.learning_rate maximum learning rate float The maximum learning rate 0.02
training_config.tf_random_seed random seed integer The random seed 42 0
training_config.clip_gradients_norm clip gradient by norm float Clip gradients by the norm value 5.00E+00
training_config.skip_checkpoint_variables skip checkpoint variables string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. -predict*
eval_config evaluation config collection Parameters to configure evaluation
eval_config.eval_epoch_cycle evaluation epoch cycle integer The number of training epochs that should run per validation 2 0
eval_config.max_detections_per_image maximum detections per image integer The maximum number of detections to visualize 100 0
eval_config.min_score_thresh minimum confidence threshold float The lowest confidence of the predicted box and ground truth box that can be considered a match 0.4
eval_config.eval_batch_size evaluation batch size integer The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus 16 0
eval_config.eval_samples number of samples for evaluation integer The number of samples for evaluation 500
dataset_config dataset config collection Parameters to configure dataset
dataset_config.image_size image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. 1024,1024 yes
dataset_config.training_file_pattern training file pattern hidden The TFRecord path for training
dataset_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
dataset_config.validation_json_file validation json file hidden The annotation file path for validation
dataset_config.num_classes number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91 yes
dataset_config.max_instances_per_image maximum instances per image integer The maximum number of object instances to parse (default: 100) 100
dataset_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
model_config model config collection Parameters to configure model
model_config.model_name model name string Model name efficientdet-d0
model_config.min_level minimum level integer The minimum level of the output feature pyramid 3
model_config.max_level maximum level integer The maximum level of the output feature pyramid 7
model_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 3
model_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
model_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 4
augmentation_config augmentation config collection Parameters to configure data augmentation
augmentation_config.rand_hflip random horizontal flip bool Whether to perform random horizontal flip TRUE
augmentation_config.random_crop_min_scale minimum scale of random crop float The minimum scale of RandomCrop augmentation. 0.1
augmentation_config.random_crop_max_scale maximum scale of random crop float The maximum scale of RandomCrop augmentation. 2

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The data type of the exported TensorRT engine. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

train

comments

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 FALSE
Generates randomness around a point. The seed is where you begin when trying to converge. Only required if needed to replicate a run. Does the log push out this value? random_seed Random Seed integer Seed value for the random number generator in the network 42 1 inf FALSE
verbose Verbose bool Flag of verbosity TRUE FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
JPG/PNG - auto pick this up dataset_config.image_extension Image Extension ordered Extension of the images to be used. png jpeg,png,jpg yes FALSE
Can be system generated - after conversion. This is the dataset preparation step. dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* FALSE
Where the dataset is - where the images are. Will it figure it out from the parent directory? dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training FALSE
Read all labels in the label file (car,truck,suv,person). Ask the user to map it to Vehicle/Person. dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
Class you want to train for (vehicle) dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
Class defined in the label file (car,truck,suv -> map to vehicle) dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
Default - 0 dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords,you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0,N-1]. For random split partitioning,force the validation fold index to 0 as the tfrecord is just 2-fold. 0 0 inf FALSE
Dataset specific config - augmentation augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes FALSE
The resolution at which the network should be trained. Get the max dimension of images in the dataset and set it as the default behind the scenes - has to be a multiple of 16. augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 160 inf yes Yes FALSE / 16
Get the max dimension of images in the dataset and set it as the default behind the scenes - has to be a multiple of 16 augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 160 inf yes Yes FALSE / 16
Smaller side of image(height or width) augmentation_config.preprocessing.output_image_min Image smaller side’s size integer The smaller side of image size. This is used for resize and keep aspect ratio in FasterRCNN. If this value is positive, the preprocessor will resize the image and keep aspect ratio, such that the smaller side’s size is this value. The other side will scale accordingly by aspect ratio. This value has to be a multiple of 16. 0 160 inf FALSE / 16
Limit of larger side’s size of an image when resize and keep aspect ratio augmentation_config.preprocessing.output_image_max Limit of larger side’s size when resize and keep aspect ratio integer The maximum size of image’s larger side. If, after resizing and keeping aspect ratio, the larger side exceeds this limit, the image will be resized such that the larger side’s size is this value, and hence the smaller side’s size is smaller than output_image_min. This value has to be a multiple of 16. 0 160 inf FALSE / 16
Flag to enable automatic image scaling augmentation_config.preprocessing.enable_auto_resize Flag to enable or disable automatic image scaling bool If True,automatic image scaling will be enabled. Otherwise,disabled. TRUE
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 10 1 inf yes
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 10 1 inf yes
3 channel default augmentation_config.preprocessing.output_image_channel Image Channel ordered_int The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently,1-channel input is not recommended for datasets with JPG images. For PNG images,both 3-channel RGB and 1-channel monochrome images are supported. 3 1 3 1,3 yes FALSE
0 augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 0 inf yes
0 augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 0 inf yes
0 augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 0 inf yes
0 augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 0 inf yes
0 augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 0 inf yes
0 augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 0 inf yes
Enable - go to default,disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1
Enable - go to default,disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 0 1
Enable - go to default,disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0 1
Enable - go to default,disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0 inf
Enable - go to default,disable - go to 0. Check for the right default values with TAO Toolkit Engg which will disable vs enable. augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0 inf
Enable - go to default,disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0 inf
Enable - go to default,disable - go to 0 augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max,rotate_rad_max]. 0.69 0 inf
augmentation_config.spatial_augmentation.rotate_probability Image Rotation float The probability of image rotation. The range is [0,1] 0 1
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 0 1
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally,this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0,you can set this value to 0.5. 0.5 0 1 0.5
Might need different defaults based on task/scenario model_config Model collection FALSE
model_config.arch BackBone Architecture ordered The architecture of the backbone feature extractor to be used for training. resnet:18 resnet:10,resnet:18,resnet:34,resnet:50,resnet:101,vgg16,vgg:16,vgg:19,googlenet,mobilenet_v1,mobilenet_v2,darknet:19,darknet:53,resnet101,efficientnet:b0,efficientnet:b1 yes FALSE
Confirm correct default values model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template,and is different for different feature extractor templates. 0 3
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training. TRUE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.all_projections All Projections bool For templates with shortcut connections,this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers,irrespective of whether there is a change in stride across the input and output. TRUE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True,MaxPooling is used to downsample; however,for the object-detection network,NVIDIA recommends setting this to False and using strided convolutions. FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.dropout_rate Dropout Rate float Probability for drop out 0 0 1
model_config.input_image_config Input Image collection Configuration for input images FALSE
model_config.input_image_config.size_height_width collection FALSE
model_config.input_image_config.size_height_width.height integer 544 160 inf / 16 TRUE
model_config.input_image_config.size_height_width.width integer 960 160 inf / 16 TRUE
model_config.input_image_config.image_type Image Type enum The type of images,either RGB or GRAYSCALE __RGB__ __RGB__,__GRAYSCALE__ FALSE
model_config.input_image_config.size_min Image smaller side’s size integer The size of an image’s smaller side,should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio 160 inf FALSE
model_config.input_image_config.image_channel_order Image Channel Order ordered The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images bgr rgb,bgr,l FALSE
model_config.input_image_config.image_channel_mean Image Channel Means list A dict from ‘r’,’g’,’b’ or ‘l’(for GRAYSCALE images) to per-channel mean values. [{“key”:”r”,”value”:103.0},{“key”:”g”,”value”:103.0},{“key”:”b”,”value”:103.0}] FALSE
model_config.input_image_config.image_channel_mean.key channel means key string string => one of r,g,b FALSE
model_config.input_image_config.image_channel_mean.value channel means value float value in float 0 255
model_config.input_image_config.image_scaling_factor Image Scaling Factor float A scalar to normalize the images after mean subtraction. 1 0 inf
model_config.input_image_config.max_objects_num_per_image Max Objects Num integer The maximum number of objects in an image. This is used for padding in data loader as different images can have different number of objects in its labels. 100 1 inf
model_config.anchor_box_config Anchor Boxes collection FALSE
model_config.anchor_box_config.scale Anchor Scales list The list of anchor sizes(scales). [64.0,128.0,256.0] FALSE
model_config.anchor_box_config.ratio Anchor Ratios list The list of anchor aspect ratios. [1.0,0.5,2.0] FALSE
model_config.roi_mini_batch ROI Batch Size integer The batch size of ROIs for training the RCNN in the model 16 0 inf
model_config.rpn_stride RPN stride integer The stride of RPN feature map,compared to input resolutions. Currently only 16 is supported. 16 16 16 FALSE
model_config.drop_connect_rate Drop Connect Rate float The rate of DropConnect. This is only useful for EfficientNet backbones. 0 1
model_config.rpn_cls_activation_type RPN Classification Activation Type string Type of RPN classification head’s activation function. Currently only “sigmoid” is supported. FALSE
model_config.use_bias Use Bias bool Whether or not to use bias for convolutional layers FALSE FALSE
model_config.roi_pooling_config ROI Pooling collection Configuration for ROI Pooling layer FALSE
model_config.roi_pooling_config.pool_size Pool Size integer Pool size of the ROI Pooling operation. 7 0 inf
model_config.roi_pooling_config.pool_size_2x Pool Size Doubled bool Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling FALSE
model_config.activation Activation collection Activation function for the model backbone. This is only useful for EfficientNet backbones. FALSE
model_config.activation.activation_type Activation Type ordered Type of the activation function of backbone. relu,swish
model_config.activation.activation_parameters Activation Parameters dict A dict that maps the name of a parameter to its value. FALSE
training_config Training collection FALSE
IMPORTANT. Open to user - default should be smartly calculated. Check factors that influence. training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1 inf yes
Default - what is the optimal number of epochs for each model. Smart feature in TAO Toolkit to auto stop once model converges training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 inf yes Yes FALSE
Toggle for end user training_config.enable_qat Enable Quantization Aware Training bool bool FALSE yes Yes FALSE
Default training_config.learning_rate.soft_start .base_lr Maximum learning rate during the training float 5.00E-04 0 inf Yes TRUE
Default training_config.learning_rate.soft_start .start_lr The initial learning rate at the start float 5.00E-06 0 inf Yes TRUE < training_config.learning_rate.soft_start .base_lr
Default training_config.learning_rate.soft_start .soft_start Soft Start float 0.100000001 0 1 Yes TRUE < training_config.learning_rate.soft_start .annealing_points
Default training_config.learning_rate.soft_start .annealing_points Annealing float 0.8 0 1 Yes TRUE
Default training_config.learning_rate.soft_start .annealing_divider Annealing float 10 0 inf Yes
Default training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L1__ __NO_REG__,__L1__,__L2__ yes TRUE
Default training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 0 1 yes TRUE
Default training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 0 1 yes
Default training_config.optimizer.adam.beta_1 Optimizer Adam Beta1 float 0.899999976 0 1 yes
Default training_config.optimizer.adam.beta_2 Optimizer Adam Beta2 float 0.999000013 0 1 yes TRUE
Use default as 10. Provide last checkpoint to user training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 inf yes FALSE
training_config.enable_augmentation Enable Augmentation bool Whether or not to enable data augmentation TRUE
training_config.retrain_pruned_model Pruned Model hidden The path of pruned model to be retrained FALSE
training_config.pretrained_weights Pretrained Weights hidden The path of the pretrained model(weights) used to initialize the model being trained FALSE
training_config.resume_from_model Resume Model hidden The path of the model used to resume an interrupted training FALSE
training_config.rpn_min_overlap RPN Min Overlap float The lower IoU threshold used to match anchor boxes to groundtruth boxes. 0.1 0 1
training_config.rpn_max_overlap RPN Max Overlap float The higher IoU threshold used to match anchor boxes to groundtruth boxes. 1 0 1
training_config.classifier_min_overlap Classifier Min Overlap float The lower IoU threshold used to generate the proposal target. 0.1 0 1
training_config.classifier_max_overlap Classifier Max Overlap float The higher IoU threshold used to generate the proposal target. 1 0 1
training_config.gt_as_roi Gt As ROI bool A flag to include groundtruth boxes in the positive ROIs for training the RCNN
training_config.std_scaling RPN Regression Loss Scaling float A scaling factor (multiplier) for RPN regression loss 1 0 inf
training_config.classifier_regr_std RCNN Regression Loss Scaling list Scaling factors (denominators) for the RCNN regression loss. A map from ‘x’,‘y’,‘w’,‘h’ to its corresponding scaling factor,respectively [{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}] FALSE
training_config.classifier_regr_std.key RCNN Regression Loss Scaling Key string one of x,y,h,w FALSE
training_config.classifier_regr_std.value RCNN Regression Loss Scaling Value float float value for key 0 inf FALSE
training_config.output_model Output Model Path hidden Path of the output model FALSE
training_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer 12000 1 inf
training_config.rpn_mini_batch RPN Mini Batch integer The batch size to train RPN 16 1 inf
training_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 2000 1 inf
training_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 0 1
training_config.lambda_rpn_regr RPN Regression Loss Weighting float Weighting factor for RPN regression loss 1 0 inf
training_config.lambda_rpn_class RPN classification Loss Weighting float Weighting factor for RPN classification loss. 1 0 inf
training_config.lambda_cls_regr RCNN Regression Loss Weighting float Weighting factor for RCNN regression loss 1 0 inf
training_config.lambda_cls_class RCNN Classification Loss Weighting float Weighting factor for RCNN classification loss 1
training_config.model_parallelism Model Parallelism list of floats List of fractions for model parallelism FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 1 inf FALSE
evaluation_config Evaluation collection yes FALSE
evaluation_config.model Model Path string The path to the model to run inference FALSE
evaluation_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation 6000 1 inf
evaluation_config.rpn_nms_overlap_threshold RPN overlap threshold float 0.7 0 1
evaluation_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 1 inf
evaluation_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 1 inf
evaluation_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3 0 1
evaluation_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001 0 1
evaluation_config.use_voc07_11point_metric Use VOC 11-point Metric bool Whether to use PASCAL-VOC 11-point metric FALSE
evaluation_config.validation_period_during_training Validation Period integer The period(number of epochs) to run validation during training 1 inf FALSE
evaluation_config.batch_size Batch Size integer The batch size for evaluation 1 inf FALSE
evaluation_config.trt_evaluation TensorRT Evaluation collection TensorRT evaluation FALSE
evaluation_config.trt_evaluation.trt_engine Trt Engine string TRT Engine FALSE
evaluation_config.gt_matching_iou_threshold Gt Matching IoU Threshold float The IoU threshold to match groundtruth to detected objects. Only one of this collection or gt_matching_iou_threshold_range 0.5 0 1
evaluation_config.gt_matching_iou_threshold_range Gt Matching IoU Threshold Range collection Only one of this collection or gt_matching_iou_threshold FALSE
evaluation_config.gt_matching_iou_threshold_range.start Start float The starting value of the IoU range 0 1
evaluation_config.gt_matching_iou_threshold_range.end End float The end point of the IoU range(exclusive) 0 1
evaluation_config.gt_matching_iou_threshold_range.step Step float The step size of the IoU range 0 1
evaluation_config.visualize_pr_curve Visualize PR Curve bool Visualize precision-recall curve or not FALSE
inference_config FALSE
inference_config.images_dir Images Directory hidden Path to the directory of images to run inference on FALSE
inference_config.model Model Path hidden Path to the model to run inference on FALSE
inference_config.batch_size Batch Size integer The batch size for inference 1 inf FALSE
inference_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference 6000 1 inf FALSE
inference_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 1 inf FALSE
inference_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 0 1 FALSE
inference_config.bbox_visualize_threshold Visualization Threshold float The confidence threshold for visualizing the bounding boxes 0.6 0 1 FALSE
inference_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001 0 1 FALSE
inference_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 1 inf FALSE
inference_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3 0 1 FALSE
inference_config.detection_image_output_dir Image Output Directory string Path to the directory to save the output images during inference FALSE
inference_config.bbox_caption_on Bbox Caption bool Enable text caption for bounding box or not FALSE
inference_config.labels_dump_dir Labels Output Directory hidden Path to the directory to save the output labels FALSE
inference_config.nms_score_bits NMS Score Bits integer Number of score bits in optimized NMS 1 10 FALSE
inference_config.trt_inference TensorRT Inference Collection TensorRT inference configurations FALSE
inference_config.trt_inference.trt_engine TensorRT Engine hidden Path to the TensorRT engine to run inference FALSE

inference

comments

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version const The version of this schema 1
Generates randomness around a point. Seed is where you begin try converging towards. Only required if needed to replicate a run. Does the log push out this value? random_seed Random Seed integer Seed value for the random number generator in the network 42 >=0
verbose Verbose bool Flag of verbosity TRUE TRUE, FALSE
dataset_config Dataset collection Parameters to configure the dataset
JPG/PNG - auto pick this up dataset_config.image_extension Image Extension string Extension of the images to be used. png png,jpg yes __png__, __jpg__, __jpeg__
Can be system generated - after conversion. This is the dataset preparation step. dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
Where the dataset is - where the images are. Will it figure it out from the parent directory? dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training
Read all labels in the label file (car, truck, suv, person). Ask the user to map it to Vehicle/Person. dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
Class you want to train for (vehicle) dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle) dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
Default - 0 dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. 0
Dataset specific config - augmentation augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes
The resolution at which the network should be trained for. Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16. augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 yes Yes
Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16 augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 yes Yes
Smaller side of image(height or width) augmentation_config.preprocessing.output_image_min Image smaller side’s size integer The smaller side of image size. This is used for resize and keep aspect ratio in FasterRCNN. If this value is positive, preprocessor will resize the image and keep aspect ratio, such that the smaller side’s size is this value. The other side will scale accordingly by aspect ratio. This value has to be a multiple of 16. 0
Limit of larger side’s size of an image when resize and keep aspect ratio augmentation_config.preprocessing.output_image_max Limit of larger side’s size when resize and keep aspect ratio integer The maximum size of image’s larger side. If after resize and keeping aspect ratio, the larger side exceeds this limit, the image will be resized such that the larger side’s size is this value, and hence the smaller side’s size is smaller than output_image_min. This value has to be a multiple of 16. 0
Flag to enable automatic image scaling augmentation_config.preprocessing.enable_auto_resize Flag to enable or disable automatic image scaling bool If True, automatic image scaling will be enabled. Otherwise, disabled. TRUE TRUE, FALSE
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 yes >=0
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 yes >=0
3 channel default augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. 3 1, 3 yes 3, 1
0 augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 0 yes >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1 [0, 1)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 0 1 [0, 1)
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0 (0, 1]
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0 [1, 2)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg which will disable vs enable. augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0 >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0 >=0
Enable - go to default, disable - go to 0 augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. 0.69 0 >=0
augmentation_config.spatial_augmentation.rotate_probability Image Rotation float The probability of image rotation. The range is [0, 1] [0, 1)
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 0 1 [0, 1)
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360 [0, 360)
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. 0.5 0.5 0.5
Might need different defaults based on task/scenario model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet:18 resnet:18 yes
resnet:10’,
‘resnet:18’, ‘resnet:34’, ‘resnet:50’, ‘resnet:101’, ‘vgg16’, ‘vgg:16’, ‘vgg:19’, ‘googlenet’, ‘mobilenet_v1’, ‘mobilenet_v2’, ‘darknet:19’, ‘darknet:53’, ‘resnet101’, ‘efficientnet:b0’, ‘efficientnet:b1’,

Confirm correct default values model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3 depends on arch
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.dropout_rate Dropout Rate float Probability for drop out 0 0 0.1 [0, 1)
model_config.input_image_config Input Image collection Configuration for input images
model_config.input_image_config.size_height_width collection
model_config.input_image_config.size_height_width.height integer 544
model_config.input_image_config.size_height_width.width integer 960
model_config.input_image_config.image_type Image Type enum The type of images, either RGB or GRAYSCALE __RGB__ __RGB__, __GRAYSCALE__
model_config.input_image_config.size_min Image smaller side’s size integer The size of an image’s smaller side, should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio >=0
model_config.input_image_config.size_height_width Image size by height and width collection The size of images by specifying height and width.
model_config.input_image_config.size_height_width.height Image Height integer The height of images >=0
model_config.input_image_config.size_height_width.width Image Width integer The width of images >=0
model_config.input_image_config.image_channel_order Image Channel Order string The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images bgr rgb’, ‘bgr’, ‘l’
model_config.input_image_config.image_channel_mean Image Channel Means list A dict from ‘r’, ‘g’, ‘b’ or ‘l’(for GRAYSCALE images) to per-channel mean values. [{“key”:”r”,”value”:103.0}, {“key”:”g”,”value”:103.0}, {“key”:”b”,”value”:103.0}]
model_config.input_image_config.image_channel_mean.key channel means key string string => one of r,g,b r’, ‘g’, ‘b’, ‘l’
model_config.input_image_config.image_channel_mean.value channel means value float value in float (0, 255)
model_config.input_image_config.image_scaling_factor Image Scaling Factor float A scalar to normalize the images after mean subtraction. 1 >0
model_config.input_image_config.max_objects_num_per_image Max Objects Num integer The maximum number of objects in an image. This is used for padding in data loader as different images can have different number of objects in its labels. 100 >=1
model_config.anchor_box_config Anchor Boxes Collection
model_config.anchor_box_config.scale Anchor Scales list The list of anchor sizes(scales). [64.0,128.0,256.0] >0
model_config.anchor_box_config.ratio Anchor Ratios list The list of anchor aspect ratios. [1.0,0.5,2.0] >0
model_config.roi_mini_batch ROI Batch Size integer The batch size of ROIs for training the RCNN in the model 16 >0
model_config.rpn_stride RPN stride integer The stride of RPN feature map, compared to input resolutions. Currently only 16 is supported. 16 16
model_config.drop_connect_rate Drop Connect Rate float The rate of DropConnect. This is only useful for EfficientNet backbones. (0, 1)
model_config.rpn_cls_activation_type RPN Classification Activation Type string Type of RPN classification head’s activation function. Currently only “sigmoid” is supported. sigmoid
model_config.use_bias Use Bias bool Whether or not to use bias for convolutional layers TRUE, FALSE
model_config.roi_pooling_config ROI Pooling collection Configuration for ROI Pooling layer
model_config.roi_pooling_config.pool_size Pool Size integer Pool size of the ROI Pooling operation. 7 >0
model_config.roi_pooling_config.pool_size_2x Pool Size Doubled bool Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling FALSE TRUE, FALSE
model_config.activation Activation collection Activation function for the model backbone. This is only useful for EfficientNet backbones.
model_config.activation.activation_type Activation Type string Type of the activation function of backbone. relu, swish
model_config.activation.activation_parameters Activation Parameters dict A dict that maps the name of a parameter to its value.
training_config Training collection >0
IMPORTANT. Open to user - default should be smartly calculated. Check factors that influence it. training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1 yes >0
Default - what is the optimal number of epochs for each model. Smart feature in TAO Toolkit to auto stop once model converges training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 yes Yes TRUE, FALSE
Toggle for end user training_config.enable_qat Enable Quantization Aware Training bool bool FALSE yes Yes >0
Default training_config.learning_rate.soft_start .base_lr Minimum Learning Rate float 5.00E-06 Yes >0
Default training_config.learning_rate.soft_start .start_lr Maximum Learning Rate float 5.00E-04 Yes (0, 1)
Default training_config.learning_rate.soft_start .soft_start Soft Start float 0.100000001 0 1 Yes >1
Default training_config.learning_rate.soft_start .annealing_divider Annealing float 0.699999988 0 1 Yes __NO_REG__, __L1__, __L2__
Default training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __NO_REG__, __L1__, __L2__ yes >0
Default training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 yes (0, 1)
Default training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes (0, 1)
Default training_config.optimizer.adam.beta_1 Optimizer Adam Beta1 float 0.899999976 yes (0, 1)
Default training_config.optimizer.adam.beta_2 Optimizer Adam Beta2 float 0.999000013 yes >=1
Use default as 10. Provide last checkpoint to user training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 yes TRUE, FALSE
training_config.enable_augmentation Enable Augmentation bool Whether or not to enable data augmentation TRUE
training_config.retrain_pruned_model Pruned Model hidden The path of pruned model to be retrained
training_config.pretrained_weights Pretrained Weights hidden The path of the pretrained model(weights) used to initialize the model being trained
training_config.resume_from_model Resume Model hidden The path of the model used to resume an interrupted training (0, 1)
training_config.rpn_min_overlap RPN Min Overlap float The lower IoU threshold used to match anchor boxes to groundtruth boxes. 0.1 (0, 1)
training_config.rpn_max_overlap RPN Max Overlap float The higher IoU threshold used to match anchor boxes to groundtruth boxes. 1 [0, 1)
training_config.classifier_min_overlap Classifier Min Overlap float The lower IoU threshold used to generate the proposal target. 0.1 (0, 1)
training_config.classifier_max_overlap Classifier Max Overlap float The higher IoU threshold used to generate the proposal target. 1 TRUE, FALSE
training_config.gt_as_roi Gt As ROI bool A flag to include groundtruth boxes in the positive ROIs for training the RCNN >0
training_config.std_scaling RPN Regression Loss Scaling float A scaling factor (multiplier) for RPN regression loss 1
training_config.classifier_regr_std RCNN Regression Loss Scaling list Scaling factors (denominators) for the RCNN regression loss. A map from ‘x’, ‘y’, ‘w’, ‘h’ to its corresponding scaling factor, respectively [{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}]
training_config.classifier_regr_std.key RCNN Regression Loss Scaling Key string one of x,y,h,w >0
training_config.classifier_regr_std.value RCNN Regression Loss Scaling Value float float value for key
training_config.output_model Output Model Path hidden Path of the output model >0
training_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer 12000 >=1
training_config.rpn_mini_batch RPN Mini Batch integer The batch size to train RPN 16 >0
training_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 2000 (0, 1)
training_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
training_config.lambda_rpn_regr RPN Regression Loss Weighting float Weighting factor for RPN regression loss 1 >0
training_config.lambda_rpn_class RPN classification Loss Weighting float Weighting factor for RPN classification loss. 1 >0
training_config.lambda_cls_regr RCNN Regression Loss Weighting float Weighting factor for RCNN regression loss 1 >0
training_config.lambda_cls_class RCNN Classification Loss Weighting float Weighting factor for RCNN classification loss 1 list of floats
training_config.model_parallelism Model Parallelism list of floats List of fractions for model parallelism
training_config.early_stopping Early Stopping collection “loss”
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping >=0
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection TRUE, False
training_config.visualizer.enabled Enable bool Enable the visualizer or not >=1
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
evaluation_config Evaluation collection yes
evaluation_config.model Model Path string The path to the model to run inference >=1
evaluation_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation 6000 (0, 1)
evaluation_config.rpn_nms_overlap_threshold RPN overlap threshold float 0.7 >0
evaluation_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 >0
evaluation_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 (0, 1)
evaluation_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3 (0, 1)
evaluation_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001 TRUE, FALSE
evaluation_config.use_voc07_11point_metric Use VOC 11-point Metric bool Whether to use PASCAL-VOC 11-point metric >=1
evaluation_config.validation_period_during_training Validation Period integer The period(number of epochs) to run validation during training >=1
evaluation_config.batch_size Batch Size integer The batch size for evaluation (0, 1)
evaluation_config.trt_evaluation TensorRT Evaluation Collection TensorRT evaluation
evaluation_config.trt_evaluation.trt_engine Trt Engine String TRT Engine (0, 1)
evaluation_config.gt_matching_iou_threshold Gt Matching IoU Threshold float The IoU threshold to match groundtruth to detected objects. Only one of this collection or gt_matching_iou_threshold_range 0.5 (0, 1)
evaluation_config.gt_matching_iou_threshold_range Gt Matching IoU Threshold Range collection Only one of this collection or gt_matching_iou_threshold (0, 1)
evaluation_config.gt_matching_iou_threshold_range.start Start float The starting value of the IoU range TRUE, FALSE
evaluation_config.gt_matching_iou_threshold_range.end End float The end point of the IoU range(exclusive)
evaluation_config.gt_matching_iou_threshold_range.step Step float The step size of the IoU range
evaluation_config.visualize_pr_curve Visualize PR Curve bool Visualize precision-recall curve or not
inference_config >=1
inference_config.images_dir Images Directory hidden Path to the directory of images to run inference on >0
inference_config.model Model Path hidden Path to the model to run inference on >0
inference_config.batch_size Batch Size integer The batch size for inference (0, 1)
inference_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference 6000 (0, 1)
inference_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 (0, 1)
inference_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
inference_config.bbox_visualize_threshold Visualization Threshold float The confidence threshold for visualizing the bounding boxes 0.6 (0, 1)
inference_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001
inference_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 True, False
inference_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3
inference_config.detection_image_output_dir Image Output Directory string Path to the directory to save the output images during inference 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
inference_config.bbox_caption_on Bbox Caption bool Enable text caption for bounding box or not
inference_config.labels_dump_dir Labels Output Directory hidden Path to the directory to save the output labels
inference_config.nms_score_bits NMS Score Bits integer Number of score bits in optimized NMS
inference_config.trt_inference TensorRT Inference Collection TensorRT inference configurations
inference_config.trt_inference.trt_engine TensorRT Engine hidden Path to the TensorRT engine to run inference

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

comments

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version const The version of this schema 1
Generates randomness around a point. Seed is where you begin try converging towards. Only required if needed to replicate a run. Does the log push out this value? random_seed Random Seed integer Seed value for the random number generator in the network 42 >=0
verbose Verbose bool Flag of verbosity TRUE TRUE, FALSE
dataset_config Dataset collection Parameters to configure the dataset
JPG/PNG - auto pick this up dataset_config.image_extension Image Extension string Extension of the images to be used. png png,jpg, __jpeg__ yes __png__, __jpg__, __jpeg__
Can be system generated - after conversion. This is the dataset preparation step. dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
Where the dataset is - where the images are. Will it figure it out from the parent directory? dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training
Read all labels in the label file (car, truck, suv, person). Ask the user to map it to Vehicle/Person. dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
Class you want to train for (vehicle) dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle) dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
Default - 0 dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. 0
Dataset specific config - augmentation augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes
The resolution at which the network should be trained. Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16. augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 yes Yes
Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16 augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 yes Yes
Smaller side of image(height or width) augmentation_config.preprocessing.output_image_min Image smaller side’s size integer The smaller side of image size. This is used for resize and keep aspect ratio in FasterRCNN. If this value is positive, preprocessor will resize the image and keep aspect ratio, such that the smaller side’s size is this value. The other side will scale accordingly by aspect ratio. This value has to be a multiple of 16. 0
Limit of larger side’s size of an image when resize and keep aspect ratio augmentation_config.preprocessing.output_image_max Limit of larger side’s size when resize and keep aspect ratio integer The maximum size of image’s larger side. If after resize and keeping aspect ratio, the larger side exceeds this limit, the image will be resized such that the larger side’s size is this value, and hence the smaller side’s size is smaller than output_image_min. This value has to be a multiple of 16. 0
Flag to enable automatic image scaling augmentation_config.preprocessing.enable_auto_resize Flag to enable or disable automatic image scaling bool If True, automatic image scaling will be enabled. Otherwise, disabled. TRUE TRUE, FALSE
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 yes >=0
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 yes >=0
3 channel default augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. 3 1, 3 yes 3, 1
0 augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 0 yes >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1 [0, 1)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 0 1 [0, 1)
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0 (0, 1]
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0 [1, 2)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg which will disable vs enable. augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0 >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0 >=0
Enable - go to default, disable - go to 0 augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. 0.69 0 >=0
augmentation_config.spatial_augmentation.rotate_probability Image Rotation float The probability of image rotation. The range is [0, 1] [0, 1)
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 0 1 [0, 1)
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360 [0, 360)
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. 0.5 0.5 0.5
Might need different defaults based on task/scenario model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet:18 resnet:18 yes
resnet:10’,
‘resnet:18’, ‘resnet:34’, ‘resnet:50’, ‘resnet:101’, ‘vgg16’, ‘vgg:16’, ‘vgg:19’, ‘googlenet’, ‘mobilenet_v1’, ‘mobilenet_v2’, ‘darknet:19’, ‘darknet:53’, ‘resnet101’, ‘efficientnet:b0’, ‘efficientnet:b1’,

Confirm correct default values model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3 depends on arch
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.dropout_rate Dropout Rate float Probability for drop out 0 0 0.1 [0, 1)
model_config.input_image_config Input Image collection Configuration for input images
model_config.input_image_config.size_height_width collection
model_config.input_image_config.size_height_width.height integer 544
model_config.input_image_config.size_height_width.width integer 960
model_config.input_image_config.image_type Image Type enum The type of images, either RGB or GRAYSCALE __RGB__ __RGB__, __GRAYSCALE__
model_config.input_image_config.size_min Image smaller side’s size integer The size of an image’s smaller side, should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio >=0
model_config.input_image_config.size_height_width Image size by height and width collection The size of images by specifying height and width.
model_config.input_image_config.size_height_width.height Image Height integer The height of images >=0
model_config.input_image_config.size_height_width.width Image Width integer The width of images >=0
model_config.input_image_config.image_channel_order Image Channel Order string The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images bgr rgb’, ‘bgr’, ‘l’
model_config.input_image_config.image_channel_mean Image Channel Means list A dict from ‘r’, ‘g’, ‘b’ or ‘l’(for GRAYSCALE images) to per-channel mean values. [{“key”:”r”,”value”:103.0}, {“key”:”g”,”value”:103.0}, {“key”:”b”,”value”:103.0}]
model_config.input_image_config.image_channel_mean.key channel means key string string => one of r,g,b r’, ‘g’, ‘b’, ‘l’
model_config.input_image_config.image_channel_mean.value channel means value float value in float (0, 255)
model_config.input_image_config.image_scaling_factor Image Scaling Factor float A scalar to normalize the images after mean subtraction. 1 >0
model_config.input_image_config.max_objects_num_per_image Max Objects Num integer The maximum number of objects in an image. This is used for padding in data loader as different images can have different number of objects in its labels. 100 >=1
model_config.anchor_box_config Anchor Boxes Collection
model_config.anchor_box_config.scale Anchor Scales list The list of anchor sizes(scales). [64.0,128.0,256.0] >0
model_config.anchor_box_config.ratio Anchor Ratios list The list of anchor aspect ratios. [1.0,0.5,2.0] >0
model_config.roi_mini_batch ROI Batch Size integer The batch size of ROIs for training the RCNN in the model 16 >0
model_config.rpn_stride RPN stride integer The stride of RPN feature map, compared to input resolutions. Currently only 16 is supported. 16 16
model_config.drop_connect_rate Drop Connect Rate float The rate of DropConnect. This is only useful for EfficientNet backbones. (0, 1)
model_config.rpn_cls_activation_type RPN Classification Activation Type string Type of RPN classification head’s activation function. Currently only “sigmoid” is supported. sigmoid
model_config.use_bias Use Bias bool Whether or not to use bias for convolutional layers TRUE, FALSE
model_config.roi_pooling_config ROI Pooling collection Configuration for ROI Pooling layer
model_config.roi_pooling_config.pool_size Pool Size integer Pool size of the ROI Pooling operation. 7 >0
model_config.roi_pooling_config.pool_size_2x Pool Size Doubled bool Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling FALSE TRUE, FALSE
model_config.activation Activation collection Activation function for the model backbone. This is only useful for EfficientNet backbones.
model_config.activation.activation_type Activation Type string Type of the activation function of backbone. relu, swish
model_config.activation.activation_parameters Activation Parameters dict A dict that maps the name of a parameter to its value.
training_config Training collection >0
IMPORTANT. Open to user - default should be smartly calculated. Check factors that influence it. training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1 yes >0
Default - what is the optimal number of epochs for each model. Smart feature in TAO Toolkit to auto stop once model converges training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 yes Yes TRUE, FALSE
Toggle for end user training_config.enable_qat Enable Quantization Aware Training bool bool FALSE yes Yes >0
Default training_config.learning_rate.soft_start .base_lr Minimum Learning Rate float 5.00E-06 Yes >0
Default training_config.learning_rate.soft_start .start_lr Maximum Learning Rate float 5.00E-04 Yes (0, 1)
Default training_config.learning_rate.soft_start .soft_start Soft Start float 0.100000001 0 1 Yes >1
Default training_config.learning_rate.soft_start .annealing_divider Annealing float 0.699999988 0 1 Yes __NO_REG__, __L1__, __L2__
Default training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __NO_REG__, __L1__, __L2__ yes >0
Default training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 yes (0, 1)
Default training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes (0, 1)
Default training_config.optimizer.adam.beta_1 Optimizer Adam Beta1 float 0.899999976 yes (0, 1)
Default training_config.optimizer.adam.beta_2 Optimizer Adam Beta2 float 0.999000013 yes >=1
Use default as 10. Provide last checkpoint to user training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 yes TRUE, FALSE
training_config.enable_augmentation Enable Augmentation bool Whether or not to enable data augmentation TRUE
training_config.retrain_pruned_model Pruned Model hidden The path of pruned model to be retrained
training_config.pretrained_weights Pretrained Weights hidden The path of the pretrained model(weights) used to initialize the model being trained
training_config.resume_from_model Resume Model hidden The path of the model used to resume an interrupted training (0, 1)
training_config.rpn_min_overlap RPN Min Overlap float The lower IoU threshold used to match anchor boxes to groundtruth boxes. 0.1 (0, 1)
training_config.rpn_max_overlap RPN Max Overlap float The higher IoU threshold used to match anchor boxes to groundtruth boxes. 1 [0, 1)
training_config.classifier_min_overlap Classifier Min Overlap float The lower IoU threshold used to generate the proposal target. 0.1 (0, 1)
training_config.classifier_max_overlap Classifier Max Overlap float The higher IoU threshold used to generate the proposal target. 1 TRUE, FALSE
training_config.gt_as_roi Gt As ROI bool A flag to include groundtruth boxes in the positive ROIs for training the RCNN >0
training_config.std_scaling RPN Regression Loss Scaling float A scaling factor (multiplier) for RPN regression loss 1
training_config.classifier_regr_std RCNN Regression Loss Scaling list Scaling factors (denominators) for the RCNN regression loss. A map from ‘x’, ‘y’, ‘w’, ‘h’ to its corresponding scaling factor, respectively [{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}]
training_config.classifier_regr_std.key RCNN Regression Loss Scaling Key string one of x,y,h,w >0
training_config.classifier_regr_std.value RCNN Regression Loss Scaling Value float float value for key
training_config.output_model Output Model Path hidden Path of the output model >0
training_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer 12000 >=1
training_config.rpn_mini_batch RPN Mini Batch integer The batch size to train RPN 16 >0
training_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 2000 (0, 1)
training_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
training_config.lambda_rpn_regr RPN Regression Loss Weighting float Weighting factor for RPN regression loss 1 >0
training_config.lambda_rpn_class RPN classification Loss Weighting float Weighting factor for RPN classification loss. 1 >0
training_config.lambda_cls_regr RCNN Regression Loss Weighting float Weighting factor for RCNN regression loss 1 >0
training_config.lambda_cls_class RCNN Classification Loss Weighting float Weighting factor for RCNN classification loss 1 list of floats
training_config.model_parallelism Model Parallelism list of floats List of fractions for model parallelism
training_config.early_stopping Early Stopping collection “loss”
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping >=0
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection TRUE, False
training_config.visualizer.enabled Enable bool Enable the visualizer or not >=1
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
evaluation_config Evaluation collection yes
evaluation_config.model Model Path string The path to the model to run inference >=1
evaluation_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation 6000 (0, 1)
evaluation_config.rpn_nms_overlap_threshold RPN overlap threshold float 0.7 >0
evaluation_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 >0
evaluation_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 (0, 1)
evaluation_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3 (0, 1)
evaluation_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001 TRUE, FALSE
evaluation_config.use_voc07_11point_metric Use VOC 11-point Metric bool Whether to use PASCAL-VOC 11-point metric >=1
evaluation_config.validation_period_during_training Validation Period integer The period(number of epochs) to run validation during training >=1
evaluation_config.batch_size Batch Size integer The batch size for evaluation (0, 1)
evaluation_config.trt_evaluation TensorRT Evaluation Collection TensorRT evaluation
evaluation_config.trt_evaluation.trt_engine Trt Engine String TRT Engine (0, 1)
evaluation_config.gt_matching_iou_threshold Gt Matching IoU Threshold float The IoU threshold to match groundtruth to detected objects. Only one of this collection or gt_matching_iou_threshold_range 0.5 (0, 1)
evaluation_config.gt_matching_iou_threshold_range Gt Matching IoU Threshold Range collection Only one of this collection or gt_matching_iou_threshold (0, 1)
evaluation_config.gt_matching_iou_threshold_range.start Start float The starting value of the IoU range TRUE, FALSE
evaluation_config.gt_matching_iou_threshold_range.end End float The end point of the IoU range(exclusive)
evaluation_config.gt_matching_iou_threshold_range.step Step float The step size of the IoU range
evaluation_config.visualize_pr_curve Visualize PR Curve bool Visualize precision-recall curve or not
inference_config >=1
inference_config.images_dir Images Directory hidden Path to the directory of images to run inference on >0
inference_config.model Model Path hidden Path to the model to run inference on >0
inference_config.batch_size Batch Size integer The batch size for inference (0, 1)
inference_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference 6000 (0, 1)
inference_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 (0, 1)
inference_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
inference_config.bbox_visualize_threshold Visualization Threshold float The confidence threshold for visualizing the bounding boxes 0.6 (0, 1)
inference_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001
inference_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 True, False
inference_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3
inference_config.detection_image_output_dir Image Output Directory string Path to the directory to save the output images during inference 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
inference_config.bbox_caption_on Bbox Caption bool Enable text caption for bounding box or not
inference_config.labels_dump_dir Labels Output Directory hidden Path to the directory to save the output labels
inference_config.nms_score_bits NMS Score Bits integer Number of score bits in optimized NMS
inference_config.trt_inference TensorRT Inference Collection TensorRT inference configurations
inference_config.trt_inference.trt_engine TensorRT Engine hidden Path to the TensorRT engine to run inference

retrain

comments

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version const The version of this schema 1
Generates randomness around a point. Seed is where you begin try converging towards. Only required if needed to replicate a run. Does the log push out this value? random_seed Random Seed integer Seed value for the random number generator in the network 42 >=0
verbose Verbose bool Flag of verbosity TRUE TRUE, FALSE
dataset_config Dataset collection Parameters to configure the dataset
JPG/PNG - auto pick this up dataset_config.image_extension Image Extension string Extension of the images to be used. png png,jpg yes __png__, __jpg__, __jpeg__
Can be system generated - after conversion. This is the dataset preparation step. dataset_config.data_sources.tfrecords_path TFRecord Path hidden /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/*
Where the dataset is - where the images are. Will it figure it out from the parent directory? dataset_config.data_sources.image_directory_path Image Path hidden /shared/users/1234/datasets/5678/training
Read all labels in the label file (car, truck, suv, person). Ask the user to map it to Vehicle/Person. dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
Class you want to train for (vehicle) dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
Class defined in the label file (car, truck, suv -> map to vehicle) dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
Default - 0 dataset_config.validation_fold Validation Fold integer In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. 0
Dataset specific config - augmentation augmentation_config Data Augmentation collection Collection of parameters to configure the preprocessing and on the fly data augmentation Yes
The resolution at which the network should be trained for. Get the max dimension of images in the dataset and set it as the default behind the scenes - has to be multiple of 16. augmentation_config.preprocessing.output_image_width Image Width integer The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. 960 480 yes Yes
Get the max dimension of images in the dataset and set it as the default behind the scenes - has to be multiple of 16 augmentation_config.preprocessing.output_image_height Image Height integer The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. 544 272 yes Yes
Smaller side of image(height or width) augmentation_config.preprocessing.output_image_min Image smaller side’s size integer The smaller side of image size. This is used for resize and keep aspect ratio in FasterRCNN. If this value is positive, preprocessor will resize the image and keep aspect ratio, such that the smaller side’s size is this value. The other side will scale accordingly by aspect ratio. This value has to be a multiple of 16. 0
Limit of larger side’s size of an image when resize and keep aspect ratio augmentation_config.preprocessing.output_image_max Limit of larger side’s size when resize and keep aspect ratio integer The maximum size of image’s larger side. If after resize and keeping aspect ratio, the larger side exceeds this limit, the image will be resized such that the larger side’s size is this value, and hence the smaller side’s size is smaller than output_image_min. This value has to be a multiple of 16. 0
Flag to enable automatic image scaling augmentation_config.preprocessing.enable_auto_resize Flag to enable or disable automatic image scaling bool If True, automatic image scaling will be enabled. Otherwise, disabled. TRUE TRUE, FALSE
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_width Bounding Box Width float The minimum width of the object labels to be considered for training. 1 0 yes >=0
Limit of what min dimension you DONT want to train for. Default 10x10 augmentation_config.preprocessing.min_bbox_height Bounding Box Height float The minimum height of the object labels to be considered for training. 1 0 yes >=0
3 channel default augmentation_config.preprocessing.output_image_channel Image Channel integer The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. 3 1, 3 yes 3, 1
0 augmentation_config.preprocessing.crop_right Crop Right integer The right boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_left Crop Left integer The left boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_top Crop Top integer The top boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.crop_bottom Crop Bottom integer The bottom boundary of the crop to be extracted from the original image. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_height Scale Height float The floating point factor to scale the height of the cropped images. 0 0 yes >=0
0 augmentation_config.preprocessing.scale_width Scale Width float The floating point factor to scale the width of the cropped images. 0 0 yes >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.hflip_probability Horizontal-Flip Probability float The probability to flip an input image horizontally. 0.5 0 1 [0, 1)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.vflip_probability Vertical-Flip Probability float The probability to flip an input image vertically. 0 0 1 [0, 1)
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_min Minimum Zoom Scale float The minimum zoom scale of the input image. 1 0 (0, 1]
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.zoom_max Maximum Zoom Scale float The maximum zoom scale of the input image. 1 0 [1, 2)
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg which will disable vs enable. augmentation_config.spatial_augmentation.translate_max_x X-Axis Maximum Translation float The maximum translation to be added across the x axis. 8 0 >=0
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. augmentation_config.spatial_augmentation.translate_max_y Y-Axis Maximum Translation float The maximum translation to be added across the y axis. 8 0 >=0
Enable - go to default, disable - go to 0 augmentation_config.spatial_augmentation.rotate_rad_max Image Rotation float The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. 0.69 0 >=0
augmentation_config.spatial_augmentation.rotate_probability Image Rotation float The probability of image rotation. The range is [0, 1] [0, 1)
augmentation_config.color_augmentation.color_shift_stddev Color Shift Standard Deviation float The standard deviation value for the color shift. 0 0 1 [0, 1)
augmentation_config.color_augmentation.hue_rotation_max Hue Maximum Rotation float The maximum rotation angle for the hue rotation matrix. 25 0 360 [0, 360)
augmentation_config.color_augmentation.saturation_shift_max Saturation Maximum Shift float The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. 0.2 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_scale_max Contrast Maximum Scale float The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. 0.1 0 1 [0, 1)
augmentation_config.color_augmentation.contrast_center Contrast Center float The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. 0.5 0.5 0.5
Might need different defaults based on task/scenario model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet:18 resnet:18 yes
resnet:10’,
‘resnet:18’, ‘resnet:34’, ‘resnet:50’, ‘resnet:101’, ‘vgg16’, ‘vgg:16’, ‘vgg:19’, ‘googlenet’, ‘mobilenet_v1’, ‘mobilenet_v2’,,,,,image_type ‘darknet:19’, ‘darknet:53’, ‘resnet101’, ‘efficientnet:b0’, ‘efficientnet:b1’,

bgr
Confirm correct default values model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3 depends on arch
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. FALSE TRUE, FALSE
Default values. Verify with TAO Toolkit. 2 sets of defaults required. model_config.dropout_rate Dropout Rate float Probability for drop out 0 0 0.1 [0, 1)
model_config.input_image_config Input Image collection Configuration for input images
model_config.input_image_config.size_height_width collection
model_config.input_image_config.size_height_width.height integer 544
model_config.input_image_config.size_height_width.width integer 960
model_config.input_image_config.image_type Image Type enum The type of images, either RGB or GRAYSCALE __RGB__ __RGB__, __GRAYSCALE__
model_config.input_image_config.size_min Image smaller side’s size integer The size of an image’s smaller side, should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio >=0
model_config.input_image_config.size_height_width Image size by height and width collection The size of images by specifying height and width.
model_config.input_image_config.size_height_width.height Image Height integer The height of images >=0
model_config.input_image_config.size_height_width.width Image Width integer The width of images >=0
model_config.input_image_config.image_channel_order Image Channel Order string The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images bgr rgb’, ‘bgr’, ‘l’
model_config.input_image_config.image_channel_mean Image Channel Means list A dict from ‘r’, ‘g’, ‘b’ or ‘l’(for GRAYSCALE images) to per-channel mean values. [{“key”:”r”,”value”:103.0}, {“key”:”g”,”value”:103.0}, {“key”:”b”,”value”:103.0}]
model_config.input_image_config.image_channel_mean.key channel means key string string => one of r,g,b r’, ‘g’, ‘b’, ‘l’
model_config.input_image_config.image_channel_mean.value channel means value float value in float (0, 255)
model_config.input_image_config.image_scaling_factor Image Scaling Factor float A scalar to normalize the images after mean subtraction. 1 >0
model_config.input_image_config.max_objects_num_per_image Max Objects Num integer The maximum number of objects in an image. This is used for padding in data loader as different images can have different number of objects in its labels. 100 >=1
model_config.anchor_box_config Anchor Boxes Collection
model_config.anchor_box_config.scale Anchor Scales list The list of anchor sizes(scales). [64.0,128.0,256.0] >0
model_config.anchor_box_config.ratio Anchor Ratios list The list of anchor aspect ratios. [1.0,0.5,2.0] >0
model_config.roi_mini_batch ROI Batch Size integer The batch size of ROIs for training the RCNN in the model 16 >0
model_config.rpn_stride RPN stride integer The stride of RPN feature map, compared to input resolutions. Currently only 16 is supported. 16 16
model_config.drop_connect_rate Drop Connect Rate float The rate of DropConnect. This is only useful for EfficientNet backbones. (0, 1)
model_config.rpn_cls_activation_type RPN Classification Activation Type string Type of RPN classification head’s activation function. Currently only “sigmoid” is supported. sigmoid
model_config.use_bias Use Bias bool Whether or not to use bias for convolutional layers TRUE, FALSE
model_config.roi_pooling_config ROI Pooling collection Configuration for ROI Pooling layer
model_config.roi_pooling_config.pool_size Pool Size integer Pool size of the ROI Pooling operation. 7 >0
model_config.roi_pooling_config.pool_size_2x Pool Size Doubled bool Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling FALSE TRUE, FALSE
model_config.activation Activation collection Activation function for the model backbone. This is only useful for EfficientNet backbones.
model_config.activation.activation_type Activation Type string Type of the activation function of backbone. relu, swish
model_config.activation.activation_parameters Activation Parameters dict A dict that maps the name of a parameter to its value.
training_config Training collection >0
IMPORTANT. Open to user - default should smartly calculate. Check factors that influence. training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1 yes >0
Default - what is the optimal number of epochs for each model. Smart feature in TAO Toolkit to auto stop once model converges training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 120 1 yes Yes TRUE, FALSE
Toggle for end user training_config.enable_qat Enable Quantization Aware Training bool bool FALSE yes Yes >0
Default training_config.learning_rate.soft_start .base_lr Minimum Learning Rate float 5.00E-06 Yes >0
Default training_config.learning_rate.soft_start .start_lr Maximum Learning Rate float 5.00E-04 Yes (0, 1)
Default training_config.learning_rate.soft_start .soft_start Soft Start float 0.100000001 0 1 Yes >1
Default training_config.learning_rate.soft_start .annealing_divider Annealing float 0.699999988 0 1 Yes __NO_REG__, __L1__, __L2__
Default training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __NO_REG__, __L1__, __L2__ yes >0
Default training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-09 yes (0, 1)
Default training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes (0, 1)
Default training_config.optimizer.adam.beta_1 Optimizer Adam Beta1 float 0.899999976 yes (0, 1)
Default training_config.optimizer.adam.beta_2 Optimizer Adam Beta2 float 0.999000013 yes >=1
Use default as 10. Provide last checkpoint to user training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 0 yes TRUE, FALSE
training_config.enable_augmentation Enable Augmentation bool Whether or not to enable data augmentation TRUE
training_config.retrain_pruned_model Pruned Model hidden The path of pruned model to be retrained
training_config.pretrained_weights Pretrained Weights hidden The path of the pretrained model(weights) used to initialize the model being trained
training_config.resume_from_model Resume Model hidden The path of the model used to resume an interrupted training (0, 1)
training_config.rpn_min_overlap RPN Min Overlap float The lower IoU threshold used to match anchor boxes to groundtruth boxes. 0.1 (0, 1)
training_config.rpn_max_overlap RPN Max Overlap float The higher IoU threshold used to match anchor boxes to groundtruth boxes. 1 [0, 1)
training_config.classifier_min_overlap Classifier Min Overlap float The lower IoU threshold used to generate the proposal target. 0.1 (0, 1)
training_config.classifier_max_overlap Classifier Max Overlap float The higher IoU threshold used to generate the proposal target. 1 TRUE, FALSE
training_config.gt_as_roi Gt As ROI bool A flag to include groundtruth boxes in the positive ROIs for training the RCNN >0
training_config.std_scaling RPN Regression Loss Scaling float A scaling factor (multiplier) for RPN regression loss 1
training_config.classifier_regr_std RCNN Regression Loss Scaling list Scaling factors (denominators) for the RCNN regression loss. A map from ‘x’, ‘y’, ‘w’, ‘h’ to its corresponding scaling factor, respectively [{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}]
training_config.classifier_regr_std.key RCNN Regression Loss Scaling Key string one of x,y,h,w >0
training_config.classifier_regr_std.value RCNN Regression Loss Scaling Value float float value for key
training_config.output_model Output Model Path hidden Path of the output model >0
training_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer 12000 >=1
training_config.rpn_mini_batch RPN Mini Batch integer The batch size to train RPN 16 >0
training_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 2000 (0, 1)
training_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
training_config.lambda_rpn_regr RPN Regression Loss Weighting float Weighting factor for RPN regression loss 1 >0
training_config.lambda_rpn_class RPN classification Loss Weighting float Weighting factor for RPN classification loss. 1 >0
training_config.lambda_cls_regr RCNN Regression Loss Weighting float Weighting factor for RCNN regression loss 1 >0
training_config.lambda_cls_class RCNN Classification Loss Weighting float Weighting factor for RCNN classification loss 1 list of floats
training_config.model_parallelism Model Parallelism list of floats List of fractions for model parallelism
training_config.early_stopping Early Stopping collection “loss”
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping >=0
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection TRUE, False
training_config.visualizer.enabled Enable bool Enable the visualizer or not >=1
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
evaluation_config Evaluation collection yes
evaluation_config.model Model Path string The path to the model to run inference >=1
evaluation_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation 6000 (0, 1)
evaluation_config.rpn_nms_overlap_threshold RPN overlap threshold float 0.7 >0
evaluation_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 >0
evaluation_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 (0, 1)
evaluation_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3 (0, 1)
evaluation_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001 TRUE, FALSE
evaluation_config.use_voc07_11point_metric Use VOC 11-point Metric bool Whether to use PASCAL-VOC 11-point metric >=1
evaluation_config.validation_period_during_training Validation Period integer The period(number of epochs) to run validation during training >=1
evaluation_config.batch_size Batch Size integer The batch size for evaluation (0, 1)
evaluation_config.trt_evaluation TensorRT Evaluation Collection TensorRT evaluation
evaluation_config.trt_evaluation.trt_engine Trt Engine String TRT Engine (0, 1)
evaluation_config.gt_matching_iou_threshold Gt Matching IoU Threshold float The IoU threshold to match groundtruth to detected objects. Only one of this collection or gt_matching_iou_threshold_range 0.5 (0, 1)
evaluation_config.gt_matching_iou_threshold_range Gt Matching IoU Threshold Range collection Only one of this collection or gt_matching_iou_threshold (0, 1)
evaluation_config.gt_matching_iou_threshold_range.start Start float The starting value of the IoU range TRUE, FALSE
evaluation_config.gt_matching_iou_threshold_range.end End float The end point of the IoU range(exclusive)
evaluation_config.gt_matching_iou_threshold_range.step Step float The step size of the IoU range
evaluation_config.visualize_pr_curve Visualize PR Curve bool Visualize precision-recall curve or not
inference_config >=1
inference_config.images_dir Images Directory hidden Path to the directory of images to run inference on >0
inference_config.model Model Path hidden Path to the model to run inference on >0
inference_config.batch_size Batch Size integer The batch size for inference (0, 1)
inference_config.rpn_pre_nms_top_N RPN Pre-NMS Top N integer The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference 6000 (0, 1)
inference_config.rpn_nms_max_boxes RPN NMS Max Boxes integer The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer 300 (0, 1)
inference_config.rpn_nms_overlap_threshold RPN NMS IoU Threshold float The IoU threshold for NMS in Proposal layer 0.7 >0
inference_config.bbox_visualize_threshold Visualization Threshold float The confidence threshold for visualizing the bounding boxes 0.6 (0, 1)
inference_config.object_confidence_thres Object Confidence Threshold float The objects confidence threshold 0.00001
inference_config.classifier_nms_max_boxes Classifier NMS Max Boxes integer The maximum number of boxes for RCNN NMS 100 True, False
inference_config.classifier_nms_overlap_threshold Classifier NMS Overlap Threshold float The NMS overlap threshold in RCNN 0.3
inference_config.detection_image_output_dir Image Output Directory string Path to the directory to save the output images during inference 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
inference_config.bbox_caption_on Bbox Caption bool Enable text caption for bounding box or not
inference_config.labels_dump_dir Labels Output Directory hidden Path to the directory to save the output labels
inference_config.nms_score_bits NMS Score Bits integer Number of score bits in optimized NMS
inference_config.trt_inference TensorRT Inference Collection TensorRT inference configurations
inference_config.trt_inference.trt_engine TensorRT Engine hidden Path to the TensorRT engine to run inference

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

num_files num_files integer Number of images to convert from COCO json to VOC.
results_dir results_dir string Where it will be stored inside the root masks

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

coco_config collection
coco_config.root_directory_path hidden
coco_config.image_dir string List of image directories corresponding to each partition's images. The order of image directories must match annotation_files based on partitions
coco_config.annotations_file string List of JSON files with COCO dataset format annotations.json
coco_config.num_shards integer The number of shards per fold. If the size of num_shards is 1, then same number of shards will be applied to every partition
coco_config.tag string
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list list of string
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list list of string
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping list list of string
target_class_mapping.key Class Key string
target_class_mapping.value Class Value string

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path Label Path hidden
dataset_config.data_sources.image_directory_path Image Path hidden
dataset_config.validation_data_sources.label_directory_path Label Path hidden
dataset_config.validation_data_sources.image_directory_path Image Path hidden
dataset_config.characters_list_file Characters List Path string
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 32 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 24 1
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.001 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.5 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L2__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 5.00E-04 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 5 1
eval_config.batch_size Batch Size integer batch size for evaluation 1 1
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 96 1 yes
augmentation_config.output_height Model Input height integer 48 1 yes
augmentation_config.output_channel Model Input channel integer 3 1 1,3 yes
augmentation_config.max_rotate_degree Max Rotation Degree integer The maximum rotation angle for augmentation 5 1
augmentation_config.keep_original_prob Keep Original Probability float The probability for keeping original images. Only resizing will be applied to an image with this probability 0.3 0 1
augmentation_config.rotate_prob Rotation Probability float The probability for rotating the image 0.5 0 1
augmentation_config.gaussian_kernel_size Gaussian Kernel Size list The kernel size of the Gaussian blur [5,7,15] 1
augmentation_config.blur_prob Gaussian Blur Probability float The probability for blurring the image with Gaussian blur 0.5 0 1
augmentation_config.reverse_color_prob Reverse Color Probability float The probability for reversing the color of the image 0.5 0 1
lpr_config.hidden_units Hidden Units integer The number of hidden units in the LSTM layers of LPRNet 512 1
lpr_config.max_label_length Max Label Length integer The maximum length of license plates in the dataset 8
lpr_config.arch Architecture string The architecture of LPRNet baseline baseline
lpr_config.nlayers Number of Layers integer The number of convolution layers in LPRNet 18 10, 18

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
data_type Data Type string Desired engine data type. fp32 fp32, fp16 yes yes
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
engine_file Engine File hidden UNIX path to the model engine file. yes
verbose hidden TRUE
strict_type_constraints bool FALSE
results_dir hidden

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path Label Path hidden
dataset_config.data_sources.image_directory_path Image Path hidden
dataset_config.validation_data_sources.label_directory_path Label Path hidden
dataset_config.validation_data_sources.image_directory_path Image Path hidden
dataset_config.characters_list_file Characters List Path string
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 32 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 24 1
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.001 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.5 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L2__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 5.00E-04 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 5 1
eval_config.batch_size Batch Size integer batch size for evaluation 1 1
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 96 1 yes
augmentation_config.output_height Model Input height integer 48 1 yes
augmentation_config.output_channel Model Input channel integer 3 1 1,3 yes
augmentation_config.max_rotate_degree Max Rotation Degree integer The maximum rotation angle for augmentation 5 1
augmentation_config.keep_original_prob Keep Original Probability float The probability for keeping original images. Only resizing will be applied to an image with this probability 0.3 0 1
augmentation_config.rotate_prob Rotation Probability float The probability for rotating the image 0.5 0 1
augmentation_config.gaussian_kernel_size Gaussian Kernel Size list The kernel size of the Gaussian blur [5,7,15] 1
augmentation_config.blur_prob Gaussian Blur Probability float The probability for blurring the image with Gaussian blur 0.5 0 1
augmentation_config.reverse_color_prob Reverse Color Probability float The probability for reversing the color of the image 0.5 0 1
lpr_config.hidden_units Hidden Units integer The number of hidden units in the LSTM layers of LPRNet 512 1
lpr_config.max_label_length Max Label Length integer The maximum length of license plates in the dataset 8
lpr_config.arch Architecture string The architecture of LPRNet baseline baseline
lpr_config.nlayers Number of Layers integer The number of convolution layers in LPRNet 18 10, 18

train

parameter

display_name

value_type

log_float

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

CLI

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 FALSE
initial_epoch Initial Epoch CLI hidden 1 FALSE
random_seed Random Seed integer Seed value for the random number generator in the network 42 FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.data_sources.label_directory_path Label Path hidden FALSE
dataset_config.data_sources.image_directory_path Image Path hidden FALSE
dataset_config.validation_data_sources.label_directory_path Label Path hidden FALSE
dataset_config.validation_data_sources.image_directory_path Image Path hidden FALSE
dataset_config.characters_list_file Characters List Path string FALSE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 32 1 inf TRUE
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 24 1 inf FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0 inf TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-05 0 inf TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float TRUE The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.001 0 1 TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.5 0 1 TRUE
training_config.regularizer.type Regularizer Type categorical The type of the regularizer being used. __L2__ __L1__,__L2__ TRUE
training_config.regularizer.weight Regularizer Weight float TRUE The floating point weight of the regularizer. 5.00E-04 3.00E-11 inf TRUE
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1 inf FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor categorical The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 0.5
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0 inf FALSE
eval_config Evaluation collection FALSE
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 5 1 inf FALSE
eval_config.batch_size Batch Size integer batch size for evaluation 1 1 inf FALSE
augmentation_config Augmentation config collection FALSE
augmentation_config.output_width Model Input width integer 96 1 inf yes
augmentation_config.output_height Model Input height integer 48 1 inf yes
augmentation_config.output_channel Model Input channel ordered_int 3 1,3 yes FALSE
augmentation_config.max_rotate_degree Max Rotation Degree integer The maximum rotation angle for augmentation 5 0 inf
augmentation_config.keep_original_prob Keep Original Probability float The probability for keeping original images. Only resizing will be applied to an image with this probability 0.3 0 1
augmentation_config.rotate_prob Rotation Probability float The probability for rotating the image 0.5 0 1
augmentation_config.gaussian_kernel_size Gaussian Kernel Size list The kernel size of the Gaussian blur [5,7,15] 1 FALSE
augmentation_config.blur_prob Gaussian Blur Probability float The probability for blurring the image with Gaussian blur 0.5 0 1
augmentation_config.reverse_color_prob Reverse Color Probability float The probability for reversing the color of the image 0.5 0 1
lpr_config.hidden_units Hidden Units integer The number of hidden units in the LSTM layers of LPRNet 512 1 inf
lpr_config.max_label_length Max Label Length integer The maximum length of license plates in the dataset 8 1 inf
lpr_config.arch Architecture categorical The architecture of LPRNet baseline baseline FALSE
lpr_config.nlayers Number of Layers integer The number of convolution layers in LPRNet 18 10,18 FALSE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes
b batch_size integer calibration batch size 8
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case. yes
model etlt model from export hidden

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string Desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
threshold float 0.3
include_mask bool TRUE
experiment_spec_file hidden CLI argument
model_dir hidden CLI argument
key hidden CLI argument
seed Random Seed integer Seed value for the random number generator in the network 123
num_epochs integer 10
use_amp AMP bool FALSE
warmup_steps Warmup steps integer The steps taken for learning rate to ramp up to the init_learning_rate 10000
learning_rate_steps Learning rate steps string A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels [100000, 150000, 200000]
learning_rate_decay_levels learning rate decay steps string A list of decay factors. The length should match the length of learning_rate_steps. [0.1, 0.02, 0.01]
total_steps Total training steps integer The total number of training iterations 250000
train_batch_size Training Batchsize integer The batch size during training 2
eval_batch_size Evaluation Batchsize integer The batch size during validation or evaluation 4
num_steps_per_eval Number of steps between each evaluation integer num_steps_per_eval 5000
momentum SGD momentum float Momentum of the SGD optimizer 0.9
l1_weight_decay L1 Weight decay float L1 regularizer weight
l2_weight_decay L2 weight decay float L2 regularizer weight 0.00004
warmup_learning_rate float 0.0001
init_learning_rate float 0.005
num_examples_per_epoch integer 118288
checkpoint Path to Pretrained model hidden The path to a pretrained model
skip_checkpoint_variables Name of skipped variables in the pretrained model string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.
pruned_model_path Path to pruned model hidden The path to a pruned MaskRCNN graph
maskrcnn_config MaskRCNN configuration collection
maskrcnn_config.nlayers Number of layers in ResNet integer The number of layers in ResNet arch 50
maskrcnn_config.arch Backbone name string The backbone feature extractor name resnet
maskrcnn_config.freeze_bn Freeze BN bool Whether to freeze all BatchNorm layers in the backbone TRUE
maskrcnn_config.freeze_blocks Freeze Block string A list of conv blocks in the backbone to freeze [0,1]
maskrcnn_config.gt_mask_size Groundtruth Mask Size integer The groundtruth mask size 112
maskrcnn_config.rpn_positive_overlap RPN positive overlap float The lower-bound threshold to assign positive labels for anchors 0.7
maskrcnn_config.rpn_negative_overlap RPN negative overlap float The upper-bound threshold to assign negative labels for anchors 0.3
maskrcnn_config.rpn_batch_size_per_im RPN batchsize per image integer The number of sampled anchors per image in RPN 256
maskrcnn_config.rpn_fg_fraction RPN foreground fraction float The desired fraction of positive anchors in a batch 0.5
maskrcnn_config.rpn_min_size RPN minimum size float The minimum proposal height and width 0
maskrcnn_config.batch_size_per_im RoI batchsize per image integer The RoI minibatch size per image 512
maskrcnn_config.fg_fraction Foreground fraction float The target fraction of RoI minibatch that is labeled as foreground 0.25
maskrcnn_config.fg_thresh float 0.5
maskrcnn_config.bg_thresh_hi float 0.5
maskrcnn_config.bg_thresh_lo float 0
maskrcnn_config.fast_rcnn_mlp_head_dim classification head dimension integer The Fast-RCNN classification head dimension 1024
maskrcnn_config.bbox_reg_weights bounding-box regularization weights string The bounding-box regularization weights (10., 10., 5., 5.)
maskrcnn_config.include_mask Include mask head bool Specifies whether to include a mask head TRUE
maskrcnn_config.mrcnn_resolution Mask resolution integer The mask-head resolution 28
maskrcnn_config.train_rpn_pre_nms_topn Top N RPN proposals pre NMS during training integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training 2000
maskrcnn_config.train_rpn_post_nms_topn Top N RPN proposals post NMS during training integer The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training 1000
maskrcnn_config.train_rpn_nms_threshold NMS threshold in RPN during training float The NMS IOU threshold in RPN during training 0.7
maskrcnn_config.test_detections_per_image Number of bounding boxes after NMS integer The number of bounding box candidates after NMS 100
maskrcnn_config.test_nms NMS threshold during test float The NMS IOU threshold during test 0.5
maskrcnn_config.test_rpn_pre_nms_topn Top N RPN proposals pre NMS during test integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test 1000
maskrcnn_config.test_rpn_post_nms_topn Top N RPN proposals post NMS during test integer The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test 1000
maskrcnn_config.test_rpn_nms_thresh NMS threshold in RPN during test float The NMS IOU threshold in RPN during test 0.7
maskrcnn_config.min_level Minimum FPN level integer The minimum level of the output feature pyramid 2
maskrcnn_config.max_level Maximum FPN level integer The maximum level of the output feature pyramid 6
maskrcnn_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 1
maskrcnn_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
maskrcnn_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 8
maskrcnn_config.rpn_box_loss_weight RPN box loss weight float The weight for adjusting RPN box loss in the total loss 1
maskrcnn_config.fast_rcnn_box_loss_weight FastRCNN box regression weight float The weight for adjusting FastRCNN box regression loss in the total loss 1
maskrcnn_config.mrcnn_weight_loss_mask Mask loss weight float The weight for adjusting mask loss in the total loss 1
data_config Dataset configuration collection
data_config.image_size Image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. (832, 1344)
data_config.augment_input_data augment input data bool Specifies whether to augment the data TRUE
data_config.eval_samples Number of evaluation samples integer The number of samples for evaluation 500
data_config.training_file_pattern Train file pattern hidden The TFRecord path for training
data_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
data_config.val_json_file validation json path hidden The annotation file path for validation
data_config.num_classes Number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91
data_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
data_config.prefetch_buffer_size prefetch buffer size integer The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE)
data_config.shuffle_buffer_size shuffle buffer size integer The shuffle buffer size used by tf.data.Dataset (default: 4096) 4096
data_config.n_workers Number of workers integer The number of workers to parse and preprocess data (default: 16) 16
data_config.max_num_instances maximum number of instances integer The maximum number of object instances to parse (default: 200) 200

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_dir Output Directory hidden UNIX path to where the pruned model will be saved. yes
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

automl_enabled

math_cond

parent_param

depends_on

checkpoint Path to Pretrained model hidden The path to a pretrained model FALSE
data_config Dataset configuration collection FALSE
data_config.augment_input_data augment input data bool Specifies whether to augment the data TRUE
data_config.eval_samples Number of evaluation samples integer The number of samples for evaluation 500 1 inf FALSE
data_config.image_size Image size string The image dimension as a tuple within quote marks. (height,width) indicates the dimension of the resized and padded input. (832,1344) FALSE
data_config.max_num_instances maximum number of instances integer The maximum number of object instances to parse (default: 200) 200
data_config.n_workers Number of workers integer The number of workers to parse and preprocess data (default: 16) 16 0 inf FALSE
data_config.num_classes Number of classes integer The number of classes. If there are N categories in the annotation,num_classes should be N+1 (background class) 91 2 inf FALSE
data_config.prefetch_buffer_size prefetch buffer size integer The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE) 1 inf FALSE
data_config.shuffle_buffer_size shuffle buffer size integer The shuffle buffer size used by tf.data.Dataset (default: 4096) 4096 1 inf
data_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
data_config.training_file_pattern Train file pattern hidden The TFRecord path for training FALSE
data_config.val_json_file validation json path hidden The annotation file path for validation FALSE
data_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation FALSE
eval_batch_size Evaluation Batchsize integer The batch size during validation or evaluation 4 1 inf FALSE
experiment_spec_file hidden CLI argument FALSE
init_learning_rate float 0.005 0 inf TRUE
key hidden CLI argument FALSE
l1_weight_decay L1 Weight decay float L1 regularizer weight 0 2
l2_weight_decay L2 weight decay float L2 regularizer weight 0.00004 0 inf TRUE
learning_rate_decay_levels learning rate decay steps string A list of decay factors. The length should match the length of learning_rate_steps. [0.1,0.02,0.01] FALSE
learning_rate_steps Learning rate steps string A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels [100000,150000,200000] FALSE
maskrcnn_config MaskRCNN configuration collection FALSE
maskrcnn_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 8 1 inf
maskrcnn_config.arch Backbone name string The backbone feature extractor name resnet resnet FALSE
maskrcnn_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0,1.0),(1.4,0.7),(0.7,1.4)] FALSE
maskrcnn_config.batch_size_per_im RoI batchsize per image integer The RoI minibatch size per image 512 1 inf
maskrcnn_config.bbox_reg_weights bounding-box regularization weights string The bounding-box regularization weights (10.,10.,5.,5.) FALSE
maskrcnn_config.bg_thresh_hi float 0.5 0 1
maskrcnn_config.bg_thresh_lo float 0 0 1 < maskrcnn_config.bg_thresh_hi
maskrcnn_config.fast_rcnn_box_loss_weight FastRCNN box regression weight float The weight for adjusting FastRCNN box regression loss in the total loss 1 1.00E-05 inf
maskrcnn_config.fast_rcnn_mlp_head_dim classification head dimension integer The Fast-RCNN classification head dimension 1024 1 inf
maskrcnn_config.fg_fraction Foreground fraction float The target fraction of RoI minibatch that is labeled as foreground 0.25 1.00E-05 inf
maskrcnn_config.fg_thresh float 0.5 1.00E-05 1
maskrcnn_config.freeze_blocks Freeze Block string A list of conv blocks in the backbone to freeze [0,1] FALSE
maskrcnn_config.freeze_bn Freeze BN bool Whether to freeze all BatchNorm layers in the backbone TRUE
maskrcnn_config.gt_mask_size Groundtruth Mask Size integer The groundtruth mask size 112 14 inf
maskrcnn_config.include_mask Include mask head bool Specifies whether to include a mask head TRUE FALSE
maskrcnn_config.max_level Maximum FPN level integer The maximum level of the output feature pyramid 6 6 FALSE
maskrcnn_config.min_level Minimum FPN level integer The minimum level of the output feature pyramid 2 2 FALSE
maskrcnn_config.mrcnn_resolution Mask resolution integer The mask-head resolution 28 14 inf / 4
maskrcnn_config.mrcnn_weight_loss_mask Mask loss weight float The weight for adjusting mask loss in the total loss 1 1.00E-05 inf
maskrcnn_config.nlayers Number of layers in ResNet integer The number of layers in ResNet arch 50 10,18,34,50,101 FALSE
maskrcnn_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 1 1 inf
maskrcnn_config.rpn_batch_size_per_im RPN batchsize per image integer The number of sampled anchors per image in RPN 256 1 inf
maskrcnn_config.rpn_box_loss_weight RPN box loss weight float The weight for adjusting RPN box loss in the total loss 1 1.00E-05 inf
maskrcnn_config.rpn_fg_fraction RPN foreground fraction float The desired fraction of positive anchors in a batch 0.5 1.00E-05 1
maskrcnn_config.rpn_min_size RPN minimum size float The minimum proposal height and width 0 0 inf
maskrcnn_config.rpn_negative_overlap RPN negative overlap float The upper-bound threshold to assign negative labels for anchors 0.3 0 1
maskrcnn_config.rpn_positive_overlap RPN positive overlap float The lower-bound threshold to assign positive labels for anchors 0.7 0 1 > maskrcnn_config.rpn_negative_overlap
maskrcnn_config.test_detections_per_image Number of bounding boxes after NMS integer The number of bounding box candidates after NMS 100 1 inf
maskrcnn_config.test_nms NMS threshold during test float The NMS IOU threshold during test 0.5 0 1
maskrcnn_config.test_rpn_nms_thresh NMS threshold in RPN during test float The NMS IOU threshold in RPN during test 0.7 0 1
maskrcnn_config.test_rpn_post_nms_topn Top N RPN proposals post NMS during test integer The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test 1000 1 inf
maskrcnn_config.test_rpn_pre_nms_topn Top N RPN proposals pre NMS during test integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test 1000 1 inf
maskrcnn_config.train_rpn_nms_threshold NMS threshold in RPN during training float The NMS IOU threshold in RPN during training 0.7 0 1
maskrcnn_config.train_rpn_post_nms_topn Top N RPN proposals post NMS during training integer The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training 1000 1 inf
maskrcnn_config.train_rpn_pre_nms_topn Top N RPN proposals pre NMS during training integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training 2000 1 inf
model_dir hidden CLI argument FALSE
momentum SGD momentum float Momentum of the SGD optimizer 0.9 0 1 TRUE
num_epochs integer 10 1 inf FALSE
num_examples_per_epoch integer 118288 1 inf FALSE
num_steps_per_eval Number of steps between each evaluation integer The number of training steps between each evaluation 5000 1 inf FALSE
pruned_model_path Path to pruned model hidden The path to a pruned MaskRCNN graph FALSE
seed Random Seed integer Seed value for the random number generator in the network 123 1 inf FALSE
skip_checkpoint_variables Name of skipped variables in the pretrained model string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. FALSE
total_steps Total training steps integer The total number of training iterations 250000 1 inf FALSE
train_batch_size Training Batchsize integer The batch size during training 2 1 inf
use_amp AMP bool FALSE
version Schema Version const The version of this schema 1 internal FALSE
warmup_learning_rate float 0.0001 0 inf TRUE
warmup_steps Warmup steps integer The steps taken for learning rate to ramp up to the init_learning_rate 10000 1 inf FALSE <=total_steps

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
experiment_spec_file hidden CLI argument
model_dir hidden CLI argument
key hidden CLI argument
seed Random Seed integer Seed value for the random number generator in the network 123
num_epochs integer 10
use_amp AMP bool FALSE
warmup_steps Warmup steps integer The steps taken for learning rate to ramp up to the init_learning_rate 10000
learning_rate_steps Learning rate steps string A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels [100000, 150000, 200000]
learning_rate_decay_levels Learning rate decay levels string A list of decay factors. The length should match the length of learning_rate_steps. [0.1, 0.02, 0.01]
total_steps Total training steps integer The total number of training iterations 250000
train_batch_size Training Batchsize integer The batch size during training 2
eval_batch_size Evaluation Batchsize integer The batch size during validation or evaluation 4
num_steps_per_eval Number of steps between each evaluation integer The number of training steps between each evaluation 5000
momentum SGD momentum float Momentum of the SGD optimizer 0.9
l1_weight_decay L1 Weight decay float L1 regularizer weight
l2_weight_decay L2 weight decay float L2 regularizer weight 0.00004
warmup_learning_rate float 0.0001
init_learning_rate float 0.005
num_examples_per_epoch integer 118288
checkpoint Path to Pretrained model hidden The path to a pretrained model
skip_checkpoint_variables Name of skipped variables in the pretrained model string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.
pruned_model_path Path to pruned model hidden The path to a pruned MaskRCNN graph
maskrcnn_config MaskRCNN configuration collection
maskrcnn_config.nlayers Number of layers in ResNet integer The number of layers in ResNet arch 50
maskrcnn_config.arch Backbone name string The backbone feature extractor name resnet
maskrcnn_config.freeze_bn Freeze BN bool Whether to freeze all BatchNorm layers in the backbone TRUE
maskrcnn_config.freeze_blocks Freeze Block string A list of conv blocks in the backbone to freeze [0,1]
maskrcnn_config.gt_mask_size Groundtruth Mask Size integer The groundtruth mask size 112
maskrcnn_config.rpn_positive_overlap RPN positive overlap float The lower-bound threshold to assign positive labels for anchors 0.7
maskrcnn_config.rpn_negative_overlap RPN negative overlap float The upper-bound threshold to assign negative labels for anchors 0.3
maskrcnn_config.rpn_batch_size_per_im RPN batchsize per image integer The number of sampled anchors per image in RPN 256
maskrcnn_config.rpn_fg_fraction RPN foreground fraction float The desired fraction of positive anchors in a batch 0.5
maskrcnn_config.rpn_min_size RPN minimum size float The minimum proposal height and width 0
maskrcnn_config.batch_size_per_im RoI batchsize per image integer The RoI minibatch size per image 512
maskrcnn_config.fg_fraction Foreground fraction float The target fraction of RoI minibatch that is labeled as foreground 0.25
maskrcnn_config.fg_thresh float 0.5
maskrcnn_config.bg_thresh_hi float 0.5
maskrcnn_config.bg_thresh_lo float 0
maskrcnn_config.fast_rcnn_mlp_head_dim classification head dimension integer The Fast-RCNN classification head dimension 1024
maskrcnn_config.bbox_reg_weights bounding-box regularization weights string The bounding-box regularization weights (10., 10., 5., 5.)
maskrcnn_config.include_mask Include mask head bool Specifies whether to include a mask head TRUE
maskrcnn_config.mrcnn_resolution Mask resolution integer The mask-head resolution 28
maskrcnn_config.train_rpn_pre_nms_topn Top N RPN proposals pre NMS during training integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training 2000
maskrcnn_config.train_rpn_post_nms_topn Top N RPN proposals post NMS during training integer The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training 1000
maskrcnn_config.train_rpn_nms_threshold NMS threshold in RPN during training float The NMS IOU threshold in RPN during training 0.7
maskrcnn_config.test_detections_per_image Number of bounding boxes after NMS integer The number of bounding box candidates after NMS 100
maskrcnn_config.test_nms NMS threshold during test float The NMS IOU threshold during test 0.5
maskrcnn_config.test_rpn_pre_nms_topn Top N RPN proposals pre NMS during test integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test 1000
maskrcnn_config.test_rpn_post_nms_topn Top N RPN proposals post NMS during test integer The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test 1000
maskrcnn_config.test_rpn_nms_thresh NMS threshold in RPN during test float The NMS IOU threshold in RPN during test 0.7
maskrcnn_config.min_level Minimum FPN level integer The minimum level of the output feature pyramid 2
maskrcnn_config.max_level Maximum FPN level integer The maximum level of the output feature pyramid 6
maskrcnn_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 1
maskrcnn_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
maskrcnn_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 8
maskrcnn_config.rpn_box_loss_weight RPN box loss weight float The weight for adjusting RPN box loss in the total loss 1
maskrcnn_config.fast_rcnn_box_loss_weight FastRCNN box regression weight float The weight for adjusting FastRCNN box regression loss in the total loss 1
maskrcnn_config.mrcnn_weight_loss_mask Mask loss weight float The weight for adjusting mask loss in the total loss 1
data_config Dataset configuration collection
data_config.image_size Image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. (832, 1344)
data_config.augment_input_data augment input data bool Specifies whether to augment the data TRUE
data_config.eval_samples Number of evaluation samples integer The number of samples for evaluation 500
data_config.training_file_pattern Train file pattern hidden The TFRecord path for training
data_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
data_config.val_json_file validation json path hidden The annotation file path for validation
data_config.num_classes Number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91
data_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
data_config.prefetch_buffer_size prefetch buffer size integer The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE)
data_config.shuffle_buffer_size shuffle buffer size integer The shuffle buffer size used by tf.data.Dataset (default: 4096) 4096
data_config.n_workers Number of workers integer The number of workers to parse and preprocess data (default: 16) 16
data_config.max_num_instances maximum number of instances integer The maximum number of object instances to parse (default: 200) 200

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
experiment_spec_file hidden CLI argument
model_dir hidden CLI argument
key hidden CLI argument
seed Random Seed integer Seed value for the random number generator in the network 123
num_epochs integer 10
use_amp AMP bool FALSE
warmup_steps Warmup steps integer The steps taken for learning rate to ramp up to the init_learning_rate 10000
learning_rate_steps Learning rate steps string A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels [100000, 150000, 200000]
learning_rate_decay_levels Learning rate decay levels string A list of decay factors. The length should match the length of learning_rate_steps. [0.1, 0.02, 0.01]
total_steps Total training steps integer The total number of training iterations 250000
train_batch_size Training Batchsize integer The batch size during training 2
eval_batch_size Evaluation Batchsize integer The batch size during validation or evaluation 4
num_steps_per_eval Number of steps between each evaluation integer The number of training steps between each evaluation 5000
momentum SGD momentum float Momentum of the SGD optimizer 0.9
l1_weight_decay L1 Weight decay float L1 regularizer weight
l2_weight_decay L2 weight decay float L2 regularizer weight 0.00004
warmup_learning_rate float 0.0001
init_learning_rate float 0.005
num_examples_per_epoch integer 118288
checkpoint Path to Pretrained model hidden The path to a pretrained model
skip_checkpoint_variables Name of skipped variables in the pretrained model string If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning.
pruned_model_path Path to pruned model hidden The path to a pruned MaskRCNN graph
maskrcnn_config MaskRCNN configuration collection
maskrcnn_config.nlayers Number of layers in ResNet integer The number of layers in ResNet arch 50
maskrcnn_config.arch Backbone name string The backbone feature extractor name resnet
maskrcnn_config.freeze_bn Freeze BN bool Whether to freeze all BatchNorm layers in the backbone TRUE
maskrcnn_config.freeze_blocks Freeze Block string A list of conv blocks in the backbone to freeze [0,1]
maskrcnn_config.gt_mask_size Groundtruth Mask Size integer The groundtruth mask size 112
maskrcnn_config.rpn_positive_overlap RPN positive overlap float The lower-bound threshold to assign positive labels for anchors 0.7
maskrcnn_config.rpn_negative_overlap RPN negative overlap float The upper-bound threshold to assign negative labels for anchors 0.3
maskrcnn_config.rpn_batch_size_per_im RPN batchsize per image integer The number of sampled anchors per image in RPN 256
maskrcnn_config.rpn_fg_fraction RPN foreground fraction float The desired fraction of positive anchors in a batch 0.5
maskrcnn_config.rpn_min_size RPN minimum size float The minimum proposal height and width 0
maskrcnn_config.batch_size_per_im RoI batchsize per image integer The RoI minibatch size per image 512
maskrcnn_config.fg_fraction Foreground fraction float The target fraction of RoI minibatch that is labeled as foreground 0.25
maskrcnn_config.fg_thresh float 0.5
maskrcnn_config.bg_thresh_hi float 0.5
maskrcnn_config.bg_thresh_lo float 0
maskrcnn_config.fast_rcnn_mlp_head_dim classification head dimension integer The Fast-RCNN classification head dimension 1024
maskrcnn_config.bbox_reg_weights bounding-box regularization weights string The bounding-box regularization weights (10., 10., 5., 5.)
maskrcnn_config.include_mask Include mask head bool Specifies whether to include a mask head TRUE
maskrcnn_config.mrcnn_resolution Mask resolution integer The mask-head resolution 28
maskrcnn_config.train_rpn_pre_nms_topn Top N RPN proposals pre NMS during training integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training 2000
maskrcnn_config.train_rpn_post_nms_topn Top N RPN proposals post NMS during training integer The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training 1000
maskrcnn_config.train_rpn_nms_threshold NMS threshold in RPN during training float The NMS IOU threshold in RPN during training 0.7
maskrcnn_config.test_detections_per_image Number of bounding boxes after NMS integer The number of bounding box candidates after NMS 100
maskrcnn_config.test_nms NMS threshold during test float The NMS IOU threshold during test 0.5
maskrcnn_config.test_rpn_pre_nms_topn Top N RPN proposals pre NMS during test integer The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test 1000
maskrcnn_config.test_rpn_post_nms_topn Top N RPN proposals post NMS during test integer The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test 1000
maskrcnn_config.test_rpn_nms_thresh NMS threshold in RPN during test float The NMS IOU threshold in RPN during test 0.7
maskrcnn_config.min_level Minimum FPN level integer The minimum level of the output feature pyramid 2
maskrcnn_config.max_level Maximum FPN level integer The maximum level of the output feature pyramid 6
maskrcnn_config.num_scales number of scales integer The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) 1
maskrcnn_config.aspect_ratios aspect ratios string A list of tuples representing the aspect ratios of anchors on each pyramid level [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
maskrcnn_config.anchor_scale anchor scale integer Scale of the base-anchor size to the feature-pyramid stride 8
maskrcnn_config.rpn_box_loss_weight RPN box loss weight float The weight for adjusting RPN box loss in the total loss 1
maskrcnn_config.fast_rcnn_box_loss_weight FastRCNN box regression weight float The weight for adjusting FastRCNN box regression loss in the total loss 1
maskrcnn_config.mrcnn_weight_loss_mask Mask loss weight float The weight for adjusting mask loss in the total loss 1
data_config Dataset configuration collection
data_config.image_size Image size string The image dimension as a tuple within quote marks. (height, width) indicates the dimension of the resized and padded input. (832, 1344)
data_config.augment_input_data augment input data bool Specifies whether to augment the data TRUE
data_config.eval_samples Number of evaluation samples integer The number of samples for evaluation 500
data_config.training_file_pattern Train file pattern hidden The TFRecord path for training
data_config.validation_file_pattern validation file pattern hidden The TFRecord path for validation
data_config.val_json_file validation json path hidden The annotation file path for validation
data_config.num_classes Number of classes integer The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) 91
data_config.skip_crowd_during_training skip crowd during training bool Specifies whether to skip crowd during training TRUE
data_config.prefetch_buffer_size prefetch buffer size integer The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE)
data_config.shuffle_buffer_size shuffle buffer size integer The shuffle buffer size used by tf.data.Dataset (default: 4096) 4096
data_config.n_workers Number of workers integer The number of workers to parse and preprocess data (default: 16) 16
data_config.max_num_instances maximum number of instances integer The maximum number of object instances to parse (default: 200) 200

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
class_map hidden
engine_file Engine File hidden UNIX path to the model engine file. yes
verbose hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

random_seed integer 42 FALSE
model_config collection FALSE
model_config.arch string resnet FALSE
model_config.input_image_size string 3,80,60 yes yes FALSE
model_config.resize_interpolation_method ordered __BILINEAR__,__BICUBIC__
model_config.n_layers ordered_int 10 10,18,34,50,101 FALSE
model_config.use_imagenet_head bool
model_config.use_batch_norm bool TRUE
model_config.use_bias bool FALSE
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer FALSE
model_config.dropout float 0 1
model_config.batch_norm_config collection FALSE
model_config.batch_norm_config.momentum float 0.9 1.00E-05 inf
model_config.batch_norm_config.epsilon float 1.00E-05 1.00E-10 inf
model_config.activation collection FALSE
model_config.activation.activation_type string FALSE
model_config.activation.activation_parameters collection FALSE
model_config.activation.activation_parameters.key string FALSE
model_config.activation.activation_parameters.value float FALSE
dataset_config collection FALSE
dataset_config.train_csv_path hidden FALSE
dataset_config.image_directory_path hidden FALSE
dataset_config.val_csv_path hidden FALSE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 100 1 inf
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 10 1 FALSE
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate float 0.00002 0.0002 FALSE
training_config.learning_rate.soft_start_annealing_schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 1.00E-06 1.00E-04 TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-02 0 TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1 TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.7 0 1 TRUE
training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L1__ __L1__,__L2__ TRUE
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 9.00E-05 3.00E-11 3.00E-03 TRUE
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9 1.00E-10 0.99 TRUE
training_config.resume_model_path hidden FALSE
training_config.optimizer.sgd.nesterov bool FALSE TRUE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

random_seed integer 42
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,80,60 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 10
model_config.use_imagenet_head bool
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float
dataset_config collection
dataset_config.train_csv_path hidden
dataset_config.image_directory_path hidden
dataset_config.val_csv_path hidden
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 100 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 10 1
training_config.enable_qat Enable Quantization Aware Training bool bool
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.7 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 9.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool FALSE

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean, geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

random_seed integer 42
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,80,60 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 10
model_config.use_imagenet_head bool
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float
dataset_config collection
dataset_config.train_csv_path hidden
dataset_config.image_directory_path hidden
dataset_config.val_csv_path hidden
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 100 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 10 1
training_config.enable_qat Enable Quantization Aware Training bool bool
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.7 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 9.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool FALSE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

random_seed integer 42
model_config collection
model_config.arch string resnet
model_config.input_image_size string 3,80,60 yes yes
model_config.resize_interpolation_method string __BILINEAR__, __BICUBIC__
model_config.n_layers integer 10
model_config.use_imagenet_head bool
model_config.use_batch_norm bool TRUE
model_config.use_bias bool
model_config.use_pooling bool
model_config.all_projections bool TRUE
model_config.freeze_bn bool
model_config.freeze_blocks integer
model_config.dropout float
model_config.batch_norm_config collection
model_config.batch_norm_config.momentum float
model_config.batch_norm_config.epsilon float
model_config.activation collection
model_config.activation.activation_type string
model_config.activation.activation_parameters collection
model_config.activation.activation_parameters.key string
model_config.activation.activation_parameters.value float
dataset_config collection
dataset_config.train_csv_path hidden
dataset_config.image_directory_path hidden
dataset_config.val_csv_path hidden
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 100 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 10 1
training_config.enable_qat Enable Quantization Aware Training bool bool
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 1.00E-06 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.00E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.7 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 9.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool FALSE

augment

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

cli

batch_size integer 4 yes
spatial_config collection
spatial_config.rotation_config collection
spatial_config.rotation_config.angle float 10
spatial_config.rotation_config.units string degrees
spatial_config.shear_config collection
spatial_config.shear_config.shear_ratio_x float
spatial_config.shear_config.shear_ratio_y float
spatial_config.flip_config collection
spatial_config.flip_config.flip_horizontal bool
spatial_config.flip_config.flip_vertical bool
spatial_config.translation_config collection
spatial_config.translation_config.translate_x integer
spatial_config.translation_config.translate_y integer
color_config collection
color_config.hue_saturation_config collection
color_config.hue_saturation_config.hue_rotation_angle float 5
color_config.hue_saturation_config.saturation_shift float 1
color_config.contrast_config collection
color_config.contrast_config.contrast float
color_config.contrast_config.center float
color_config.brightness_config collection
color_config.brightness_config.offset float
partition_config collection
partition_config.partition_mode string Enum __ID_WISE__, __RANDOM__
partition_config.dataset_percentage float
blur_config collection
blur_config.std float
blur_config.size float
output_image_width integer 960 yes
output_image_height integer 544 yes
output_image_channel integer 3 yes
image_extension string .png yes
dataset_config collection
dataset_config.image_path const hidden images
dataset_config.label_path const hidden labels

convert_efficientdet

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

coco_config collection
coco_config.root_directory_path hidden
coco_config.image_dir string List of image directories corresponding to each partition images The order of image directories must match annotation_files based on partitions
coco_config.annotations_file string List of JSON files with COCO dataset format annotations.json
coco_config.num_shards integer The number of shards per fold. If the size of num_shards is 1, then same number of shards will be applied to every partition
coco_config.tag string
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list list of string
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list list of string
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping list list of string
target_class_mapping.key Class Key string
target_class_mapping.value Class Value string

kmeans

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

size_x integer yes
size_y integer yes
num_clusters integer 9
max_steps integer 10000
min_x integer 0
min_y integer 0

convert__kitti

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

kitti_config collection
kitti_config.root_directory_path hidden
kitti_config.image_dir_name const images
kitti_config.label_dir_name const labels
kitti_config.point_clouds_dir string
kitti_config.calibrations_dir string
kitti_config.kitti_sequence_to_frames_file string The name of the KITTI sequence to frame mapping file. This file must be present within the dataset root as mentioned in the root_directory_path. This file must be uploaded by the user along with images and labels. The name of that file must be filled in this field
kitti_config.image_extension string The extension of the images in the image_dir_name parameter. .png .jpg, .png, .jpeg yes yes
kitti_config.num_partitions integer The number of partitions to use to split the data (N folds). This field is ignored when the partition mode is set to random, as by default only two partitions are generated: val and train. In sequence mode, the data is split into n-folds. The number of partitions is ideally fewer than the total number of sequences in the kitti_sequence_to_frames file. Valid options: n=2 for random partition, n< number of sequences in the kitti_sequence_to_frames_file 2
kitti_config.num_shards integer The number of shards per fold. 10 1 20
kitti_config.partition_mode string The method employed when partitioning the data to multiple folds. Two methods are supported: Random partitioning: The data is divided into 2 folds, train and val. This mode requires that the val_split parameter be set. Sequence-wise partitioning: The data is divided into n partitions (defined by the num_partitions parameter) based on the number of sequences available. random random, sequence
kitti_config.val_split float The percentage of data to be separated for validation. This only works under “random” partition mode. This partition is available in fold 0 of the TFrecords generated. Set the validation fold to 0 in the dataset_config. 0 0 100 Must not be exposed from API since each dataset is its own and cannot be split into train, val, test, etc… through the API
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping list Better not expose these on dataset convert and use the target_class_mapping in the train / eval / inference spec
target_class_mapping.key Class Key string
target_class_mapping.value Class Value string

convert__coco

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

coco_config collection
coco_config.root_directory_path hidden
coco_config.image_dir string List of image directories corresponding to each partition images The order of image directories must match annotation_files based on partitions
coco_config.annotations_file string List of JSON files with COCO dataset format annotations.json
coco_config.num_shards integer The number of shards per fold. If the size of num_shards is 1, then same number of shards will be applied to every partition
coco_config.tag string
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list list of string
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list list of string
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping list list of string
target_class_mapping.key Class Key string
target_class_mapping.value Class Value string

convert_and_index__kitti

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

kitti_config collection
kitti_config.root_directory_path hidden
kitti_config.image_dir_name const images
kitti_config.label_dir_name const labels
kitti_config.point_clouds_dir string
kitti_config.calibrations_dir string
kitti_config.kitti_sequence_to_frames_file string The name of the KITTI sequence to frame mapping file. This file must be present within the dataset root as mentioned in the root_directory_path. This file must be uploaded by the user along with images and labels. The name of that file must be filled in this field
kitti_config.image_extension string The extension of the images in the image_dir_name parameter. .png .jpg, .png, .jpeg yes yes
kitti_config.num_partitions integer The number of partitions to use to split the data (N folds). This field is ignored when the partition mode is set to random, as by default only two partitions are generated: val and train. In sequence mode, the data is split into n-folds. The number of partitions is ideally fewer than the total number of sequences in the kitti_sequence_to_frames file. Valid options: n=2 for random partition, n< number of sequences in the kitti_sequence_to_frames_file 2
kitti_config.num_shards integer The number of shards per fold. 10 1 20
kitti_config.partition_mode string The method employed when partitioning the data to multiple folds. Two methods are supported: Random partitioning: The data is divided into 2 folds, train and val. This mode requires that the val_split parameter be set. Sequence-wise partitioning: The data is divided into n partitions (defined by the num_partitions parameter) based on the number of sequences available. random random, sequence
kitti_config.val_split float The percentage of data to be separated for validation. This only works under “random” partition mode. This partition is available in fold 0 of the TFrecords generated. Set the validation fold to 0 in the dataset_config. 0 0 100 Must not be exposed from API since each dataset is its own and cannot be split into train, val, test, etc… through the API
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. masked-person ^[-a-zA-Z0-9_]{1,40}$

convert_and_index__coco

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

popular

regex

notes

coco_config collection
coco_config.root_directory_path hidden
coco_config.img_dir_names list List of image directories corresponding to each partition [“images”] The order of image directories must match annotation_files based on partitions
coco_config.annotation_files list List of JSON files with COCO dataset format [“annotations.json”]
coco_config.num_partitions integer The number of partitions to use to split the data (N folds). The number of partitions must match the size of the img_dir_names and annotation_files 1
coco_config.num_shards list The number of shards per fold. If the size of num_shards is 1, then same number of shards will be applied to every partition [256]
sample_modifier_config collection
sample_modifier_config.filter_samples_containing_only list list of string
sample_modifier_config.dominant_target_classes list list of string
sample_modifier_config.minimum_target_class_imbalance list list of string
sample_modifier_config.minimum_target_class_imbalance.key string
sample_modifier_config.minimum_target_class_imbalance.value float
sample_modifier_config.num_duplicates integer
sample_modifier_config.max_training_samples integer
sample_modifier_config.source_to_target_class_mapping list list of string
sample_modifier_config.source_to_target_class_mapping.key string
sample_modifier_config.source_to_target_class_mapping.value string
image_directory_path hidden
target_class_mapping list list of string
target_class_mapping.key Class Key string
target_class_mapping.value Class Value string

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
experiment_spec_file hidden CLI argument
results_dir hidden CLI argument
key hidden CLI argument
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path string
dataset_config.validation_data_sources.image_directory_path Image path string
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path string
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 100 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 4.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.50E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.3 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 2.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset 2
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool TRUE
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 8 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation
augmentation_config.hue Hue float Hue delta in color jittering augmentation
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
retinanet_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5]
retinanet_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different RetinaNet feature layers. Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.
retinanet_config.aspect_ratios_global string [1.0, 2.0, 0.5]
retinanet_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. FALSE
retinanet_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
retinanet_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
retinanet_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
retinanet_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
retinanet_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
retinanet_config.arch Arch string The backbone for feature extraction resnet
retinanet_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
retinanet_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
retinanet_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
retinanet_config.loss_loc_weight Localization loss weight float This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss 0.8
retinanet_config.focal_loss_alpha Alpha (Focal loss) float Alpha in the focal loss equation 0.25
retinanet_config.focal_loss_gamma Gamma (Focal loss) float Gamma in the focal loss equation 2
retinanet_config.n_kernels Number of kernels integer This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. 1
retinanet_config.feature_size Feature size integer This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). 256
retinanet_config.pos_iou_thresh Positive IOU threshold float The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
retinanet_config.neg_iou_thresh Negative IOU threshold float The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
retinanet_config.n_anchor_levels Number of Anchor levels integer Number of anchor levels between two adjacent scales. 1

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
threshold CLI parameter threshold float 0.3
experiment_spec_file hidden CLI argument
results_dir hidden CLI argument
key hidden CLI argument
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path string
dataset_config.validation_data_sources.image_directory_path Image path string
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path string
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 100 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 4.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.50E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.3 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 2.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset 2
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool TRUE
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 8 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation
augmentation_config.hue Hue float Hue delta in color jittering augmentation
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
retinanet_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. retinanet_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different RetinaNet feature layers
retinanet_config.aspect_ratios_global string [1.0, 2.0, 0.5]
retinanet_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. FALSE
retinanet_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
retinanet_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
retinanet_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
retinanet_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
retinanet_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
retinanet_config.arch Arch string The backbone for feature extraction resnet
retinanet_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
retinanet_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
retinanet_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
retinanet_config.loss_loc_weight Localization loss weight float This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss 0.8
retinanet_config.focal_loss_alpha Alpha (Focal loss) float Alpha in the focal loss equation 0.25
retinanet_config.focal_loss_gamma Gamma (Focal loss) float Gamma in the focal loss equation 2
retinanet_config.n_kernels Number of kernels integer This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. 1
retinanet_config.feature_size Feature size integer This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). 256
retinanet_config.pos_iou_thresh Positive IOU threshold float The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
retinanet_config.neg_iou_thresh Negative IOU threshold float The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
retinanet_config.n_anchor_levels Number of Anchor levels integer Number of anchor levels between two adjacent scales. 1

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

automl_enabled

math_cond

parent_param

depends_on

augmentation_config Augmentation config collection FALSE
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.key string A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.value float A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.output_channel Model Input channel integer 3 1,3 yes FALSE
augmentation_config.output_height Model Input height integer 544 64 inf yes
augmentation_config.output_width Model Input width integer 960 64 inf yes
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2 0.1 10 >= augmentation_config.random_crop_min_ar
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1 >= augmentation_config.random_crop_min_scale
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5 0.1 10 TRUE
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1 TRUE TRUE
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1 inf >= augmentation_config.zoom_out_min_scale
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1 inf TRUE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.data_sources.image_directory_path Image path hidden FALSE
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden FALSE
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_data_sources.image_directory_path Image path string FALSE
dataset_config.validation_data_sources.label_directory_path KITTI label path string FALSE
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path string FALSE
eval_config Evaluation collection FALSE
eval_config.average_precision_mode Average Precision Mode ordered The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__,__INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1 FALSE
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve FALSE
experiment_spec_file hidden CLI argument FALSE
initial_epoch CLI parameter initial epoch hidden 0 0 CLI argument FALSE
key hidden CLI argument FALSE
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
nms_config.top_k Top K integer Maximum number of objects after NMS 200 1 inf
random_seed Random Seed integer Seed value for the random number generator in the network 42 1 inf FALSE
results_dir hidden CLI argument FALSE
retinanet_config.arch Arch string The backbone for feature extraction resnet resnet FALSE
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. retinanet_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different RetinaNet feature layers FALSE
retinanet_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0,2.0,0.5] FALSE
retinanet_config.aspect_ratios_global string [1.0,2.0,0.5] FALSE
retinanet_config.clip_boxes Clip Boxes bool If true,all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
retinanet_config.feature_size Feature size integer This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps,thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0,2^⅓,2^⅔ ). 256 1 inf
retinanet_config.focal_loss_alpha Alpha (Focal loss) float Alpha in the focal loss equation 0.25 1.00E-05 inf
retinanet_config.focal_loss_gamma Gamma (Focal loss) float Gamma in the focal loss equation 2 1.00E-05 inf
retinanet_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training FALSE
retinanet_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
retinanet_config.loss_loc_weight Localization loss weight float This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss 0.8 1.00E-05 inf
retinanet_config.n_anchor_levels Number of Anchor levels integer Number of anchor levels between two adjacent scales. 1 1 inf FALSE
retinanet_config.n_kernels Number of kernels integer This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. 1 1 inf FALSE
retinanet_config.neg_iou_thresh Negative IOU threshold float The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive,nor a negative box,it will be ignored during training. 0.4 0 1 < retinanet_config.pos_iou_thresh
retinanet_config.nlayers Number of Layers ordered_int The number of conv layers in a specific arch 18 10,18,34,50,101,152 FALSE
retinanet_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided,0.5 will be used as default value. FALSE
retinanet_config.pos_iou_thresh Positive IOU threshold float The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box. 0.6 0 1 TRUE
retinanet_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.045,0.09,0.2,0.4,0.55,0.7] FALSE
retinanet_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be FALSE
retinanet_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True,two boxes will be generated with an aspect ratio of 1. FALSE
retinanet_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1,0.1,0.2,0.2] FALSE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1 inf
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.monitor Monitor ordered The name of the quantity to be monitored for early stopping loss,validation_loss,val_loss
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE FALSE
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.3 0 1 TRUE TRUE
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.50E-02 0 inf TRUE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 4.00E-05 0 inf TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1 TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 2 FALSE
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 100 1 FALSE
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP FALSE
training_config.optimizer.sgd.momentum float 0.9 0 inf
training_config.optimizer.sgd.nesterov bool FALSE TRUE
training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L2__ __L1__,__L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 2.00E-05 3.00E-11 3.00E-03 TRUE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 10 1 100 FALSE
version Schema Version const The version of this schema 1 internal FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
initial_epoch CLI parameter initial epoch hidden 0 CLI argument
experiment_spec_file hidden CLI argument
results_dir hidden CLI argument
key hidden CLI argument
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path string
dataset_config.validation_data_sources.image_directory_path Image path string
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path string
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 8 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 100 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 4.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 1.50E-02 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.3 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 2.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset
training_config.n_workers Workers integer Number of workers in sequence dataset 2
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard
training_config.optimizer.sgd collection One of SGD / ADAM / RMSPROP
training_config.optimizer.sgd.momentum float 0.9
training_config.optimizer.sgd.nesterov bool FALSE
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 8 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation
augmentation_config.hue Hue float Hue delta in color jittering augmentation
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
retinanet_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5]
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. retinanet_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different RetinaNet feature layers
retinanet_config.aspect_ratios_global string [1.0, 2.0, 0.5]
retinanet_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. FALSE
retinanet_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
retinanet_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
retinanet_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.045, 0.09, 0.2, 0.4, 0.55, 0.7]
retinanet_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
retinanet_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
retinanet_config.arch Arch string The backbone for feature extraction resnet
retinanet_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
retinanet_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
retinanet_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training
retinanet_config.loss_loc_weight Localization loss weight float This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss 0.8
retinanet_config.focal_loss_alpha Alpha (Focal loss) float Alpha in the focal loss equation 0.25
retinanet_config.focal_loss_gamma Gamma (Focal loss) float Gamma in the focal loss equation 2
retinanet_config.n_kernels Number of kernels integer This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. 1
retinanet_config.feature_size Feature size integer This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). 256
retinanet_config.pos_iou_thresh Positive IOU threshold float The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box.
retinanet_config.neg_iou_thresh Negative IOU threshold float The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training.
retinanet_config.n_anchor_levels Number of Anchor levels integer Number of anchor levels between two adjacent scales. 1

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format input_name,min_shape,opt_shape,max_shape, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

dataset_convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
dataset_name Name const merge merge
original_json Original json hidden
finetune_json Finetune json hidden
original_minutes Original minutes integer 300
delimiter Delimiter string

save_path Save Path hidden

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 yes
export_format Export format string RIVA
export_to Export To hidden

finetune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save the model yes yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model yes yes yes
resume_model_weights Pretrained model path hidden Path to the trained model yes
sample_rate Sample rate integer The target sample rate to load the audio, in Hz 22050
train_dataset Train Dataset hidden Path to the train dataset manifest json file
phoneme_dict_path Phoneme Dictionary Path str Path to the phoneme dictionary path cmudict-0.7b_nv22.01 yes yes
whitelist_path Path to whitelist tsv file str Path to the whitelist path heteronyms-030921 yes yes
heteronyms_path Path to heteronyms path str Path to the heteronyms path lj_speech.tsv yes yes
sup_data_types Supplementary data types list List of supplementary data types [“align_prior_matrix”, “pitch”]
validation_dataset Validation Dataset hidden Path to the validation dataset manifest json file
prior_folder hidden
n_speakers Number of speakers integer Number of speakers in the dataset 1 yes
n_window_size Window size integer The size of the fft window in samples 1024 yes
n_window_stride Window stride integer The stride of the window in samples 256 yes
pitch_fmin Pitch Fmin float The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) 65 yes
pitch_fmax Pitch Fmax float The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) 2094 yes
pitch_avg Pitch Average float The average used to normalize the pitch 117.27540199742586 yes yes
pitch_std Pitch std. deviation float The standard deviation used to normalize the pitch 22.1851002822779 yes yes
train_ds Train Dataset collection Parameters to configure the training dataset
train_ds.dataset Train Dataset collection Parameters to configure the training dataset
train_ds.dataset._target_ Target const The nemo class module to be imported nemo.collections.tts.torch.data.TTSDataset
train_ds.dataset.manifest_filepath Train manifest file const Path to the train dataset manifest json file ${train_dataset}
train_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
train_ds.dataset.min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.1
train_ds.dataset.int_values Input as integer values bool Load samples as 32 bit integers or not FALSE
train_ds.dataset.normalize Normalize dataset bool The flag to determine whether or not to normalize the transcript text TRUE
train_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz. ${sample_rate}
train_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). TRUE
train_ds.dataset.trim_top_db Trim top dB integer The threshold (in decibels) below reference to consider as silence 50
train_ds.dataset.sup_data_path Prior folder const Path to the prior folder ${prior_folder}
train_ds.dataset.sup_data_types Supplementary data types const Supplementary data types ${sup_data_types}
train_ds.dataset.n_window_size Window size const The size of the fft window in samples ${n_window_size}
train_ds.dataset.n_window_stride Window stride const The stride of the window in samples ${n_window_stride}
train_ds.dataset.pitch_fmin Pitch Fmin const The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) ${pitch_fmin}
train_ds.dataset.pitch_fmax Pitch Fmax const The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) ${pitch_fmax}
train_ds.dataset.pitch_mean Pitch Average const The average used to normalize the pitch ${pitch_avg}
train_ds.dataset.pitch_std Pitch std. deviation const The standard deviation used to normalize the pitch ${pitch_std}
train_ds.dataset.pitch_norm Pitch normalization bool Whether to normalize pitch (via pitch_mean and pitch_std) or not TRUE
train_ds.dataset.n_fft Number of fft integer The number of fft samples 1024
train_ds.dataset.win_length STFT window length integer The length of the stft windows. 1024
train_ds.dataset.hop_length Hop Length integer The hop length between fft computations. 256
train_ds.dataset.window Window function str Window function type. hann
train_ds.dataset.n_mels Number of mel integer Number of mel filterbanks. 80
train_ds.dataset.lowfreq Low frequency input integer The lowfreq input to the mel filter calculation 0
train_ds.dataset.highfreq High frequency input integer The highfreq input to the mel filter calculation 8000
train_ds.dataset.text_normalizer Text normalizer Collection
train_ds.dataset.text_normalizer._target_ Text normalizer class const The name of the nemo text normalizer class nemo_text_processing.text_normalization.normalize.Normalizer yes
train_ds.dataset.text_normalizer.lang Text normalizer language code string The language to normalize the text against en yes
train_ds.dataset.text_normalizer.input_case Text case string The type of the text input cased
train_ds.dataset.text_normalizer.whitelist Whitelist tsv file string Path to the file containing whitelist text ${whitelist_path} yes
train_ds.dataset.text_normalizer_call_kwargs Kwargs for text normalizer calls collection
train_ds.dataset.text_normalizer_call_kwargs.verbose verbosity bool Flag to set verbosity of the text normalizer FALSE yes
train_ds.dataset.text_normalizer_call_kwargs.punct_pre_process Preprocess punctuation in text_normalizer_call_kwargs bool Flag to pre-process punctuation TRUE yes
train_ds.dataset.text_normalizer_call_kwargs.punct_post_process Postprocess punctuation in text_normalizer_call_kwargs bool Flag to post-process punctuation TRUE yes
train_ds.dataset.text_tokenizer Text Tokenizer collection
train_ds.dataset.text_tokenizer._target_ Text tokenizer class name const nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
train_ds.dataset.text_tokenizer.punct Flag to expect punctuation bool TRUE
train_ds.dataset.text_tokenizer.stresses Flag to expect stresses bool TRUE
train_ds.dataset.text_tokenizer.chars Flag to expect characters bool TRUE
train_ds.dataset.text_tokenizer.apostrophe Flag to expect apostrophe bool TRUE
train_ds.dataset.text_tokenizer.pad_with_space Flag to pad text with space bool TRUE
train_ds.dataset.text_tokenizer.g2p Improve g2p token collection
train_ds.dataset.text_tokenizer.g2p._target_ G2p token class name const nemo.collections.tts.torch.g2ps.EnglishG2p
train_ds.dataset.text_tokenizer.g2p.phoneme_dict Path to phoneme dictionary const ${phoneme_dict_path}
train_ds.dataset.text_tokenizer.g2p.heteronyms Path to heteronyms const ${heteronyms_path}
train_ds.dataset.text_tokenizer.g2p.phoneme_probability Phoneme probability float 0.5
train_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples
train_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE
train_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE
train_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 32
train_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 12
validation_ds Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset._target_ Target const The nemo class module to be imported nemo.collections.tts.torch.data.TTSDataset
validation_ds.dataset.manifest_filepath Validation manifest file const Path to the validation dataset manifest json file ${validation_dataset}
validation_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
validation_ds.dataset.min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.1
validation_ds.dataset.int_values Input as integer values bool Load samples as 32 bit integers or not FALSE
validation_ds.dataset.normalize Normalize dataset bool The flag to determine whether or not to normalize the transcript text TRUE
validation_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz. ${sample_rate}
validation_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). TRUE
validation_ds.dataset.sup_data_path Prior folder const Path to the prior folder ${prior_folder}
validation_ds.dataset.sup_data_types Supplementary data types const Supplementary data types ${sup_data_types}
validation_ds.dataset.n_window_size Window size const The size of the fft window in samples ${n_window_size}
validation_ds.dataset.n_window_stride Window stride const The stride of the window in samples ${n_window_stride}
validation_ds.dataset.pitch_fmin Pitch Fmin const The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) ${pitch_fmin}
validation_ds.dataset.pitch_fmax Pitch Fmax const The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) ${pitch_fmax}
validation_ds.dataset.pitch_mean Pitch Average const The average used to normalize the pitch ${pitch_avg}
validation_ds.dataset.pitch_std Pitch std. deviation const The standard deviation used to normalize the pitch ${pitch_std}
validation_ds.dataset.pitch_norm Pitch normalization bool Whether to normalize pitch (via pitch_mean and pitch_std) or not TRUE
validation_ds.dataset.n_fft Number of fft integer The number of fft samples 1024
validation_ds.dataset.win_length STFT window length integer The length of the stft windows. 1024
validation_ds.dataset.hop_length Hop Length integer The hop length between fft computations. 256
validation_ds.dataset.window Window function str Window function type. hann
validation_ds.dataset.n_mels Number of mel integer Number of mel filterbanks. 80
validation_ds.dataset.lowfreq Low frequency input integer The lowfreq input to the mel filter calculation 0
validation_ds.dataset.highfreq High frequency input integer The highfreq input to the mel filter calculation 8000
validation_ds.dataset.text_normalizer Text normalizer Collection
validation_ds.dataset.text_normalizer._target_ Text normalizer class const The name of the nemo text normalizer class nemo_text_processing.text_normalization.normalize.Normalizer yes
validation_ds.dataset.text_normalizer.lang Text normalizer language code string The language to normalize the text against en yes
validation_ds.dataset.text_normalizer.input_case Text case string The type of the text input cased
validation_ds.dataset.text_normalizer.whitelist Whitelist tsv file string Path to the file containing whitelist text ${whitelist_path} yes
validation_ds.dataset.text_normalizer_call_kwargs Kwargs for text normalizer calls collection
validation_ds.dataset.text_normalizer_call_kwargs.verbose verbosity bool Flag to set verbosity of the text normalizer FALSE yes
validation_ds.dataset.text_normalizer_call_kwargs.punct_pre_process Preprocess punctuation in text_normalizer_call_kwargs bool Flag to pre-process punctuation TRUE yes
validation_ds.dataset.text_normalizer_call_kwargs.punct_post_process Postprocess punctuation in text_normalizer_call_kwargs bool Flag to post-process punctuation TRUE yes
validation_ds.dataset.text_tokenizer Text Tokenizer collection
validation_ds.dataset.text_tokenizer._target_ Text tokenizer class name const nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
validation_ds.dataset.text_tokenizer.punct Flag to expect punctuation bool TRUE
validation_ds.dataset.text_tokenizer.stresses Flag to expect stresses bool TRUE
validation_ds.dataset.text_tokenizer.chars Flag to expect characters bool TRUE
validation_ds.dataset.text_tokenizer.apostrophe Flag to expect apostrophe bool TRUE
validation_ds.dataset.text_tokenizer.pad_with_space Flag to pad text with space bool TRUE
validation_ds.dataset.text_tokenizer.g2p Improve g2p token collection
validation_ds.dataset.text_tokenizer.g2p._target_ G2p token class name const nemo.collections.tts.torch.g2ps.EnglishG2p
validation_ds.dataset.text_tokenizer.g2p.phoneme_dict Path to phoneme dictionary const ${phoneme_dict_path}
validation_ds.dataset.text_tokenizer.g2p.heteronyms Path to heteronyms const ${heteronyms_path}
validation_ds.dataset.text_tokenizer.g2p.phoneme_probability Phoneme probability float 0.5
validation_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples
validation_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE
validation_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE
validation_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 32
validation_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 12
optim Optimizer collection
optim.name Optimizer Name str Type of optimizer to be used during training adam
optim.lr Learning rate float Learning rate 0.0002
optim.betas Optimizer betas list List of floats [0.9, 0.98]
optim.weight_decay Weight decay float 0.000001

infer

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 yes
input_batch List of input texts list List of text sentences to render spectrograms. This only works in infer mode yes yes
input_json Input dataset to run inference hidden Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder yes yes
speaker Speaker ID int ID of the speaker to generate spectrograms 0
mode Infer mode string Mode to run inference 1. Inferences on discrete text samples (infer) 2. Inference on a dataset (infer_hifigan_ft) infer infer, infer_hifigan_ft yes

infer_onnx

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 1 yes
input_batch List of input texts list List of text sentences to render spectrograms. This only works in infer mode yes yes
speaker Speaker ID int ID of the speaker to generate spectrograms 0
phoneme_dict_path Phoneme dictionary path string Path to phoneme dictionary that was used to train the model cmudict-0.7b_nv22.01 yes yes
heteronyms Heteronyms path string Path to the heteronyms file that was used to train the model lj_speech.tsv yes yes

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save the model yes yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model yes yes yes
sample_rate Sample rate integer The target sample rate to load the audio, in Hz 22050 yes yes
train_dataset Train Dataset hidden Path to the train dataset manifest json file yes
validation_dataset Validation Dataset hidden Path to the validation dataset manifest json file yes
phoneme_dict_path Phoneme Dictionary Path str Path to the phoneme dictionary path cmudict-0.7b_nv22.01 yes yes
whitelist_path Path to whitelist tsv file str Path to the whitelist path heteronyms-030921 yes yes
heteronyms_path Path to heteronyms path str Path to the heteronyms path lj_speech.tsv yes yes
sup_data_types Supplementary data types list List of supplementary data types [“align_prior_matrix”, “pitch”]
prior_folder hidden yes
model.learn_alignment Learn alignment bool TRUE
model.bin_loss_warmup_epochs Bin loss warmup epochs integer 100
model.n_speakers N speakers integer Number of speakers in the dataset 1 yes
model.symbols_embedding_dim Symbols Embedding dimension integer The dimension of the symbols embedding 384 yes
model.max_token_duration Max token duration integer Maximum duration to clamp the tokens to 75
model.n_mel_channels Number of channels in Mel Output integer Number of channels in the Mel output 80
model.pitch_embedding_kernel_size Pitch embedding kernel size integer The kernel size of the Conv1D layer generating the pitch embeddings 3
model.n_window_size Window size integer The size of the fft window in samples 1024 yes
model.n_window_stride Window stride integer The stride of the window in samples 256 yes
model.n_fft Number of fft integer The number of fft samples 1024
model.lowfreq Low frequency input integer The lowfreq input to the mel filter calculation 0
model.highfreq High frequency input integer The highfreq input to the mel filter calculation 8000
model.window Window function str Window function type. hann
model.pitch_fmin Pitch Fmin float The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) 65.40639132514966 yes yes
model.pitch_fmax Pitch Fmax float The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) 2093.004522404789 yes yes
model.pitch_mean Pitch Average float The average used to normalize the pitch 212.35873413085938 yes yes
model.pitch_std Pitch std. deviation float The standard deviation used to normalize the pitch 68.52806091308594 yes yes
model.sample_rate Sample rate const The target sample rate to load the audio, in Hz. ${sample_rate} yes
model.text_normalizer Text normalizer Collection
model.text_normalizer._target_ Text normalizer class const The name of the nemo text normalizer class nemo_text_processing.text_normalization.normalize.Normalizer yes
model.text_normalizer.lang Text normalizer language code string The language to normalize the text against en yes
model.text_normalizer.input_case Text case string The type of the text input cased
model.text_normalizer.whitelist Whitelist tsv file string Path to the file containing whitelist text ${whitelist_path} yes
model.text_normalizer_call_kwargs Kwargs for text normalizer calls collection
model.text_normalizer_call_kwargs.verbose verbosity bool Flag to set verbosity of the text normalizer FALSE yes
model.text_normalizer_call_kwargs.punct_pre_process Preprocess punctuation in text_normalizer_call_kwargs bool Flag to pre-process punctuation TRUE yes
model.text_normalizer_call_kwargs.punct_post_process Postprocess punctuation in text_normalizer_call_kwargs bool Flag to post-process punctuation TRUE yes
model.text_tokenizer Text Tokenizer collection
model.text_tokenizer._target_ Text tokenizer class name const nemo.collections.tts.torch.tts_tokenizers.EnglishPhonemesTokenizer
model.text_tokenizer.punct Flag to expect punctuation bool TRUE
model.text_tokenizer.stresses Flag to expect stresses bool TRUE
model.text_tokenizer.chars Flag to expect characters bool TRUE
model.text_tokenizer.apostrophe Flag to expect apostrophe bool TRUE
model.text_tokenizer.pad_with_space Flag to pad text with space bool TRUE
model.text_tokenizer.g2p Improve g2p token collection
model.text_tokenizer.g2p._target_ G2p token class name const nemo.collections.tts.torch.g2ps.EnglishG2p
model.text_tokenizer.g2p.phoneme_dict Path to phoneme dictionary const ${phoneme_dict_path}
model.text_tokenizer.g2p.heteronyms Path to heteronyms const ${heteronyms_path}
model.text_tokenizer.g2p.phoneme_probability Phoneme probability float 0.5
model.train_ds Train Dataset collection Parameters to configure the training dataset
model.train_ds.dataset Train Dataset collection Parameters to configure the training dataset
model.train_ds.dataset._target_ Target const The nemo class module to be imported nemo.collections.tts.torch.data.TTSDataset yes
model.train_ds.dataset.manifest_filepath Train manifest file const Path to the train dataset manifest json file ${train_dataset} yes
model.train_ds.dataset.sample_rate Sample rate The target sample rate to load the audio, in Hz ${sample_rate} yes
model.train_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
model.train_ds.dataset.min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.1 yes
model.train_ds.dataset.int_values Input as integer values bool Load samples as 32 bit integers or not FALSE yes
model.train_ds.dataset.normalize Normalize dataset bool The flag to determine whether or not to normalize the transcript text TRUE yes
model.train_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). FALSE yes
model.train_ds.dataset.sup_data_path Prior folder const Path to the prior folder ${prior_folder} yes
model.train_ds.dataset.sup_data_types Supplementary data types const Supplementary data types ${sup_data_types} yes
model.train_ds.dataset.n_fft Number of fft samples const The number of fft samples ${model.n_fft} yes
model.train_ds.dataset.win_length STFT window length const The length of the stft windows. ${model.n_window_size}
model.train_ds.dataset.hop_length Hop Length const The hop length between fft computations. ${model.n_window_stride}
model.train_ds.dataset.window Window function const Window function type. ${model.window}
model.train_ds.dataset.n_mels Number of mel const Number of mel filterbanks. ${model.n_mel_channels}
model.train_ds.dataset.lowfreq Low frequency input const The lowfreq input to the mel filter calculation ${model.lowfreq}
model.train_ds.dataset.highfreq High frequency input const The highfreq input to the mel filter calculation ${model.highfreq}
model.train_ds.dataset.n_window_size Window size const The size of the fft window in samples ${model.n_window_size} yes
model.train_ds.dataset.n_window_stride Window stride const The stride of the window in samples ${model.n_window_stride} yes
model.train_ds.dataset.pitch_fmin Pitch Fmin const The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) ${model.pitch_fmin} yes
model.train_ds.dataset.pitch_fmax Pitch Fmax const The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) ${model.pitch_fmax} yes
model.train_ds.dataset.pitch_mean Pitch Average const The average used to normalize the pitch ${model.pitch_mean} yes
model.train_ds.dataset.pitch_std Pitch std. deviation const The standard deviation used to normalize the pitch ${model.pitch_std} yes
model.train_ds.dataset.pitch_norm Pitch normalization bool Whether to normalize pitch (via pitch_mean and pitch_std) or not TRUE
model.train_ds.dataset.use_beta_binomial_interpolator Enable Beta binomial interpolator bool Whether to use beta-binomial interpolator for calculating alignment prior matrix TRUE
model.train_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
model.train_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
model.train_ds.dataloader_params.pin_memory Pin memory bool Whether to pin memory in the PyTorch DataLoader FALSE yes
model.train_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE yes
model.train_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 32 yes yes
model.train_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 12 yes
model.validation_ds Validation Dataset collection Parameters to configure the validation dataset yes
model.validation_ds.dataset Validation Dataset collection Parameters to configure the validation dataset yes
model.validation_ds.dataset._target_ Target const The nemo class module to be imported nemo.collections.tts.torch.data.TTSDataset yes
model.validation_ds.dataset.manifest_filepath Validation manifest file const Path to the validation dataset manifest json file ${validation_dataset} yes
model.validation_ds.dataset.sample_rate Sample rate The target sample rate to load the audio, in Hz ${sample_rate} yes
model.validation_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
model.validation_ds.dataset.min_duration Min clip duration float All files with a duration less than the given value (in seconds) will be dropped
model.validation_ds.dataset.int_values Input as integer values bool Load samples as 32 bit integers or not FALSE yes
model.validation_ds.dataset.normalize Normalize dataset bool The flag to determine whether or not to normalize the transcript text TRUE yes
model.validation_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). FALSE yes
model.validation_ds.dataset.sup_data_types Supplementary data types const Supplementary data types ${sup_data_types} yes
model.validation_ds.dataset.n_fft Number of fft samples const The number of fft samples ${model.n_fft} yes
model.validation_ds.dataset.win_length STFT window length const The length of the stft windows. ${model.n_window_size}
model.validation_ds.dataset.hop_length Hop Length const The hop length between fft computations. ${model.n_window_stride}
model.validation_ds.dataset.window Window function const Window function type. ${model.window}
model.validation_ds.dataset.n_mels Number of mel const Number of mel filterbanks. ${model.n_mel_channels}
model.validation_ds.dataset.lowfreq Low frequency input const The lowfreq input to the mel filter calculation ${model.lowfreq}
model.validation_ds.dataset.highfreq High frequency input const The highfreq input to the mel filter calculation ${model.highfreq}
model.validation_ds.dataset.sup_data_path Prior folder const Path to the prior folder ${prior_folder} yes
model.validation_ds.dataset.n_window_size Window size const The size of the fft window in samples ${model.n_window_size} yes
model.validation_ds.dataset.n_window_stride Window stride const The stride of the window in samples ${model.n_window_stride} yes
model.validation_ds.dataset.pitch_fmin Pitch Fmin const The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) ${model.pitch_fmin} yes
model.validation_ds.dataset.pitch_fmax Pitch Fmax const The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) ${model.pitch_fmax} yes
model.validation_ds.dataset.pitch_mean Pitch Average const The average used to normalize the pitch ${model.pitch_mean} yes
model.validation_ds.dataset.pitch_std Pitch std. deviation const The standard deviation used to normalize the pitch ${model.pitch_std} yes
model.validation_ds.dataset.pitch_norm Pitch normalization bool Whether to normalize pitch (via pitch_mean and pitch_std) or not TRUE
model.validation_ds.dataset.use_beta_binomial_interpolator Enable Beta binomial interpolator bool Whether to use beta-binomial interpolator for calculating alignment prior matrix TRUE
model.validation_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
model.validation_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
model.validation_ds.dataloader_params.pin_memory Pin memory bool Whether to pin memory in the PyTorch DataLoader FALSE yes
model.validation_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE yes
model.validation_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 32 yes yes
model.validation_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 12 yes
model.optim Optimizer collection yes
model.optim.name Optimizer Name string Type of optimizer to be used during training lamb yes
model.optim.lr Learning rate float Learning rate 0.1 yes yes
model.optim.betas Optimizer betas list Coefficients used to compute the running averages of the gradient and its square [0.9, 0.98] yes
model.optim.weight_decay Weight decay float Weight decay (L2 penalty) 0.000001 yes
model.optim.sched Learning rate scheduler collection Parameters to configure the learning rate scheduler yes
model.optim.sched.name Scheduler Name string Type of learning rate scheduler to be used NoamAnnealing yes
model.optim.sched.warmup_steps Warm up steps integer No. of steps to warm up the learning rate 1000 yes
model.optim.sched.last_epoch Last epoch integer -1 yes
model.optim.sched.d_model Disable scaling integer Flag to disable scaling based on model dim 1 yes
model.preprocessor Preprocessor config collection Collection to configure the model preprocessor yes
model.preprocessor._target_ Target class of the preprocessor instance const The Nemo class to instantiate. nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor yes
model.preprocessor.dither Dither float 0 yes
model.preprocessor.features Number of channels in Mel Output const Number of channels in the Mel Output ${model.n_mel_channels} yes
model.preprocessor.frame_splicing Spectrogram Frames per step integer Number of spectrogram frames per step 1 yes
model.preprocessor.highfreq High frequency bound in Hz const Upper bound of the mel basis in Hz ${model.highfreq} yes
model.preprocessor.log Log Spectrograms bool Flags to enable logging spectrograms TRUE yes
model.preprocessor.log_zero_guard_type Zero guard type enum Need to avoid taking the log of zero. There are two options: “add” or “clamp”. add yes
model.preprocessor.log_zero_guard_value Zero guard value float The value to add or clamp to (depending on the zero guard type) in order to avoid taking the log of zero. 0.00001 yes
model.preprocessor.lowfreq Low frequency bound in Hz const Lower bound of the mel basis in Hz ${model.lowfreq} yes
model.preprocessor.mag_power Multiplication with mel basis float Prior to multiplication with mel basis 1 yes
model.preprocessor.n_fft FFT Window size const The size of the window for the FFT in samples. ${model.n_fft} yes
model.preprocessor.n_window_size FFT Window size const The size of the window for the FFT in samples. ${model.n_window_size} yes
model.preprocessor.n_window_stride FFT Window stride const The stride of the window for FFT ${model.n_window_stride} yes
model.preprocessor.normalize Feature Normalization string Options disable feature normalization. all_features normalizes the entire spectrogram per channel/freq null no
model.preprocessor.pad_to Pad to integer A multiple of pad_to 1 yes
model.preprocessor.pad_value Pad Value float The value that shorter mels are padded with 0 yes
model.preprocessor.preemph Pre-emphasis value float Amount of pre-emphasis to be added to the audio. Can be disabled by passing None. no
model.preprocessor.sample_rate Sampling rate const The target sample rate to load the audio in Hz. ${sample_rate} yes
model.preprocessor.window Window type const The type of window to be used. ${model.window} yes
model.preprocessor.window_size Window size bool The size of the window to be used FALSE yes
model.preprocessor.window_stride Window stride bool The stride of the window to be used FALSE yes
model.input_fft Input FFT collection Collection to configure the Input FFT yes
model.input_fft._target_ Target class for the FFT Transformer Encoder const The Nemo FFTEncoder module to be instantiated nemo.collections.tts.modules.transformer.FFTransformerEncoder yes
model.input_fft.n_layer input_fft n_layer integer Number of transformer layers 6 yes
model.input_fft.n_head input_fft num heads integer Number of heads in the MultiHeadAttn 1 yes
model.input_fft.d_model input_fft d_model const Hidden size of the input and output ${model.symbols_embedding_dim} yes
model.input_fft.d_head input_fft d_head integer Hidden size of the attention module 64 yes
model.input_fft.d_inner Input fft d_inner integer Hidden size of the convolutional layers 1536 yes
model.input_fft.kernel_size input_fft kernel_size integer Kernel size of the convolutional layers 3 yes
model.input_fft.dropout input_fft dropout float Dropout parameters 0.1 yes
model.input_fft.dropatt input_fft dropatt float Dropout parameter for attention 0.1 yes
model.input_fft.dropemb input_fft dropemb integer Dropout parameter for embedding 0 yes
model.input_fft.d_embed input_fft d_embed const Hidden size of embeddings (input fft only) ${model.symbols_embedding_dim} yes
model.output_fft output_fft collection Collection to configure the Output FFT yes
model.output_fft._target_ Target class for the FFT Transformer Decoder const The Nemo FFTransformerDecoder module to be instantiated nemo.collections.tts.modules.transformer.FFTransformerDecoder yes
model.output_fft.n_layer output_fft n_layer integer Number of transformer layers 6 yes
model.output_fft.n_head output_fft num heads integer Number of heads in the MultiHeadAttn 1 yes
model.output_fft.d_model output_fft d_model const Hidden size of the input and output ${model.symbols_embedding_dim} yes
model.output_fft.d_head output_fft d_head integer Hidden size of the attention module 64 yes
model.output_fft.d_inner output_fft d_inner integer Hidden size of the convolutional layers 1536 yes
model.output_fft.kernel_size output_fft kernel_size integer Kernel size of the convolutional layers 3 yes
model.output_fft.dropout output_fft dropout float Dropout parameters 0.1 yes
model.output_fft.dropatt output_fft dropatt float Dropout parameter for attention 0.1 yes
model.output_fft.dropemb output_fft dropemb integer Dropout parameter for embedding 0 yes
model.alignment_module alignment_module collection Configuration element for the alignment module yes
model.alignment_module._target_ alignment_module._target_ const Module to be instantiated for alignment nemo.collections.tts.modules.aligner.AlignmentEncoder yes
model.alignment_module.n_text_channels n_text_channels const The dimensionality of symbol embedding ${model.symbols_embedding_dim} yes
model.duration_predictor duration_predictor collection Configuration element for the duration predictor yes
model.duration_predictor._target_ duration_predictor._target_ const Module to be instantiated for duration predictor nemo.collections.tts.modules.fastpitch.TemporalPredictor yes
model.duration_predictor.input_size duration_predictor.input_size const Hidden size of the input and output ${model.symbols_embedding_dim} yes
model.duration_predictor.kernel_size duration_predictor.kernel_size integer Kernel size for convolutional layers 3 yes
model.duration_predictor.filter_size duration_predictor.filter_size integer Filter size for the convolutional layers 256 yes
model.duration_predictor.dropout duration_predictor.dropout float Drop out parameter 0.1 yes
model.duration_predictor.n_layers duration_predictor.n_layers integer Number of layers 2 yes
model.pitch_predictor pitch_predictor collection Configuration element for the pitch predictor yes
model.pitch_predictor._target_ pitch_predictor._target_ const Module to be instantiated for pitch predictor nemo.collections.tts.modules.fastpitch.TemporalPredictor yes
model.pitch_predictor.input_size pitch_predictor.input_size const Hidden size of the input and output ${model.symbols_embedding_dim} yes
model.pitch_predictor.kernel_size pitch_predictor.kernel_size integer Kernel size for convolutional layers 3 yes
model.pitch_predictor.filter_size pitch_predictor.filter_size integer Filter size for the convolutional layers 256 yes
model.pitch_predictor.dropout pitch_predictor.dropout float Drop out parameter 0.1 yes
model.pitch_predictor.n_layers pitch_predictor.n_layers integer Number of layers 2 yes
trainer Trainer Configurations collection Collection of parameters to configure the trainer yes
trainer.max_epochs Number of epochs collection Maximum number of epochs to train the model 100 yes yes

pitch_stats

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
num_files Number of files integer List of text sentences to render spectrograms. This only works in infer mode 10 yes yes
manifest_filepath Manifest hidden Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder yes yes
output_path Output hidden ID of the speaker to generate spectrograms 0
pitch_fmin F min float 64 yes
pitch_fmax F max float 512
n_window_size Window size integer 1024
sample_rate Sample rate integer 22050
render_plots Render plots bool TRUE
compute_stats Compute stats bool TRUE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes
result_dir Results directory hidden Path to the output results directory and logs yes
dataset_name Name string ljs yes
data_dir Data dir hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing CLI parameter hidden FALSE CLI argument
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
ssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
ssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers. Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.
ssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
ssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
ssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
ssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
ssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
ssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
ssd_config.arch Arch string The backbone for feature extraction resnet
ssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
ssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
ssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training [0]

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing CLI parameter hidden FALSE CLI argument
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
ssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
ssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers. Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.
ssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
ssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
ssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
ssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
ssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
ssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
ssd_config.arch Arch string The backbone for feature extraction resnet
ssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
ssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
ssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training [0]

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 internal FALSE
random_seed Random Seed integer Seed value for the random number generator in the network 42 FALSE
initial_epoch Initial epoch cli hidden 1 CLI argument FALSE
use_multiprocessing CLI parameter hidden FALSE CLI argument FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden FALSE
dataset_config.data_sources.image_directory_path Image path hidden FALSE
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden FALSE
dataset_config.validation_data_sources.image_directory_path Image path hidden FALSE
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden FALSE
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection FALSE
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1 inf
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1 inf FALSE
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE FALSE
training_config.learning_rate collection FALSE
training_config.learning_rate.soft_start_annealing_schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0 inf TRUE < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0 inf TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1 TRUE < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1 TRUE
training_config.regularizer.type Regularizer Type ordered The type of the regularizer being used. __L1__ __L1__,__L2__ TRUE
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 3.00E-11 inf TRUE
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 10 1 inf yes FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor ordered The name of the quantity to be monitored for early stopping loss,validation_loss,val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0 inf FALSE
eval_config Evaluation collection FALSE
eval_config.average_precision_mode Average Precision Mode ordered The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__,__INTEGRATE__ FALSE
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1 inf FALSE
eval_config.batch_size Batch Size integer batch size for evaluation 16 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1 FALSE
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0 inf
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection FALSE
augmentation_config.output_width Model Input width integer 960 1 inf yes
augmentation_config.output_height Model Input height integer 544 1 inf yes
augmentation_config.output_channel Model Input channel ordered_int 3 1,3 yes FALSE
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1 TRUE < augmentation_config.random_crop_max_scale
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1 TRUE
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5 0.1 10
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2 0.1 10 TRUE
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1 inf < augmentation_config.zoom_out_max_scale
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1 inf TRUE
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip 0 1
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. FALSE
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted,ImageNet mean will be used for image preprocessing. If set,depending on output_channel,either ‘r/g/b’ or ‘l’ key/value pair must be configured. 0 255
ssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0,2.0,0.5,3.0,1.0/3.0] FALSE
ssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers FALSE
ssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True,two boxes will be generated with an aspect ratio of 1. TRUE
ssd_config.clip_boxes Clip Boxes bool If true,all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
ssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1,0.1,0.2,0.2] FALSE
ssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05,0.1,0.25,0.4,0.55,0.7,0.85] FALSE
ssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be FALSE
ssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided,0.5 will be used as default value. FALSE
ssd_config.arch Arch ordered The backbone for feature extraction resnet resnet FALSE
ssd_config.nlayers Number of Layers ordered_int The number of conv layers in a specific arch 18 10,18,34,50,101,152 FALSE
ssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
ssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training. [0] FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1 internal
random_seed Random Seed integer Seed value for the random number generator in the network 42
initial_epoch Initial epoch cli hidden 1 CLI argument
use_multiprocessing CLI parameter hidden FALSE CLI argument
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.data_sources.label_directory_path KITTI label path hidden hidden
dataset_config.data_sources.image_directory_path Image path hidden
dataset_config.data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_data_sources.label_directory_path KITTI label path hidden
dataset_config.validation_data_sources.image_directory_path Image path hidden
dataset_config.validation_data_sources.tfrecords_directory_path TFRecords path hidden
dataset_config.include_difficult_in_training include difficult label in training bool Whether to use difficult objects in training TRUE
training_config Training collection
training_config.batch_size_per_gpu Batch Size Per GPU integer The number of images per batch per GPU. 10 1
training_config.num_epochs Number of Epochs integer The total number of epochs to run the experiment. 80 1
training_config.enable_qat Enable Quantization Aware Training bool bool FALSE
training_config.learning_rate collection
training_config.learning_rate.soft_start_annealing_schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Minimum Learning Rate float The minimum learning rate in the learning rate schedule. 5.00E-05 0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Maximum Learning Rate float The maximum learning rate in the learning rate schedule. 9.00E-03 0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float The time to ramp up the learning rate from minimum learning rate to maximum learning rate. 0.1 0 1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. 0.8 0 1
training_config.regularizer.type Regularizer Type string The type of the regularizer being used. __L1__ __L1__, __L2__
training_config.regularizer.weight Regularizer Weight float The floating point weight of the regularizer. 3.00E-05 0
training_config.checkpoint_interval Checkpoint Interval integer The interval (in epochs) at which train saves intermediate models. 1 1
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 16 1
training_config.n_workers Workers integer Number of workers in sequence dataset 8 1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping loss, validation_loss, val_loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 0
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 3 0
eval_config Evaluation collection
eval_config.average_precision_mode Average Precision Mode string The mode in which the average precision for each class is calculated. __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.validation_period_during_training Validation Period During Training integer The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. 10 1
eval_config.batch_size Batch Size integer batch size for evaluation 16 1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.01 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.6 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
augmentation_config Augmentation config collection
augmentation_config.output_width Model Input width integer 960 yes
augmentation_config.output_height Model Input height integer 544 yes
augmentation_config.output_channel Model Input channel integer 3 yes
augmentation_config.random_crop_min_scale Random Crop Min Scale float the minimum random crop size 0.3 0 1
augmentation_config.random_crop_max_scale Random Crop Max Scale float the maximum random crop size 1 0 1
augmentation_config.random_crop_min_ar Random Crop Min Aspect Ratio float the minimum random crop aspect ratio 0.5
augmentation_config.random_crop_max_ar Random Crop Max Aspect Ratio float the maximum random crop aspect ratio 2
augmentation_config.zoom_out_min_scale Zoom Out Min Scale float Minimum scale of ZoomOut augmentation 1 1
augmentation_config.zoom_out_max_scale Zoom Out Max Scale float Maximum scale of ZoomOut augmentation 4 1
augmentation_config.brightness Brightness integer Brightness delta in color jittering augmentation 32 0 255
augmentation_config.contrast Contrast float Contrast delta factor in color jitter augmentation 0.5 0 1
augmentation_config.saturation Saturation float Saturation delta factor in color jitter augmentation 0.5 0 1
augmentation_config.hue Hue integer Hue delta in color jittering augmentation 18 0 180
augmentation_config.random_flip Random Flip float Probability of performing random horizontal flip
augmentation_config.image_mean Image Mean collection A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.key Image Mean key string A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
augmentation_config.image_mean.value Image Mean value float A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured.
ssd_config.aspect_ratios_global Aspect Ratio Global string The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. [1.0, 2.0, 0.5, 3.0, 1.0/3.0]
ssd_config.aspect_ratios Aspect Ratio string The aspect ratio of anchor boxes for different SSD feature layers. Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both.
ssd_config.two_boxes_for_ar1 Two boxes for aspect-ratio=1 bool If this parameter is True, two boxes will be generated with an aspect ratio of 1. TRUE
ssd_config.clip_boxes Clip Boxes bool If true, all corner anchor boxes will be truncated so they are fully inside the feature images. FALSE
ssd_config.variances Variance string A list of 4 positive floats to decode bboxes [0.1, 0.1, 0.2, 0.2]
ssd_config.scales Scales string A list of positive floats containing scaling factors per convolutional predictor layer [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
ssd_config.steps Steps string An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be
ssd_config.offsets Offsets string An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value.
ssd_config.arch Arch string The backbone for feature extraction resnet
ssd_config.nlayers Number of Layers integer The number of conv layers in a specific arch 18
ssd_config.freeze_bn Freeze BN bool Whether to freeze all batch normalization layers during training. FALSE
ssd_config.freeze_blocks Freeze Blocks list The list of block IDs to be frozen in the model during training [0]

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
results_dir Results directory hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
verbose verbosity hidden TRUE

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
min_batch_size integer 1
opt_batch_size integer 1
gen_ds_config bool FALSE
engine_file Engine File hidden UNIX path to the model engine file. yes
verbose hidden TRUE
strict_type_constraints bool FALSE
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
cal_image_dir hidden
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100
results_dir hidden

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
experiment_spec_path hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
results_dir Results directory hidden
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

version Schema Version const The version of this schema 1 FALSE
random_seed Random Seed integer Seed value for the random number generator in the network 42 0 inf FALSE
dataset_config Dataset collection Parameters to configure the dataset FALSE
dataset_config.dataset string custom FALSE
dataset_config.augment Augment bool Boolean to augment the dataset or not FALSE TRUE/FALSE
dataset_config.buffer_size buffer_size integer This is the buffer for the number of images, at most, to be used in an iteration. Total number of images in the dataset 1 FALSE
dataset_config.filter_data filter_data bool Set this to omit images or masks that are not present TRUE TRUE/FALSE
dataset_config.resize_padding Resize Padding bool If the image needs to be resized by preserving aspect ratio TRUE TRUE/FALSE
dataset_config.resize_method Resize Method ordered The interpolation method for resize BILINEAR BILINEAR,NEAREST_NEIGHBOR,BICUBIC,AREA
dataset_config.input_image_type Input Image type ordered Gives information on if the input is RGB or grayscale grayscale color,grayscale FALSE
dataset_config.train_images_path Image path hidden FALSE
dataset_config.train_masks_path Masks path hidden FALSE
dataset_config.val_images_path Image path hidden FALSE
dataset_config.val_masks_path Masks path hidden FALSE
dataset_config.data_class_config Target Class Mappings collection Contains the parameters to configure the mapping of different classes yes yes FALSE
dataset_config.data_class_config.target_classes Target Class Mappings list list Contains the parameters to configure the mapping of different classes [{"name": "foreground","mapping_class": "foreground","label_id": 0},{"name": "background","mapping_class": "background","label_id": 1}] yes FALSE
dataset_config.augmentation_config Data Augmentation collection Collection of parameters to configure augmentation Yes FALSE
dataset_config.augmentation_config.spatial_augmentation collection Configure augmentation pertaining to spatial transformations FALSE
dataset_config.augmentation_config.spatial_augmentation.hflip_probability float probability for flipping image horizontally 0.5 0 1
dataset_config.augmentation_config.spatial_augmentation.vflip_probability float probability for flipping image vertically 0.5 0 1
dataset_config.augmentation_config.spatial_augmentation.crop_and_resize_prob float probability at which to crop and resize 0.5 0 1
dataset_config.augmentation_config.brightness_augmentation collection Configure augmentation pertaining to brightness FALSE
dataset_config.augmentation_config.brightness_augmentation.delta float 0.2 0 1
model_config Model collection FALSE
model_config.arch BackBone Architecture ordered The architecture of the backbone feature extractor to be used for training. resnet resnet,vgg,vanilla_unet,efficientnet_b0,vanilla_dynamic,byom yes FALSE
model_config.enable_qat Enable Quantization aware training bool Set this to true,to enable quantization during re-training of pruned model FALSE FALSE/ TRUE FALSE
model_config.byom_model Model path to BYOM .tltb hidden Set the path to byom model when using byom arch None FALSE
model_config.load_graph Pruned model Load Graph bool
For a pruned model,set this parameter to True. Pruning modifies the original graph,so the pruned model graph and the weights need to be imported.
FALSE FALSE/ TRUE FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template,and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training. FALSE FALSE/ TRUE
model_config.all_projections All Projections bool For templates with shortcut connections,this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers,irrespective of whether there is a change in stride across the input and output. TRUE
model_config.num_layers Number of Layers ordered_int The depth of the feature extractor for scalable templates. 18 10,18,34,50 yes FALSE
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True,MaxPooling is used to downsample; however,for the object-detection network,NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE FALSE/ TRUE
model_config.enable_qat bool FALSE FALSE/ TRUE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 0.1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ __FLOAT32__ yes FALSE
model_config.initializer Kernel Initializer ordered The type of initializer for the kernels __HE_UNIFORM__,__ HE_NORMAL__,__ GLOROT_UNIFORM__
model_config.model_input_height Model Input height int The model input dimensions 320 32 384 256,288,320,352,384 TRUE / 32
model_config.model_input_width Model Input width int The model input dimensions 320 32 384 256,288,320,352,384 TRUE / 32
model_config.model_input_channels Model input channels ordered_int The model input dimensions 1 1,3 FALSE
training_config Training collection FALSE
training_config.batch_size Batch Size Per GPU integer The number of images per batch per GPU. 3 1 inf yes
training_config.epochs Number of Epochs integer The total number of epochs to run the experiment. 50 1 yes Yes FALSE
training_config.log_summary_steps integer Number of steps after which to display the log summary 10 1 FALSE
training_config.checkpoint_interval checkpoint interval integer Number of epochs after which to save the checkpoint 1 FALSE
training_config.loss ordered Loss to be used cross_entropy cross_entropy,cross_dice_sum,dice
training_config.learning_rate float Learning rate 0.0001 0.000002 0.001 TRUE
training_config.lr_scheduler learning rate scheduler string FALSE
training_config.weights_monitor bool Bool to turn on tensorboard visualization of loss and gradients variations FALSE
training_config.regularizer collection Regularizer to use FALSE
training_config.regularizer.type ordered __L2__ __L1__,__L2__ TRUE
training_config.regularizer.weight float 2.00E-05 3.00E-09 3.00E-03 TRUE
training_config.optimizer Optimizer collection FALSE
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 0.6 0.98 yes TRUE
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 0 1 yes
training_config.visualizer collection FALSE
training_config.visualizer.enabled bool FALSE FALSE/ TRUE FALSE
training_config.visualizer.save_summary_steps integer Steps at which to visualize loss on TB. 1 FALSE
training_config.visualizer.infrequent_save_summary_steps integer Steps at which to visualize input images,ground truth and histograms. 1 FALSE
training_config.data_options bool TRUE FALSE/ TRUE FALSE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes
b batch_size integer calibration batch size 8
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case. yes
model etlt model from export hidden

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.dataset string custom
dataset_config.augment Augment bool Boolean to augment the dataset or not FALSE
dataset_config.buffer_size buffer_size integer This is the buffer for the maximum number of images to be used in an iteration. Total number of images in the dataset
dataset_config.filter_data filter_data bool Set this to omit images or masks that are not present
dataset_config.resize_padding Resize Padding bool If the image needs to be resized by preserving aspect ratio
dataset_config.resize_method Resize Method string BILINEAR, NEAREST_NEIGHBOR, BICUBIC AREA
dataset_config.input_image_type Input Image type string Gives information on if the input is RGB or grayscale grayscale color, grayscale
dataset_config.data_sources.image_path Image path hidden
dataset_config.data_sources.masks_path Masks path hidden
dataset_config.data_class_config Target Class Mappings collection Contains the parameters to configure the mapping of different classes yes yes
dataset_config.data_class_config.target_classes Target Class Mappings list list Contains the parameters to configure the mapping of different classes [{“name”: “foreground”, “mapping_class”: “foreground”, “label_id”: 0}, {“name”: “background”, “mapping_class”: “background”, “label_id”: 1}] yes
augmentation_config Data Augmentation collection Collection of parameters to configure augmentation Yes
augmentation_config.spatial_augmentation collection Configure augmentation pertaining to spatial transformations
augmentation_config.spatial_augmentation.hflip_probability float probability for flipping image horizontally
augmentation_config.spatial_augmentation.vflip_probability float probability for flipping image vertically
augmentation_config.spatial_augmentation.crop_and_resize_prob float probability at which to crop and resize
model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet, vanilla_unet_dynamic yes
model_config.enable_qat Enable Quantization aware training bool Set this to true, to enable quantization during re-training of pruned model FALSE
model_config.byom_model Model path to BYOM .tltb hidden Set the path to byom model when using byom arch
model_config.load_graph Pruned model Load Graph bool
For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.
TRUE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE
model_config.num_layers Number of Layers integer The depth of the feature extractor for scalable templates. 18 10, 18, 34, 50, 101 yes
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. FALSE
model_config.enable_qat bool FALSE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 0.1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ __FLOAT32__ yes
model_config.initializer Kernel Initializer enum The type of initializer for the kernels __HE_UNIFORM__,__ HE_NORMAL__,__ GLOROT_UNIFORM__
model_config.model_input_height Model Input height int The model input dimensions 320
model_config.model_input_width Model Input width int The model input dimensions 320
model_config.model_input_channels Model input channels int The model input dimensions 1
training_config Training collection
training_config.batch_size Batch Size Per GPU integer The number of images per batch per GPU. 3 1 yes
training_config.epochs Number of Epochs integer The total number of epochs to run the experiment. 50 1 yes Yes
training_config.log_summary_steps integer Number of steps after which to display the log summary 10
training_config.checkpoint_interval checkpoint interval integer Number of epochs after which to save the checkpoint 1
training_config.loss string Loss to be used cross_entropy cross_entropy, cross_dice_sum, dice
training_config.learning_rate float Learning rate 0.001
training_config.lr_scheduler learning rate scheduler string
training_config.weights_monitor bool Bool to turn on tensorboard visualization of loss and gradients variations
training_config.regularizer collection Regularizer to use
training_config.regularizer.type string __L2__ __L1__, __L2__
training_config.regularizer.weight float 1.00E-05
training_config.optimizer Optimizer collection
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 yes
training_config.visualizer collection
training_config.visualizer.enabled bool FALSE
training_config.visualizer.save_summary_steps integer Steps at which to visualize loss on TB.
training_config.visualizer.infrequent_save_summary_steps integer Steps at which to visualize input images, ground truth and histograms.
training_config.data_options bool TRUE

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.dataset string custom
dataset_config.augment Augment bool Boolean to augment the dataset or not FALSE
dataset_config.buffer_size buffer_size integer This is the buffer for the maximum number of images to be used in an iteration. Total number of images in the dataset
dataset_config.filter_data filter_data bool Set this to omit images or masks that are not present TRUE
dataset_config.resize_padding Resize Padding bool If the image needs to be resized by preserving aspect ratio
dataset_config.resize_method Resize Method string BILINEAR, NEAREST_NEIGHBOR, BICUBIC AREA
dataset_config.input_image_type Input Image type string Gives information on if the input is RGB or grayscale grayscale color, grayscale
dataset_config.data_sources.image_path Image path hidden
dataset_config.data_sources.masks_path Masks path hidden
dataset_config.data_class_config Target Class Mappings collection Contains the parameters to configure the mapping of different classes yes yes
dataset_config.data_class_config.target_classes Target Class Mappings list list Contains the parameters to configure the mapping of different classes [{“name”: “foreground”, “mapping_class”: “foreground”, “label_id”: 0}, {“name”: “background”, “mapping_class”: “background”, “label_id”: 1}] yes
augmentation_config Data Augmentation collection Collection of parameters to configure augmentation Yes
augmentation_config.spatial_augmentation collection Configure augmentation pertaining to spatial transformations
augmentation_config.spatial_augmentation.hflip_probability float probability for flipping image horizontally
augmentation_config.spatial_augmentation.vflip_probability float probability for flipping image vertically
augmentation_config.spatial_augmentation.crop_and_resize_prob float probability at which to crop and resize
model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet,vanilla_unet_dynamic yes
model_config.enable_qat Enable Quantization aware training bool Set this to true, to enable quantization during re-training of pruned model FALSE
model_config.byom_model Model path to BYOM .tltb hidden Set the path to byom model when using byom arch
model_config.load_graph Pruned model Load Graph bool
For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.
FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE
model_config.num_layers Number of Layers integer The depth of the feature extractor for scalable templates. 18 10, 18, 34, 50, 101 yes
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE
model_config.enable_qat bool FALSE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 0.1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ __FLOAT32__ yes
model_config.initializer Kernel Initializer enum The type of initializer for the kernels __HE_UNIFORM__,__ HE_NORMAL__,__ GLOROT_UNIFORM__
model_config.model_input_height Model Input height int The model input dimensions 320
model_config.model_input_width Model Input width int The model input dimensions 320
model_config.model_input_channels Model input channels int The model input dimensions 1
training_config Training collection
training_config.batch_size Batch Size Per GPU integer The number of images per batch per GPU. 3 1 yes
training_config.epochs Number of Epochs integer The total number of epochs to run the experiment. 50 1 yes Yes
training_config.log_summary_steps integer Number of steps after which to display the log summary 10
training_config.checkpoint_interval checkpoint interval integer Number of epochs after which to save the checkpoint 1
training_config.loss string Loss to be used cross_entropy cross_entropy, cross_dice_sum, dice
training_config.learning_rate float Learning rate 0.00008
training_config.lr_scheduler learning rate scheduler string
training_config.weights_monitor bool Bool to turn on tensorboard visualization of loss and gradients variations
training_config.regularizer collection Regularizer to use
training_config.regularizer.type string __L2__ __L1__, __L2__
training_config.regularizer.weight float 1.00E-05
training_config.optimizer Optimizer collection
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 yes
training_config.visualizer collection
training_config.visualizer.enabled bool FALSE
training_config.visualizer.save_summary_steps integer Steps at which to visualize loss on TB.
training_config.visualizer.infrequent_save_summary_steps integer Steps at which to visualize input images, ground truth and histograms.
training_config.data_options bool TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
random_seed Random Seed integer Seed value for the random number generator in the network 42
dataset_config Dataset collection Parameters to configure the dataset
dataset_config.dataset string custom
dataset_config.augment Augment bool Boolean to augment the dataset or not FALSE
dataset_config.buffer_size buffer_size integer This is the buffer for the maximum number of images to be used in an iteration. Total number of images in the dataset
dataset_config.filter_data filter_data bool Set this to omit images or masks that are not present TRUE
dataset_config.resize_padding Resize Padding bool If the image needs to be resized by preserving aspect ratio
dataset_config.resize_method Resize Method string BILINEAR, NEAREST_NEIGHBOR, BICUBIC AREA
dataset_config.input_image_type Input Image type string Gives information on if the input is RGB or grayscale grayscale color, grayscale
dataset_config.data_sources.image_path Image path hidden
dataset_config.data_sources.masks_path Masks path hidden
dataset_config.data_class_config Target Class Mappings collection Contains the parameters to configure the mapping of different classes yes yes
dataset_config.data_class_config.target_classes Target Class Mappings list list Contains the parameters to configure the mapping of different classes [{“name”: “foreground”, “mapping_class”: “foreground”, “label_id”: 0}, {“name”: “background”, “mapping_class”: “background”, “label_id”: 1}] yes
augmentation_config Data Augmentation collection Collection of parameters to configure augmentation Yes
augmentation_config.spatial_augmentation collection Configure augmentation pertaining to spatial transformations
augmentation_config.spatial_augmentation.hflip_probability float probability for flipping image horizontally
augmentation_config.spatial_augmentation.vflip_probability float probability for flipping image vertically
augmentation_config.spatial_augmentation.crop_and_resize_prob float probability at which to crop and resize
model_config Model collection
model_config.arch BackBone Architecture string The architecture of the backbone feature extractor to be used for training. resnet resnet,vanilla_unet_dynamic yes
model_config.enable_qat Enable Quantization aware training bool Set this to true, to enable quantization during re-training of pruned model FALSE
model_config.byom_model Model path to BYOM .tltb hidden Set the path to byom model when using byom arch
model_config.load_graph Pruned model Load Graph bool
For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported.
FALSE
model_config.freeze_blocks Freeze Blocks integer This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. 0 3
model_config.freeze_bn Freeze Batch Normalization bool A flag to determine whether to freeze the Batch Normalization layers in the model during training.
model_config.all_projections All Projections bool For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. TRUE
model_config.num_layers Number of Layers integer The depth of the feature extractor for scalable templates. 18 10, 18, 34, 50, 101 yes
model_config.use_pooling Use Pooling bool Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions.
model_config.use_batch_norm Use Batch Normalization bool A flag to determine whether to use Batch Normalization layers or not. TRUE
model_config.enable_qat bool FALSE
model_config.dropout_rate Dropout Rate float Probability for drop out 0 0.1
model_config.training_precision.backend_floatx Backend Training Precision string A nested parameter that sets the precision of the backend training framework. __FLOAT32__ __FLOAT32__ yes
model_config.initializer Kernel Initializer enum The type of initializer for the kernels __HE_UNIFORM__,__ HE_NORMAL__,__ GLOROT_UNIFORM__
model_config.model_input_height Model Input height int The model input dimensions 320
model_config.model_input_width Model Input width int The model input dimensions 320
model_config.model_input_channels Model input channels int The model input dimensions 1
training_config Training collection
training_config.batch_size Batch Size Per GPU integer The number of images per batch per GPU. 3 1 yes
training_config.epochs Number of Epochs integer The total number of epochs to run the experiment. 50 1 yes Yes
training_config.log_summary_steps integer Number of steps after which to display the log summary 10
training_config.checkpoint_interval checkpoint interval integer Number of epochs after which to save the checkpoint 1
training_config.loss string Loss to be used cross_entropy cross_entropy, cross_dice_sum, dice
training_config.learning_rate float Learning rate 0.00008
training_config.lr_scheduler learning rate scheduler string
training_config.weights_monitor bool Bool to turn on tensorboard visualization of loss and gradients variations
training_config.regularizer collection Regularizer to use
training_config.regularizer.type string __L2__ __L1__, __L2__
training_config.regularizer.weight float 1.00E-05
training_config.optimizer Optimizer collection
training_config.optimizer.adam.epsilon Optimizer Adam Epsilon float A very small number to prevent any division by zero in the implementation. 1.00E-08 yes
training_config.optimizer.adam.beta1 Optimizer Adam Beta1 float 0.899999976 yes
training_config.optimizer.adam.beta2 Optimizer Adam Beta2 float 0.999000013 yes
training_config.visualizer collection
training_config.visualizer.enabled bool FALSE
training_config.visualizer.save_summary_steps integer Steps at which to visualize loss on TB.
training_config.visualizer.infrequent_save_summary_steps integer Steps at which to visualize input images, ground truth and histograms.
training_config.data_options bool TRUE

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 1 yes
export_format Export format string RIVA
export_to Export To hidden

finetune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
resume_model_weights Pretrained model path hidden Path to the pre-trained model yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save the model yes yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model yes yes yes
train_dataset Train Dataset hidden Path to the train dataset manifest json file yes
validation_dataset Validation Dataset hidden Path to the validation dataset manifest json file yes
sample_rate Sample rate integer The target sample rate to load the audio, in Hz 22050
n_window_stride Window stride integer The stride of the window in samples 256 yes
train_min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.75 yes
train_n_segments Number of segments integer The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. 16384 yes
val_min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.75 yes
val_n_segments Number of segments integer The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. 16384 yes
training_ds Train Dataset collection Parameters to configure the training dataset
training_ds.dataset Train Dataset collection Parameters to configure the training dataset
training_ds.dataset._target_ Target dataset class const Nemo training ds class instance nemo.collections.tts.torch.data.VocoderDataset yes
training_ds.dataset.manifest_filepath Train manifest file const Path to the train dataset manifest json file ${train_dataset} yes
training_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz ${sample_rate} yes
training_ds.dataset.min_duration Min clip duration const All files with a duration lesser than the given value (in seconds) will be dropped ${train_min_duration} yes
training_ds.dataset.n_segments Number of segments const The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. ${train_n_segments} yes
training_ds.dataset.load_precomputed_mel Load precomputed mel bool Load precomputed mel TRUE yes
training_ds.dataset.hop_length Hop length const The hop length between FFT computations ${n_window_stride} yes
training_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
training_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
training_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE yes
training_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 16 yes yes
training_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 4 yes
validation_ds Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset._target_ Target dataset class const Nemo validation ds class instance nemo.collections.tts.torch.data.VocoderDataset yes
validation_ds.dataset.manifest_filepath Validation manifest file const Path to the validation dataset manifest json file ${validation_dataset} yes
validation_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz ${sample_rate} yes
validation_ds.dataset.min_duration Min clip duration const All files with a duration lesser than the given value (in seconds) will be dropped ${val_min_duration} yes
validation_ds.dataset.n_segments Number of segments const The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. ${val_n_segments} yes
validation_ds.dataset.load_precomputed_mel Load precomputed mel bool Load precomputed mel TRUE yes
validation_ds.dataset.hop_length Hop length const The hop length between FFT computations ${n_window_stride} yes
validation_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
validation_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
validation_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation FALSE yes
validation_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 2 yes yes
validation_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 1 yes
optim Optimizer collection yes
optim._target_ Optimizer Class const The class of the Optimizer to be instantiated torch.optim.AdamW yes
optim.lr Learning rate float Learning rate 0.0001 yes yes
optim.betas Optimizer betas list Coefficients used to compute the running averages of the gradient and its square [0.8, 0.99] yes
trainer collection Parameters to configure the trainer object
trainer.max_steps Maximum Steps integer Maximum number of steps to run training 1000 0 yes
trainer.max_epochs Maximum number of epochs integer Maximum number of epochs to run training. This parameter supersedes the trainer.max_steps parameter 2 0 yes yes

infer

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 1 yes
input_path List of input texts hidden Path to the directory containing spectrogram outputs from FastPitch inference yes yes
output_path Input dataset to run inference hidden Path to the output directory containing rendered audio clips yes yes
sample_rate Sample rate int Sampling rate of the output audio clip. 22050 yes yes

infer_onnx

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 1 yes
input_path List of input texts hidden Path to the directory containing spectrogram outputs from FastPitch inference yes yes
output_path Input dataset to run inference hidden Path to the output directory containing rendered audio clips yes yes
sample_rate Sample rate int Sampling rate of the output audio clip. 22050 yes yes

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes
result_dir Results directory hidden Path to the output results directory and logs yes
key Save key hidden Key to save the model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 yes yes
train_dataset Train Dataset hidden Path to the train dataset manifest json file yes
validation_dataset Validation Dataset hidden Path to the validation dataset manifest json file yes
sample_rate Sample rate integer The target sample rate to load the audio, in Hz 22050
train_min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped 0.75 yes
train_max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
train_n_segments Number of segments integer The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. 8192 yes
val_min_duration Min clip duration float All files with a duration lesser than the given value (in seconds) will be dropped
val_max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped
val_n_segments Number of segments integer The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. -1 yes
training_ds Train Dataset collection Parameters to configure the training dataset
training_ds.dataset Train Dataset collection Parameters to configure the training dataset
training_ds.dataset._target_ Target dataset class const Nemo training ds class instance nemo.collections.tts.torch.data.VocoderDataset yes
training_ds.dataset.manifest_filepath Train manifest file const Path to the train dataset manifest json file ${train_dataset} yes
training_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz ${sample_rate} yes
training_ds.dataset.n_segments Number of segments const The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. ${train_n_segments} yes
training_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped ${train_max_duration} yes
training_ds.dataset.min_duration Min clip duration const All files with a duration lesser than the given value (in seconds) will be dropped ${train_min_duration} yes
training_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). FALSE yes
training_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
training_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
training_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE yes
training_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 16 yes yes
training_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 4 yes
validation_ds Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset Validation Dataset collection Parameters to configure the validation dataset
validation_ds.dataset._target_ Target dataset class const Nemo validation ds class instance nemo.collections.tts.torch.data.VocoderDataset yes
validation_ds.dataset.manifest_filepath Train manifest file const Path to the validation dataset manifest json file ${train_dataset} yes
validation_ds.dataset.sample_rate Sample rate const The target sample rate to load the audio, in Hz ${sample_rate} yes
validation_ds.dataset.n_segments Number of segments const The length of the audio in sample to load. For example, given a sampling rate of 16kHz, and n_segments=16000, a random 1 second of audio from the clip will be loaded. The section will sample randomly every time the audio is batched. This can be set to -1 to load the entire audio. ${val_n_segments} yes
validation_ds.dataset.max_duration Max clip duration float All files with a duration greater than the given value (in seconds) will be dropped ${val_max_duration} yes
validation_ds.dataset.min_duration Min clip duration const All files with a duration lesser than the given value (in seconds) will be dropped ${val_min_duration} yes
validation_ds.dataset.trim Trim bool Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). FALSE yes
validation_ds.dataloader_params Dataloader parameters collection Configuring the dataloader yielding the data samples yes
validation_ds.dataloader_params.drop_last Drop last bool Whether to drop the last samples FALSE yes
validation_ds.dataloader_params.shuffle Enable shuffle bool Whether to shuffle the data or not. We recommend True for training data, and false for validation TRUE yes
validation_ds.dataloader_params.batch_size Batch Size integer Number of samples per batch of data. 16 yes yes
validation_ds.dataloader_params.num_workers Number of workers integer The number of worker threads for loading the dataset 1 yes
model Model Config collection Collection to configure the HiFiGAN model element
model.preprocessor Preprocessor config collection Collection to configure the model preprocessor yes
model.preprocessor._target_ Target class of the preprocessor instance const The Nemo class to instantiate. nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures yes
model.preprocessor.dither Dither float 0 yes
model.preprocessor.frame_splicing Spectrogram Frames per step integer Number of spectrogram frames per step 1 yes
model.preprocessor.nfilt Number of filter integer Number of filters in the conv layer 80
model.preprocessor.highfreq High frequency bound in Hz integer Upper bound of the mel basis in Hz 8000 yes
model.preprocessor.log Log Spectrograms bool Flags to enable logging spectrograms TRUE yes
model.preprocessor.log_zero_guard_type Zero guard type string Need to avoid taking the log of zero. There are two options: “add” or “clamp”. clamp yes
model.preprocessor.log_zero_guard_value Zero guard value float The value to be set so as to not take the log(zero). 0.00001
model.preprocessor.lowfreq Low frequency bound in Hz integer Lower bound of the mel basis in Hz 0 yes
model.preprocessor.mag_power Multiplication with mel basis integer Prior to multiplication with mel basis 1 yes
model.preprocessor.n_fft FFT Window size integer The size of the window for the FFT in samples. 1024 yes
model.preprocessor.n_window_size FFT Window size integer The size of the window for the FFT in samples. 1024 yes
model.preprocessor.n_window_stride FFT Window stride integer The stride of the window for FFT 256 yes
model.preprocessor.normalize Feature Normalization string Options disable feature normalization. all_features normalizes the entire spectrogram per channel/freq no
model.preprocessor.pad_to Pad to integer A multiple of pad_to 0 yes
model.preprocessor.pad_value Pad Value float The value that shorter mels are padded with -11.52 yes
model.preprocessor.preemph Pre-emphasis value float Amount of pre-emphasis to be added to the audio. Can be disabled by passing None. no
model.preprocessor.sample_rate Sampling rate integer The target sample rate to load the audio in Hz. 22050 yes
model.preprocessor.window Window type string The type of window to be used. hann yes
model.preprocessor.exact_pad Exact pad bool TRUE
model.preprocessor.use_grads Use grads bool FALSE
model.optim Optimizer collection yes
model.optim._target_ Optimizer Class const The class of the Optimizer to be instantiated torch.optim.AdamW yes
model.optim.lr Learning rate float Learning rate 0.0002 yes yes
model.optim.betas Optimizer betas list Coefficients used to compute the running averages of the gradient and its square [0.8, 0.99] yes
model.sched Learning rate scheduler collection Parameters to configure the learning rate scheduler yes
model.sched.name Scheduler Name string Type of learning rate scheduler to be used CosineAnnealing yes
model.sched.warmup_ratio Warm up steps float Ratio of steps to warm up the learning rate 0.02 yes
model.sched.min_lr Minimum Learning Rate float Lower bound of the learning rate scheduler 1.00E-05 yes
model.max_steps Maximum steps const Maximum number of steps to run training ${trainer.max_steps} yes
model.l1_loss_factor L1 Loss factor integer The multiplicative factor for L1 loss used in training 45 yes
model.denoise_strength Denoise strength float The small denoising factor, currently only used in validation 0.0025 yes
trainer collection Parameters to configure the trainer object
trainer.max_steps Maximum Steps integer Maximum number of steps to run training 25000 0 yes

dataset_convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

param_type (internal / hidden / inferred)

CLI

version Schema Version const The version of this schema 1
experiment_spec Experiment spec hidden Path to the training experiment spec file yes yes
result_dir Results directory hidden Path to the output results directory and logs yes yes
key Save key hidden Key to save/load the model yes yes
resume_model_weights Pretrained model path hidden Path to the trained/finetuned model yes
gpus Number of GPUs hidden Number of GPUs to be used to train the model 1 1 1 yes
dataset_name Name string ljs ljspeech, ljs
delimiter Delimiter string

data_dir Data Dir hidden Path to the dataset

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 1.00E-06
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 1.00E-04
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE)
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10
training_config.enable_qat QAT bool Enable QAT or not FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3
training_config.n_workers Workers integer Number of workers in sequence dataset 4
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
yolov3_config YOLOv3 collection
yolov3_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
yolov3_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
yolov3_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
yolov3_config.matching_neutral_box_iou float 0.7
yolov3_config.arch Arch string backbone(architecture) resnet
yolov3_config.nlayers Number of Layers integer number of layers for this architecture 18
yolov3_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2
yolov3_config.loss_loc_weight weighting for location loss float weighting factor for location loss 0.8
yolov3_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 100
yolov3_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov3_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training
yolov3_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE
yolov3_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__
eval_config.batch_size Batch Size integer batch size for evaluation 8
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Pruning Granularity string Number of filters to remove at a time. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version integer The version of this schema 1
threshold float 0.3
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 1.00E-06
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 1.00E-04
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE)
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10
training_config.enable_qat QAT bool Enable QAT or not FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3
training_config.n_workers Workers integer Number of workers in sequence dataset 4
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
yolov3_config YOLOv3 collection
yolov3_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
yolov3_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
yolov3_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
yolov3_config.matching_neutral_box_iou float 0.7
yolov3_config.arch Arch string backbone(architecture) resnet
yolov3_config.nlayers Number of Layers integer number of layers for this architecture 18
yolov3_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2
yolov3_config.loss_loc_weight weighting for location loss float weighting factor for location loss 0.8
yolov3_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 100
yolov3_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov3_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training
yolov3_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE
yolov3_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__
eval_config.batch_size Batch Size integer batch size for evaluation 8
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
experiment_spec_path Experiment spec path hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
results_dir Results directory hidden
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

version Schema Version integer The version of this schema 1 FALSE
random_seed Random Seed integer Random seed 42 0 inf FALSE
dataset_config Dataset collection Dataset configuration FALSE
dataset_config.data_sources Data Source hidden Data source FALSE
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training FALSE
dataset_config.data_sources.root_path Root Path hidden The root path FALSE
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source FALSE
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training FALSE
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person masked-person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0 0 inf FALSE
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources FALSE
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE TRUE,FALSE
dataset_config.type Type string Dataset type,either kitti or coco kitti kitti,coco FALSE
dataset_config.image_extension Image Extension string The image extension png jpg,jpeg,png,JPG,JPEG,PNG FALSE
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE FALSE TRUE,FALSE FALSE
augmentation_config Data Augmentation collection Data augmentation configuration Yes FALSE
augmentation_config.hue Hue float Hue variance 0.1 0 1
augmentation_config.saturation Saturation float Saturation variance 1.5 1 inf
augmentation_config.exposure Exposure float Exposure 1.5 1 inf
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0 0 1
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5 0 1
augmentation_config.jitter Jitter float Jitter 0.3 0 1
augmentation_config.output_width Output Width integer Output Image Width 960 128 inf TRUE / 32
augmentation_config.output_height Output Height integer Output Image Height 544 128 inf TRUE / 32
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1,3 FALSE
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 0 inf
augmentation_config.image_mean Image Mean collection per-channel image mean values FALSE
augmentation_config.image_mean.key Names for input image channels string channel name r,g,b FALSE
augmentation_config.image_mean.value mean value for the channel of image float mean value of this channel of the name 1 255 FALSE
training_config Training collection Training configuration FALSE
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 1 inf
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 1 inf FALSE
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate,example: 1e-7 1.00E-06 0 inf < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 0.0001 0 inf TRUE TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1 0 1 < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5 0 1 TRUE TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0 inf TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0 1
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 0 inf < training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate
training_config.regularizer Regularizer collection FALSE
training_config.regularizer.type Type ordered Type of regularizer,either NO_REG,L1 or L2 __L1__ __L1__,__L2__,__NO_REG__ TRUE
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 3.00E-11 inf TRUE
training_config.optimizer.adam Adam collection FALSE
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 0 1
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 0 1
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 0 1 TRUE
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE,FALSE
training_config.optimizer.sgd SGD collection FALSE
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) 0 1
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE,FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 1 inf FALSE
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE,FALSE FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed FALSE
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model FALSE
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 4 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE,FALSE FALSE
yolov3_config YOLOv3 collection FALSE
yolov3_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94,60.67),(159.06,114.59),(297.59,176.38)] FALSE
yolov3_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99,31.91),(79.57,31.75),(56.80,56.93)] FALSE
yolov3_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60,13.88),(30.25,20.25),(20.67,49.63)] FALSE
yolov3_config.matching_neutral_box_iou IoU threshold for neutral box float IoU threshold for neutral box 0.7 0 1
yolov3_config.arch Arch ordered backbone(architecture) resnet resnet,vgg,darknet,googlenet,mobilenet_v1,mobilenet_v2,squeezenet FALSE
yolov3_config.nlayers Number of Layers ordered_int number of layers for this architecture 18 10,18,34,50,101,152,16,19,17,53 FALSE
yolov3_config.arch_conv_blocks Extra Convolution Blocks ordered_int Number of extra convolution blocks 2 0,1,2
yolov3_config.loss_loc_weight weighting for location loss float weighting factor for location loss 0.8 0 inf
yolov3_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 100 0 inf
yolov3_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1 0 inf
yolov3_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training FALSE
yolov3_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE,FALSE
yolov3_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE,FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 1 inf
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE,FALSE FALSE
eval_config.average_precision_mode AP Mode ordered Average Precision mode,either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__,__INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE,FALSE FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 1.00E-06
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 1.00E-04
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE)
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10
training_config.enable_qat QAT bool Enable QAT or not FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3
training_config.n_workers Workers integer Number of workers in sequence dataset 4
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE
yolov3_config YOLOv3 collection
yolov3_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)]
yolov3_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)]
yolov3_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)]
yolov3_config.matching_neutral_box_iou float 0.7
yolov3_config.arch Arch string backbone(architecture) resnet
yolov3_config.nlayers Number of Layers integer number of layers for this architecture 18
yolov3_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2
yolov3_config.loss_loc_weight weighting for location loss float weighting factor for location loss 0.8
yolov3_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 100
yolov3_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov3_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training
yolov3_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE
yolov3_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5
nms_config.top_k Top K integer Maximum number of objects after NMS 200
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__
eval_config.batch_size Batch Size integer batch size for evaluation 8
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format input_name,min_shape,opt_shape,max_shape, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE true, false
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob float 0.5 [0, 1)
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 1.00E-04 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) resnet cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture 18 depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training list of integers
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment 0.2 (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 (0, 1)
nms_config.top_k Top K integer Maximum number of objects after NMS 200 >0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE, FALSE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 >=1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 (0, 1)
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE, FALSE

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
threshold float 0.3
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE true, false
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob float 0.5 [0, 1)
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 1.00E-04 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) resnet cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture 18 depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training list of integers
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment 0.2 (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 (0, 1)
nms_config.top_k Top K integer Maximum number of objects after NMS 200 >0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE, FALSE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 >=1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 (0, 1)
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE, FALSE

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
experiment_spec_path Experiment spec path hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
results_dir Results directory hidden
verbose verbosity hidden TRUE

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

version Schema Version integer The version of this schema 1 FALSE
random_seed Random Seed integer Random seed 42 0 inf FALSE
dataset_config Dataset collection Dataset configuration FALSE
dataset_config.data_sources Data Source hidden Data source FALSE
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training FALSE
dataset_config.data_sources.root_path Root Path hidden The root path FALSE
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source FALSE
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training FALSE
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person masked-person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0 0 inf FALSE
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources FALSE
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE TRUE,FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti kitti,coco FALSE
dataset_config.image_extension Image Extension string The image extension png jpg,jpeg,png,JPG,JPEG,PNG FALSE
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE FALSE TRUE,FALSE FALSE
augmentation_config Data Augmentation collection Data augmentation configuration Yes FALSE
augmentation_config.hue Hue float Hue variance 0.1 0 1
augmentation_config.saturation Saturation float Saturation variance 1.5 1 inf
augmentation_config.exposure Exposure float Exposure 1.5 1 inf
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0 0 1
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5 0 1 TRUE
augmentation_config.jitter Jitter float Jitter 0.3 0 1
augmentation_config.output_width Output Width integer Output Image Width 960 128 inf / 32
augmentation_config.output_height Output Height integer Output Image Height 544 128 inf / 32
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1,3 FALSE
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 0 inf
augmentation_config.mosaic_prob Probability of mosaic augmentation float Probability of mosaic augmentation 0.5 0 1 TRUE
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2 0 0.5 TRUE
augmentation_config.image_mean Image Mean collection per-channel image mean values FALSE
augmentation_config.image_mean.key Names for input image channels string channel name r,g,b FALSE
augmentation_config.image_mean.value mean value for the channel of image float mean value of this channel of the name -inf inf FALSE
training_config Training collection Training configuration FALSE
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 1 inf
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 1 inf FALSE
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 1.00E-06 0 inf < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 0.0001 0 inf TRUE TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1 0 1 < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5 0 1 TRUE TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0 inf TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0 1
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 0 inf < training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate
training_config.regularizer Regularizer collection FALSE
training_config.regularizer.type Type ordered Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__,__L2__,__NO_REG__ TRUE
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 3.00E-11 inf TRUE
training_config.optimizer.adam Adam collection FALSE
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 0 1
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 0 1
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 0 1
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE,FALSE
training_config.optimizer.sgd SGD collection FALSE
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) 0 1
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE,FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 1 inf FALSE
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE,FALSE FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed FALSE
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model FALSE
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 4 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE,FALSE FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor ordered The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE,FALSE FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 1 inf FALSE
training_config.model_ema ModelEMA bool Enable ModelEMA FALSE TRUE,FALSE
yolov4_config YOLOv4 collection FALSE
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94,60.67),(159.06,114.59),(297.59,176.38)] FALSE
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99,31.91),(79.57,31.75),(56.80,56.93)] FALSE
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60,13.88),(30.25,20.25),(20.67,49.63)] FALSE
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 0.25 1
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 0 1
yolov4_config.arch Arch ordered backbone(architecture) resnet cspdarknet_tiny,cspdarknet_tiny_3l,resnet,vgg,darknet,cspdarknet,efficientnet_b0,mobilenet_v1,mobilenet_v2,squeezenet,googlenet FALSE
yolov4_config.nlayers Number of Layers ordered_int number of layers for this architecture 18 10,18,34,50,101,152,16,19,17,53 FALSE depends on arch
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 0 inf
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 0 inf
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1 0 inf
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training FALSE
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE,FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE,FALSE
yolov4_config.activation Activation ordered Activation function relu,leaky_relu,mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 0 0.3
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 0 0.3
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 0 0.3
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment 0.2 0 0.3 FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 1 inf
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE,FALSE FALSE
eval_config.average_precision_mode AP Mode ordered Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__,__INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE,FALSE FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE true, false
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob float 0.5 [0, 1)
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate, example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 1.00E-04 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string [(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) resnet cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture 18 depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 2 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1 list of integers
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training TRUE, FALSE
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment 0.2 (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 >0
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0, 1, 2,3, 4,5, 6, 7, 8, 9, 10
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS __SAMPLE__, __INTEGRATE__
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE >=1
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ (0, 1)
eval_config.batch_size Batch Size integer batch size for evaluation 8 TRUE, FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve

train

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

automl_enabled

math_cond

parent_param

depends_on

version Schema Version integer The version of this schema 1 FALSE
random_seed Random Seed integer Random seed 42 0 inf FALSE
dataset_config Dataset collection Dataset configuration FALSE
dataset_config.data_sources Data Source hidden Data source FALSE
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training FALSE
dataset_config.data_sources.root_path Root Path hidden The root path FALSE
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source FALSE
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training FALSE
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training FALSE
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car,van,heavy_truck etc may be grouped under automobile. FALSE
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person masked-person ^[-a-zA-Z0-9_]{1,40}$ FALSE
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0 0 inf FALSE
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources FALSE
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE TRUE,FALSE
dataset_config.type Type string Dataset type,either kitti or coco kitti kitti,coco FALSE
dataset_config.image_extension Image Extension string The image extension png jpg,jpeg,png,JPG,JPEG,PNG FALSE
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE FALSE TRUE,FALSE FALSE
augmentation_config Data Augmentation collection Data augmentation configuration Yes FALSE
augmentation_config.hue Hue float Hue variance 0.1 0 1
augmentation_config.saturation Saturation float Saturation variance 1.5 1 inf
augmentation_config.exposure Exposure float Exposure 1.5 1 inf
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0 0 1
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5 0 1 TRUE
augmentation_config.jitter Jitter float Jitter 0.3 0 1
augmentation_config.output_width Output Width integer Output Image Width 960 128 inf / 32
augmentation_config.output_height Output Height integer Output Image Height 544 128 inf / 32
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1,3 FALSE
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 0 inf
augmentation_config.mosaic_prob Probability of mosaic augmentation float Probability of mosaic augmentation 0.5 0 1 TRUE
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2 0 0.5 TRUE
augmentation_config.image_mean Image Mean collection per-channel image mean values FALSE
augmentation_config.image_mean.key Names for input image channels string channel name r,g,b FALSE
augmentation_config.image_mean.value mean value for the channel of image float mean value of this channel of the name -inf inf FALSE
training_config Training collection Training configuration FALSE
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 1 inf
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 1 inf FALSE
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection FALSE
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate,example: 1e-7 1.00E-06 0 inf < training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate. example: 1e-4 0.0001 0 inf TRUE TRUE
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 0.1 0 1 < training_config.learning_rate.soft_start_annealing_schedule.annealing
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate 0.5 0 1 TRUE TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0 inf TRUE
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0 1
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 0 inf < training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate
training_config.regularizer Regularizer collection FALSE
training_config.regularizer.type Type ordered Type of regularizer,either NO_REG,L1 or L2 __L1__ __L1__,__L2__,__NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 3.00E-11 inf TRUE
training_config.optimizer.adam Adam collection FALSE
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 0 1
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 0 1
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 0 1
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE,FALSE
training_config.optimizer.sgd SGD collection FALSE
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) 0 1
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE,FALSE TRUE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 1 inf FALSE
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE,FALSE FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed FALSE
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model FALSE
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model FALSE
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 1 inf FALSE
training_config.n_workers Workers integer Number of workers in sequence dataset 4 1 inf FALSE
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE,FALSE FALSE
training_config.early_stopping Early Stopping collection FALSE
training_config.early_stopping.monitor Monitor ordered The name of the quantity to be monitored for early stopping loss
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed 0 1
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training 1 5
training_config.visualizer Visualizer collection FALSE
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE,FALSE FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard 1 inf FALSE
training_config.model_ema ModelEMA bool Enable ModelEMA FALSE TRUE,FALSE
yolov4_config YOLOv4 collection FALSE
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(260.69,172.35),(125.91,81.47),(72.27,42.42)] FALSE
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(30.80,71.40),(38.97,26.86),(18.88,17.11)] FALSE
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string FALSE
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 0.25 1
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 0 1
yolov4_config.arch Arch ordered backbone(architecture) cspdarknet_tiny cspdarknet_tiny,cspdarknet_tiny_3l,resnet,vgg,darknet,cspdarknet,efficientnet_b0,mobilenet_v1,mobilenet_v2,squeezenet,googlenet FALSE
yolov4_config.nlayers Number of Layers ordered_int number of layers for this architecture 18 10,18,34,50,101,152,16,19,17,53 FALSE depends on arch
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 0 inf TRUE
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 0 inf
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1 0 inf
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training FALSE
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE,FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE,FALSE
yolov4_config.activation Activation ordered Activation function relu,leaky_relu,mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 0 1
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 0 0.3
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 0 0.3
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment 0 0.3 FALSE
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 0 1
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 0 1
nms_config.top_k Top K integer Maximum number of objects after NMS 200 1 inf
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 1 32
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE,FALSE FALSE
eval_config.average_precision_mode AP Mode ordered Average Precision mode,either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__,__INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 1 inf FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 0 1
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE,FALSE FALSE

prune

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

model Model path hidden UNIX path to where the input model is located. yes
output_file Output File hidden UNIX path to where the pruned model will be saved. yes
experiment_spec_path Experiment spec path hidden
key Encode key hidden
normalizer Normalizer string How to normalize max max, L2
equalization_criterion Equalization Criterion string Criteria to equalize the stats of inputs to an element wise op layer. union union, intersection, arithmetic_mean,geometric_mean no
pruning_granularity Pruning Granularity integer Number of filters to remove at a time. 8 no
pruning_threshold Pruning Threshold float Threshold to compare normalized norm against. 0.1 0 1 yes yes
min_num_filters Minimum number of filters integer Minimum number of filters to be kept per layer 16 no
excluded_layers Excluded layers string string of list: List of excluded_layers. Examples: -i item1 item2
results_dir Results directory hidden
verbose verbosity hidden TRUE

export

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

version Schema Version const The version of this schema 1
model Model hidden UNIX path to the model file 0.1 yes
key Encryption Key hidden Encryption key tlt_encode yes
output_file Output File hidden UNIX path to where the exported model will be saved. yes
force_ptq Force Post-Training Quantization bool Force generating int8 engine using Post Training Quantization FALSE no
cal_image_dir hidden
data_type Data Type string The desired engine data type. fp32 int8, fp32, fp16 yes yes
strict_type_constraints bool FALSE
gen_ds_config bool FALSE
cal_cache_file Calibration cache file hidden Unix PATH to the int8 calibration cache file yes yes
batches Number of calibration batches integer Number of batches to calibrate the model when run in INT8 mode 100 no
max_workspace_size integer Example: The integer value of 1<<30, 2<<30
max_batch_size integer 1
batch_size Batch size integer Number of images per batch when generating the TensorRT engine. 100 yes
min_batch_size integer 1
opt_batch_size integer 1
experiment_spec Experiment Spec hidden UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. yes
engine_file Engine File hidden UNIX path to the model engine file. yes
static_batch_size integer -1
results_dir hidden
verbose hidden TRUE

convert

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

e engine file path hidden
k encode key hidden
c cache_file hidden
o outputs string comma separated list of output node names
d input_dims string comma separated list of input dimensions (not required for TLT 3.0 new models). yes
b batch_size integer calibration batch size 8 yes
m max_batch_size integer maximum TensorRT engine batch size (default 16). If you encounter an out-of-memory issue, please decrease the batch size accordingly. 16 yes
w max_workspace_size integer maximum workspace size of TensorRT engine (default 1<<30). If you encounter an out-of-memory issue, please increase the workspace size accordingly.
t data_type string TensorRT data type fp32 fp32, fp16, int8 yes
i input_order string input dimension ordering nchw nchw, nhwc, nc
s strict_type_constraints bool TensorRT strict_type_constraints flag for INT8 mode FALSE
u dla_core int Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it’ll always allow GPU fallback). -1
p parse_profile_shapes string comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in dynamic shape case.
model etlt model from export hidden

evaluate

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, False
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome(grayscale) FALSE true, false
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob float 0.5 [0, 1)
augmentation_config.mosaic_min_ratio mosaic min ratio float mosaic min ratio 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule Soft Start Cosine Annealing Schedule collection
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0.0001 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) cspdarknet_tiny cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1 list of integers
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training TRUE, FALSE
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 >0
nms_config.top_k Top K integer Maximum number of objects after NMS 200 0, 1, 2,3, 4,5, 6, 7, 8, 9, 10
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS __SAMPLE__, __INTEGRATE__
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE >=1
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ (0, 1)
eval_config.batch_size Batch Size integer batch size for evaluation 8 TRUE, FALSE
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve

inference

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
threshold float 0.3
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome (grayscale) FALSE TRUE, FALSE
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip Probability float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob Mosaic Probability float Probability of applying mosaic augmentation to an image 0.5 [0, 1)
augmentation_config.mosaic_min_ratio Mosaic Min Ratio float Minimum ratio of width/height that a sub-image can take in mosaic augmentation 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule Soft Start Cosine Annealing Schedule collection
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0.0001 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) cspdarknet_tiny cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training list of integers
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 (0, 1)
nms_config.top_k Top K integer Maximum number of objects after NMS 200 >0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE, FALSE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 >=1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 (0, 1)
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE, FALSE

retrain

parameter

display_name

value_type

description

default_value

examples

valid_min

valid_max

valid_options

required

regex

popular

valid_options_description

version Schema Version integer The version of this schema 1
random_seed Random Seed integer Random seed 42
dataset_config Dataset collection Dataset configuration
dataset_config.data_sources Data Source hidden Data source
dataset_config.data_sources.image_directory_path Image Directory hidden Relative path to the directory of images for training
dataset_config.data_sources.root_path Root Path hidden The root path
dataset_config.data_sources.source_weight Source Weight hidden The weighting for the source
dataset_config.data_sources.label_directory_path Label Directory Path hidden The path to the directory of labels for training
dataset_config.data_sources.tfrecords_path TFRecords Path hidden The path to the TFRecords data for training
dataset_config.target_class_mapping Target Class Mappings list This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile.
dataset_config.target_class_mapping.key Class Key string The “key” field is the value of the class name in the tfrecords file. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.target_class_mapping.value Class Value string The “value” field corresponds to the value that the network is expected to learn. person ^[-a-zA-Z0-9_]{1,40}$
dataset_config.validation_fold Validation Fold integer The percentage of the entire dataset to be used as validation data 0
dataset_config.validation_data_sources Validation Data Sources hidden The definition is the same as training data sources
dataset_config.include_difficult_in_training Include Difficult Objects in Training bool Whether or not to include difficult objects in training FALSE TRUE, FALSE
dataset_config.type Type string Dataset type, either kitti or coco kitti
dataset_config.image_extension Image Extension string The image extension png __png__, __jpg__, __jpeg__
dataset_config.is_monochrome Is Monochrome bool Whether or not the images are monochrome (grayscale) FALSE TRUE, FALSE
augmentation_config Data Augmentation collection Data augmentation configuration
augmentation_config.hue Hue float Hue variance 0.1
augmentation_config.saturation Saturation float Saturation variance 1.5
augmentation_config.exposure Exposure float Exposure 1.5
augmentation_config.vertical_flip Vertical Flip Probability float Probability of vertical flip 0
augmentation_config.horizontal_flip Horizontal Flip Probability float Probability of horizontal flip 0.5
augmentation_config.jitter Jitter float Jitter 0.3
augmentation_config.output_width Output Width integer Output Image Width 960
augmentation_config.output_height Output Height integer Output Image Height 544
augmentation_config.output_channel Output Channel integer Output Image Channel 3 1, 3
augmentation_config.randomize_input_shape_period Randomize Input Shape Period integer Period(in number of epochs) to randomize input shape for multi-scale training 0 >=0
augmentation_config.mosaic_prob Mosaic Probability float Probability of applying mosaic augmentation to an image 0.5 [0, 1)
augmentation_config.mosaic_min_ratio Mosaic Min Ratio float Minimum ratio of width/height that a sub-image can take in mosaic augmentation 0.2
augmentation_config.image_mean Image Mean collection per-channel image mean values
augmentation_config.image_mean.key string ‘r’, ‘g’, ‘b’
augmentation_config.image_mean.value float
training_config Training collection Training configuration
training_config.batch_size_per_gpu Batch Size per GPU integer Batch size per GPU in training 8 >=1
training_config.num_epochs Number of Epochs integer Number of Epochs to run the training 80 >=1
training_config.learning_rate.soft_start_annealing_schedule Soft Start Annealing Schedule collection
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate example: 1e-7 >0
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate Max Learning Rate float Maximum learning rate example: 1e-4 >0
training_config.learning_rate.soft_start_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up: example 0.3 (0, 1)
training_config.learning_rate.soft_start_annealing_schedule.annealing Annealing float progress(in percentage) for decreasing learning rate (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule Soft Start Cosine Annealing Schedule collection
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate Max Learning Rate float maximum learning rate 0.0001 >0
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start Soft Start float progress(in percentage) for warm up 0.3 (0, 1)
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate Min Learning Rate float Minimum learning rate 1.00E-07 >0
training_config.regularizer Regularizer collection
training_config.regularizer.type Type string Type of regularizer, either NO_REG, L1 or L2 __L1__ __L1__, __L2__, __NO_REG__
training_config.regularizer.weight Weight float weight decay of regularizer 3.00E-05 >=0
training_config.optimizer.adam Adam collection
training_config.optimizer.adam.epsilon Epsilon float Epsilon of Adam 1.00E-07 (0, 1)
training_config.optimizer.adam.beta1 Beta1 float beta1 of Adam 0.9 (0, 1)
training_config.optimizer.adam.beta2 Beta 2 float beta2 of Adam 0.999 (0, 1)
training_config.optimizer.adam.amsgrad AMSGrad bool AMSGrad of Adam FALSE TRUE, FALSE
training_config.optimizer.sgd SGD collection
training_config.optimizer.sgd.momentum Momentum float momentum of sgd (example: 0.9) (0, 1)
training_config.optimizer.sgd.nesterov Nesterov bool nesterov of sgd (example: FALSE) TRUE, FALSE
training_config.optimizer.rmsprop RMSProp collection
training_config.optimizer.rmsprop.rho Rho float rho of RMSProp (0, 1)
training_config.optimizer.rmsprop.momentum Momentum float momentum of RMSProp (0, 1)
training_config.optimizer.rmsprop.epsilon Epsilon float epsilon of RMSProp (0, 1)
training_config.optimizer.rmsprop.centered Centered bool centered of RMSProp TRUE, FALSE
training_config.checkpoint_interval Checkpoint Interval integer Period(in number of epochs) to save checkpoints 10 >=1
training_config.enable_qat QAT bool Enable QAT or not FALSE TRUE, FALSE
training_config.resume_model_path Resume Model Path hidden Path of the model to be resumed
training_config.pretrain_model_path Pretrained Model Path hidden Path of the pretrained model
training_config.pruned_model_path Pruned Model Path hidden Path of the pruned model
training_config.max_queue_size Max Queue Size integer Maximum Queue Size in Sequence Dataset 3 >=1
training_config.n_workers Workers integer Number of workers in sequence dataset 4 >=1
training_config.use_multiprocessing Use Multiprocessing bool Use multiprocessing or not FALSE TRUE, FALSE
training_config.early_stopping Early Stopping collection
training_config.early_stopping.monitor Monitor string The name of the quantity to be monitored for early stopping “loss”
training_config.early_stopping.min_delta Min Delta float Minimum delta of the quantity to be regarded as changed >=0
training_config.early_stopping.patience Patience integer The number of epochs to be waited for before stopping the training >=1
training_config.visualizer Visualizer collection
training_config.visualizer.enabled Enable bool Enable the visualizer or not TRUE, FALSE
training_config.visualizer.num_images Max Num Images integer Maximum number of images to be displayed in TensorBoard >=1
yolov4_config YOLOv4 collection
yolov4_config.big_anchor_shape Big Anchor Shape string Big anchor shapes in string [(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.mid_anchor_shape Middle Anchor Shape string Middle anchor shapes in string [(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.small_anchor_shape Small Anchor Shape string Small anchor shapes in string numpy array of shape (3, 2) in string format. All elements should be positive float
yolov4_config.matching_neutral_box_iou Matching Neutral Box IoU float Neutral box matching IoU 0.5 (0, 1)
yolov4_config.box_matching_iou Box Matching IoU float box matching IoU 0.25 (0, 1)
yolov4_config.arch Arch string backbone(architecture) cspdarknet_tiny cspdarknet_tiny, cspdarknet_tiny_3l, resnet, vgg, darknet, cspdarknet, efficientnet_b0, mobilenet_v1, mobilenet_v2, squeezenet, googlenet
yolov4_config.nlayers Number of Layers integer number of layers for this architecture depends on arch
yolov4_config.arch_conv_blocks Extra Convolution Blocks integer Number of extra convolution blocks 1
yolov4_config.loss_loc_weight weighting for location loss float weighting factor for location loss 1 1
yolov4_config.loss_neg_obj_weights weighting for loss of negative objects float weighting factor for loss of negative objects 1 1
yolov4_config.loss_class_weights weighting for classification loss float weighting factor for classification loss 1
yolov4_config.freeze_blocks Freeze Blocks list ID of blocks to be frozen during training list of integers
yolov4_config.freeze_bn Freeze BN bool Whether or not to freeze BatchNormalization layers FALSE TRUE, FALSE
yolov4_config.force_relu Force ReLU bool Whether or not to force activation function to ReLU FALSE TRUE, FALSE
yolov4_config.activation Activation string Activation function relu, leaky_relu, mish
yolov4_config.label_smoothing Label Smoothing float Label Smoothing 0 (0, 1)
yolov4_config.big_grid_xy_extend Big Grid XY Extend float Big anchors adjustment 0.05 (0, 1)
yolov4_config.mid_grid_xy_extend Middle Grid XY Extend float Middle anchors adjustment 0.1 (0, 1)
yolov4_config.small_grid_xy_extend Small Grid XY Extend float Small anchors adjustment (0, 1)
nms_config.confidence_threshold Confidence Threshold float Confidence threshold 0.001 (0, 1)
nms_config.clustering_iou_threshold IoU threshold float IoU threshold 0.5 (0, 1)
nms_config.top_k Top K integer Maximum number of objects after NMS 200 >0
nms_config.infer_nms_score_bits NMS Score Bits integer Number of bits for scores for optimized NMS 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
nms_config.force_on_cpu Force on CPU bool Force NMS to run on CPU in training TRUE TRUE, FALSE
eval_config.average_precision_mode AP Mode enum Average Precision mode, either __SAMPLE__ or __INTEGRATE__ __SAMPLE__ __SAMPLE__, __INTEGRATE__
eval_config.batch_size Batch Size integer batch size for evaluation 8 >=1
eval_config.matching_iou_threshold Matching IoU Threshold float IoU threshold 0.5 (0, 1)
eval_config.visualize_pr_curve Visualize PR Curve bool Whether or not to visualize precision-recall curve TRUE, FALSE

© Copyright 2023, NVIDIA. Last updated on Sep 5, 2023.