Action Specs
evaluate

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
train_config | collection |
train_config.train_dataset_path | hidden |
train_config.val_dataset_path | hidden |
train_config.pretrained_model_path | hidden |
train_config.optimizer | collection |
train_config.optimizer.sgd | collection | One of SGD / ADAM / RMSPROP |
train_config.optimizer.sgd.lr | float | 0.01 |
train_config.optimizer.sgd.decay | float | 0 |
train_config.optimizer.sgd.momentum | float | 0.9 |
train_config.optimizer.sgd.nesterov | bool | FALSE |
train_config.optimizer.adam | collection |
train_config.optimizer.adam.lr | float |
train_config.optimizer.adam.beta_1 | float |
train_config.optimizer.adam.beta_2 | float |
train_config.optimizer.adam.epsilon | float |
train_config.optimizer.adam.decay | float |
train_config.optimizer.rmsprop | collection |
train_config.optimizer.rmsprop.lr | float |
train_config.optimizer.rmsprop.rho | float |
train_config.optimizer.rmsprop.epsilon | float |
train_config.optimizer.rmsprop.decay | float |
train_config.batch_size_per_gpu | integer | 256 |
train_config.n_epochs | integer | 80 |
train_config.n_workers | integer | 2 |
train_config.reg_config | collection |
train_config.reg_config.type | string | L2 |
train_config.reg_config.scope | string | Conv2D,Dense |
train_config.reg_config.weight_decay | float | 0.00005 |
train_config.lr_config | collection | ONE OF STEP / SOFT_ANNEAL / COSINE |
train_config.lr_config.step | collection |
train_config.lr_config.step.learning_rate | float |
train_config.lr_config.step.step_size | integer |
train_config.lr_config.step.gamma | float |
train_config.lr_config.soft_anneal | collection |
train_config.lr_config.soft_anneal.learning_rate | float | 0.05 |
train_config.lr_config.soft_anneal.soft_start | float | 0.056 |
train_config.lr_config.soft_anneal.annealing_divider | float | 10 |
train_config.lr_config.soft_anneal.annealing_points | list | List of float | [0.3,0.6,0.8] |
train_config.lr_config.cosine | collection |
train_config.lr_config.cosine.learning_rate | float |
train_config.lr_config.cosine.min_lr_ratio | float |
train_config.lr_config.cosine.soft_start | float |
train_config.random_seed | integer | 42 |
train_config.enable_random_crop | bool |
train_config.enable_center_crop | bool |
train_config.enable_color_augmentation | bool |
train_config.label_smoothing | float |
train_config.preprocess_mode | string | torch |
train_config.mixup_alpha | float |
train_config.model_parallelism | list |
train_config.image_mean | collection |
train_config.image_mean.key | string |
train_config.image_mean.value | float |
train_config.disable_horizontal_flip | bool |
train_config.visualizer_config | collection |
train_config.visualizer | Visualizer | collection |
train_config.visualizer.enabled | Enable | bool | Enable the visualizer or not |
train_config.visualizer.num_images | Max Num Images | integer | Maximum number of images to be displayed in TensorBoard |
eval_config | collection |
eval_config.top_k | integer | 3 |
eval_config.eval_dataset_path | hidden |
eval_config.model_path | hidden |
eval_config.batch_size | integer | 256 |
eval_config.n_workers | integer | 2 |
eval_config.enable_center_crop | bool |
model_config | collection |
model_config.arch | string | squeezenet |
model_config.input_image_size | string | 3,224,224 | yes | yes |
model_config.resize_interpolation_method | string | __BILINEAR__, __BICUBIC__ |
model_config.n_layers | integer |
model_config.retain_head | bool | FALSE |
model_config.use_batch_norm | bool |
model_config.use_bias | bool |
model_config.use_pooling | bool |
model_config.all_projections | bool |
model_config.freeze_bn | bool |
model_config.freeze_blocks | integer |
model_config.dropout | float | 1.00E-03 |
model_config.batch_norm_config | collection |
model_config.batch_norm_config.momentum | float |
model_config.batch_norm_config.epsilon | float |
model_config.activation | collection |
model_config.activation.activation_type | string |
model_config.activation.activation_parameters | collection |
model_config.activation.activation_parameters.key | string |
model_config.activation.activation_parameters.value | float |
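
The parameters above correspond to fields of the classification experiment spec file. The snippet below is a minimal, illustrative sketch in the protobuf-text spec format; the field names follow the table, while the dataset paths and the step-schedule numbers are placeholders rather than recommended settings.

```
model_config {
  arch: "resnet"
  n_layers: 18
  use_batch_norm: true
  retain_head: false
  input_image_size: "3,224,224"
}
train_config {
  train_dataset_path: "/data/train"          # placeholder path
  val_dataset_path: "/data/val"              # placeholder path
  optimizer {
    sgd {
      lr: 0.01
      decay: 0.0
      momentum: 0.9
      nesterov: false
    }
  }
  batch_size_per_gpu: 256
  n_epochs: 80
  n_workers: 2
  reg_config {
    type: "L2"
    scope: "Conv2D,Dense"
    weight_decay: 0.00005
  }
  lr_config {
    step {
      learning_rate: 0.006                   # placeholder value
      step_size: 10                          # placeholder value
      gamma: 0.1                             # placeholder value
    }
  }
  random_seed: 42
  preprocess_mode: "torch"
}
eval_config {
  eval_dataset_path: "/data/test"            # placeholder path
  model_path: "/results/weights/model.hdf5"  # placeholder path
  top_k: 3
  batch_size: 256
  n_workers: 2
}
```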
export

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version | Schema Version | const | The version of this schema | 1 |
model | Model | hidden | UNIX path to the model file | 0.1 | yes |
key | Encryption Key | hidden | Encryption key | tlt_encode | yes |
output_file | Output File | hidden | UNIX path to where the exported model will be saved. | yes |
force_ptq | Force Post-Training Quantization | bool | Force generating int8 engine using Post Training Quantization | FALSE | no |
cal_image_dir | hidden |
data_type | Data Type | string | The data type of the TensorRT engine. | fp32 | int8, fp32, fp16 | yes | yes |
strict_type_constraints | bool | FALSE |
gen_ds_config | bool | FALSE |
cal_cache_file | Calibration cache file | hidden | UNIX path to the int8 calibration cache file | yes | yes |
batches | Number of calibration batches | integer | Number of batches to calibrate the model when run in INT8 mode | 100 |
max_workspace_size | integer | Example: The integer value of 1<<30, 2<<30 |
max_batch_size | integer | 1 |
batch_size | Batch size | integer | Number of images per batch when generating the TensorRT engine. | 100 | yes |
min_batch_size | integer | 1 |
opt_batch_size | integer | 1 |
experiment_spec | Experiment Spec | hidden | UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. | yes |
engine_file | Engine File | hidden | UNIX path to the model engine file. | yes |
static_batch_size | integer | -1 |
results_dir | hidden |
verbose | hidden | TRUE |
classmap_json | hidden |
is_byom | bool | FALSE |
inference

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular | cli |
---|---|---|---|---|---|---|---|---|---|---|---|---|
batch_size | Batch Size | integer | Batch size CLI parameter | 1 | yes |
train_config.train_dataset_path | hidden |
train_config.val_dataset_path | hidden |
train_config.pretrained_model_path | hidden |
train_config.optimizer | collection |
train_config.optimizer.sgd | collection | One of SGD / ADAM / RMSPROP |
train_config.optimizer.sgd.lr | float | 0.01 |
train_config.optimizer.sgd.decay | float | 0 |
train_config.optimizer.sgd.momentum | float | 0.9 |
train_config.optimizer.sgd.nesterov | bool | FALSE |
train_config | collection |
train_config.optimizer.adam | collection |
train_config.optimizer.adam.lr | float |
train_config.optimizer.adam.beta_1 | float |
train_config.optimizer.adam.beta_2 | float |
train_config.optimizer.adam.epsilon | float |
train_config.optimizer.adam.decay | float |
train_config.optimizer.rmsprop | collection |
train_config.optimizer.rmsprop.lr | float |
train_config.optimizer.rmsprop.rho | float |
train_config.optimizer.rmsprop.epsilon | float |
train_config.optimizer.rmsprop.decay | float |
train_config.batch_size_per_gpu | integer | 256 |
train_config.n_epochs | integer | 80 |
train_config.n_workers | integer | 2 |
train_config.reg_config | collection |
train_config.reg_config.type | string | L2 |
train_config.reg_config.scope | string | Conv2D,Dense |
train_config.reg_config.weight_decay | float | 0.00005 |
train_config.lr_config | collection | ONE OF STEP / SOFT_ANNEAL / COSINE |
train_config.lr_config.step | collection |
train_config.lr_config.step.learning_rate | float |
train_config.lr_config.step.step_size | integer |
train_config.lr_config.step.gamma | float |
train_config.lr_config.soft_anneal | collection |
train_config.lr_config.soft_anneal.learning_rate | float | 0.05 |
train_config.lr_config.soft_anneal.soft_start | float | 0.056 |
train_config.lr_config.soft_anneal.annealing_divider | float | 10 |
train_config.lr_config.soft_anneal.annealing_points | list | List of float | [0.3,0.6,0.8] |
train_config.lr_config.cosine | collection |
train_config.lr_config.cosine.learning_rate | float |
train_config.lr_config.cosine.min_lr_ratio | float |
train_config.lr_config.cosine.soft_start | float |
train_config.random_seed | integer | 42 |
train_config.enable_random_crop | bool |
train_config.enable_center_crop | bool |
train_config.enable_color_augmentation | bool |
train_config.label_smoothing | float |
train_config.preprocess_mode | string | torch |
train_config.mixup_alpha | float |
train_config.model_parallelism | list |
train_config.image_mean | collection |
train_config.image_mean.key | string |
train_config.image_mean.value | float |
train_config.disable_horizontal_flip | bool |
train_config.visualizer_config | collection |
train_config.visualizer | Visualizer | collection |
train_config.visualizer.enabled | Enable | bool | Enable the visualizer or not |
train_config.visualizer.num_images | Max Num Images | integer | Maximum number of images to be displayed in TensorBoard |
eval_config | collection |
eval_config.top_k | integer | 3 |
eval_config.eval_dataset_path | hidden |
eval_config.model_path | hidden |
eval_config.batch_size | integer | 256 |
eval_config.n_workers | integer | 2 |
eval_config.enable_center_crop | bool |
model_config | collection |
model_config.arch | string | squeezenet |
model_config.input_image_size | string | 3,224,224 | yes | yes |
model_config.resize_interpolation_method | string | __BILINEAR__, __BICUBIC__ |
model_config.n_layers | integer |
model_config.retain_head | bool | FALSE |
model_config.use_batch_norm | bool |
model_config.use_bias | bool |
model_config.use_pooling | bool |
model_config.all_projections | bool |
model_config.freeze_bn | bool |
model_config.freeze_blocks | integer |
model_config.dropout | float | 1.00E-03 |
model_config.batch_norm_config | collection |
model_config.batch_norm_config.momentum | float |
model_config.batch_norm_config.epsilon | float |
model_config.activation | collection |
model_config.activation.activation_type | string |
model_config.activation.activation_parameters | collection |
model_config.activation.activation_parameters.key | string |
model_config.activation.activation_parameters.value | float |
train

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
init_epoch | integer | CLI Parameter initial epoch | 1 |
train_config | collection |
train_config.train_dataset_path | hidden |
train_config.val_dataset_path | hidden |
train_config.pretrained_model_path | hidden |
train_config.optimizer | collection |
train_config.optimizer.sgd | collection | One of SGD / ADAM / RMSPROP |
train_config.optimizer.sgd.lr | float | 0.01 |
train_config.optimizer.sgd.decay | float | 0 |
train_config.optimizer.sgd.momentum | float | 0.9 |
train_config.optimizer.sgd.nesterov | bool | FALSE |
train_config.optimizer.adam | collection |
train_config.optimizer.adam.lr | float |
train_config.optimizer.adam.beta_1 | float |
train_config.optimizer.adam.beta_2 | float |
train_config.optimizer.adam.epsilon | float |
train_config.optimizer.adam.decay | float |
train_config.optimizer.rmsprop | collection |
train_config.optimizer.rmsprop.lr | float |
train_config.optimizer.rmsprop.rho | float |
train_config.optimizer.rmsprop.epsilon | float |
train_config.optimizer.rmsprop.decay | float |
train_config.batch_size_per_gpu | integer | 256 |
train_config.n_epochs | integer | 80 |
train_config.n_workers | integer | 2 |
train_config.reg_config | collection |
train_config.reg_config.type | string | L2 |
train_config.reg_config.scope | string | Conv2D,Dense |
train_config.reg_config.weight_decay | float | 0.00005 |
train_config.lr_config | collection | ONE OF STEP / SOFT_ANNEAL / COSINE |
train_config.lr_config.step | collection |
train_config.lr_config.step.learning_rate | float |
train_config.lr_config.step.step_size | integer |
train_config.lr_config.step.gamma | float |
train_config.lr_config.soft_anneal | collection |
train_config.lr_config.soft_anneal.learning_rate | float | 0.05 |
train_config.lr_config.soft_anneal.soft_start | float | 0.056 |
train_config.lr_config.soft_anneal.annealing_divider | float | 10 |
train_config.lr_config.soft_anneal.annealing_points | list | List of float | [0.3,0.6,0.8] |
train_config.lr_config.cosine | collection |
train_config.lr_config.cosine.learning_rate | float |
train_config.lr_config.cosine.min_lr_ratio | float |
train_config.lr_config.cosine.soft_start | float |
train_config.random_seed | integer | 42 |
train_config.enable_random_crop | bool |
train_config.enable_center_crop | bool |
train_config.enable_color_augmentation | bool |
train_config.label_smoothing | float |
train_config.preprocess_mode | string | torch |
train_config.mixup_alpha | float |
train_config.model_parallelism | list |
train_config.image_mean | collection |
train_config.image_mean.key | string |
train_config.image_mean.value | float |
train_config.disable_horizontal_flip | bool |
train_config.visualizer_config | collection |
train_config.visualizer | Visualizer | collection |
train_config.visualizer.enabled | Enable | bool | Enable the visualizer or not |
train_config.visualizer.num_images | Max Num Images | integer | Maximum number of images to be displayed in TensorBoard |
eval_config | collection |
eval_config.top_k | integer | 3 |
eval_config.eval_dataset_path | hidden |
eval_config.model_path | hidden |
eval_config.batch_size | integer | 256 |
eval_config.n_workers | integer | 2 |
eval_config.enable_center_crop | bool |
model_config | collection |
model_config.arch | string | squeezenet |
model_config.input_image_size | string | 3,224,224 | yes | yes |
model_config.resize_interpolation_method | string | __BILINEAR__, __BICUBIC__ |
model_config.n_layers | integer |
model_config.retain_head | bool | FALSE |
model_config.use_batch_norm | bool |
model_config.use_bias | bool |
model_config.use_pooling | bool |
model_config.all_projections | bool |
model_config.freeze_bn | bool |
model_config.freeze_blocks | integer |
model_config.dropout | float | 1.00E-03 |
model_config.batch_norm_config | collection |
model_config.batch_norm_config.momentum | float |
model_config.batch_norm_config.epsilon | float |
model_config.activation | collection |
model_config.activation.activation_type | string |
model_config.activation.activation_parameters | collection |
model_config.activation.activation_parameters.key | string |
model_config.activation.activation_parameters.value | float |
convert

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
e | engine file path | hidden |
k | encode key | hidden |
c | cache_file | hidden |
o | outputs | string | comma separated list of output node names |
d | input_dims | string | comma separated list of input dimensions (not required for TLT 3.0 new models). | yes | yes |
b | batch_size | integer | calibration batch size | 8 | yes |
m | max_batch_size | integer | maximum TensorRT engine batch size (default 16). If you run into an out-of-memory issue, decrease the batch size accordingly. | 16 | yes |
w | max_workspace_size | integer | maximum workspace size of the TensorRT engine (default 1<<30). If you run into an out-of-memory issue, increase the workspace size accordingly. |
t | data_type | string | TensorRT data type | fp32 | fp32, fp16, int8 | yes |
i | input_order | string | input dimension ordering | nchw | nchw, nhwc, nc |
s | strict_type_constraints | bool | TensorRT strict_type_constraints flag for INT8 mode | FALSE |
u | dla_core | int | Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be utilized for inference. Note that it will always allow GPU fallback). | -1 |
p | parse_profile_shapes | list | comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape has x as the delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW, etc. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in the dynamic shape case. |
platform | platform | string | platform label | yes | yes |
model | etlt model from export | hidden |
evaluate

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | popular | regex |
---|---|---|---|---|---|---|---|---|---|---|---|
version | Schema Version | const | The version of this schema | 1 |
random_seed | Random Seed | integer | Seed value for the random number generator in the network | 42 |
dataset_config | Dataset | collection | Parameters to configure the dataset |
dataset_config.image_extension | Image Extension | string | Extension of the images to be used. | png | png, jpg, jpeg | yes |
dataset_config.data_sources.tfrecords_path | TFRecord Path | hidden | /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* |
dataset_config.data_sources.image_directory_path | Image Path | hidden | /shared/users/1234/datasets/5678/training |
dataset_config.validation_data_source.tfrecords_path | Validation TFRecord Path | hidden | /shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* |
dataset_config.validation_data_source.image_directory_path | Validation Image Path | hidden | /shared/users/1234/datasets/5678/training |
dataset_config.target_class_mapping | Target Class Mappings | list | This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck, etc. may be grouped under automobile. |
dataset_config.target_class_mapping.key | Class Key | string | The “key” field is the value of the class name in the tfrecords file. | person | ^[-a-zA-Z0-9_]{1,40}$ |
dataset_config.target_class_mapping.value | Class Value | string | The “value” field corresponds to the value that the network is expected to learn. | masked-person | ^[-a-zA-Z0-9_]{1,40}$ |
dataset_config.validation_fold | Validation Fold | integer | In the case of an n-fold tfrecords, you define the index of the fold to use for validation. For sequence-wise validation, choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0, as the tfrecord is just 2-fold. | 0 |
augmentation_config | Data Augmentation | collection | Collection of parameters to configure the preprocessing and on-the-fly data augmentation | Yes |
augmentation_config.preprocessing.output_image_width | Image Width | integer | The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. | 1248 | 480 | yes | Yes |
augmentation_config.preprocessing.output_image_height | Image Height | integer | The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. | 384 | 272 | yes | Yes |
augmentation_config.preprocessing.min_bbox_width | Bounding Box Width | float | The minimum width of the object labels to be considered for training. | 1 | 0 | yes |
augmentation_config.preprocessing.min_bbox_height | Bounding Box Height | float | The minimum height of the object labels to be considered for training. | 1 | 0 | yes |
augmentation_config.preprocessing.output_image_channel | Image Channel | integer | The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. | 3 | 1, 3 | yes |
augmentation_config.preprocessing.crop_right | Crop Right | integer | The right boundary of the crop to be extracted from the original image. | 0 | yes |
augmentation_config.preprocessing.crop_left | Crop Left | integer | The left boundary of the crop to be extracted from the original image. | 0 | yes |
augmentation_config.preprocessing.crop_top | Crop Top | integer | The top boundary of the crop to be extracted from the original image. | 0 | yes |
augmentation_config.preprocessing.crop_bottom | Crop Bottom | integer | The bottom boundary of the crop to be extracted from the original image. | 0 | yes |
augmentation_config.preprocessing.scale_height | Scale Height | float | The floating point factor to scale the height of the cropped images. | 0 | yes |
augmentation_config.preprocessing.scale_width | Scale Width | float | The floating point factor to scale the width of the cropped images. | 0 | yes |
augmentation_config.spatial_augmentation.hflip_probability | Horizontal-Flip Probability | float | The probability to flip an input image horizontally. | 0.5 | 0 | 1 |
augmentation_config.spatial_augmentation.vflip_probability | Vertical-Flip Probability | float | The probability to flip an input image vertically. | 0 | 1 |
augmentation_config.spatial_augmentation.zoom_min | Minimum Zoom Scale | float | The minimum zoom scale of the input image. | 1 | 0 |
augmentation_config.spatial_augmentation.zoom_max | Maximum Zoom Scale | float | The maximum zoom scale of the input image. | 1 | 0 |
augmentation_config.spatial_augmentation.translate_max_x | X-Axis Maximum Translation | float | The maximum translation to be added across the x axis. | 8 | 0 |
augmentation_config.spatial_augmentation.translate_max_y | Y-Axis Maximum Translation | float | The maximum translation to be added across the y axis. | 8 | 0 |
augmentation_config.spatial_augmentation.rotate_rad_max | Image Rotation | float | The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. | 0 |
augmentation_config.color_augmentation.color_shift_stddev | Color Shift Standard Deviation | float | The standard deviation value for the color shift. | 0 | 1 |
augmentation_config.color_augmentation.hue_rotation_max | Hue Maximum Rotation | float | The maximum rotation angle for the hue rotation matrix. | 25 | 0 | 360 |
augmentation_config.color_augmentation.saturation_shift_max | Saturation Maximum Shift | float | The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. | 0.2 | 0 | 1 |
augmentation_config.color_augmentation.contrast_scale_max | Contrast Maximum Scale | float | The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. | 0.1 | 0 | 1 |
augmentation_config.color_augmentation.contrast_center | Contrast Center | float | The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. | 0.5 | 0.5 |
bbox_rasterizer_config | Bounding box rasterizer | collection | Collection of parameters to configure the bounding box rasterizer |
bbox_rasterizer_config.deadzone_radius | Bounding box rasterizer deadzone radius | float | 0.4 | 0 | 1 | yes |
model_config | Model | collection |
model_config.arch | BackBone Architecture | string | The architecture of the backbone feature extractor to be used for training. | resnet | resnet | yes |
model_config.pretrained_model_file | PTM File Path | hidden | This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file are to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains is supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer. | /shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5 |
model_config.load_graph | PTM Load Graph | bool | A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported. | FALSE |
model_config.freeze_blocks | Freeze Blocks | integer | This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. | 0 | 3 |
model_config.freeze_bn | Freeze Batch Normalization | bool | A flag to determine whether to freeze the Batch Normalization layers in the model during training. |
model_config.all_projections | All Projections | bool | For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. |
model_config.num_layers | Number of Layers | integer | The depth of the feature extractor for scalable templates. | 18 | 10, 18, 34, 50, 101 | yes |
model_config.use_pooling | Use Pooling | bool | Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. |
model_config.use_batch_norm | Use Batch Normalization | bool | A flag to determine whether to use Batch Normalization layers or not. | TRUE |
model_config.dropout_rate | Dropout Rate | float | Probability for dropout | 0 | 1 |
model_config.training_precision.backend_floatx | Backend Training Precision | string | A nested parameter that sets the precision of the backend training framework. | __FLOAT32__ | yes |
model_config.objective_set.cov | Objective COV | collection | The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. | {} | yes |
model_config.objective_set.bbox.scale | Objective Bounding Box Scale | float | The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. | 35 | yes |
model_config.objective_set.bbox.offset | Objective Bounding Box Offset | float | The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. | 0.5 | yes |
training_config | Training | collection |
training_config.batch_size_per_gpu | Batch Size Per GPU | integer | The number of images per batch per GPU. | 4 | 1 | yes |
training_config.num_epochs | Number of Epochs | integer | The total number of epochs to run the experiment. | 120 | 1 | yes | Yes |
training_config.enable_qat | Enable Quantization Aware Training | bool | Flag to enable Quantization Aware Training | FALSE | yes | Yes |
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate | Minimum Learning Rate | float | The minimum learning rate in the learning rate schedule. | 5.00E-06 | yes | Yes |
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate | Maximum Learning Rate | float | The maximum learning rate in the learning rate schedule. | 5.00E-04 | yes | Yes |
training_config.learning_rate.soft_start_annealing_schedule.soft_start | Soft Start | float | The time to ramp up the learning rate from minimum learning rate to maximum learning rate. | 0.100000001 | 0 | 1 | yes | Yes |
training_config.learning_rate.soft_start_annealing_schedule.annealing | Annealing | float | The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. | 0.699999988 | 0 | 1 | yes | Yes |
training_config.regularizer.type | Regularizer Type | string | The type of the regularizer being used. | __L1__ | __NO_REG__, __L1__, __L2__ | yes |
training_config.regularizer.weight | Regularizer Weight | float | The floating point weight of the regularizer. | 3.00E-09 | yes |
training_config.optimizer.adam.epsilon | Optimizer Adam Epsilon | float | A very small number to prevent any division by zero in the implementation. | 1.00E-08 | yes |
training_config.optimizer.adam.beta1 | Optimizer Adam Beta1 | float | 0.899999976 | yes |
training_config.optimizer.adam.beta2 | Optimizer Adam Beta2 | float | 0.999000013 | yes |
training_config.cost_scaling.enabled | Enable Cost Scaling | bool | Enables cost scaling during training. | FALSE | yes |
training_config.cost_scaling.initial_exponent | Cost Scaling Initial Exponent | float | 20 | yes |
training_config.cost_scaling.increment | Cost Scaling Increment | float | 0.005 | yes |
training_config.cost_scaling.decrement | Cost Scaling Decrement | float | 1 | yes |
training_config.checkpoint_interval | Checkpoint Interval | integer | The interval (in epochs) at which train saves intermediate models. | 10 | 0 | yes |
evaluation_config | Evaluation | collection | yes |
evaluation_config.average_precision_mode | Average Precision Mode | string | The mode in which the average precision for each class is calculated. | __SAMPLE__ | __SAMPLE__, __INTEGRATE__ |
evaluation_config.validation_period_during_training | Validation Period During Training | integer | The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. | 10 | 1 | yes |
evaluation_config.first_validation_epoch | First Validation Epoch | integer | The first epoch to start running validation. Ideally, it is preferred to wait for at least 20-30% of the total number of epochs before starting evaluation, since the predictions in the initial epochs would be fairly inaccurate. Too many candidate boxes may be sent to clustering and this can cause the evaluation to slow down. | 30 | 1 | yes |
cost_function_config | Cost function | collection |
cost_function_config.enable_autoweighting | Auto-Weighting | bool | TRUE | yes |
cost_function_config.max_objective_weight | Maximum Objective Weight | float | 0.999899983 |
cost_function_config.min_objective_weight | Minimum Objective Weight | float | 1.00E-04 |
classwise_config | Class-wise organized parameters | list |
classwise_config.key | Class Key | string | Name of class for the classwise parameters | person |
classwise_config.value.evaluation_config | Evaluation config elements per class | collection |
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap | Minimum Detection Ground Truth Overlaps | float | Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class. | 0.5 | 0 | 1 | yes |
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height | Minimum Height | integer | Minimum height in pixels for a valid ground truth and prediction bbox. | 20 | 0 | yes |
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height | Maximum Height | integer | Maximum height in pixels for a valid ground truth and prediction bbox. | 9999 | 0 | yes |
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width | Minimum Width | integer | Minimum width in pixels for a valid ground truth and prediction bbox. | 10 | 0 | yes |
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width | Maximum Width | integer | Maximum width in pixels for a valid ground truth and prediction bbox. | 9999 | 0 | yes |
classwise_config.value.cost_function_config | Class-wise cost function config per class | collection | yes |
classwise_config.value.cost_function_config.class_weight | Class Weight | float | 4 | yes |
classwise_config.value.cost_function_config.coverage_foreground_weight | Coverage Foreground Weight | float | 0.050000001 | yes |
classwise_config.value.cost_function_config.objectives | Objectives | list | [{"name": "cov", "initial_weight": 1.0, "weight_target": 1.0}, {"name": "bbox", "initial_weight": 10.0, "weight_target": 10.0}] | yes |
classwise_config.value.cost_function_config.objectives.name | Objective Name | string | Objective name such as cov or bbox. | cov | yes |
classwise_config.value.cost_function_config.objectives.initial_weight | Initial Weight | float | Initial weight for named objective. | 1 | yes |
classwise_config.value.cost_function_config.objectives.weight_target | Weight Target | float | Target weight for named objective. | 1 | yes |
classwise_config.value.bbox_rasterizer_config | Rasterization | collection | yes |
classwise_config.value.bbox_rasterizer_config.cov_center_x | Center of Object X-Coordinate | float | x-coordinate of the center of the object | 0.5 | 0 | 1 | yes |
classwise_config.value.bbox_rasterizer_config.cov_center_y | Center of Object Y-Coordinate | float | y-coordinate of the center of the object | 0.5 | 0 | 1 | yes |
classwise_config.value.bbox_rasterizer_config.cov_radius_x | Center of Object X-Radius | float | x-radius of the coverage ellipse | 1 | 0 | 1 | yes |
classwise_config.value.bbox_rasterizer_config.cov_radius_y | Center of Object Y-Radius | float | y-radius of the coverage ellipse | 1 | 0 | 1 | yes |
classwise_config.value.bbox_rasterizer_config.bbox_min_radius | Bounding Box Minimum Radius | float | The minimum radius of the coverage region to be drawn for boxes | 1 | 0 | 1 | yes |
classwise_config.postprocessing_config | Post-Processing | collection |
classwise_config.postprocessing_config.clustering_config.coverage_threshold | Coverage Threshold | float | The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. | 0.0075 | 0 | 1 | yes |
classwise_config.postprocessing_config.clustering_config.dbscan_eps | DBSCAN Samples Distance | float | The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. | 0.230000004 | 0 | 1 | yes |
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples | DBSCAN Minimum Samples | float | The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. | 0.050000001 | 0 | 1 | yes |
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height | Minimum Bounding Box Height | integer | The minimum height in pixels to consider as a valid detection post clustering. | 20 | 0 | 10000 | yes |
classwise_config.postprocessing_config.clustering_config.clustering_algorithm | Clustering Algorithm | string | Defines the post-processing algorithm to cluster raw detections into the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. | __DBSCAN__ | __DBSCAN__, __NMS__, __HYBRID__ | yes |
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold | DBSCAN Confidence Threshold | float | The confidence threshold used to filter out the clustered bounding box output from DBSCAN. | 0.1 | 0.1 | yes |
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold | NMS IOU Threshold | float | The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. | 0.2 | 0 | 1 |
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold | NMS Confidence Threshold | float | The confidence threshold to filter out clustered bounding boxes from NMS. | 0 | 0 | 1 |
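
These parameters mirror the DetectNet_v2 experiment spec file that the evaluate action consumes. Below is an abbreviated, illustrative sketch in protobuf-text form built from the defaults listed above; the dataset paths and class name are placeholders, and many optional blocks (bbox rasterizer, cost function, class-wise post-processing) are omitted.

```
random_seed: 42
dataset_config {
  data_sources {
    tfrecords_path: "/datasets/tfrecords/kitti_trainval/*"   # placeholder path
    image_directory_path: "/datasets/training"               # placeholder path
  }
  image_extension: "png"
  target_class_mapping {
    key: "car"      # placeholder class
    value: "car"
  }
  validation_fold: 0
}
augmentation_config {
  preprocessing {
    output_image_width: 1248
    output_image_height: 384
    output_image_channel: 3
    min_bbox_width: 1.0
    min_bbox_height: 1.0
  }
  spatial_augmentation {
    hflip_probability: 0.5
    translate_max_x: 8.0
    translate_max_y: 8.0
  }
  color_augmentation {
    hue_rotation_max: 25.0
    saturation_shift_max: 0.2
    contrast_scale_max: 0.1
    contrast_center: 0.5
  }
}
model_config {
  arch: "resnet"
  num_layers: 18
  use_batch_norm: true
  objective_set {
    cov {}
    bbox {
      scale: 35.0
      offset: 0.5
    }
  }
}
training_config {
  batch_size_per_gpu: 4
  num_epochs: 120
  learning_rate {
    soft_start_annealing_schedule {
      min_learning_rate: 5e-06
      max_learning_rate: 5e-04
      soft_start: 0.1
      annealing: 0.7
    }
  }
  regularizer {
    type: L1
    weight: 3e-09
  }
  optimizer {
    adam {
      epsilon: 1e-08
      beta1: 0.9
      beta2: 0.999
    }
  }
  checkpoint_interval: 10
}
evaluation_config {
  average_precision_mode: SAMPLE
  validation_period_during_training: 10
  first_validation_epoch: 30
}
```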
export

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version | Schema Version | const | The version of this schema | 1 |
model | Model | hidden | UNIX path to the model file | 0.1 | yes |
key | Encryption Key | hidden | Encryption key | tlt_encode | yes |
output_file | Output File | hidden | UNIX path to where the exported model will be saved. | yes |
force_ptq | Force Post-Training Quantization | bool | Force generating int8 engine using Post Training Quantization | FALSE | no |
cal_image_dir | hidden |
data_type | Data Type | string | The data type of the TensorRT engine. | fp32 | int8, fp32, fp16 | yes | yes |
strict_type_constraints | bool | FALSE |
gen_ds_config | bool | FALSE |
cal_cache_file | Calibration cache file | hidden | UNIX path to the int8 calibration cache file | yes | yes |
batches | Number of calibration batches | integer | Number of batches to calibrate the model when run in INT8 mode | 100 |
max_workspace_size | integer | Example: The integer value of 1<<30, 2<<30 |
max_batch_size | integer | 1 |
batch_size | Batch size | integer | Number of images per batch when generating the TensorRT engine. | 100 | yes |
min_batch_size | integer | 1 |
opt_batch_size | integer | 1 |
experiment_spec | Experiment Spec | hidden | UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. | yes |
engine_file | Engine File | hidden | UNIX path to the model engine file. | yes |
static_batch_size | integer | -1 |
results_dir | hidden |
verbose | hidden | TRUE |
inference

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | popular |
---|---|---|---|---|---|---|---|---|---|---|
inferencer_config | collection |
inferencer_config.tlt_config | collection |
inferencer_config.tlt_config.model | hidden |
inferencer_config.tensorrt_config | collection |
inferencer_config.tensorrt_config.parser | integer | 0,1,2 |
inferencer_config.tensorrt_config.backend_data_type | integer | 0,1,2 |
inferencer_config.tensorrt_config.save_engine | bool |
inferencer_config.tensorrt_config.trt_engine | hidden |
inferencer_config.tensorrt_config.calibrator_config | collection |
inferencer_config.input_nodes | list | list of string |
inferencer_config.output_nodes | list | list of string |
inferencer_config.batch_size | integer | 16 |
inferencer_config.image_height | integer | 384 |
inferencer_config.image_width | integer | 1248 |
inferencer_config.image_channels | integer | 3 |
inferencer_config.gpu_index | integer | 0 |
inferencer_config.target_classes | list | list of string | ["car"] | yes | yes |
inferencer_config.stride | integer |
bbox_handler_config | collection |
bbox_handler_config.kitti_dump | bool | TRUE |
bbox_handler_config.disable_overlay | bool | FALSE |
bbox_handler_config.overlay_linewidth | integer | 2 |
bbox_handler_config.classwise_bbox_handler_config | list | yes | yes |
bbox_handler_config.classwise_bbox_handler_config.key | string | default |
bbox_handler_config.classwise_bbox_handler_config.value | collection |
bbox_handler_config.classwise_bbox_handler_config.value.output_map | string |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config | collection |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.coverage_threshold | Coverage Threshold | float | The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. | 0.005 | 0 | 1 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_eps | DBSCAN Samples Distance | float | The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. | 0.3 | 0 | 1 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_min_samples | DBSCAN Minimum Samples | float | The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. | 0.05 | 0 | 1 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.minimum_bounding_box_height | Minimum Bounding Box Height | integer | The minimum height in pixels to consider as a valid detection post clustering. | 4 | 0 | 10000 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.clustering_algorithm | Clustering Algorithm | string | Defines the post-processing algorithm to cluster raw detections into the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. | __DBSCAN__ | __DBSCAN__, __NMS__, __HYBRID__ |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.dbscan_confidence_threshold | DBSCAN Confidence Threshold | float | The confidence threshold used to filter out the clustered bounding box output from DBSCAN. | 0.9 | 0.1 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_iou_threshold | NMS IOU Threshold | float | The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. | 0 | 1 |
bbox_handler_config.classwise_bbox_handler_config.value.clustering_config.nms_confidence_threshold | NMS Confidence Threshold | float | The confidence threshold to filter out clustered bounding boxes from NMS. | 0 | 1 |
bbox_handler_config.classwise_bbox_handler_config.value.confidence_model | string | aggregate_cov |
bbox_handler_config.classwise_bbox_handler_config.value.output_map | string |
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color | collection | 0 | 0,1,2 |
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.R | integer | 255 |
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.G | integer | 0 |
bbox_handler_config.classwise_bbox_handler_config.value.bbox_color.B | integer | 0 |
bbox_handler_config.postproc_classes | list | list of string |
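
The inference parameters above correspond to a standalone inference spec made up of an inferencer block and a bounding-box handler block. The following protobuf-text sketch is illustrative only, assuming a single hypothetical class and placeholder model path; exact field placement should be checked against your spec.

```
inferencer_config {
  target_classes: "car"                       # placeholder class
  image_width: 1248
  image_height: 384
  image_channels: 3
  batch_size: 16
  gpu_index: 0
  tlt_config {
    model: "/results/weights/model.tlt"       # placeholder path
  }
}
bbox_handler_config {
  kitti_dump: true
  disable_overlay: false
  overlay_linewidth: 2
  classwise_bbox_handler_config {
    key: "car"
    value {
      confidence_model: "aggregate_cov"
      output_map: "car"
      bbox_color {
        R: 0
        G: 255
        B: 0
      }
      clustering_config {
        coverage_threshold: 0.005
        dbscan_eps: 0.3
        dbscan_min_samples: 0.05
        minimum_bounding_box_height: 4
      }
    }
  }
}
```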
prune

parameter | display_name | value_type | description | default_value | examples | valid_min | valid_max | valid_options | required | regex | popular |
---|---|---|---|---|---|---|---|---|---|---|---|
model | Model path | hidden | UNIX path to where the input model is located. | yes |
output_file | Output File | hidden | UNIX path to where the pruned model will be saved. | yes |
results_dir | Results directory | hidden |
key | Encode key | hidden |
normalizer | Normalizer | string | How to normalize | max | max, L2 |
equalization_criterion | Equalization Criterion | string | Criteria to equalize the stats of inputs to an element-wise op layer. | union | union, intersection, arithmetic_mean, geometric_mean | no |
pruning_granularity | Pruning Granularity | integer | Number of filters to remove at a time. | 8 | no |
pruning_threshold | Pruning Threshold | float | Threshold to compare normalized norm against. | 0.1 | 0 | 1 | yes | yes |
min_num_filters | Minimum number of filters | integer | Minimum number of filters to be kept per layer | 16 | no |
excluded_layers | Excluded layers | string | String of list: list of excluded layers. Example: -i item1 item2 |
verbose | verbosity | hidden | TRUE |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
popular |
regex |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
enable_determinism |
Enable determinism |
bool |
Flag to enable deterministic training |
FALSE |
FALSE, TRUE |
||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
|||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
||||||||
dataset_config.image_extension |
Image Extension |
string |
Extension of the images to be used. |
png |
png, jpg, jpeg |
yes |
|||||
dataset_config.data_sources.tfrecords_path |
TFRecord Path |
hidden |
/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* |
||||||||
dataset_config.data_sources.image_directory_path |
Image Path |
hidden |
/shared/users/1234/datasets/5678/training |
||||||||
dataset_config.validation_data_source.tfrecords_path |
Validation TFRecord Path |
hidden |
/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* |
||||||||
dataset_config.validation_data_source.image_directory_path |
Validation Image Path |
hidden |
/shared/users/1234/datasets/5678/training |
||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc may be grouped under automobile. |
||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
dataset_config.validation_fold |
Validation Fold |
integer |
In case of an n fold tfrecords, you define the index of the fold to use for validation. For sequencewise validation choose the validation fold in the range [0, N-1]. For random split partitioning, force the validation fold index to 0 as the tfrecord is just 2-fold. |
0 |
|||||||
augmentation_config |
Data Augmentation |
collection |
Collection of parameters to configure the preprocessing and on the fly data augmentation |
Yes |
|||||||
augmentation_config.preprocessing.output_image_width |
Image Width |
integer |
The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. |
1248 |
480 |
yes |
Yes |
||||
augmentation_config.preprocessing.output_image_height |
Image Height |
integer |
The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. |
384 |
272 |
yes |
Yes |
||||
augmentation_config.preprocessing.min_bbox_width |
Bounding Box Width |
float |
The minimum width of the object labels to be considered for training. |
1 |
0 |
yes |
|||||
augmentation_config.preprocessing.min_bbox_height |
Bounding Box Height |
float |
The minimum height of the object labels to be considered for training. |
1 |
0 |
yes |
|||||
augmentation_config.preprocessing.output_image_channel |
Image Channel |
integer |
The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. |
3 |
1, 3 |
yes |
|||||
augmentation_config.preprocessing.crop_right |
Crop Right |
integer |
The right boundary of the crop to be extracted from the original image. |
0 |
yes |
||||||
augmentation_config.preprocessing.crop_left |
Crop Left |
integer |
The left boundary of the crop to be extracted from the original image. |
0 |
yes |
||||||
augmentation_config.preprocessing.crop_top |
Crop Top |
integer |
The top boundary of the crop to be extracted from the original image. |
0 |
yes |
||||||
augmentation_config.preprocessing.crop_bottom |
Crop Bottom |
integer |
The bottom boundary of the crop to be extracted from the original image. |
0 |
yes |
||||||
augmentation_config.preprocessing.scale_height |
Scale Height |
float |
The floating point factor to scale the height of the cropped images. |
0 |
yes |
||||||
augmentation_config.preprocessing.scale_width |
Scale Width |
float |
The floating point factor to scale the width of the cropped images. |
0 |
yes |
||||||
augmentation_config.spatial_augmentation.hflip_probability |
Horizontal-Flip Probability |
float |
The probability to flip an input image horizontally. |
0.5 |
0 |
1 |
|||||
augmentation_config.spatial_augmentation.vflip_probability |
Vertical-Flip Probability |
float |
The probability to flip an input image vertically. |
0 |
1 |
||||||
augmentation_config.spatial_augmentation.zoom_min |
Minimum Zoom Scale |
float |
The minimum zoom scale of the input image. |
1 |
0 |
||||||
augmentation_config.spatial_augmentation.zoom_max |
Maximum Zoom Scale |
float |
The maximum zoom scale of the input image. |
1 |
0 |
||||||
augmentation_config.spatial_augmentation.translate_max_x |
X-Axis Maximum Traslation |
float |
The maximum translation to be added across the x axis. |
8 |
0 |
||||||
augmentation_config.spatial_augmentation.translate_max_y |
Y-Axis Maximum Translation |
float |
The maximum translation to be added across the y axis. |
8 |
0 |
||||||
augmentation_config.spatial_augmentation.rotate_rad_max |
Image Rotation |
float |
The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. |
0 |
|||||||
augmentation_config.color_augmentation.color_shift_stddev |
Color Shift Standard Deviation |
float |
The standard devidation value for the color shift. |
0 |
1 |
||||||
augmentation_config.color_augmentation.hue_rotation_max |
Hue Maximum Rotation |
float |
The maximum rotation angle for the hue rotation matrix. |
25 |
0 |
360 |
|||||
augmentation_config.color_augmentation.saturation_shift_max |
Saturation Maximum Shift |
float |
The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. |
0.2 |
0 |
1 |
|||||
augmentation_config.color_augmentation.contrast_scale_max |
Contrast Maximum Scale |
float |
The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. |
0.1 |
0 |
1 |
|||||
augmentation_config.color_augmentation.contrast_center |
Contrast Center |
float |
The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. |
0.5 |
0.5 |
||||||
bbox_rasterizer_config |
Bounding box rasterizer |
collection |
Collection of parameters to configure the bounding box rasterizer |
||||||||
bbox_rasterizer_config.deadzone_radius |
Bounding box rasterizer deadzone radius |
float |
0.4 |
0 |
1 |
yes |
|||||
model_config |
Model |
collection |
|||||||||
model_config.arch |
BackBone Architecture |
string |
The architecture of the backbone feature extractor to be used for training. |
resnet |
resnet |
yes |
|||||
model_config.pretrained_model_file |
PTM File Path |
hidden |
This parameter defines the path to a pretrained TLT model file. If the load_graph flag is set to false, it is assumed that only the weights of the pretrained model file is to be used. In this case, TLT train constructs the feature extractor graph in the experiment and loads the weights from the pretrained model file that has matching layer names. Thus, transfer learning across different resolutions and domains are supported. For layers that may be absent in the pretrained model, the tool initializes them with random weights and skips the import for that layer. |
/shared/.pretrained/resnet18/detectnet_v2_vresnet18/resnet18.hdf5 |
|||||||
model_config.load_graph |
PTM Load Graph |
bool |
A flag to determine whether or not to load the graph from the pretrained model file, or just the weights. For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported. |
FALSE |
|||||||
model_config.freeze_blocks |
Freeze Blocks |
integer |
This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. |
0 |
3 |
||||||
model_config.freeze_bn |
Freeze Batch Normalization |
bool |
A flag to determine whether to freeze the Batch Normalization layers in the model during training. |
||||||||
model_config.all_projections |
All Projections |
bool |
For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. |
||||||||
model_config.num_layers |
Number of Layers |
integer |
The depth of the feature extractor for scalable templates. |
18 |
10, 18, 34, 50, 101 |
yes |
|||||
model_config.use_pooling |
Use Pooling |
bool |
Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. |
||||||||
model_config.use_batch_norm |
Use Batch Normalization |
bool |
A flag to determine whether to use Batch Normalization layers or not. |
TRUE |
|||||||
model_config.dropout_rate |
Dropout Rate |
float |
Probability for dropout. |
0 |
1 |
||||||
model_config.training_precision.backend_floatx |
Backend Training Precision |
string |
A nested parameter that sets the precision of the backend training framework. |
__FLOAT32__ |
yes |
||||||
model_config.objective_set.cov |
Objective COV |
collection |
The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. |
{} |
yes |
||||||
model_config.objective_set.bbox.scale |
Objective Bounding Box Scale |
float |
The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. |
35 |
yes |
||||||
model_config.objective_set.bbox.offset |
Objective Bounding Box Offset |
float |
The objectives for training the network. For object-detection networks, set it to learn cov and bbox. These parameters should not be altered for the current training pipeline. |
0.5 |
yes |
||||||
training_config |
Training |
collection |
|||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
4 |
1 |
yes |
|||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
120 |
1 |
yes |
Yes |
||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
A flag to enable Quantization Aware Training (QAT). |
FALSE |
yes |
Yes |
|||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-06 |
yes |
Yes |
|||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
5.00E-04 |
yes |
Yes |
|||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.100000001 |
0 |
1 |
yes |
Yes |
|||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. See the schedule sketch after this table. |
0.699999988 |
0 |
1 |
yes |
Yes |
|||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__NO_REG__, __L1__, __L2__ |
yes |
|||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-09 |
yes |
||||||
training_config.optimizer.adam.epsilon |
Optimizer Adam Epsilon |
float |
A very small number to prevent any division by zero in the implementation. |
1.00E-08 |
yes |
||||||
training_config.optimizer.adam.beta1 |
Optimizer Adam Beta1 |
float |
0.899999976 |
yes |
|||||||
training_config.optimizer.adam.beta2 |
Optimizer Adam Beta2 |
float |
0.999000013 |
yes |
|||||||
training_config.cost_scaling.enabled |
Enable Cost Scaling |
bool |
Enables cost scaling during training. |
FALSE |
yes |
||||||
training_config.cost_scaling.initial_exponent |
Cost Scaling Initial Exponent |
float |
20 |
yes |
|||||||
training_config.cost_scaling.increment |
Cost Scaling Increment |
float |
0.005 |
yes |
|||||||
training_config.cost_scaling.decrement |
Cost Scaling Decrement |
float |
1 |
yes |
|||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
10 |
0 |
yes |
|||||
evaluation_config |
Evaluation |
collection |
yes |
||||||||
evaluation_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
__SAMPLE__, __INTEGRATE__ |
||||||
evaluation_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
yes |
|||||
evaluation_config.first_validation_epoch |
First Validation Epoch |
integer |
The first epoch to start running validation. It is preferable to wait for at least 20-30% of the total number of epochs before starting evaluation, since predictions in the initial epochs are fairly inaccurate and too many candidate boxes may be sent to clustering, which slows down evaluation. |
30 |
1 |
yes |
|||||
cost_function_config |
Cost function |
collection |
|||||||||
cost_function_config.enable_autoweighting |
Auto-Weighting |
bool |
TRUE |
yes |
|||||||
cost_function_config.max_objective_weight |
Maximum Objective Weight |
float |
0.999899983 |
||||||||
cost_function_config.min_objective_weight |
Minimum Objective Weight |
float |
1.00E-04 |
||||||||
classwise_config |
Class-wise organized parameters |
list |
|||||||||
classwise_config.key |
Class Key |
string |
Name of class for the classwise parameters |
person |
|||||||
classwise_config.value.evaluation_config |
Evaluation config elements per class |
collection |
|||||||||
classwise_config.value.evaluation_config.minimum_detection_ground_truth_overlap |
Minimum Detection Ground Truth Overlaps |
float |
Minimum IOU between ground truth and predicted box after clustering to call a valid detection. This parameter is a repeatable dictionary and a separate one must be defined for every class. |
0.5 |
0 |
1 |
yes |
||||
classwise_config.value.evaluation_config.evaluation_box_config.minimum_height |
Minimum Height |
integer |
Minimum height in pixels for a valid ground truth and prediction bbox. |
20 |
0 |
yes |
|||||
classwise_config.value.evaluation_config.evaluation_box_config.maximum_height |
Maximum Height |
integer |
Maximum height in pixels for a valid ground truth and prediction bbox. |
9999 |
0 |
yes |
|||||
classwise_config.value.evaluation_config.evaluation_box_config.minimum_width |
Minimum Width |
integer |
Minimum width in pixels for a valid ground truth and prediction bbox. |
10 |
0 |
yes |
|||||
classwise_config.value.evaluation_config.evaluation_box_config.maximum_width |
Maximum Width |
integer |
Maximum width in pixels for a valid ground truth and prediction bbox. |
9999 |
0 |
yes |
|||||
classwise_config.value.cost_function_config |
Class-wise cost function config per class |
collection |
yes |
||||||||
classwise_config.value.cost_function_config.class_weight |
Class Weight |
float |
4 |
yes |
|||||||
classwise_config.value.cost_function_config.coverage_foreground_weight |
Coverage Foreground Weight |
float |
0.050000001 |
yes |
|||||||
classwise_config.value.cost_function_config.objectives |
Objectives |
list |
[{"name": "cov", "initial_weight": 1.0, "weight_target": 1.0}, {"name": "bbox", "initial_weight": 10.0, "weight_target": 10.0}] |
yes |
|||||||
classwise_config.value.cost_function_config.objectives.name |
Objective Name |
string |
Objective name such as cov or bbox. |
cov |
yes |
||||||
classwise_config.value.cost_function_config.objectives.initial_weight |
Initial Weight |
float |
Initial weight for named objective. |
1 |
yes |
||||||
classwise_config.value.cost_function_config.objectives.weight_target |
Weight Target |
float |
Target weight for named objective. |
1 |
yes |
||||||
classwise_config.value.bbox_rasterizer_config |
Rasterization |
collection |
yes |
||||||||
classwise_config.value.bbox_rasterizer_config.cov_center_x |
Center of Object X-Coordinate |
float |
x-coordinate of the center of the object |
0.5 |
0 |
1 |
yes |
||||
classwise_config.value.bbox_rasterizer_config.cov_center_y |
Center of Object Y-Coordinate |
float |
y-coordinate of the center of the object |
0.5 |
0 |
1 |
yes |
||||
classwise_config.value.bbox_rasterizer_config.cov_radius_x |
Center of Object X-Radius |
float |
x-radius of the coverage ellipse |
1 |
0 |
1 |
yes |
||||
classwise_config.value.bbox_rasterizer_config.cov_radius_y |
Center of Object Y-Radius |
float |
y-radius of the coverage ellipse |
1 |
0 |
1 |
yes |
||||
classwise_config.value.bbox_rasterizer_config.bbox_min_radius |
Bounding Box Minimum Radius |
float |
The minimum radius of the coverage region to be drawn for boxes |
1 |
0 |
1 |
yes |
||||
classwise_config.postprocessing_config |
Post-Processing |
collection |
|||||||||
classwise_config.postprocessing_config.clustering_config.coverage_threshold |
Coverage Threshold |
float |
The minimum threshold of the coverage tensor output to be considered a valid candidate box for clustering. The four coordinates from the bbox tensor at the corresponding indices are passed for clustering. |
0.0075 |
0 |
1 |
yes |
||||
classwise_config.postprocessing_config.clustering_config.dbscan_eps |
DBSCAN Samples Distance |
float |
The maximum distance between two samples for one to be considered in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. The greater the dbscan_eps value, the more boxes are grouped together. |
0.230000004 |
0 |
1 |
yes |
||||
classwise_config.postprocessing_config.clustering_config.dbscan_min_samples |
DBSCAN Minimum Samples |
float |
The total weight in a neighborhood for a point to be considered as a core point. This includes the point itself. |
0.050000001 |
0 |
1 |
yes |
||||
classwise_config.postprocessing_config.clustering_config.minimum_bounding_box_height |
Minimum Bounding Box Height |
integer |
The minimum height in pixels to consider as a valid detection post clustering. |
20 |
0 |
10000 |
yes |
||||
classwise_config.postprocessing_config.clustering_config.clustering_algorithm |
Clustering Algorithm |
string |
Defines the post-processing algorithm used to cluster raw detections into the final bbox render. When using HYBRID mode, ensure both DBSCAN and NMS configuration parameters are defined. |
__DBSCAN__ |
__DBSCAN__, __NMS__, __HYBRID__ |
yes |
|||||
classwise_config.postprocessing_config.clustering_config.dbscan_confidence_threshold |
DBSCAN Confidence Threshold |
float |
The confidence threshold used to filter out the clustered bounding box output from DBSCAN. |
0.1 |
0.1 |
yes |
|||||
classwise_config.postprocessing_config.clustering_config.nms_iou_threshold |
NMS IOU Threshold |
float |
The Intersection Over Union (IOU) threshold to filter out redundant boxes from raw detections to form final clustered outputs. |
0.2 |
0 |
1 |
|||||
classwise_config.postprocessing_config.clustering_config.nms_confidence_threshold |
NMS Confidence Threshold |
float |
The confidence threshold to filter out clustered bounding boxes from NMS. |
0 |
0 |
1 |
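The soft_start and annealing values in training_config.learning_rate above are expressed as fractions of total training progress. The sketch below illustrates how such a soft-start annealing schedule is commonly computed; the log-space interpolation is an assumption for illustration, and the trainer's exact curve may differ.

```python
import math

def soft_start_annealing_lr(progress, min_lr=5e-6, max_lr=5e-4,
                            soft_start=0.1, annealing=0.7):
    """Learning rate at `progress` in [0, 1] of total training.

    Ramps from min_lr to max_lr over the first `soft_start` fraction,
    holds at max_lr until `annealing`, then decays back to min_lr.
    Interpolation is done in log space (an assumption; the trainer's
    exact curve may differ).
    """
    log_min, log_max = math.log(min_lr), math.log(max_lr)
    if progress < soft_start:                       # warm-up phase
        t = progress / soft_start
        return math.exp(log_min + t * (log_max - log_min))
    if progress < annealing:                        # constant phase
        return max_lr
    t = (progress - annealing) / (1.0 - annealing)  # annealing phase
    return math.exp(log_max - t * (log_max - log_min))

# Example: learning rate halfway through training with the defaults above
print(soft_start_annealing_lr(0.5))   # -> 0.0005 (max_lr, constant phase)
```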
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example, car, van, and heavy_truck may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
A flag to enable Quantization Aware Training (QAT). |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs with no improvement after which training is stopped. |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum random crop scale. |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum random crop scale. |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
dssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. See the anchor sketch after this table. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
dssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
|||||||||||
dssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
dssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
dssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
dssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85] |
||||||||||
dssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
dssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
dssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
dssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
dssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
dssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
dssd_config.pred_num_channels |
Prediction Layer Channel |
integer |
The number of channels of the DSSD prediction layer |
512 |
1 |
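As a rough illustration of how the scales, aspect_ratios_global, and two_boxes_for_ar1 parameters above combine into anchor shapes, the following sketch follows the original SSD formulation (w = s·sqrt(ar), h = s/sqrt(ar), plus an extra square box of scale sqrt(s_k·s_{k+1}) when two_boxes_for_ar1 is true). The assumption that the scales list holds one more entry than the number of prediction layers is for illustration only; the DSSD implementation may differ in detail.

```python
import math

def anchor_shapes(scales, aspect_ratios, two_boxes_for_ar1=True):
    """Return a list of (width, height) anchor shapes per prediction layer.

    Shapes are relative to the input size: w = s*sqrt(ar), h = s/sqrt(ar).
    With two_boxes_for_ar1, an extra square box of scale sqrt(s_k * s_{k+1})
    is added, as in the original SSD paper (an assumption for illustration).
    """
    layers = []
    for k, s in enumerate(scales[:-1]):   # last scale only feeds the extra box
        shapes = [(s * math.sqrt(ar), s / math.sqrt(ar)) for ar in aspect_ratios]
        if two_boxes_for_ar1 and 1.0 in aspect_ratios:
            s_prime = math.sqrt(s * scales[k + 1])
            shapes.append((s_prime, s_prime))
        layers.append(shapes)
    return layers

# Defaults shown above
scales = [0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85]
aspect_ratios = [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0]
print(len(anchor_shapes(scales, aspect_ratios)))   # 6 prediction layers
```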
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
threshold |
Threshold |
float |
0.3 |
|||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example, car, van, and heavy_truck may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
A flag to enable Quantization Aware Training (QAT). |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs with no improvement after which training is stopped. |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold for filtering raw detections before NMS; see the sketch after this table |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum random crop scale. |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum random crop scale. |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
dssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
dssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
|||||||||||
dssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
dssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
dssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
dssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85] |
||||||||||
dssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
dssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
dssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
dssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
dssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
dssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
dssd_config.pred_num_channels |
Prediction Layer Channel |
integer |
The number of channels of the DSSD prediction layer |
512 |
1 |
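The nms_config values above interact as in standard greedy NMS: candidates below confidence_threshold are dropped, boxes overlapping a kept box by more than clustering_iou_threshold are suppressed, and at most top_k detections survive. The sketch below is an illustration of that logic, not the exact TensorRT NMS plugin implementation.

```python
def iou(a, b):
    """IoU of two boxes given as (x1, y1, x2, y2)."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def nms(detections, confidence_threshold=0.01,
        clustering_iou_threshold=0.6, top_k=200):
    """detections: list of (score, box). Returns the kept detections."""
    # 1. Drop low-confidence candidates.
    candidates = [d for d in detections if d[0] >= confidence_threshold]
    # 2. Greedily keep the highest-scoring box; suppress heavy overlaps.
    candidates.sort(key=lambda d: d[0], reverse=True)
    kept = []
    for score, box in candidates:
        if all(iou(box, kb) < clustering_iou_threshold for _, kb in kept):
            kept.append((score, box))
        if len(kept) == top_k:        # 3. Cap the number of outputs.
            break
    return kept
```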
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
initial_epoch |
Initial Epoch (CLI) |
integer |
1 |
|||||||||||
use_multiprocessing |
CLI parameter |
bool |
FALSE |
|||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example, car, van, and heavy_truck may be grouped under automobile (see the mapping sketch after this table). |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
A flag to enable Quantization Aware Training (QAT). |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs with no improvement after which training is stopped. |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum random crop scale. |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum random crop scale. |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
dssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
dssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
|||||||||||
dssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
dssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
dssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
dssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.05, 0.1, 0.25, 0.4, 0.55, 0.7, 0.85] |
||||||||||
dssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
dssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
dssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
dssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
dssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
dssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
dssd_config.pred_num_channels |
Prediction Layer Channel |
integer |
The number of channels of the DSSD prediction layer |
512 |
1 |
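The dataset_config.target_class_mapping entries above remap each source class name (key) to the target class (value) that the network learns. A minimal sketch of that behavior, using the hypothetical car/van/heavy_truck grouping from the description:

```python
# Hypothetical mapping, following the car/van/heavy_truck -> automobile example above.
target_class_mapping = {
    "car": "automobile",
    "van": "automobile",
    "heavy_truck": "automobile",
    "person": "person",
}

labels = ["car", "van", "person", "heavy_truck"]
# Classes without a mapping entry are typically ignored during training (assumption).
mapped = [target_class_mapping[c] for c in labels if c in target_class_mapping]
print(mapped)   # ['automobile', 'automobile', 'person', 'automobile']
```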
convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
b |
batch_size |
integer |
calibration batch size |
8 |
yes |
CLI argument |
||||||
c |
cache_file |
path |
calibration cache file (default cal.bin) |
CLI argument |
||||||||
d |
input_dims |
list |
comma separated list of input dimensions (not required for TLT 3.0 new models). |
CLI argument |
||||||||
i |
input_order |
enum |
input dimension ordering |
nchw |
nchw, nhwc, nc |
CLI argument |
||||||
m |
max_batch_size |
integer |
Maximum TensorRT engine batch size (default 16). If an out-of-memory error occurs, decrease the batch size accordingly. |
16 |
yes |
CLI argument |
||||||
o |
outputs |
list |
comma separated list of output node names |
CLI argument |
||||||||
p |
parse_profile_shapes |
list |
Comma-separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape uses x as the delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW. Can be specified multiple times if there are multiple input tensors for the model. This argument is only useful in the dynamic-shape case; see the example after this table. |
CLI argument |
||||||||
s |
strict_type_constraints |
bool |
TensorRT strict_type_constraints flag for INT8 mode |
FALSE |
CLI argument |
|||||||
t |
data_type |
enum |
TensorRT data type |
fp32 |
fp32, fp16, int8 |
yes |
CLI argument |
|||||
u |
dla_core |
int |
Use DLA core N for layers that support DLA (default -1, meaning no DLA core is used for inference; GPU fallback is always allowed). |
-1 |
CLI argument |
|||||||
w |
max_workspace_size |
int |
Maximum workspace size of the TensorRT engine (default 1<<30). If an out-of-memory error occurs, increase the workspace size accordingly. |
1<<30, 2<<30 |
CLI argument |
|||||||
platform |
platform |
enum |
platform label |
rtx |
yes |
yes |
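As an illustration of the -p optimization-profile format for a dynamic-shape model, the sketch below assembles a converter command in Python using only the flags documented above. The binary name tao-converter, the model file name, and the input tensor name Input are assumptions for illustration only.

```python
# Hypothetical invocation; only the flags documented above are used.
cmd = [
    "tao-converter",                    # assumed converter binary name
    "-t", "fp16",                       # TensorRT data type
    "-m", "8",                          # maximum engine batch size
    "-w", str(1 << 30),                 # maximum workspace size in bytes
    # One -p entry per input tensor: <name>,<min_shape>,<opt_shape>,<max_shape>
    "-p", "Input,1x3x544x960,4x3x544x960,8x3x544x960",
    "model.etlt",                       # assumed model file argument
]
print(" ".join(cmd))                    # run the printed command in a shell
```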
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
training_config |
Training config |
collection |
Parameters to configure the training process |
|||||||||
training_config.train_batch_size |
training batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. |
16 |
0 |
|||||||
training_config.iterations_per_loop |
integer |
10 |
||||||||||
training_config.num_epochs |
number of epochs |
integer |
The number of epochs to train the network |
6 |
0 |
|||||||
training_config.num_examples_per_epoch |
number of images per epoch per gpu |
integer |
Total number of images in the training set divided by the number of GPUs |
118288 |
0 |
|||||||
training_config.checkpoint |
path to pretrained model |
hidden |
The path to the pretrained model, if any |
|||||||||
training_config.pruned_model_path |
path to pruned model |
hidden |
The path to a TAO pruned model for re-training, if any |
|||||||||
training_config.checkpoint_period |
checkpoint period |
integer |
The number of training epochs that should run per model checkpoint/validation |
2 |
0 |
|||||||
training_config.amp |
AMP |
bool |
Whether to use mixed precision training |
TRUE |
||||||||
training_config.moving_average_decay |
moving average decay |
float |
Moving average decay |
0.9999 |
||||||||
training_config.l2_weight_decay |
L2 weight decay |
float |
L2 weight decay |
0.00004 |
||||||||
training_config.l1_weight_decay |
L1 weight decay |
float |
L1 weight decay |
0 |
||||||||
training_config.lr_warmup_epoch |
learning rate warmup epoch |
integer |
The number of warmup epochs in the learning rate schedule |
3 |
0 |
|||||||
training_config.lr_warmup_init |
initial learning rate during warmup |
float |
The initial learning rate in the warmup period |
0.002 |
||||||||
training_config.learning_rate |
maximum learning rate |
float |
The maximum learning rate |
0.02 |
||||||||
training_config.tf_random_seed |
random seed |
integer |
The random seed |
42 |
0 |
|||||||
training_config.clip_gradients_norm |
clip gradient by norm |
float |
Clip gradients by the norm value |
5.00E+00 |
||||||||
training_config.skip_checkpoint_variables |
skip checkpoint variables |
string |
If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. |
-predict* |
||||||||
eval_config |
evaluation config |
collection |
Parameters to configure evaluation |
|||||||||
eval_config.eval_epoch_cycle |
evaluation epoch cycle |
integer |
The number of training epochs that should run per validation |
2 |
0 |
|||||||
eval_config.max_detections_per_image |
maximum detections per image |
integer |
The maximum number of detections to visualize |
100 |
0 |
|||||||
eval_config.min_score_thresh |
minimum confidence threshold |
float |
The minimum confidence of a predicted box for it to be considered a match with a ground truth box |
0.4 |
||||||||
eval_config.eval_batch_size |
evaluation batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus |
16 |
0 |
|||||||
eval_config.eval_samples |
number of samples for evaluation |
integer |
The number of samples for evaluation |
500 |
||||||||
dataset_config |
dataset config |
collection |
Parameters to configure dataset |
|||||||||
dataset_config.image_size |
image size |
string |
The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input. |
512,512 |
yes |
|||||||
dataset_config.training_file_pattern |
training file pattern |
hidden |
The TFRecord path for training |
|||||||||
dataset_config.validation_file_pattern |
validation file pattern |
hidden |
The TFRecord path for validation |
|||||||||
dataset_config.validation_json_file |
validation json file |
hidden |
The annotation file path for validation |
|||||||||
dataset_config.num_classes |
number of classes |
integer |
The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) |
91 |
yes |
|||||||
dataset_config.max_instances_per_image |
maximum instances per image |
integer |
The maximum number of object instances to parse (default: 100) |
100 |
||||||||
dataset_config.skip_crowd_during_training |
skip crowd during training |
bool |
Specifies whether to skip crowd during training |
TRUE |
||||||||
model_config |
model config |
collection |
Parameters to configure model |
|||||||||
model_config.model_name |
model name |
string |
Model name |
efficientdet-d0 |
||||||||
model_config.min_level |
minimum level |
integer |
The minimum level of the output feature pyramid |
3 |
||||||||
model_config.max_level |
maximum level |
integer |
The maximum level of the output feature pyramid |
7 |
||||||||
model_config.num_scales |
number of scales |
integer |
The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]); see the anchor sketch after this table |
3 |
||||||||
model_config.aspect_ratios |
aspect ratios |
string |
A list of tuples representing the aspect ratios of anchors on each pyramid level |
[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] |
||||||||
model_config.anchor_scale |
anchor scale |
integer |
Scale of the base-anchor size to the feature-pyramid stride |
4 |
||||||||
augmentation_config |
augmentation config |
collection |
Parameters to configure augmentation |
|||||||||
augmentation_config.rand_hflip |
random horizontal flip |
bool |
Whether to perform random horizontal flip |
TRUE |
||||||||
augmentation_config.random_crop_min_scale |
minimum scale of random crop |
float |
The minimum scale of RandomCrop augmentation. |
0.1 |
||||||||
augmentation_config.random_crop_max_scale |
maximum scale of random crop |
float |
The maximum scale of RandomCrop augmentation. |
2 |
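The min_level/max_level, num_scales, aspect_ratios, and anchor_scale parameters above typically combine into anchor sizes as in RetinaNet/EfficientDet-style detectors. The sketch below illustrates the formula implied by the num_scales description (octave scales of 2^(i/num_scales)); it is an illustration, not the toolkit's exact implementation.

```python
def anchor_sizes(min_level=3, max_level=7, num_scales=3,
                 aspect_ratios=((1.0, 1.0), (1.4, 0.7), (0.7, 1.4)),
                 anchor_scale=4):
    """Return {level: [(width, height), ...]} of anchor sizes in pixels."""
    sizes = {}
    for level in range(min_level, max_level + 1):
        stride = 2 ** level                       # feature stride at this level
        base = anchor_scale * stride              # base anchor size
        per_level = []
        for octave in range(num_scales):          # octave scales 2^(i/num_scales)
            scale = 2 ** (octave / num_scales)
            for wr, hr in aspect_ratios:          # aspect ratios stretch w and h
                per_level.append((base * scale * wr, base * scale * hr))
        sizes[level] = per_level
    return sizes

# 3 octave scales x 3 aspect ratios = 9 anchors per location at each pyramid level
print(len(anchor_sizes()[3]))   # 9
```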
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec_file |
Experiment Spec |
hidden |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
yes |
|||||||
model_path |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
||||||
output_path |
Output File |
hidden |
UNIX path to where the exported model will be saved. |
yes |
|||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
||||||
data_type |
Data Type |
string |
The data type of the exported TensorRT engine. |
fp32 |
int8, fp32, fp16 |
yes |
yes |
||||
cal_image_dir |
hidden |
||||||||||
cal_cache_file |
Calibration cache file |
hidden |
Unix PATH to the int8 calibration cache file |
yes |
yes |
||||||
engine_file |
Engine File |
hidden |
UNIX path to the model engine file. |
yes |
|||||||
max_batch_size |
integer |
1 |
|||||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
100 |
yes |
||||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode; see the note after this table |
100 |
|||||||
max_workspace_size |
integer |
Maximum workspace size of the TensorRT engine. Example values: 1<<30, 2<<30 |
|||||||||
verbose |
hidden |
TRUE |
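Two back-of-the-envelope checks that follow from the export parameters above (illustrative only): INT8 calibration reads roughly batches × batch_size images from cal_image_dir, and max_workspace_size values such as 1<<30 are byte counts.

```python
batch_size = 100          # images per calibration batch (default above)
batches = 100             # number of calibration batches (default above)
print(batch_size * batches)          # 10000 images expected in cal_image_dir

max_workspace_size = 1 << 30         # bytes
print(max_workspace_size / 2**30)    # 1.0 GiB; 2<<30 would be 2.0 GiB
```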
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
training_config |
Training config |
collection |
Parameters to configure the training process |
|||||||||
training_config.train_batch_size |
training batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. |
16 |
0 |
|||||||
training_config.iterations_per_loop |
integer |
10 |
||||||||||
training_config.num_epochs |
number of epochs |
integer |
The number of epochs to train the network |
6 |
0 |
|||||||
training_config.num_examples_per_epoch |
number of images per epoch per gpu |
integer |
Total number of images in the training set divided by the number of GPUs |
118288 |
0 |
|||||||
training_config.checkpoint |
path to pretrained model |
hidden |
The path to the pretrained model, if any |
|||||||||
training_config.pruned_model_path |
path to pruned model |
hidden |
The path to a TAO pruned model for re-training, if any |
|||||||||
training_config.checkpoint_period |
checkpoint period |
integer |
The number of training epochs that should run per model checkpoint/validation |
2 |
0 |
|||||||
training_config.amp |
AMP |
bool |
Whether to use mixed precision training |
TRUE |
||||||||
training_config.moving_average_decay |
moving average decay |
float |
Moving average decay |
0.9999 |
||||||||
training_config.l2_weight_decay |
L2 weight decay |
float |
L2 weight decay |
0.00004 |
||||||||
training_config.l1_weight_decay |
L1 weight decay |
float |
L1 weight decay |
0 |
||||||||
training_config.lr_warmup_epoch |
learning rate warmup epoch |
integer |
The number of warmup epochs in the learning rate schedule |
3 |
0 |
|||||||
training_config.lr_warmup_init |
initial learning rate during warmup |
float |
The initial learning rate in the warmup period |
0.002 |
||||||||
training_config.learning_rate |
maximum learning rate |
float |
The maximum learning rate |
0.02 |
||||||||
training_config.tf_random_seed |
random seed |
integer |
The random seed |
42 |
0 |
|||||||
training_config.clip_gradients_norm |
clip gradient by norm |
float |
Clip gradients by the norm value |
5.00E+00 |
||||||||
training_config.skip_checkpoint_variables |
skip checkpoint variables |
string |
If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. |
-predict* |
||||||||
eval_config |
evaluation config |
collection |
Parameters to configure evaluation |
|||||||||
eval_config.eval_epoch_cycle |
evaluation epoch cycle |
integer |
The number of training epochs that should run per validation |
2 |
0 |
|||||||
eval_config.max_detections_per_image |
maximum detections per image |
integer |
The maximum number of detections to visualize |
100 |
0 |
|||||||
eval_config.min_score_thresh |
minimum confidence threshold |
float |
The lowest confidence of the predicted box and ground truth box that can be considered a match |
0.4 |
||||||||
eval_config.eval_batch_size |
evaluation batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus |
16 |
0 |
|||||||
eval_config.eval_samples |
number of samples for evaluation |
integer |
The number of samples for evaluation |
500 |
||||||||
dataset_config |
dataset config |
collection |
Parameters to configure dataset |
|||||||||
dataset_config.image_size |
image size |
string |
The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input. |
512,512 |
yes |
|||||||
dataset_config.training_file_pattern |
training file pattern |
hidden |
The TFRecord path for training |
|||||||||
dataset_config.validation_file_pattern |
validation file pattern |
hidden |
The TFRecord path for validation |
|||||||||
dataset_config.validation_json_file |
validation json file |
hidden |
The annotation file path for validation |
|||||||||
dataset_config.num_classes |
number of classes |
integer |
The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) |
91 |
yes |
|||||||
dataset_config.max_instances_per_image |
maximum instances per image |
integer |
The maximum number of object instances to parse (default: 100) |
100 |
||||||||
dataset_config.skip_crowd_during_training |
skip crowd during training |
bool |
Specifies whether to skip crowd during training |
TRUE |
||||||||
model_config |
model config |
collection |
Parameters to configure model |
|||||||||
model_config.model_name |
model name |
string |
Model name |
efficientdet-d0 |
||||||||
model_config.min_level |
minimum level |
integer |
The minimum level of the output feature pyramid |
3 |
||||||||
model_config.max_level |
maximum level |
integer |
The maximum level of the output feature pyramid |
7 |
||||||||
model_config.num_scales |
number of scales |
integer |
The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) |
3 |
||||||||
model_config.aspect_ratios |
aspect ratios |
string |
A list of tuples representing the aspect ratios of anchors on each pyramid level |
[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] |
||||||||
model_config.anchor_scale |
anchor scale |
integer |
Scale of the base-anchor size to the feature-pyramid stride |
4 |
||||||||
augmentation_config |
augmentation config |
collection |
Parameters to configure augmentation |
|||||||||
augmentation_config.rand_hflip |
random horizontal flip |
bool |
Whether to perform random horizontal flip |
TRUE |
||||||||
augmentation_config.random_crop_min_scale |
minimum scale of random crop |
float |
The minimum scale of RandomCrop augmentation. |
0.1 |
||||||||
augmentation_config.random_crop_max_scale |
maximum scale of random crop |
float |
The maximum scale of RandomCrop augmentation. |
2 |
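To make the nesting of the sections above easier to read, the sketch below expresses the same hierarchy as a plain Python dictionary, filled with the defaults listed in the table. It covers only a subset of the parameters and is illustrative rather than a complete spec.

```python
# Minimal sketch of the spec hierarchy, using defaults from the table above.
efficientdet_spec = {
    "training_config": {
        "train_batch_size": 16,     # per GPU; effective size = batch size * num GPUs
        "num_epochs": 6,
        "checkpoint_period": 2,
        "amp": True,                # mixed-precision training
        "learning_rate": 0.02,
        "lr_warmup_epoch": 3,
        "lr_warmup_init": 0.002,
    },
    "eval_config": {
        "eval_epoch_cycle": 2,
        "eval_batch_size": 16,
        "eval_samples": 500,
    },
    "dataset_config": {
        "image_size": "(512, 512)", # "(height, width)" of the resized, padded input
        "num_classes": 91,          # N categories + 1 background class
    },
    "model_config": {
        "model_name": "efficientdet-d0",
        "min_level": 3,
        "max_level": 7,
        "num_scales": 3,
        "anchor_scale": 4,
    },
    "augmentation_config": {
        "rand_hflip": True,
        "random_crop_min_scale": 0.1,
        "random_crop_max_scale": 2.0,
    },
}
```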
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
model |
Model path |
hidden |
UNIX path to where the input model is located. |
yes |
|||||||
output_dir |
Output Directory |
hidden |
UNIX path to where the pruned model will be saved. |
yes |
|||||||
key |
Encode key |
hidden |
|||||||||
normalizer |
Normalizer |
string |
How to normalize |
max |
max, L2 |
||||||
equalization_criterion |
Equalization Criterion |
string |
Criteria to equalize the stats of inputs to an element wise op layer. |
union |
union, intersection, arithmetic_mean, geometric_mean |
no |
|||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
|||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
||||||
excluded_layers |
Excluded layers |
string |
List of layers to be excluded from pruning. Example: -i item1 item2 |
||||||||
verbose |
verbosity |
hidden |
TRUE |
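For reference, the prune arguments above can be pictured as the following Python dictionary, using the defaults from the table. The model and output paths are placeholder assumptions.

```python
# Illustrative prune arguments; defaults taken from the table above, paths assumed.
prune_args = {
    "model": "/workspace/models/efficientdet_d0.tlt",  # assumed input model path
    "output_dir": "/workspace/pruned",                 # assumed output directory
    "normalizer": "max",                 # "max" or "L2"
    "equalization_criterion": "union",   # union, intersection, arithmetic_mean, geometric_mean
    "pruning_granularity": 8,            # filters removed at a time
    "pruning_threshold": 0.1,            # normalized-norm threshold in [0, 1]
    "min_num_filters": 16,               # minimum filters kept per layer
    "excluded_layers": [],               # e.g. ["item1", "item2"]
}
```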
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
training_config |
Training config |
collection |
Parameters to configure the training process |
|||||||||
training_config.train_batch_size |
training batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus. |
16 |
0 |
|||||||
training_config.iterations_per_loop |
integer |
10 |
||||||||||
training_config.num_epochs |
number of epochs |
integer |
The number of epochs to train the network |
6 |
0 |
|||||||
training_config.num_examples_per_epoch |
number of images per epoch per gpu |
integer |
Total number of images in the training set divided by the number of GPUs |
118288 |
0 |
|||||||
training_config.checkpoint |
path to pretrained model |
hidden |
The path to the pretrained model, if any |
|||||||||
training_config.pruned_model_path |
path to pruned model |
hidden |
The path to a TAO pruned model for re-training, if any |
|||||||||
training_config.checkpoint_period |
checkpoint period |
integer |
The number of training epochs that should run per model checkpoint/validation |
2 |
0 |
|||||||
training_config.amp |
AMP |
bool |
Whether to use mixed precision training |
TRUE |
||||||||
training_config.moving_average_decay |
moving average decay |
float |
Moving average decay |
0.9999 |
||||||||
training_config.l2_weight_decay |
L2 weight decay |
float |
L2 weight decay |
0.00004 |
||||||||
training_config.l1_weight_decay |
L1 weight decay |
float |
L1 weight decay |
0 |
||||||||
training_config.lr_warmup_epoch |
learning rate warmup epoch |
integer |
The number of warmup epochs in the learning rate schedule |
3 |
0 |
|||||||
training_config.lr_warmup_init |
initial learning rate during warmup |
float |
The initial learning rate in the warmup period |
0.002 |
||||||||
training_config.learning_rate |
maximum learning rate |
float |
The maximum learning rate |
0.02 |
||||||||
training_config.tf_random_seed |
random seed |
integer |
The random seed |
42 |
0 |
|||||||
training_config.clip_gradients_norm |
clip gradient by norm |
float |
Clip gradients by the norm value |
5.00E+00 |
||||||||
training_config.skip_checkpoint_variables |
skip checkpoint variables |
string |
If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. |
-predict* |
||||||||
eval_config |
evaluation config |
collection |
Parameters to configure evaluation |
|||||||||
eval_config.eval_epoch_cycle |
evaluation epoch cycle |
integer |
The number of training epochs that should run per validation |
2 |
0 |
|||||||
eval_config.max_detections_per_image |
maximum detections per image |
integer |
The maximum number of detections to visualize |
100 |
0 |
|||||||
eval_config.min_score_thresh |
minimum confidence threshold |
float |
The lowest confidence of the predicted box and ground truth box that can be considered a match |
0.4 |
||||||||
eval_config.eval_batch_size |
evaluation batch size |
integer |
The batch size for each GPU, so the effective batch size is batch_size_per_gpu * num_gpus |
16 |
0 |
|||||||
eval_config.eval_samples |
number of samples for evaluation |
integer |
The number of samples for evaluation |
500 |
||||||||
dataset_config |
dataset config |
collection |
Parameters to configure dataset |
|||||||||
dataset_config.image_size |
image size |
string |
The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input. |
512,512 |
yes |
|||||||
dataset_config.training_file_pattern |
training file pattern |
hidden |
The TFRecord path for training |
|||||||||
dataset_config.validation_file_pattern |
validation file pattern |
hidden |
The TFRecord path for validation |
|||||||||
dataset_config.validation_json_file |
validation json file |
hidden |
The annotation file path for validation |
|||||||||
dataset_config.num_classes |
number of classes |
integer |
The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) |
91 |
yes |
|||||||
dataset_config.max_instances_per_image |
maximum instances per image |
integer |
The maximum number of object instances to parse (default: 100) |
100 |
||||||||
dataset_config.skip_crowd_during_training |
skip crowd during training |
bool |
Specifies whether to skip crowd during training |
TRUE |
||||||||
model_config |
model config |
collection |
Parameters to configure model |
|||||||||
model_config.model_name |
model name |
string |
Model name |
efficientdet-d0 |
||||||||
model_config.min_level |
minimum level |
integer |
The minimum level of the output feature pyramid |
3 |
||||||||
model_config.max_level |
maximum level |
integer |
The maximum level of the output feature pyramid |
7 |
||||||||
model_config.num_scales |
number of scales |
integer |
The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) |
3 |
||||||||
model_config.aspect_ratios |
aspect ratios |
string |
A list of tuples representing the aspect ratios of anchors on each pyramid level |
[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] |
||||||||
model_config.anchor_scale |
anchor scale |
integer |
Scale of the base-anchor size to the feature-pyramid stride |
4 |
||||||||
augmentation_config |
augmentation config |
collection |
Parameters to configure augmentation |
|||||||||
augmentation_config.rand_hflip |
random horizontal flip |
bool |
Whether to perform random horizontal flip |
TRUE |
||||||||
augmentation_config.random_crop_min_scale |
minimum scale of random crop |
float |
The minimum scale of RandomCrop augmentation. |
0.1 |
||||||||
augmentation_config.random_crop_max_scale |
maximum scale of random crop |
float |
The maximum scale of RandomCrop augmentation. |
2 |
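Because num_scales, aspect_ratios, and anchor_scale jointly determine the anchor boxes, the short sketch below derives the resulting anchor sizes per pyramid level. It assumes the common EfficientDet convention (stride = 2^level, base anchor size = anchor_scale * stride, octave scale = 2^(i / num_scales)); that convention is an assumption for illustration, not something stated in the table.

```python
# Sketch: derive per-level anchor sizes from num_scales, aspect_ratios, and anchor_scale.
# Assumes stride = 2**level and base size = anchor_scale * stride * octave_scale.
min_level, max_level = 3, 7
num_scales = 3
anchor_scale = 4
aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]

for level in range(min_level, max_level + 1):
    stride = 2 ** level
    for i in range(num_scales):
        octave_scale = 2 ** (i / num_scales)          # 2^0, 2^(1/3), 2^(2/3) when num_scales = 3
        base = anchor_scale * stride * octave_scale   # e.g. 32 px at level 3, first octave
        for ratio_x, ratio_y in aspect_ratios:
            print(level, round(base * ratio_x, 1), round(base * ratio_y, 1))
```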
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
|||||||
data_type |
Data Type |
enum |
The data type of the exported TensorRT engine. |
int8 |
int8, fp32, fp16 |
yes |
yes |
|||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
no |
|||||||
experiment_spec |
Experiment Spec |
string |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
hidden from train experiment |
yes |
|||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
hidden |
yes |
|||||||
force_ptq |
Force Post-Training Quantization |
bool |
Force generating int8 engine using Post Training Quantization |
TRUE |
no |
|||||||
engine-file |
Engine File |
hidden |
UNIX path to the model engine file. |
/export/input_model_file.<data_type>.trt |
yes |
|||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
|||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
16 |
yes |
|||||||
cal_cache_file |
Calibration cache file |
string |
Unix PATH to the int8 calibration cache file |
hidden |
yes |
yes |
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
no |
||||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
|||||||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
|||||||
equalization_criterion |
Equalization Criterion |
string |
Criteria to equalize the stats of inputs to an element wise op layer. |
union |
union, intersection, arithmetic_mean, geometric_mean |
no |
||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
hidden |
yes |
train
comments |
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
valid_options_description |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||||
Generates randomness around a point; the seed is the starting value you begin converging from. Only required if you need to replicate a run. Does the log output this value? |
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
>=0 |
|||||||
verbose |
Verbose |
bool |
Flag of verbosity |
TRUE |
TRUE, FALSE |
||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
||||||||||
JPG/PNG - auto pick this up |
dataset_config.image_extension |
Image Extension |
hidden |
Extension of the images to be used. |
__jpg__ |
__png__, __jpg__, __jpeg__ |
yes |
__png__, __jpg__, __jpeg__ |
|||||
Can be system generated - after conversion. This is the dataset preparation step. |
dataset_config.data_sources.tfrecords_path |
TFRecord Path |
hidden |
/shared/users/1234/datasets/5678/tfrecords/kitti_trainval/* |
|||||||||
Where the dataset is - where the images are. Will it figure it out from the parent directory? |
dataset_config.data_sources.image_directory_path |
Image Path |
hidden |
/shared/users/1234/datasets/5678/training |
|||||||||
Read all labels in the label file (car, truck, suv, person). Ask the user to map it to Vehicle/Person. |
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the tfrecords to the target class to be trained in the network. An element is defined for every source-class-to-target-class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example, car, van, heavy_truck, etc. may be grouped under automobile. |
|||||||||
Class you want to train for (vehicle) |
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||
Class defined in the label file (car, truck, suv -> map to vehicle) |
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||
Default - 0 |
dataset_config.validation_fold |
Validation Fold |
integer |
In the case of n-fold tfrecords, define the index of the fold to use for validation. For sequence-wise validation, choose the validation fold in the range [0, N-1]. For random-split partitioning, force the validation fold index to 0, as the tfrecord is just 2-fold. |
0 |
||||||||
Dataset specific config - augmentation |
augmentation_config |
Data Augmentation |
collection |
Collection of parameters to configure the preprocessing and on the fly data augmentation |
Yes |
||||||||
The resolution at which the network should be trained. Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16. |
augmentation_config.preprocessing.output_image_width |
Image Width |
integer |
The width of the augmentation output. This is the same as the width of the network input and must be a multiple of 16. |
1248 |
480 |
yes |
Yes |
|||||
Get the max dimension of images in the dataset and set that as the default behind the scenes - has to be a multiple of 16 |
augmentation_config.preprocessing.output_image_height |
Image Height |
integer |
The height of the augmentation output. This is the same as the height of the network input and must be a multiple of 16. |
384 |
272 |
yes |
Yes |
|||||
Smaller side of image(height or width) |
augmentation_config.preprocessing.output_image_min |
Image smaller side’s size |
integer |
The size of the image's smaller side. This is used to resize and keep the aspect ratio in FasterRCNN. If this value is positive, the preprocessor resizes the image while keeping the aspect ratio, such that the smaller side's size equals this value; the other side scales accordingly. This value has to be a multiple of 16. |
0 |
||||||||
Limit of larger side’s size of an image when resize and keep aspect ratio |
augmentation_config.preprocessing.output_image_max |
Limit of larger side’s size when resize and keep aspect ratio |
integer |
The maximum size of the image's larger side. If, after resizing while keeping the aspect ratio, the larger side exceeds this limit, the image is resized so that the larger side's size equals this value, and hence the smaller side's size becomes smaller than output_image_min. This value has to be a multiple of 16. |
0 |
||||||||
Flag to enable automatic image scaling |
augmentation_config.preprocessing.enable_auto_resize |
Flag to enable or disable automatic image scaling |
bool |
If True, automatic image scaling will be enabled. Otherwise, disabled. |
FALSE |
TRUE, FALSE |
|||||||
Limit of the minimum dimension you DON'T want to train for. Default 10x10 |
augmentation_config.preprocessing.min_bbox_width |
Bounding Box Width |
float |
The minimum width of the object labels to be considered for training. |
1 |
0 |
yes |
>=0 |
|||||
Limit of the minimum dimension you DON'T want to train for. Default 10x10 |
augmentation_config.preprocessing.min_bbox_height |
Bounding Box Height |
float |
The minimum height of the object labels to be considered for training. |
1 |
0 |
yes |
>=0 |
|||||
3 channel default |
augmentation_config.preprocessing.output_image_channel |
Image Channel |
integer |
The channel depth of the augmentation output. This is the same as the channel depth of the network input. Currently, 1-channel input is not recommended for datasets with JPG images. For PNG images, both 3-channel RGB and 1-channel monochrome images are supported. |
3 |
1, 3 |
yes |
3, 1 |
|||||
0 |
augmentation_config.preprocessing.crop_right |
Crop Right |
integer |
The right boundary of the crop to be extracted from the original image. |
0 |
0 |
yes |
>=0 |
|||||
0 |
augmentation_config.preprocessing.crop_left |
Crop Left |
integer |
The left boundary of the crop to be extracted from the original image. |
0 |
0 |
yes |
>=0 |
|||||
0 |
augmentation_config.preprocessing.crop_top |
Crop Top |
integer |
The top boundary of the crop to be extracted from the original image. |
0 |
0 |
yes |
>=0 |
|||||
0 |
augmentation_config.preprocessing.crop_bottom |
Crop Bottom |
integer |
The bottom boundary of the crop to be extracted from the original image. |
0 |
0 |
yes |
>=0 |
|||||
0 |
augmentation_config.preprocessing.scale_height |
Scale Height |
float |
The floating point factor to scale the height of the cropped images. |
0 |
0 |
yes |
>=0 |
|||||
0 |
augmentation_config.preprocessing.scale_width |
Scale Width |
float |
The floating point factor to scale the width of the cropped images. |
0 |
0 |
yes |
>=0 |
|||||
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. |
augmentation_config.spatial_augmentation.hflip_probability |
Horizontal-Flip Probability |
float |
The probability to flip an input image horizontally. |
0.5 |
0 |
1 |
[0, 1) |
|||||
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. |
augmentation_config.spatial_augmentation.vflip_probability |
Vertical-Flip Probability |
float |
The probability to flip an input image vertically. |
0 |
0 |
1 |
[0, 1) |
|||||
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. |
augmentation_config.spatial_augmentation.zoom_min |
Minimum Zoom Scale |
float |
The minimum zoom scale of the input image. |
1 |
0 |
(0, 1] |
||||||
Enable - go to default, disable - go to 1. Check for the right default values with TAO Toolkit Engg. |
augmentation_config.spatial_augmentation.zoom_max |
Maximum Zoom Scale |
float |
The maximum zoom scale of the input image. |
1 |
0 |
[1, 2) |
||||||
Enable - go to default, disable - go to 0. Check with TAO Toolkit Engg for the right default values for enable vs. disable. |
augmentation_config.spatial_augmentation.translate_max_x |
X-Axis Maximum Translation |
float |
The maximum translation to be added across the x axis. |
8 |
0 |
>=0 |
||||||
Enable - go to default, disable - go to 0. Check for the right default values with TAO Toolkit Engg. |
augmentation_config.spatial_augmentation.translate_max_y |
Y-Axis Maximum Translation |
float |
The maximum translation to be added across the y axis. |
8 |
0 |
>=0 |
||||||
Enable - go to default, disable - go to 0 |
augmentation_config.spatial_augmentation.rotate_rad_max |
Image Rotation |
float |
The angle of rotation to be applied to the images and the training labels. The range is defined between [-rotate_rad_max, rotate_rad_max]. |
0.69 |
0 |
>=0 |
||||||
augmentation_config.spatial_augmentation.rotate_probability |
Image Rotation |
float |
The probability of image rotation. The range is [0, 1] |
[0, 1) |
|||||||||
augmentation_config.color_augmentation.color_shift_stddev |
Color Shift Standard Deviation |
float |
The standard deviation value for the color shift. |
0 |
0 |
1 |
[0, 1) |
||||||
augmentation_config.color_augmentation.hue_rotation_max |
Hue Maximum Rotation |
float |
The maximum rotation angle for the hue rotation matrix. |
25 |
0 |
360 |
[0, 360) |
||||||
augmentation_config.color_augmentation.saturation_shift_max |
Saturation Maximum Shift |
float |
The maximum shift that changes the saturation. A value of 1.0 means no change in saturation shift. |
0.2 |
0 |
1 |
[0, 1) |
||||||
augmentation_config.color_augmentation.contrast_scale_max |
Contrast Maximum Scale |
float |
The slope of the contrast as rotated around the provided center. A value of 0.0 leaves the contrast unchanged. |
0.1 |
0 |
1 |
[0, 1) |
||||||
augmentation_config.color_augmentation.contrast_center |
Contrast Center |
float |
The center around which the contrast is rotated. Ideally, this is set to half of the maximum pixel value. Since our input images are scaled between 0 and 1.0, you can set this value to 0.5. |
0.5 |
0.5 |
0.5 |
|||||||
Might need different defaults based on task/scenario |
model_config |
Model |
collection |
||||||||||
model_config.arch |
BackBone Architecture |
string |
The architecture of the backbone feature extractor to be used for training. |
resnet:18 |
resnet:18 |
yes |
‘resnet:18’, ‘resnet:34’, ‘resnet:50’, ‘resnet:101’, ‘vgg16’, ‘vgg:16’, ‘vgg:19’, ‘googlenet’, ‘mobilenet_v1’, ‘mobilenet_v2’, ‘darknet:19’, ‘darknet:53’, ‘resnet101’, ‘efficientnet:b0’, ‘efficientnet:b1’, |
||||||
Confirm correct default values |
model_config.freeze_blocks |
Freeze Blocks |
integer |
This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. |
0 |
3 |
depends on arch |
||||||
Default values. Verify with TAO Toolkit. 2 sets of defaults required. |
model_config.freeze_bn |
Freeze Batch Normalization |
bool |
A flag to determine whether to freeze the Batch Normalization layers in the model during training. |
FALSE |
TRUE, FALSE |
|||||||
Default values. Verify with TAO Toolkit. 2 sets of defaults required. |
model_config.all_projections |
All Projections |
bool |
For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. |
TRUE |
TRUE, FALSE |
|||||||
Default values. Verify with TAO Toolkit. 2 sets of defaults required. |
model_config.use_pooling |
Use Pooling |
bool |
Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. |
FALSE |
TRUE, FALSE |
|||||||
Default values. Verify with TAO Toolkit. 2 sets of defaults required. |
model_config.dropout_rate |
Dropout Rate |
float |
Probability for dropout |
0 |
0 |
0.1 |
[0, 1) |
|||||
model_config.input_image_config |
Input Image |
collection |
Configuration for input images |
||||||||||
model_config.input_image_config.size_height_width |
collection |
||||||||||||
model_config.input_image_config.size_height_width.height |
integer |
384 |
|||||||||||
model_config.input_image_config.size_height_width.width |
integer |
1248 |
|||||||||||
model_config.input_image_config.image_type |
Image Type |
enum |
The type of images, either RGB or GRAYSCALE |
RGB |
__RGB__, __GRAYSCALE__ |
||||||||
model_config.input_image_config.size_min |
Image smaller side’s size |
integer |
The size of an image’s smaller side, should be a multiple of 16. This should be consistent with the size in augmentation_config. This is used when resizing images and keeping aspect ratio |
>=0 |
|||||||||
model_config.input_image_config.size_height_width |
Image size by height and width |
collection |
The size of images by specifying height and width. |
||||||||||
model_config.input_image_config.size_height_width.height |
Image Height |
integer |
The height of images |
>=0 |
|||||||||
model_config.input_image_config.size_height_width.width |
Image Width |
integer |
The width of images |
>=0 |
|||||||||
model_config.input_image_config.image_channel_order |
Image Channel Order |
string |
The channel order of images. Should be either “rgb” or “bgr” for RGB images and “l” for GRAYSCALE images |
rgb |
‘rgb’, ‘bgr’, ‘l’ |
||||||||
model_config.input_image_config.image_channel_mean |
Image Channel Means |
list |
A dict from ‘r’, ‘g’, ‘b’, or ‘l’ (for GRAYSCALE images) to per-channel mean values. |
[{“key”:”r”,”value”:103.0}, {“key”:”g”,”value”:103.0}, {“key”:”b”,”value”:103.0}] |
|||||||||
model_config.input_image_config.image_channel_mean.key |
channel means key |
string |
string => one of r,g,b |
‘r’, ‘g’, ‘b’, ‘l’ |
|||||||||
model_config.input_image_config.image_channel_mean.value |
channel means value |
float |
value in float |
(0, 255) |
|||||||||
model_config.input_image_config.image_scaling_factor |
Image Scaling Factor |
float |
A scalar to normalize the images after mean subtraction. |
1 |
>0 |
||||||||
model_config.input_image_config.max_objects_num_per_image |
Max Objects Num |
integer |
The maximum number of objects in an image. This is used for padding in the data loader, as different images can have different numbers of objects in their labels. |
100 |
>=1 |
||||||||
model_config.anchor_box_config |
Anchor Boxes |
Collection |
|||||||||||
model_config.anchor_box_config.scale |
Anchor Scales |
list |
The list of anchor sizes(scales). |
[64.0,128.0,256.0] |
>0 |
||||||||
model_config.anchor_box_config.ratio |
Anchor Ratios |
list |
The list of anchor aspect ratios. |
[1.0,0.5,2.0] |
>0 |
||||||||
model_config.roi_mini_batch |
ROI Batch Size |
integer |
The batch size of ROIs for training the RCNN in the model |
16 |
>0 |
||||||||
model_config.rpn_stride |
RPN stride |
integer |
The stride of the RPN feature map relative to the input resolution. Currently only 16 is supported. |
16 |
16 |
||||||||
model_config.drop_connect_rate |
Drop Connect Rate |
float |
The rate of DropConnect. This is only useful for EfficientNet backbones. |
(0, 1) |
|||||||||
model_config.rpn_cls_activation_type |
RPN Classification Activation Type |
string |
Type of RPN classification head’s activation function. Currently only “sigmoid” is supported. |
sigmoid |
|||||||||
model_config.use_bias |
Use Bias |
bool |
Whether or not to use bias for convolutional layers |
TRUE, FALSE |
|||||||||
model_config.roi_pooling_config |
ROI Pooling |
collection |
Configuration for the ROI Pooling layer |
||||||||||
model_config.roi_pooling_config.pool_size |
Pool Size |
integer |
Pool size of the ROI Pooling operation. |
7 |
>0 |
||||||||
model_config.roi_pooling_config.pool_size_2x |
Pool Size Doubled |
bool |
Whether or not to double the pool size and apply a 2x downsampling after ROI Pooling |
FALSE |
TRUE, FALSE |
||||||||
model_config.activation |
Activation |
collection |
Activation function for the model backbone. This is only useful for EfficientNet backbones. |
||||||||||
model_config.activation.activation_type |
Activation Type |
string |
Type of the activation function of backbone. |
relu, swish |
|||||||||
model_config.activation.activation_parameters |
Activation Parameters |
dict |
A dict that maps the name of a parameter to its value. |
||||||||||
training_config |
Training |
collection |
>0 |
||||||||||
IMPORTANT. Open to user - the default should be smartly calculated. Check the factors that influence it. |
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
8 |
1 |
yes |
>0 |
|||||
Default - what is the optimal number of epochs for each model? Smart feature in TAO Toolkit to auto-stop once the model converges |
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
120 |
1 |
yes |
Yes |
TRUE, FALSE |
||||
Toggle for end user |
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
TRUE |
yes |
Yes |
>0 |
|||||
Default |
training_config.learning_rate.soft_start.base_lr |
Minimum Learning Rate |
float |
5.00E-06 |
Yes |
>0 |
|||||||
Default |
training_config.learning_rate.soft_start.start_lr |
Maximum Learning Rate |
float |
5.00E-04 |
Yes |
(0, 1) |
|||||||
Default |
training_config.learning_rate.soft_start.soft_start |
Soft Start |
float |
0.100000001 |
0 |
1 |
Yes |
>1 |
|||||
Default |
training_config.learning_rate.soft_start.annealing_divider |
Annealing |
float |
0.699999988 |
0 |
1 |
Yes |
__NO_REG__, __L1__, __L2__ |
|||||
Default |
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__NO_REG__, __L1__, __L2__ |
yes |
>0 |
|||||
Default |
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-09 |
yes |
(0, 1) |
||||||
Default |
training_config.optimizer.adam.epsilon |
Optimizer Adam Epsilon |
float |
A very small number to prevent any division by zero in the implementation. |
1.00E-08 |
yes |
(0, 1) |
||||||
Default |
training_config.optimizer.adam.beta_1 |
Optimizer Adam Beta1 |
float |
0.899999976 |
yes |
(0, 1) |
|||||||
Default |
training_config.optimizer.adam.beta_2 |
Optimizer Adam Beta2 |
float |
0.999000013 |
yes |
>=1 |
|||||||
Use default as 10. Provide the last checkpoint to the user |
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
10 |
0 |
yes |
TRUE, FALSE |
|||||
training_config.enable_augmentation |
Enable Augmentation |
bool |
Whether or not to enable data augmentation |
TRUE |
|||||||||
training_config.retrain_pruned_model |
Pruned Model |
hidden |
The path of pruned model to be retrained |
||||||||||
training_config.pretrained_weights |
Pretrained Weights |
hidden |
The path of the pretrained model(weights) used to initialize the model being trained |
||||||||||
training_config.resume_from_model |
Resume Model |
hidden |
The path of the model used to resume an interrupted training |
(0, 1) |
|||||||||
training_config.rpn_min_overlap |
RPN Min Overlap |
float |
The lower IoU threshold used to match anchor boxes to groundtruth boxes. |
0.1 |
(0, 1) |
||||||||
training_config.rpn_max_overlap |
RPN Max Overlap |
float |
The higher IoU threshold used to match anchor boxes to groundtruth boxes. |
1 |
[0, 1) |
||||||||
training_config.classifier_min_overlap |
Classifier Min Overlap |
float |
The lower IoU threshold used to generate the proposal target. |
0.1 |
(0, 1) |
||||||||
training_config.classifier_max_overlap |
Classifier Max Overlap |
float |
The higher IoU threshold used to generate the proposal target. |
1 |
TRUE, FALSE |
||||||||
training_config.gt_as_roi |
Gt As ROI |
bool |
A flag to include groundtruth boxes in the positive ROIs for training the RCNN |
>0 |
|||||||||
training_config.std_scaling |
RPN Regression Loss Scaling |
float |
A scaling factor (multiplier) for RPN regression loss |
1 |
|||||||||
training_config.classifier_regr_std |
RCNN Regression Loss Scaling |
list |
Scaling factors (denominators) for the RCNN regression loss. A map from ‘x’, ‘y’, ‘w’, ‘h’ to its corresponding scaling factor, respectively |
[{“key”:”x”,”value”:10.0},{“key”:”y”,”value”:10.0},{“key”:”w”,”value”:5.0},{“key”:”h”,”value”:5.0}] |
|||||||||
training_config.classifier_regr_std.key |
RCNN Regression Loss Scaling Key |
string |
one of x,y,h,w |
>0 |
|||||||||
training_config.classifier_regr_std.value |
RCNN Regression Loss Scaling Value |
float |
float value for key |
||||||||||
training_config.output_model |
Output Model Path |
hidden |
Path of the output model |
>0 |
|||||||||
training_config.rpn_pre_nms_top_N |
RPN Pre-NMS Top N |
integer |
The number of boxes (ROIs) to be retained before the NMS in Proposal layer |
12000 |
>=1 |
||||||||
training_config.rpn_mini_batch |
RPN Mini Batch |
integer |
The batch size to train RPN |
16 |
>0 |
||||||||
training_config.rpn_nms_max_boxes |
RPN NMS Max Boxes |
integer |
The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer |
2000 |
(0, 1) |
||||||||
training_config.rpn_nms_overlap_threshold |
RPN NMS IoU Threshold |
float |
The IoU threshold for NMS in Proposal layer |
0.7 |
>0 |
||||||||
training_config.lambda_rpn_regr |
RPN Regression Loss Weighting |
float |
Weighting factor for RPN regression loss |
1 |
>0 |
||||||||
training_config.lambda_rpn_class |
RPN classification Loss Weighting |
float |
Weighting factor for RPN classification loss. |
1 |
>0 |
||||||||
training_config.lambda_cls_regr |
RCNN Regression Loss Weighting |
float |
Weighting factor for RCNN regression loss |
1 |
>0 |
||||||||
training_config.lambda_cls_class |
RCNN Classification Loss Weighting |
float |
Weighting factor for RCNN classification loss |
1 |
list of floats |
||||||||
training_config.model_parallelism |
Model Parallelism |
list of floats |
List of fractions for model parallelism |
||||||||||
training_config.early_stopping |
Early Stopping |
collection |
“loss” |
||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
>=0 |
|||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
>0 |
|||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
||||||||||
training_config.visualizer |
Visualizer |
collection |
TRUE, False |
||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
>=1 |
|||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
||||||||||
evaluation_config |
Evaluation |
collection |
yes |
||||||||||
evaluation_config.model |
Model Path |
string |
The path to the model to run inference |
>=1 |
|||||||||
evaluation_config.rpn_pre_nms_top_N |
RPN Pre-NMS Top N |
integer |
The number of boxes (ROIs) to be retained before the NMS in Proposal layer during evaluation |
6000 |
(0, 1) |
||||||||
evaluation_config.rpn_nms_overlap_threshold |
RPN overlap threshold |
float |
0.7 |
>0 |
|||||||||
evaluation_config.rpn_nms_max_boxes |
RPN NMS Max Boxes |
integer |
The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer |
300 |
>0 |
||||||||
evaluation_config.classifier_nms_max_boxes |
Classifier NMS Max Boxes |
integer |
The maximum number of boxes for RCNN NMS |
100 |
(0, 1) |
||||||||
evaluation_config.classifier_nms_overlap_threshold |
Classifier NMS Overlap Threshold |
float |
The NMS overlap threshold in RCNN |
0.3 |
(0, 1) |
||||||||
evaluation_config.object_confidence_thres |
Object Confidence Threshold |
float |
The objects confidence threshold |
0.00001 |
TRUE, FALSE |
||||||||
evaluation_config.use_voc07_11point_metric |
Use VOC 11-point Metric |
bool |
Whether to use PASCAL-VOC 11-point metric |
>=1 |
|||||||||
evaluation_config.validation_period_during_training |
Validation Period |
integer |
The period(number of epochs) to run validation during training |
>=1 |
|||||||||
evaluation_config.batch_size |
Batch Size |
integer |
The batch size for evaluation |
(0, 1) |
|||||||||
evaluation_config.trt_evaluation |
TensorRT Evaluation |
Collection |
TensorRT evaluation |
||||||||||
evaluation_config.trt_evaluation.trt_engine |
Trt Engine |
String |
TRT Engine |
(0, 1) |
|||||||||
evaluation_config.gt_matching_iou_threshold |
Gt Matching IoU Threshold |
float |
The IoU threshold to match groundtruth to detected objects. Only one of this parameter or gt_matching_iou_threshold_range should be specified |
0.5 |
(0, 1) |
||||||||
evaluation_config.gt_matching_iou_threshold_range |
Gt Matching IoU Threshold Range |
collection |
Only one of this collection or gt_matching_iou_threshold should be specified |
(0, 1) |
|||||||||
evaluation_config.gt_matching_iou_threshold_range.start |
Start |
float |
The starting value of the IoU range |
TRUE, FALSE |
|||||||||
evaluation_config.gt_matching_iou_threshold_range.end |
End |
float |
The end point of the IoU range (exclusive) |
||||||||||
evaluation_config.gt_matching_iou_threshold_range.step |
Step |
float |
The step size of the IoU range |
||||||||||
evaluation_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Visualize precision-recall curve or not |
||||||||||
inference_config |
>=1 |
||||||||||||
inference_config.images_dir |
Images Directory |
hidden |
Path to the directory of images to run inference on |
>0 |
|||||||||
inference_config.model |
Model Path |
hidden |
Path to the model to run inference on |
>0 |
|||||||||
inference_config.batch_size |
Batch Size |
integer |
The batch size for inference |
(0, 1) |
|||||||||
inference_config.rpn_pre_nms_top_N |
RPN Pre-NMS Top N |
integer |
The number of boxes (ROIs) to be retained before the NMS in Proposal layer during inference |
6000 |
(0, 1) |
||||||||
inference_config.rpn_nms_max_boxes |
RPN NMS Max Boxes |
integer |
The maximum number of boxes (ROIs) to be retained after the NMS in Proposal layer |
300 |
(0, 1) |
||||||||
inference_config.rpn_nms_overlap_threshold |
RPN NMS IoU Threshold |
float |
The IoU threshold for NMS in Proposal layer |
0.7 |
>0 |
||||||||
inference_config.bbox_visualize_threshold |
Visualization Threshold |
float |
The confidence threshold for visualizing the bounding boxes |
0.6 |
(0, 1) |
||||||||
inference_config.object_confidence_thres |
Object Confidence Threshold |
float |
The objects confidence threshold |
0.00001 |
|||||||||
inference_config.classifier_nms_max_boxes |
Classifier NMS Max Boxes |
integer |
The maximum number of boxes for RCNN NMS |
100 |
True, False |
||||||||
inference_config.classifier_nms_overlap_threshold |
Classifier NMS Overlap Threshold |
float |
The NMS overlap threshold in RCNN |
0.3 |
|||||||||
inference_config.detection_image_output_dir |
Image Output Directory |
string |
Path to the directory to save the output images during inference |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 |
|||||||||
inference_config.bbox_caption_on |
Bbox Caption |
bool |
Enable text caption for bounding box or not |
||||||||||
inference_config.labels_dump_dir |
Labels Output Directory |
hidden |
Path to the directory to save the output labels |
||||||||||
inference_config.nms_score_bits |
NMS Score Bits |
integer |
Number of score bits in optimized NMS |
||||||||||
inference_config.trt_inference |
TensorRT Inference |
Collection |
TensorRT inference configurations |
||||||||||
inference_config.trt_inference.trt_engine |
TensorRT Engine |
hidden |
Path to the TensorRT engine to run inference |
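Several parameters in the train table above (target_class_mapping, image_channel_mean, classifier_regr_std) are lists of key/value pairs. The sketch below shows that shape in Python, reusing the example values given in the table and comments; the class names are illustrative.

```python
# Illustrative key/value structures for the list-typed parameters above.
target_class_mapping = [
    {"key": "car", "value": "vehicle"},      # source class -> class the network learns
    {"key": "truck", "value": "vehicle"},
    {"key": "person", "value": "person"},
]

image_channel_mean = [                       # per-channel means for 'r', 'g', 'b'
    {"key": "r", "value": 103.0},
    {"key": "g", "value": 103.0},
    {"key": "b", "value": 103.0},
]

classifier_regr_std = [                      # RCNN regression loss scaling per coordinate
    {"key": "x", "value": 10.0},
    {"key": "y", "value": 10.0},
    {"key": "w", "value": 5.0},
    {"key": "h", "value": 5.0},
]
```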
convert
parameter |
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
num_shards |
num_shards |
Num shards |
integer |
Number of shards. |
256 |
||||||||
include_masks |
include_masks |
Include masks |
bool |
Whether to include instance segmentation masks. |
FALSE |
||||||||
tag |
tag |
string |
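A minimal sketch of the convert options above as a Python dictionary; the tag value is a placeholder assumption.

```python
# Illustrative dataset-convert options from the table above; the "train" tag is assumed.
convert_args = {
    "num_shards": 256,       # number of output shards
    "include_masks": False,  # whether to include instance segmentation masks
    "tag": "train",          # assumed tag value
}
```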
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.validation_data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.characters_list_file |
Characters List Path |
string |
||||||||||
training_config |
Training |
collection |
||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
32 |
1 |
|||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
24 |
1 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
1.00E-06 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.00E-05 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.001 |
0 |
1 |
||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.5 |
0 |
1 |
||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L2__ |
__L1__, __L2__ |
|||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
5.00E-04 |
0 |
|||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss |
||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
0 |
||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||
eval_config |
Evaluation |
collection |
||||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
5 |
1 |
|||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
1 |
1 |
|||||||
augmentation_config |
Augmentation config |
collection |
||||||||||
augmentation_config.output_width |
Model Input width |
integer |
96 |
1 |
yes |
|||||||
augmentation_config.output_height |
Model Input height |
integer |
48 |
1 |
yes |
|||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
1 |
1,3 |
yes |
||||||
augmentation_config.max_rotate_degree |
Max Rotation Degree |
integer |
The maximum rotation angle for augmentation |
5 |
1 |
|||||||
augmentation_config.keep_original_prob |
Keep Original Probability |
float |
The probability of keeping the original image. Only resizing will be applied to an image with this probability |
0.3 |
0 |
1 |
||||||
augmentation_config.rotate_prob |
Rotation Probability |
float |
The probability for rotating the image |
0.5 |
0 |
1 |
||||||
augmentation_config.gaussian_kernel_size |
Gaussian Kernel Size |
list |
The kernel size of the Gaussian blur |
[5,7,15] |
1 |
|||||||
augmentation_config.blur_prob |
Gaussian Blur Probability |
float |
The probability for blurring the image with Gaussian blur |
0.5 |
0 |
1 |
||||||
augmentation_config.reverse_color_prob |
Reverse Color Probability |
float |
The probability for reversing the color of the image |
0.5 |
0 |
1 |
||||||
lpr_config.hidden_units |
Hidden Units |
integer |
The number of hidden units in the LSTM layers of LPRNet |
512 |
1 |
|||||||
lpr_config.max_label_length |
Max Label Length |
integer |
The maximum length of license plates in the dataset |
8 |
||||||||
lpr_config.arch |
Architecture |
string |
The architecture of LPRNet |
baseline |
baseline |
|||||||
lpr_config.nlayers |
Number of Layers |
integer |
The number of convolution layers in LPRNet |
18 |
10, 18 |
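The soft_start_annealing_schedule parameters above describe a ramp-up followed by a cool-down of the learning rate. The sketch below shows one plausible interpretation: the rate rises from the minimum to the maximum over the soft_start fraction of training, holds at the maximum until the annealing fraction, then decays back down. The exact curve used by TAO may differ; this is an illustrative assumption, not the library's implementation.

```python
import math

# Sketch of a soft-start / annealing schedule using the defaults from the table above.
def soft_start_annealing_lr(progress, min_lr=1e-6, max_lr=1e-5,
                            soft_start=0.001, annealing=0.5):
    """progress is the fraction of training completed, in [0, 1]."""
    log_min, log_max = math.log(min_lr), math.log(max_lr)
    if progress < soft_start:                        # ramp up from min_lr to max_lr
        t = progress / soft_start
        return math.exp(log_min + t * (log_max - log_min))
    if progress < annealing:                         # hold at the maximum
        return max_lr
    t = (progress - annealing) / (1.0 - annealing)   # cool down back to min_lr
    return math.exp(log_max - t * (log_max - log_min))

for p in (0.0005, 0.25, 0.75, 1.0):
    print(p, soft_start_annealing_lr(p))
```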
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
||||||
experiment_spec |
Experiment Spec |
hidden |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
yes |
|||||||
data_type |
Data Type |
string |
The data type of the exported TensorRT engine. |
fp32 |
fp32, fp16 |
yes |
yes |
||||
max_workspace_size |
integer |
The maximum workspace size for the TensorRT engine, for example the integer value of 1<<30 or 2<<30 |
|||||||||
max_batch_size |
integer |
1 |
|||||||||
engine_file |
Engine File |
hidden |
UNIX path to the model engine file. |
yes |
|||||||
verbose |
hidden |
TRUE |
|||||||||
strict_type_constraints |
bool |
FALSE |
|||||||||
results_dir |
hidden |
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.validation_data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.characters_list_file |
Characters List Path |
string |
||||||||||
training_config |
Training |
collection |
||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
32 |
1 |
|||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
24 |
1 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
1.00E-06 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.00E-05 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.001 |
0 |
1 |
||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.5 |
0 |
1 |
||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L2__ |
__L1__, __L2__ |
|||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
5.00E-04 |
0 |
|||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss |
||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
0 |
||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||
eval_config |
Evaluation |
collection |
||||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
5 |
1 |
|||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
1 |
1 |
|||||||
augmentation_config |
Augmentation config |
collection |
||||||||||
augmentation_config.output_width |
Model Input width |
integer |
96 |
1 |
yes |
|||||||
augmentation_config.output_height |
Model Input height |
integer |
48 |
1 |
yes |
|||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
1 |
1,3 |
yes |
||||||
augmentation_config.max_rotate_degree |
Max Rotation Degree |
integer |
The maximum rotation angle for augmentation |
5 |
1 |
|||||||
augmentation_config.keep_original_prob |
Keep Original Probability |
float |
The probability of keeping the original image. Only resizing will be applied to an image with this probability |
0.3 |
0 |
1 |
||||||
augmentation_config.rotate_prob |
Rotation Probability |
float |
The probability for rotating the image |
0.5 |
0 |
1 |
||||||
augmentation_config.gaussian_kernel_size |
Gaussian Kernel Size |
list |
The kernel size of the Gaussian blur |
[5,7,15] |
1 |
|||||||
augmentation_config.blur_prob |
Gaussian Blur Probability |
float |
The probability for blurring the image with Gaussian blur |
0.5 |
0 |
1 |
||||||
augmentation_config.reverse_color_prob |
Reverse Color Probability |
float |
The probability for reversing the color of the image |
0.5 |
0 |
1 |
||||||
lpr_config.hidden_units |
Hidden Units |
integer |
The number of hidden units in the LSTM layers of LPRNet |
512 |
1 |
|||||||
lpr_config.max_label_length |
Max Label Length |
integer |
The maximum length of license plates in the dataset |
8 |
||||||||
lpr_config.arch |
Architecture |
string |
The architecture of LPRNet |
baseline |
baseline |
|||||||
lpr_config.nlayers |
Number of Layers |
integer |
The number of convolution layers in LPRNet |
18 |
10, 18 |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
initial_epoch |
Initial Epoch CLI |
integer |
1 |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.validation_data_sources.label_directory_path |
Label Path |
hidden |
||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image Path |
hidden |
||||||||||
dataset_config.characters_list_file |
Characters List Path |
string |
||||||||||
training_config |
Training |
collection |
||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
32 |
1 |
|||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
24 |
1 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
1.00E-06 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.00E-05 |
0 |
|||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.001 |
0 |
1 |
||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.5 |
0 |
1 |
||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L2__ |
__L1__, __L2__ |
|||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
5.00E-04 |
0 |
|||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss |
||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to wait before stopping the training |
0 |
||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||
eval_config |
Evaluation |
collection |
||||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
5 |
1 |
|||||||
eval_config.batch_size |
Batch Size |
integer |
The batch size used for evaluation |
1 |
1 |
|||||||
augmentation_config |
Augmentation config |
collection |
||||||||||
augmentation_config.output_width |
Model Input width |
integer |
96 |
1 |
yes |
|||||||
augmentation_config.output_height |
Model Input height |
integer |
48 |
1 |
yes |
|||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
1 |
1,3 |
yes |
||||||
augmentation_config.max_rotate_degree |
Max Rotation Degree |
integer |
The maximum rotation angle for augmentation |
5 |
1 |
|||||||
augmentation_config.keep_original_prob |
Keep Original Probability |
float |
The probability of keeping the original image. With this probability, only resizing is applied to the image |
0.3 |
0 |
1 |
||||||
augmentation_config.rotate_prob |
Rotation Probability |
float |
The probability for rotating the image |
0.5 |
0 |
1 |
||||||
augmentation_config.gaussian_kernel_size |
Gaussian Kernel Size |
list |
The kernel size of the Gaussian blur |
[5,7,15] |
1 |
|||||||
augmentation_config.blur_prob |
Gaussian Blur Probability |
float |
The probability for blurring the image with Gaussian blur |
0.5 |
0 |
1 |
||||||
augmentation_config.reverse_color_prob |
Reverse Color Probability |
float |
The probability for reversing the color of the image |
0.5 |
0 |
1 |
||||||
lpr_config.hidden_units |
Hidden Units |
integer |
The number of hidden units in the LSTM layers of LPRNet |
512 |
1 |
|||||||
lpr_config.max_label_length |
Max Label Length |
integer |
The maximum length of license plates in the dataset |
8 |
||||||||
lpr_config.arch |
Architecture |
string |
The architecture of LPRNet |
baseline |
baseline |
|||||||
lpr_config.nlayers |
Number of Layers |
integer |
The number of convolution layers in LPRNet |
18 |
10, 18 |
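
The soft_start_annealing_schedule above is parameterized by min_learning_rate, max_learning_rate, soft_start, and annealing, all expressed as fractions of total training progress. The sketch below is one plausible reading, ramping up to the maximum during the soft_start fraction, holding, then annealing back down after the annealing fraction, with interpolation in log space; the exact interpolation used by the trainer may differ.

```python
# Illustrative soft-start annealing schedule (interpolation shape is an assumption).
import math

def soft_start_annealing_lr(progress, min_lr, max_lr, soft_start, annealing):
    """progress is the completed fraction of training, in [0, 1]."""
    log_min, log_max = math.log(min_lr), math.log(max_lr)
    if progress < soft_start:                 # ramp up: min_lr -> max_lr
        t = progress / soft_start
    elif progress < annealing:                # hold at max_lr
        t = 1.0
    else:                                     # cool down: max_lr -> min_lr
        t = (1.0 - progress) / (1.0 - annealing)
    return math.exp(log_min + t * (log_max - log_min))

for p in (0.0, 0.05, 0.1, 0.5, 0.9, 1.0):
    print(p, soft_start_annealing_lr(p, 1e-6, 1e-5, soft_start=0.001, annealing=0.5))
```
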
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
||||||
output_file |
Output File |
hidden |
UNIX path to where the exported model will be saved. |
yes |
|||||||
force_ptq |
Force Post-Training Quantization |
bool |
Force generating int8 engine using Post Training Quantization |
FALSE |
no |
||||||
cal_image_dir |
hidden |
||||||||||
data_type |
Data Type |
string |
The data type (precision) of the generated TensorRT engine. |
fp32 |
int8, fp32, fp16 |
yes |
yes |
||||
strict_type_constraints |
bool |
FALSE |
|||||||||
gen_ds_config |
bool |
FALSE |
|||||||||
cal_cache_file |
Calibration cache file |
hidden |
UNIX path to the INT8 calibration cache file |
yes |
yes |
||||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
|||||||
max_workspace_size |
integer |
The maximum workspace size for the TensorRT engine, given as an integer (for example, 1<<30 or 2<<30) |
|||||||||
max_batch_size |
integer |
1 |
|||||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
100 |
yes |
||||||
min_batch_size |
integer |
1 |
|||||||||
opt_batch_size |
integer |
1 |
|||||||||
experiment_spec |
Experiment Spec |
hidden |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
yes |
|||||||
engine_file |
Engine File |
hidden |
UNIX path to the model engine file. |
yes |
|||||||
static_batch_size |
integer |
-1 |
|||||||||
results_dir |
hidden |
||||||||||
verbose |
hidden |
TRUE |
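
A note on the calibration-related export parameters above: the INT8 calibrator typically consumes roughly batches × batch_size images from cal_image_dir, and max_workspace_size is a plain integer number of bytes, which is why it is documented with bit-shift expressions such as 1<<30. A tiny sketch of that arithmetic, with hypothetical values:

```python
# Minimal sketch of how the INT8 calibration knobs relate to each other (assumptions).
batches = 100                  # number of calibration batches
batch_size = 100               # images per batch while building the TensorRT engine
max_workspace_size = 2 << 30   # 2 GiB of builder workspace, as a plain integer

print("calibration images needed (approx.):", batches * batch_size)
print("workspace:", max_workspace_size, "bytes =", max_workspace_size / (1 << 30), "GiB")
```
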
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
||||||||
threshold |
float |
0.3 |
|||||||||||
include_mask |
bool |
TRUE |
|||||||||||
experiment_spec_file |
hidden |
CLI argument |
|||||||||||
model_dir |
hidden |
CLI argument |
|||||||||||
key |
hidden |
CLI argument |
|||||||||||
seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
123 |
|||||||||
num_epochs |
integer |
10 |
|||||||||||
use_amp |
AMP |
bool |
FALSE |
||||||||||
warmup_steps |
Warmup steps |
integer |
The steps taken for learning rate to ramp up to the init_learning_rate |
10000 |
|||||||||
learning_rate_steps |
Learning rate steps |
string |
A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels |
[100000, 150000, 200000] |
|||||||||
learning_rate_decay_levels |
learning rate decay steps |
string |
A list of decay factors. The length should match the length of learning_rate_steps. |
[0.1, 0.02, 0.01] |
|||||||||
total_steps |
Total training steps |
integer |
The total number of training iterations |
250000 |
|||||||||
train_batch_size |
Training Batchsize |
integer |
The batch size during training |
2 |
|||||||||
eval_batch_size |
Evaluation Batchsize |
integer |
The batch size during validation or evaluation |
4 |
|||||||||
num_steps_per_eval |
Number of steps between each evaluation |
integer |
The number of training steps between two consecutive evaluations |
5000 |
|||||||||
momentum |
SGD momentum |
float |
Momentum of the SGD optimizer |
0.9 |
|||||||||
l1_weight_decay |
L1 Weight decay |
float |
L1 regularizer weight |
||||||||||
l2_weight_decay |
L2 weight decay |
float |
L2 regularizer weight |
0.00004 |
|||||||||
warmup_learning_rate |
float |
0.0001 |
|||||||||||
init_learning_rate |
float |
0.005 |
|||||||||||
num_examples_per_epoch |
integer |
118288 |
|||||||||||
checkpoint |
Path to Pretrained model |
hidden |
The path to a pretrained model |
||||||||||
skip_checkpoint_variables |
Name of skipped variables in the pretrained model |
string |
If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. |
||||||||||
pruned_model_path |
Path to pruned model |
hidden |
The path to a pruned MaskRCNN graph |
||||||||||
maskrcnn_config |
MaskRCNN configuration |
collection |
|||||||||||
maskrcnn_config.nlayers |
Number of layers in ResNet |
integer |
The number of layers in ResNet arch |
50 |
|||||||||
maskrcnn_config.arch |
Backbone name |
string |
The backbone feature extractor name |
resnet |
|||||||||
maskrcnn_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all BatchNorm layers in the backbone |
||||||||||
maskrcnn_config.freeze_blocks |
Freeze Block |
string |
A list of conv blocks in the backbone to freeze |
||||||||||
maskrcnn_config.gt_mask_size |
Groundtruth Mask Size |
integer |
The groundtruth mask size |
112 |
|||||||||
maskrcnn_config.rpn_positive_overlap |
RPN positive overlap |
float |
The lower-bound threshold to assign positive labels for anchors |
0.7 |
|||||||||
maskrcnn_config.rpn_negative_overlap |
RPN negative overlap |
float |
The upper-bound threshold to assign negative labels for anchors |
0.3 |
|||||||||
maskrcnn_config.rpn_batch_size_per_im |
RPN batchsize per image |
integer |
The number of sampled anchors per image in RPN |
256 |
|||||||||
maskrcnn_config.rpn_fg_fraction |
RPN foreground fraction |
float |
The desired fraction of positive anchors in a batch |
0.5 |
|||||||||
maskrcnn_config.rpn_min_size |
RPN minimum size |
float |
The minimum proposal height and width |
0 |
|||||||||
maskrcnn_config.batch_size_per_im |
RoI batchsize per image |
integer |
The RoI minibatch size per image |
512 |
|||||||||
maskrcnn_config.fg_fraction |
Foreground fraction |
float |
The target fraction of RoI minibatch that is labeled as foreground |
0.25 |
|||||||||
maskrcnn_config.fg_thresh |
float |
0.5 |
|||||||||||
maskrcnn_config.bg_thresh_hi |
float |
0.5 |
|||||||||||
maskrcnn_config.bg_thresh_lo |
float |
0 |
|||||||||||
maskrcnn_config.fast_rcnn_mlp_head_dim |
classification head dimension |
integer |
The Fast-RCNN classification head dimension |
1024 |
|||||||||
maskrcnn_config.bbox_reg_weights |
bounding-box regularization weights |
string |
The bounding-box regularization weights |
(10., 10., 5., 5.) |
|||||||||
maskrcnn_config.include_mask |
Include mask head |
bool |
Specifies whether to include a mask head |
TRUE |
|||||||||
maskrcnn_config.mrcnn_resolution |
Mask resolution |
integer |
The mask-head resolution |
28 |
|||||||||
maskrcnn_config.train_rpn_pre_nms_topn |
Top N RPN proposals pre NMS during training |
integer |
The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training |
2000 |
|||||||||
maskrcnn_config.train_rpn_post_nms_topn |
Top N RPN proposals post NMS during training |
integer |
The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training |
1000 |
|||||||||
maskrcnn_config.train_rpn_nms_threshold |
NMS threshold in RPN during training |
float |
The NMS IOU threshold in RPN during training |
0.7 |
|||||||||
maskrcnn_config.test_detections_per_image |
Number of bounding boxes after NMS |
integer |
The number of bounding box candidates after NMS |
100 |
|||||||||
maskrcnn_config.test_nms |
NMS threshold during test |
float |
The NMS IOU threshold during test |
0.5 |
|||||||||
maskrcnn_config.test_rpn_pre_nms_topn |
Top N RPN proposals pre NMS during test |
integer |
The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test |
1000 |
|||||||||
maskrcnn_config.test_rpn_post_nms_topn |
Top N RPN proposals post NMS during test |
integer |
The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test |
1000 |
|||||||||
maskrcnn_config.test_rpn_nms_thresh |
NMS threshold in RPN during test |
float |
The NMS IOU threshold in RPN during test |
0.7 |
|||||||||
maskrcnn_config.min_level |
Minimum FPN level |
integer |
The minimum level of the output feature pyramid |
2 |
|||||||||
maskrcnn_config.max_level |
Maximum FPN level |
integer |
The maximum level of the output feature pyramid |
6 |
|||||||||
maskrcnn_config.num_scales |
number of scales |
integer |
The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) |
1 |
|||||||||
maskrcnn_config.aspect_ratios |
aspect ratios |
string |
A list of tuples representing the aspect ratios of anchors on each pyramid level |
[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] |
|||||||||
maskrcnn_config.anchor_scale |
anchor scale |
integer |
Scale of the base-anchor size to the feature-pyramid stride |
8 |
|||||||||
maskrcnn_config.rpn_box_loss_weight |
RPN box loss weight |
float |
The weight for adjusting RPN box loss in the total loss |
1 |
|||||||||
maskrcnn_config.fast_rcnn_box_loss_weight |
FastRCNN box regression weight |
float |
The weight for adjusting FastRCNN box regression loss in the total loss |
1 |
|||||||||
maskrcnn_config.mrcnn_weight_loss_mask |
Mask loss weight |
float |
The weight for adjusting mask loss in the total loss |
1 |
|||||||||
data_config |
Dataset configuration |
collection |
|||||||||||
data_config.image_size |
Image size |
string |
The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input. |
(256, 256) |
|||||||||
data_config.augment_input_data |
augment input data |
bool |
Specifies whether to augment the data |
TRUE |
|||||||||
data_config.eval_samples |
Number of evaluation samples |
integer |
The number of samples for evaluation |
500 |
|||||||||
data_config.training_file_pattern |
Train file pattern |
hidden |
The TFRecord path for training |
||||||||||
data_config.validation_file_pattern |
validation file pattern |
hidden |
The TFRecord path for validation |
||||||||||
data_config.val_json_file |
validation json path |
hidden |
The annotation file path for validation |
||||||||||
data_config.num_classes |
Number of classes |
integer |
The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) |
91 |
|||||||||
data_config.skip_crowd_during_training |
skip crowd during training |
bool |
Specifies whether to skip crowd during training |
TRUE |
|||||||||
data_config.prefetch_buffer_size |
prefetch buffer size |
integer |
The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE) |
||||||||||
data_config.shuffle_buffer_size |
shuffle buffer size |
integer |
The shuffle buffer size used by tf.data.Dataset (default: 4096) |
4096 |
|||||||||
data_config.n_workers |
Number of workers |
integer |
The number of workers to parse and preprocess data (default: 16) |
16 |
|||||||||
data_config.max_num_instances |
maximum number of instances |
integer |
The maximum number of object instances to parse (default: 200) |
200 |
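
The anchor parameters above (min_level, max_level, num_scales, aspect_ratios, anchor_scale) jointly define the anchor shapes per FPN level: the base size is anchor_scale × stride, each level adds num_scales octave scales, and every aspect-ratio tuple multiplies height and width. The sketch below illustrates that combination under those assumptions; it is not the MaskRCNN implementation itself.

```python
# Illustrative anchor-shape generation per FPN level (assumptions, not the real code).
def anchor_shapes(min_level=2, max_level=6, num_scales=1,
                  aspect_ratios=((1.0, 1.0), (1.4, 0.7), (0.7, 1.4)),
                  anchor_scale=8):
    shapes = {}
    for level in range(min_level, max_level + 1):
        stride = 2 ** level
        base = anchor_scale * stride          # base anchor size relative to the stride
        level_shapes = []
        for i in range(num_scales):
            octave = 2 ** (i / num_scales)    # e.g. [2^0, 2^(1/3), 2^(2/3)] for 3 scales
            for ay, ax in aspect_ratios:      # assumed (height, width) multipliers
                level_shapes.append((base * octave * ay, base * octave * ax))
        shapes[level] = level_shapes
    return shapes

for level, s in anchor_shapes().items():
    print("P%d" % level, [(round(h, 1), round(w, 1)) for h, w in s])
```
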
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
model |
Model path |
hidden |
UNIX path to where the input model is located. |
yes |
|||||||
output_dir |
Output Directory |
hidden |
UNIX path to where the pruned model will be saved. |
yes |
|||||||
key |
Encode key |
hidden |
|||||||||
normalizer |
Normalizer |
string |
How to normalize |
max |
max, L2 |
||||||
equalization_criterion |
Equalization Criterion |
string |
Criteria to equalize the stats of inputs to an element-wise op layer. |
union |
union, intersection, arithmetic_mean, geometric_mean |
no |
|||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
|||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
||||||
excluded_layers |
Excluded layers |
string |
List of layers to be excluded from pruning, given as a string list. Example: -i item1 item2 |
||||||||
verbose |
verbosity |
hidden |
TRUE |
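
The prune parameters above interact as follows: per-filter norms are normalized (by the maximum or by the L2 norm of all norms), filters whose normalized score falls below pruning_threshold become removal candidates, at least min_num_filters are always retained, and the kept count is rounded to a multiple of pruning_granularity. A minimal illustrative sketch, not the toolkit's pruning code:

```python
# Illustrative threshold-based filter selection for pruning (assumptions only).
import numpy as np

def select_filters(weights, threshold=0.1, normalizer="max",
                   min_num_filters=16, granularity=8):
    # weights: (n_filters, ...) kernel tensor; score each filter by its L2 norm.
    norms = np.sqrt((weights.reshape(weights.shape[0], -1) ** 2).sum(axis=1))
    if normalizer == "max":
        scores = norms / norms.max()
    else:                                   # "L2": normalize by the norm of the norms
        scores = norms / np.sqrt((norms ** 2).sum())
    keep = np.where(scores > threshold)[0]
    n_keep = max(len(keep), min_num_filters)                    # never drop below the floor
    n_keep = int(np.ceil(n_keep / granularity) * granularity)   # round up to the granularity
    n_keep = min(n_keep, weights.shape[0])
    return np.argsort(scores)[::-1][:n_keep]  # indices of the highest-scoring filters kept

rng = np.random.default_rng(0)
w = rng.normal(size=(64, 3, 3, 3))
print("kept filters:", len(select_filters(w)))
```
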
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
||||||||
experiment_spec_file |
hidden |
CLI argument |
|||||||||||
model_dir |
hidden |
CLI argument |
|||||||||||
key |
hidden |
CLI argument |
|||||||||||
seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
123 |
|||||||||
num_epochs |
integer |
10 |
|||||||||||
use_amp |
AMP |
bool |
FALSE |
||||||||||
warmup_steps |
Warmup steps |
integer |
The steps taken for learning rate to ramp up to the init_learning_rate |
10000 |
|||||||||
learning_rate_steps |
Learning rate steps |
string |
A list of steps at which the learning rate decays by the factor specified in learning_rate_decay_levels |
[100000, 150000, 200000] |
|||||||||
learning_rate_decay_levels |
learning rate decay steps |
string |
A list of decay factors. The length should match the length of learning_rate_steps. |
[0.1, 0.02, 0.01] |
|||||||||
total_steps |
Total training steps |
integer |
The total number of training iterations |
250000 |
|||||||||
train_batch_size |
Training Batchsize |
integer |
The batch size during training |
2 |
|||||||||
eval_batch_size |
Evaluation Batchsize |
integer |
The batch size during validation or evaluation |
4 |
|||||||||
num_steps_per_eval |
Number of steps between each evaluation |
integer |
The number of training steps between two consecutive evaluations |
5000 |
|||||||||
momentum |
SGD momentum |
float |
Momentum of the SGD optimizer |
0.9 |
|||||||||
l1_weight_decay |
L1 Weight decay |
float |
L1 regularizer weight |
||||||||||
l2_weight_decay |
L2 weight decay |
float |
L2 regularizer weight |
0.00004 |
|||||||||
warmup_learning_rate |
float |
0.0001 |
|||||||||||
init_learning_rate |
float |
0.005 |
|||||||||||
num_examples_per_epoch |
integer |
118288 |
|||||||||||
checkpoint |
Path to Pretrained model |
hidden |
The path to a pretrained model |
||||||||||
skip_checkpoint_variables |
Name of skipped variables in the pretrained model |
string |
If specified, the weights of the layers with matching regular expressions will not be loaded. This is especially helpful for transfer learning. |
||||||||||
pruned_model_path |
Path to pruned model |
hidden |
The path to a pruned MaskRCNN graph |
||||||||||
maskrcnn_config |
MaskRCNN configuration |
collection |
|||||||||||
maskrcnn_config.nlayers |
Number of layers in ResNet |
integer |
The number of layers in ResNet arch |
50 |
|||||||||
maskrcnn_config.arch |
Backbone name |
string |
The backbone feature extractor name |
resnet |
|||||||||
maskrcnn_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all BatchNorm layers in the backbone |
||||||||||
maskrcnn_config.freeze_blocks |
Freeze Block |
string |
A list of conv blocks in the backbone to freeze |
||||||||||
maskrcnn_config.gt_mask_size |
Groundtruth Mask Size |
integer |
The groundtruth mask size |
112 |
|||||||||
maskrcnn_config.rpn_positive_overlap |
RPN positive overlap |
float |
The lower-bound threshold to assign positive labels for anchors |
0.7 |
|||||||||
maskrcnn_config.rpn_negative_overlap |
RPN negative overlap |
float |
The upper-bound threshold to assign negative labels for anchors |
0.3 |
|||||||||
maskrcnn_config.rpn_batch_size_per_im |
RPN batchsize per image |
integer |
The number of sampled anchors per image in RPN |
256 |
|||||||||
maskrcnn_config.rpn_fg_fraction |
RPN foreground fraction |
float |
The desired fraction of positive anchors in a batch |
0.5 |
|||||||||
maskrcnn_config.rpn_min_size |
RPN minimum size |
float |
The minimum proposal height and width |
0 |
|||||||||
maskrcnn_config.batch_size_per_im |
RoI batchsize per image |
integer |
The RoI minibatch size per image |
512 |
|||||||||
maskrcnn_config.fg_fraction |
Foreground fraction |
float |
The target fraction of RoI minibatch that is labeled as foreground |
0.25 |
|||||||||
maskrcnn_config.fg_thresh |
float |
0.5 |
|||||||||||
maskrcnn_config.bg_thresh_hi |
float |
0.5 |
|||||||||||
maskrcnn_config.bg_thresh_lo |
float |
0 |
|||||||||||
maskrcnn_config.fast_rcnn_mlp_head_dim |
classification head dimension |
integer |
The Fast-RCNN classification head dimension |
1024 |
|||||||||
maskrcnn_config.bbox_reg_weights |
bounding-box regularization weights |
string |
The bounding-box regularization weights |
(10., 10., 5., 5.) |
|||||||||
maskrcnn_config.include_mask |
Include mask head |
bool |
Specifies whether to include a mask head |
TRUE |
|||||||||
maskrcnn_config.mrcnn_resolution |
Mask resolution |
integer |
The mask-head resolution |
28 |
|||||||||
maskrcnn_config.train_rpn_pre_nms_topn |
Top N RPN proposals pre NMS during training |
integer |
The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during training |
2000 |
|||||||||
maskrcnn_config.train_rpn_post_nms_topn |
Top N RPN proposals post NMS during training |
integer |
The number of top-scoring RPN proposals to keep after applying NMS (total number produced) during training |
1000 |
|||||||||
maskrcnn_config.train_rpn_nms_threshold |
NMS threshold in RPN during training |
float |
The NMS IOU threshold in RPN during training |
0.7 |
|||||||||
maskrcnn_config.test_detections_per_image |
Number of bounding boxes after NMS |
integer |
The number of bounding box candidates after NMS |
100 |
|||||||||
maskrcnn_config.test_nms |
NMS threshold during test |
float |
The NMS IOU threshold during test |
0.5 |
|||||||||
maskrcnn_config.test_rpn_pre_nms_topn |
Top N RPN proposals pre NMS during test |
integer |
The number of top-scoring RPN proposals to keep before applying NMS (per FPN level) during test |
1000 |
|||||||||
maskrcnn_config.test_rpn_post_nms_topn |
Top N RPN proposals post NMS during test |
integer |
The number of top scoring RPN proposals to keep after applying NMS (total number produced) during test |
1000 |
|||||||||
maskrcnn_config.test_rpn_nms_thresh |
NMS threshold in RPN during test |
float |
The NMS IOU threshold in RPN during test |
0.7 |
|||||||||
maskrcnn_config.min_level |
Minimum FPN level |
integer |
The minimum level of the output feature pyramid |
2 |
|||||||||
maskrcnn_config.max_level |
Maximum FPN level |
integer |
The maximum level of the output feature pyramid |
6 |
|||||||||
maskrcnn_config.num_scales |
number of scales |
integer |
The number of anchor octave scales on each pyramid level (e.g. if set to 3, the anchor scales are [2^0, 2^(1/3), 2^(2/3)]) |
1 |
|||||||||
maskrcnn_config.aspect_ratios |
aspect ratios |
string |
A list of tuples representing the aspect ratios of anchors on each pyramid level |
[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)] |
|||||||||
maskrcnn_config.anchor_scale |
anchor scale |
integer |
Scale of the base-anchor size to the feature-pyramid stride |
8 |
|||||||||
maskrcnn_config.rpn_box_loss_weight |
RPN box loss weight |
float |
The weight for adjusting RPN box loss in the total loss |
1 |
|||||||||
maskrcnn_config.fast_rcnn_box_loss_weight |
FastRCNN box regression weight |
float |
The weight for adjusting FastRCNN box regression loss in the total loss |
1 |
|||||||||
maskrcnn_config.mrcnn_weight_loss_mask |
Mask loss weight |
float |
The weight for adjusting mask loss in the total loss |
1 |
|||||||||
data_config |
Dataset configuration |
collection |
|||||||||||
data_config.image_size |
Image size |
string |
The image dimension as a tuple within quote marks. “(height, width)” indicates the dimension of the resized and padded input. |
(256, 256) |
|||||||||
data_config.augment_input_data |
augment input data |
bool |
Specifies whether to augment the data |
TRUE |
|||||||||
data_config.eval_samples |
Number of evaluation samples |
integer |
The number of samples for evaluation |
500 |
|||||||||
data_config.training_file_pattern |
Train file pattern |
hidden |
The TFRecord path for training |
||||||||||
data_config.validation_file_pattern |
validation file pattern |
hidden |
The TFRecord path for validation |
||||||||||
data_config.val_json_file |
validation json path |
hidden |
The annotation file path for validation |
||||||||||
data_config.num_classes |
Number of classes |
integer |
The number of classes. If there are N categories in the annotation, num_classes should be N+1 (background class) |
91 |
|||||||||
data_config.skip_crowd_during_training |
skip crowd during training |
bool |
Specifies whether to skip crowd during training |
TRUE |
|||||||||
data_config.prefetch_buffer_size |
prefetch buffer size |
integer |
The prefetch buffer size used by tf.data.Dataset (default: AUTOTUNE) |
||||||||||
data_config.shuffle_buffer_size |
shuffle buffer size |
integer |
The shuffle buffer size used by tf.data.Dataset (default: 4096) |
4096 |
|||||||||
data_config.n_workers |
Number of workers |
integer |
The number of workers to parse and preprocess data (default: 16) |
16 |
|||||||||
data_config.max_num_instances |
maximum number of instances |
integer |
The maximum number of object instances to parse (default: 200) |
200 |
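
The step-decay learning-rate parameters above (warmup_steps, warmup_learning_rate, init_learning_rate, learning_rate_steps, learning_rate_decay_levels) can be read as a linear warmup followed by piecewise-constant decay. The sketch below assumes each decay level is a factor applied to the initial learning rate once its step boundary is passed; whether the trainer compounds the factors instead is not stated here, so treat this as an assumption.

```python
# Illustrative warmup + step-decay schedule (decay semantics are an assumption).
def step_lr(global_step,
            warmup_steps=10000, warmup_learning_rate=0.0001,
            init_learning_rate=0.005,
            learning_rate_steps=(100000, 150000, 200000),
            learning_rate_decay_levels=(0.1, 0.02, 0.01)):
    if global_step < warmup_steps:           # linear warmup to init_learning_rate
        frac = global_step / float(warmup_steps)
        return warmup_learning_rate + frac * (init_learning_rate - warmup_learning_rate)
    lr = init_learning_rate
    for boundary, level in zip(learning_rate_steps, learning_rate_decay_levels):
        if global_step >= boundary:
            lr = init_learning_rate * level   # decay level applied to the initial LR
    return lr

for step in (0, 5000, 50000, 120000, 180000, 240000):
    print(step, step_lr(step))
```
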
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
||||||
output_file |
Output File |
hidden |
UNIX path to where the exported model will be saved. |
yes |
|||||||
force_ptq |
Force Post-Training Quantization |
bool |
Force generating int8 engine using Post Training Quantization |
FALSE |
no |
||||||
cal_image_dir |
hidden |
||||||||||
data_type |
Data Type |
string |
The data type (precision) of the generated TensorRT engine. |
fp32 |
int8, fp32, fp16 |
yes |
yes |
||||
strict_type_constraints |
bool |
FALSE |
|||||||||
cal_cache_file |
Calibration cache file |
hidden |
UNIX path to the INT8 calibration cache file |
yes |
yes |
||||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
|||||||
max_workspace_size |
integer |
The maximum workspace size for the TensorRT engine, given as an integer (for example, 1<<30 or 2<<30) |
|||||||||
max_batch_size |
integer |
1 |
|||||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
100 |
yes |
||||||
class_map |
hidden |
||||||||||
engine_file |
Engine File |
hidden |
UNIX path to the model engine file. |
yes |
|||||||
verbose |
hidden |
TRUE |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
random_seed |
integer |
42 |
|||||||||
model_config |
collection |
||||||||||
model_config.arch |
string |
resnet |
|||||||||
model_config.input_image_size |
string |
3,80,60 |
yes |
yes |
|||||||
model_config.resize_interpolation_method |
string |
__BILINEAR__, __BICUBIC__ |
|||||||||
model_config.n_layers |
integer |
10 |
|||||||||
model_config.use_imagenet_head |
bool |
||||||||||
model_config.use_batch_norm |
bool |
TRUE |
|||||||||
model_config.use_bias |
bool |
||||||||||
model_config.use_pooling |
bool |
||||||||||
model_config.all_projections |
bool |
TRUE |
|||||||||
model_config.freeze_bn |
bool |
||||||||||
model_config.freeze_blocks |
integer |
||||||||||
model_config.dropout |
float |
||||||||||
model_config.batch_norm_config |
collection |
||||||||||
model_config.batch_norm_config.momentum |
float |
||||||||||
model_config.batch_norm_config.epsilon |
float |
||||||||||
model_config.activation |
collection |
||||||||||
model_config.activation.activation_type |
string |
||||||||||
model_config.activation.activation_parameters |
collection |
||||||||||
model_config.activation.activation_parameters.key |
string |
||||||||||
model_config.activation.activation_parameters.value |
float |
||||||||||
dataset_config |
collection |
||||||||||
dataset_config.train_csv_path |
hidden |
||||||||||
dataset_config.image_directory_path |
hidden |
||||||||||
dataset_config.val_csv_path |
hidden |
||||||||||
training_config |
Training |
collection |
|||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
100 |
1 |
||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
10 |
1 |
||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
||||||||
training_config.learning_rate |
collection |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
1.00E-06 |
0 |
||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.00E-02 |
0 |
||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
|||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.7 |
0 |
1 |
|||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__L1__, __L2__ |
||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
9.00E-05 |
0 |
||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
|||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
||||||||
training_config.early_stopping |
Early Stopping |
collection |
|||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to wait before stopping the training |
||||||||
training_config.visualizer |
Visualizer |
collection |
|||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
||||||||
training_config.optimizer.sgd |
collection |
One of SGD / ADAM / RMSPROP |
|||||||||
training_config.optimizer.sgd.momentum |
float |
0.9 |
|||||||||
training_config.optimizer.sgd.nesterov |
bool |
FALSE |
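
The optimizer block above selects SGD with momentum 0.9 and an optional Nesterov look-ahead. For reference, a toy sketch of the corresponding update rule (Keras-style velocity formulation, illustrative only):

```python
# Toy SGD-with-momentum update, with optional Nesterov look-ahead (illustrative).
def sgd_step(w, grad, velocity, lr=0.01, momentum=0.9, nesterov=False):
    velocity = momentum * velocity - lr * grad
    if nesterov:
        w = w + momentum * velocity - lr * grad   # look-ahead variant
    else:
        w = w + velocity
    return w, velocity

w, v = 1.0, 0.0
for _ in range(3):
    grad = 2 * w               # gradient of the toy objective w**2
    w, v = sgd_step(w, grad, v, nesterov=True)
    print(round(w, 4))
```
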
augment
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
cli |
---|---|---|---|---|---|---|---|---|---|---|---|---|
batch_size |
integer |
CLI parameter |
4 |
yes |
||||||||
spatial_config |
collection |
|||||||||||
spatial_config.rotation_config |
collection |
|||||||||||
spatial_config.rotation_config.angle |
float |
10 |
||||||||||
spatial_config.rotation_config.units |
string |
degrees |
||||||||||
spatial_config.shear_config |
collection |
|||||||||||
spatial_config.shear_config.shear_ratio_x |
float |
|||||||||||
spatial_config.shear_config.shear_ratio_y |
float |
|||||||||||
spatial_config.flip_config |
collection |
|||||||||||
spatial_config.flip_config.flip_horizontal |
bool |
|||||||||||
spatial_config.flip_config.flip_vertical |
bool |
|||||||||||
spatial_config.translation_config |
collection |
|||||||||||
spatial_config.translation_config.translate_x |
integer |
|||||||||||
spatial_config.translation_config.translate_y |
integer |
|||||||||||
color_config |
collection |
|||||||||||
color_config.hue_saturation_config |
collection |
|||||||||||
color_config.hue_saturation_config.hue_rotation_angle |
float |
5 |
||||||||||
color_config.hue_saturation_config.saturation_shift |
float |
1 |
||||||||||
color_config.contrast_config |
collection |
|||||||||||
color_config.contrast_config.contrast |
float |
|||||||||||
color_config.contrast_config.center |
float |
|||||||||||
color_config.brightness_config |
collection |
|||||||||||
color_config.brightness_config.offset |
float |
|||||||||||
partition_config |
collection |
|||||||||||
partition_config.partition_mode |
string |
Enum |
__ID_WISE__, __RANDOM__ |
|||||||||
partition_config.dataset_percentage |
float |
|||||||||||
blur_config |
collection |
|||||||||||
blur_config.std |
float |
|||||||||||
blur_config.size |
float |
|||||||||||
output_image_width |
integer |
1248 |
yes |
|||||||||
output_image_height |
integer |
384 |
yes |
|||||||||
output_image_channel |
integer |
3 |
yes |
|||||||||
image_extension |
string |
.png |
yes |
|||||||||
dataset_config |
collection |
|||||||||||
dataset_config.image_path |
const |
hidden |
images |
|||||||||
dataset_config.label_path |
const |
hidden |
labels |
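
Most color_config entries above are simple pixel-wise operations; for instance, contrast_config scales pixel values around center, and brightness_config adds a flat offset. A minimal sketch under those assumptions (not necessarily how the offline augment tool implements them):

```python
# Illustrative pixel-wise contrast/brightness adjustment (assumptions only).
import numpy as np

def adjust_color(image, contrast=1.2, center=127.5, brightness_offset=10.0):
    out = (image.astype(np.float32) - center) * contrast + center + brightness_offset
    return np.clip(out, 0, 255).astype(np.uint8)

img = np.full((384, 1248, 3), 100, dtype=np.uint8)   # output_image_height x output_image_width
print(adjust_color(img)[0, 0])
```
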
convert_coco
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
popular |
regex |
notes |
---|---|---|---|---|---|---|---|---|---|---|---|---|
coco_config |
collection |
|||||||||||
coco_config.root_directory_path |
hidden |
|||||||||||
coco_config.img_dir_names |
list |
List of image directories corresponding to each partition |
[“images”] |
The order of image directories must match the order of annotation_files across partitions |
||||||||
coco_config.annotation_files |
list |
List of JSON files with COCO dataset format |
[“annotations.json”] |
|||||||||
coco_config.num_partitions |
integer |
The number of partitions to use to split the data (N folds). The number of partitions must match the size of the img_dir_names and annotation_files lists |
1 |
|||||||||
coco_config.num_shards |
list |
The number of shards per fold. If the size of num_shards is 1, the same number of shards is applied to every partition |
[256] |
|||||||||
sample_modifier_config |
collection |
|||||||||||
sample_modifier_config.filter_samples_containing_only |
list |
list of string |
||||||||||
sample_modifier_config.dominant_target_classes |
list |
list of string |
||||||||||
sample_modifier_config.minimum_target_class_imbalance |
list |
list of string |
||||||||||
sample_modifier_config.minimum_target_class_imbalance.key |
string |
|||||||||||
sample_modifier_config.minimum_target_class_imbalance.value |
float |
|||||||||||
sample_modifier_config.num_duplicates |
integer |
|||||||||||
sample_modifier_config.max_training_samples |
integer |
|||||||||||
sample_modifier_config.source_to_target_class_mapping |
list |
list of string |
||||||||||
sample_modifier_config.source_to_target_class_mapping.key |
string |
|||||||||||
sample_modifier_config.source_to_target_class_mapping.value |
string |
|||||||||||
image_directory_path |
hidden |
|||||||||||
target_class_mapping |
list |
list of string |
||||||||||
target_class_mapping.key |
Class Key |
string |
||||||||||
target_class_mapping.value |
Class Value |
string |
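
In coco_config above, each partition (fold) has its own annotation file and image directory, and num_shards controls how each partition's images are spread across output shards (a single value applies to all partitions). A small illustrative sketch with hypothetical helper names:

```python
# Illustrative partition/shard relationship for COCO conversion (hypothetical helper).
def shard_partition(image_ids, num_shards):
    return [image_ids[i::num_shards] for i in range(num_shards)]

annotation_files = ["annotations.json"]   # one file per partition
img_dir_names = ["images"]                # must line up with annotation_files
num_partitions = len(annotation_files)    # 1 fold in this example
num_shards = [256]                        # a single value applies to every partition

ids = list(range(1000))                   # stand-in for image ids from one partition
shards = shard_partition(ids, num_shards[0])
print(num_partitions, "partition(s),", len(shards), "shards, first shard size:", len(shards[0]))
```
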
convert_efficientdet
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
num_shards |
num_shards |
integer |
Number of shards |
256 |
|||||||
include_masks |
include_masks |
bool |
Whether to include instance segmentation masks |
FALSE |
|||||||
tag |
tag |
string |
Tag |
convert_kitti
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
popular |
regex |
notes |
---|---|---|---|---|---|---|---|---|---|---|---|---|
kitti_config |
collection |
|||||||||||
kitti_config.root_directory_path |
hidden |
|||||||||||
kitti_config.image_dir_name |
const |
images |
||||||||||
kitti_config.label_dir_name |
const |
labels |
||||||||||
kitti_config.point_clouds_dir |
string |
|||||||||||
kitti_config.calibrations_dir |
string |
|||||||||||
kitti_config.kitti_sequence_to_frames_file |
string |
The name of the KITTI sequence-to-frames mapping file. This file must be present within the dataset root given by root_directory_path and must be uploaded by the user along with the images and labels; its name is entered in this field |
||||||||||
kitti_config.image_extension |
string |
The extension of the images in the image_dir_name parameter. |
.png |
.jpg, .png, .jpeg |
yes |
yes |
||||||
kitti_config.num_partitions |
integer |
The number of partitions to use to split the data (N folds). This field is ignored when the partition mode is set to random, as by default only two partitions are generated: val and train. In sequence mode, the data is split into n folds. The number of partitions should ideally be fewer than the total number of sequences in the kitti_sequence_to_frames file. Valid options: n = 2 for random partition mode; n < the number of sequences in the kitti_sequence_to_frames_file otherwise |
2 |
|||||||||
kitti_config.num_shards |
integer |
The number of shards per fold. |
10 |
1 |
20 |
|||||||
kitti_config.partition_mode |
string |
The method employed when partitioning the data into multiple folds. Two methods are supported: random partitioning, where the data is divided into two folds (train and val) and the val_split parameter must be set; and sequence-wise partitioning, where the data is divided into n partitions (defined by the num_partitions parameter) based on the number of available sequences |
random |
random, sequence |
||||||||
kitti_config.val_split |
float |
The percentage of data to be separated for validation. This only works under “random” partition mode. This partition is available in fold 0 of the TFRecords generated. Set the validation fold to 0 in the dataset_config. |
0 |
0 |
100 |
Must not be exposed through the API, since each dataset stands on its own and cannot be split into train, val, test, etc. through the API |
||||||
sample_modifier_config |
collection |
|||||||||||
sample_modifier_config.filter_samples_containing_only |
list |
list of string |
||||||||||
sample_modifier_config.dominant_target_classes |
list |
list of string |
||||||||||
sample_modifier_config.minimum_target_class_imbalance |
list |
|||||||||||
sample_modifier_config.minimum_target_class_imbalance.key |
string |
|||||||||||
sample_modifier_config.minimum_target_class_imbalance.value |
float |
|||||||||||
sample_modifier_config.num_duplicates |
integer |
|||||||||||
sample_modifier_config.max_training_samples |
integer |
|||||||||||
sample_modifier_config.source_to_target_class_mapping |
list |
|||||||||||
sample_modifier_config.source_to_target_class_mapping.key |
string |
|||||||||||
sample_modifier_config.source_to_target_class_mapping.value |
string |
|||||||||||
image_directory_path |
hidden |
|||||||||||
target_class_mapping |
list |
These are better left unexposed in dataset convert; use the target_class_mapping in the train / eval / inference spec instead |
||||||||||
target_class_mapping.key |
Class Key |
string |
||||||||||
target_class_mapping.value |
Class Value |
string |
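
Under the random partition_mode above, val_split percent of the frames end up in fold 0 (val) and the remainder in fold 1 (train); sequence-wise mode instead splits whole sequences into num_partitions folds. A minimal sketch of the random case, with a hypothetical helper:

```python
# Illustrative "random" partition with val_split (hypothetical helper, not the converter).
import random

def random_partition(frame_ids, val_split=20.0, seed=42):
    rng = random.Random(seed)
    ids = list(frame_ids)
    rng.shuffle(ids)
    n_val = int(len(ids) * val_split / 100.0)
    return {"val": ids[:n_val], "train": ids[n_val:]}   # fold 0 = val, fold 1 = train

folds = random_partition(range(100), val_split=20.0)
print(len(folds["val"]), "val frames,", len(folds["train"]), "train frames")
```
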
kmeans
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
size_x |
integer |
Network input width |
yes |
||||||||
size_y |
integer |
Network input height |
yes |
||||||||
num_clusters |
integer |
Number of clusters needed. |
9 |
||||||||
max_steps |
integer |
Maximum number of k-means steps. K-means stops at max_steps even if it has not converged |
10000 |
||||||||
min_x |
integer |
Ignore boxes whose width (in the network input-size image) is not larger than this value. |
0 |
||||||||
min_y |
integer |
Ignore boxes whose height (in the network input-size image) is not larger than this value. |
0 |
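
The kmeans action above clusters ground-truth box sizes into anchor shapes: boxes smaller than min_x/min_y (measured in the network input-size image of size_x by size_y) are dropped, the remaining (width, height) pairs are grouped into num_clusters anchors, and clustering stops after max_steps even without convergence. The sketch below uses plain Euclidean k-means for illustration; an IoU-based distance is another common choice and the tool's actual metric is not stated here.

```python
# Illustrative k-means anchor clustering (Euclidean distance is an assumption).
import numpy as np

def kmeans_anchors(wh, num_clusters=9, max_steps=10000, min_x=0, min_y=0, seed=0):
    wh = wh[(wh[:, 0] > min_x) & (wh[:, 1] > min_y)]      # drop tiny boxes
    rng = np.random.default_rng(seed)
    centers = wh[rng.choice(len(wh), num_clusters, replace=False)]
    for _ in range(max_steps):
        assign = np.argmin(((wh[:, None, :] - centers[None, :, :]) ** 2).sum(-1), axis=1)
        new_centers = np.array([wh[assign == k].mean(axis=0) if np.any(assign == k)
                                else centers[k] for k in range(num_clusters)])
        if np.allclose(new_centers, centers):              # converged early
            break
        centers = new_centers
    return centers[np.argsort(centers.prod(axis=1))]       # sort anchors by area

boxes = np.abs(np.random.default_rng(1).normal(loc=[120, 80], scale=[60, 40], size=(500, 2)))
print(kmeans_anchors(boxes, num_clusters=9).round(1))
```
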
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
experiment_spec_file |
hidden |
CLI argument |
||||||||||||
results_dir |
hidden |
CLI argument |
||||||||||||
key |
hidden |
CLI argument |
||||||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck, etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
string |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
string |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
string |
||||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
8 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
100 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
FALSE |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
4.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.50E-02 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.3 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__L1__, __L2__ |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
2.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
10 |
||||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
|||||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
2 |
||||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
|||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
|||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to wait before stopping the training |
|||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
|||||||||||
training_config.optimizer.sgd |
collection |
One of SGD / ADAM / RMSPROP |
||||||||||||
training_config.optimizer.sgd.momentum |
float |
0.9 |
||||||||||||
training_config.optimizer.sgd.nesterov |
bool |
TRUE |
||||||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
__SAMPLE__, __INTEGRATE__ |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
The batch size used for evaluation |
8 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize the precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
1248 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
384 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum random crop scale |
|||||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum random crop scale |
|||||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
The minimum random crop aspect ratio |
|||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
The maximum random crop aspect ratio |
|||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
|||||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
|||||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
|||||||||||
augmentation_config.hue |
Hue |
float |
Hue delta in color jittering augmentation |
|||||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
string |
|||||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||||
retinanet_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5] |
||||||||||
retinanet_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different RetinaNet feature layers |
||||||||||
retinanet_config.aspect_ratios_global |
string |
[1.0, 2.0, 0.5] |
||||||||||||
retinanet_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
FALSE |
||||||||||
retinanet_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
retinanet_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
retinanet_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.045, 0.09, 0.2, 0.4, 0.55, 0.7] |
||||||||||
retinanet_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be |
|||||||||||
retinanet_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
retinanet_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
retinanet_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
retinanet_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
retinanet_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
retinanet_config.loss_loc_weight |
Localization loss weight |
float |
This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss |
0.8 |
||||||||||
retinanet_config.focal_loss_alpha |
Alpha (Focal loss) |
float |
Alpha in the focal loss equation |
0.25 |
||||||||||
retinanet_config.focal_loss_gamma |
Gamma (Focal loss) |
float |
Gamma in the focal loss equation |
2 |
||||||||||
retinanet_config.n_kernels |
Number of kernels |
integer |
This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. |
1 |
||||||||||
retinanet_config.feature_size |
Feature size |
integer |
This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). |
256 |
||||||||||
retinanet_config.pos_iou_thresh |
Positive IOU threshold |
float |
The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box. |
|||||||||||
retinanet_config.neg_iou_thresh |
Negative IOU threshold |
float |
The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training. |
|||||||||||
retinanet_config.n_anchor_levels |
Number of Anchor levels |
integer |
Number of anchor levels between two adjacent scales. |
1 |
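
Two of the loss-related fields above have simple closed forms: focal_loss_alpha and focal_loss_gamma parameterize the standard focal loss, and, as stated for loss_loc_weight, the final loss is classification_loss + loss_loc_weight * loc_loss. A small numeric sketch of both (toy values, not RetinaNet's actual code):

```python
# Toy sketch of the standard focal loss and the weighted total loss (illustrative).
import math

def focal_loss(p, y, alpha=0.25, gamma=2.0):
    """p: predicted foreground probability, y: 1 for a positive anchor, 0 for a negative."""
    p_t = p if y == 1 else 1.0 - p
    alpha_t = alpha if y == 1 else 1.0 - alpha
    return -alpha_t * (1.0 - p_t) ** gamma * math.log(p_t)

cls_loss = focal_loss(0.3, 1) + focal_loss(0.1, 0)   # toy two-anchor example
loc_loss = 0.5                                        # stand-in localization loss
loss_loc_weight = 0.8
print("total loss:", cls_loss + loss_loc_weight * loc_loss)
```
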
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
CLI argument |
|||||||
data_type |
Data Type |
enum |
The data type (precision) of the generated TensorRT engine. |
int8 |
int8, fp32, fp16 |
yes |
yes |
CLI argument |
|||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
no |
CLI argument |
|||||||
experiment_spec |
Experiment Spec |
string |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
hidden from train experiment |
yes |
CLI argument |
|||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
CLI argument |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the exported model will be saved. |
hidden |
yes |
CLI argument |
|||||||
force_ptq |
Force Post-Training Quantization |
bool |
Force generating int8 engine using Post Training Quantization |
TRUE |
no |
CLI argument |
|||||||
engine-file |
Engine File |
hidden |
UNIX path to the model engine file. |
/export/input_model_file.<data_type>.trt |
yes |
CLI argument |
|||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
CLI argument |
|||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
16 |
yes |
CLI argument |
|||||||
cal_cache_file |
Calibration cache file |
string |
UNIX path to the INT8 calibration cache file |
hidden |
yes |
yes |
CLI argument |
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
threshold |
Threshold |
float |
0.3 |
|||||||||||
experiment_spec_file |
hidden |
CLI argument |
||||||||||||
results_dir |
hidden |
CLI argument |
||||||||||||
key |
hidden |
CLI argument |
||||||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck, etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
string |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
string |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
string |
||||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
8 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
100 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training (QAT). |
FALSE |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
4.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.50E-02 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.3 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__L1__, __L2__ |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
2.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
10 |
||||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
|||||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
2 |
||||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
|||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
|||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to wait before stopping the training |
|||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
|||||||||||
training_config.optimizer.sgd |
collection |
One of SGD / ADAM / RMSPROP |
||||||||||||
training_config.optimizer.sgd.momentum |
float |
0.9 |
||||||||||||
training_config.optimizer.sgd.nesterov |
bool |
TRUE |
||||||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
__SAMPLE__, __INTEGRATE__ |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
8 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
1248 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
384 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum scale of the random crop |
|||||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum scale of the random crop |
|||||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
|||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
|||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
|||||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
|||||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
|||||||||||
augmentation_config.hue |
Hue |
float |
Hue delta in color jittering augmentation |
|||||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
string |
|||||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||||
retinanet_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5] |
||||||||||
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
retinanet_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different RetinaNet feature layers |
||||||||||
retinanet_config.aspect_ratios_global |
string |
[1.0, 2.0, 0.5] |
||||||||||||
retinanet_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
FALSE |
||||||||||
retinanet_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
retinanet_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
retinanet_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.045, 0.09, 0.2, 0.4, 0.55, 0.7] |
||||||||||
retinanet_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
retinanet_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
retinanet_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
retinanet_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
retinanet_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
retinanet_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
retinanet_config.loss_loc_weight |
Localization loss weight |
float |
This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss |
0.8 |
||||||||||
retinanet_config.focal_loss_alpha |
Alpha (Focal loss) |
float |
Alpha in the focal loss equation |
0.25 |
||||||||||
retinanet_config.focal_loss_gamma |
Gamma (Focal loss) |
float |
Gamma in the focal loss equation |
2 |
||||||||||
retinanet_config.n_kernels |
Number of kernels |
integer |
This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. |
1 |
||||||||||
retinanet_config.feature_size |
Feature size |
integer |
This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). |
256 |
||||||||||
retinanet_config.pos_iou_thresh |
Positive IOU threshold |
float |
The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box. |
|||||||||||
retinanet_config.neg_iou_thresh |
Negative IOU threshold |
float |
The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training. |
|||||||||||
retinanet_config.n_anchor_levels |
Number of Anchor levels |
integer |
Number of anchor levels between two adjacent scales. |
1 |
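To make the dotted parameter names above easier to follow, here is a small Python sketch of how the dataset mapping, NMS, and evaluation rows group together. Only the listed defaults and the person/masked-person example are taken from the table; the nesting itself is an assumption about how the flattened names compose.

```python
# Illustrative grouping of a few inference-spec rows above; defaults come from the table.
inference_spec = {
    "dataset_config": {
        "target_class_mapping": [
            {"key": "person", "value": "masked-person"},  # example mapping from the table
        ],
    },
    "nms_config": {
        "confidence_threshold": 0.01,     # default above
        "clustering_iou_threshold": 0.6,  # default above
        "top_k": 200,                     # default above
    },
    "eval_config": {
        "batch_size": 8,                  # default above
        "matching_iou_threshold": 0.5,    # default above
    },
}
```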
inference_seq
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
||||||||
out_thres |
float |
0.3 |
model convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
b |
batch_size |
integer |
calibration batch size |
8 |
yes |
CLI argument |
||||||
c |
cache_file |
path |
calibration cache file (default cal.bin) |
CLI argument |
||||||||
d |
input_dims |
list |
comma separated list of input dimensions (not required for TLT 3.0 new models). |
CLI argument |
||||||||
i |
input_order |
enum |
input dimension ordering |
nchw |
nchw, nhwc, nc |
CLI argument |
||||||
m |
max_batch_size |
integer |
Maximum TensorRT engine batch size (default 16). If an out-of-memory error occurs, decrease the batch size accordingly. |
16 |
yes |
CLI argument |
||||||
o |
outputs |
list |
comma separated list of output node names |
CLI argument |
||||||||
p |
parse_profile_shapes |
list |
Comma-separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape uses x as the delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW. Can be specified multiple times if the model has multiple input tensors. This argument is only useful in the dynamic-shape case. |
CLI argument |
||||||||
s |
strict_type_constraints |
bool |
TensorRT strict_type_constraints flag for INT8 mode |
FALSE |
CLI argument |
|||||||
t |
data_type |
enum |
TensorRT data type |
fp32 |
fp32, fp16, int8 |
yes |
CLI argument |
|||||
u |
dla_core |
int |
Use DLA core N for layers that support DLA (default -1, meaning no DLA core is used for inference; GPU fallback is always allowed). |
-1 |
CLI argument |
|||||||
w |
max_workspace_size |
int |
Maximum workspace size of the TensorRT engine (default 1<<30). If an out-of-memory error occurs, increase the workspace size accordingly. |
1<<30, 2<<30 |
CLI argument |
|||||||
platform |
platform |
enum |
platform label |
rtx |
yes |
yes |
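The model convert rows are single-letter CLI flags, so a conversion call is just a matter of stringing them together. The sketch below builds such a call in Python; `CONVERTER_BIN`, the input model path, and the output node names are placeholders, while the flag letters and defaults come from the rows above.

```python
# Sketch: assembling a conversion command from the documented single-letter flags.
# CONVERTER_BIN, model.etlt, and the -o node names are placeholders.
convert_cmd = [
    "CONVERTER_BIN", "model.etlt",
    "-t", "fp32",               # TensorRT data type: fp32 / fp16 / int8 (default fp32)
    "-m", "16",                 # maximum engine batch size (default 16)
    "-b", "8",                  # calibration batch size (default 8)
    "-i", "nchw",               # input dimension ordering: nchw / nhwc / nc (default nchw)
    "-w", str(1 << 30),         # maximum workspace size (default 1<<30)
    "-u", "-1",                 # DLA core; -1 means no DLA, GPU fallback always allowed
    "-o", "output_node_names",  # placeholder comma-separated output node names
]
print(" ".join(convert_cmd))
```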
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
no |
CLI argument |
||||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
CLI argument |
||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
CLI argument |
|||||||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
CLI argument |
|||||||
equalization_criterion |
Equalization Criterion |
string |
Criteria to equalize the stats of inputs to an element wise op layer. |
union |
union, intersection, arithmetic_mean, geometric_mean |
no |
CLI argument |
||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
CLI argument |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
hidden |
yes |
CLI argument |
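As with export, the prune parameters above are CLI arguments. A hedged sketch of assembling them follows; `PRUNE_BIN` and the model paths are placeholders (the table hides them), and the remaining values are the documented defaults.

```python
# Sketch: building a prune invocation from the documented arguments and defaults.
prune_args = {
    "--pruning_threshold": "0.1",         # default above; normalized-norm threshold (0..1)
    "--pruning_granularity": "8",         # default above; filters removed at a time
    "--min_num_filters": "16",            # default above; minimum filters kept per layer
    "--equalization_criterion": "union",  # default above; or intersection / arithmetic_mean / geometric_mean
    "--model": "/workspace/model.tlt",               # placeholder; table marks it hidden
    "--output_file": "/workspace/model_pruned.tlt",  # placeholder; table marks it hidden
}
cmd = ["PRUNE_BIN"] + [tok for key, val in prune_args.items() for tok in (key, val)]
print(" ".join(cmd))
```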
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
initial_epoch |
CLI parameter initial epoch |
integer |
0 |
|||||||||||
experiment_spec_file |
hidden |
CLI argument |
||||||||||||
results_dir |
hidden |
CLI argument |
||||||||||||
key |
hidden |
CLI argument |
||||||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck, etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
string |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
string |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
string |
||||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
8 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
100 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training (QAT). |
FALSE |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
4.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
1.50E-02 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.3 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
__L1__, __L2__ |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
2.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
10 |
||||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
|||||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
2 |
||||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
|||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
|||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to wait before stopping the training |
|||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
|||||||||||
training_config.optimizer.sgd |
collection |
One of SGD / ADAM / RMSPROP |
||||||||||||
training_config.optimizer.sgd.momentum |
float |
0.9 |
||||||||||||
training_config.optimizer.sgd.nesterov |
bool |
TRUE |
||||||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
__SAMPLE__, __INTEGRATE__ |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
8 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
1248 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
384 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
The minimum scale of the random crop |
|||||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
The maximum scale of the random crop |
|||||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
|||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
|||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
|||||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
|||||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
|||||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
|||||||||||
augmentation_config.hue |
Hue |
float |
Hue delta in color jittering augmentation |
|||||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
string |
|||||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||||
retinanet_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5] |
||||||||||
Note: Either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
retinanet_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different RetinaNet feature layers |
||||||||||
retinanet_config.aspect_ratios_global |
string |
[1.0, 2.0, 0.5] |
||||||||||||
retinanet_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
FALSE |
||||||||||
retinanet_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
retinanet_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
retinanet_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.045, 0.09, 0.2, 0.4, 0.55, 0.7] |
||||||||||
retinanet_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
retinanet_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
retinanet_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
retinanet_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
retinanet_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
retinanet_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
|||||||||||
retinanet_config.loss_loc_weight |
Localization loss weight |
float |
This is a positive float controlling how much location regression loss should contribute to the final loss. The final loss is calculated as classification_loss + loss_loc_weight * loc_loss |
0.8 |
||||||||||
retinanet_config.focal_loss_alpha |
Alpha (Focal loss) |
float |
Alpha in the focal loss equation |
0.25 |
||||||||||
retinanet_config.focal_loss_gamma |
Gamma (Focal loss) |
float |
Gamma in the focal loss equation |
2 |
||||||||||
retinanet_config.n_kernels |
Number of kernels |
integer |
This setting controls the number of convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value generates a larger network and usually means the network is harder to train. |
1 |
||||||||||
retinanet_config.feature_size |
Feature size |
integer |
This setting controls the number of channels of the convolutional layers in the RetinaNet subnets for classification and anchor box regression. A larger value gives a larger network and usually means the network is harder to train. Note that RetinaNet FPN generates 5 feature maps, thus the scales field requires a list of 6 scaling factors. The last number is not used if two_boxes_for_ar1 is set to False. There are also three underlying scaling factors at each feature map level (2^0, 2^⅓, 2^⅔ ). |
256 |
||||||||||
retinanet_config.pos_iou_thresh |
Positive IOU threshold |
float |
The intersection-over-union similarity threshold that must be met in order to match a given ground truth box to a given anchor box. |
|||||||||||
retinanet_config.neg_iou_thresh |
Negative IOU threshold |
float |
The maximum allowed intersection-over-union similarity of an anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an anchor box is neither a positive, nor a negative box, it will be ignored during training. |
|||||||||||
retinanet_config.n_anchor_levels |
Number of Anchor levels |
integer |
Number of anchor levels between two adjacent scales. |
1 |
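The training_config rows above describe a soft-start annealing learning-rate schedule: the rate ramps from the minimum to the maximum over the soft_start fraction of training and decays back after the annealing point. A minimal Python sketch using only the documented defaults is shown below; the nesting is inferred from the dotted names and is otherwise an assumption.

```python
# Sketch of the training_config portion of the train spec, built from the defaults above.
training_config = {
    "batch_size_per_gpu": 8,
    "num_epochs": 100,
    "enable_qat": False,
    "learning_rate": {
        "soft_start_annealing_schedule": {
            "min_learning_rate": 4.0e-05,
            "max_learning_rate": 1.5e-02,
            "soft_start": 0.1,   # fraction of training spent ramping up (0..1)
            "annealing": 0.3,    # fraction at which cool-down starts; must exceed soft_start
        },
    },
    "regularizer": {"type": "L1", "weight": 2.0e-05},
    "checkpoint_interval": 10,   # epochs between saved checkpoints
}
```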
dataset_convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
dataset_name |
Name |
string |
merge |
yes |
|||||||
original_json |
Original json |
hidden |
|||||||||
finetune_json |
Finetune json |
hidden |
|||||||||
original_minutes |
Original minutes |
integer |
300 |
||||||||
delimiter |
Delimiter |
string |
|
||||||||
save_path |
Save Path |
hidden |
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||||
export_format |
Export format |
string |
RIVA |
RIVA, ONNX |
yes |
||||||||
export_to |
Export To |
hidden |
finetune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
yes |
||||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
yes |
||||||||
key |
Save key |
hidden |
Key to save the model |
yes |
yes |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
yes |
yes |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained model |
yes |
|||||||||
sample_rate |
Sample rate |
int |
The target sample rate to load the audio, in Hz |
22050 |
|||||||||
train_dataset |
Train Dataset |
hidden |
Path to the train dataset manifest json file |
||||||||||
validation_dataset |
Validation Dataset |
hidden |
Path to the validation dataset manifest json file |
||||||||||
prior_folder |
hidden |
||||||||||||
n_speakers |
Number of speakers |
int |
Number of speakers in the dataset |
1 |
yes |
||||||||
n_window_size |
Window size |
int |
The size of the fft window in samples |
1024 |
yes |
||||||||
n_window_stride |
Window stride |
int |
The stride of the window in samples |
256 |
yes |
||||||||
pitch_fmin |
Pitch Fmin |
float |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
64 |
yes |
||||||||
pitch_fmax |
Pitch Fmax |
float |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
512 |
yes |
||||||||
pitch_avg |
Pitch Average |
float |
The average used to normalize the pitch |
yes |
yes |
||||||||
pitch_std |
Pitch std. deviation |
float |
The standard deviation used to normalize the pitch |
yes |
yes |
||||||||
train_ds |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||||
train_ds.dataset |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||||
train_ds.dataset._target_ |
Target |
const |
The nemo class module to be imported |
nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset |
|||||||||
train_ds.dataset.manifest_filepath |
Train manifest file |
const |
Path to the train dataset manifest json file |
${train_dataset} |
|||||||||
train_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
||||||||||
train_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration less than the given value (in seconds) will be dropped |
0.1 |
|||||||||
train_ds.dataset.int_values |
Input as integer values |
bool |
Load samples as 32 bit integers or not |
FALSE |
|||||||||
train_ds.dataset.normalize |
Normalize dataset |
bool |
The flag to determine whether or not to normalize the transcript text |
TRUE |
|||||||||
train_ds.dataset.sample_rate |
Sample rate |
const |
The target sample rate to load the audio, in Hz. |
${sample_rate} |
|||||||||
train_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
|||||||||
train_ds.dataset.sup_data_path |
Prior folder |
const |
Path to the prior folder |
${prior_folder} |
|||||||||
train_ds.dataset.n_window_size |
Window size |
const |
The size of the fft window in samples |
${n_window_size} |
|||||||||
train_ds.dataset.n_window_stride |
Window stride |
const |
The stride of the window in samples |
${n_window_stride} |
|||||||||
train_ds.dataset.pitch_fmin |
Pitch Fmin |
const |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
${pitch_fmin} |
|||||||||
train_ds.dataset.pitch_fmax |
Pitch Fmax |
const |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
${pitch_fmax} |
|||||||||
train_ds.dataset.pitch_avg |
Pitch Average |
const |
The average used to normalize the pitch |
${pitch_avg} |
|||||||||
train_ds.dataset.pitch_std |
Pitch std. deviation |
const |
The standard deviation used to normalize the pitch |
${pitch_std} |
|||||||||
train_ds.dataset.vocab |
Training data vocabulary |
collection |
Collection describing the vocabulary component of the training dataset |
||||||||||
train_ds.dataset.vocab.notation |
Vocabulary Notation |
str |
Either chars or phonemes as general notation |
phonemes |
|||||||||
train_ds.dataset.vocab.punct |
Punctuation |
bool |
Whether to reserve graphemes from basic punctuation |
TRUE |
|||||||||
train_ds.dataset.vocab.spaces |
Spaces |
bool |
Whether to prepend spaces to every punctuation |
TRUE |
|||||||||
train_ds.dataset.vocab.stresses |
Stresses |
bool |
TRUE |
||||||||||
train_ds.dataset.vocab.add_blank_at |
Add blank at |
str |
Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels |
None |
last, last_but_none, None |
||||||||
train_ds.dataset.vocab.pad_with_space |
Pad with space |
bool |
Whether to pad text with spaces at the beginning and at the end. |
TRUE |
|||||||||
train_ds.dataset.vocab.chars |
Chars |
bool |
Whether to additionally use chars together with phonemes |
TRUE |
|||||||||
train_ds.dataset.vocab.improved_version_g2p |
Improved version G2P |
bool |
Whether to use the new version of g2p. |
TRUE |
|||||||||
train_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
||||||||||
train_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
|||||||||
train_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation. |
TRUE |
|||||||||
train_ds.dataloader_params.batch_size |
Batch Size |
int |
Number of samples per batch of data. |
32 |
|||||||||
train_ds.dataloader_params.num_workers |
Number of workers |
int |
The number of worker threads for loading the dataset |
12 |
|||||||||
validation_ds |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||||
validation_ds.dataset |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||||
validation_ds.dataset._target_ |
Target |
const |
The nemo class module to be imported |
nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset |
|||||||||
validation_ds.dataset.manifest_filepath |
Validation manifest file |
const |
Path to the validation dataset manifest json file |
${validation_dataset} |
|||||||||
validation_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
||||||||||
validation_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration less than the given value (in seconds) will be dropped |
||||||||||
validation_ds.dataset.int_values |
Input as integer values |
bool |
Load samples as 32 bit integers or not |
FALSE |
|||||||||
validation_ds.dataset.normalize |
Normalize dataset |
bool |
The flag to determine whether or not to normalize the transcript text |
TRUE |
|||||||||
validation_ds.dataset.sample_rate |
Sample rate |
const |
The target sample rate to load the audio, in Hz. |
${sample_rate} |
|||||||||
validation_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
|||||||||
validation_ds.dataset.sup_data_path |
Prior folder |
const |
Path to the prior folder |
${prior_folder} |
|||||||||
validation_ds.dataset.n_window_size |
Window size |
const |
The size of the fft window in samples |
${n_window_size} |
|||||||||
validation_ds.dataset.n_window_stride |
Window stride |
const |
The stride of the window in samples |
${n_window_stride} |
|||||||||
validation_ds.dataset.pitch_fmin |
Pitch Fmin |
const |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
${pitch_fmin} |
|||||||||
validation_ds.dataset.pitch_fmax |
Pitch Fmax |
const |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
${pitch_fmax} |
|||||||||
validation_ds.dataset.pitch_avg |
Pitch Average |
const |
The average used to normalize the pitch |
${pitch_avg} |
|||||||||
validation_ds.dataset.pitch_std |
Pitch std. deviation |
const |
The standard deviation used to normalize the pitch |
${pitch_std} |
|||||||||
validation_ds.dataset.vocab |
Validation data vocabulary |
collection |
Collection describing the vocabulary component of the validation dataset |
||||||||||
validation_ds.dataset.vocab.notation |
Vocabulary Notation |
str |
Either chars or phonemes as general notation |
phonemes |
|||||||||
validation_ds.dataset.vocab.punct |
Punctuation |
bool |
Whether to reserve graphemes from basic punctuation |
TRUE |
|||||||||
validation_ds.dataset.vocab.spaces |
Spaces |
bool |
Whether to prepend spaces to every punctuation |
TRUE |
|||||||||
validation_ds.dataset.vocab.stresses |
Stresses |
bool |
TRUE |
||||||||||
validation_ds.dataset.vocab.add_blank_at |
Add blank at |
str |
Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels |
None |
|||||||||
validation_ds.dataset.vocab.pad_with_space |
Pad with space |
bool |
Whether to pad text with spaces at the beginning and at the end. |
TRUE |
|||||||||
validation_ds.dataset.vocab.chars |
Chars |
bool |
Whether to additionally use chars together with phonemes |
TRUE |
|||||||||
validation_ds.dataset.vocab.improved_version_g2p |
Improved version G2P |
bool |
Whether to use the new version of g2p. |
TRUE |
|||||||||
validation_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
||||||||||
validation_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
|||||||||
validation_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation. |
TRUE |
|||||||||
validation_ds.dataloader_params.batch_size |
Batch Size |
int |
Number of samples per batch of data. |
32 |
|||||||||
validation_ds.dataloader_params.num_workers |
Number of workers |
int |
The number of worker threads for loading the dataset |
12 |
|||||||||
optim |
Optimizer |
collection |
|||||||||||
optim.name |
Optimizer Name |
str |
Type of optimizer to be used during training |
adam |
|||||||||
optim.lr |
Learning rate |
float |
Learning rate |
0.0002 |
|||||||||
optim.betas |
Optimizer betas |
list |
List of floats |
[0.9, 0.98] |
|||||||||
optim.weight_decay |
Weight decay |
float |
0.000001 |
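Many finetune rows use ${...} references (for example ${sample_rate} or ${prior_folder}), meaning the dataset blocks reuse the top-level values rather than repeating them. The Python sketch below mirrors that structure for train_ds; the nesting is inferred from the dotted names, and the placeholders are kept literally to show where interpolation happens.

```python
# Sketch of the train_ds block from the finetune rows above. The ${...} strings are the
# interpolation placeholders shown in the table; they resolve to the top-level values
# (sample_rate=22050, n_window_size=1024, n_window_stride=256, pitch_fmin=64, pitch_fmax=512).
finetune_train_ds = {
    "dataset": {
        "_target_": "nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset",
        "manifest_filepath": "${train_dataset}",
        "sample_rate": "${sample_rate}",
        "sup_data_path": "${prior_folder}",
        "n_window_size": "${n_window_size}",
        "n_window_stride": "${n_window_stride}",
        "pitch_fmin": "${pitch_fmin}",
        "pitch_fmax": "${pitch_fmax}",
        "trim": False,                 # default above
        "vocab": {"notation": "phonemes", "punct": True, "pad_with_space": True},
    },
    "dataloader_params": {
        "drop_last": False,            # default above
        "shuffle": True,               # recommended for training data
        "batch_size": 32,              # default above
        "num_workers": 12,             # default above
    },
}
```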
infer
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||
input_batch |
List of input texts |
list |
List of text sentences to render spectrograms. This only works in infer mode |
yes |
|||||||
input_json |
Input dataset to run inference |
hidden |
Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder |
yes |
|||||||
speaker |
Speaker ID |
int |
ID of the speaker to generate spectrograms |
0 |
|||||||
mode |
Infer mode |
string |
Mode to run inference: (1) inference on discrete text samples (infer); (2) inference on a dataset (infer_hifigan_ft) |
infer |
infer, infer_hifigan_ft |
yes |
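The infer action has two documented modes: infer renders spectrograms for the sentences in input_batch, while infer_hifigan_ft runs over a dataset (input_json) to produce training data for a vocoder. A small sketch of the two shapes follows; the example sentence is illustrative only.

```python
# Sketch of the two documented infer modes; the surrounding structure is assumed.
infer_spec = {
    "mode": "infer",          # "infer" or "infer_hifigan_ft"
    "speaker": 0,             # default above: speaker ID used to generate spectrograms
    "input_batch": [          # only used when mode == "infer"
        "An example sentence to synthesize.",   # illustrative, not from the table
    ],
    # When mode == "infer_hifigan_ft", input_json (a dataset path) is supplied instead.
}
```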
infer_onnx
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||
input_batch |
List of input texts |
list |
List of text sentences to render spectrograms. This only works in infer mode |
yes |
yes |
||||||
speaker |
Speaker ID |
int |
ID of the speaker to generate spectrograms |
0 |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
yes |
||||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
yes |
||||||||
key |
Save key |
hidden |
Key to save the model |
yes |
yes |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
yes |
yes |
yes |
|||||||
sample_rate |
Sample rate |
integer |
The target sample rate to load the audio, in Hz |
22050 |
yes |
yes |
|||||||
train_dataset |
Train Dataset |
hidden |
Path to the train dataset manifest json file |
yes |
|||||||||
validation_dataset |
Validation Dataset |
hidden |
Path to the validation dataset manifest json file |
yes |
|||||||||
prior_folder |
hidden |
yes |
|||||||||||
model.learn_alignment |
Learn alignment |
bool |
TRUE |
||||||||||
model.n_speakers |
N speakers |
integer |
Number of speakers in the dataset |
1 |
yes |
||||||||
model.symbols_embedding_dim |
Symbols Embedding dimension |
integer |
The dimension of the symbols embedding |
384 |
yes |
||||||||
model.max_token_duration |
Max token duration |
integer |
Maximum duration to clamp the tokens to |
75 |
|||||||||
model.n_mel_channels |
Number of channels in Mel Output |
integer |
Number of channels in the Mel output |
80 |
|||||||||
model.pitch_embedding_kernel_size |
Pitch embedding kernel size |
integer |
The kernel size of the Conv1D layer generating the pitch embeddings |
3 |
|||||||||
model.n_window_size |
Window size |
integer |
The size of the fft window in samples |
1024 |
yes |
||||||||
model.n_window_stride |
Window stride |
integer |
The stride of the window in samples |
256 |
yes |
||||||||
model.pitch_fmin |
Pitch Fmin |
float |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
64 |
yes |
yes |
|||||||
model.pitch_fmax |
Pitch Fmax |
float |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
512 |
yes |
yes |
|||||||
model.pitch_avg |
Pitch Average |
float |
The average used to normalize the pitch |
yes |
yes |
||||||||
model.pitch_std |
Pitch std. deviation |
float |
The standard deviation used to normalize the pitch |
yes |
yes |
||||||||
model.train_ds |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||||
model.train_ds.dataset |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||||
model.train_ds.dataset._target_ |
Target |
const |
The nemo class module to be imported |
nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset |
yes |
||||||||
model.train_ds.dataset.manifest_filepath |
Train manifest file |
const |
Path to the train dataset manifest json file |
${train_dataset} |
yes |
||||||||
model.train_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
yes |
|||||||||
model.train_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration less than the given value (in seconds) will be dropped |
0.1 |
yes |
||||||||
model.train_ds.dataset.int_values |
Input as integer values |
bool |
Load samples as 32 bit integers or not |
FALSE |
yes |
||||||||
model.train_ds.dataset.normalize |
Normalize dataset |
bool |
The flag to determine whether or not to normalize the transcript text |
TRUE |
yes |
||||||||
model.train_ds.dataset.sample_rate |
Sample rate |
const |
The target sample rate to load the audio, in Hz. |
${sample_rate} |
yes |
||||||||
model.train_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
yes |
||||||||
model.train_ds.dataset.sup_data_path |
Prior folder |
const |
Path to the prior folder |
${prior_folder} |
yes |
||||||||
model.train_ds.dataset.n_window_size |
Window size |
const |
The size of the fft window in samples |
${model.n_window_size} |
yes |
||||||||
model.train_ds.dataset.n_window_stride |
Window stride |
const |
The stride of the window in samples |
${model.n_window_stride} |
yes |
||||||||
model.train_ds.dataset.pitch_fmin |
Pitch Fmin |
const |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
${model.pitch_fmin} |
yes |
||||||||
model.train_ds.dataset.pitch_fmax |
Pitch Fmax |
const |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
${model.pitch_fmax} |
yes |
||||||||
model.train_ds.dataset.pitch_avg |
Pitch Average |
const |
The average used to normalize the pitch |
${model.pitch_avg} |
yes |
||||||||
model.train_ds.dataset.pitch_std |
Pitch std. deviation |
const |
The standard deviation used to normalize the pitch |
${model.pitch_std} |
yes |
||||||||
model.train_ds.dataset.vocab |
Training data vocabulary |
collection |
Collection describing the vocabulary component of the training dataset |
yes |
|||||||||
model.train_ds.dataset.vocab.notation |
Vocabulary Notation |
string |
Either chars or phonemes as general notation |
phonemes |
yes |
||||||||
model.train_ds.dataset.vocab.punct |
Punctuation |
bool |
Whether to reserve graphemes from basic punctuation |
TRUE |
yes |
||||||||
model.train_ds.dataset.vocab.spaces |
Spaces |
bool |
Whether to prepend spaces to every punctuation |
TRUE |
yes |
||||||||
model.train_ds.dataset.vocab.stresses |
Stresses |
bool |
TRUE |
yes |
|||||||||
model.train_ds.dataset.vocab.add_blank_at |
Add blank at |
string |
Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels |
None |
last, last_but_none, None |
yes |
|||||||
model.train_ds.dataset.vocab.pad_with_space |
Pad with space |
bool |
Whether to pad text with spaces at the beginning and at the end. |
TRUE |
yes |
||||||||
model.train_ds.dataset.vocab.chars |
Chars |
bool |
Whether to additionally use chars together with phonemes |
TRUE |
yes |
||||||||
model.train_ds.dataset.vocab.improved_version_g2p |
Improved version G2P |
bool |
Whether to use the new version of g2p. |
TRUE |
yes |
||||||||
model.train_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
yes |
|||||||||
model.train_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||||
model.train_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation. |
TRUE |
yes |
||||||||
model.train_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
32 |
yes |
yes |
|||||||
model.train_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
12 |
yes |
||||||||
model.validation_ds |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
yes |
|||||||||
model.validation_ds.dataset |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
yes |
|||||||||
model.validation_ds.dataset._target_ |
Target |
const |
The nemo class module to be imported |
nemo.collections.asr.data.audio_to_text.AudioToCharWithPriorAndPitchDataset |
yes |
||||||||
model.validation_ds.dataset.manifest_filepath |
Validation manifest file |
const |
Path to the validation dataset manifest json file |
${validation_dataset} |
yes |
||||||||
model.validation_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
yes |
|||||||||
model.validation_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration less than the given value (in seconds) will be dropped |
yes |
|||||||||
model.validation_ds.dataset.int_values |
Input as integer values |
bool |
Load samples as 32 bit integers or not |
FALSE |
yes |
||||||||
model.validation_ds.dataset.normalize |
Normalize dataset |
bool |
The flag to determine whether or not to normalize the transcript text |
TRUE |
yes |
||||||||
model.validation_ds.dataset.sample_rate |
Sample rate |
const |
The target sample rate to load the audio, in Hz. |
${sample_rate} |
yes |
||||||||
model.validation_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
yes |
||||||||
model.validation_ds.dataset.sup_data_path |
Prior folder |
const |
Path to the prior folder |
${prior_folder} |
yes |
||||||||
model.validation_ds.dataset.n_window_size |
Window size |
const |
The size of the fft window in samples |
${model.n_window_size} |
yes |
||||||||
model.validation_ds.dataset.n_window_stride |
Window stride |
const |
The stride of the window in samples |
${model.n_window_stride} |
yes |
||||||||
model.validation_ds.dataset.pitch_fmin |
Pitch Fmin |
const |
The fmin input to the librosa.pyin function. The default value is librosa.note_to_hz(“C2”) |
${model.pitch_fmin} |
yes |
||||||||
model.validation_ds.dataset.pitch_fmax |
Pitch Fmax |
const |
The fmax input to the librosa.pyin function. The default value is librosa.note_to_hz(“C7”) |
${model.pitch_fmax} |
yes |
||||||||
model.validation_ds.dataset.pitch_avg |
Pitch Average |
const |
The average used to normalize the pitch |
${model.pitch_avg} |
yes |
||||||||
model.validation_ds.dataset.pitch_std |
Pitch std. deviation |
const |
The standard deviation used to normalize the pitch |
${model.pitch_std} |
yes |
||||||||
model.validation_ds.dataset.vocab |
Validation data vocabulary |
collection |
Collection describing the vocabulary component of the validation dataset |
yes |
|||||||||
model.validation_ds.dataset.vocab.notation |
Vocabulary Notation |
string |
Either chars or phonemes as general notation |
phonemes |
yes |
||||||||
model.validation_ds.dataset.vocab.punct |
Punctuation |
bool |
Whether to reserve graphemes from basic punctuation |
TRUE |
yes |
||||||||
model.validation_ds.dataset.vocab.spaces |
Spaces |
bool |
Whether to prepend spaces to every punctuation |
TRUE |
yes |
||||||||
model.validation_ds.dataset.vocab.stresses |
Stresses |
bool |
TRUE |
yes |
|||||||||
model.validation_ds.dataset.vocab.add_blank_at |
Add blank at |
string |
Add blanks to labels in the specified order. If this string is empty, then there will be no blank in the labels |
None |
yes |
||||||||
model.validation_ds.dataset.vocab.pad_with_space |
Pad with space |
bool |
Whether to pad text with spaces at the beginning and at the end. |
TRUE |
yes |
||||||||
model.validation_ds.dataset.vocab.chars |
Chars |
bool |
Whether to additionally use chars together with phonemes |
TRUE |
yes |
||||||||
model.validation_ds.dataset.vocab.improved_version_g2p |
Improved version G2P |
bool |
Whether to use the new version of g2p. |
TRUE |
yes |
||||||||
model.validation_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
yes |
|||||||||
model.validation_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||||
model.validation_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation. |
TRUE |
yes |
||||||||
model.validation_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
32 |
yes |
yes |
|||||||
model.validation_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
12 |
yes |
||||||||
model.optim |
Optimizer |
collection |
yes |
||||||||||
model.optim.name |
Optimizer Name |
string |
Type of optimizer to be used during training |
lamb |
yes |
||||||||
model.optim.lr |
Learning rate |
float |
Learning rate |
0.1 |
yes |
yes |
|||||||
model.optim.betas |
Optimizer betas |
list |
Coefficients used to compute the running averages of the gradient and its square |
[0.9, 0.98] |
yes |
||||||||
model.optim.weight_decay |
Weight decay |
float |
Weight decay (L2 penalty) |
0.000001 |
yes |
||||||||
model.optim.sched |
Learning rate scheduler |
collection |
Parameters to configure the learning rate scheduler |
yes |
|||||||||
model.optim.sched.name |
Scheduler Name |
string |
Type of learning rate scheduler to be used |
NoamAnnealing |
yes |
||||||||
model.optim.sched.warmup_steps |
Warm up steps |
integer |
Number of steps to warm up the learning rate |
1000 |
yes |
||||||||
model.optim.sched.last_epoch |
Last epoch |
integer |
-1 |
yes |
|||||||||
model.optim.sched.d_model |
Disable scaling |
integer |
Flag to disable scaling based on model dim |
1 |
yes |
||||||||
model.preprocessor |
Preprocessor config |
collection |
Collection to configure the model preprocessor |
yes |
|||||||||
model.preprocessor._target_ |
Target class of the preprocessor instance |
const |
The Nemo class to instantiate. |
nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor |
yes |
||||||||
model.preprocessor.dither |
Dither |
float |
0 |
yes |
|||||||||
model.preprocessor.features |
Number of channels in Mel Output |
const |
Number of channels in the Mel Output |
${model.n_mel_channels} |
yes |
||||||||
model.preprocessor.frame_splicing |
Spectrogram Frames per step |
integer |
Number of spectrogram frames per step |
1 |
yes |
||||||||
model.preprocessor.highfreq |
High frequency bound in Hz |
integer |
Upper bound of the mel basis in Hz |
8000 |
yes |
||||||||
model.preprocessor.log |
Log Spectrograms |
bool |
Flag to take the log of the spectrogram values |
TRUE |
yes |
||||||||
model.preprocessor.log_zero_guard_type |
Zero guard type |
enum |
Need to avoid taking the log of zero. There are two options: “add” or “clamp”. |
add |
yes |
||||||||
model.preprocessor.lowfreq |
Low frequency bound in Hz |
integer |
Lower bound of the mel basis in Hz |
0 |
yes |
||||||||
model.preprocessor.mag_power |
Multiplication with mel basis |
integer |
The power to which the magnitude spectrogram is raised prior to multiplication with the mel basis |
1 |
yes |
||||||||
model.preprocessor.n_fft |
FFT Window size |
const |
The size of the window for the FFT in samples. |
${model.n_window_size} |
yes |
||||||||
model.preprocessor.n_window_size |
FFT Window size |
const |
The size of the window for the FFT in samples. |
${model.n_window_size} |
yes |
||||||||
model.preprocessor.n_window_stride |
FFT Window stride |
const |
The stride of the window for FFT |
${model.n_window_stride} |
yes |
||||||||
model.preprocessor.normalize |
Feature Normalization |
string |
Feature normalization mode. null disables feature normalization; all_features normalizes the entire spectrogram per channel/frequency |
null |
yes |
||||||||
model.preprocessor.pad_to |
Pad to |
integer |
Pads the output so its length is a multiple of pad_to |
1 |
yes |
||||||||
model.preprocessor.pad_value |
Pad Value |
float |
The value that shorter mels are padded with |
0 |
yes |
||||||||
model.preprocessor.preemph |
Pre-emphasis value |
float |
Amount of pre-emphasis to be added to the audio. Can be disabled by passing None. |
yes |
|||||||||
model.preprocessor.sample_rate |
Sampling rate |
const |
The target sample rate to load the audio in Hz. |
${sample_rate} |
yes |
||||||||
model.preprocessor.window |
Window type |
string |
The type of window to be used. |
hann |
yes |
||||||||
model.preprocessor.window_size |
Window size |
integer |
The size of the window to be used |
yes |
|||||||||
model.preprocessor.window_stride |
Window stride |
integer |
The stride of the window to be used |
yes |
|||||||||
model.input_fft |
Input FFT |
collection |
Collection to configure the Input FFT |
yes |
|||||||||
model.input_fft._target_ |
Target class for the FFT Transformer Encoder |
const |
The Nemo FFTEncoder module to be instantiated |
nemo.collections.tts.modules.transformer.FFTransformerEncoder |
yes |
||||||||
model.input_fft.n_layer |
input_fft n_layer |
integer |
Number of transformer layers |
6 |
yes |
||||||||
model.input_fft.n_head |
input_fft num heads |
integer |
Number of heads in the MultiHeadAttn |
1 |
yes |
||||||||
model.input_fft.d_model |
input_fft d_model |
const |
Hidden size of the input and output |
${model.symbols_embedding_dim} |
yes |
||||||||
model.input_fft.d_head |
input_fft d_head |
integer |
Hidden size of the attention module |
64 |
yes |
||||||||
model.input_fft.d_inner |
Input fft d_inner |
integer |
Hidden size of the convolutional layers |
1536 |
yes |
||||||||
model.input_fft.kernel_size |
input_fft kernel_size |
integer |
Kernel size of the convolutional layers |
3 |
yes |
||||||||
model.input_fft.dropout |
input_fft dropout |
float |
Dropout parameter |
0.1 |
yes |
||||||||
model.input_fft.dropatt |
input_fft dropatt |
float |
Dropout parameter for attention |
0.1 |
yes |
||||||||
model.input_fft.dropemb |
input_fft dropemb |
integer |
Dropout parameter for embedding |
0 |
yes |
||||||||
model.input_fft.d_embed |
input_fft d_embed |
const |
Hidden size of embeddings (input fft only) |
${model.symbols_embedding_dim} |
yes |
||||||||
model.output_fft |
output_fft |
collection |
Collection to configure the Output FFT |
yes |
|||||||||
model.output_fft._target_ |
Target class for the FFT Transformer Decoder |
const |
The Nemo FFT Transformer Decoder module to be instantiated |
nemo.collections.tts.modules.transformer.FFTransformerDecoder |
yes |
||||||||
model.output_fft.n_layer |
output_fft n_layer |
integer |
Number of transformer layers |
6 |
yes |
||||||||
model.output_fft.n_head |
output_fft num heads |
integer |
Number of heads in the MultiHeadAttn |
1 |
yes |
||||||||
model.output_fft.d_model |
output_fft d_model |
const |
Hidden size of the input and output |
${model.symbols_embedding_dim} |
yes |
||||||||
model.output_fft.d_head |
output_fft d_head |
integer |
Hidden size of the attention module |
64 |
yes |
||||||||
model.output_fft.d_inner |
output_fft d_inner |
integer |
Hidden size of the convolutional layers |
1536 |
yes |
||||||||
model.output_fft.kernel_size |
output_fft kernel_size |
integer |
Kernel size of the convolutional layers |
3 |
yes |
||||||||
model.output_fft.dropout |
output_fft dropout |
float |
Dropout parameter |
0.1 |
yes |
||||||||
model.output_fft.dropatt |
output_fft dropatt |
float |
Dropout parameter for attention |
0.1 |
yes |
||||||||
model.output_fft.dropemb |
output_fft dropemb |
integer |
Dropout parameter for embedding |
0 |
yes |
||||||||
model.alignment_module |
alignment_module |
collection |
Configuration element for the alignment module |
yes |
|||||||||
model.alignment_module._target_ |
alignment_module._target_ |
const |
Module to be instantiated for alignment |
nemo.collections.tts.modules.aligner.AlignmentEncoder |
yes |
||||||||
model.alignment_module.n_text_channels |
n_text_channels |
const |
The dimensionality of symbol embedding |
${model.symbols_embedding_dim} |
yes |
||||||||
model.duration_predictor |
duration_predictor |
collection |
Configuration element for the duration predictor |
yes |
|||||||||
model.duration_predictor._target_ |
duration_predictor._target_ |
const |
Module to be instantiated for duration predictor |
nemo.collections.tts.modules.fastpitch.TemporalPredictor |
yes |
||||||||
model.duration_predictor.input_size |
duration_predictor.input_size |
const |
Hidden size of the input and output |
${model.symbols_embedding_dim} |
yes |
||||||||
model.duration_predictor.kernel_size |
duration_predictor.kernel_size |
integer |
Kernel size for convolutional layers |
3 |
yes |
||||||||
model.duration_predictor.filter_size |
duration_predictor.filter_size |
integer |
Filter size for the convolutional layers |
256 |
yes |
||||||||
model.duration_predictor.dropout |
duration_predictor.dropout |
float |
Dropout parameter |
0.1 |
yes |
||||||||
model.duration_predictor.n_layers |
duration_predictor.n_layers |
integer |
Number of layers |
2 |
yes |
||||||||
model.pitch_predictor |
pitch_predictor |
collection |
Configuration element for the pitch predictor |
yes |
|||||||||
model.pitch_predictor._target_ |
pitch_predictor._target_ |
const |
Module to be instantiated for pitch predictor |
nemo.collections.tts.modules.fastpitch.TemporalPredictor |
yes |
||||||||
model.pitch_predictor.input_size |
pitch_predictor.input_size |
const |
Hidden size of the input and output |
${model.symbols_embedding_dim} |
yes |
||||||||
model.pitch_predictor.kernel_size |
pitch_predictor.kernel_size |
integer |
Kernel size for convolutional layers |
3 |
yes |
||||||||
model.pitch_predictor.filter_size |
pitch_predictor.filter_size |
integer |
Filter size for the convolutional layers |
256 |
yes |
||||||||
model.pitch_predictor.dropout |
pitch_predictor.dropout |
float |
Dropout parameter |
0.1 |
yes |
||||||||
model.pitch_predictor.n_layers |
pitch_predictor.n_layers |
integer |
Number of layers |
2 |
yes |
||||||||
trainer |
Trainer Configurations |
collection |
Collection of parameters to configure the trainer |
yes |
|||||||||
trainer.max_epochs |
Number of epochs |
integer |
Maximum number of epochs to train the model |
100 |
yes |
yes |
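Several of the pitch entries above (pitch_fmin, pitch_fmax, pitch_avg, pitch_std) are consumed through the ${...} interpolations shown in the table and ultimately drive librosa.pyin plus a simple normalization. Below is a minimal sketch of that flow, assuming librosa is installed; the clip path and the example statistics are placeholders rather than values taken from this spec.

```python
# Illustrative only: mirrors how pitch_fmin/pitch_fmax/pitch_avg/pitch_std are used.
import librosa

wav, sr = librosa.load("sample.wav", sr=22050)   # hypothetical input clip

# fmin/fmax mirror model.pitch_fmin / model.pitch_fmax (C2 / C7 by default).
f0, voiced_flag, voiced_prob = librosa.pyin(
    wav,
    fmin=librosa.note_to_hz("C2"),
    fmax=librosa.note_to_hz("C7"),
    sr=sr,
)

# pitch_avg / pitch_std normalize the extracted contour before it reaches the model.
pitch_avg, pitch_std = 212.35, 68.99             # assumed example statistics
normalized_f0 = (f0 - pitch_avg) / pitch_std
```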
pitch_stats
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||
num_files |
Number of files |
integer |
List of text sentences to render spectrograms. This only works in infer mode |
10 |
yes |
yes |
|||||
manifest_filepath |
Manifest |
hidden |
Path to the dataset to run inference on. This only works in mode=infer_hifigan_ft to generate spectrograms as a dataset for training a vocoder |
yes |
yes |
||||||
output_path |
Output |
hidden |
ID of the speaker to generate spectrograms |
0 |
|||||||
pitch_fmin |
F min |
float |
64 |
yes |
|||||||
pitch_fmax |
F max |
float |
512 |
yes |
|||||||
n_window_size |
Window size |
integer |
1024 |
||||||||
sample_rate |
Sample rate |
integer |
22050 |
||||||||
render_plots |
Render plots |
bool |
TRUE |
||||||||
compute_stats |
Compute stats |
bool |
TRUE |
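Taken together, the pitch_stats parameters describe a pitch-statistics pass: run pyin with pitch_fmin=64 and pitch_fmax=512 at sample_rate=22050 over a set of clips and aggregate the mean and standard deviation. The sketch below is a rough approximation under those assumptions; the file list is a placeholder and the real tool reads its inputs from a manifest.

```python
import numpy as np
import librosa

clips = ["clip_0001.wav", "clip_0002.wav"]        # hypothetical file list

voiced_values = []
for path in clips:
    wav, sr = librosa.load(path, sr=22050)        # sample_rate
    f0, voiced_flag, _ = librosa.pyin(
        wav,
        fmin=64.0,                                # pitch_fmin
        fmax=512.0,                               # pitch_fmax
        sr=sr,
        frame_length=1024,                        # n_window_size
    )
    voiced_values.append(f0[voiced_flag])         # keep voiced frames only

all_f0 = np.concatenate(voiced_values)
pitch_avg, pitch_std = float(np.nanmean(all_f0)), float(np.nanstd(all_f0))
print(f"pitch_avg={pitch_avg:.2f}, pitch_std={pitch_std:.2f}")
```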
convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
dataset_name |
Name |
string |
ljs |
yes |
|||||||
data_dir |
Data dir |
hidden |
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
the minimum random crop size |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
the maximum random crop size |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
ssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
ssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
||||||||||
ssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
ssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
ssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
ssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95] |
||||||||||
ssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
ssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
ssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
ssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
ssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
ssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
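The soft_start_annealing_schedule fields above (min_learning_rate, max_learning_rate, soft_start, annealing) describe a ramp-hold-anneal curve over the training progress. The sketch below is a worked approximation using the defaults from the table; the exponential interpolation between the two rates is an assumption, not something this spec states.

```python
import math

def soft_start_annealing_lr(progress, min_lr=5e-5, max_lr=9e-3,
                            soft_start=0.1, annealing=0.8):
    """progress is the fraction of training completed, in [0, 1]."""
    if progress < soft_start:                      # warm-up phase
        t = progress / soft_start
    elif progress > annealing:                     # cool-down phase
        t = (1.0 - progress) / (1.0 - annealing)
    else:                                          # plateau at max_lr
        t = 1.0
    # Interpolate on a log scale between min_lr and max_lr (assumed).
    return math.exp(math.log(min_lr) + t * (math.log(max_lr) - math.log(min_lr)))

for p in (0.0, 0.05, 0.1, 0.5, 0.8, 0.9, 1.0):
    print(f"{p:.2f} -> {soft_start_annealing_lr(p):.2e}")
```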
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
threshold |
Threshold |
float |
0.3 |
|||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
the minimum random crop size |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
the maximum random crop size |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
ssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
ssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
||||||||||
ssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
ssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
ssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
ssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95] |
||||||||||
ssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
ssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
ssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
ssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
ssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
ssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
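The ssd_config anchor parameters (aspect_ratios_global, scales, two_boxes_for_ar1) determine one anchor shape per aspect ratio per feature layer. The sketch below shows the usual construction under those settings; the geometric-mean rule for the extra aspect-ratio-1 box follows the original SSD formulation and is an assumption here, not a statement of this spec.

```python
import math

aspect_ratios_global = [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0]
scales = [0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95]

def layer_anchor_shapes(k, two_boxes_for_ar1=True):
    """Return (width, height) pairs, relative to image size, for feature layer k."""
    s_k = scales[k]
    shapes = [(s_k * math.sqrt(ar), s_k / math.sqrt(ar))
              for ar in aspect_ratios_global]
    if two_boxes_for_ar1 and k + 1 < len(scales):
        s_extra = math.sqrt(s_k * scales[k + 1])   # assumed geometric-mean rule
        shapes.append((s_extra, s_extra))
    return shapes

print(layer_anchor_shapes(0))
```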
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
param_type (internal / hidden / inferred) |
CLI |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
internal |
|||||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
||||||||||
initial_epoch |
Initial epoch cli |
integer |
1 |
|||||||||||
use_multiprocessing |
CLI parameter |
bool |
FALSE |
|||||||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
|||||||||||
dataset_config.data_sources.label_directory_path |
KITTI label path |
hidden |
hidden |
|||||||||||
dataset_config.data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.target_class_mapping |
Target Class Mappings |
list |
This parameter maps the class names in the dataset to the target class to be trained in the network. An element is defined for every source class to target class mapping. This field was included with the intention of grouping similar class objects under one umbrella. For example: car, van, heavy_truck etc. may be grouped under automobile. |
|||||||||||
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||||
dataset_config.validation_data_sources.label_directory_path |
KITTI label path |
hidden |
||||||||||||
dataset_config.validation_data_sources.image_directory_path |
Image path |
hidden |
||||||||||||
dataset_config.validation_data_sources.tfrecords_directory_path |
TFRecords path |
hidden |
||||||||||||
dataset_config.include_difficult_in_training |
include difficult label in training |
bool |
Whether to use difficult objects in training |
TRUE |
||||||||||
training_config |
Training |
collection |
||||||||||||
training_config.batch_size_per_gpu |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
10 |
1 |
|||||||||
training_config.num_epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
80 |
1 |
|||||||||
training_config.enable_qat |
Enable Quantization Aware Training |
bool |
Whether to enable Quantization Aware Training |
FALSE |
||||||||||
training_config.learning_rate |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule |
collection |
|||||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Minimum Learning Rate |
float |
The minimum learning rate in the learning rate schedule. |
5.00E-05 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Maximum Learning Rate |
float |
The maximum learning rate in the learning rate schedule. |
9.00E-03 |
0 |
|||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
The time to ramp up the learning rate from minimum learning rate to maximum learning rate. |
0.1 |
0 |
1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
The time to cool down the learning rate from maximum learning rate to minimum learning rate. Greater than soft_start. |
0.8 |
0 |
1 |
||||||||
training_config.regularizer.type |
Regularizer Type |
string |
The type of the regularizer being used. |
__L1__ |
L1, L2 |
|||||||||
training_config.regularizer.weight |
Regularizer Weight |
float |
The floating point weight of the regularizer. |
3.00E-05 |
0 |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
The interval (in epochs) at which train saves intermediate models. |
1 |
1 |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
16 |
1 |
|||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
8 |
1 |
|||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
|||||||||||
training_config.early_stopping |
Early Stopping |
collection |
||||||||||||
training_config.early_stopping.monitor |
Monitor |
string |
The name of the quantity to be monitored for early stopping |
loss, validation_loss, val_loss |
||||||||||
training_config.early_stopping.min_delta |
Min Delta |
float |
Minimum delta of the quantity to be regarded as changed |
0 |
||||||||||
training_config.early_stopping.patience |
Patience |
integer |
The number of epochs to be waited for before stopping the training |
0 |
||||||||||
training_config.visualizer |
Visualizer |
collection |
||||||||||||
training_config.visualizer.enabled |
Enable |
bool |
Enable the visualizer or not |
|||||||||||
training_config.visualizer.num_images |
Max Num Images |
integer |
Maximum number of images to be displayed in TensorBoard |
3 |
0 |
|||||||||
eval_config |
Evaluation |
collection |
||||||||||||
eval_config.average_precision_mode |
Average Precision Mode |
string |
The mode in which the average precision for each class is calculated. |
__SAMPLE__ |
SAMPLE/INTEGRATE |
|||||||||
eval_config.validation_period_during_training |
Validation Period During Training |
integer |
The interval at which evaluation is run during training. The evaluation is run at this interval starting from the value of the first validation epoch parameter as specified below. |
10 |
1 |
|||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
16 |
1 |
|||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
0 |
1 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
|||||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.01 |
0 |
1 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.6 |
0 |
1 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
0 |
|||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
1 |
32 |
|||||||||
augmentation_config |
Augmentation config |
collection |
||||||||||||
augmentation_config.output_width |
Model Input width |
integer |
300 |
yes |
||||||||||
augmentation_config.output_height |
Model Input height |
integer |
300 |
yes |
||||||||||
augmentation_config.output_channel |
Model Input channel |
integer |
3 |
yes |
||||||||||
augmentation_config.random_crop_min_scale |
Random Crop Min Scale |
float |
the minimum random crop size |
0.3 |
0 |
1 |
||||||||
augmentation_config.random_crop_max_scale |
Random Crop Max Scale |
float |
the maximum random crop size |
1 |
0 |
1 |
||||||||
augmentation_config.random_crop_min_ar |
Random Crop Min Aspect Ratio |
float |
the minimum random crop aspect ratio |
0.5 |
||||||||||
augmentation_config.random_crop_max_ar |
Random Crop Max Aspect Ratio |
float |
the maximum random crop aspect ratio |
2 |
||||||||||
augmentation_config.zoom_out_min_scale |
Zoom Out Min Scale |
float |
Minimum scale of ZoomOut augmentation |
1 |
1 |
|||||||||
augmentation_config.zoom_out_max_scale |
Zoom Out Max Scale |
float |
Maximum scale of ZoomOut augmentation |
4 |
1 |
|||||||||
augmentation_config.brightness |
Brightness |
integer |
Brightness delta in color jittering augmentation |
32 |
0 |
255 |
||||||||
augmentation_config.contrast |
Contrast |
float |
Contrast delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation delta factor in color jitter augmentation |
0.5 |
0 |
1 |
||||||||
augmentation_config.hue |
Hue |
integer |
Hue delta in color jittering augmentation |
18 |
0 |
180 |
||||||||
augmentation_config.random_flip |
Random Flip |
float |
Probability of performing a random horizontal flip |
|||||||||||
augmentation_config.image_mean |
Image Mean |
collection |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.key |
Image Mean key |
string |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
augmentation_config.image_mean.value |
Image Mean value |
float |
A key/value pair to specify image mean values. If omitted, ImageNet mean will be used for image preprocessing. If set, depending on output_channel, either ‘r/g/b’ or ‘l’ key/value pair must be configured. |
|||||||||||
ssd_config.aspect_ratios_global |
Aspect Ratio Global |
string |
The anchor boxes of aspect ratios defined in aspect_ratios_global will be generated for each feature layer used for prediction. Note that either the aspect_ratios_global or aspect_ratios parameter is required; you don’t need to specify both. |
[1.0, 2.0, 0.5, 3.0, 1.0/3.0] |
||||||||||
ssd_config.aspect_ratios |
Aspect Ratio |
string |
The aspect ratio of anchor boxes for different SSD feature layers |
||||||||||
ssd_config.two_boxes_for_ar1 |
Two boxes for aspect-ratio=1 |
bool |
If this parameter is True, two boxes will be generated with an aspect ratio of 1. |
TRUE |
||||||||||
ssd_config.clip_boxes |
Clip Boxes |
bool |
If true, all corner anchor boxes will be truncated so they are fully inside the feature images. |
FALSE |
||||||||||
ssd_config.variances |
Variance |
string |
A list of 4 positive floats to decode bboxes |
[0.1, 0.1, 0.2, 0.2] |
||||||||||
ssd_config.scales |
Scales |
string |
A list of positive floats containing scaling factors per convolutional predictor layer |
[0.1, 0.24166667, 0.38333333, 0.525, 0.66666667, 0.80833333, 0.95] |
||||||||||
ssd_config.steps |
Steps |
string |
An optional list inside quotation marks with a length that is the number of feature layers for prediction. The elements should be floats or tuples/lists of two floats. The steps define how many pixels apart the anchor-box center points should be. |
|||||||||||
ssd_config.offsets |
Offsets |
string |
An optional list of floats inside quotation marks with length equal to the number of feature layers for prediction. The first anchor box will have a margin of offsets[i]*steps[i] pixels from the left and top borders. If offsets are not provided, 0.5 will be used as default value. |
|||||||||||
ssd_config.arch |
Arch |
string |
The backbone for feature extraction |
resnet |
||||||||||
ssd_config.nlayers |
Number of Layers |
integer |
The number of conv layers in a specific arch |
18 |
||||||||||
ssd_config.freeze_bn |
Freeze BN |
bool |
Whether to freeze all batch normalization layers during training. |
FALSE |
||||||||||
ssd_config.freeze_blocks |
Freeze Blocks |
list |
The list of block IDs to be frozen in the model during training |
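The nms_config parameters map onto a standard greedy non-maximum suppression: discard detections below confidence_threshold, suppress overlaps above clustering_iou_threshold, and cap the output at top_k boxes. A minimal sketch follows; the detections are made-up rows, and the optimized NMS referred to by infer_nms_score_bits is not modelled here.

```python
def iou(a, b):
    """Intersection-over-union of two [x1, y1, x2, y2, score] boxes."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def nms(dets, confidence_threshold=0.01, clustering_iou_threshold=0.6, top_k=200):
    dets = [d for d in dets if d[4] >= confidence_threshold]   # score filter
    dets.sort(key=lambda d: d[4], reverse=True)                # highest score first
    kept = []
    for d in dets:
        if all(iou(d, k) < clustering_iou_threshold for k in kept):
            kept.append(d)
        if len(kept) == top_k:
            break
    return kept

example = [[10, 10, 50, 50, 0.9], [12, 12, 52, 52, 0.8], [100, 100, 140, 140, 0.4]]
print(nms(example))
```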
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
||||||
experiment_spec |
Experiment Spec |
hidden |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
yes |
|||||||
data_type |
Data Type |
string |
The data type of the exported model engine |
fp32 |
int8, fp32, fp16 |
yes |
yes |
||||
max_workspace_size |
integer |
The maximum workspace size for the TensorRT engine, for example 1<<30 or 2<<30 |
|||||||||
max_batch_size |
integer |
1 |
|||||||||
min_batch_size |
integer |
1 |
|||||||||
opt_batch_size |
integer |
1 |
|||||||||
gen_ds_config |
bool |
FALSE |
|||||||||
engine_file |
Engine File |
hidden |
UNIX path to the model engine file. |
yes |
|||||||
verbose |
hidden |
TRUE |
|||||||||
strict_type_constraints |
bool |
FALSE |
|||||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
100 |
yes |
||||||
cal_image_dir |
hidden |
||||||||||
cal_cache_file |
Calibration cache file |
hidden |
UNIX path to the INT8 calibration cache file |
yes |
yes |
||||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
|||||||
results_dir |
hidden |
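A couple of the export parameters above are easiest to read as plain arithmetic: max_workspace_size is given as a bit-shift (1<<30 bytes is 1 GiB), and INT8 calibration consumes roughly batches * batch_size images from cal_image_dir. The snippet below simply restates the defaults from the table; it does not invoke TensorRT.

```python
max_workspace_size = 1 << 30            # 1 GiB, per the "1<<30, 2<<30" example
batch_size = 100                        # images per batch for engine generation
batches = 100                           # calibration batches in INT8 mode

calibration_images_needed = batches * batch_size
print(f"workspace: {max_workspace_size / 2**30:.0f} GiB, "
      f"calibration images: {calibration_images_needed}")
```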
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
model |
Model path |
hidden |
UNIX path to where the input model is located. |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
yes |
|||||||
experiment_spec_path |
hidden |
||||||||||
key |
Encode key |
hidden |
|||||||||
normalizer |
Normalizer |
string |
How to normalize |
max |
max, L2 |
||||||
equalization_criterion |
Equalization Criterion |
string |
Criteria to equalize the stats of inputs to an element wise op layer. |
union |
union, intersection, arithmetic_mean,geometric_mean |
no |
|||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
|||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
||||||
excluded_layers |
Excluded layers |
string |
List of layers to exclude from pruning, for example: -i item1 item2 |
||||||||
results_dir |
Results directory |
hidden |
|||||||||
verbose |
verbosity |
hidden |
TRUE |
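The pruning controls above combine as follows: filter norms are normalized with the chosen normalizer, filters whose normalized norm clears pruning_threshold are kept, at least min_num_filters survive per layer, and the kept count respects pruning_granularity. The sketch below is a simplified reading of that interaction; the real pruner also applies equalization_criterion across element-wise ops, which is omitted here.

```python
import numpy as np

def filters_to_keep(filter_norms, pruning_threshold=0.1,
                    min_num_filters=16, pruning_granularity=8):
    """Rough estimate of how many filters survive pruning in one layer."""
    norms = np.asarray(filter_norms, dtype=float)
    normalized = norms / norms.max()                    # normalizer = "max"
    keep = int((normalized >= pruning_threshold).sum())
    keep = max(keep, min_num_filters)                   # floor on kept filters
    keep -= keep % pruning_granularity                  # multiple of granularity
    return max(keep, pruning_granularity)

rng = np.random.default_rng(0)
print(filters_to_keep(rng.random(64)))                  # toy per-filter norms
```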
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
random_seed |
Random Seed |
integer |
Seed value for the random number generator in the network |
42 |
|||||||
dataset_config |
Dataset |
collection |
Parameters to configure the dataset |
||||||||
dataset_config.dataset |
string |
custom |
|||||||||
dataset_config.augment |
Augment |
bool |
Boolean to augment the dataset or not |
FALSE |
|||||||
dataset_config.buffer_size |
buffer_size |
integer |
The maximum number of images to buffer for an iteration; at most the total number of images in the dataset |
||||||||
dataset_config.filter_data |
filter_data |
bool |
Set this to omit images or masks that are not present |
||||||||
dataset_config.resize_padding |
Resize Padding |
bool |
Whether to resize the image while preserving the aspect ratio, using padding |
||||||||
dataset_config.resize_method |
Resize Method |
string |
The method used to resize the image: BILINEAR, NEAREST_NEIGHBOR, BICUBIC, or AREA |
||||||||
dataset_config.input_image_type |
Input Image type |
string |
Gives information on if the input is RGB or grayscale |
color |
color, grayscale |
||||||
dataset_config.data_sources.image_path |
Image path |
hidden |
|||||||||
dataset_config.data_sources.masks_path |
Masks path |
hidden |
|||||||||
dataset_config.data_class_config |
Target Class Mappings |
collection |
Contains the parameters to configure the mapping of different classes |
yes |
yes |
||||||
dataset_config.data_class_config.target_classes |
Target Class Mappings list |
list |
Contains the parameters to configure the mapping of different classes |
yes |
|||||||
dataset_config.data_class_config.target_classes.name |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
yes |
^[-a-zA-Z0-9_]{1,40}$ |
yes |
||||
dataset_config.data_class_config.target_classes.mapping_class |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
yes |
^[-a-zA-Z0-9_]{1,40}$ |
yes |
||||
dataset_config.data_class_config.target_classes.label_id |
Class label ID |
integer |
1 |
yes |
yes |
||||||
augmentation_config |
Data Augmentation |
collection |
Collection of parameters to configure augmentation |
Yes |
|||||||
augmentation_config.spatial_augmentation |
collection |
Configure augmentation pertaining to spatial transformations |
|||||||||
augmentation_config.spatial_augmentation.hflip_probability |
float |
Probability of flipping the image horizontally |
|||||||||
augmentation_config.spatial_augmentation.vflip_probability |
float |
Probability of flipping the image vertically |
|||||||||
augmentation_config.spatial_augmentation.crop_and_resize_prob |
float |
probability at which to crop and resize |
|||||||||
model_config |
Model |
collection |
|||||||||
model_config.arch |
BackBone Architecture |
string |
The architecture of the backbone feature extractor to be used for training. |
vanilla_unet_dynamic |
resnet |
yes |
|||||
model_config.enable_qat |
Enable Quantization aware training |
bool |
Set this to true, to enable quantization during re-training of pruned model |
FALSE |
|||||||
model_config.byom_model |
Model path to BYOM .tltb |
hidden |
Set the path to byom model when using byom arch |
||||||||
model_config.load_graph |
Pruned model Load Graph |
bool |
For a pruned model, set this parameter to True. Pruning modifies the original graph, so the pruned model graph and the weights need to be imported. |
FALSE |
|||||||
model_config.freeze_blocks |
Freeze Blocks |
integer |
This parameter defines which blocks may be frozen from the instantiated feature extractor template, and is different for different feature extractor templates. |
0 |
3 |
||||||
model_config.freeze_bn |
Freeze Batch Normalization |
bool |
A flag to determine whether to freeze the Batch Normalization layers in the model during training. |
||||||||
model_config.all_projections |
All Projections |
bool |
For templates with shortcut connections, this parameter defines whether or not all shortcuts should be instantiated with 1x1 projection layers, irrespective of whether there is a change in stride across the input and output. |
TRUE |
|||||||
model_config.num_layers |
Number of Layers |
integer |
The depth of the feature extractor for scalable templates. |
18 |
10, 18, 34, 50, 101 |
yes |
|||||
model_config.use_pooling |
Use Pooling |
bool |
Choose between using strided convolutions or MaxPooling while downsampling. When True, MaxPooling is used to downsample; however, for the object-detection network, NVIDIA recommends setting this to False and using strided convolutions. |
||||||||
model_config.use_batch_norm |
Use Batch Normalization |
bool |
A flag to determine whether to use Batch Normalization layers or not. |
TRUE |
|||||||
model_config.enable_qat |
bool |
FALSE |
|||||||||
model_config.dropout_rate |
Dropout Rate |
float |
Probability for drop out |
0 |
0.1 |
||||||
model_config.training_precision.backend_floatx |
Backend Training Precision |
string |
A nested parameter that sets the precision of the backend training framework. |
__FLOAT32__ |
__FLOAT32__ |
yes |
|||||
model_config.initializer |
Kernel Initializer |
enum |
The type of initializer for the kernels |
__HE_UNIFORM__, __HE_NORMAL__, __GLOROT_UNIFORM__ |
|||||||
model_config.model_input_height |
Model Input height |
int |
The model input dimensions |
||||||||
model_config.model_input_width |
Model Input width |
int |
The model input dimensions |
||||||||
model_config.model_input_channels |
Model input channels |
int |
The model input dimensions |
||||||||
training_config |
Training |
collection |
|||||||||
training_config.batch_size |
Batch Size Per GPU |
integer |
The number of images per batch per GPU. |
1 |
1 |
yes |
|||||
training_config.epochs |
Number of Epochs |
integer |
The total number of epochs to run the experiment. |
120 |
1 |
yes |
Yes |
||||
training_config.log_summary_steps |
integer |
Number of steps after which to display the log summary |
200 |
||||||||
training_config.checkpoint_interval |
checkpoint interval |
integer |
Number of epochs after which to save the checkpoint |
1 |
|||||||
training_config.loss |
string |
Loss to be used |
cross_entropy |
cross_entropy, cross_dice_sum, dice |
|||||||
training_config.learning_rate |
float |
Learning rate |
0.00008 |
||||||||
training_config.lr_scheduler |
learning rate scheduler |
string |
|||||||||
training_config.weights_monitor |
bool |
Bool to turn on TensorBoard visualization of loss and gradient variations |
|||||||||
training_config.regularizer |
collection |
Regularizer to use |
|||||||||
training_config.regularizer.type |
string |
__L2__ |
__L1__, __L2__ |
||||||||
training_config.regularizer.weight |
float |
1.00E-05 |
|||||||||
training_config.optimizer |
Optimizer |
collection |
|||||||||
training_config.optimizer.adam.epsilon |
Optimizer Adam Epsilon |
float |
A very small number to prevent any division by zero in the implementation. |
1.00E-08 |
yes |
||||||
training_config.optimizer.adam.beta1 |
Optimizer Adam Beta1 |
float |
0.899999976 |
yes |
|||||||
training_config.optimizer.adam.beta2 |
Optimizer Adam Beta2 |
float |
0.999000013 |
yes |
|||||||
training_config.visualizer |
collection |
||||||||||
training_config.visualizer.enabled |
bool |
FALSE |
|||||||||
training_config.visualizer.save_summary_steps |
integer |
Steps at which to visualize loss on TB. |
|||||||||
training_config.visualizer.infrequent_save_summary_steps |
integer |
Steps at which to visualize input images, ground truth and histograms. |
|||||||||
training_config.data_options |
bool |
TRUE |
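Each data_class_config.target_classes entry pairs a source label (name) with the class the network learns (mapping_class) and a label_id, and the spec constrains the names with the regex shown in the table. The sketch below validates a couple of illustrative entries against that regex; the entries themselves are examples, not defaults.

```python
import re

NAME_RE = re.compile(r"^[-a-zA-Z0-9_]{1,40}$")   # regex from the spec

# Illustrative entries only; not values from this spec.
target_classes = [
    {"name": "person", "mapping_class": "masked-person", "label_id": 1},
    {"name": "background", "mapping_class": "background", "label_id": 0},
]

for entry in target_classes:
    for field in ("name", "mapping_class"):
        assert NAME_RE.match(entry[field]), f"invalid class name: {entry[field]}"
print("all class mappings are valid")
```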
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
||||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
||||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
||||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
|||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
||||||
export_format |
Export format |
string |
RIVA |
RIVA, ONNX |
yes |
|||||||
export_to |
Export To |
const |
finetune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the pre-trained model |
||||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save the model |
yes |
|||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
yes |
yes |
||||||
train_dataset |
Train Dataset |
hidden |
Path to the train dataset manifest json file |
yes |
|||||||
validation_dataset |
Validation Dataset |
hidden |
Path to the validation dataset manifest json file |
yes |
|||||||
training_ds |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||
training_ds.dataset |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||
training_ds.dataset._target_ |
Target dataset class |
const |
Nemo training ds class instance |
nemo.collections.tts.data.datalayers.MelAudioDataset |
yes |
||||||
training_ds.dataset.manifest_filepath |
Train manifest file |
const |
Path to the train dataset manifest json file |
${train_dataset} |
yes |
||||||
training_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration lesser than the given value (in seconds) will be dropped |
0.75 |
yes |
||||||
training_ds.dataset.n_segments |
Number of segments |
int |
The length of the audio, in samples, to load. For example, given a sampling rate of 16 kHz and n_segments=16000, a random 1-second segment of audio from the clip will be loaded. The segment is sampled randomly each time the audio is batched. This can be set to -1 to load the entire audio (see the sketch after this table). |
16384 |
yes |
||||||
training_ds.dataset.mel_hop_size |
Mel Hop Size |
int |
Mel hop size |
256 |
yes |
||||||
training_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
||||||||
training_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||
training_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation data. |
TRUE |
yes |
||||||
training_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
16 |
yes |
yes |
|||||
training_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
4 |
yes |
||||||
validation_ds |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||
validation_ds.dataset |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||
validation_ds.dataset._target_ |
Target dataset class |
const |
Nemo validation ds class instance |
nemo.collections.tts.data.datalayers.MelAudioDataset |
yes |
||||||
validation_ds.dataset.manifest_filepath |
Validation manifest file |
const |
Path to the validation dataset manifest json file |
${validation_dataset} |
yes |
||||||
validation_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration lesser than the given value (in seconds) will be dropped |
0.75 |
yes |
||||||
validation_ds.dataset.n_segments |
Number of segments |
int |
The length of the audio, in samples, to load. For example, given a sampling rate of 16 kHz and n_segments=16000, a random 1-second segment of audio from the clip will be loaded. The segment is sampled randomly each time the audio is batched. This can be set to -1 to load the entire audio. |
16384 |
yes |
||||||
validation_ds.dataset.mel_hop_size |
Mel Hop Size |
int |
Mel hop size |
256 |
yes |
||||||
validation_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
||||||||
validation_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||
validation_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation data. |
FALSE |
yes |
||||||
validation_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
2 |
yes |
yes |
|||||
validation_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
1 |
yes |
||||||
optim |
Optimizer |
collection |
yes |
||||||||
optim._target_ |
Optimizer Class |
const |
The class of the Optimizer to be instantiated |
torch.optim.AdamW |
yes |
||||||
optim.lr |
Learning rate |
float |
Learning rate |
0.0001 |
yes |
yes |
|||||
optim.betas |
Optimizer betas |
list |
Coefficients used to compute the running averages of the gradient and its square |
[0.8, 0.99] |
yes |
||||||
trainer |
collection |
Parameters to configure the trainer object |
|||||||||
trainer.max_steps |
Maximum Steps |
int |
Maximum number of steps to run training |
1000 |
0 |
yes |
|||||
trainer.max_epochs |
Maximum number of epochs |
int |
Maximum number of epochs to run training. This parameter supersedes the trainer.max_steps parameter |
0 |
yes |
yes |
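The n_segments parameters above are expressed in samples rather than seconds. The snippet below is only arithmetic on the documented relationship (samples = seconds × sample rate); the helper name is illustrative and not part of the spec.

```python
def n_segments_to_seconds(n_segments: int, sample_rate: int) -> float:
    """Convert an n_segments value (in samples) into a clip duration in seconds.

    A value of -1 means the entire audio clip is loaded, so there is no fixed
    segment duration to report.
    """
    if n_segments == -1:
        return float("inf")  # whole clip
    return n_segments / sample_rate

# 16000 samples at 16 kHz is exactly 1 second, as in the description above.
assert n_segments_to_seconds(16000, 16000) == 1.0
# The finetune default of 16384 samples at 22.05 kHz is roughly 0.74 seconds.
print(round(n_segments_to_seconds(16384, 22050), 3))  # 0.743
```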
infer
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||
input_path |
Input path |
hidden |
Path to the directory containing spectrogram outputs from FastPitch inference |
yes |
yes |
||||||
output_path |
Output path |
hidden |
Path to the output directory containing rendered audio clips |
yes |
yes |
||||||
sample_rate |
Sample Rate |
int |
Sampling rate of the output audio clip. |
22050 |
yes |
yes |
infer_onnx
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save/load the model |
yes |
|||||||
resume_model_weights |
Pretrained model path |
hidden |
Path to the trained/finetuned model |
||||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
1 |
1 |
|||||
input_path |
Input path |
hidden |
Path to the directory containing spectrogram outputs from FastPitch inference |
yes |
yes |
||||||
output_path |
Output path |
hidden |
Path to the output directory containing rendered audio clips |
yes |
yes |
||||||
sample_rate |
Sample Rate |
int |
Sampling rate of the output audio clip. |
22050 |
yes |
yes |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
|||||||
experiment_spec |
Experiment spec |
hidden |
Path to the training experiment spec file |
yes |
|||||||
result_dir |
Results directory |
hidden |
Path to the output results directory and logs |
yes |
|||||||
key |
Save key |
hidden |
Key to save the model |
yes |
|||||||
gpus |
Number of GPUs |
hidden |
Number of GPUs to be used to train the model |
1 |
yes |
yes |
|||||
train_dataset |
Train Dataset |
hidden |
Path to the train dataset manifest json file |
yes |
|||||||
validation_dataset |
Validation Dataset |
hidden |
Path to the validation dataset manifest json file |
yes |
|||||||
training_ds |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||
training_ds.dataset |
Train Dataset |
collection |
Parameters to configure the training dataset |
||||||||
training_ds.dataset._target_ |
Target dataset class |
const |
Nemo training ds class instance |
nemo.collections.tts.data.datalayers.AudioDataset |
yes |
||||||
training_ds.dataset.manifest_filepath |
Train manifest file |
const |
Path to the train dataset manifest json file |
${train_dataset} |
yes |
||||||
training_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
||||||||
training_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration lesser than the given value (in seconds) will be dropped |
0.1 |
yes |
||||||
training_ds.dataset.n_segments |
Number of segments |
int |
The length of the audio, in samples, to load. For example, given a sampling rate of 16 kHz and n_segments=16000, a random 1-second segment of audio from the clip will be loaded. The segment is sampled randomly each time the audio is batched. This can be set to -1 to load the entire audio. |
8192 |
yes |
||||||
training_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
yes |
||||||
training_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
yes |
|||||||
training_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||
training_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation data. |
TRUE |
yes |
||||||
training_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
16 |
yes |
yes |
|||||
training_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
4 |
yes |
||||||
validation_ds |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||
validation_ds.dataset |
Validation Dataset |
collection |
Parameters to configure the validation dataset |
||||||||
validation_ds.dataset._target_ |
Target dataset class |
const |
Nemo validation ds class instance |
nemo.collections.tts.data.datalayers.AudioDataset |
yes |
||||||
validation_ds.dataset.manifest_filepath |
Validation manifest file |
const |
Path to the validation dataset manifest json file |
${train_dataset} |
yes |
||||||
validation_ds.dataset.max_duration |
Max clip duration |
float |
All files with a duration greater than the given value (in seconds) will be dropped |
||||||||
validation_ds.dataset.min_duration |
Min clip duration |
float |
All files with a duration lesser than the given value (in seconds) will be dropped |
||||||||
validation_ds.dataset.n_segments |
Number of segments |
int |
The length of the audio, in samples, to load. For example, given a sampling rate of 16 kHz and n_segments=16000, a random 1-second segment of audio from the clip will be loaded. The segment is sampled randomly each time the audio is batched. This can be set to -1 to load the entire audio. |
-1 |
yes |
||||||
validation_ds.dataset.trim |
Trim |
bool |
Whether to trim silence from beginning and end of the audio signal using librosa.effects.trim(). |
FALSE |
yes |
||||||
validation_ds.dataloader_params |
Dataloader parameters |
collection |
Configuring the dataloader yielding the data samples |
||||||||
validation_ds.dataloader_params.drop_last |
Drop last |
bool |
Whether to drop the last samples |
FALSE |
yes |
||||||
validation_ds.dataloader_params.shuffle |
Enable shuffle |
bool |
Whether to shuffle the data. We recommend True for training data and False for validation data. |
TRUE |
yes |
||||||
validation_ds.dataloader_params.batch_size |
Batch Size |
integer |
Number of samples per batch of data. |
16 |
yes |
yes |
|||||
validation_ds.dataloader_params.num_workers |
Number of workers |
integer |
The number of worker threads for loading the dataset |
1 |
yes |
||||||
model |
Model Config |
collection |
Collection to configure the HiFiGAN model element |
||||||||
model.preprocessor |
Preprocessor config |
collection |
Collection to configure the model preprocessor |
||||||||
model.preprocessor._target_ |
Target class of the preprocessor instance |
const |
The Nemo class to instantiate. |
nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures |
yes |
||||||
model.preprocessor.dither |
Dither |
float |
0 |
yes |
|||||||
model.preprocessor.frame_splicing |
Spectrogram Frames per step |
integer |
Number of spectrogram frames per step |
1 |
yes |
||||||
model.preprocessor.nfilt |
Number of filter |
integer |
Number of filters in the conv layer |
80 |
|||||||
model.preprocessor.highfreq |
High frequency bound in Hz |
integer |
Upper bound of the mel basis in Hz |
8000 |
yes |
||||||
model.preprocessor.log |
Log Spectrograms |
bool |
Flags to enable logging spectrograms |
TRUE |
yes |
||||||
model.preprocessor.log_zero_guard_type |
Zero guard type |
string |
Method used to avoid taking the log of zero. There are two options: “add” or “clamp”. |
clamp |
yes |
||||||
model.preprocessor.log_zero_guard_value |
Zero guard value |
float |
The guard value used so that log(0) is never taken. |
0.00001 |
|||||||
model.preprocessor.lowfreq |
Low frequency bound in Hz |
integer |
Lower bound of the mel basis in Hz |
0 |
yes |
||||||
model.preprocessor.mag_power |
Magnitude power |
integer |
Power to which the magnitude spectrogram is raised prior to multiplication with the mel basis |
1 |
yes |
||||||
model.preprocessor.n_fft |
FFT Window size |
integer |
The size of the window for the FFT in samples. |
1024 |
yes |
||||||
model.preprocessor.n_window_size |
FFT Window size |
integer |
The size of the window for the FFT in samples. |
1024 |
yes |
||||||
model.preprocessor.n_window_stride |
FFT Window stride |
integer |
The stride of the window for FFT |
256 |
yes |
||||||
model.preprocessor.normalize |
Feature Normalization |
string |
Feature normalization mode. None disables feature normalization; all_features normalizes over the entire spectrogram; per_feature normalizes per channel/frequency |
||||||||
model.preprocessor.pad_to |
Pad to |
integer |
Pads the output to a multiple of pad_to |
0 |
yes |
||||||
model.preprocessor.pad_value |
Pad Value |
float |
The value that shorter mels are padded with |
-11.52 |
yes |
||||||
model.preprocessor.preemph |
Pre-emphasis value |
float |
Amount of pre-emphasis to be added to the audio. Can be disabled by passing None. |
||||||||
model.preprocessor.sample_rate |
Sampling rate |
integer |
The target sample rate to load the audio in Hz. |
22050 |
yes |
||||||
model.preprocessor.window |
Window type |
string |
The type of window to be used. |
hann |
yes |
||||||
model.preprocessor.exact_pad |
Exact pad |
bool |
TRUE |
||||||||
model.preprocessor.use_grads |
Use grads |
bool |
FALSE |
||||||||
model.optim |
Optimizer |
collection |
yes |
||||||||
model.optim._target_ |
Optimizer Class |
const |
The class of the Optimizer to be instantiated |
torch.optim.AdamW |
yes |
||||||
model.optim.lr |
Learning rate |
float |
Learning rate |
0.0002 |
yes |
yes |
|||||
model.optim.betas |
Optimizer betas |
list |
Coefficients used to compute the running averages of the gradient and its square |
[0.8, 0.99] |
yes |
||||||
model.sched |
Learning rate scheduler |
collection |
Parameters to configure the learning rate scheduler |
||||||||
model.sched.name |
Scheduler Name |
string |
Type of learning rate scheduler to be used |
CosineAnnealing |
yes |
||||||
model.sched.warmup_ratio |
Warm-up ratio |
float |
Ratio of steps to warm up the learning rate |
0.02 |
yes |
||||||
model.sched.min_lr |
Minimum Learning Rate |
float |
Lower bound of the learning rate scheduler |
1.00E-05 |
yes |
||||||
model.max_steps |
Maximum steps |
const |
Maximum number of steps to run training |
${trainer.max_steps} |
yes |
||||||
model.l1_loss_factor |
L1 Loss factor |
int |
The multiplicative factor for L1 loss used in training |
45 |
yes |
||||||
model.denoise_strength |
Denoise strength |
float |
A small denoising factor, currently only used in validation |
0.0025 |
yes |
||||||
model.generator |
Generator configuration |
collection |
Parameters to configure the generator. |
||||||||
model.generator._target_ |
Class for the HiFiGAN generator |
const |
Target Nemo Generator class to instantiate |
nemo.collections.tts.modules.hifigan_modules.Generator |
yes |
||||||
model.generator.resblock |
Resblock |
int |
Type of Residual Block to be used |
1 |
1,2 |
yes |
|||||
model.generator.upsample_rates |
Upsample rate |
list |
List of upsample rate for the ConvTranspose1D layer |
[8,8,2,2] |
0 |
yes |
|||||
model.generator.upsample_kernel_sizes |
Upsample kernel size |
list |
List of kernel dimensions for the ConvTranspose1D layers. Note: The number of elements in this list must equal the number of elements in the model.generator.upsample_rates parameter. |
[16, 16, 4, 4] |
0 |
yes |
|||||
model.generator.upsample_initial_channel |
Upsample initial channel |
int |
Number of channels in the first upsample layer. The channel count of each subsequent layer is computed as upsample_initial_channel / (2 ** i), where i is in range(len(upsample_kernel_sizes)) (see the sketch after this table). |
512 |
8 |
yes |
|||||
model.generator.resblock_kernel_sizes |
Resblock kernel sizes |
list |
Size of all the Conv1D kernels in a resblock |
[3, 7, 11] |
yes |
||||||
model.generator.resblock_dilation_sizes |
Resblock dilation sizes |
list |
Dilation factor per Conv1D layer in a resblock |
[[1,3,5], [1,3,5], [1,3,5]] |
yes |
||||||
trainer |
collection |
Parameters to configure the trainer object |
|||||||||
trainer.max_steps |
Maximum Steps |
int |
Maximum number of steps to run training |
25000 |
0 |
yes |
|||||
trainer.max_epochs |
Maximum number of epochs |
int |
Maximum number of epochs to run training |
100 |
0 |
yes |
yes |
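The upsample_initial_channel description gives the channel-count rule as a formula. The snippet below simply evaluates it for the defaults listed above so the per-layer widths are visible, and checks that the product of the default upsample_rates matches the 256-sample hop size used elsewhere in this spec (an expected property of this kind of generator, not something enforced by the schema).

```python
# Defaults taken from the table above.
upsample_initial_channel = 512
upsample_kernel_sizes = [16, 16, 4, 4]
upsample_rates = [8, 8, 2, 2]

# Channel count feeding each ConvTranspose1D layer, per the documented rule.
channels = [upsample_initial_channel // (2 ** i)
            for i in range(len(upsample_kernel_sizes))]
print(channels)  # [512, 256, 128, 64]

# Total upsampling factor from mel frames back to audio samples.
total = 1
for rate in upsample_rates:
    total *= rate
print(total)  # 256, matching n_window_stride above
```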
convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
b |
batch_size |
integer |
calibration batch size |
8 |
yes |
||||||
c |
cache_file |
path |
calibration cache file (default cal.bin) |
||||||||
d |
input_dims |
list |
comma separated list of input dimensions (not required for TLT 3.0 new models). |
||||||||
i |
input_order |
enum |
input dimension ordering |
nchw |
nchw, nhwc, nc |
||||||
m |
max_batch_size |
integer |
maximum TensorRT engine batch size (default 16). If you run into an out-of-memory issue, decrease the batch size accordingly. |
16 |
yes |
||||||
o |
outputs |
list |
comma separated list of output node names |
||||||||
p |
parse_profile_shapes |
list |
comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape uses x as the delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW. Can be specified multiple times if the model has multiple input tensors. This argument is only useful in the dynamic-shape case (see the sketch after this table). |
||||||||
s |
strict_type_constraints |
bool |
TensorRT strict_type_constraints flag for INT8 mode |
FALSE |
|||||||
t |
data_type |
enum |
TensorRT data type |
fp32 |
fp32, fp16, int8 |
yes |
|||||
u |
dla_core |
int |
Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be used for inference; GPU fallback is always allowed). |
-1 |
|||||||
w |
max_workspace_size |
int |
maximum workspace size of the TensorRT engine (default 1<<30). If you run into an out-of-memory issue, increase the workspace size accordingly. |
1<<30, 2<<30 |
|||||||
platform |
platform |
enum |
platform label |
rtx |
yes |
yes |
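The -p (parse_profile_shapes) description defines a small string format. The snippet below builds and parses one such optimization-profile string using only the layout documented above; the input tensor name and dimensions are made-up placeholders.

```python
def build_profile_shape(input_name, min_shape, opt_shape, max_shape):
    """Format a profile string as <input_name>,<min_shape>,<opt_shape>,<max_shape>."""
    fmt = lambda shape: "x".join(str(dim) for dim in shape)
    return ",".join([input_name, fmt(min_shape), fmt(opt_shape), fmt(max_shape)])

def parse_profile_shape(spec):
    """Split a profile string back into its tensor name and three shapes."""
    name, *shapes = spec.split(",")
    return name, [tuple(int(dim) for dim in s.split("x")) for s in shapes]

# Hypothetical input tensor "Input" with a dynamic batch size of 1 to 16.
spec = build_profile_shape("Input", (1, 3, 384, 1248), (8, 3, 384, 1248), (16, 3, 384, 1248))
print(spec)  # Input,1x3x384x1248,8x3x384x1248,16x3x384x1248
print(parse_profile_shape(spec))
```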
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
integer |
The version of this schema |
1 |
||||||||
random_seed |
Random Seed |
integer |
Random seed |
42 |
||||||||
dataset_config |
Dataset |
collection |
Dataset configuration |
|||||||||
dataset_config.data_sources |
Data Source |
hidden |
Data source |
|||||||||
dataset_config.data_sources.image_directory_path |
Image Directory |
hidden |
Relative path to the directory of images for training |
|||||||||
dataset_config.data_sources.root_path |
Root Path |
hidden |
The root path |
|||||||||
dataset_config.data_sources.source_weight |
Source Weight |
hidden |
The weighting for the source |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Directory Path |
hidden |
The path to the directory of labels for training |
|||||||||
dataset_config.data_sources.tfrecords_path |
TFRecords Path |
hidden |
The path to the TFRecords data for training |
|||||||||
dataset_config.target_class_mapping |
Target Class Mapping |
collection |
The Mapping from source class names to target class names |
|||||||||
Class you want to train for (vehicle) |
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
Class defined in the label file (car, truck, suv -> map to vehicle) |
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
dataset_config.validation_fold |
Validation Fold |
integer |
The percentage of the entire dataset to be used as validation data |
0 |
||||||||
dataset_config.validation_data_sources |
Validation Data Sources |
hidden |
The definition is the same as training data sources |
|||||||||
dataset_config.include_difficult_in_training |
Include Difficult Objects in Training |
bool |
Whether or not to include difficult objects in training |
FALSE |
||||||||
dataset_config.type |
Type |
string |
Dataset type, either kitti or coco |
kitti |
||||||||
dataset_config.image_extension |
Image Extension |
string |
The image extension |
|||||||||
dataset_config.is_monochrome |
Is Monochrome |
bool |
Whether or not the images are monochrome(grayscale) |
FALSE |
||||||||
augmentation_config |
Data Augmentation |
collection |
Data augmentation configuration |
|||||||||
augmentation_config.hue |
Hue |
float |
Hue variance |
0.1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation variance |
1.5 |
||||||||
augmentation_config.exposure |
Exposure |
float |
Exposure |
1.5 |
||||||||
augmentation_config.vertical_flip |
Vertical Flip Probability |
float |
Probability of vertical flip |
0 |
||||||||
augmentation_config.horizontal_flip |
Horizontal Flip |
float |
Probability of horizontal flip |
0.5 |
||||||||
augmentation_config.jitter |
Jitter |
float |
Jitter |
0.3 |
||||||||
augmentation_config.output_width |
Output Width |
integer |
Output Image Width |
1248 |
||||||||
augmentation_config.output_height |
Output Height |
integer |
Output Image Height |
384 |
||||||||
augmentation_config.output_channel |
Output Channel |
integer |
Output Image Channel |
3 |
||||||||
augmentation_config.randomize_input_shape_period |
Randomize Input Shape Period |
integer |
Period(in number of epochs) to randomize input shape for multi-scale training |
0 |
||||||||
augmentation_config.image_mean |
Image Mean |
collection |
per-channel image mean values |
|||||||||
augmentation_config.image_mean.key |
string |
|||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||
training_config |
Training |
collection |
Training configuration |
|||||||||
training_config.batch_size_per_gpu |
Batch Size per GPU |
integer |
Batch size per GPU in training |
8 |
||||||||
training_config.num_epochs |
Number of Epochs |
integer |
Number of Epochs to run the training |
80 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule |
Soft Start Annealing Schedule |
collection |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate, example: 1e-7 |
1.00E-06 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
Maximum learning rate. example: 1e-4 |
1.00E-04 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
progress(in percentage) for warm up: example 0.3 |
0.1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
progress(in percentage) for decreasing learning rate |
0.5 |
||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
maximum learning rate |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start |
Soft Start |
float |
progress(in percentage) for warm up |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate |
|||||||||
training_config.regularizer |
Regularizer |
collection |
||||||||||
training_config.regularizer.type |
Type |
string |
Type of regularizer, either NO_REG, L1 or L2 |
__L1__ |
||||||||
training_config.regularizer.weight |
Weight |
float |
weight decay of regularizer |
3.00E-05 |
||||||||
training_config.optimizer.adam |
Adam |
collection |
||||||||||
training_config.optimizer.adam.epsilon |
Epsilon |
float |
Epsilon of Adam |
1.00E-07 |
||||||||
training_config.optimizer.adam.beta1 |
Beta1 |
float |
beta1 of Adam |
0.9 |
||||||||
training_config.optimizer.adam.beta2 |
Beta 2 |
float |
beta2 of Adam |
0.999 |
||||||||
training_config.optimizer.adam.amsgrad |
AMSGrad |
bool |
AMSGrad of Adam |
FALSE |
||||||||
training_config.optimizer.sgd |
SGD |
collection |
||||||||||
training_config.optimizer.sgd.momentum |
Momentum |
float |
momentum of sgd (example: 0.9) |
|||||||||
training_config.optimizer.sgd.nesterov |
Nesterov |
bool |
nesterov of sgd (example: FALSE) |
|||||||||
training_config.optimizer.rmsprop |
RMSProp |
collection |
||||||||||
training_config.optimizer.rmsprop.rho |
Rho |
float |
rho of RMSProp |
|||||||||
training_config.optimizer.rmsprop.momentum |
Momentum |
float |
momentum of RMSProp |
|||||||||
training_config.optimizer.rmsprop.epsilon |
Epsilon |
float |
epsilon of RMSProp |
|||||||||
training_config.optimizer.rmsprop.centered |
Centered |
bool |
centered of RMSProp |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
Period(in number of epochs) to save checkpoints |
10 |
||||||||
training_config.enable_qat |
QAT |
bool |
Enable QAT or not |
FALSE |
||||||||
training_config.resume_model_path |
Resume Model Path |
hidden |
Path of the model to be resumed |
|||||||||
training_config.pretrain_model_path |
Pretrained Model Path |
hidden |
Path of the pretrained model |
|||||||||
training_config.pruned_model_path |
Pruned Model Path |
hidden |
Path of the pruned model |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
3 |
||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
4 |
||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
FALSE |
||||||||
yolov3_config |
YOLOv3 |
collection |
||||||||||
yolov3_config.big_anchor_shape |
Big Anchor Shape |
string |
Big anchor shapes in string |
[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] |
||||||||
yolov3_config.mid_anchor_shape |
Middle Anchor Shape |
string |
Middle anchor shapes in string |
[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] |
||||||||
yolov3_config.small_anchor_shape |
Small Anchor Shape |
string |
Small anchor shapes in string |
[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] |
||||||||
yolov3_config.matching_neutral_box_iou |
float |
0.7 |
||||||||||
yolov3_config.arch |
Arch |
string |
backbone(architecture) |
resnet |
||||||||
yolov3_config.nlayers |
Number of Layers |
integer |
number of layers for this architecture |
18 |
||||||||
yolov3_config.arch_conv_blocks |
Extra Convolution Blocks |
integer |
Number of extra convolution blocks |
2 |
||||||||
yolov3_config.loss_loc_weight |
weighting for location loss |
float |
weighting factor for location loss |
0.8 |
||||||||
yolov3_config.loss_neg_obj_weights |
weighting for loss of negative objects |
float |
weighting factor for loss of negative objects |
100 |
||||||||
yolov3_config.loss_class_weights |
weighting for classification loss |
float |
weighting factor for classification loss |
1 |
||||||||
yolov3_config.freeze_blocks |
Freeze Blocks |
list |
ID of blocks to be frozen during training |
|||||||||
yolov3_config.freeze_bn |
Freeze BN |
bool |
Whether or not to freeze BatchNormalization layers |
FALSE |
||||||||
yolov3_config.force_relu |
Force ReLU |
bool |
Whether or not to force activation function to ReLU |
FALSE |
||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.001 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.5 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||
nms_config.force_on_cpu |
Force on CPU |
bool |
Force NMS to run on CPU in training |
TRUE |
||||||||
eval_config.average_precision_mode |
AP Mode |
enum |
Average Precision mode, either __SAMPLE__ or __INTEGRATE__ |
__SAMPLE__ |
||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
8 |
||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
export
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
1 |
||||||||
model |
Model |
hidden |
UNIX path to the model file |
0.1 |
yes |
|||||||
data_type |
Data Type |
enum |
Data type for the exported model / TensorRT engine |
int8 |
int8, fp32, fp16 |
yes |
yes |
|||||
batches |
Number of calibration batches |
integer |
Number of batches to calibrate the model when run in INT8 mode |
100 |
no |
|||||||
experiment_spec |
Experiment Spec |
string |
UNIX path to the Experiment spec file used to train the model. This may be the train or retrain spec file. |
hidden from train experiment |
yes |
|||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
hidden |
yes |
|||||||
force_ptq |
Force Post-Training Quantization |
bool |
Force generating int8 engine using Post Training Quantization |
TRUE |
no |
|||||||
engine-file |
Engine File |
hidden |
UNIX path to the model engine file. |
/export/input_model_file.<data_type>.trt |
yes |
|||||||
key |
Encryption Key |
hidden |
Encryption key |
tlt_encode |
yes |
|||||||
batch_size |
Batch size |
integer |
Number of images per batch when generating the TensorRT engine. |
16 |
yes |
|||||||
cal_cache_file |
Calibration cache file |
string |
Unix PATH to the int8 calibration cache file |
hidden |
yes |
yes |
inference
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
integer |
The version of this schema |
1 |
||||||||
threshold |
float |
0.3 |
||||||||||
random_seed |
Random Seed |
integer |
Random seed |
42 |
||||||||
dataset_config |
Dataset |
collection |
Dataset configuration |
|||||||||
dataset_config.data_sources |
Data Source |
hidden |
Data source |
|||||||||
dataset_config.data_sources.image_directory_path |
Image Directory |
hidden |
Relative path to the directory of images for training |
|||||||||
dataset_config.data_sources.root_path |
Root Path |
hidden |
The root path |
|||||||||
dataset_config.data_sources.source_weight |
Source Weight |
hidden |
The weighting for the source |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Directory Path |
hidden |
The path to the directory of labels for training |
|||||||||
dataset_config.data_sources.tfrecords_path |
TFRecords Path |
hidden |
The path to the TFRecords data for training |
|||||||||
dataset_config.target_class_mapping |
Target Class Mapping |
collection |
The Mapping from source class names to target class names |
|||||||||
Class you want to train for (vehicle) |
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
Class defined in the label file (car, truck, suv -> map to vehicle) |
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
dataset_config.validation_fold |
Validation Fold |
integer |
The percentage of the entire dataset to be used as validation data |
0 |
||||||||
dataset_config.validation_data_sources |
Validation Data Sources |
hidden |
The definition is the same as training data sources |
|||||||||
dataset_config.include_difficult_in_training |
Include Difficult Objects in Training |
bool |
Whether or not to include difficult objects in training |
FALSE |
||||||||
dataset_config.type |
Type |
string |
Dataset type, either kitti or coco |
kitti |
||||||||
dataset_config.image_extension |
Image Extension |
string |
The image extension |
|||||||||
dataset_config.is_monochrome |
Is Monochrome |
bool |
Whether or not the images are monochrome(grayscale) |
FALSE |
||||||||
augmentation_config |
Data Augmentation |
collection |
Data augmentation configuration |
|||||||||
augmentation_config.hue |
Hue |
float |
Hue variance |
0.1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation variance |
1.5 |
||||||||
augmentation_config.exposure |
Exposure |
float |
Exposure |
1.5 |
||||||||
augmentation_config.vertical_flip |
Vertical Flip Probability |
float |
Probability of vertical flip |
0 |
||||||||
augmentation_config.horizontal_flip |
Horizontal Flip |
float |
Probability of horizontal flip |
0.5 |
||||||||
augmentation_config.jitter |
Jitter |
float |
Jitter |
0.3 |
||||||||
augmentation_config.output_width |
Output Width |
integer |
Output Image Width |
1248 |
||||||||
augmentation_config.output_height |
Output Height |
integer |
Output Image Height |
384 |
||||||||
augmentation_config.output_channel |
Output Channel |
integer |
Output Image Channel |
3 |
||||||||
augmentation_config.randomize_input_shape_period |
Randomize Input Shape Period |
integer |
Period(in number of epochs) to randomize input shape for multi-scale training |
0 |
||||||||
augmentation_config.image_mean |
Image Mean |
collection |
per-channel image mean values |
|||||||||
augmentation_config.image_mean.key |
string |
|||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||
training_config |
Training |
collection |
Training configuration |
|||||||||
training_config.batch_size_per_gpu |
Batch Size per GPU |
integer |
Batch size per GPU in training |
8 |
||||||||
training_config.num_epochs |
Number of Epochs |
integer |
Number of Epochs to run the training |
80 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule |
Soft Start Annealing Schedule |
collection |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate, example: 1e-7 |
1.00E-06 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
Maximum learning rate. example: 1e-4 |
1.00E-04 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
progress(in percentage) for warm up: example 0.3 |
0.1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
progress(in percentage) for decreasing learning rate |
0.5 |
||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
maximum learning rate |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start |
Soft Start |
float |
progress(in percentage) for warm up |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate |
|||||||||
training_config.regularizer |
Regularizer |
collection |
||||||||||
training_config.regularizer.type |
Type |
string |
Type of regularizer, either NO_REG, L1 or L2 |
__L1__ |
||||||||
training_config.regularizer.weight |
Weight |
float |
weight decay of regularizer |
3.00E-05 |
||||||||
training_config.optimizer.adam |
Adam |
collection |
||||||||||
training_config.optimizer.adam.epsilon |
Epsilon |
float |
Epsilon of Adam |
1.00E-07 |
||||||||
training_config.optimizer.adam.beta1 |
Beta1 |
float |
beta1 of Adam |
0.9 |
||||||||
training_config.optimizer.adam.beta2 |
Beta 2 |
float |
beta2 of Adam |
0.999 |
||||||||
training_config.optimizer.adam.amsgrad |
AMSGrad |
bool |
AMSGrad of Adam |
FALSE |
||||||||
training_config.optimizer.sgd |
SGD |
collection |
||||||||||
training_config.optimizer.sgd.momentum |
Momentum |
float |
momentum of sgd (example: 0.9) |
|||||||||
training_config.optimizer.sgd.nesterov |
Nesterov |
bool |
nesterov of sgd (example: FALSE) |
|||||||||
training_config.optimizer.rmsprop |
RMSProp |
collection |
||||||||||
training_config.optimizer.rmsprop.rho |
Rho |
float |
rho of RMSProp |
|||||||||
training_config.optimizer.rmsprop.momentum |
Momentum |
float |
momentum of RMSProp |
|||||||||
training_config.optimizer.rmsprop.epsilon |
Epsilon |
float |
epsilon of RMSProp |
|||||||||
training_config.optimizer.rmsprop.centered |
Centered |
bool |
centered of RMSProp |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
Period(in number of epochs) to save checkpoints |
10 |
||||||||
training_config.enable_qat |
QAT |
bool |
Enable QAT or not |
FALSE |
||||||||
training_config.resume_model_path |
Resume Model Path |
hidden |
Path of the model to be resumed |
|||||||||
training_config.pretrain_model_path |
Pretrained Model Path |
hidden |
Path of the pretrained model |
|||||||||
training_config.pruned_model_path |
Pruned Model Path |
hidden |
Path of the pruned model |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
3 |
||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
4 |
||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
FALSE |
||||||||
yolov3_config |
YOLOv3 |
collection |
||||||||||
yolov3_config.big_anchor_shape |
Big Anchor Shape |
string |
Big anchor shapes in string |
[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] |
||||||||
yolov3_config.mid_anchor_shape |
Middle Anchor Shape |
string |
Middle anchor shapes in string |
[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] |
||||||||
yolov3_config.small_anchor_shape |
Small Anchor Shape |
string |
Small anchor shapes in string |
[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] |
||||||||
yolov3_config.matching_neutral_box_iou |
float |
0.7 |
||||||||||
yolov3_config.arch |
Arch |
string |
backbone(architecture) |
resnet |
||||||||
yolov3_config.nlayers |
Number of Layers |
integer |
number of layers for this architecture |
18 |
||||||||
yolov3_config.arch_conv_blocks |
Extra Convolution Blocks |
integer |
Number of extra convolution blocks |
2 |
||||||||
yolov3_config.loss_loc_weight |
weighting for location loss |
float |
weighting factor for location loss |
0.8 |
||||||||
yolov3_config.loss_neg_obj_weights |
weighting for loss of negative objects |
float |
weighting factor for loss of negative objects |
100 |
||||||||
yolov3_config.loss_class_weights |
weighting for classification loss |
float |
weighting factor for classification loss |
1 |
||||||||
yolov3_config.freeze_blocks |
Freeze Blocks |
list |
ID of blocks to be frozen during training |
|||||||||
yolov3_config.freeze_bn |
Freeze BN |
bool |
Whether or not to freeze BatchNormalization layers |
FALSE |
||||||||
yolov3_config.force_relu |
Force ReLU |
bool |
Whether or not to force activation function to ReLU |
FALSE |
||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.001 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.5 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||
nms_config.force_on_cpu |
Force on CPU |
bool |
Force NMS to run on CPU in training |
TRUE |
||||||||
eval_config.average_precision_mode |
AP Mode |
enum |
Average Precision mode, either __SAMPLE__ or __INTEGRATE__ |
__SAMPLE__ |
||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
8 |
||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
prune
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
const |
The version of this schema |
no |
||||||||
pruning_threshold |
Pruning Threshold |
float |
Threshold to compare normalized norm against. |
0.1 |
0 |
1 |
yes |
yes |
||||
pruning_granularity |
Pruning Granularity |
integer |
Number of filters to remove at a time. |
8 |
no |
|||||||
min_num_filters |
Minimum number of filters |
integer |
Minimum number of filters to be kept per layer |
16 |
no |
|||||||
equalization_criterion |
Equalization Criterion |
string |
Criterion to equalize the statistics of inputs to an element-wise op layer. |
union |
union, intersection, arithmetic_mean, geometric_mean |
no |
||||||
model |
Model path |
hidden |
UNIX path to where the input model is located. |
hidden |
yes |
|||||||
output_file |
Output File |
hidden |
UNIX path to where the pruned model will be saved. |
hidden |
yes |
train
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
integer |
The version of this schema |
1 |
||||||||
random_seed |
Random Seed |
integer |
Random seed |
42 |
||||||||
dataset_config |
Dataset |
collection |
Dataset configuration |
|||||||||
dataset_config.data_sources |
Data Source |
hidden |
Data source |
|||||||||
dataset_config.data_sources.image_directory_path |
Image Directory |
hidden |
Relative path to the directory of images for training |
|||||||||
dataset_config.data_sources.root_path |
Root Path |
hidden |
The root path |
|||||||||
dataset_config.data_sources.source_weight |
Source Weight |
hidden |
The weighting for the source |
|||||||||
dataset_config.data_sources.label_directory_path |
Label Directory Path |
hidden |
The path to the directory of labels for training |
|||||||||
dataset_config.data_sources.tfrecords_path |
TFRecords Path |
hidden |
The path to the TFRecords data for training |
|||||||||
dataset_config.target_class_mapping |
Target Class Mapping |
collection |
The Mapping from source class names to target class names |
|||||||||
Class you want to train for (vehicle) |
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
Class defined in the label file (car, truck, suv -> map to vehicle) |
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
||||||
dataset_config.validation_fold |
Validation Fold |
integer |
The percentage of the entire dataset to be used as validation data |
0 |
||||||||
dataset_config.validation_data_sources |
Validation Data Sources |
hidden |
The definition is the same as training data sources |
|||||||||
dataset_config.include_difficult_in_training |
Include Difficult Objects in Training |
bool |
Whether or not to include difficult objects in training |
FALSE |
||||||||
dataset_config.type |
Type |
string |
Dataset type, either kitti or coco |
kitti |
||||||||
dataset_config.image_extension |
Image Extension |
string |
The image extension |
|||||||||
dataset_config.is_monochrome |
Is Monochrome |
bool |
Whether or not the images are monochrome(grayscale) |
FALSE |
||||||||
augmentation_config |
Data Augmentation |
collection |
Data augmentation configuration |
|||||||||
augmentation_config.hue |
Hue |
float |
Hue variance |
0.1 |
||||||||
augmentation_config.saturation |
Saturation |
float |
Saturation variance |
1.5 |
||||||||
augmentation_config.exposure |
Exposure |
float |
Exposure |
1.5 |
||||||||
augmentation_config.vertical_flip |
Vertical Flip Probability |
float |
Probability of vertical flip |
0 |
||||||||
augmentation_config.horizontal_flip |
Horizontal Flip |
float |
Probability of horizontal flip |
0.5 |
||||||||
augmentation_config.jitter |
Jitter |
float |
Jitter |
0.3 |
||||||||
augmentation_config.output_width |
Output Width |
integer |
Output Image Width |
1248 |
||||||||
augmentation_config.output_height |
Output Height |
integer |
Output Image Height |
384 |
||||||||
augmentation_config.output_channel |
Output Channel |
integer |
Output Image Channel |
3 |
||||||||
augmentation_config.randomize_input_shape_period |
Randomize Input Shape Period |
integer |
Period(in number of epochs) to randomize input shape for multi-scale training |
0 |
||||||||
augmentation_config.image_mean |
Image Mean |
collection |
per-channel image mean values |
|||||||||
augmentation_config.image_mean.key |
string |
|||||||||||
augmentation_config.image_mean.value |
float |
|||||||||||
training_config |
Training |
collection |
Training configuration |
|||||||||
training_config.batch_size_per_gpu |
Batch Size per GPU |
integer |
Batch size per GPU in training |
8 |
||||||||
training_config.num_epochs |
Number of Epochs |
integer |
Number of Epochs to run the training |
80 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule |
Soft Start Annealing Schedule |
collection |
||||||||||
training_config.learning_rate.soft_start_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate, example: 1e-7 |
1.00E-06 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
Maximum learning rate. example: 1e-4 |
1.00E-04 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.soft_start |
Soft Start |
float |
Progress (in percentage) for warm-up, for example 0.3 (see the sketch after this table) |
0.1 |
||||||||
training_config.learning_rate.soft_start_annealing_schedule.annealing |
Annealing |
float |
Progress (in percentage) at which the learning rate starts decreasing |
0.5 |
||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.max_learning_rate |
Max Learning Rate |
float |
maximum learning rate |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.soft_start |
Soft Start |
float |
progress(in percentage) for warm up |
|||||||||
training_config.learning_rate.soft_start_cosine_annealing_schedule.min_learning_rate |
Min Learning Rate |
float |
Minimum learning rate |
|||||||||
training_config.regularizer |
Regularizer |
collection |
||||||||||
training_config.regularizer.type |
Type |
string |
Type of regularizer, either NO_REG, L1 or L2 |
__L1__ |
||||||||
training_config.regularizer.weight |
Weight |
float |
weight decay of regularizer |
3.00E-05 |
||||||||
training_config.optimizer.adam |
Adam |
collection |
||||||||||
training_config.optimizer.adam.epsilon |
Epsilon |
float |
Epsilon of Adam |
1.00E-07 |
||||||||
training_config.optimizer.adam.beta1 |
Beta1 |
float |
beta1 of Adam |
0.9 |
||||||||
training_config.optimizer.adam.beta2 |
Beta 2 |
float |
beta2 of Adam |
0.999 |
||||||||
training_config.optimizer.adam.amsgrad |
AMSGrad |
bool |
AMSGrad of Adam |
FALSE |
||||||||
training_config.optimizer.sgd |
SGD |
collection |
||||||||||
training_config.optimizer.sgd.momentum |
Momentum |
float |
momentum of sgd (example: 0.9) |
|||||||||
training_config.optimizer.sgd.nesterov |
Nesterov |
bool |
nesterov of sgd (example: FALSE) |
|||||||||
training_config.optimizer.rmsprop |
RMSProp |
collection |
||||||||||
training_config.optimizer.rmsprop.rho |
Rho |
float |
rho of RMSProp |
|||||||||
training_config.optimizer.rmsprop.momentum |
Momentum |
float |
momentum of RMSProp |
|||||||||
training_config.optimizer.rmsprop.epsilon |
Epsilon |
float |
epsilon of RMSProp |
|||||||||
training_config.optimizer.rmsprop.centered |
Centered |
bool |
centered of RMSProp |
|||||||||
training_config.checkpoint_interval |
Checkpoint Interval |
integer |
Period(in number of epochs) to save checkpoints |
10 |
||||||||
training_config.enable_qat |
QAT |
bool |
Enable QAT or not |
FALSE |
||||||||
training_config.resume_model_path |
Resume Model Path |
hidden |
Path of the model to be resumed |
|||||||||
training_config.pretrain_model_path |
Pretrained Model Path |
hidden |
Path of the pretrained model |
|||||||||
training_config.pruned_model_path |
Pruned Model Path |
hidden |
Path of the pruned model |
|||||||||
training_config.max_queue_size |
Max Queue Size |
integer |
Maximum Queue Size in Sequence Dataset |
3 |
||||||||
training_config.n_workers |
Workers |
integer |
Number of workers in sequence dataset |
4 |
||||||||
training_config.use_multiprocessing |
Use Multiprocessing |
bool |
Use multiprocessing or not |
FALSE |
||||||||
yolov3_config |
YOLOv3 |
collection |
||||||||||
yolov3_config.big_anchor_shape |
Big Anchor Shape |
string |
Big anchor shapes in string |
[(114.94, 60.67), (159.06, 114.59), (297.59, 176.38)] |
||||||||
yolov3_config.mid_anchor_shape |
Middle Anchor Shape |
string |
Middle anchor shapes in string |
[(42.99, 31.91), (79.57, 31.75), (56.80, 56.93)] |
||||||||
yolov3_config.small_anchor_shape |
Small Anchor Shape |
string |
Small anchor shapes in string |
[(15.60, 13.88), (30.25, 20.25), (20.67, 49.63)] |
||||||||
yolov3_config.matching_neutral_box_iou |
float |
0.7 |
||||||||||
yolov3_config.arch |
Arch |
string |
backbone(architecture) |
resnet |
||||||||
yolov3_config.nlayers |
Number of Layers |
integer |
number of layers for this architecture |
18 |
||||||||
yolov3_config.arch_conv_blocks |
Extra Convolution Blocks |
integer |
Number of extra convolution blocks |
2 |
||||||||
yolov3_config.loss_loc_weight |
weighting for location loss |
float |
weighting factor for location loss |
0.8 |
||||||||
yolov3_config.loss_neg_obj_weights |
weighting for loss of negative objects |
float |
weighting factor for loss of negative objects |
100 |
||||||||
yolov3_config.loss_class_weights |
weighting for classification loss |
float |
weighting factor for classification loss |
1 |
||||||||
yolov3_config.freeze_blocks |
Freeze Blocks |
list |
ID of blocks to be frozen during training |
|||||||||
yolov3_config.freeze_bn |
Freeze BN |
bool |
Whether or not to freeze BatchNormalization layers |
FALSE |
||||||||
yolov3_config.force_relu |
Force ReLU |
bool |
Whether or not to force activation function to ReLU |
FALSE |
||||||||
nms_config.confidence_threshold |
Confidence Threshold |
float |
Confidence threshold |
0.001 |
||||||||
nms_config.clustering_iou_threshold |
IoU threshold |
float |
IoU threshold |
0.5 |
||||||||
nms_config.top_k |
Top K |
integer |
Maximum number of objects after NMS |
200 |
||||||||
nms_config.infer_nms_score_bits |
NMS Score Bits |
integer |
Number of bits for scores for optimized NMS |
|||||||||
nms_config.force_on_cpu |
Force on CPU |
bool |
Force NMS to run on CPU in training |
TRUE |
||||||||
eval_config.average_precision_mode |
AP Mode |
enum |
Average Precision mode, either __SAMPLE__ or __INTEGRATE__ |
__SAMPLE__ |
||||||||
eval_config.batch_size |
Batch Size |
integer |
batch size for evaluation |
8 |
||||||||
eval_config.matching_iou_threshold |
Matching IoU Threshold |
float |
IoU threshold |
0.5 |
||||||||
eval_config.visualize_pr_curve |
Visualize PR Curve |
bool |
Whether or not to visualize precision-recall curve |
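The soft_start_annealing_schedule parameters above are all fractions of total training progress. The sketch below is one minimal interpretation of such a schedule — log-linear warm-up from min_learning_rate to max_learning_rate until soft_start, a plateau until annealing, then log-linear decay back to min_learning_rate — using the defaults from the table. The exact interpolation used by the trainer may differ; treat this as an illustration of what the parameters control, not as the implementation.

```python
import math

def soft_start_annealing_lr(progress, min_lr=1e-6, max_lr=1e-4,
                            soft_start=0.1, annealing=0.5):
    """Illustrative learning rate as a function of training progress in [0, 1]."""
    log_min, log_max = math.log(min_lr), math.log(max_lr)
    if progress < soft_start:        # warm-up phase
        t = progress / soft_start
        return math.exp(log_min + t * (log_max - log_min))
    if progress < annealing:         # plateau at the maximum learning rate
        return max_lr
    t = (progress - annealing) / (1.0 - annealing)  # annealing phase
    return math.exp(log_max - t * (log_max - log_min))

for p in (0.0, 0.05, 0.1, 0.3, 0.5, 0.75, 1.0):
    print(f"progress={p:.2f}  lr={soft_start_annealing_lr(p):.2e}")
```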
convert
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
---|---|---|---|---|---|---|---|---|---|---|---|
b |
batch_size |
integer |
calibration batch size |
8 |
yes |
||||||
c |
cache_file |
path |
calibration cache file (default cal.bin) |
||||||||
d |
input_dims |
list |
comma separated list of input dimensions (not required for TLT 3.0 new models). |
||||||||
i |
input_order |
enum |
input dimension ordering |
nchw |
nchw, nhwc, nc |
||||||
m |
max_batch_size |
integer |
maximum TensorRT engine batch size (default 16). If you run into an out-of-memory issue, decrease the batch size accordingly. |
16 |
yes |
||||||
o |
outputs |
list |
comma separated list of output node names |
||||||||
p |
parse_profile_shapes |
list |
comma separated list of optimization profile shapes in the format <input_name>,<min_shape>,<opt_shape>,<max_shape>, where each shape uses x as the delimiter, e.g., NxC, NxCxHxW, NxCxDxHxW. Can be specified multiple times if the model has multiple input tensors. This argument is only useful in the dynamic-shape case. |
||||||||
s |
strict_type_constraints |
bool |
TensorRT strict_type_constraints flag for INT8 mode |
FALSE |
|||||||
t |
data_type |
enum |
TensorRT data type |
fp32 |
fp32, fp16, int8 |
yes |
|||||
u |
dla_core |
int |
Use DLA core N for layers that support DLA (default = -1, which means no DLA core will be used for inference; GPU fallback is always allowed). |
-1 |
|||||||
w |
max_workspace_size |
int |
maximum workspace size of the TensorRT engine (default 1<<30). If you run into an out-of-memory issue, increase the workspace size accordingly. |
1<<30, 2<<30 |
|||||||
platform |
platform |
enum |
platform label |
rtx |
yes |
yes |
evaluate
parameter |
display_name |
value_type |
description |
default_value |
examples |
valid_min |
valid_max |
valid_options |
required |
regex |
popular |
valid_options_description |
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
version |
Schema Version |
integer |
The version of this schema |
1 |
|||||||||
random_seed |
Random Seed |
integer |
Random seed |
42 |
|||||||||
dataset_config |
Dataset |
collection |
Dataset configuration |
||||||||||
dataset_config.data_sources |
Data Source |
hidden |
Data source |
||||||||||
dataset_config.data_sources.image_directory_path |
Image Directory |
hidden |
Relative path to the directory of images for training |
||||||||||
dataset_config.data_sources.root_path |
Root Path |
hidden |
The root path |
||||||||||
dataset_config.data_sources.source_weight |
Source Weight |
hidden |
The weighting for the source |
||||||||||
dataset_config.data_sources.label_directory_path |
Label Directory Path |
hidden |
The path to the directory of labels for training |
||||||||||
dataset_config.data_sources.tfrecords_path |
TFRecords Path |
hidden |
The path to the TFRecords data for training |
||||||||||
dataset_config.target_class_mapping |
Target Class Mapping |
collection |
The Mapping from source class names to target class names |
||||||||||
Class you want to train for (vehicle) |
dataset_config.target_class_mapping.key |
Class Key |
string |
The “key” field is the value of the class name in the tfrecords file. |
person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||
Class defined in the label file (car, truck, suv -> map to vehicle) |
dataset_config.target_class_mapping.value |
Class Value |
string |
The “value” field corresponds to the value that the network is expected to learn. |
masked-person |
^[-a-zA-Z0-9_]{1,40}$ |
|||||||
dataset_config.validation_fold |
Validation Fold |
integer |
The percentage of the entire dataset to be used as validation data |
0 |
|||||||||
dataset_config.validation_data_sources |
Validation Data Sources |
hidden |
The definition is the same as training data sources |
||||||||||
dataset_config.include_difficult_in_training |
Include Difficult Objects in Training |
bool |
Whether or not to include difficult objects in training |
FALSE |
TRUE, False |
||||||||
dataset_config.type |
Type |
string |
Dataset type, either kitti or coco |
kitti |
|||||||||
dataset_config. |