NVIDIA Clara Train 4.1
1.0

Bring your own Inference

By default, AIAA uses TritonInference. This means that inference requests are sent to Triton, which lets Triton leverage multiple GPUs.

You can also write your own inference procedure by extending the base class below. Just make sure you put your custom inference in the <AIAA workspace>/lib folder.

Below is the base class of Inference.

Copy
Copied!
            

class Inference:
    """Base interface that an AIAA inference implementation extends.

    Subclasses provide ``inference`` to run a model and ``close`` to release
    whatever they opened.
    """

    # NOTE(review): as shown, the class does not derive from ``abc.ABC``, so
    # ``@abstractmethod`` on its own does not block instantiation -- confirm
    # against the actual AIAA source.

    @abstractmethod
    def inference(self, name, data, config: ModelConfig, triton_config):
        """Defines an inference procedure.

        Args:
            name: Identifier of the model/request being served
                (presumably the model name -- TODO confirm).
            data: Payload carrying the model input; presumably a dict keyed
                by the configured input/output names -- TODO confirm.
            config: The ``ModelConfig`` of the model being run.
            triton_config: Triton-related settings; semantics are not
                visible here.
        """

    @abstractmethod
    def close(self):
        """Closes any resources you might have opened."""

The following is an example of a custom inference class, CustomInference.

Copy
Copied!
            

import logging

import torch

from aiaa.configs.modelconfig import ModelConfig
from aiaa.inference.inference import Inference
from aiaa.utils.class_utils import instantiate_class


class CustomInference(Inference):
    """Example inference that runs a PyTorch/TorchScript model inside AIAA.

    The model is loaded lazily on the first ``inference`` call, either as a
    TorchScript archive (when ``network`` is ``None``) or as a checkpoint
    loaded into a network built from ``network['name']``/``network['args']``.
    """

    def __init__(
            self,
            is_batched_data=False,
            network=None,
            device='cuda'
    ):
        """Stores the configuration; no model is loaded here.

        Args:
            is_batched_data: When False, a leading batch axis is added to the
                input before it is passed to the model.
            network: Optional dict with ``'name'`` and ``'args'`` entries used
                to construct the network; ``None`` means the model file is a
                TorchScript archive.
            device: Device the model and the inputs are moved to.
        """
        self.network = network
        self.device = device
        self.is_batched_data = is_batched_data
        self.model = None

    def inference(self, name, data, config: ModelConfig, triton_config):
        """Runs the model on ``data`` and stores the prediction back into it.

        Args:
            name: Name used in the log message for this run.
            data: Mapping holding the input under the configured input key
                (or the input itself when no input key is configured).
            config: Model configuration supplying the input/output keys and
                the model path.
            triton_config: Unused by this implementation.

        Returns:
            ``data`` updated with the first element of the model output under
            the configured output key.
        """
        logger = logging.getLogger(__name__)
        logger.info('Run CustomInference for:{}'.format(name))

        if self.model is None:
            self._init_context(config)

        input_key = config.get_inference_input()
        output_key = config.get_inference_output()
        logger.debug('Input Key:{}; Output Key:{}'.format(input_key, output_key))

        inputs = data[input_key] if input_key else data
        inputs = inputs if torch.is_tensor(inputs) else torch.from_numpy(inputs)
        # Add the batch axis the model expects when the caller did not.
        inputs = inputs if self.is_batched_data else inputs[None]
        inputs = inputs.to(self.device)

        logger.info('Input Shape:{}'.format(inputs.shape))
        logger.info('Input Type:{}'.format(type(inputs)))

        outputs = self._simple_inference(inputs)
        logger.info('Output Shape:{}'.format(outputs.shape))

        # Only the first element along the leading axis is kept.
        outputs = outputs[0]
        data.update({output_key: outputs})
        return data

    def _init_context(self, config: ModelConfig):
        """Loads the model from ``config.get_path()`` once and caches it."""
        logger = logging.getLogger(__name__)
        # Explicit None check (same as in ``inference``): module containers
        # that define ``__len__`` can be falsy even though they are loaded.
        if self.model is not None:
            return

        if self.network is None:
            logger.info('Loading TorchScript Model from:{}'.format(config.get_path()))
            # map_location lets a model saved on one device load on another
            # (e.g. a GPU-saved model with device='cpu').
            model = torch.jit.load(config.get_path(), map_location=self.device)
        else:
            name = self.network['name']
            args = self.network['args']
            logger.info('Loading PyTorch Model Checkpoints from:{}'.format(config.get_path()))
            logger.info('Constructing PyTorch Model Network{}'.format(name))

            model = instantiate_class(name, args)
            model.load_state_dict(
                torch.load(config.get_path(), map_location=self.device)
            )

        model.to(self.device)
        model.eval()
        self.model = model

    def _simple_inference(self, inputs):
        """Runs a forward pass with gradient tracking disabled."""
        with torch.no_grad():
            outputs = self.model(inputs)
        return outputs

    def close(self):
        """Closes the model if it exposes ``close`` and drops the reference."""
        if self.model is not None and hasattr(self.model, 'close'):
            self.model.close()
        self.model = None

Let’s save this custom inference in custom_inference.py and copy it into the <AIAA workspace>/lib folder.

Then you can use it in AIAA config, for example:

Copy
Copied!
            

{
  "version": 1,
  "type": "annotation",
  "labels": ["custom_organ"],
  "description": "Custom Model to segment custom organ with user clicks",
  "pre_transforms": [
    {
      "name": "monai.transforms.LoadImaged",
      "args": {
        "keys": "image"
      }
    },
    {
      "name": "aiaa.apps.dextr3d.transforms.PointsToImaged",
      "args": {
        "keys": "points",
        "ref_image": "image"
      }
    },
    {
      "name": "monai.transforms.AddChanneld",
      "args": {
        "keys": ["image", "points"]
      }
    },
    {
      "name": "monai.transforms.Spacingd",
      "args": {
        "keys": ["image", "points"],
        "pixdim": [1.0, 1.0, 1.0]
      }
    },
    {
      "name": "monai.transforms.ScaleIntensityRanged",
      "args": {
        "keys": "image",
        "a_min": -1024,
        "a_max": 1024,
        "b_min": -1.0,
        "b_max": 1.0,
        "clip": true
      }
    },
    {
      "name": "aiaa.apps.dextr3d.transforms.CropForegroundd",
      "args": {
        "keys": ["image", "points"],
        "source_key": "points",
        "margin": 20
      }
    },
    {
      "name": "monai.transforms.AddExtremePointsChanneld",
      "args": {
        "keys": "image",
        "label_key": "points",
        "sigma": 3,
        "pert": 0
      }
    },
    {
      "name": "aiaa.apps.dextr3d.transforms.Resized",
      "args": {
        "keys": "image",
        "shape": [128, 128, 128],
        "device": "cuda"
      }
    }
  ],
  "inference": {
    "input": "image",
    "output": "pred",
    "AIAA": {
      "name": "custom_inference.CustomInference",
      "args": {
        "is_batched_data": false
      }
    }
  },
  "post_transforms": [
    {
      "name": "monai.transforms.AddChanneld",
      "args": {
        "keys": "pred"
      }
    },
    {
      "name": "monai.transforms.Activationsd",
      "args": {
        "keys": "pred",
        "softmax": true
      }
    },
    {
      "name": "monai.transforms.AsDiscreted",
      "args": {
        "keys": "pred",
        "argmax": true
      }
    },
    {
      "name": "monai.transforms.SqueezeDimd",
      "args": {
        "keys": "pred",
        "dim": 0
      }
    },
    {
      "name": "monai.transforms.ToNumpyd",
      "args": {
        "keys": "pred"
      }
    },
    {
      "name": "aiaa.apps.dextr3d.transforms.ReverseResized",
      "args": {
        "keys": "pred",
        "ref_shape_key": "image",
        "device": "cuda"
      }
    },
    {
      "name": "aiaa.apps.dextr3d.transforms.RestoreCroppedLabeld",
      "args": {
        "keys": "pred",
        "ref_image": "image"
      }
    }
  ],
  "writer": {
    "name": "aiaa.transforms.Writer",
    "args": {
      "image": "pred",
      "json": "result"
    }
  }
}

Let’s save this config to config_aiaa.json. Then we can load the model using the curl command below:

Copy
Copied!
            

curl -X PUT "http://127.0.0.1:$AIAA_PORT/admin/model/custom_model" \
     -F "config=@config_aiaa.json;type=application/json" \
     -F "data=@[where you store the model]/model.ts"

Note

Note that this custom inference only works with the AIAA backend.

© Copyright 2020, NVIDIA. Last updated on Feb 2, 2023.