Description
I have a DALI pipeline like the one below:
```python
import nvidia.dali as dali
import nvidia.dali.types as types

BATCH_SIZE = 16
INPUT_SIZE = 640
FILL_VALUE = 114
NUM_THREAD = 32


def parse_args():
    import argparse
    parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file")
    parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline')
    return parser.parse_args()


def get_shift(images):
    # from the encoded image's shape, compute the translation needed to center the
    # letterboxed image and the scale ratio applied by the resize
    shapes = dali.fn.peek_image_shape(images)[:2]
    max_size = dali.fn.reductions.max(shapes, axes=[0])
    index = dali.fn.cast((shapes == max_size), dtype=types.INT64)
    min_size = dali.fn.reductions.min(shapes, axes=[0])
    scale_ratio = INPUT_SIZE / max_size
    dst_min_size = dali.fn.cast(min_size * scale_ratio, dtype=types.INT64)
    shift = (INPUT_SIZE - dali.fn.stack(dst_min_size, dst_min_size)) // 2
    shift = shift * index
    scale_ratio = dali.fn.reshape(scale_ratio, shape=[1])
    return shift, scale_ratio


@dali.pipeline_def(batch_size=BATCH_SIZE, num_threads=NUM_THREAD, device_id=0)
def simple_pipeline():
    orig_image_bytes = dali.fn.external_source(device="cpu", name="orig_image_bytes")
    shift, scale_ratio = get_shift(orig_image_bytes)
    shift = dali.fn.cast(shift, dtype=types.FLOAT)
    # decode on the GPU, resize so that neither side exceeds INPUT_SIZE,
    # then pad and translate to produce a centered 640x640 letterbox
    orig_image_decode = dali.fn.decoders.image(orig_image_bytes, device="mixed", output_type=types.RGB)
    image = dali.fn.resize(orig_image_decode, size=INPUT_SIZE, mode="not_larger")
    image = dali.fn.pad(image, axis_names="HW", shape=[INPUT_SIZE, INPUT_SIZE], fill_value=FILL_VALUE)
    mt = dali.fn.transforms.translation(offset=shift)
    image = dali.fn.warp_affine(image, matrix=mt, fill_value=FILL_VALUE, inverse_map=False)
    image = dali.fn.cast(image, dtype=types.FLOAT)
    image = dali.fn.transpose(image, perm=[2, 0, 1])  # HWC -> CHW
    image = image / 255.0
    return orig_image_decode, image, shift, scale_ratio


def main(filename):
    simple_pipeline().serialize(filename=filename)


if __name__ == '__main__':
    args = parse_args()
    main(args.file_path)
```
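For reference, the pipeline can be smoke-tested locally before serializing it by feeding one batch of raw JPEG bytes to the external source. This is only a sketch; `sample.jpg` is a hypothetical file and not part of the deployment:

```python
import numpy as np

# minimal local check of the pipeline above (a sketch; "sample.jpg" is hypothetical)
pipe = simple_pipeline()
pipe.build()

# the external source has no `source` callback, so a batch of encoded bytes
# is fed in by name before running the pipeline once
raw = [np.fromfile("sample.jpg", dtype=np.uint8)] * BATCH_SIZE
pipe.feed_input("orig_image_bytes", raw)

decoded, letterboxed, shift, scale_ratio = pipe.run()
# the decoded and letterboxed outputs come from GPU operators, so copy them to the host to inspect
print(np.array(letterboxed.as_cpu()[0]).shape)   # expected: (3, 640, 640)
print(np.array(shift[0]), np.array(scale_ratio[0]))
```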
The pipeline accepts raw image bytes and decodes them for the subsequent processing steps; I use DALI because it can use the GPU to accelerate decoding. The pipeline is loaded by Triton Inference Server with the following config.pbtxt:
name: "yolov7-preprocess"
backend: "dali"
max_batch_size: 16
input [
{
name: "orig_image_bytes"
data_type: TYPE_UINT8
dims: [ -1 ]
}
]
output [
{
name: "orig_image_decode"
data_type: TYPE_UINT8
dims: [-1,-1,3]
},
{
name: "letterboxed-image"
data_type: TYPE_FP32
dims: [3,640,640]
},
{
name: "shift"
data_type: TYPE_FP32
dims: [2]
},
{
name: "scale_ratio"
data_type: TYPE_FP32
dims: [1],
reshape:{shape:[1,1]}
}
]
dynamic_batching {
}
parameters: [
{
key: "num_threads"
value: { string_value: "16" }
}
]
Because each image has a different byte length, the input dimension is declared as [ -1 ]. I use the Triton client to send inference requests to the model:
```python
import numpy as np
import tritonclient.http as httpclient

model_name = "yolov7-preprocess"
model_url = "xxxxxxxxxxxxxxxxx"


def post_infer_request(batch_image_file_path):
    images = []
    for image_path in batch_image_file_path:
        image = np.fromfile(image_path, dtype="uint8")
        image = image[np.newaxis, :]
        images.append(image)
    images_input = np.vstack(images)

    inputs = []
    inputs.append(
        httpclient.InferInput("orig_image_bytes", images_input.shape, "UINT8")
    )
    inputs[0].set_data_from_numpy(images_input)  # attach the stacked bytes to the input

    triton_client = httpclient.InferenceServerClient(
        url=model_url, verbose=False
    )
    outputs = []
    outputs.append(httpclient.InferRequestedOutput('letterboxed-image', binary_data=True))
    result = triton_client.infer(model_name, inputs, outputs=outputs)
    return result
```
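The requested output can then be read back from the returned `InferResult` as a NumPy array (the image paths below are hypothetical, and this only works while all images have the same byte length):

```python
result = post_infer_request(["a.jpg", "b.jpg"])      # hypothetical, equally sized image files
letterboxed = result.as_numpy("letterboxed-image")   # float32, shape [batch, 3, 640, 640]
```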
For images of identical size, I can use np.vstack to combine the batch into one input because they all have the same byte length; for example, with 4 images of 430226 bytes each, I can compose them into a single input of shape [4, 430226]. In reality, however, the images do not all have the same size, so they cannot be stacked together. Is there any way to send a batch of image data in a single request in this situation?
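One workaround that is sometimes suggested (only a sketch, not verified against this exact pipeline) is to pad every encoded image with trailing zero bytes up to the longest one in the batch, so that the arrays can be stacked into a dense `[batch, max_len]` input; this assumes the decoder ignores whatever follows the end of the encoded stream:

```python
import numpy as np

# sketch: pad each encoded image with zero bytes to the longest length in the
# batch so np.stack can build a dense [batch, max_len] uint8 array; assumes the
# decoder tolerates trailing padding bytes after the encoded data
def stack_padded(batch_image_file_path):
    arrays = [np.fromfile(p, dtype=np.uint8) for p in batch_image_file_path]
    max_len = max(a.size for a in arrays)
    padded = [np.pad(a, (0, max_len - a.size)) for a in arrays]
    return np.stack(padded)
```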