Tags: tensorrt, tensorrt-python

TRT inference using onnx - Error Code 1: Cuda Driver (invalid resource handle)


Currently I'm trying to convert a given ONNX file to a TensorRT file and run inference on the generated TensorRT engine. To do so, I used the TensorRT Python binding API, but "Error Code 1: Cuda Driver (invalid resource handle)" occurs and there is no helpful description of it. Can anyone help me overcome this situation? Thanks in advance; my code snippet is below.

def trt_export(self):
    fp_16_mode = True

    ## Obviously, I provided appropriate file names
    trt_file_name = "PATH_TO_TRT_FILE"
    onnx_name = "PATH_TO_ONNX_FILE"
   
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(EXPLICIT_BATCH)
    parser = trt.OnnxParser(network, TRT_LOGGER)

    config = builder.create_builder_config()

    config.max_workspace_size = (1<<30)
    config.set_flag(trt.BuilderFlag.FP16)
    config.default_device_type = trt.DeviceType.GPU

    profile = builder.create_optimization_profile()
    profile.set_shape('input', (1, 3, IMG_SIZE, IMG_SIZE), (12, 3, IMG_SIZE, IMG_SIZE), (32, 3, IMG_SIZE, IMG_SIZE))    # arbitrary numbers for min / opt / max batch
    config.add_optimization_profile(profile)

    with open(onnx_name, 'rb') as model:
        if not parser.parse(model.read()):
            for error in range(parser.num_errors):
                print(parser.get_error(error))

    engine = builder.build_engine(network, config)
    buf = engine.serialize()
    with open(trt_file_name, 'wb') as f:
        f.write(buf)

def validate_trt_result(self, input_path):
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
    
    trt_file_name = "PATH_TO_TRT_FILE"

    trt_runtime = trt.Runtime(TRT_LOGGER)

    with open(trt_file_name, 'rb') as f:
        engine_data = f.read()

    engine = trt_runtime.deserialize_cuda_engine(engine_data)

    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()

    inputs, outputs, bindings = [], [], []

    context = engine.create_execution_context()
    stream = cuda.Stream()
    
    index = 0
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * -1   # assuming one batch
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            context.set_binding_shape(index, [1, 3, IMG_SIZE, IMG_SIZE])
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
        index += 1

    print(context.all_binding_shapes_specified)

    input_img = cv2.imread(input_path)
    input_r = cv2.resize(input_img, dsize = (256, 256))
    input_p = np.transpose(input_r, (2, 0, 1))  
    input_e = np.expand_dims(input_p, axis = 0)
    input_f = input_e.astype(np.float32)
    input_f /= 255      
    
    numpy_array_input = [input_f]
    hosts = [input.host for input in inputs]
    trt_types = [trt.int32]
    
    for numpy_array, host, trt_types in zip(numpy_array_input, hosts, trt_types):
        numpy_array = np.asarray(numpy_array).astype(trt.nptype(trt_types)).ravel()
        print(numpy_array.shape)
        np.copyto(host, numpy_array)

    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]

    #### ERROR HAPPENS HERE ####
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    #### ERROR HAPPENS HERE ####

    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()

    print("TRT model inference result : ")

    output = outputs[0].host
    for one in output :
        print(one)
    
    ctx.pop()

Solution

  • It looks like a ctx.push() call is missing before the memcpy_htod_async line; see the sketch after the link below.

    Such an error can happen if TensorFlow / PyTorch is also using CUDA in parallel with TensorRT.

    See the related question/answer: https://stackoverflow.com/a/73996477/5655977
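
    A minimal sketch of that fix, reusing the variable names from the question (ctx, inputs, outputs, bindings, stream, context) and assuming ctx is the context created with device.make_context(): push it onto the calling thread right before the host-to-device copies and the execute call, and pop it again afterwards.

        # Make the dedicated PyCUDA context current on this thread so the copies,
        # the inference call, and the stream all run against the same CUDA context.
        ctx.push()
        try:
            [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
            context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
            [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
            stream.synchronize()
        finally:
            ctx.pop()

    With that in place, the ctx.pop() at the very end of validate_trt_result balances the implicit push done by device.make_context().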