Install TensorRT
Please refer to this official website first:
https://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing
After downloading TensorRT 4.0 (in my case), we can install it:
$ dpkg -i nv-tensorrt-repo-ubuntu1604-cuda9.0-ga-trt4.0.1.6-20180612_1-1_amd64.deb
$ apt-get update
$ apt-get install tensorrt
$ apt-get install python-libnvinfer-dev
$ apt-get install uff-converter-tf
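To quickly confirm that the Python bindings were installed correctly, a minimal sanity check (my own check, not part of the official guide) is to import the modules and print the TensorRT version:

# Hedged sanity check: import the TensorRT and UFF Python bindings that the
# packages above should provide, and print the TensorRT version.
import tensorrt as trt
import uff                   # installed by the uff-converter-tf package

print(trt.__version__)       # expected to report something like 4.0.x
print('uff imported OK')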
Convert TensorFlow model to UFF format
I will still use the simple CNN model from my previous post:
[ONNX] Train in Tensorflow and export to ONNX (Part II)
https://danny270degree.blogspot.com/2018/08/onnx-train-in-tensorflow-and-export-to_20.html
# UFF format
# Freeze the graph and remove the nodes used for training
import tensorflow as tf
import uff

# `sess` is the tf.Session that trained the model in the previous post
graph_def = tf.get_default_graph().as_graph_def()
model_output = 'output/output/BiasAdd'
frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, [model_output])
frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)

# Create the UFF model and dump it on disk
uff_model = uff.from_tensorflow(frozen_graph, [model_output])
dump = open('my_mnist/MNIST_simple_cnn.uff', 'wb')
dump.write(uff_model)
dump.close()

This generates a converted MNIST_simple_cnn.uff file in the folder my_mnist.
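If you are not sure which node names to use (they are needed again when registering the UFF parser in the next section), a small helper I find handy is to print every node of the frozen graph and look for the input and output:

# List every node in the frozen GraphDef so the input ("inputs/X") and
# output ("output/output/BiasAdd") names can be confirmed before conversion.
for node in frozen_graph.node:
    print(node.name, node.op)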
Use TensorRT to do inference
Inspired by the article TensorRT becomes a valuable tool for Data Scientist, I modified its example to do MNIST inference:
import pycuda.driver as cuda
import pycuda.autoinit
import argparse
import tensorrt as trt
from tensorrt.lite import Engine
from tensorrt.parsers import uffparser
import tensorflow as tf
import numpy as np
import tensorflow.examples.tutorials.mnist.input_data as input_data
import cv2

n_input = 784     # MNIST data input (img shape: 28*28)
n_classes = 10    # MNIST total classes (0-9 digits)

# Run inference on the device
def infer(context, input_img, batch_size):
    # load engine
    engine = context.get_engine()
    assert(engine.get_nb_bindings() == 2)
    # create output array to receive data
    dims = engine.get_binding_dimensions(1).to_DimsCHW()
    elt_count = dims.C() * dims.H() * dims.W() * batch_size
    # convert input data to Float32
    input_img = input_img.astype(np.float32)
    # allocate pagelocked memory
    output = cuda.pagelocked_empty(elt_count, dtype=np.float32)
    # allocate device memory
    d_input = cuda.mem_alloc(batch_size * input_img.size * input_img.dtype.itemsize)
    d_output = cuda.mem_alloc(batch_size * output.size * output.dtype.itemsize)
    bindings = [int(d_input), int(d_output)]
    stream = cuda.Stream()
    # transfer input data to device
    cuda.memcpy_htod_async(d_input, input_img, stream)
    # execute model
    context.enqueue(batch_size, bindings, stream.handle, None)
    # transfer predictions back
    cuda.memcpy_dtoh_async(output, d_output, stream)
    # return predictions
    return output

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--uff_graph", default="MNIST_simple_cnn.uff", type=str,
                        help="MNIST model to import")
    parser.add_argument("--precision", default="fp32", type=str,
                        help="fp32, fp16, or int32; defines the data type used in the engine")
    args = parser.parse_args()

    # prepare the UFF parser
    uff_model = open(args.uff_graph, 'rb').read()
    parser = uffparser.create_uff_parser()
    parser.register_input("inputs/X", (1, 28, 28), 0)
    parser.register_output("output/output/BiasAdd")

    # create the TensorRT logger
    trt_logger = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

    # Build your TensorRT inference engine
    if args.precision == 'fp32':
        engine = trt.utils.uff_to_trt_engine(logger=trt_logger,
                                             stream=uff_model,
                                             parser=parser,
                                             max_batch_size=1,            # 1 sample at a time
                                             max_workspace_size=1 << 30,  # 1 GB GPU memory workspace
                                             datatype=trt.infer.DataType.FLOAT)  # very cool: we can set the precision here
    elif args.precision == 'fp16':
        engine = trt.utils.uff_to_trt_engine(logger=trt_logger,
                                             stream=uff_model,
                                             parser=parser,
                                             max_batch_size=1,            # 1 sample at a time
                                             max_workspace_size=1 << 30,  # 1 GB GPU memory workspace
                                             datatype=trt.infer.DataType.HALF)   # FP16 is specified, but it is not supported on the GTX platform
    else:
        engine = trt.utils.uff_to_trt_engine(logger=trt_logger,
                                             stream=uff_model,
                                             parser=parser,
                                             max_batch_size=1,            # 1 sample at a time
                                             max_workspace_size=1 << 30,  # 1 GB GPU memory workspace
                                             datatype=trt.infer.DataType.INT32)  # INT32 here; INT8 would need a calibrator, which I do not provide

    # These are commented out because they are not working...
    # Serialize the TensorRT engine to a file for when you are ready to deploy your model.
    #trt.utils.write_engine_to_file("mnist_FP32.engine", engine.serialize())
    #engine_single = Engine(PLAN="mnist_FP32.engine",
    #                       postprocessors={"output/output/BiasAdd"})
    # Use the single engine
    #result = engine_single.infer(picture)
    #print(result, result.shape)

    context = engine.create_execution_context()

    picture = cv2.imread("2.png", cv2.IMREAD_GRAYSCALE)
    picture = picture.reshape(1, 28, 28)
    picture = picture / 255.0

    # Use the infer helper function
    prediction = infer(context, picture, 1)
    print(prediction, prediction.shape)
After executing this Python script for inference, the result is:
(array([-1.9128938 , 0.43256244, 12.799435 , 9.221975 , -5.592257 ,
-5.0760493 , -8.883412 , -1.0613143 , 6.1645894 , 0.30224222],
dtype=float32), (10,))
('The result is: ', 2) <== The inference answer is correct, because "2.png" is the input image.
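The raw output above is just the 10 logits from the final BiasAdd node, so the predicted digit is the index of the largest value. The "result is 2" line comes from a statement along these lines (a sketch using the `prediction` array returned by infer() above, not an exact copy of my extra print):

# The predicted digit is the index of the largest logit in the output vector.
print('The result is: ', int(np.argmax(prediction)))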
P.S:
The Python API is not available on the Jetson TX2 platform, and there is no plan to support it in the future.
Here are some samples that can give you an idea for your requirement:
1. https://github.com/AastaNV/ChatBot
Python for pre-processing
C++ for TensorRT inference
Swig as an interface between Python and C++
2. TensorFlow-TensorRT (a minimal usage sketch follows below)
https://github.com/NVIDIA-Jetson/tf_trt_models
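For completeness, the TensorFlow-TensorRT integration in item 2 works directly on a frozen GraphDef, so the frozen graph from the UFF section could be optimized without exporting to UFF at all. This is only a rough sketch assuming a TensorFlow build with TensorRT support (tf.contrib.tensorrt, TF 1.7 or later); I have not run it on the TX2 myself:

# Hedged sketch of the TF-TRT path: replace TensorRT-compatible subgraphs of
# the frozen graph with TRT engine ops instead of converting to UFF.
import tensorflow.contrib.tensorrt as trt_tf

trt_graph = trt_tf.create_inference_graph(
    input_graph_def=frozen_graph,            # the frozen GraphDef from the UFF section
    outputs=['output/output/BiasAdd'],       # same output node as before
    max_batch_size=1,
    max_workspace_size_bytes=1 << 30,        # 1 GB workspace, same as the UFF engine
    precision_mode='FP32')                   # 'FP16' / 'INT8' are also accepted here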
Reference:
https://medium.com/@fortyq/tensorrt-becomes-a-valuable-tool-for-data-scientist-64cf1b764df2
https://github.com/Tutufa/TRT3_UFF_simple_example/blob/master/UFF_TensorRT3.ipynb
https://devblogs.nvidia.com/tensorrt-3-faster-tensorflow-inference/
http://jinfagang.gitlab.io/2017/12/12/%E5%B5%8C%E5%85%A5%E5%BC%8F%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E4%B8%89%EF%BC%9ATensorRT_Python_API%E5%8A%A0%E9%80%9F%E9%83%A8%E7%BD%B2%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6/
https://github.com/yfor1008/tensorRT_for_keras/blob/dev/code/convert_densenet.py