    【GiantPandaCV】[Learning TVM from Scratch] Part 3: Understanding TVM's Frontend Through the ONNX Model Structure (2)


      (Continued from the previous post.)
      Here, _get_convert_map returns the dictionary of OPs that TVM supports for a specific ONNX opset version. The dictionary's keys are ONNX OP type names, and its values are the converters that produce the corresponding Relay IR.

      # _convert_map defines maps of name to converter functor(callable)
      # for 1 to 1 mapping, use Renamer if nothing but name is different
      # use AttrCvt if attributes need to be converted
      # for 1 to N mapping(composed), use custom callable functions
      # for N to 1 mapping, currently not supported(?)
      def _get_convert_map(opset):
          return {
              # defs/experimental
              "Identity": Renamer("copy"),
              "Affine": Affine.get_converter(opset),
              "BitShift": BitShift.get_converter(opset),
              "ThresholdedRelu": ThresholdedRelu.get_converter(opset),
              "ScaledTanh": ScaledTanh.get_converter(opset),
              "ParametricSoftplus": ParametricSoftPlus.get_converter(opset),
              "Constant": Constant.get_converter(opset),
              "ConstantOfShape": ConstantOfShape.get_converter(opset),
              # 'GivenTensorFill'
              "FC": AttrCvt("dense", ignores=["axis", "axis_w"]),
              "Scale": Scale.get_converter(opset),
              # 'GRUUnit'
              # 'ATen'
              # 'ImageScaler'
              # 'MeanVarianceNormalization'
              # 'Crop'
              # 'Embedding'
              "Upsample": Upsample.get_converter(opset),
              "SpatialBN": BatchNorm.get_converter(opset),
              # defs/generator
              # 'Constant' # Implemented
              # 'RandomUniform'
              # 'RandomNormal'
              # 'RandomUniformLike'
              # 'RandomNormalLike'
              # defs/logical
              # defs/math
              "Add": Add.get_converter(opset),
              "Sub": Sub.get_converter(opset),
              "Mul": Mul.get_converter(opset),
              "Div": Div.get_converter(opset),
              "Neg": Renamer("negative"),
              "Abs": Absolute.get_converter(opset),
              "Reciprocal": Reciprocal.get_converter(opset),
              "Floor": Renamer("floor"),
              "Ceil": Renamer("ceil"),
              "Round": Renamer("round"),
              "IsInf": Renamer("isinf"),
              "IsNaN": Renamer("isnan"),
              "Sqrt": Renamer("sqrt"),
              "Relu": Renamer("relu"),
              "LeakyRelu": Renamer("leaky_relu"),
              "Selu": Selu.get_converter(opset),
              "Elu": Elu.get_converter(opset),
              "Exp": Renamer("exp"),
              "Greater": Greater.get_converter(opset),
              "Less": Less.get_converter(opset),
              "Log": Renamer("log"),
              "Acos": Renamer("acos"),
              "Acosh": Renamer("acosh"),
              "Asin": Renamer("asin"),
              "Asinh": Renamer("asinh"),
              "Atan": Renamer("atan"),
              "Atanh": Renamer("atanh"),
              "Cos": Renamer("cos"),
              "Cosh": Renamer("cosh"),
              "Sin": Renamer("sin"),
              "Sinh": Renamer("sinh"),
              "Tan": Renamer("tan"),
              "Tanh": Renamer("tanh"),
              "Pow": Renamer("power"),
              "PRelu": Prelu.get_converter(opset),
              "Sigmoid": Renamer("sigmoid"),
              "HardSigmoid": HardSigmoid.get_converter(opset),
              "Max": Maximum.get_converter(opset),
              "Min": Minimum.get_converter(opset),
              "Sum": Sum.get_converter(opset),
              "Mean": Mean.get_converter(opset),
              "Clip": Clip.get_converter(opset),
              "Softplus": Softplus.get_converter(opset),
              # softmax default axis is different in onnx
              "Softmax": Softmax.get_converter(opset),
              "LogSoftmax": LogSoftmax.get_converter(opset),
              "OneHot": OneHot.get_converter(opset),
              # 'Hardmax'
              "Softsign": Softsign.get_converter(opset),
              "Gemm": Gemm.get_converter(opset),
              "MatMul": MatMul.get_converter(opset),
              "Mod": Mod.get_converter(opset),
              "Xor": Renamer("logical_xor"),
              # defs/nn
              "AveragePool": AveragePool.get_converter(opset),
              "LpPool": LpPool.get_converter(opset),
              "MaxPool": MaxPool.get_converter(opset),
              "MaxUnpool": MaxUnpool.get_converter(opset),
              "Conv": Conv.get_converter(opset),
              "ConvTranspose": ConvTranspose.get_converter(opset),
              "GlobalAveragePool": Renamer("global_avg_pool2d"),
              "GlobalMaxPool": Renamer("global_max_pool2d"),
              "BatchNormalization": BatchNorm.get_converter(opset),
              "InstanceNormalization": InstanceNorm.get_converter(opset),
              # 'LpNormalization'
              "Dropout": AttrCvt("dropout", {"ratio": "rate"}, ignores=["is_test"]),
              "Flatten": Flatten.get_converter(opset),
              "LRN": LRN.get_converter(opset),
              # Recurrent Layers
              "LSTM": LSTM.get_converter(opset),
              "GRU": GRU.get_converter(opset),
              # defs/vision
              "MaxRoiPool": MaxRoiPool.get_converter(opset),
              "RoiAlign": RoiAlign.get_converter(opset),
              "NonMaxSuppression": NonMaxSuppression.get_converter(opset),
              # defs/reduction
              "ReduceMax": ReduceMax.get_converter(opset),
              "ReduceMin": ReduceMin.get_converter(opset),
              "ReduceSum": ReduceSum.get_converter(opset),
              "ReduceMean": ReduceMean.get_converter(opset),
              "ReduceProd": ReduceProd.get_converter(opset),
              "ReduceLogSumExp": ReduceLogSumExp.get_converter(opset),
              "ReduceLogSum": ReduceLogSum.get_converter(opset),
              "ReduceSumSquare": ReduceSumSquare.get_converter(opset),
              "ReduceL1": ReduceL1.get_converter(opset),
              "ReduceL2": ReduceL2.get_converter(opset),
              # defs/sorting
              "ArgMax": ArgMax.get_converter(opset),
              "ArgMin": ArgMin.get_converter(opset),
              "TopK": TopK.get_converter(opset),
              # defs/tensor
              "Cast": Cast.get_converter(opset),
              "Reshape": Reshape.get_converter(opset),
              "Expand": Expand.get_converter(opset),
              "Concat": Concat.get_converter(opset),
              "Split": Split.get_converter(opset),
              "Slice": Slice.get_converter(opset),
              "Transpose": AttrCvt("transpose", {"perm": "axes"}),
              "DepthToSpace": DepthToSpace.get_converter(opset),
              "SpaceToDepth": SpaceToDepth.get_converter(opset),
              "Gather": Gather.get_converter(opset),
              "GatherElements": GatherElements.get_converter(opset),
              "GatherND": GatherND.get_converter(opset),
              "Size": AttrCvt("ndarray_size", extras={"dtype": "int64"}),
              "Scatter": Scatter.get_converter(opset),
              "ScatterElements": Scatter.get_converter(opset),
              "Squeeze": AttrCvt("squeeze", {"axes": "axis"}),
              "Unsqueeze": Unsqueeze.get_converter(opset),
              "Pad": Pad.get_converter(opset),
              "Shape": Shape.get_converter(opset),
              "Sign": Sign.get_converter(opset),
              "Equal": Equal.get_converter(opset),
              "Not": Not.get_converter(opset),
              "And": And.get_converter(opset),
              "Tile": Tile.get_converter(opset),
              "Erf": Erf.get_converter(opset),
              "Where": Where.get_converter(opset),
              "Or": Or.get_converter(opset),
              "Resize": Resize.get_converter(opset),
              "NonZero": NonZero.get_converter(opset),
              "Range": Range.get_converter(opset),
              "CumSum": CumSum.get_converter(opset),
              # defs/control_flow
              "Loop": Loop.get_converter(opset),
              "If": If.get_converter(opset),
              # Torch ATen Dispatcher.
              "ATen": ATen.get_converter(opset),
              # Quantization
              "QuantizeLinear": QuantizeLinear.get_converter(opset),
              "DequantizeLinear": DequantizeLinear.get_converter(opset),
              "DynamicQuantizeLinear": DynamicQuantizeLinear.get_converter(opset),
          }
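
      To see how this map is consumed, here is a minimal sketch of the dispatch step (a simplification for illustration, not the verbatim TVM code; the actual logic lives in GraphProto._convert_operator in tvm/relay/frontend/onnx.py):

      # Simplified sketch of dispatching one ONNX node through the map.
      # node is an onnx NodeProto; inputs/attrs/params are assumed to have
      # been collected by the graph importer beforehand.
      def convert_operator(node, inputs, attrs, params, opset):
          convert_map = _get_convert_map(opset)
          op_name = node.op_type  # e.g. "Conv", "Relu", ...
          if op_name not in convert_map:
              raise NotImplementedError("ONNX op {} not supported".format(op_name))
          # Each map value is a callable (Renamer, AttrCvt, or an OP's _impl_vX)
          # that returns the Relay expression for this node.
          return convert_map[op_name](inputs, attrs, params)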
      

      Let's take the convolution layer as an example of how an ONNX OP is converted into a Relay expression. A convolution OP generally has three inputs: the data, the weight, and the bias, corresponding to inputs[0], inputs[1], and inputs[2] in the function below. The auto_pad attribute is specific to ONNX; TVM's Relay convolution OP does not support it, so the padding that the ONNX convolution needs must be computed explicitly and handled case by case (either by manually padding the input, or by passing a padding argument to the Relay convolution OP, whichever fits the situation). Also note that inside this conversion function, inputs[0] is Relay IR rather than actual data, which you can confirm by printing inputs[0] in the code below.

      class Conv(OnnxOpConverter):
          """Operator converter for Conv."""
      
          @classmethod
          def _impl_v1(cls, inputs, attr, params):
              # Use shape of input to determine convolution type.
              data = inputs[0]
              input_shape = infer_shape(data)
              ndim = len(input_shape)
              if "auto_pad" in attr:
                  attr["auto_pad"] = attr["auto_pad"].decode("utf-8")
                  if attr["auto_pad"] in ("SAME_UPPER", "SAME_LOWER"):
                      # Warning: Convolution does not yet support dynamic shapes,
                      # one will need to run dynamic_to_static on this model after import
                      data = autopad(data, attr["strides"], attr["kernel_shape"], attr["dilations"], ndim)
                  elif attr["auto_pad"] == "VALID":
                      attr["pads"] = tuple([0 for i in range(ndim - 2)])
                  elif attr["auto_pad"] == "NOTSET":
                      pass
                  else:
                      msg = 'Value {} in attribute "auto_pad" of operator Conv is invalid.'
                      raise tvm.error.OpAttributeInvalid(msg.format(attr["auto_pad"]))
                  attr.pop("auto_pad")
      
              out = AttrCvt(
                  op_name=dimension_picker("conv"),
                  transforms={
                      "kernel_shape": "kernel_size",
                      "dilations": ("dilation", 1),
                      "pads": ("padding", 0),
                      "group": ("groups", 1),
                  },
                  custom_check=dimension_constraint(),
              )([data, inputs[1]], attr, params)
      
              use_bias = len(inputs) == 3
              if use_bias:
                  out = _op.nn.bias_add(out, inputs[2])
              return out
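
      In the SAME_UPPER/SAME_LOWER branch above, the required padding is derived from the input size, kernel, stride, and dilation. The helper below is a minimal sketch of that arithmetic for a single spatial axis under the ONNX convention (it is not TVM's autopad, which instead builds Relay expressions so that dynamic shapes also work):

      # Sketch: SAME_UPPER padding for one spatial axis, static shapes only.
      def same_upper_pad(in_size, kernel, stride=1, dilation=1):
          effective_kernel = (kernel - 1) * dilation + 1
          out_size = (in_size + stride - 1) // stride  # ceil(in_size / stride)
          total = max((out_size - 1) * stride + effective_kernel - in_size, 0)
          # SAME_UPPER puts the extra pixel (when total is odd) at the end;
          # SAME_LOWER would put it at the beginning.
          return total // 2, total - total // 2

      print(same_upper_pad(224, kernel=3, stride=2))  # (0, 1)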
      

      0x3. Adding a New OP to TVM

      Now that we know how TVM converts ONNX into Relay IR, what do we do when adapting a custom model whose OPs TVM does not yet support? That is when we need a custom OP. A custom OP can either be composed from existing OPs, or implemented from scratch inside TVM and then hooked up with a new conversion entry in the frontend. Taking SeLU as an example, here is a brief look at what adding an OP involves.

      First we implement a Selu class that inherits from OnnxOpConverter and provides an _impl_v1 method:

      class Selu(OnnxOpConverter):
          """Operator converter for Selu."""
      
          @classmethod
          def _impl_v1(cls, inputs, attr, params):
              alpha = float(attr.get("alpha", 1.6732))
              gamma = float(attr.get("gamma", 1.0507))
              return _expr.const(gamma) * (
                  _expr.const(-alpha) * _op.nn.relu(_expr.const(1.0) - _op.exp(inputs[0]))
                  + _op.nn.relu(inputs[0])
              )
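
      As a quick sanity check that this composition matches the SeLU definition selu(x) = gamma * (x if x > 0 else alpha * (exp(x) - 1)), here is a small NumPy comparison (illustration only, independent of TVM):

      import numpy as np

      def selu_ref(x, alpha=1.6732, gamma=1.0507):
          return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0))

      def selu_composed(x, alpha=1.6732, gamma=1.0507):
          relu = lambda v: np.maximum(v, 0.0)
          # Mirrors the Relay expression built in Selu._impl_v1 above.
          return gamma * (-alpha * relu(1.0 - np.exp(x)) + relu(x))

      x = np.linspace(-3.0, 3.0, 7)
      assert np.allclose(selu_ref(x), selu_composed(x))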
      

      As you can see, the OP is assembled from common operators following the SeLU formula. After implementing the conversion logic, we register the OP in _convert_map by adding one line to _get_convert_map: "Selu": Selu.get_converter(opset), and then rebuild TVM from source. The OnnxOpConverter class that our new Selu class inherits from is implemented as follows:

      class OnnxOpConverter(object):
          """A helper class for holding onnx op converters."""
      
          @classmethod
          def get_converter(cls, opset):
              """获取匹配给定的算子集合的转换器
      
              Parameters
              ----------
              opset: int
                  opset from model.
      
              Returns
              -------
              converter, which should be `_impl_vx`. Number x is the largest
                  number smaller than or equal to opset among all supported versions.
              """
              # The _impl_vX methods are each OP's concrete implementations; X is a version number matching the ONNX opset version.
              versions = [int(d.replace("_impl_v", "")) for d in dir(cls) if "_impl_v" in d]
              versions = sorted(versions + [opset])
              version = versions[max([i for i, v in enumerate(versions) if v == opset]) - 1]
              if hasattr(cls, "_impl_v{}".format(version)):
                  return getattr(cls, "_impl_v{}".format(version))
              raise NotImplementedError(
                  "opset version {} of {} not implemented".format(version, cls.__name__)
              )
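
      The version selection above picks the largest _impl_vX whose X does not exceed the model's opset. A hypothetical converter with two implementations illustrates the dispatch:

      # Hypothetical class, used only to illustrate get_converter's selection.
      class Demo(OnnxOpConverter):
          @classmethod
          def _impl_v1(cls, inputs, attr, params):
              ...

          @classmethod
          def _impl_v11(cls, inputs, attr, params):
              ...

      Demo.get_converter(9)   # -> Demo._impl_v1  (largest version <= 9)
      Demo.get_converter(11)  # -> Demo._impl_v11
      Demo.get_converter(13)  # -> Demo._impl_v11 (closest version not above 13)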
      

      Once TVM has been rebuilt, the custom model can be deployed, and the new converter can be verified directly from Python, as sketched below. From a deployment point of view, TVM is quite easy to use.
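
      A minimal sketch of such a check (assuming the onnx package and the rebuilt TVM are installed) builds a one-node ONNX graph containing Selu and imports it through the frontend:

      # Verify the Selu converter on a single-node ONNX graph.
      from onnx import helper, TensorProto
      from tvm import relay

      node = helper.make_node("Selu", ["x"], ["y"], alpha=1.6732, gamma=1.0507)
      graph = helper.make_graph(
          [node], "selu_test",
          inputs=[helper.make_tensor_value_info("x", TensorProto.FLOAT, [1, 4])],
          outputs=[helper.make_tensor_value_info("y", TensorProto.FLOAT, [1, 4])],
      )
      model = helper.make_model(graph)

      mod, params = relay.frontend.from_onnx(model, shape={"x": (1, 4)})
      print(mod)  # the Relay expression produced by Selu._impl_v1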

      0x4. Summary

      This article explored how TVM's Relay frontend loads an ONNX model and emits Relay IR, and showed what it takes to support a custom ONNX OP. TVM can in fact compile models from many DL frameworks, as the figure below shows:


      [Figure: deep learning frameworks supported by TVM]

      Their frontend interaction flows are broadly similar to ONNX's. Hopefully, after reading this article, readers interested in TVM will have an overall grasp of how to add a new OP, or even how to bring a new DL framework into TVM.

      0x5. Recommended Reading

      • A First Look at ONNX

      • A Second Look at ONNX

      • Introducing the New Versions of onnx2pytorch and onnx-simplifier

      • onnx simplifier and optimizer

      • How the Deep Learning Framework OneFlow Interacts with ONNX

      • [Learning Deep Learning Compilers from Scratch] Part 1: Deep Learning Compilers and an Introduction to TVM

      • [Learning Deep Learning Compilers from Scratch] Part 2: Schedulers in TVM

      ————————————————————————
      Reposted from: the WeChat public account 【GiantPandaCV】
      Follow GiantPandaCV for exclusive, original deep learning content shared daily. ( • ̀ω•́ )✧
      For questions about this article, or to join the discussion group, add BBuf on WeChat.