    【2 Precision Alignment】Aligning model precision between tf2.x and PyTorch

    • 183****0229 last edited by 183****0229

      Today we continue with the remaining tf2.x vs PyTorch model precision alignments:

      • 5、GRU bidirectional
      • 6、nn.BatchNorm1d vs layers.BatchNormalization
      • 7、nn.LayerNorm vs layers.LayerNormalization
      • 8、Conv2d valid padding
      • 9、Conv2d same padding

      Imports and helper utilities

      import torch
      import torch.nn as nn
      import torch.nn.functional as F
      
      import tensorflow as tf
      from tensorflow import keras
      import numpy as np
      import math
      
      # Let TF allocate GPU memory on demand instead of reserving it all up front
      gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
      for gpu in gpus:
          tf.config.experimental.set_memory_growth(device=gpu, enable=True)
      
      def reorder_process(x):
          # PyTorch GRU packs gates as (reset, update, new); Keras expects
          # (update, reset, new). With hidden_size=4 this index swaps the first
          # two blocks of four rows and leaves the candidate block in place.
          reorder_index = [4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11]
          if len(x.shape) == 2:
              return x[:, reorder_index]
          else:
              return x[reorder_index]
      
      def pad(x, kernel_size=3, dilation=1):
          """Static TF-style 'same' padding.

          pad_total = dilation * (kernel_size - 1) matches TF's 'same' padding
          whenever (input_size - 1) is divisible by the stride, e.g. stride 1,
          or the stride-5, 26x26 example in section 9.
          """
          pad_total = dilation * (kernel_size - 1)
          pad_beg = pad_total // 2
          pad_end = pad_total - pad_beg
      
          x_padded = F.pad(
              x, pad=(pad_beg, pad_end, pad_beg, pad_end))
          
          return x_padded
      
      
      def compare_difference(a, b):
          """Report the max/mean absolute gap between a torch tensor a and a tf tensor b."""
          o = np.abs(a.detach().numpy() - b.numpy()).max()
          print(f"max difference is {o}")
          o = np.abs(a.detach().numpy() - b.numpy()).mean()
          print(f"mean difference is {o}")
      
      

      5、GRU bidirectional

      # Build the models
      pt_gru_bi = nn.GRU(input_size=2,hidden_size=4,batch_first=True,num_layers=1,bidirectional=True)
      tf_gru_bi = keras.layers.Bidirectional(layer=keras.layers.GRU(units=4,return_sequences=True,return_state=True),merge_mode='concat')
      tf_gru_bi.build(input_shape=(None,3,2))
      
      # Copy the weights (transpose the PyTorch kernels and reorder the gates);
      # Keras GRU (reset_after=True) keeps separate input/recurrent bias rows,
      # which map onto PyTorch's bias_ih / bias_hh
      forward_input_kernel = reorder_process(pt_gru_bi.weight_ih_l0.T.detach().numpy())
      forward_recur_kernel = reorder_process(pt_gru_bi.weight_hh_l0.T.detach().numpy())
      forward_bias = torch.stack([reorder_process(pt_gru_bi.bias_ih_l0.detach()),reorder_process(pt_gru_bi.bias_hh_l0.detach())]).numpy()
      
      backward_input_kernel = reorder_process(pt_gru_bi.weight_ih_l0_reverse.T.detach().numpy())
      backward_recur_kernel = reorder_process(pt_gru_bi.weight_hh_l0_reverse.T.detach().numpy())
      backward_bias = torch.stack([reorder_process(pt_gru_bi.bias_ih_l0_reverse.detach()),reorder_process(pt_gru_bi.bias_hh_l0_reverse.detach())]).numpy()
      
      weights = [forward_input_kernel,forward_recur_kernel,forward_bias,backward_input_kernel,backward_recur_kernel,backward_bias]
      
      tf_gru_bi.set_weights(weights=weights)
      
      # Compare the outputs
      x = np.random.randn(1,3,2).astype(np.float32)
      pt_x = torch.from_numpy(x)
      tf_x = tf.constant(x)
      
      # pt_outputs holds, for every timestep, the concatenation [forward_hidden, backward_hidden];
      # pt_hidden_states holds the forward hidden at the last timestep and the backward hidden at the first timestep
      pt_outputs,pt_hidden_states = pt_gru_bi(pt_x)
      tf_outputs,tf_forward_hidden_states,tf_backward_hidden_states = tf_gru_bi(tf_x)
      tf_hidden_states = tf.stack([tf_forward_hidden_states,tf_backward_hidden_states])
      
      compare_difference(pt_outputs,tf_outputs)
      # max difference is 2.9802322387695312e-08
      # mean difference is 1.1253480813877559e-08
      compare_difference(pt_hidden_states,tf_hidden_states)
      # max difference is 2.9802322387695312e-08
      # mean difference is 1.0943040251731873e-08
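
      The comment above can be checked directly: the forward half of the last timestep's output and the backward half of the first timestep's output are exactly the two hidden states PyTorch returns (a quick sanity check, reusing the tensors from the block above):

      # forward final state = output at the last timestep, first 4 channels
      print(torch.allclose(pt_outputs[:, -1, :4], pt_hidden_states[0]))  # True
      # backward final state = output at the first timestep, last 4 channels
      print(torch.allclose(pt_outputs[:, 0, 4:], pt_hidden_states[1]))   # True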
      

      6、nn.BatchNorm1d vs layers.BatchNormalization

      # Build the models
      # Input shape: (N, C) or (N, C, L) -- BatchNorm normalises over the C dimension
      pt_bn = nn.BatchNorm1d(num_features=4,eps=1e-05) # features live on dim=1
      tf_bn = keras.layers.BatchNormalization(axis=1,epsilon=1e-05)
      tf_bn.build(input_shape=(None,4))
      
      x = np.random.randn(5,4).astype(np.float32)
      pt_x = torch.from_numpy(x)
      tf_x = tf.constant(x)
      
      a=pt_bn(pt_x)
      b=tf_bn(tf_x,training=True)
      compare_difference(a,b)
      # max difference is 1.4901161193847656e-07
      # mean difference is 2.9802322387695312e-08
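
      The comparison above runs both layers in training mode, so the statistics come from the batch itself. To align inference behaviour as well, the affine parameters and running statistics need to be copied across; a minimal sketch, assuming the layers built above (Keras stores them in the order gamma, beta, moving_mean, moving_variance):

      tf_bn.set_weights([
          pt_bn.weight.detach().numpy(),  # gamma
          pt_bn.bias.detach().numpy(),    # beta
          pt_bn.running_mean.numpy(),     # moving_mean
          pt_bn.running_var.numpy(),      # moving_variance
      ])
      pt_bn.eval()  # switch PyTorch to running statistics
      compare_difference(pt_bn(pt_x), tf_bn(tf_x, training=False))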
      

      7、nn.LayerNorm vs layers.LayerNormalization

      # Build the models
      pt_lm = nn.LayerNorm(normalized_shape=4,eps=1e-05)
      tf_lm = keras.layers.LayerNormalization(axis=-1,epsilon=1e-05)
      tf_lm.build(input_shape=(None,4))
      
      # Compare the outputs
      x = np.random.randn(2,4).astype(np.float32)
      pt_x = torch.from_numpy(x)
      tf_x = tf.constant(x)
      a=pt_lm(pt_x)
      b=tf_lm(tf_x)
      compare_difference(a,b)
      # max difference is 2.384185791015625e-07
      # mean difference is 2.9802322387695312e-08
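
      No weight copy is needed above because both layers initialise gamma to ones and beta to zeros. If the parameters had been trained, they could be transferred the same way as elsewhere (a minimal sketch, reusing the layers built above):

      tf_lm.set_weights([
          pt_lm.weight.detach().numpy(),  # gamma
          pt_lm.bias.detach().numpy(),    # beta
      ])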
      

      8、Conv2d valid padding

      # Build the models
      pt_conv2d = nn.Conv2d(in_channels=10,out_channels=2,kernel_size=3)
      tf_conv2d = keras.layers.Conv2D(filters=2,kernel_size=3)
      tf_conv2d.build(input_shape=(None,5,5,10))
      # PyTorch conv weight layout: [out_channels, in_channels, height, width]
      # TF conv weight layout:      [height, width, in_channels, out_channels]
      
      # Copy the weights
      weight = pt_conv2d.weight.detach().numpy().transpose(2,3,1,0)
      bias = pt_conv2d.bias.detach().numpy()
      tf_conv2d.set_weights(weights=[weight,bias])
      
      # Compare the outputs
      x = np.random.randn(2,5,5,10).astype(np.float32)
      pt_x = torch.from_numpy(x).permute(0,3,1,2)
      tf_x = tf.constant(x)
      a = pt_conv2d(pt_x).permute(0,2,3,1)
      b = tf_conv2d(tf_x)
      compare_difference(a,b)
      # max difference is 2.980232238769531e-07
      # mean difference is 9.045470505952835e-08
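
      A quick shape check makes the layout comments above concrete:

      print(pt_conv2d.weight.shape)      # torch.Size([2, 10, 3, 3]) -> (out, in, h, w)
      print(tf_conv2d.weights[0].shape)  # (3, 3, 10, 2)             -> (h, w, in, out)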
      

      9、Conv2d same padding

      # Re-implement PyTorch Conv2d with TF-style "same" padding
      class Conv2d(nn.Conv2d):
          def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                       padding=0, dilation=1, groups=1, bias=True):
              self.padding_type = None
              if padding == "same":
                  self.padding_type = "same"
                  padding = 0
              super(Conv2d, self).__init__(
                  in_channels, out_channels, kernel_size, stride, padding, dilation,
                  groups, bias)
      
              nn.init.xavier_uniform_(self.weight)
      
          def forward(self, x):
              if self.padding_type == "same":
                  # Recompute TF-style "same" padding from the actual input size
                  ih, iw = x.shape[-2:]
                  kh, kw = self.weight.shape[-2:]
                  oh = math.ceil(ih / self.stride[0])
                  ow = math.ceil(iw / self.stride[1])
                  pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
                  pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
                  if pad_h > 0 or pad_w > 0:
                      x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
              out = F.conv2d(x, self.weight, self.bias, self.stride,
                             self.padding, self.dilation, self.groups)
              return out
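
      # Worked example of the arithmetic above, for the comparison further down
      # (ih = 26, kernel_size = 3, stride = 5, dilation = 1):
      #   oh    = ceil(26 / 5) = 6
      #   pad_h = max((6 - 1) * 5 + (3 - 1) * 1 + 1 - 26, 0) = 2  -> 1 before, 1 after
      # When the total padding is odd, the extra row/column goes after
      # (bottom/right), matching TF's "same" convention.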
      
      # Build the models
      kernel_size = 3
      stride = 5
      pt_conv2d = nn.Conv2d(in_channels=3,out_channels=2,kernel_size=kernel_size,stride=stride,padding=0)
      pt_conv2d_same = Conv2d(in_channels=3,out_channels=2,kernel_size=kernel_size,stride=stride,padding="same")
      tf_conv2d = keras.layers.Conv2D(filters=2,kernel_size=kernel_size,padding="same",strides=stride)
      
      tf_conv2d.build(input_shape=(None,26,26,3))
      
      # Copy the weights
      weight = pt_conv2d.weight.data.numpy().transpose(2,3,1,0)
      bias = pt_conv2d.bias.data.numpy()
      tf_conv2d.set_weights(weights=[weight,bias])
      pt_conv2d_same.weight.data = pt_conv2d.weight.data
      pt_conv2d_same.bias.data = pt_conv2d.bias.data
      
      # Compare the outputs
      x = np.random.randn(2,26,26,3).astype(np.float32)
      pt_x = torch.from_numpy(x).permute(0,3,1,2)
      tf_x = tf.constant(x)
      # Manually pad the input for the native PyTorch conv2d
      pt_x_padded = pad(pt_x,kernel_size=kernel_size)
      pt_a = pt_conv2d(pt_x_padded).permute(0,2,3,1)
      pt_b = pt_conv2d_same(pt_x).permute(0,2,3,1)
      tf_c = tf_conv2d(tf_x)
      
      compare_difference(pt_a, tf_c)
      # max difference is 1.341104507446289e-07
      # mean difference is 3.928370517769508e-08
      
      compare_difference(pt_b, tf_c)
      # max difference is 1.341104507446289e-07
      # mean difference is 3.928370517769508e-08
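
      Both variants line up with TF here, but with a caveat: the static pad() helper only matches because the 26x26 input satisfies (ih - 1) % stride == 0. For arbitrary input sizes, the Conv2d subclass is the safer choice, since it recomputes the padding from the actual input shape on every forward pass.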
      
      
      • Alice_恒源云 last edited by

        For the first part of this precision-alignment series, see 【1 Precision Alignment】Aligning model precision between tf2.x and PyTorch, which covers:

        1、nn.Linear vs layers.Dense
        2、nn.Conv1d vs layers.Conv1D
        3、nn.Embedding vs layers.Embedding
        4、nn.GRU vs layers.GRU
