【2 精度对齐】tf2.x与pytorch模型精度对齐
-
今天继续为大家带来 tf2.x 与 pytorch 剩余部分模型的精度对齐,内容包括:
- 5、GRU bidirectional
- 6、nn.BatchNorm1d vs layers.BatchNormalization
- 7、nn.LayerNorm vs layers.LayerNormalization
- 8、Conv2d valid padding
- 9、Conv2d same padding
导包+准备工具
import torch
import torch.nn as nn
import torch.nn.functional as F
import tensorflow as tf
from tensorflow import keras
import numpy as np
import math

# Enable on-demand GPU memory growth for TensorFlow.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


def reoder_process(x):
    """Swap the first two gate blocks of a GRU weight or bias.

    The indexed axis is treated as three equal blocks ``[b0, b1, b2]`` and
    returned as ``[b1, b0, b2]``. PyTorch stores GRU gates in the order
    (reset, update, new) while Keras uses (update, reset, new), so this
    converts between the two layouts.

    The block size is derived from the input, so any hidden size works. For
    an axis of length 12 (hidden size 4) the index list is
    ``[4, 5, 6, 7, 0, 1, 2, 3, 8, 9, 10, 11]``.

    Args:
        x: NumPy array or torch tensor. For a 2-D input the columns are
            reordered; otherwise the first axis is reordered. The reordered
            axis is assumed to have length ``3 * hidden_size``.

    Returns:
        The reordered array/tensor (same kind of object as ``x``).
    """
    is_matrix = len(x.shape) == 2
    axis_len = x.shape[1] if is_matrix else x.shape[0]
    hidden = axis_len // 3
    # A plain Python list indexes both NumPy arrays and torch tensors.
    reoder_index = (
        list(range(hidden, 2 * hidden))
        + list(range(hidden))
        + list(range(2 * hidden, 3 * hidden))
    )
    if is_matrix:
        return x[:, reoder_index]
    return x[reoder_index]


def pad(x, kernel_size=3, dilation=1, stride=1):
    """Zero-pad the last two axes of ``x`` like TensorFlow ``padding="same"``.

    For each of the two trailing axes the total padding is

        max((ceil(size / stride) - 1) * stride
            + dilation * (kernel_size - 1) + 1 - size, 0)

    with the smaller half placed in front and the remainder at the end.
    With the default ``stride=1`` this is ``dilation * (kernel_size - 1)``,
    the amount this helper has always applied. For larger strides the
    correct amount also depends on the input size, so pass ``stride``
    explicitly when the convolution is strided.

    Args:
        x: torch tensor whose last two axes are the spatial (H, W) axes.
        kernel_size: integer kernel size, shared by both axes.
        dilation: integer dilation, shared by both axes.
        stride: integer stride of the convolution that follows.

    Returns:
        The padded tensor.
    """
    in_h, in_w = x.shape[-2:]

    def _split(size):
        # Total padding for one axis, split into (begin, end).
        out_size = math.ceil(size / stride)
        pad_total = max(
            (out_size - 1) * stride + dilation * (kernel_size - 1) + 1 - size,
            0,
        )
        pad_beg = pad_total // 2
        return pad_beg, pad_total - pad_beg

    top, bottom = _split(in_h)
    left, right = _split(in_w)
    # F.pad takes the last axis first: (left, right, top, bottom).
    x_padded = F.pad(x, pad=(left, right, top, bottom))
    return x_padded


def compare_difference(a, b):
    """Print the max and mean absolute element-wise difference of ``a`` and ``b``.

    Args:
        a: torch tensor; converted with ``a.detach().numpy()``.
        b: object exposing ``.numpy()`` (the scripts here pass TensorFlow
            tensors).
    """
    # Compute the difference once and reuse it for both statistics.
    abs_diff = np.abs(a.detach().numpy() - b.numpy())
    o = abs_diff.max()
    print(f"max diffenence is {o}")
    o = abs_diff.mean()
    print(f"mean diffenence is {o}")
5、GRU bidirectional
# Build the models.
pt_gru_bi = nn.GRU(
    input_size=2,
    hidden_size=4,
    batch_first=True,
    num_layers=1,
    bidirectional=True,
)
tf_gru_bi = keras.layers.Bidirectional(
    layer=keras.layers.GRU(units=4, return_sequences=True, return_state=True),
    merge_mode='concat',
)
tf_gru_bi.build(input_shape=(None, 3, 2))


def _keras_gru_weights(suffix):
    """Return (input kernel, recurrent kernel, bias) for one direction.

    ``suffix`` is "" for the forward parameters of ``pt_gru_bi`` and
    "_reverse" for the backward ones.
    """
    w_ih = getattr(pt_gru_bi, f"weight_ih_l0{suffix}")
    w_hh = getattr(pt_gru_bi, f"weight_hh_l0{suffix}")
    b_ih = getattr(pt_gru_bi, f"bias_ih_l0{suffix}")
    b_hh = getattr(pt_gru_bi, f"bias_hh_l0{suffix}")
    input_kernel = reoder_process(w_ih.T.detach().numpy())
    recur_kernel = reoder_process(w_hh.T.detach().numpy())
    # The two PyTorch bias vectors are stacked into one 2-row array.
    bias = torch.stack(
        [reoder_process(b_ih.detach()), reoder_process(b_hh.detach())]
    ).numpy()
    return input_kernel, recur_kernel, bias


# Copy the PyTorch weights into the Keras layer (forward first, then backward).
forward_input_kernel, forward_recur_kernel, forward_bias = _keras_gru_weights("")
backward_input_kernel, backward_recur_kernel, backward_bias = _keras_gru_weights("_reverse")
weights = [
    forward_input_kernel,
    forward_recur_kernel,
    forward_bias,
    backward_input_kernel,
    backward_recur_kernel,
    backward_bias,
]
tf_gru_bi.set_weights(weights=weights)

# Compare both frameworks on the same random input.
x = np.random.randn(1, 3, 2).astype(np.float32)
pt_x = torch.from_numpy(x)
tf_x = tf.constant(x)

# pt_outputs holds, for every sequence position, the forward and backward
# hidden states; pt_hidden_states holds the forward hidden state of the last
# position and the backward hidden state of the first position.
pt_outputs, pt_hidden_states = pt_gru_bi(pt_x)
tf_outputs, tf_forward_hidden_states, tf_backward_hidden_states = tf_gru_bi(tf_x)
tf_hidden_states = tf.stack([tf_forward_hidden_states, tf_backward_hidden_states])

compare_difference(pt_outputs, tf_outputs)
# max diffenence is 2.9802322387695312e-08
# mean diffenence is 1.1253480813877559e-08
compare_difference(pt_hidden_states, tf_hidden_states)
# max diffenence is 2.9802322387695312e-08
# mean diffenence is 1.0943040251731873e-08
6、nn.BatchNorm1d vs layers.BatchNormalization
# Build the layers.
# nn.BatchNorm1d input: (b, C) or (N, C, L); normalisation runs over the C dimension.
bn_features = 4
bn_eps = 1e-05
pt_bn = nn.BatchNorm1d(num_features=bn_features, eps=bn_eps)
# axis=1 makes Keras treat dim 1 as the feature axis.
tf_bn = keras.layers.BatchNormalization(axis=1, epsilon=bn_eps)
tf_bn.build(input_shape=(None, bn_features))

# Feed the same random batch to both layers.
x = np.random.randn(5, bn_features).astype(np.float32)
pt_x = torch.from_numpy(x)
tf_x = tf.constant(x)

a = pt_bn(pt_x)
# training=True so the Keras layer also normalises with batch statistics.
b = tf_bn(tf_x, training=True)
compare_difference(a, b)
# max diffenence is 1.4901161193847656e-07
# mean diffenence is 2.9802322387695312e-08
7、nn.LayerNorm vs layers.LayerNormalization
# Build the layers.
ln_features = 4
ln_eps = 1e-05
pt_lm = nn.LayerNorm(normalized_shape=ln_features, eps=ln_eps)
tf_lm = keras.layers.LayerNormalization(axis=-1, epsilon=ln_eps)
tf_lm.build(input_shape=(None, ln_features))

# Compare on the same random input.
x = np.random.randn(2, ln_features).astype(np.float32)
pt_x = torch.from_numpy(x)
tf_x = tf.constant(x)

a = pt_lm(pt_x)
b = tf_lm(tf_x)
compare_difference(a, b)
# max diffenence is 2.384185791015625e-07
# mean diffenence is 2.9802322387695312e-08
8、Conv2d valid padding
# Build the layers.
n_in = 10
n_out = 2
ksize = 3
pt_conv2d = nn.Conv2d(in_channels=n_in, out_channels=n_out, kernel_size=ksize)
tf_conv2d = keras.layers.Conv2D(filters=n_out, kernel_size=ksize)
tf_conv2d.build(input_shape=(None, 5, 5, n_in))

# Copy the weights.
# PyTorch conv weight shape: [output_channel, input_channel, height, width]
# TF conv weight shape:      [height, width, channel, filter]
weight = np.transpose(pt_conv2d.weight.detach().numpy(), (2, 3, 1, 0))
bias = pt_conv2d.bias.detach().numpy()
tf_conv2d.set_weights(weights=[weight, bias])

# Compare: x is channels-last; PyTorch gets a channels-first view of it and
# its output is permuted back to channels-last before comparing.
x = np.random.randn(2, 5, 5, n_in).astype(np.float32)
pt_x = torch.from_numpy(x).permute(0, 3, 1, 2)
tf_x = tf.constant(x)

a = pt_conv2d(pt_x).permute(0, 2, 3, 1)
b = tf_conv2d(tf_x)
compare_difference(a, b)
# max diffenence is 2.980232238769531e-07
# mean diffenence is 9.045470505952835e-08
9、Conv2d same padding
# Redefine the PyTorch conv2d.
class Conv2d(nn.Conv2d):
    """nn.Conv2d that additionally accepts ``padding="same"``.

    With ``padding="same"`` the underlying convolution is built with zero
    padding and ``forward`` pads the input itself, using an amount that
    depends on the input size, stride, kernel size and dilation. Any other
    ``padding`` value is passed through to ``nn.Conv2d`` unchanged. The
    weight is re-initialised with Xavier-uniform after construction.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        same_requested = padding == "same"
        super().__init__(
            in_channels, out_channels, kernel_size, stride,
            0 if same_requested else padding, dilation, groups, bias)
        self.padding_type = "same" if same_requested else None
        nn.init.xavier_uniform_(self.weight)

    def _pad_same(self, x):
        """Return ``x`` padded on its last two axes for "same" output size."""
        in_h, in_w = x.shape[-2:]
        k_h, k_w = self.weight.shape[-2:]

        def total(size, kernel, stride, dilation):
            # Padding needed so the output has ceil(size / stride) elements.
            out_size = math.ceil(size / stride)
            return max((out_size - 1) * stride + (kernel - 1) * dilation + 1 - size, 0)

        pad_h = total(in_h, k_h, self.stride[0], self.dilation[0])
        pad_w = total(in_w, k_w, self.stride[1], self.dilation[1])
        if pad_h > 0 or pad_w > 0:
            # The smaller half goes to the left/top, the rest to the right/bottom.
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2,
                          pad_h // 2, pad_h - pad_h // 2])
        return x

    def forward(self, x):
        if self.padding_type == "same":
            x = self._pad_same(x)
        out = F.conv2d(x, self.weight, self.bias, self.stride,
                       self.padding, self.dilation, self.groups)
        return out


# Build the models.
kernel_size = 3
stride = 5
pt_conv2d = nn.Conv2d(
    in_channels=3, out_channels=2, kernel_size=kernel_size,
    stride=stride, padding=0)
pt_conv2d_same = Conv2d(
    in_channels=3, out_channels=2, kernel_size=kernel_size,
    stride=stride, padding="same")
tf_conv2d = keras.layers.Conv2D(
    filters=2, kernel_size=kernel_size, padding="same", strides=stride)
tf_conv2d.build(input_shape=(None, 26, 26, 3))

# Copy the weights of the plain PyTorch conv into the other two layers.
weight = np.transpose(pt_conv2d.weight.data.numpy(), (2, 3, 1, 0))
bias = pt_conv2d.bias.data.numpy()
tf_conv2d.set_weights(weights=[weight, bias])
pt_conv2d_same.weight.data = pt_conv2d.weight.data
pt_conv2d_same.bias.data = pt_conv2d.bias.data

# Compare.
x = np.random.randn(2, 26, 26, 3).astype(np.float32)
pt_x = torch.from_numpy(x).permute(0, 3, 1, 2)
tf_x = tf.constant(x)

# Pad the input by hand for the native PyTorch conv2d.
# Called without a stride, pad() adds kernel_size - 1 = 2 rows/columns in
# total. For this 26x26 input with stride 5 the "same" amount is
# (6 - 1) * 5 + (3 - 1) + 1 - 26 = 2 as well, so the two agree here.
pt_x_padded = pad(pt_x, kernel_size=kernel_size)
pt_a = pt_conv2d(pt_x_padded).permute(0, 2, 3, 1)
pt_b = pt_conv2d_same(pt_x).permute(0, 2, 3, 1)
tf_c = tf_conv2d(tf_x)

compare_difference(pt_a, tf_c)
# max diffenence is 1.341104507446289e-07
# mean diffenence is 3.928370517769508e-08
compare_difference(pt_b, tf_c)
# max diffenence is 1.341104507446289e-07
# mean diffenence is 3.928370517769508e-08
- 5、
-
183****0229
-
-
精度对齐上篇,请移步至【1 精度对齐】tf2.x与pytorch模型精度对齐
包含:
1、nn.Linear vs layers.Dense
2、nn.Conv1d vs layers.Conv1D
3、nn.Embedding vs layers.Embedding
4、nn.GRU vs layers.GRU