안녕하세요.
tensorflow에서 만들어놨던 model을 pytorch로 변환하는 작업을 하고 있습니다.
이상하게 tensorflow에서 훈련시켰을 때보다 성능이 낮아지네요… gpu util도 확 낮아져서 epoch당 시간도 2배 이상 차이 나더라구요…
예를 들어
tensorflow) RMSE=0.125, 0.45sec per epoch
pytorch) RMSE=0.20, 2.1sec per epoch
이정도로 차이가 납니다… 혹시 이렇게 성능 차이가 발생하는 원인을 아시나요?
아니면 혹시 제가 model을 옮기는 과정에서 뭔가 잘못 옮기기라도 한걸까요… 일단 model 내부의 shape은 전부 일치합니다…
tensorflow 버전>
def Model(self):
input = Input(shape=(self.len_data, self.n_channel))
x1 = AveragePooling1D(pool_size=4, strides=4, padding='same')(input)
x2 = AveragePooling1D(pool_size=25, strides=4, padding='same')(input)
x3 = AveragePooling1D(pool_size=100, strides=4, padding='same')(input)
x = Concatenate(axis=-1)([x1, x2, x3])
x = Conv1D(50, 20, activation='relu', padding='same', strides=2)(x)
x = BatchNormalization()(x)
x = Conv1D(75, 3, activation='relu', padding='same', strides=2)(x)
x = AveragePooling1D(pool_size=3, strides=2, padding='same')(x)
x = Conv1D(100, 3, activation='relu', padding='same', strides=2)(x)
x = AveragePooling1D(pool_size=3, strides=2, padding='same')(x)
x = Conv1D(125, 3, activation='relu', padding='same', strides=2)(x)
x = AveragePooling1D(pool_size=3, strides=2, padding='same')(x)
x = Conv1D(150, 3, activation='relu', padding='same', strides=2)(x)
x = Flatten()(x)
x = Dropout(0.2)(x)
x = Dense(50, activation='linear')(x)
x = Dense(250, activation='relu')(x)
x = Dense(250, activation='relu')(x)
y = Dense(1, activation='linear')(x)
return Model(input, y)
pytorch 버전>
class Model(nn.Module):
def __init__(self,
input_dim = 19,
init_avgpool_k = [4, 25, 100],
init_avgpool_pad = [0, 11, 48],
conv_dim = [50, 75, 100, 125, 150],
dense_dim = [50, 250, 250, 1]):
super(Model, self).__init__()
# 1. initial average pooling
self.init_avgpool = nn.ModuleList()
for i in range(3):
self.init_avgpool.append(
nn.AvgPool1d(kernel_size = init_avgpool_k[i], stride = 4, padding = init_avgpool_pad[i])
)
# 2. convolutional layers + batch normalization or average pooling
self.featurizer = nn.ModuleList()
in_dim = input_dim*3
for i in range(5):
conv_block = list()
if i == 0:
conv_block.extend(
[
nn.Conv1d(in_channels = in_dim, out_channels = conv_dim[i], kernel_size = 20, stride = 2, padding = 10),
nn.ReLU(),
nn.BatchNorm1d(num_features = conv_dim[i], momentum=0.99, eps=0.001)
]
)
else:
conv_block.extend(
[
nn.Conv1d(in_channels = in_dim, out_channels = conv_dim[i], kernel_size = 3, stride = 2, padding = 1),
nn.ReLU()
]
)
if i != 4:
conv_block.append(
nn.AvgPool1d(kernel_size = 3, stride = 2, padding = 1)
)
self.featurizer.append(nn.Sequential(*conv_block))
in_dim = conv_dim[i]
# 3. predictor
predictor = list()
predictor.extend(
[
nn.Flatten(),
nn.Dropout(0.2)
]
)
in_dim = 300
predictor.append(
nn.Linear(in_features=in_dim, out_features=dense_dim[0])
)
for i in range(2):
predictor.extend(
[
nn.Linear(in_features=dense_dim[i], out_features=dense_dim[i+1]),
nn.ReLU()
]
)
in_dim = dense_dim[i]
predictor.append(
nn.Linear(in_features=in_dim, out_features=dense_dim[-1])
)
self.predictor = nn.Sequential(*predictor)
def forward(self, input):
input_pooled = []
for init_avgpool_layer in self.init_avgpool:
input_pooled.append(init_avgpool_layer(input))
x = torch.cat(input_pooled, dim=1)
for conv_block in self.featurizer:
x = conv_block(x)
y = self.predictor(x)
return y