import torch
import torch.nn as nn
import numpy as np

# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        # Interleave sine and cosine across the embedding dimensions
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        # Store as (max_len, 1, d_model) so it broadcasts over the batch dimension
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
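
# A quick sanity check of PositionalEncoding in isolation. The shapes below are
# only an illustrative assumption (not what my trials actually feed in): since
# pe is stored as (max_len, 1, d_model) and sliced by x.size(0), the module
# expects sequence-first input of shape (seq_len, batch, d_model).
pe_layer = PositionalEncoding(d_model=64)
dummy = torch.randn(21, 40, 64)   # (seq_len, batch, d_model) -- assumed shapes
print(pe_layer(dummy).shape)      # torch.Size([21, 40, 64])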

# Model definition using Transformer
class AttentionModel(nn.Module):
    def __init__(self, input_dim=1, d_model=64, output_dim=3, nhead=4, num_layers=2, dropout=0.2,
                 n_fc_layers=1, fc_hidden_units=10):

        super(AttentionModel, self).__init__()
        layers = []
        # Project raw input features to the model dimension
        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        # Fully connected head: the first layer maps d_model to fc_hidden_units,
        # subsequent layers keep the hidden width
        for i in range(n_fc_layers):
            if i == 0:
                layers += [nn.Linear(d_model, fc_hidden_units)]
            else:
                layers += [nn.Linear(fc_hidden_units, fc_hidden_units)]
        self.network = nn.Sequential(*layers)
        self.decoder = nn.Linear(fc_hidden_units, output_dim)

    def forward(self, x):
        print(x.shape)  # debug: inspect the shape of the incoming batch
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.network(x)
        # Decode only the last position along dimension 1
        x = self.decoder(x[:, -1, :])
        return x
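
# For context, roughly how the model is exercised during a trial. The shapes
# below are an assumption (batch-first (batch_size, seq_len, input_dim), as
# implied by the x[:, -1, :] slice in forward); my real data pipeline may differ.
model = AttentionModel(input_dim=1, d_model=64, output_dim=3, nhead=4,
                       num_layers=2, dropout=0.2, n_fc_layers=1, fc_hidden_units=10)
sample = torch.randn(40, 230, 1)   # 40 sequences of length 230, 1 feature each
out = model(sample)                # expected output shape: (40, 3)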

Output:
2020-04-24 03:58:46,007 [Trial 0] Failed with parameters: {'batch_size': 40, 'fc_layers': 3, 'hidden_units': 240, 'num_layers': 2, 'head_num': 4}…
RuntimeError: The size of tensor a (230) must match the size of tensor b (21) at non-singleton dimension 1

My code was running fine in the previous trial, but it suddenly hit this error and I can't figure out how to fix it.
Does anyone know why this happens?