I am trying the minimal Informer example code from the HuggingFace website, but I am getting the following error when running it:
Code:
from transformers import InformerConfig, InformerModel
from huggingface_hub import hf_hub_download
import torch
# Initializing an Informer configuration with 12 time steps for prediction
configuration = InformerConfig(prediction_length=12)
# Randomly initializing a model (with random weights) from the configuration
model = InformerModel(configuration)
# Accessing the model configuration
configuration = model.config
file = hf_hub_download(
    repo_id="kashif/tourism-monthly-batch", filename="train-batch.pt", repo_type="dataset"
)
batch = torch.load(file)
model = InformerModel.from_pretrained("huggingface/informer-tourism-monthly")
# during training, one provides both past and future values
# as well as possible additional features
outputs = model(
    past_values=batch["past_values"],
    past_time_features=batch["past_time_features"],
    past_observed_mask=batch["past_observed_mask"],
    static_categorical_features=batch["static_categorical_features"],
    static_real_features=batch["static_real_features"],
    future_values=batch["future_values"],
    future_time_features=batch["future_time_features"],
)
last_hidden_state = outputs.last_hidden_state
Out:
Traceback (most recent call last):
  File "D:\data\test.py", line 25, in <module>
    outputs = model(
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\informer\modeling_informer.py", line 1711, in forward
    encoder_outputs = self.encoder(
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\informer\modeling_informer.py", line 1180, in forward
    hidden_states = self.value_embedding(inputs_embeds)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\informer\modeling_informer.py", line 307, in forward
    return self.value_projection(x)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\LSTM\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1536x23 and 22x32)
Any ideas what could be wrong?
It looks like the data or the pretrained model's topology provided in the tutorial causes a dimensionality error in the model's InformerValueEmbedding layers, each of which consists of a Linear layer with input size 22 and output size 32. This layer is located and used in both the InformerEncoder and InformerDecoder parts of the model:
>>> print(model)
InformerModel(
(scaler): InformerMeanScaler()
(embedder): InformerFeatureEmbedder(
(embedders): ModuleList(
(0): Embedding(366, 2)
)
)
(encoder): InformerEncoder(
(value_embedding): InformerValueEmbedding(
(value_projection): Linear(in_features=22, out_features=32, bias=False)
)
(embed_positions): InformerSinusoidalPositionalEmbedding(48, 32)
(layers): ModuleList(
(0-3): 4 x InformerEncoderLayer(
(self_attn): InformerProbSparseAttention(
(k_proj): Linear(in_features=32, out_features=32, bias=True)
(v_proj): Linear(in_features=32, out_features=32, bias=True)
(q_proj): Linear(in_features=32, out_features=32, bias=True)
(out_proj): Linear(in_features=32, out_features=32, bias=True)
)
(self_attn_layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
(activation_fn): GELUActivation()
(fc1): Linear(in_features=32, out_features=32, bias=True)
(fc2): Linear(in_features=32, out_features=32, bias=True)
(final_layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
)
)
(layernorm_embedding): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
(conv_layers): ModuleList(
(0-2): 3 x InformerConvLayer(
(downConv): Conv1d(32, 32, kernel_size=(3,), stride=(1,), padding=(1,), padding_mode=circular)
(norm): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): ELU(alpha=1.0)
(maxPool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
(3): None
)
)
(decoder): InformerDecoder(
(value_embedding): InformerValueEmbedding(
(value_projection): Linear(in_features=22, out_features=32, bias=False)
)
(embed_positions): InformerSinusoidalPositionalEmbedding(48, 32)
(layers): ModuleList(
(0-3): 4 x InformerDecoderLayer(
(self_attn): InformerProbSparseAttention(
(k_proj): Linear(in_features=32, out_features=32, bias=True)
(v_proj): Linear(in_features=32, out_features=32, bias=True)
(q_proj): Linear(in_features=32, out_features=32, bias=True)
(out_proj): Linear(in_features=32, out_features=32, bias=True)
)
(activation_fn): GELUActivation()
(self_attn_layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
(encoder_attn): InformerAttention(
(k_proj): Linear(in_features=32, out_features=32, bias=True)
(v_proj): Linear(in_features=32, out_features=32, bias=True)
(q_proj): Linear(in_features=32, out_features=32, bias=True)
(out_proj): Linear(in_features=32, out_features=32, bias=True)
)
(encoder_attn_layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
(fc1): Linear(in_features=32, out_features=32, bias=True)
(fc2): Linear(in_features=32, out_features=32, bias=True)
(final_layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
)
)
(layernorm_embedding): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
)
)
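Reading the error message against this printout: the "22x32" is the weight of value_projection, while "1536x23" means the forward pass assembled 23 features per time step from your batch, one more than the pretrained layer expects. You can confirm the expected size directly (a quick sketch that only inspects the loaded model):

# The projections expect 22 input features; the batch yields 23 at runtime,
# which is exactly the mismatch reported by F.linear.
print(model.encoder.value_embedding.value_projection.in_features)  # 22
print(model.decoder.value_embedding.value_projection.in_features)  # 22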
You can change the input dimension of these layers like this:
from torch import nn
model.decoder.value_embedding.value_projection = nn.Linear(23, 32, bias=False)
model.encoder.value_embedding.value_projection = nn.Linear(23, 32, bias=False)
but this will reset the pretrained weights in these layers, so you will want to fine-tune the model for a while to re-learn those weights before making predictions.
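For reference, here is a minimal fine-tuning sketch. It uses InformerForPrediction (which returns a training loss) instead of InformerModel, so the replaced layers sit one level deeper under .model; the learning rate and step count are placeholders to tune, and training on the single downloaded batch is only meant to illustrate the loop:

import torch
from torch import nn
from transformers import InformerForPrediction

model = InformerForPrediction.from_pretrained("huggingface/informer-tourism-monthly")
# Re-create the two projections with the feature size the batch actually produces
model.model.encoder.value_embedding.value_projection = nn.Linear(23, 32, bias=False)
model.model.decoder.value_embedding.value_projection = nn.Linear(23, 32, bias=False)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
model.train()
for step in range(100):  # step count is a placeholder
    outputs = model(
        past_values=batch["past_values"],
        past_time_features=batch["past_time_features"],
        past_observed_mask=batch["past_observed_mask"],
        static_categorical_features=batch["static_categorical_features"],
        static_real_features=batch["static_real_features"],
        future_values=batch["future_values"],
        future_time_features=batch["future_time_features"],
    )
    optimizer.zero_grad()
    outputs.loss.backward()  # negative log-likelihood loss from the prediction head
    optimizer.step()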