Hugging Face – Trainer object returning constant values for metrics every epoch


This is my first time playing around with the Hugging Face library: my goal is a simple binary text classification task. Since my text data is in several different languages, my aim is to fine-tune the XLM-RoBERTa model. My code is as follows:

import os

import torch
import transformers
from transformers import (
    AutoModel,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from datasets import Dataset, DatasetDict
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# setting seed
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
transformers.set_seed(SEED)

# tokenize
model_ckpt = 'xlm-roberta-base'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

def tokenize(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=128)

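# `joint_dataset` is my own DatasetDict of raw text/label pairs (see the FYI
# dump below). A hypothetical minimal stand-in, just so the snippet runs
# end-to-end -- the texts and labels here are made up:
# joint_dataset = DatasetDict({
#     'train': Dataset.from_dict({'text': ['hello', 'bonjour'], 'label': [0, 1]}),
#     'validation': Dataset.from_dict({'text': ['hola'], 'label': [1]}),
#     'test': Dataset.from_dict({'text': ['ciao'], 'label': [0]}),
# })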
dataset_encoded = joint_dataset.map(tokenize, batched=True, batch_size=None)

# FYI dataset_encoded ->
# DatasetDict({
#     train: Dataset({
#         features: ['text', 'label', 'input_ids', 'attention_mask'],
#         num_rows: 3395
#     })
#     validation: Dataset({
#         features: ['text', 'label', 'input_ids', 'attention_mask'],
#         num_rows: 425
#     })
#     test: Dataset({
#         features: ['text', 'label', 'input_ids', 'attention_mask'],
#         num_rows: 425
#     })
# })

# train model
num_labels = 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels).to(device)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    recall = recall_score(labels, preds, average='macro')
    precision = precision_score(labels, preds, average='macro')
    f1 = f1_score(labels, preds, average='macro')
    return {'accuracy': acc, 'macro_f1': f1, 'macro_recall': recall, 'macro_precision': precision}
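
# Sanity check (hypothetical, not part of the training run): compute_metrics
# can be exercised on its own with dummy logits to confirm the metric math:
# import numpy as np
# from transformers import EvalPrediction
# dummy = EvalPrediction(predictions=np.array([[0.1, 0.9], [0.8, 0.2]]),
#                        label_ids=np.array([1, 0]))
# print(compute_metrics(dummy))  # all metrics come out 1.0 on this toy input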

batch_size = 16
num_train_epochs = 3
logging_steps = len(dataset_encoded['train']) // batch_size
output_dir = './results'
logging_dir = './logs'
os.makedirs(output_dir, exist_ok=True)
os.makedirs(logging_dir, exist_ok=True)
training_args = TrainingArguments(output_dir=output_dir,
                                  num_train_epochs=num_train_epochs,
                                  learning_rate=1e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_dir=logging_dir,
                                  logging_steps=logging_steps,
                                  push_to_hub=False,
                                  log_level="error")

trainer = Trainer(model=model, args=training_args,
                  compute_metrics=compute_metrics,
                  train_dataset=dataset_encoded['train'],
                  eval_dataset=dataset_encoded['validation'],
                  tokenizer=tokenizer)

trainer.train()

The code runs without errors; however, the 'computed' metrics stay at exactly the same values every epoch. At first I thought the issue might be related to the learning rate, but after changing it I keep getting the same results, regardless of batch size and number of epochs.

[screenshot: Progress Callback output showing identical metric values at every epoch]

It is also worth noting that I get the same metric values when making predictions on the test set:

preds_output = trainer.predict(dataset_encoded['test'])
print(preds_output.metrics)
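
For reference, here is how the distribution of predicted classes can be inspected; this is a hypothetical diagnostic sketch (it assumes numpy is available as np) rather than part of my original pipeline:

import numpy as np

pred_classes = preds_output.predictions.argmax(-1)
# counts per predicted class; if a single class receives every row, the model has collapsed
print(np.unique(pred_classes, return_counts=True))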

[screenshot: test set prediction metrics]
