I was trying to fine-tune a model with LoRA (QLoRA) on my MacBook Pro M1 Max, using the MPS backend.
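For context, the imports and configuration variables that the failing snippet refers to are set up roughly as shown below. The model/dataset names and hyperparameter values here are illustrative placeholders (the script follows the usual QLoRA fine-tuning recipe), not necessarily my exact settings:

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

# Placeholder model/dataset names -- substitute your own
model_name = "NousResearch/Llama-2-7b-hf"
dataset_name = "mlabonne/guanaco-llama2-1k"

# bitsandbytes / QLoRA settings
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

# LoRA adapter configuration
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# SFTTrainer settings
max_seq_length = None
packing = False

training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
)

# Tokenizer for the base model
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

The part of the script that fails is below: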
device_map = "mps"  # target the Apple Silicon GPU via the Metal (MPS) backend
torch.set_default_device(device_map)
# Load dataset (you can process it here)
dataset = load_dataset(dataset_name, split="train")
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)
# Train model
trainer.train()
The code fails with the following error:
TypeError Traceback (most recent call last)
Cell In[10], line 79
76 DEVICE = "mps"
78 # Train model
---> 79 trainer.train()
File ~/miniforge3/envs/llm_tuning/lib/python3.11/site-packages/transformers/trainer.py:1539, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1534 self.model_wrapped = self.model
1536 inner_training_loop = find_executable_batch_size(
1537 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1538 )
-> 1539 return inner_training_loop(
1540 args=args,
1541 resume_from_checkpoint=resume_from_checkpoint,
1542 trial=trial,
1543 ignore_keys_for_eval=ignore_keys_for_eval,
1544 )
File ~/miniforge3/envs/llm_tuning/lib/python3.11/site-packages/transformers/trainer.py:1656, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1654 model = self.accelerator.prepare(self.model)
1655 else:
-> 1656 model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
1657 else:
1658 # to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config.
...
TypeError: device() received an invalid combination of arguments - got (NoneType), but expected one of:
* (torch.device device)
didn't match because some of the arguments have invalid types: (!NoneType!)
* (str type, int index)
I have tried various fixes, but it keeps giving the same error. What is causing this, and how can I get this training to run on MPS?