In [1]:
import torch
import sys
import gc

# Environment sanity check: report interpreter/framework versions and
# GPU availability before training.
for info in (
    sys.version,
    f"PyTorch Version: {torch.__version__}",
    torch.cuda.is_available(),
    torch.cuda.device_count(),
):
    print(info)

if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(torch.cuda.get_device_name(0))
    # Drop Python garbage and hand cached CUDA blocks back to the driver so
    # the training run below starts from a clean slate.
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

import bitsandbytes
import peft

print(f"bitsandbytes version: {bitsandbytes.__version__}")
print(f"peft version: {peft.__version__}")
print(torch.cuda.is_bf16_supported())
3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)] PyTorch Version: 2.5.1+cu121 True 1 CUDA Version: 12.1 NVIDIA GeForce RTX 4080 Laptop GPU bitsandbytes version: 0.43.1 peft version: 0.11.1 True
In [2]:
from transformers import AutoTokenizer
import re
import torch, numpy as np
from datasets import load_dataset, Dataset

model_name = "Qwen/Qwen2.5-0.5B-Instruct"
run_name="Qwen-0.5B-SFT-ultrachat"

# Load data
SYSTEM_PROMPT = "You are a Taylor Swift expert. Answer CORRECTLY and CONCISELY questions about Taylor Swift's life, achievements, songs, and more."
dataset_name = "lamini/taylor_swift"


def get_data(dataset_name, split="train") -> Dataset:
    """Loads and formats the dataset into Qwen-compatible ChatML format.

    Args:
        dataset_name: Hub id of the dataset to load.
        split: Which split to fetch ("train" or "test").

    Returns:
        A Dataset with two columns: "prompt" (system+user message list)
        and "answer" (the reference answer string).
    """
    # FIX: request only the needed split instead of downloading/indexing the
    # whole DatasetDict (`load_dataset(name)[split]` materializes every split).
    data = load_dataset(dataset_name, split=split)

    def format_for_qwen(example):
        # Wrap each QA pair as a ChatML-style message list; the reward
        # functions downstream read the reference text from "answer".
        return {
            "prompt": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": example["question"]},
            ],
            "answer": example["answer"],
        }

    return data.map(format_for_qwen).select_columns(["prompt", "answer"])


train_dataset = get_data(dataset_name, split="train")
eval_dataset = get_data(dataset_name, split="test")
print(train_dataset.column_names)
print(eval_dataset.column_names)
print(len(train_dataset), len(eval_dataset))
['prompt', 'answer'] ['prompt', 'answer'] 783 87
In [3]:
# Load the tokenizer and make sure pad/bos tokens exist.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Prefer reusing EOS as the pad token (Qwen models ship an EOS token);
# only mint a brand-new <PAD> token if neither exists.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "<PAD>"})

# Alias BOS to EOS so code expecting a bos_token works with Qwen.
tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
tokenizer.bos_token_id = tokenizer.eos_token_id

# Left padding so batched generation continues from the sequence end.
tokenizer.padding_side = "left"
In [4]:
from rouge_score import rouge_scorer


# ROUGE-L based reward function
def rouge_reward_func(prompts, completions, answer, **kwargs) -> list[float]:
    """Reward = ROUGE-L F1 between each completion and its reference answer."""
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    hypotheses = [completion[0]['content'] for completion in completions]
    return [
        scorer.score(reference, hypothesis)['rougeL'].fmeasure
        for hypothesis, reference in zip(hypotheses, answer)
    ]
# Length similarity reward function
def length_similarity_reward_func(prompts, completions, answer, scale_factor=0.4, **kwargs) -> list[float]:
    """Reward how closely each completion's word count matches the reference.

    Score is (min_len / max_len) * scale_factor, or 0.0 when the reference
    is empty.
    """
    rewards = []
    for completion, reference in zip(completions, answer):
        n_resp = len(completion[0]['content'].split())
        n_ref = len(reference.split())
        if n_ref > 0:
            # Ratio is in [0, 1]; identical lengths yield the full scale_factor.
            rewards.append((min(n_resp, n_ref) / max(n_resp, n_ref)) * scale_factor)
        else:
            rewards.append(0.0)
    return rewards
# Final combined reward (60% ROUGE, 40% length)
def combined_reward(prompts, completions, answer, **kwargs) -> list[float]:
    """Blend content quality (ROUGE-L, weight 0.6) with length similarity (0.4)."""
    rouge_scores = rouge_reward_func(prompts, completions, answer)
    length_scores = length_similarity_reward_func(prompts, completions, answer)
    return [
        (0.6 * rouge) + (0.4 * length)
        for rouge, length in zip(rouge_scores, length_scores)
    ]
In [5]:
from transformers import AutoModelForCausalLM
from trl import GRPOConfig, GRPOTrainer
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

model_name = "Qwen/Qwen2.5-0.5B-Instruct"
output_dir = "./results_grpo_on_ts"

# GRPO hyperparameters: bf16 + paged optimizer + gradient checkpointing keep
# the 0.5B model within laptop-GPU memory.
training_args = GRPOConfig(
    output_dir=output_dir,
    learning_rate=1e-4,
    adam_beta1=0.9,
    adam_beta2=0.99,
    optim="paged_adamw_32bit",
    weight_decay=0.01,
    warmup_ratio=0.05,
    max_steps=101,
    lr_scheduler_type='cosine_with_restarts',
    bf16=True,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    num_generations=4,  # GRPO group size (completions sampled per prompt)
    max_prompt_length=256,
    max_completion_length=256,
    #num_train_epochs=6,
    log_on_each_node=False,
    report_to="none",
    logging_steps=10,
    save_steps=10,
    #eval_strategy="steps",
    #eval_steps=10,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    #load_best_model_at_end=True, # Crucial for saving best model
    #metric_for_best_model="eval_loss"
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",       # let accelerate place the weights
    trust_remote_code=True   # Required for Qwen models
)
# FIX: removed the trailing `.to("cuda")` — calling .to() on a model loaded
# with device_map="auto" fights accelerate's placement and is redundant on a
# single-GPU machine (the printed map {'': 0} already puts everything on GPU 0).

# Prepare LoRA Configuration: adapt every linear layer of the causal LM.
peft_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules="all-linear",
    task_type="CAUSAL_LM",
    lora_dropout=0.05,
    bias="none",
)

# FIX: dropped the duplicate `from peft import get_peft_model` (already
# imported at the top of this cell).
model = get_peft_model(model, peft_config)

# Double-check if model is fully on GPU
print(model.hf_device_map)
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
{'': 0}
In [6]:
trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,
    # FIX: pass the reward function directly. The previous lambda wrapper
    # `lambda prompts, completions, answer: ...` dropped any extra keyword
    # arguments the trainer forwards, which combined_reward's **kwargs is
    # specifically written to absorb.
    reward_funcs=[combined_reward],
    args=training_args,
    train_dataset=train_dataset,
    #eval_dataset=tokenized_eval_dataset,
    # NOTE(review): `model` was already wrapped with get_peft_model in the
    # previous cell; also passing peft_config here may wrap it twice —
    # confirm against the installed TRL version's behavior.
    peft_config=peft_config,
)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
In [7]:
# Run GRPO training, persist the adapter, and plot the training-loss curve.
trainer.train()
print(torch.cuda.memory_summary())

# Save QLoRA adapter weights
trainer.model.save_pretrained("Qwen-0.5B-qlora", safe_serialization=True)
#trainer.save_model("best_Qwen-0.5B-qlora")

# Accessing the logs (after training):
log_history = trainer.state.log_history

import matplotlib.pyplot as plt

# Collect (step, loss) pairs from the trainer log in a single pass.
train_steps, train_losses = [], []
for entry in trainer.state.log_history:
    if "loss" in entry:
        train_steps.append(entry["step"])
        train_losses.append(entry["loss"])

#val_steps = [entry["step"] for entry in trainer.state.log_history if "eval_loss" in entry]
#val_losses = [entry["eval_loss"] for entry in trainer.state.log_history if "eval_loss" in entry]

plt.plot(train_steps, train_losses, label="Training Loss", linestyle="-")
#plt.plot(val_steps, val_losses, label="Validation Loss", linestyle="--")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss vs. Steps")
plt.legend()
plt.show()
#trainer.eval_dataset = eval_dataset
#print("Evaluation on test set:", trainer.evaluate())
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`. C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
[101/101 40:37, Epoch 1/2]
| Step | Training Loss |
|---|---|
| 10 | 0.059700 |
| 20 | 0.011300 |
| 30 | 0.011800 |
| 40 | 0.006300 |
| 50 | 0.003600 |
| 60 | 0.006700 |
| 70 | 0.007200 |
| 80 | 0.007900 |
| 90 | 0.007800 |
| 100 | 0.007600 |
C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. 
return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
|===========================================================================| | PyTorch CUDA memory summary, device ID 0 | |---------------------------------------------------------------------------| | CUDA OOMs: 0 | cudaMalloc retries: 0 | |===========================================================================| | Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed | |---------------------------------------------------------------------------| | Allocated memory | 1048 MiB | 7271 MiB | 21697 GiB | 21696 GiB | | from large pool | 955 MiB | 7144 MiB | 17732 GiB | 17731 GiB | | from small pool | 92 MiB | 175 MiB | 3965 GiB | 3965 GiB | |---------------------------------------------------------------------------| | Active memory | 1048 MiB | 7271 MiB | 21697 GiB | 21696 GiB | | from large pool | 955 MiB | 7144 MiB | 17732 GiB | 17731 GiB | | from small pool | 92 MiB | 175 MiB | 3965 GiB | 3965 GiB | |---------------------------------------------------------------------------| | Requested memory | 1040 MiB | 7263 MiB | 21339 GiB | 21338 GiB | | from large pool | 947 MiB | 7136 MiB | 17375 GiB | 17375 GiB | | from small pool | 92 MiB | 175 MiB | 3963 GiB | 3963 GiB | |---------------------------------------------------------------------------| | GPU reserved memory | 10736 MiB | 10736 MiB | 10738 MiB | 2048 KiB | | from large pool | 10552 MiB | 10552 MiB | 10552 MiB | 0 KiB | | from small pool | 184 MiB | 184 MiB | 186 MiB | 2048 KiB | |---------------------------------------------------------------------------| | Non-releasable memory | 87317 KiB | 4064 MiB | 21016 GiB | 21016 GiB | | from large pool | 57600 KiB | 4033 MiB | 16969 GiB | 16969 GiB | | from small pool | 29717 KiB | 49 MiB | 4046 GiB | 4046 GiB | |---------------------------------------------------------------------------| | Allocations | 1157 | 1836 | 52155 K | 52154 K | | from large pool | 123 | 178 | 2571 K | 2571 K | | from small pool | 1034 | 1713 | 49583 K | 49582 K | 
|---------------------------------------------------------------------------| | Active allocs | 1157 | 1836 | 52155 K | 52154 K | | from large pool | 123 | 178 | 2571 K | 2571 K | | from small pool | 1034 | 1713 | 49583 K | 49582 K | |---------------------------------------------------------------------------| | GPU reserved segments | 156 | 156 | 157 | 1 | | from large pool | 64 | 64 | 64 | 0 | | from small pool | 92 | 92 | 93 | 1 | |---------------------------------------------------------------------------| | Non-releasable allocs | 67 | 191 | 21952 K | 21952 K | | from large pool | 14 | 40 | 1780 K | 1780 K | | from small pool | 53 | 177 | 20171 K | 20171 K | |---------------------------------------------------------------------------| | Oversize allocations | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Oversize GPU segments | 0 | 0 | 0 | 0 | |===========================================================================|
In [8]:
#trainer.eval_dataset = eval_dataset
#print("Evaluation on test set:", trainer.evaluate())
from rouge_score import rouge_scorer

# Evaluate the trained model: average ROUGE-L F1 between generations and
# reference answers over the eval split.
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
separator = ";Assistant:"

total_rouge_l = 0
for example in eval_dataset:
    # NOTE(review): concatenating raw message contents plus a literal
    # separator does not match Qwen's ChatML template — consider
    # tokenizer.apply_chat_template for prompts the model was tuned on.
    prompt_text = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    inputs = trainer.processing_class(prompt_text, return_tensors='pt', padding=True).to('cuda')
    # FIX: inference only — no_grad avoids accumulating autograd state
    # (and memory) across the eval loop.
    with torch.no_grad():
        generated_ids = trainer.model.generate(**inputs)
    generated_text = trainer.processing_class.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Keep only the text after the separator if the model echoed the prompt.
    if separator in generated_text:
        generated_text = generated_text.split(separator, 1)[-1].strip()
    # Calculate ROUGE-L F1 score
    rouge_scores = scorer.score(example["answer"], generated_text)
    total_rouge_l += rouge_scores['rougeL'].fmeasure

average_rouge_l = total_rouge_l / len(eval_dataset)
print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")
Average ROUGE-L F1 score on test set: 0.3118916600762736
In [9]:
# Second ROUGE-L evaluation pass — the same computation as the previous cell,
# rerun with fresh generations (variables suffixed with 1).
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
separator = ";Assistant:"

total_rouge_l = 0
for example in eval_dataset:
    # Flat prompt string: system + user content followed by the separator tag.
    prompt_text1 = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    # Tokenize and generate using trainer.model
    tokenized_input = trainer.processing_class(prompt_text1, return_tensors='pt', padding=True).to('cuda')
    generated_ids = trainer.model.generate(**tokenized_input)
    generated_text1 = trainer.processing_class.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Extract the response part if separator is present
    if separator in generated_text1:
        generated_text1 = generated_text1.split(separator, 1)[-1].strip()
    # Accumulate ROUGE-L F1 against the reference answer.
    total_rouge_l += scorer.score(example["answer"], generated_text1)['rougeL'].fmeasure

average_rouge_l = total_rouge_l / len(eval_dataset)
print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[9], line 23 21 # Compute averages 22 average_rouge_l = total_rouge_l / len(eval_dataset) ---> 23 average_inference_time = total_inference_time / len(eval_dataset) 25 print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}") NameError: name 'total_inference_time' is not defined
In [10]:
# Re-print the last computed eval ROUGE-L (value persists in the kernel).
print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")
Average ROUGE-L F1 score on test set: 0.3201781228082373
In [11]:
# Sanity check: the two eval loops built identical prompts for the last example.
# NOTE(review): relies on variables leaked from earlier cells — this breaks
# under Restart & Run All if those cells are skipped.
prompt_text1==prompt_text
Out[11]:
True
In [12]:
# Compare the two generations for the last eval example. They differ (see
# Out[12]: False) despite identical prompts — presumably sampling is enabled
# in the model's generation_config; verify before relying on determinism.
print(generated_text)
print(generated_text1)
generated_text1==generated_text
Taylor Swift received the 2019 Grammy Award for Best New Artist. She also won the Taylor Swift received several awards and recognitions from Billboard for her cultural impact. Some of the most notable
Out[12]:
False
In [13]:
import torch
import torch.nn.functional as F


def compute_perplexity(model, tokenizer, text):
    """Compute perplexity of a given text sequence using a trained model.

    Args:
        model: A causal LM that returns an object with ``.loss`` when
            called with ``labels``.
        tokenizer: The matching tokenizer / processing class.
        text: The string to score.

    Returns:
        float: exp(mean cross-entropy loss) over the tokenized text.
    """
    # FIX: derive the device from the model instead of hard-coding "cuda",
    # so the helper also works on CPU-only machines.
    device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    # NOTE(review): labels == input_ids, so any pad tokens are scored too.
    # With a single sequence no padding is added, but batched use should
    # mask pad positions to -100.
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    # Perplexity = exp(mean negative log-likelihood)
    return torch.exp(outputs.loss).item()
In [14]:
# Perplexity of the last eval prompt (prompt_text1 leaked from the loop above).
ppl = compute_perplexity(trainer.model, trainer.processing_class, prompt_text1)
print(f"Perplexity: {ppl}")
Perplexity: 90.62464904785156
In [15]:
# Same check on prompt_text from the first eval loop; prompts are equal
# (see In[11]) so values should match up to tokenization details.
ppl = compute_perplexity(trainer.model, trainer.processing_class, prompt_text)
print(f"Perplexity: {ppl}")
Perplexity: 90.2175064086914
In [16]:
# Average perplexity of the eval *prompts* under the trained model.
sum_ppl = 0
for example in eval_dataset:
    # Prepare the same flat prompt used by the ROUGE evaluation loops.
    prompt_text1 = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    # FIX: the original loop also ran trainer.model.generate() here, but the
    # generated text was never used — perplexity below is computed on the
    # prompt, not on the generation. The dead generate() call (the slowest
    # part of the loop) is removed; restore it if scoring generations was
    # the actual intent.
    ppl = compute_perplexity(trainer.model, trainer.processing_class, prompt_text1)
    sum_ppl += ppl

# Compute averages
average_ppl = sum_ppl / len(eval_dataset)
In [17]:
# Report the average prompt perplexity computed in the previous cell.
print(f"Average Perplexity: {average_ppl}")
Average Perplexity: 84.77902230449106
In [18]:
# Second GRPO training run. The adapter weights carry over from the first
# run; NOTE(review): confirm whether optimizer/scheduler state and
# trainer.state.log_history reset or accumulate across train() calls —
# the plot below assumes log_history reflects this run.
trainer.train()
print (torch.cuda.memory_summary())
# Save QLoRA weights
trainer.model.save_pretrained("Qwen-0.5B-qlora", safe_serialization=True)
#trainer.save_model("best_Qwen-0.5B-qlora")
# Accessing the logs (after training):
log_history = trainer.state.log_history
# Plot the loss vs steps
import matplotlib.pyplot as plt
# Extract training loss
train_steps = [entry["step"] for entry in trainer.state.log_history if "loss" in entry]
train_losses = [entry["loss"] for entry in trainer.state.log_history if "loss" in entry]
# Extract validation loss
#val_steps = [entry["step"] for entry in trainer.state.log_history if "eval_loss" in entry]
#val_losses = [entry["eval_loss"] for entry in trainer.state.log_history if "eval_loss" in entry]
# Plot both training and validation loss
# NOTE(review): step-100 loss jumps to 99.167 in the run output — worth
# investigating (possible divergence at the end of this run).
plt.plot(train_steps, train_losses, label="Training Loss", linestyle="-")
#plt.plot(val_steps, val_losses, label="Validation Loss", linestyle="--")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss vs. Steps")
plt.legend()
plt.show()
C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
[101/101 49:12, Epoch 1/2]
| Step | Training Loss |
|---|---|
| 10 | 0.007900 |
| 20 | 0.006600 |
| 30 | 0.014200 |
| 40 | 0.012400 |
| 50 | 0.005300 |
| 60 | 0.005700 |
| 70 | 0.007600 |
| 80 | 0.031500 |
| 90 | 0.019300 |
| 100 | 99.167000 |
C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. 
return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
|===========================================================================| | PyTorch CUDA memory summary, device ID 0 | |---------------------------------------------------------------------------| | CUDA OOMs: 0 | cudaMalloc retries: 0 | |===========================================================================| | Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed | |---------------------------------------------------------------------------| | Allocated memory | 1048 MiB | 7271 MiB | 44249 GiB | 44248 GiB | | from large pool | 955 MiB | 7144 MiB | 35685 GiB | 35684 GiB | | from small pool | 92 MiB | 175 MiB | 8563 GiB | 8563 GiB | |---------------------------------------------------------------------------| | Active memory | 1048 MiB | 7271 MiB | 44249 GiB | 44248 GiB | | from large pool | 955 MiB | 7144 MiB | 35685 GiB | 35684 GiB | | from small pool | 92 MiB | 175 MiB | 8563 GiB | 8563 GiB | |---------------------------------------------------------------------------| | Requested memory | 1040 MiB | 7263 MiB | 43559 GiB | 43558 GiB | | from large pool | 947 MiB | 7136 MiB | 35002 GiB | 35001 GiB | | from small pool | 92 MiB | 175 MiB | 8557 GiB | 8557 GiB | |---------------------------------------------------------------------------| | GPU reserved memory | 19944 MiB | 19944 MiB | 29548 MiB | 9604 MiB | | from large pool | 19760 MiB | 19760 MiB | 29300 MiB | 9540 MiB | | from small pool | 184 MiB | 184 MiB | 248 MiB | 64 MiB | |---------------------------------------------------------------------------| | Non-releasable memory | 87315 KiB | 4064 MiB | 42222 GiB | 42222 GiB | | from large pool | 57600 KiB | 4053 MiB | 33483 GiB | 33483 GiB | | from small pool | 29715 KiB | 49 MiB | 8738 GiB | 8738 GiB | |---------------------------------------------------------------------------| | Allocations | 1160 | 1839 | 116829 K | 116827 K | | from large pool | 123 | 178 | 5161 K | 5161 K | | from small pool | 1037 | 1716 | 111667 K | 111666 K | 
|---------------------------------------------------------------------------| | Active allocs | 1160 | 1839 | 116829 K | 116827 K | | from large pool | 123 | 178 | 5161 K | 5161 K | | from small pool | 1037 | 1716 | 111667 K | 111666 K | |---------------------------------------------------------------------------| | GPU reserved segments | 163 | 163 | 220 | 57 | | from large pool | 71 | 71 | 96 | 25 | | from small pool | 92 | 92 | 124 | 32 | |---------------------------------------------------------------------------| | Non-releasable allocs | 69 | 205 | 50286 K | 50286 K | | from large pool | 14 | 40 | 3644 K | 3644 K | | from small pool | 55 | 191 | 46641 K | 46641 K | |---------------------------------------------------------------------------| | Oversize allocations | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Oversize GPU segments | 0 | 0 | 0 | 0 | |===========================================================================|
In [20]:
# Post-(second-)training evaluation: this cell merges the earlier In[8] and
# In[9] duplicates — the same ROUGE-L loop run twice back to back.
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
separator = ";Assistant:"

total_rouge_l = 0
for example in eval_dataset:
    prompt_text = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    generated_text = trainer.model.generate(
        **trainer.processing_class(prompt_text, return_tensors='pt', padding=True).to('cuda')
    )
    generated_text = trainer.processing_class.batch_decode(generated_text, skip_special_tokens=True)[0]
    if separator in generated_text:
        generated_text = generated_text.split(separator, 1)[-1].strip()
    # Calculate ROUGE-L F1 score
    total_rouge_l += scorer.score(example["answer"], generated_text)['rougeL'].fmeasure

average_rouge_l = total_rouge_l / len(eval_dataset)
print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")

# Second pass — identical computation with fresh generations; the two
# averages differ slightly (see the run output), presumably because
# generation is stochastic.
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
separator = ";Assistant:"

total_rouge_l = 0
for example in eval_dataset:
    # Prepare prompt
    prompt_text1 = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    # Tokenize and generate using trainer.model
    tokenized_input = trainer.processing_class(prompt_text1, return_tensors='pt', padding=True).to('cuda')
    generated_ids = trainer.model.generate(**tokenized_input)
    generated_text1 = trainer.processing_class.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Extract the response part if separator is present
    if separator in generated_text1:
        generated_text1 = generated_text1.split(separator, 1)[-1].strip()
    # Calculate ROUGE-L F1 score
    total_rouge_l += scorer.score(example["answer"], generated_text1)['rougeL'].fmeasure

average_rouge_l = total_rouge_l / len(eval_dataset)
print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")
Average ROUGE-L F1 score on test set: 0.33428491735408133 Average ROUGE-L F1 score on test set: 0.33276141847479157
In [19]:
# Average prompt perplexity after the second training run.
sum_ppl = 0
for example in eval_dataset:
    # Prepare the same flat prompt used by the ROUGE evaluation loops.
    prompt_text1 = "".join([d['content'] for d in example["prompt"]]) + " " + separator
    # FIX: as in the earlier perplexity cell, the generate() call whose
    # output was never used has been removed — perplexity is computed on
    # the prompt, not the generation. Restore generation here if scoring
    # model outputs was the actual intent.
    ppl = compute_perplexity(trainer.model, trainer.processing_class, prompt_text1)
    sum_ppl += ppl

# Compute averages
average_ppl = sum_ppl / len(eval_dataset)
print(f"Average Perplexity: {average_ppl}")
Average Perplexity: 81.32067114731362
In [21]:
# Third GRPO training run on the same trainer. NOTE(review): the run output
# shows a step-10 loss of 56.56 before settling — consistent with the spike
# at the end of the previous run; consider investigating stability (e.g.
# learning-rate restarts) before chaining more runs.
trainer.train()
print (torch.cuda.memory_summary())
# Save QLoRA weights
trainer.model.save_pretrained("Qwen-0.5B-qlora", safe_serialization=True)
#trainer.save_model("best_Qwen-0.5B-qlora")
# Accessing the logs (after training):
log_history = trainer.state.log_history
# Plot the loss vs steps
import matplotlib.pyplot as plt
# Extract training loss
train_steps = [entry["step"] for entry in trainer.state.log_history if "loss" in entry]
train_losses = [entry["loss"] for entry in trainer.state.log_history if "loss" in entry]
# Extract validation loss
#val_steps = [entry["step"] for entry in trainer.state.log_history if "eval_loss" in entry]
#val_losses = [entry["eval_loss"] for entry in trainer.state.log_history if "eval_loss" in entry]
# Plot both training and validation loss
plt.plot(train_steps, train_losses, label="Training Loss", linestyle="-")
#plt.plot(val_steps, val_losses, label="Validation Loss", linestyle="--")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss vs. Steps")
plt.legend()
plt.show()
C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
[101/101 39:08, Epoch 1/2]
| Step | Training Loss |
|---|---|
| 10 | 56.560900 |
| 20 | 0.011400 |
| 30 | 0.022900 |
| 40 | 0.008100 |
| 50 | 0.009300 |
| 60 | 0.008900 |
| 70 | 0.010500 |
| 80 | 0.010500 |
| 90 | 0.009900 |
| 100 | 0.009700 |
C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. 
return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs) C:\Users\alexa\miniconda3\envs\grpo_env\lib\site-packages\peft\tuners\tuners_utils.py:179: FutureWarning: `num_logits_to_keep` is deprecated and will be removed in version 4.50 for `Qwen2ForCausalLM.forward`. Use `logits_to_keep` instead. return self.model.forward(*args, **kwargs)
|===========================================================================| | PyTorch CUDA memory summary, device ID 0 | |---------------------------------------------------------------------------| | CUDA OOMs: 0 | cudaMalloc retries: 0 | |===========================================================================| | Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed | |---------------------------------------------------------------------------| | Allocated memory | 1048 MiB | 7271 MiB | 65636 GiB | 65635 GiB | | from large pool | 955 MiB | 7146 MiB | 52722 GiB | 52721 GiB | | from small pool | 92 MiB | 175 MiB | 12913 GiB | 12913 GiB | |---------------------------------------------------------------------------| | Active memory | 1048 MiB | 7271 MiB | 65636 GiB | 65635 GiB | | from large pool | 955 MiB | 7146 MiB | 52722 GiB | 52721 GiB | | from small pool | 92 MiB | 175 MiB | 12913 GiB | 12913 GiB | |---------------------------------------------------------------------------| | Requested memory | 1040 MiB | 7263 MiB | 64621 GiB | 64620 GiB | | from large pool | 947 MiB | 7136 MiB | 51718 GiB | 51717 GiB | | from small pool | 92 MiB | 175 MiB | 12903 GiB | 12903 GiB | |---------------------------------------------------------------------------| | GPU reserved memory | 13072 MiB | 19944 MiB | 41486 MiB | 28414 MiB | | from large pool | 12886 MiB | 19760 MiB | 41174 MiB | 28288 MiB | | from small pool | 186 MiB | 186 MiB | 312 MiB | 126 MiB | |---------------------------------------------------------------------------| | Non-releasable memory | 87315 KiB | 4064 MiB | 62646 GiB | 62645 GiB | | from large pool | 57600 KiB | 4053 MiB | 49468 GiB | 49468 GiB | | from small pool | 29715 KiB | 49 MiB | 13177 GiB | 13177 GiB | |---------------------------------------------------------------------------| | Allocations | 1160 | 1839 | 178322 K | 178321 K | | from large pool | 123 | 178 | 7664 K | 7664 K | | from small pool | 1037 | 1716 | 170658 K | 170656 K | 
|---------------------------------------------------------------------------| | Active allocs | 1160 | 1839 | 178322 K | 178321 K | | from large pool | 123 | 178 | 7664 K | 7664 K | | from small pool | 1037 | 1716 | 170658 K | 170656 K | |---------------------------------------------------------------------------| | GPU reserved segments | 159 | 163 | 279 | 120 | | from large pool | 66 | 71 | 123 | 57 | | from small pool | 93 | 93 | 156 | 63 | |---------------------------------------------------------------------------| | Non-releasable allocs | 69 | 205 | 77491 K | 77491 K | | from large pool | 14 | 40 | 5420 K | 5420 K | | from small pool | 55 | 191 | 72071 K | 72071 K | |---------------------------------------------------------------------------| | Oversize allocations | 0 | 0 | 0 | 0 | |---------------------------------------------------------------------------| | Oversize GPU segments | 0 | 0 | 0 | 0 | |===========================================================================|
In [22]:
# Evaluate average perplexity again after training.
# NOTE(review): this cell is a copy of the pre-training perplexity cell —
# consider extracting a shared helper so the two stay in sync.
# NOTE(review): perplexity is computed on the *prompt* text only, not on the
# reference answer — confirm against compute_perplexity's definition.
#
# Fix: the previous version also ran a full trainer.model.generate() +
# batch_decode per example, but the decoded text was never used; that dead
# generation pass is removed.
sum_ppl = 0
for example in eval_dataset:
    # Flatten the chat messages into a single prompt string.
    prompt_text1 = "".join(d["content"] for d in example["prompt"]) + " " + separator
    # Perplexity of the prompt under the fine-tuned model.
    ppl = compute_perplexity(trainer.model, trainer.processing_class, prompt_text1)
    sum_ppl += ppl

# Mean perplexity across all evaluation examples.
average_ppl = sum_ppl / len(eval_dataset)
print(f"Average Perplexity: {average_ppl}")
Average Perplexity: 83.73667605169888
In [23]:
# Evaluate generation quality with ROUGE-L F1 on the test set.
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
separator = ";Assistant:"


def evaluate_rouge_l(model, processor, dataset, sep):
    """Generate an answer for every example and return the mean ROUGE-L F1.

    Each prompt is the example's chat messages concatenated into one string,
    followed by `sep`; the model output after `sep` is scored against the
    reference answer with the module-level `scorer`.
    """
    total_rouge_l = 0.0
    for example in dataset:
        prompt_text = "".join(d["content"] for d in example["prompt"]) + " " + sep
        # Tokenize and generate with the fine-tuned model.
        tokenized_input = processor(prompt_text, return_tensors="pt", padding=True).to("cuda")
        generated_ids = model.generate(**tokenized_input)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        # Keep only the model's response, i.e. the text after the separator.
        if sep in generated_text:
            generated_text = generated_text.split(sep, 1)[-1].strip()
        total_rouge_l += scorer.score(example["answer"], generated_text)["rougeL"].fmeasure
    return total_rouge_l / len(dataset)


# Fix: the previous version of this cell contained the same evaluation loop
# copy-pasted twice (even re-creating an identical scorer and separator in
# between). The loop is now a single helper, run twice to preserve the
# original behavior of printing two scores — generation is stochastic, so
# the two runs legitimately differ slightly.
for _ in range(2):
    average_rouge_l = evaluate_rouge_l(
        trainer.model, trainer.processing_class, eval_dataset, separator
    )
    print(f"Average ROUGE-L F1 score on test set: {average_rouge_l}")
Average ROUGE-L F1 score on test set: 0.32765298023497946 Average ROUGE-L F1 score on test set: 0.3227116878619112
In [ ]: