In [None]:
import os

# Set the WANDB_DISABLED flag for this process before any training code runs,
# so the run is not reported to Weights & Biases.
os.environ.update({"WANDB_DISABLED": "true"})

In [None]:
!pip install -q transformers datasets peft accelerate evaluate scikit-learn

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model

# Checkpoint shared by the tokenizer and the sequence classifier.
model_name = "bert-base-uncased"

# Tokenizer for that checkpoint, then the IMDB dataset from the Hub.
tokenizer = AutoTokenizer.from_pretrained(model_name)
dataset = load_dataset("imdb")

def preprocess(example):
    """Tokenize the ``text`` field of ``example`` to a fixed length.

    Used as the callback for ``dataset.map(..., batched=True)``, so
    ``example`` is a batch of rows rather than a single row. Every sequence
    is truncated or padded to exactly 128 tokens.

    Returns whatever the module-level ``tokenizer`` returns for that text.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(example["text"], **tokenizer_options)

# Tokenize every split in batches and rename the target column to "labels".
encoded_dataset = (
    dataset
    .map(preprocess, batched=True)
    .rename_column("label", "labels")
)
# Make indexing into the dataset return torch tensors.
encoded_dataset.set_format("torch")

# LoRA hyper-parameters, collected in one place so they are easy to tweak.
lora_settings = dict(
    task_type="SEQ_CLS",                # sequence-classification task
    target_modules=["query", "value"],  # module names that receive adapters
    r=8,                                # rank of the low-rank update
    lora_alpha=16,                      # scaling applied to the update
    lora_dropout=0.05,
    bias="none",
)
lora_config = LoraConfig(**lora_settings)

# Load the pretrained encoder with a fresh 2-class classification head ...
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
# ... and wrap it with the LoRA adapters described by `lora_config`.
model = get_peft_model(base_model, lora_config)

# Training configuration for the short LoRA fine-tuning run.
training_args = TrainingArguments(
    output_dir="./lora-imdb",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-4,
    num_train_epochs=2,
    logging_dir="./logs",
    # Log the training loss once per epoch. The default is to log every 500
    # optimizer steps, which this run never reaches (2000 examples / 16 per
    # batch * 2 epochs is about 250 steps), so the "Training Loss" column
    # would otherwise read "No log".
    logging_strategy="epoch",
    report_to="none",
    # Evaluate and checkpoint at the end of every epoch.
    eval_strategy="epoch",
    save_strategy="epoch",
)

def compute_metrics(eval_pred):
    """Return classification accuracy for one evaluation pass.

    ``eval_pred`` is the (predictions, label_ids) pair that ``Trainer`` passes
    to its ``compute_metrics`` callback; both are expected to be NumPy arrays,
    with predictions holding one row of class logits per example.
    """
    logits, labels = eval_pred
    predicted_classes = logits.argmax(axis=-1)
    return {"accuracy": float((predicted_classes == labels).mean())}


# Small fixed-seed subsets keep the run short: 2000 training reviews and
# 1000 held-out reviews.
train_subset = encoded_dataset["train"].shuffle(seed=42).select(range(2000))
eval_subset = encoded_dataset["test"].shuffle(seed=42).select(range(1000))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    # Report accuracy next to the validation loss at every evaluation.
    compute_metrics=compute_metrics,
)

# Run the fine-tuning loop configured above.
trainer.train()

# Write the model via save_pretrained into a dedicated directory for reuse.
adapter_dir = "./lora-imdb-adapter"
model.save_pretrained(adapter_dir)

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,No log,0.55726
2,No log,0.4208


In [None]:
from peft import AutoPeftModelForSequenceClassification
from transformers import pipeline

# "./lora-imdb-adapter" holds only what `model.save_pretrained` wrote for the
# PEFT-wrapped model, not a full standalone checkpoint. Load it explicitly
# through PEFT: this rebuilds the base model named in the adapter config and
# restores the adapter weights on top of it. Passing the adapter directory
# straight to `pipeline` relies on transformers' implicit adapter loading, and
# whether the trained classifier head is restored that way depends on the
# installed library versions.
adapter_dir = "./lora-imdb-adapter"
peft_model = AutoPeftModelForSequenceClassification.from_pretrained(
    adapter_dir,
    num_labels=2,
)
# Fold the LoRA updates into the base weights so the pipeline receives a plain
# transformers model.
inference_model = peft_model.merge_and_unload()
inference_model.eval()

sentiment = pipeline(
    "text-classification",
    model=inference_model,
    tokenizer="bert-base-uncased"
)
# The model config carries no class names, so map the generic pipeline labels
# to readable ones (IMDB: 0 = negative, 1 = positive).
label_map = {"LABEL_0": "NEGATIVE", "LABEL_1": "POSITIVE"}

examples = [
    "I absolutely loved this film—best sci-fi I’ve seen in years!",
    "It was okay, not great, but worth a watch.",
    "Terrible plot, terrible acting, total waste of time."
]

# Classify each example and print a truncated preview with label and score.
for text in examples:
    result = sentiment(text)[0]
    print(f"{text[:40]}... → {label_map[result['label']]} ({result['score']:.2f})")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


I absolutely loved this film—best sci-fi... → POSITIVE (0.68)
It was okay, not great, but worth a watc... → NEGATIVE (0.58)
Terrible plot, terrible acting, total wa... → NEGATIVE (0.95)
