Pretrained models¶

References:

  • See the tutorials from Hugging Face
In [1]:
# You must install transformers library first 
# !pip install transformers
In [2]:
# Ignore warnings for the demo 
# TODO: comment this out if you'd like to see the warnings 
import logging
logging.disable(logging.WARNING)
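
Note that logging.disable only silences the logging module; the UserWarnings that appear later in this notebook come from Python's separate warnings module. A minimal sketch to silence those as well, if you prefer a cleaner demo:

import warnings
warnings.filterwarnings("ignore")  # suppress Python warnings too (not just logging)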

Softmax¶

The softmax function generalizes the logistic (sigmoid) function to multiple classes, mapping a vector of real-valued scores to a probability distribution:
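$$\mathrm{softmax}(x)_i = \frac{e^{x_i}}{\sum_{j=1}^{K} e^{x_j}}$$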

In [3]:
import torch
torch.set_printoptions(sci_mode=False) # no scientific notation on prints
                                       # just to visualize outputs better
In [4]:
a = torch.tensor([2.0, -3.0, 5.0, 7.0])
a
Out[4]:
tensor([ 2., -3.,  5.,  7.])
In [5]:
softmax = torch.nn.Softmax(dim=0) # dim: the dimension of the input tensor
                                  # along which the outputs should sum to 1
out = softmax(a)
out 
Out[5]:
tensor([    0.0059,     0.0000,     0.1185,     0.8756])
In [6]:
sum(out)
Out[6]:
tensor(1.)
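
Note that the second entry above is not exactly zero: exp(-3) divided by the sum of exponentials is about 4e-5, which the print options round to 0.0000. As a sanity check, the same computation by hand:

# softmax by hand: exponentiate, then normalize by the sum
manual = torch.exp(a) / torch.exp(a).sum()
manual  # matches the torch.nn.Softmax output above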

Generation¶

In [7]:
from transformers import pipeline
In [8]:
# "Distilled" version of GPT-2 (smaller but still decent performance)
generator = pipeline("text-generation", model="distilgpt2")
In [9]:
generator(
    "Hath in the",
    max_length=30,
    num_return_sequences=5,
)
/Users/katherinekeith/miniconda3/envs/cs375/lib/python3.8/site-packages/transformers/generation/utils.py:1186: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)
  warnings.warn(
Out[9]:
[{'generated_text': 'Hath in the Middle East,” he said.\n\n\n\n"At the same time, I think it\'s interesting that we still'},
 {'generated_text': 'Hath in the dark. The eyes of her father-in-law, Srinagar, are now glazed with red lights.\n\n'},
 {'generated_text': 'Hath in the morning, the man was rushed to the scene and died in a hospital at the scene. (Published Thursday, Jan. 21,'},
 {'generated_text': 'Hath in the United States.\n\n\nThe Obama administration’s attempt to build on the campaign rhetoric and the press conferences around it at'},
 {'generated_text': 'Hath in the last few episodes.\n\n\n\n\n\nThis episode also includes exclusive material and the interview with Eileen Lee (Dr.'}]
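
The UserWarning above is transformers telling us that passing max_length directly to the pipeline is a deprecated way to control generation. One way to avoid it (a sketch, assuming a reasonably recent transformers version) is to pass generation arguments such as max_new_tokens and do_sample explicitly:

generator(
    "Hath in the",
    max_new_tokens=30,       # number of new tokens to generate
    do_sample=True,          # sample from the distribution rather than decode greedily
    num_return_sequences=5,
)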
In [10]:
# Why don't these sound like Shakespeare?

Zero-shot text classification¶

The model is applied as-is, with no fine-tuning on in-domain data.

In [11]:
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
In [12]:
classifier(
    "while the total impact on grain production is unclear china has said the flooding slashed summer grain output by 11 percent from last year's harvest",
    candidate_labels=["not about aid", "about aid"],
)
Out[12]:
{'sequence': "while the total impact on grain production is unclear china has said the flooding slashed summer grain output by 11 percent from last year's harvest",
 'labels': ['about aid', 'not about aid'],
 'scores': [0.6267188191413879, 0.3732811510562897]}
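
Under the hood, this pipeline recasts classification as natural language inference: the input text is the premise, each candidate label is slotted into a hypothesis template (by default "This example is {}."), and the MNLI-trained model's entailment probability becomes that label's score. You can change the wording with the hypothesis_template argument; a sketch:

classifier(
    "while the total impact on grain production is unclear china has said the flooding slashed summer grain output by 11 percent from last year's harvest",
    candidate_labels=["not about aid", "about aid"],
    hypothesis_template="This text is {}.",  # "{}" is replaced by each candidate label
)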

Fine-tuning¶

In [13]:
import csv
import os 
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
from transformers import Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score

Load data

In [14]:
# Change this to your own path to the HW 2 data
path_to_hw2_data = './data/triage/'
In [15]:
def load_triage_data(data_dir, split_name):
    all_texts = []
    all_labels = []
    
    with open(os.path.join(data_dir, split_name + ".csv"),
              newline='', mode="r", encoding="utf8") as infile:
        reader = csv.DictReader(infile, delimiter="|")
        for row in reader:
            text = row["Text"]
            label = int(row["Label"])
            all_texts.append(text)
            all_labels.append(label)
    print(f'read {len(all_texts)} lines of data')
    return all_texts, all_labels
In [16]:
train_texts, train_labels = load_triage_data(path_to_hw2_data, 'train')
read 21046 lines of data
In [17]:
dev_texts, dev_labels = load_triage_data(path_to_hw2_data, 'dev')
read 2573 lines of data

Tokenize

In [18]:
MAX_LENGTH = 20  # maximum number of tokens per example (longer inputs are truncated)
MODEL_NAME = 'distilbert-base-uncased'
# DistilBERT is a small, fast, cheap, and light Transformer model
In [19]:
tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_NAME) # the tokenizer must match our pre-trained model
In [20]:
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=MAX_LENGTH)
dev_encodings  = tokenizer(dev_texts, truncation=True, padding=True, max_length=MAX_LENGTH)
In [21]:
train_labels_encoded = torch.tensor(train_labels)
dev_labels_encoded = torch.tensor(dev_labels)
In [22]:
' '.join(train_encodings[0].tokens[0:MAX_LENGTH])
Out[22]:
'[CLS] i would like to know when the national archives will begin to work [SEP] [PAD] [PAD] [PAD] [PAD] [PAD]'
In [23]:
' '.join(train_encodings[200].tokens[0:MAX_LENGTH])
Out[23]:
'[CLS] night time loading takes some time under the flood ##lights [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'
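
Beyond the token strings shown above, each encoding also stores input_ids (vocabulary indices) and an attention_mask that is 0 on the [PAD] positions; a quick look:

print(train_encodings['input_ids'][0])       # vocabulary indices for the first example
print(train_encodings['attention_mask'][0])  # 1 for real tokens, 0 for [PAD]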
In [24]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # the encoding values are plain Python lists, so torch.tensor is appropriate here
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # the labels are already tensors; clone().detach() avoids the copy-construct UserWarning
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        return len(self.labels)
In [25]:
train_dataset = MyDataset(train_encodings, train_labels_encoded)
dev_dataset = MyDataset(dev_encodings, dev_labels_encoded)
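
Each item the Trainer will receive is a dict of tensors; for example:

example = train_dataset[0]
{key: value.shape for key, value in example.items()}  # input_ids, attention_mask, labels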
In [26]:
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
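
This loads the pretrained DistilBERT encoder and attaches a freshly initialized two-class classification head (the usual "newly initialized weights" warning is suppressed above), so the model needs fine-tuning before its predictions mean anything. A rough size check, as a sketch:

sum(p.numel() for p in model.parameters())  # roughly 67M parameters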
In [27]:
# Hyperparameters you need to set! 
training_args = TrainingArguments(
    num_train_epochs=1,              # total number of training epochs
    per_device_train_batch_size=50,  # batch size per device during training
    per_device_eval_batch_size=50,   # batch size for evaluation
    learning_rate=1e-5,              # initial learning rate for the AdamW optimizer
    output_dir='./results',          # output directory
    logging_steps=10,                # log (and evaluate) every 10 steps so we can watch progress
    evaluation_strategy='steps',     # evaluate during fine-tuning so that we can see progress
)
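
With 21,046 training examples and a batch size of 50, one epoch is ⌈21046 / 50⌉ = 421 optimizer steps (and the 2,573 dev examples take ⌈2573 / 50⌉ = 52 evaluation steps), which matches the progress counters in the output below.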
In [28]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(axis=-1)
    acc = accuracy_score(labels, preds)
    return {
      'accuracy': acc,
    }
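
Here pred.predictions holds the raw logits with shape (num_examples, 2), so the argmax over the last axis is the predicted class. If you also wanted F1, a minimal extension (assuming scikit-learn, already imported above for accuracy):

from sklearn.metrics import f1_score

def compute_metrics_with_f1(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(axis=-1)
    return {
        'accuracy': accuracy_score(labels, preds),
        'f1': f1_score(labels, preds),
    }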
In [29]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=dev_dataset,            # evaluation dataset (here, our dev set)
    compute_metrics=compute_metrics      # our custom evaluation function 
)
In [30]:
trainer.train()
/Users/katherinekeith/miniconda3/envs/cs375/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
  warnings.warn(
[421/421 08:19, Epoch 1/1]
Step Training Loss Validation Loss Accuracy
10 0.702100 0.679824 0.592305
20 0.678100 0.664889 0.592693
30 0.667200 0.652154 0.592693
40 0.665000 0.642443 0.688302
50 0.638400 0.613920 0.691411
60 0.597900 0.588133 0.687136
70 0.588600 0.564596 0.737660
80 0.547000 0.548516 0.741935
90 0.561600 0.533032 0.753206
100 0.562800 0.531821 0.746211
110 0.547300 0.517024 0.759813
120 0.525600 0.511233 0.759425
130 0.515900 0.511245 0.758259
140 0.495500 0.508545 0.754761
150 0.489200 0.503313 0.761757
160 0.504100 0.499360 0.762145
170 0.494500 0.499739 0.764477
180 0.488700 0.503218 0.760979
190 0.544900 0.499092 0.761757
200 0.534500 0.497670 0.760202
210 0.476000 0.499546 0.762145
220 0.529300 0.494279 0.763700
230 0.475200 0.493959 0.763311
240 0.489200 0.492553 0.769918
250 0.507400 0.500709 0.759036
260 0.520600 0.490385 0.767198
270 0.494600 0.489512 0.766421
280 0.488100 0.489295 0.769530
290 0.471300 0.489661 0.765643
300 0.510800 0.490641 0.767586
310 0.481000 0.491523 0.767975
320 0.495500 0.490885 0.768752
330 0.461800 0.488755 0.766421
340 0.464300 0.488700 0.766421
350 0.497300 0.488610 0.769141
360 0.486800 0.488406 0.766032
370 0.462500 0.488761 0.766421
380 0.479400 0.488920 0.767198
390 0.548900 0.489771 0.769530
400 0.483900 0.489711 0.769141
410 0.544700 0.489442 0.768752
420 0.462700 0.489191 0.769141

Out[30]:
TrainOutput(global_step=421, training_loss=0.5281096048423061, metrics={'train_runtime': 499.8907, 'train_samples_per_second': 42.101, 'train_steps_per_second': 0.842, 'total_flos': 108902690316960.0, 'train_loss': 0.5281096048423061, 'epoch': 1.0})
In [31]:
trainer.evaluate()
[52/52 00:07]
Out[31]:
{'eval_loss': 0.4891909658908844,
 'eval_accuracy': 0.7691410804508356,
 'eval_runtime': 7.294,
 'eval_samples_per_second': 352.754,
 'eval_steps_per_second': 7.129,
 'epoch': 1.0}
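
Once fine-tuned, the model can score new text directly. A minimal inference sketch (the example sentence is made up, and inputs are moved to whatever device the model ended up on):

new_text = "we urgently need food and clean water"  # hypothetical example
inputs = tokenizer(new_text, truncation=True, padding=True,
                   max_length=MAX_LENGTH, return_tensors="pt")
model.eval()
with torch.no_grad():
    logits = model(**inputs.to(model.device)).logits
torch.nn.Softmax(dim=-1)(logits)  # class probabilities for the new example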