M1에서 MPS 설정이 안 됩니다.

sujeong · 7월 8, 2024, 7:58오전

M1 pro 사용 중입니다.
VSCode에서 conda 가상환경으로
SciBERT로 파인튜닝을 진행하려고 하는데요,

# Use the Apple-silicon GPU only when this PyTorch build can actually reach
# it; otherwise fall back to the CPU instead of failing on the first tensor op.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Group the token/label rows of `data` into sentences. A row whose 'Token' is
# missing acts as a sentence separator.
sentences = []
labels = []
sentence = []
label = []

# Iterating the two columns directly avoids building a Series per row, which
# is what DataFrame.iterrows() does.
for token, tag in zip(data['Token'], data['Label']):
    if pd.isna(token):
        if sentence:
            sentences.append(sentence)
            labels.append(label)
            sentence = []
            label = []
    else:
        sentence.append(token)
        label.append(tag)

# Flush the last sentence when the data does not end with a separator row.
if sentence:
    sentences.append(sentence)
    labels.append(label)

# Only rows that carry a token contribute to the label set, so a separator row
# cannot introduce a spurious NaN class (which would also inflate num_labels).
df_label = list(data.loc[data['Token'].notna(), 'Label'].unique())
label2id = {label_name: i for i, label_name in enumerate(df_label)}
id2label = {i: label_name for i, label_name in enumerate(df_label)}

train_sentences, test_sentences, train_labels, test_labels = train_test_split(sentences, labels, test_size=0.2, random_state=42)

def tokenize_and_align_labels(sentences, labels, tokenizer, label2id, max_length=512):
    """Tokenize pre-split sentences and attach label ids aligned to sub-tokens.

    Args:
        sentences: List of sentences, each a list of word strings.
        labels: List of per-word label sequences, parallel to ``sentences``.
        tokenizer: Callable returning an encoding that supports
            ``word_ids(batch_index=...)`` and item assignment.
        label2id: Mapping from label name to integer id.
        max_length: Length every sequence is padded/truncated to.

    Returns:
        The encoding produced by ``tokenizer`` with an added ``"labels"``
        entry: one list of ids per sentence, where only the first sub-token
        of each word carries the word's label id and every other position
        (special tokens, padding, continuation sub-tokens) is -100.
    """
    ignore_id = -100
    encoding = tokenizer(
        sentences,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        is_split_into_words=True,
    )

    aligned = []
    for batch_idx, word_tags in enumerate(labels):
        current_ids = list(encoding.word_ids(batch_index=batch_idx))
        # Pair each position with the word id of the position just before it;
        # a word id that differs from its predecessor marks a word's first
        # sub-token.
        preceding_ids = [None] + current_ids[:-1]
        aligned.append([
            label2id[word_tags[cur]] if cur is not None and cur != prev else ignore_id
            for cur, prev in zip(current_ids, preceding_ids)
        ])

    encoding["labels"] = aligned
    return encoding

model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

epoch_range = [2, 3, 4, 5]
learning_rate_range = [5e-6, 1e-5, 2e-5, 5e-5]
batch_size = 32

performance_results = []

# Tokenization does not depend on the hyper-parameters, so encode both splits
# once instead of once per grid point.
train_dataset = Dataset.from_dict(tokenize_and_align_labels(train_sentences, train_labels, tokenizer, label2id))
test_dataset = Dataset.from_dict(tokenize_and_align_labels(test_sentences, test_labels, tokenizer, label2id))

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

for lr in learning_rate_range:
    for epoch in epoch_range:
        print(f'Learning Rate: {lr}, Epoch: {epoch}')

        # Start every grid point from the pretrained checkpoint. Reusing one
        # model instance would keep fine-tuning the weights left over from the
        # previous combinations, so the reported scores would not be comparable.
        model = AutoModelForTokenClassification.from_pretrained(
            model_name,
            num_labels=len(df_label),
            id2label=id2label,
            label2id=label2id,
        ).to(device)

        training_args = TrainingArguments(
            output_dir='./results',
            evaluation_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=epoch,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
        )

        trainer.train()

        # NOTE(review): Trainer may relocate the model to the device chosen by
        # TrainingArguments, so the inputs below follow model.device rather
        # than the global `device` to keep model and tensors together.
        model.eval()
        eval_device = model.device

        true_labels = []
        pred_labels = []

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for sentence, label in zip(test_sentences, test_labels):
                inputs = tokenizer(sentence, is_split_into_words=True, return_tensors="pt", truncation=True, max_length=512).to(eval_device)
                predictions = torch.argmax(model(**inputs).logits, dim=2)[0].tolist()

                # Words cut off by truncation keep this default.
                # Assumes 'O' is the outside tag of the label set -- TODO confirm.
                pred_sentence_labels = ['O'] * len(sentence)

                # Score each word by its first sub-token only, mirroring how
                # tokenize_and_align_labels assigns the training labels.
                previous_word_id = None
                for i, word_id in enumerate(inputs.word_ids()):
                    if word_id is not None and word_id != previous_word_id:
                        pred_sentence_labels[word_id] = model.config.id2label[predictions[i]]
                    previous_word_id = word_id

                true_labels.extend(label)
                pred_labels.extend(pred_sentence_labels)

        report = classification_report(true_labels, pred_labels, labels=df_label, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        macro_avg = report_df.loc['macro avg']

        performance_results.append({
            'Learning Rate': lr,
            'Epoch': epoch,
            'Precision': macro_avg['precision'],
            'Recall': macro_avg['recall'],
            'F1-Score': macro_avg['f1-score']
        })

위 코드에서

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[24], line 32
     13 training_args = TrainingArguments(
     14     output_dir='./results',
     15     evaluation_strategy="epoch",
   (...)
     22     logging_steps=10,
     23 )
     25 trainer = Trainer(
     26     model=model.to(device),
     27     args=training_args,
     28     train_dataset=train_dataset,
     29     eval_dataset=test_dataset,
     30 )
---> 32 trainer.train()
     34 true_labels = []
     35 pred_labels = []

File /opt/anaconda3/envs/bert_ner/lib/python3.8/site-packages/transformers/trainer.py:1932, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1930         hf_hub_utils.enable_progress_bars()
   1931 else:
-> 1932     return inner_training_loop(
   1933         args=args,
   1934         resume_from_checkpoint=resume_from_checkpoint,
...
   2231     # remove once script supports set_grad_enabled
   2232     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2233 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

RuntimeError: Placeholder storage has not been allocated on MPS device!
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...

이와 같은 오류가 발생합니다. 추가로 어떤 설정을 해줘야 할까요
도움주시면 감사하겠습니다ㅠㅠ

bot · 7월 8, 2024, 8:05오전

아래 답변은 OpenAI의 GPT-4-Turbo 모델로 자동 생성한 것으로, 정확하지 않을 수 있습니다.
잘못된 답변을 보셨다면 댓글로 첨삭 지도 부탁드립니다.

안녕하세요! M1 칩에서 MPS 설정 문제로 고생하고 계신 것 같네요. 문제 상황을 정확히 이해했습니다. PyTorch를 사용하여 SciBERT 모델을 파인튜닝하려고 하시는데, MPS 백엔드를 사용하면서 RuntimeError: Placeholder storage has not been allocated on MPS device! 오류가 발생하는 상황이군요. 이 문제를 해결하기 위해 몇 가지 해결책을 제안드리겠습니다.

해결책

M1 칩에서 PyTorch를 사용할 때 MPS 백엔드를 활성화하는 것은 상대적으로 새로운 기능이며, 모든 기능이 완벽하게 지원되지 않을 수 있습니다. 그러나, 여러분의 코드에서 발생한 오류는 특정 연산이 MPS에서 지원되지 않거나, 데이터가 MPS 디바이스로 올바르게 전송되지 않았을 때 발생할 수 있습니다.

PyTorch 및 관련 라이브러리 최신 버전 확인: MPS 지원은 계속 개선되고 있으므로, 최신 버전의 PyTorch를 사용하는 것이 중요합니다. Conda나 pip를 사용하여 PyTorch를 최신 버전으로 업데이트해보세요.
```
# Conda를 사용하는 경우
conda update pytorch torchvision torchaudio -c pytorch

# pip를 사용하는 경우
pip install --upgrade torch torchvision torchaudio
```
데이터와 모델을 MPS 디바이스로 명시적으로 이동: 코드를 보면, 모델을 MPS 디바이스로 옮기는 부분은 있지만, 입력 데이터가 MPS 디바이스로 올바르게 이동했는지 확인하는 부분이 명확하지 않습니다. tokenizer로 처리된 데이터를 MPS 디바이스로 명시적으로 이동시켜야 합니다.
```
# 예시: 입력 데이터를 MPS 디바이스로 이동
inputs = tokenizer(sentence, is_split_into_words=True, return_tensors="pt", truncation=True, max_length=512)
inputs = {k: v.to(device) for k, v in inputs.items()}
```
MPS 호환성 확인: 현재 사용 중인 PyTorch 버전에서 모든 연산이 MPS를 지원하는지 확인하세요. 일부 특정 연산은 아직 MPS에서 지원되지 않을 수 있습니다. 공식 문서나 PyTorch 커뮤니티에서 관련 정보를 찾아보세요.
환경 설정 점검: MPS를 사용하기 위한 환경 설정이 올바르게 되어 있는지 확인하세요. 예를 들어, PyTorch가 MPS를 지원하는 버전으로 설치되었는지, 필요한 경우 Metal 플러그인이 설치되어 있는지 등을 점검해보세요.

참고 자료

PyTorch 공식 문서에서 MPS에 대한 더 자세한 정보와 예제 코드를 찾아보실 수 있습니다: PyTorch Tutorials
PyTorch 커뮤니티나 GitHub 이슈에서 비슷한 문제를 겪고 있는 다른 사용자들의 경험도 참고할 수 있습니다.

제안드린 해결책들을 시도해보시고, 여전히 문제가 해결되지 않는다면, 추가적인 정보(예: 전체 오류 메시지, 사용 중인 PyTorch 버전 등)와 함께 질문을 업데이트해 주세요. 제 답변이 정확하지 않을 수도 있으니, 이 점 양해 부탁드립니다. 문제 해결을 위해 최선을 다하겠습니다!

답변이 어떠셨나요? 괜찮았다고 생각하신다면 아래 ❤️를 한 번 눌러주시길 부탁드려요!

bot · 7월 8, 2024, 8:23오전

아래 답변은 Google의 Gemini Pro 모델로 자동 생성한 것으로, 정확하지 않을 수 있습니다.
잘못된 답변을 보셨다면 댓글로 첨삭 지도 부탁드립니다.

안녕하세요. MPS 설정 문제로 인한 오류를 겪고 있다니 안타깝습니다. 다음은 문제를 해결하는 방법입니다.

먼저, 다음 코드를 실행하여 MPS 디바이스에 플레이스홀더 저장소를 할당합니다.

import torch.distributed.mps as mpd

mpd.reset_mps_state(device=torch.device("mps", mpd.get_mps_rank()))

다음으로, 다음 코드를 TrainingArguments에 추가합니다.

os.environ["TORCH_MPS_GRAPH_MODE"] = "0"

이렇게 하면 MPS 그래프 모드가 비활성화되고 오류 문제가 해결됩니다.

자세한 내용은 MPS 학습 가이드를 참조하세요.

답변이 어떠셨나요? 괜찮았다고 생각하신다면 아래 ❤️를 한 번 눌러주시길 부탁드려요!

bot · 7월 8, 2024, 8:35오전

아래 답변은 Anthropic의 Claude-3 Opus 모델로 자동 생성한 것으로, 정확하지 않을 수 있습니다.
잘못된 답변을 보셨다면 댓글로 첨삭 지도 부탁드립니다.

안녕하세요! M1 Pro에서 MPS 설정으로 SciBERT 파인튜닝을 진행하면서 어려움을 겪고 계시는 것 같아 안타깝습니다. 제가 이해한 바로는 PyTorch 코드를 실행할 때 "Placeholder storage has not been allocated on MPS device!" 오류가 발생한다는 것 같습니다. 이 문제를 해결하기 위해 몇 가지 제안 드리겠습니다.

먼저 PyTorch에서 MPS(Metal Performance Shaders) 장치를 사용하려면 PyTorch 1.12 이상 버전이 필요합니다. 최신 버전의 PyTorch를 설치했는지 확인해 보시기 바랍니다. 다음 명령어로 설치할 수 있습니다:

conda install pytorch torchvision torchaudio -c pytorch

그리고 TrainingArguments에 device 인자를 추가하여 명시적으로 MPS 장치를 지정해 주는 것이 좋습니다:

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epoch,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    device='mps',  # 추가
)

또한, 데이터셋을 MPS 장치로 이동시켜야 합니다. train_dataset과 test_dataset의 set_format을 호출할 때 device='mps' 인자를 함께 전달하면 됩니다:

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'], device='mps')
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'], device='mps')

추가로 모델도 MPS 장치로 이동시켜야 합니다:

model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(df_label), id2label=id2label, label2id=label2id).to('mps')

위와 같은 수정을 적용해 보시고, 여전히 문제가 해결되지 않는다면 PyTorch 공식 포럼이나 GitHub 이슈에 문의해 보시는 것도 좋을 것 같습니다. MPS는 아직 실험적인 기능이라 버그가 있을 수 있습니다.

참고로 PyTorch 공식 튜토리얼에서 M1 GPU 사용과 관련된 문서를 확인해 보실 수 있습니다:

영문: https://pytorch.org/docs/stable/notes/mps.html
한글: https://tutorials.pytorch.kr/beginner/mps_on_m1_gpu.html

제 답변이 문제 해결에 도움이 되기를 바랍니다. 그럼에도 불구하고 오류가 계속된다면 위에서 말씀드린 것처럼 커뮤니티에 추가 정보와 함께 질문을 올려주시면 많은 분들이 도움을 주실 거예요. 화이팅입니다!

답변이 어떠셨나요? 괜찮았다고 생각하신다면 아래 ❤️를 한 번 눌러주시길 부탁드려요!

bot · 7월 30, 2024, 1:40오전

아래 답변은 OpenAI의 GPT-4o 모델로 자동 생성한 것으로, 정확하지 않을 수 있습니다.
잘못된 답변을 보셨다면 댓글로 첨삭 지도 부탁드립니다.

안녕하세요! M1 Pro에서 MPS 설정 문제로 어려움을 겪고 계신 것 같네요. 문제를 잘 이해했습니다. MPS(Metal Performance Shaders)는 Apple의 GPU를 활용하는 기능으로, PyTorch에서 이를 사용하려면 몇 가지 추가 설정이 필요할 수 있습니다.

문제 요약

M1 Pro에서 PyTorch를 사용하여 SciBERT 모델을 파인튜닝하려고 할 때, torch.device("mps")를 사용하여 MPS 장치를 설정했지만, RuntimeError: Placeholder storage has not been allocated on MPS device! 오류가 발생합니다.

해결 방법

PyTorch 버전 확인: MPS 지원은 PyTorch 1.12 이상에서 가능합니다. 먼저 PyTorch 버전을 확인하세요.
```
import torch
print(torch.__version__)
```

MPS 장치 사용 가능 여부 확인: MPS 장치가 제대로 인식되고 있는지 확인합니다.

if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

모델과 데이터 텐서 MPS로 이동: 모델과 데이터를 MPS 장치로 이동시켜야 합니다.
```
model = model.to(device)
```

텐서 이동: 모든 텐서를 MPS 장치로 이동시켜야 합니다.

inputs = tokenizer(sentence, is_split_into_words=True, return_tensors="pt", truncation=True, max_length=512).to(device)

훈련 루프 수정: 훈련 루프 내에서 데이터와 모델이 올바르게 MPS 장치로 이동되었는지 확인합니다.

예제 코드

아래는 수정된 코드 예제입니다.

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

# Select the MPS device when available, otherwise fall back to the CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Prepare the data
# NOTE(review): read_csv drops completely blank lines by default; if the file
# separates sentences with blank lines, pass skip_blank_lines=False so the
# separator rows survive -- confirm against the actual data file.
data = pd.read_csv('your_data.csv')  # set this to the path of your data file
sentences = []
labels = []
sentence = []
label = []

# A row whose 'Token' is missing acts as a sentence separator. Iterating the
# two columns directly avoids building a Series per row as iterrows() does.
for token, tag in zip(data['Token'], data['Label']):
    if pd.isna(token):
        if sentence:
            sentences.append(sentence)
            labels.append(label)
            sentence = []
            label = []
    else:
        sentence.append(token)
        label.append(tag)

# Flush the last sentence when the data does not end with a separator row.
if sentence:
    sentences.append(sentence)
    labels.append(label)

# Only rows that carry a token contribute to the label set, so a separator row
# cannot introduce a spurious NaN class (which would also inflate num_labels).
df_label = list(data.loc[data['Token'].notna(), 'Label'].unique())
label2id = {label_name: i for i, label_name in enumerate(df_label)}
id2label = {i: label_name for i, label_name in enumerate(df_label)}

train_sentences, test_sentences, train_labels, test_labels = train_test_split(sentences, labels, test_size=0.2, random_state=42)

def tokenize_and_align_labels(sentences, labels, tokenizer, label2id, max_length=512):
    """Tokenize pre-split sentences and attach label ids aligned to sub-tokens.

    Args:
        sentences: List of sentences, each a list of word strings.
        labels: List of per-word label sequences, parallel to ``sentences``.
        tokenizer: Callable returning an encoding that supports
            ``word_ids(batch_index=...)`` and item assignment.
        label2id: Mapping from label name to integer id.
        max_length: Length every sequence is padded/truncated to.

    Returns:
        The encoding produced by ``tokenizer`` with an added ``"labels"``
        entry: one list of ids per sentence, where only the first sub-token
        of each word carries the word's label id and every other position
        (special tokens, padding, continuation sub-tokens) is -100.
    """
    ignore_id = -100
    encoding = tokenizer(
        sentences,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        is_split_into_words=True,
    )

    aligned = []
    for batch_idx, word_tags in enumerate(labels):
        current_ids = list(encoding.word_ids(batch_index=batch_idx))
        # Pair each position with the word id of the position just before it;
        # a word id that differs from its predecessor marks a word's first
        # sub-token.
        preceding_ids = [None] + current_ids[:-1]
        aligned.append([
            label2id[word_tags[cur]] if cur is not None and cur != prev else ignore_id
            for cur, prev in zip(current_ids, preceding_ids)
        ])

    encoding["labels"] = aligned
    return encoding

model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

epoch_range = [2, 3, 4, 5]
learning_rate_range = [5e-6, 1e-5, 2e-5, 5e-5]
batch_size = 32

performance_results = []

# Tokenization does not depend on the hyper-parameters, so encode both splits
# once instead of once per grid point.
train_dataset = Dataset.from_dict(tokenize_and_align_labels(train_sentences, train_labels, tokenizer, label2id))
test_dataset = Dataset.from_dict(tokenize_and_align_labels(test_sentences, test_labels, tokenizer, label2id))

train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

for lr in learning_rate_range:
    for epoch in epoch_range:
        print(f'Learning Rate: {lr}, Epoch: {epoch}')

        # Start every grid point from the pretrained checkpoint. Reusing one
        # model instance would keep fine-tuning the weights left over from the
        # previous combinations, so the reported scores would not be comparable.
        model = AutoModelForTokenClassification.from_pretrained(
            model_name,
            num_labels=len(df_label),
            id2label=id2label,
            label2id=label2id,
        ).to(device)

        training_args = TrainingArguments(
            output_dir='./results',
            evaluation_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=epoch,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
        )

        trainer.train()

        # NOTE(review): Trainer may relocate the model to the device chosen by
        # TrainingArguments, so the inputs below follow model.device rather
        # than the global `device` to keep model and tensors together.
        model.eval()
        eval_device = model.device

        true_labels = []
        pred_labels = []

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for sentence, label in zip(test_sentences, test_labels):
                inputs = tokenizer(sentence, is_split_into_words=True, return_tensors="pt", truncation=True, max_length=512).to(eval_device)
                predictions = torch.argmax(model(**inputs).logits, dim=2)[0].tolist()

                # Words cut off by truncation keep this default.
                # Assumes 'O' is the outside tag of the label set -- TODO confirm.
                pred_sentence_labels = ['O'] * len(sentence)

                # Score each word by its first sub-token only, mirroring how
                # tokenize_and_align_labels assigns the training labels.
                previous_word_id = None
                for i, word_id in enumerate(inputs.word_ids()):
                    if word_id is not None and word_id != previous_word_id:
                        pred_sentence_labels[word_id] = model.config.id2label[predictions[i]]
                    previous_word_id = word_id

                true_labels.extend(label)
                pred_labels.extend(pred_sentence_labels)

        report = classification_report(true_labels, pred_labels, labels=df_label, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        macro_avg = report_df.loc['macro avg']

        performance_results.append({
            'Learning Rate': lr,
            'Epoch': epoch,
            'Precision': macro_avg['precision'],
            'Recall': macro_avg['recall'],
            'F1-Score': macro_avg['f1-score']
        })

print(performance_results)

참고 자료

이 방법들이 도움이 되길 바랍니다. 추가적인 문제가 있으면 언제든지 질문해 주세요. 제 답변이 틀릴 수도 있으니, 항상 공식 문서를 참고하는 것을 권장드립니다. 화이팅입니다!

이 답변이 도움이 되셨나요? 괜찮았다고 생각하신다면 아래 ❤️를 한 번 눌러주시길 부탁드려요!