umap
umap copied to clipboard
Fatal Python error: Segmentation fault
The code below crashes with a segmentation fault on macOS Sonoma.
% pip show umap-learn
Name: umap-learn
Version: 0.5.3
Summary: Uniform Manifold Approximation and Projection
Home-page: http://github.com/lmcinnes/umap
Author:
Author-email:
License: BSD
Location: /Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/site-packages
Requires: numba, numpy, pynndescent, scikit-learn, scipy, tqdm
Required-by:
% ipython
Python 3.10.13 (main, Sep 11 2023, 08:21:04) [Clang 14.0.6 ]
Type 'copyright', 'credits' or 'license' for more information
IPython 8.20.0 -- An enhanced Interactive Python. Type '?' for help.
from transformers import GPT2Tokenizer, GPT2Model
import torch
import umap
import matplotlib.pyplot as plt
import numpy as np
# Reproduction script: embed a few words with GPT-2, flatten the per-token
# hidden states into one row per word, project the rows to 2-D with UMAP and
# plot the result.

# Load the pre-trained GPT-2 model and tokenizer
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2Model.from_pretrained(model_name)

# Move the model to the 'mps' device if available
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
model.to(device)

# Define the words for which you want to extract embeddings
words = ["dog", "puppy", "dog-puppy"]

# Extract the embedding vectors for each word
embeddings = []
max_length = 0
for word in words:
    input_ids = tokenizer.encode(word, add_special_tokens=False, return_tensors='pt')
    input_ids = input_ids.to(device)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        outputs = model(input_ids)
    hidden_states = outputs.last_hidden_state
    # Drop the leading axis and copy to host memory as a NumPy array
    # (presumably one row per token -- TODO confirm).
    word_embedding = hidden_states.squeeze(0).cpu().numpy()
    embeddings.append(word_embedding)
    # Track the largest first-axis length so every word can be padded to it.
    max_length = max(max_length, word_embedding.shape[0])

# Pad the embedding vectors to the maximum length
padded_embeddings = []
for embedding in embeddings:
    # Pad only at the end of axis 0; axis 1 is left untouched.
    padded_embedding = np.pad(embedding, ((0, max_length - embedding.shape[0]), (0, 0)), mode='constant')
    padded_embeddings.append(padded_embedding)

# Reshape the padded embeddings to a 2-dimensional array
# (one flattened row per word).
reshaped_embeddings = np.array(padded_embeddings).reshape(len(padded_embeddings), -1)

# Apply UMAP to reduce the dimensionality of the embeddings
# NOTE(review): only len(words) == 3 rows are passed to UMAP here, and no
# n_neighbors / init is given, so the library defaults apply. Verify against
# the UMAP documentation that such a tiny dataset is supported with defaults.
reducer = umap.UMAP(n_components=2, random_state=42)
reduced_embeddings = reducer.fit_transform(reshaped_embeddings)

# Plot the reduced embeddings
plt.figure(figsize=(8, 6))
for i, word in enumerate(words):
    plt.scatter(reduced_embeddings[i, 0], reduced_embeddings[i, 1], label=word)
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.title('Embedding Vectors of Words')
plt.legend()
plt.show()
Fatal Python error: Segmentation fault

Thread 0x000070000239c000 (most recent call first):
  File "/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py", line 320 in wait
  File "/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/threading.py", line 607 in wait
  File "/Users/davidlaxer/anaconda3/envs/AI-Feynman/lib/python3.10/site-packages/IPython/core/history.py" [trace cut off here]
zsh: segmentation fault  ipython

(Note: the fault dump was printed twice, with the two copies interleaved character-by-character in the terminal; it is shown once here for readability.)