machine-learning-articles
machine-learning-articles copied to clipboard
Word2Vec Explained
TL;DR
Article Link
https://towardsdatascience.com/word2vec-explained-49c52b4ccb71
Author
Vatsal
Key Takeaways
Implementation
- Data
- Requirements
- Import Data
- Preprocess Data
- Embed
- PCA on Embeddings
Useful Code Snippets
# Train the embedding model on the preprocessed corpus and keep it in `w`.
# NOTE(review): `w2v` is presumably gensim's Word2Vec class and
# `filtered_lines` the tokenised sentences built in the preprocessing step;
# both are defined outside this snippet -- confirm against the imports.
w = w2v(filtered_lines, min_count=3, sg=1, window=7)

# Sanity check: show the entries the model ranks as most similar to 'thou'.
print(w.wv.most_similar("thou"))
# Build a DataFrame with one row per vocabulary key: the row label is the key
# and the row values are the vector returned by `get_vector` for that key.
emb_df = pd.DataFrame(
    [w.wv.get_vector(str(key)) for key in w.wv.key_to_index],
    index=list(w.wv.key_to_index),
)

# Report (number of keys, vector length).
print(emb_df.shape)

# Bare expression: only renders a preview when run in a notebook/REPL cell.
emb_df.head()
# Reduce the embeddings to two components so they can be drawn on a plane.
# NOTE(review): `PCA` is presumably sklearn.decomposition.PCA -- confirm
# against the imports. `random_state` is fixed so reruns are repeatable.
pca = PCA(n_components=2, random_state=7)

# Despite the name, `pca_mdl` holds the transformed coordinates returned by
# `fit_transform`, not the fitted estimator (that is `pca`).
pca_mdl = pca.fit_transform(emb_df)

# Re-attach the vocabulary labels and name the two components 'x' and 'y'.
emb_df_PCA = pd.DataFrame(pca_mdl, columns=["x", "y"], index=emb_df.index)
# Scatter-plot the 2-D PCA projection of the embeddings.
#
# A fresh figure is created explicitly, so no preceding `plt.clf()` is needed:
# calling it first would only clear (or implicitly create) a *different*
# current figure and leave a stray blank one behind.
fig = plt.figure(figsize=(6, 4))
plt.scatter(
    x=emb_df_PCA["x"],
    y=emb_df_PCA["y"],
    s=0.4,  # tiny markers: one point per plotted row
    color="maroon",
    alpha=0.5,  # translucency keeps dense regions readable
)
plt.xlabel("PCA-1")
plt.ylabel("PCA-2")
plt.title("PCA Visualization")

# `plt.plot()` with no arguments draws nothing and does not display the
# figure; `plt.show()` is the call that actually renders it when this runs
# as a script (it is harmless in a notebook).
plt.show()