vscode-jupyter
vscode-jupyter copied to clipboard
DSBugBash: Suggestion - provide refactoring for a notebook into functions
During creation of cells in the notebook, the user is building up a series of things to do.
It would be super cool to take a set of cells (or code) and refactor it into a function.
Example:
This:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from string import punctuation
#%%
# Split the raw text into sentences.
sents = sent_tokenize(text)
sents

#%%
# Lower-case the text and split it into word tokens.
word_sent = word_tokenize(text.lower())
word_sent

#%%
# Tokens to ignore: English stop words, string.punctuation characters,
# and the typographic double quotes (which string.punctuation lacks).
_stopwords = set(stopwords.words('english') + list(punctuation) + list(['”', '“']))
_stopwords

#%%
# Drop the ignored tokens from the word list.
word_sent=[word for word in word_sent if word not in _stopwords]
word_sent

#%%
# Count how often each remaining token occurs.
from nltk.probability import FreqDist
freq = FreqDist(word_sent)
freq

#%%
# Peek at the ten most frequent tokens.
from heapq import nlargest
nlargest(10, freq, key=freq.get)

#%%
# Score each sentence (by index) as the sum of the frequencies of its tokens.
from collections import defaultdict
ranking = defaultdict(int)

for i, sent in enumerate(sents):
    for w in word_tokenize(sent.lower()):
        if w in freq:
            ranking[i] += freq[w]

ranking

#%%
# Indices of the four highest-scoring sentences.
sents_idx = nlargest(4, ranking, key=ranking.get)
sents_idx

#%%
# Show the selected sentences in their original order.
[sents[j] for j in sorted(sents_idx)]
Gets turned into:
def summarize(text, n):
    """Return the ``n`` highest-scoring sentences of ``text`` in original order.

    Each sentence is scored as the sum of the frequencies of its tokens,
    where frequencies are counted over the whole lower-cased text after
    stop words and punctuation have been removed.

    Relies on ``sent_tokenize``, ``word_tokenize``, ``stopwords``,
    ``punctuation``, ``FreqDist``, ``defaultdict`` and ``nlargest`` being
    available in the enclosing scope.

    Args:
        text: The text to summarize.
        n: Number of sentences to return; must not exceed the number of
            sentences found in ``text``.

    Returns:
        A list of the selected sentences, ordered as they appear in ``text``.

    Raises:
        AssertionError: If ``n`` is greater than the number of sentences
            (only when assertions are enabled).
    """
    sents = sent_tokenize(text)
    assert n <= len(sents), "n must not exceed the number of sentences in text"

    # Typographic double quotes are not part of string.punctuation, so they
    # are added explicitly to keep them out of the frequency counts.
    _stopwords = set(stopwords.words('english') + list(punctuation) + ['”', '“'])

    word_sent = [
        word for word in word_tokenize(text.lower()) if word not in _stopwords
    ]
    freq = FreqDist(word_sent)

    # ranking maps sentence index -> accumulated token-frequency score.
    ranking = defaultdict(int)
    for i, sent in enumerate(sents):
        for w in word_tokenize(sent.lower()):
            if w in freq:
                ranking[i] += freq[w]

    # Pick the top-n indices by score, then sort the indices so the output
    # follows the order of the source text rather than the score order.
    sents_idx = nlargest(n, ranking, key=ranking.get)
    return [sents[j] for j in sorted(sents_idx)]