data-science-from-scratch icon indicating copy to clipboard operation
data-science-from-scratch copied to clipboard

Chapter 13: Naive Bayes

Open m4ur0jr opened this issue 5 years ago • 0 comments

I can't get the counts value.

image

# TESTING THE MODEL
def get_subject_data(path):
    """Collect ``(subject, is_spam)`` pairs from the e-mail files under `path`.

    Args:
        path: Either a glob pattern (e.g. ``r"C:\\spam\\*\\*"``) or the name
            of an existing directory. A directory is searched recursively,
            because globbing a bare directory name would only return the
            directory itself, which cannot be opened as a file.

    Returns:
        A list of ``(subject, is_spam)`` tuples, one per line that starts
        with ``"Subject:"``. ``is_spam`` is True when the substring ``"ham"``
        does not occur anywhere in the file's path.
    """
    import os  # local import: only needed for the directory handling below

    # regex for stripping out the leading "Subject:" and any spaces after it
    subject_regex = re.compile(r"^Subject:\s+")

    # NOTE: the caller's `path` is used as given; it must not be replaced by
    # a machine-specific hard-coded location.
    if os.path.isdir(path):
        pattern = os.path.join(glob.escape(path), "**", "*")
        filenames = glob.glob(pattern, recursive=True)
    else:
        # glob.glob returns every filename that matches the wildcarded path
        filenames = glob.glob(path)

    data = []
    for fn in filenames:
        # a wildcard can also match sub-directories; those cannot be read
        if not os.path.isfile(fn):
            continue

        is_spam = "ham" not in fn

        # errors="ignore": raw mail may contain bytes that are not valid in
        # the default encoding, and only the subject text is needed
        with open(fn, "r", errors="ignore") as file:
            for line in file:
                if line.startswith("Subject:"):
                    subject = subject_regex.sub("", line).strip()
                    data.append((subject, is_spam))

    return data

def p_spam_given_word(word_prob):
    """Return ``prob_if_spam / (prob_if_spam + prob_if_not_spam)``.

    Args:
        word_prob: A 3-item sequence ``(word, prob_if_spam, prob_if_not_spam)``.
            The word itself is not used in the computation.

    Returns:
        The spam probability's share of the two probabilities combined.

    Raises:
        ZeroDivisionError: If both probabilities are zero.
    """
    _, prob_if_spam, prob_if_not_spam = word_prob
    return prob_if_spam / (prob_if_spam + prob_if_not_spam)

def train_and_test_model(path):
    """Train a NaiveBayesClassifier on the subjects under `path` and tally results.

    Args:
        path: Passed straight to ``get_subject_data`` to locate the e-mails.

    Returns:
        A ``Counter`` keyed by ``(actual_is_spam, predicted_is_spam)``, where
        the prediction is ``spam_probability > 0.5``. The same counter is
        also printed.
    """
    data = get_subject_data(path)
    random.seed(0)      # just so you get the same answers as me
    # presumably a 75% / 25% train/test split -- confirm against split_data
    train_data, test_data = split_data(data, 0.75)

    classifier = NaiveBayesClassifier()
    classifier.train(train_data)

    # (subject, actual is_spam, classifier output); the output is treated as
    # a spam probability below
    classified = [(subject, is_spam, classifier.classify(subject))
                  for subject, is_spam in test_data]

    counts = Counter((is_spam, spam_probability > 0.5)  # (actual, predicted)
                     for _, is_spam, spam_probability in classified)

    print(counts)
    # return the tally so callers can use it, not just see it printed
    return counts

`

m4ur0jr avatar Jan 04 '20 14:01 m4ur0jr