Friday, June 26, 2015

Natural Language Processing with Python: Chapter 6 Exercise Answers

#Too descriptive

#ex 2
def gender_features(word):
    """Return the feature dict used to classify a name by gender.

    Features extracted from ``word``:
      * ``suffix1``    -- the last character
      * ``suffix2``    -- the last two characters
      * ``startswith`` -- the first character, lower-cased
      * ``containsyn`` -- True if the substring 'yn' occurs in ``word``
                          (case-sensitive)

    An empty ``word`` yields empty-string features rather than raising.
    """
    return {
        'suffix1': word[-1:],
        'suffix2': word[-2:],
        # Slice instead of index so an empty string gives '' and not IndexError.
        'startswith': word[:1].lower(),
        'containsyn': 'yn' in word,
    }

import random

# Build the labelled data: a list of (name, gender) pairs.
# NOTE: this rebinds `names` from the corpus reader to a plain list, so this
# statement cannot be executed a second time without re-importing the corpus.
names = ([(name, 'male') for name in names.words('male.txt')] +
         [(name, 'female') for name in names.words('female.txt')])

# The list above is every male name followed by every female name. Shuffle it
# so that each positional slice below contains both labels. A private, seeded
# RNG keeps the split reproducible without touching the global random state.
random.Random(0).shuffle(names)

# Eagerly computed feature sets; not used by the rest of this snippet, which
# goes through nltk.apply_features instead.
featuresets = [(gender_features(n), g) for (n, g) in names]

# Three disjoint slices. The training slice previously started at index 500
# and therefore contained both the dev-test and the test data, which made the
# accuracy printed below an evaluation on training examples.
test_set = nltk.apply_features(gender_features, names[:500])
devtest_set = nltk.apply_features(gender_features, names[500:1000])
train_set = nltk.apply_features(gender_features, names[1000:])

classifier = nltk.NaiveBayesClassifier.train(train_set)

# NOTE(review): the "81% accuracy" originally quoted here was measured with
# the test data included in the training set; re-measure after this change.
p(nltk.classify.accuracy(classifier, test_set))

