Thursday, November 7, 2013

Answers to the exercises of the book Natural Language Processing with Python: Chapter 1

from __future__ import division
from nltk.book import *

#author: Anik

ex 2
print 26**100

ex 3
print ['Monty', 'Python'] * 20

ex 4
print len(text2)
print len(set(text2))

ex 5

ex 6
text2.dispersion_plot(["Elinor", "Marianne", "Edward", "Willoughby"])
Elinor appears throughout the text, and Willoughby and Marianne probably had a relationship

ex 7
# Text.collocations() prints the collocations itself and returns None, so
# wrapping the call in `print` would add a stray "None" line to the output.
text5.collocations()

ex 8

ex 9

ex 10
my_sent = ["All", "quiet", "on", "the", "western", "front"]
joined =  ' '.join(my_sent)
print joined
print joined.split()

ex 11
phrase1 = ["All", "quiet", "on", "the", "western", "front"]
phrase2 = ["We", "were", "Soldiers"]
print len(phrase1 + phrase2)
print len(phrase1) + len(phrase2)

ex 12
print "Monty Python"[6:12]
print ["Monty", "Python"][1]
The second one, since it works on tokenized text (a list of words rather than a single string)

ex 13
sent = "Flags of our fathers"
print sent.split()
print sent.split()[2][2]

ex 14
sent3 = ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven', 'and', 'the', 'earth', '.']
indices = [i for i, x in enumerate(sent3) if x == "the"]
print indices

ex 15
bs = [w for w in text5 if w.startswith('b')]
print sorted(bs)

ex 16
print range(10)
print range(10,20)
print range(10, 20, 2)
print range(10, 20, -2)

ex 17
print text9.index('sunset')
print text9[621:644]

ex 18
print sorted(set(sent1+sent2+sent3+sent4+sent5+sent6+sent7+sent8))

ex 19
print len(set([w.lower() for w in text1]))
print len([w.lower() for w in set(text1)])

ex 20
print "ANIK".isupper()
print not "ANIK".islower()
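They differ: w.isupper() is True only when every cased character is uppercase, whereas not w.islower() is also True for mixed-case words (e.g. "MixEd") and for strings with no letters at all (e.g. "123"). For "ANIK" both happen to be True.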

ex 21
# A negative start index counts back from the end of the sequence, so this
# slice is the last two tokens without spelling out len(text2) twice.
print(text2[-2:])

ex 22
# Collect every four-letter token of the chat corpus, lower-cased so that
# "What" and "what" are counted together.
# Lengths are compared with ==; `is` tests object identity and only appears
# to work for small integers because of a CPython caching detail.
four_letter_words = sorted(w.lower() for w in text5 if len(w) == 4)
# Printing the FreqDist shows the words with their counts.
print(FreqDist(four_letter_words))

ex 23
upper_case_words = [w for w in text6 if w.isupper()]
for word in upper_case_words:
    print word

ex 24
word_list = [w for w in text6 if w.endswith('ize') and 'pt' in w and w.istitle()]
print word_list

ex 25
sent = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
print [w for w in sent if w.startswith('sh')]
print [w for w in sent if len(w) > 4]

ex 26
print (sum([len(w) for w in text1])) / len(text1)

ex 27
def vocab_size(text):
    """Return the number of distinct alphabetic tokens in `text`, ignoring case.

    `text` is any iterable of strings. Tokens that are not purely alphabetic
    (punctuation, numbers, mixed tokens) are skipped, and the remaining ones
    are lower-cased before being counted.
    """
    seen = set()
    for token in text:
        if not token.isalpha():
            continue
        seen.add(token.lower())
    return len(seen)
print vocab_size(text1)

ex 28
def percent(word, text):
    """Return how often `word` occurs in `text`, as a percentage of all tokens.

    `text` must support `count()` and `len()` (a list of tokens, for example).
    The match is exact, so it is case-sensitive.

    Raises ZeroDivisionError if `text` is empty.
    """
    # Divide by the length of the text that was passed in, not a fixed global
    # corpus, so the result is correct for any text. The float literal keeps
    # the result fractional even without `from __future__ import division`.
    return 100.0 * text.count(word) / len(text)
print str(percent('a', text4)) + '%'

ex 29
too descriptive


  1. Do you know is there available list of answers to chapter 5?

  2. can yew pls post answers for chapter 3.. ????

  3. Sorry dude, but you got #20 wrong. Words with MixEd CasinG will evaluate differently between the two expressions.

  4. and 21 should be simplified to text2[-2:] since negative indices count back from the end of a string, list, tuple, etc...
