Thursday, November 7, 2013

Answers to the exercises of the book Natural Language Processing with Python: Chapter 1

from __future__ import division
from nltk.book import *

#author: Anik

Check out the Chapter 2 exercise answers

ex 2
print 26**100

ex 3
print ['Monty', 'Python'] * 20

ex 4
print len(text2)
print len(set(text2))

ex 5
romance

ex 6
text2.dispersion_plot(["Elinor", "Marianne", "Edward", "Willoughby"])
Elinor appears everywhere, and Willoughby and Marianne probably had a relationship

ex 7
# collocations() prints the collocations itself and returns None, so it is
# called directly; wrapping it in print would add a stray "None" line.
text5.collocations()

ex 8
easy

ex 9
easy

ex 10
my_sent = ["All", "quiet", "on", "the", "western", "front"]
joined =  ' '.join(my_sent)
print joined
print joined.split()

ex 11
phrase1 = ["All", "quiet", "on", "the", "western", "front"]
phrase2 = ["We", "were", "Soldiers"]
print len(phrase1 + phrase2)
print len(phrase1) + len(phrase2)

ex 12
print "Monty Python"[6:12]
print ["Monty", "Python"][1]
second one, tokenized

ex 13
sent = "Flags of our fathers"
print sent.split()
print sent.split()[2][2]

ex 14
sent3 = ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven', 'and', 'the', 'earth', '.']
indices = [i for i, x in enumerate(sent3) if x == "the"]
print indices

ex 15
bs = [w for w in text5 if w.startswith('b')]
print sorted(bs)

ex 16
print range(10)
print range(10,20)
print range(10, 20, 2)
print range(10, 20, -2)

ex 17
print text9.index('sunset')
print text9[621:644]

ex 18
print sorted(set(sent1+sent2+sent3+sent4+sent5+sent6+sent7+sent8))

ex 19
print len(set([w.lower() for w in text1]))
print len([w.lower() for w in set(text1)])

ex 20
print "ANIK".isupper()
print not "ANIK".islower()
no difference

ex 21
print text2[len(text2)-2:len(text2)]

ex 22
# Compare the length by value (==); `is` tests object identity and only
# happens to work for small ints as a CPython implementation detail.
four_letter_words = sorted([w.lower() for w in text5 if len(w) == 4])
print FreqDist(four_letter_words)

ex 23
upper_case_words = [w for w in text6 if w.isupper()]
for word in upper_case_words:
    print word

ex 24
word_list = [w for w in text6 if w.endswith('ize') and 'pt' in w and w.istitle()]
print word_list

ex 25
sent = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
print [w for w in sent if w.startswith('sh')]
print [w for w in sent if len(w) > 4]

ex 26
print (sum([len(w) for w in text1])) / len(text1)

ex 27
def vocab_size(text):
    """Count the distinct alphabetic tokens in ``text``, ignoring case.

    ``text`` is any iterable of strings. Tokens that are not purely
    alphabetic (punctuation, numbers, mixed tokens) are skipped, and the
    remaining tokens are lowercased before being counted.
    """
    vocabulary = set()
    for token in text:
        if token.isalpha():
            vocabulary.add(token.lower())
    return len(vocabulary)
print vocab_size(text1)

ex 28
def percent(word, text):
    """Return the percentage of tokens in ``text`` that are equal to ``word``.

    ``text`` is any sequence providing ``count()`` and ``len()`` (a list of
    tokens or an nltk ``Text``). The result is a float between 0 and 100;
    an empty ``text`` yields 0.0 instead of dividing by zero.
    """
    total = len(text)
    if total == 0:
        return 0.0
    # Divide by the length of the text that was passed in, not a fixed
    # global corpus; 100.0 keeps true division without the __future__ import.
    return 100.0 * text.count(word) / total
print str(percent('a', text4)) + '%'

ex 29
too descriptive

4 comments:

  1. Do you know is there available list of answers to chapter 5?

    ReplyDelete
  2. can yew pls post answers for chapter 3.. ????

    ReplyDelete
  3. Sorry dude, but you got #20 wrong. Words with MixEd CasinG will evaluate differently between the two expressions.

    ReplyDelete
  4. and 21 should be simplified to text2[-2:] since negative indices wrap around to the end of a string, list, set, etc...

    ReplyDelete