from __future__ import division
from nltk.book import *
#author: Anik
Check out the Chapter 2 Exercise Answers.
ex 2
print 26**100
ex 3
print ['Monty', 'Python'] * 20
ex 4
print len(text2)
print len(set(text2))
ex 5
humour: the lexical diversity score in Table 1.1 is the average number of times each word is used, and humour's score is lower than romance's, so humour has the more varied vocabulary
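A quick way to check this directly against the Brown corpus, using the chapter's tokens-per-type definition of lexical diversity (lower means more varied); 'humor' and 'romance' are the Brown category names:
from nltk.corpus import brown

def lexical_diversity(text):
    # average number of uses per distinct word; a float thanks to the __future__ import above
    return len(text) / len(set(text))

for genre in ['humor', 'romance']:
    print genre, lexical_diversity(brown.words(categories=genre))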
ex 6
text2.dispersion_plot(["Elinor", "Marianne", "Edward", "Willoughby"])
Elinor appears throughout the novel, while Marianne's and Willoughby's mentions cluster together, which suggests they are a couple.
ex 7
text5.collocations()  # collocations() prints its own output and returns None, so wrapping it in print adds a stray "None"
ex 8
easy: set(text4) collects the distinct tokens and len() counts them, so the expression gives the vocabulary size of text4; see the sketch below
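The same expression broken into its two steps, for illustration (vocab is an arbitrary name):
vocab = set(text4)   # step 1: collapse text4 to its distinct tokens
print len(vocab)     # step 2: count them -- the vocabulary size of text4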
ex 9
easy: assign a string to a variable, then experiment with + (concatenation) and * (repetition); a short sketch follows
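A minimal sketch of the kind of experiment the exercise asks for (my_string and its value are arbitrary):
my_string = "Monty Python"
print my_string                 # prints the string as assigned
print my_string + my_string     # concatenation: the two copies run together
print my_string * 3             # repetition: three copies back to back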
ex 10
my_sent = ["All", "quiet", "on", "the", "western", "front"]
joined = ' '.join(my_sent)
print joined
print joined.split()
ex 11
phrase1 = ["All", "quiet", "on", "the", "western", "front"]
phrase2 = ["We", "were", "Soldiers"]
print len(phrase1 + phrase2)
print len(phrase1) + len(phrase2)
ex 12
print "Monty Python"[6:12]
print ["Monty", "Python"][1]
the second one: it is already tokenized into words, which is what most NLP work needs
ex 13
sent = "Flags of our fathers"
print sent.split()
print sent.split()[2][2]
ex 14
sent3 = ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven', 'and', 'the', 'earth', '.']
indices = [i for i, x in enumerate(sent3) if x == "the"]
print indices
ex 15
bs = [w for w in text5 if w.startswith('b')]
print sorted(bs)
ex 16
print range(10)          # [0, 1, ..., 9]
print range(10, 20)      # [10, 11, ..., 19]
print range(10, 20, 2)   # even numbers from 10 to 18
print range(10, 20, -2)  # [] -- a negative step can never count up from 10 to 20
ex 17
print text9.index('sunset')
print text9[621:644]
ex 18
print sorted(set(sent1+sent2+sent3+sent4+sent5+sent6+sent7+sent8))
ex 19
print len(set([w.lower() for w in text1]))  # lower-case every token first, then count the distinct forms
print len([w.lower() for w in set(text1)])  # count the distinct case-sensitive tokens; always >= the line above
ex 20
print "ANIK".isupper()
print not "ANIK".islower()
no difference
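A quick demonstration with an arbitrary mixed-case string:
w = "Anik"
print w.isupper()      # False: not every cased character is upper case
print not w.islower()  # True: islower() is False because of the capital letter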
ex 21
print text2[len(text2)-2:len(text2)]
ex 22
four_letter_words = [w.lower() for w in text5 if len(w) == 4]  # use ==, not 'is', to compare values
fd = FreqDist(four_letter_words)
print fd.keys()  # in NLTK 2.x the keys come back in decreasing order of frequency
ex 23
upper_case_words = [w for w in text6 if w.isupper()]
for word in upper_case_words:
    print word
ex 24
# four separate conditions: (a) ending in ize, (b) containing z, (c) containing pt, (d) titlecase
print [w for w in text6 if w.endswith('ize')]
print [w for w in text6 if 'z' in w]
print [w for w in text6 if 'pt' in w]
print [w for w in text6 if w.istitle()]
ex 25
sent = ['she', 'sells', 'sea', 'shells', 'by', 'the', 'sea', 'shore']
print [w for w in sent if w.startswith('sh')]
print [w for w in sent if len(w) > 4]
ex 26
print sum([len(w) for w in text1]) / len(text1)  # average token length in text1; a float thanks to the __future__ division import
ex 27
def vocab_size(text):
    return len(set([word.lower() for word in text if word.isalpha()]))
print vocab_size(text1)
ex 28
def percent(word, text):
    return 100 * text.count(word) / len(text)  # len(text), not len(text4), so the function works for any text
print str(percent('a', text4)) + '%'
ex 29
skipped: the answer is a written discussion rather than code
Do you know if there is a list of answers available for chapter 5?
Can you please post answers for chapter 3?
Sorry dude, but you got #20 wrong. Words with MixEd CasinG will evaluate differently between the two expressions.
And #21 should be simplified to text2[-2:], since negative indices count back from the end of a string, list, tuple, etc.
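A tiny illustration of the commenter's point (the short list is arbitrary; text2 is the text loaded above):
words = ['we', 'were', 'soldiers']
print words[-2:]  # ['were', 'soldiers'] -- same as words[len(words)-2:len(words)]
print text2[-2:]  # the last two tokens of text2, equivalent to the exercise 21 answer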