-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathquiz.py
99 lines (86 loc) · 3.73 KB
/
quiz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from textblob import TextBlob
import nltk
import wikipedia
import re
from nltk.corpus import wordnet as wn
import random
# Fetch the NLTK data packages at import time, in a fixed order.
# 'wordnet' backs the `wn` lookups in this module; the other three are
# presumably needed by TextBlob's tokenizer, tagger and noun-phrase
# extractor -- TODO confirm against the TextBlob installation notes.
for _resource in ('brown', 'punkt', 'averaged_perceptron_tagger', 'wordnet'):
    nltk.download(_resource)
def get_similar_words(word, page):
    """Return up to eight WordNet "sibling" nouns for *word*.

    The first noun synset of *word* is taken, then the first hypernym of
    that synset, and the first lemma name of each of that hypernym's
    hyponyms is collected (underscores replaced by spaces). Names equal to
    *word* are skipped.

    Args:
        word: The noun to find alternatives for.
        page: Not used by this function; kept so existing callers that
            pass it continue to work.

    Returns:
        A list of at most eight strings. The list is empty when WordNet
        has no noun synset for *word*, or when the chosen synset has no
        hypernym.
    """
    synsets = wn.synsets(word, pos='n')
    if not synsets:
        # Nothing known about this word as a noun.
        return []

    # In the absence of a better method, take the first synset and then
    # the first hypernym of that synset.
    hypernyms = synsets[0].hypernyms()
    if not hypernyms:
        # Indexing [0] on an empty hypernym list used to raise IndexError;
        # treat "no parent concept" the same as "no synset".
        return []

    # Take the name of the first lemma of each hyponym, stopping at eight.
    similar_words = []
    for hyponym in hypernyms[0].hyponyms():
        similar_word = hyponym.lemmas()[0].name().replace('_', ' ')
        if similar_word != word:
            similar_words.append(similar_word)
        if len(similar_words) == 8:
            break
    print("similar_words are,", similar_words)
    return similar_words
def evaluate_sentence(sentence, page):
    """Turn one sentence into a fill-in-the-blank trivia item.

    The first token tagged 'NN' is selected. If it occurs inside one of the
    sentence's noun phrases, the last two words of that phrase become the
    answer; otherwise the token alone is the answer. The first
    case-insensitive occurrence of the answer in the sentence text is
    replaced by one blank ('__________') per answer word.

    Args:
        sentence: An object exposing ``tags`` (sequence of (word, tag)
            pairs), ``words``, ``noun_phrases`` and a ``str()`` form, such
            as the items of ``blob.sentences`` used by ``subject_to_quiz``.
        page: An object exposing ``title`` and ``url``.

    Returns:
        None when the sentence has no tags, starts with an adverb ('RB'),
        or has fewer than six words. Otherwise a dict with the keys
        'title', 'url', 'answer', 'similar_words' and 'question'.

        When the sentence contains no 'NN' token, 'answer' is the empty
        string and 'question' is the unmodified sentence.
    """
    tags = sentence.tags
    print("tags are", tags)
    # Reject sentences that are unlikely to make a good question. The
    # `not tags` guard avoids an IndexError on an empty tag list, and the
    # length test counts words (len(sentence) would count characters).
    if not tags or tags[0][1] == 'RB' or len(sentence.words) < 6:
        return None

    replace_nouns = []
    for word, tag in tags:
        if tag != 'NN':
            continue
        # Is the noun part of a noun phrase? If so, blank out the last two
        # words of that phrase.
        # NOTE(review): TextBlob appears to lower-case noun phrases, so a
        # capitalised noun may never match here -- confirm.
        for phrase in sentence.noun_phrases:
            if phrase.startswith("'"):
                print("help me")
                # Phrases starting with an apostrophe are treated as
                # extraction noise; stop looking at phrases entirely.
                break
            if word in phrase:
                replace_nouns.extend(phrase.split()[-2:])
                break
        # If the word was not found in any phrase, replace it on its own.
        if not replace_nouns:
            replace_nouns.append(word)
        # Only the first 'NN' token is ever considered.
        break

    answer = ' '.join(replace_nouns)
    trivia = {
        'title': page.title,
        'url': page.url,
        'answer': answer,
    }
    print("trivia is", trivia)

    if len(replace_nouns) == 1:
        # A single word: use WordNet to find plausible alternatives.
        trivia['similar_words'] = get_similar_words(replace_nouns[0], page)
    else:
        # A phrase (or nothing): alternatives are unlikely to make sense.
        trivia['similar_words'] = []

    # Blank out only the first occurrence of the answer in the sentence.
    blanks_phrase = ' '.join(['__________'] * len(replace_nouns))
    expression = re.compile(re.escape(answer), re.IGNORECASE)
    trivia['question'] = expression.sub(blanks_phrase, str(sentence), count=1)
    return trivia
def subject_to_quiz(subject):
    """Build one trivia item from a random sentence of a Wikipedia summary.

    Args:
        subject: Value passed to ``wikipedia.page`` to look up the article.

    Returns:
        Whatever ``evaluate_sentence`` returns for the randomly chosen
        sentence (a trivia dict, or None if the sentence was rejected).
        None is also returned when the summary yields no sentences.

    Any exception raised by ``wikipedia.page`` propagates to the caller.
    """
    page = wikipedia.page(subject)
    sentences = TextBlob(page.summary).sentences
    if not sentences:
        # random.choice raises IndexError on an empty sequence; report
        # "no question" the same way a rejected sentence is reported.
        return None

    eval_res = evaluate_sentence(random.choice(sentences), page)
    print(eval_res)
    return eval_res