-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathex14HW.py
184 lines (153 loc) · 5.46 KB
/
ex14HW.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Exercise 14.1. The os module provides a function called walk that is similar
# to this one but more versatile. Read the documentation and use it to print
# the names of the files in a given directory and its subdirectories
def run():
    """Prompt for a directory and print the names of all files beneath it.

    The directory name entered by the user is printed first, followed by
    the bare file names (not full paths) found by ``os.walk`` in that
    directory and all of its subdirectories, one per line.
    """
    import os
    import time

    directory = raw_input('name of dir ')

    # os.walk yields (dirpath, dirnames, filenames); only the file names
    # are collected, flattened into a single list.
    names = [name for _, _, files in os.walk(directory) for name in files]

    print(directory)
    for name in names:
        print(name)
        # NOTE(review): the original indentation was lost; this assumes the
        # one-second pause happens after every printed name -- confirm.
        time.sleep(1)
# Exercise 14.2. Write a function called sed that takes as arguments a pattern
# string, a replacement string, and two filenames; it should read the first
# file and write the contents into the second file (creating it if necessary).
# If the pattern string appears anywhere in the file, it should be replaced
# with the replacement string. If an error occurs while opening, reading,
# writing or closing files, your program should catch the exception,
# print an error message, and exit.
def sed(pattern, replacement, readfile, writefile):
    """Copy ``readfile`` to ``writefile``, replacing ``pattern`` on the way.

    Every occurrence of the literal string ``pattern`` is replaced with
    ``replacement`` in a single left-to-right pass, so a replacement that
    itself contains the pattern is safe. An empty pattern means "nothing to
    replace" and the text is copied unchanged.

    The input is read completely before the output is opened, so
    ``readfile`` and ``writefile`` may name the same file.

    Returns:
        None on success, or the string 'does not fempute' when a file
        cannot be opened, read, decoded or written. Other errors (for
        example a non-string pattern) propagate to the caller.
    """
    try:
        with open(readfile) as source:
            text = source.read()
        if pattern:
            text = text.replace(pattern, replacement)
        # Opened only after the read succeeded: opening with 'w' truncates.
        with open(writefile, 'w') as target:
            target.write(text)
    except (IOError, OSError, UnicodeError):
        return 'does not fempute'
# Exercise 14.3. Write a module that
# imports anagram_sets and provides two new functions: store_anagrams should
# store the anagram dictionary in a 'shelf'; read_anagrams should look up a
# word and return a list of its anagrams.
def all_anagrams(wordlist):
    """Group the words of ``wordlist`` by the letters they are made of.

    Returns:
        A dict mapping a tuple of a word's characters in sorted order to
        the list of words having exactly those characters, in the order
        they appear in ``wordlist``.
    """
    groups = {}
    for word in wordlist:
        signature = tuple(sorted(word))
        # Append in place instead of rebuilding the list for every word.
        groups.setdefault(signature, []).append(word)
    return groups
def stored_anagrams(d):
    """Interactively save ``d`` to, or refresh it from, the file 'anagram.db'.

    The user is asked 'read or write? ':

    * 'write' pickles ``d`` into 'anagram.db' in the current directory.
    * 'read' unpickles 'anagram.db' and copies every entry into ``d``,
      overwriting entries with the same key.
    * any other answer leaves both ``d`` and the file untouched.

    Returns None in all cases.
    """
    import pickle

    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3

    answer = ask('read or write? ')
    if answer == 'write':
        # Pickle data is bytes, so the file is opened in binary mode.
        with open('anagram.db', 'wb') as db:
            db.write(pickle.dumps(d))
    elif answer == 'read':
        # NOTE: unpickling can execute arbitrary code; only load a file
        # this program wrote itself.
        with open('anagram.db', 'rb') as db:
            stored = pickle.loads(db.read())
        for key in stored:
            d[key] = stored[key]
def read_anagrams(word, d='a'):
    """Return the anagrams of ``word`` found in an anagram dictionary.

    Args:
        word: the word to look up; must be a string.
        d: a dict mapping a tuple of sorted characters to the list of words
            with those characters. When left at its default ('a'), the
            dictionary is unpickled from 'anagram.db' in the current
            directory.

    Returns:
        A new list of the other words sharing ``word``'s letters (the
        dictionary itself is not modified); the string
        'only strings please' if ``word`` is not a string; or
        'sorry <word> not a word' if ``word`` is not in the dictionary.
    """
    import pickle

    if not isinstance(word, str):
        return 'only strings please'
    if d == 'a':
        # NOTE: unpickling can execute arbitrary code; only load a file
        # this program wrote itself. Binary mode because pickles are bytes.
        with open('anagram.db', 'rb') as db:
            d = pickle.loads(db.read())
    try:
        anagrams = list(d[tuple(sorted(word))])  # copy; keep d unchanged
        anagrams.remove(word)
    except (KeyError, ValueError):
        # KeyError: no word with these letters; ValueError: the letters are
        # known but this exact word is not among them.
        return "sorry %s not a word" % word
    return anagrams
# Exercise 14.4. In a large collection of MP3 files, there may be more than
# one copy of the same song, stored in different directories or with different
# file names. The goal of this exercise is to search for duplicates.
# 1. Write a program that searches a directory and all of its subdirectories,
# recursively, and returns a list of complete paths for all files with a given
# suffix (like .mp3). Hint: os.path provides several useful functions for
# manipulating file and path names.
def allfiles(dirf, suffix='.mp3'):
    """Return the paths of all files under ``dirf`` whose name ends in ``suffix``.

    The directory is searched recursively with ``os.walk``. The suffix is
    compared case-insensitively, so 'SONG.MP3' matches '.mp3'.

    Args:
        dirf: directory to search.
        suffix: file-name ending to look for; defaults to '.mp3'.

    Returns:
        A list of paths, each formed by joining the directory in which the
        file was found with the file name. The list is empty when nothing
        matches or ``dirf`` does not exist.
    """
    import os

    wanted = suffix.lower()
    matches = []
    for folder, _subdirs, filenames in os.walk(dirf):
        for filename in filenames:
            if filename.lower().endswith(wanted):
                matches.append(os.path.join(folder, filename))
    return matches
# 2. To recognize duplicates, you can use md5sum to compute a 'checksum' for
# each file. If two files have the same checksum, they probably have the
# same contents.
def find_dups(masterlist):
    """Group the files in ``masterlist`` by the MD5 checksum of their contents.

    Each path is printed once it has been processed. The checksum is
    computed in-process with ``hashlib`` rather than by building a shell
    command line, so paths containing spaces or shell metacharacters need
    no escaping and are returned exactly as they were passed in.

    Args:
        masterlist: iterable of file paths.

    Returns:
        A dict mapping the 32-character hexadecimal MD5 digest to the list
        of paths with that digest; lists longer than one are probable
        duplicates. Paths that cannot be read are collected under the
        empty-string key.
    """
    import hashlib

    by_checksum = {}
    for path in masterlist:
        try:
            digest = hashlib.md5()
            with open(path, 'rb') as stream:
                # Read in chunks so large files are not loaded whole.
                for chunk in iter(lambda: stream.read(65536), b''):
                    digest.update(chunk)
            checksum = digest.hexdigest()
        except (IOError, OSError):
            checksum = ''
        by_checksum.setdefault(checksum, []).append(path)
        print(path)
    return by_checksum
# 3. To double-check, you can use the Unix command diff.
# nah.
# Programs that will be imported as modules often use the following idiom:
# if __name__ == '__main__':
# print linecount('wc.py')
# __name__ is a built-in variable that is set when the program starts.
# If the program is running as a script, __name__ has the value __main__; in
# that case, the test code is executed. Otherwise, if the module is being
# imported, the test code is skipped.
# Exercise 14.5. Type this example into a file named wc.py and run it as a
# script. Then run the Python interpreter and import wc. What is the value
# of __name__ when the module is being imported?
# 'wc', The name of the module
# Exercise 14.6. The urllib module provides methods for manipulating URLs
# and downloading information from the web. The following example downloads
# and prints a secret message from thinkpython.com:
# import urllib
# conn = urllib.urlopen('http://thinkpython.com/secret.html')
# for line in conn:
# print line.strip()
# Run this code and follow the instructions you see there.
# Write a program that prompts the user for a zip code and prints the
# name and population of the corresponding town.
def townandpop():
    """Prompt for a zip code and print the town name and population.

    The page 'http://www.uszip.com/zip/<zip code>' is downloaded and two
    pieces of text are cut out of it: the start of the <title> element (up
    to three characters past its first comma) as the town, and the digits
    and commas found in the 40 characters starting at 'Total population'
    as the population.
    """
    import string
    import urllib

    zipcode = raw_input('zip code please.. ')
    response = urllib.urlopen('http://www.uszip.com/zip/' + zipcode)
    page = response.read()
    response.close()

    # str.find returns -1 when a marker is absent; the offsets below are
    # used as-is in that case, exactly as found.
    title_start = page.find('<title>') + 7  # skip past the 7-char tag
    pop_start = page.find('Total population')

    numeric = string.digits + ','
    population = ''.join(
        ch for ch in page[pop_start:pop_start + 40] if ch in numeric
    )

    # Keep the text through the comma plus the three characters after it.
    town_length = page[title_start:].find(',') + 4
    town = page[title_start:title_start + town_length]
    print(town + ' ' + population)
# Doesn't work for zip codes that are assigned to just a state, such as 04970.