-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathShowlist.py
155 lines (140 loc) · 3.94 KB
/
Showlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python
from bs4 import BeautifulSoup
import urllib2
from collections import defaultdict
import xml.etree.ElementTree
def scrape():
    """Fetch the Showlist Austin front page and collect its raw elements.

    Returns:
        A two-item list ``[h4, table]``: ``h4`` holds every ``<h4>``
        element of the page except the first six, and ``table`` holds
        every ``<table>`` element.
    """
    response = urllib2.urlopen('http://www.showlistaustin.com')
    try:
        # The parser reads the whole response while building the tree, so
        # the connection can be released as soon as the soup exists.
        soup = BeautifulSoup(response)
    finally:
        response.close()
    # NOTE(review): the first six <h4> elements are skipped -- presumably
    # they are page furniture rather than date headings; confirm against
    # the live site.
    headings = soup.find_all("h4")[6:]
    tables = soup.find_all("table")
    return [headings, tables]
def get_dates(scrapeddata):
    """Collect the text of every ``<b>`` tag found in the scraped headings.

    Args:
        scrapeddata: The ``[h4, table]`` pair; only item 0 (the heading
            elements) is read here.

    Returns:
        A flat list with one ``str`` per ``<b>`` tag, in the order the
        headings and their tags are encountered.
    """
    headings = scrapeddata[0]
    return [
        str(bold.get_text())
        for heading in headings
        for bold in heading.find_all("b")
    ]
def get_events(scrapeddata):
    """Extract the band line-ups from every scraped table.

    Each stripped string of a table that is at least three characters
    long is lower-cased.  A string ending in "the" loses its last seven
    characters, a string ending in "at" loses its last three, and any
    other string is discarded.  What remains is split on ", ".

    Args:
        scrapeddata: The ``[h4, table]`` pair; only item 1 (the table
            elements) is read here.

    Returns:
        A list with one entry per table; each entry is a list of events,
        and each event is the list of names obtained from the split.
    """
    days = []
    for table in scrapeddata[1]:
        lineups = []
        for text in table.stripped_strings:
            if len(text) < 3:
                continue
            lowered = str(text).lower()
            if lowered.endswith("the"):
                # Seven characters is the length of a trailing " at the".
                listing = lowered[:-7]
            elif lowered.endswith("at"):
                # Three characters is the length of a trailing " at".
                listing = lowered[:-3]
            else:
                continue
            lineups.append(listing.split(', '))
        days.append(lineups)
    return days
def get_venue(scrapeddata):
    """Collect the text of every ``<b>`` tag in each scraped table.

    Args:
        scrapeddata: The ``[h4, table]`` pair; only item 1 (the table
            elements) is read here.

    Returns:
        A list with one entry per table, each entry being the list of
        ``str`` texts of that table's ``<b>`` tags in document order.
    """
    return [
        [str(bold.get_text()) for bold in table.find_all("b")]
        for table in scrapeddata[1]
    ]
def make_dict(scrapeddata):
    """Map every band name to the venues and dates it is listed with.

    Args:
        scrapeddata: The ``[h4, table]`` pair produced by ``scrape``.

    Returns:
        A ``defaultdict(list)`` keyed by band name.  Each appearance of a
        band adds two items to its list: the venue, then the date.
    """
    shows_by_band = defaultdict(list)
    days = get_events(scrapeddata)
    dates = get_dates(scrapeddata)
    venues = get_venue(scrapeddata)
    for day_index, day in enumerate(days):
        for event_index, event in enumerate(day):
            for band in event:
                # Events and venues are paired purely by position within
                # the same table.
                venue = venues[day_index][event_index]
                # NOTE(review): the date index trails the table index by
                # one, so the first table reads dates[-1] (the last date).
                # Presumably the tables are offset from the headings by
                # one -- confirm against the page layout.
                date = dates[day_index - 1]
                shows_by_band[band].extend([venue, date])
    return shows_by_band
def get_itunes_dir():
    """Build the iTunes directory path from the current user's name.

    Returns:
        ``/Users/<USER>/Music/iTunes`` when the ``USER`` environment
        variable is set, otherwise ``C:\\Users\\<USERNAME>\\Music\\iTunes``.

    Raises:
        KeyError: If neither ``USER`` nor ``USERNAME`` is set.
    """
    import os
    environ = os.environ
    if 'USER' in environ:
        return '/Users/{}/Music/iTunes'.format(environ['USER'])
    # No USER variable: fall back to the Windows-style variable and path.
    return r'C:\Users\{}\Music\iTunes'.format(environ['USERNAME'])
def get_itunes_xml():
    """Return the path of ``iTunes Music Library.xml`` in the iTunes directory."""
    from os.path import join
    library_dir = get_itunes_dir()
    return join(library_dir, 'iTunes Music Library.xml')
# Echo the resolved library path as soon as the module is loaded.
print(get_itunes_xml())
def _track_artist(track):
    """Return the text of the value paired with the ``Artist`` key, or None."""
    entries = list(track)
    # A track dict stores its fields as alternating <key>/value children,
    # so the value is the element immediately after the matching <key>.
    for key_el, value_el in zip(entries, entries[1:]):
        if key_el.tag == 'key' and key_el.text == 'Artist':
            return value_el.text
    return None


def iTunes(xml_path=None):
    """Scan the iTunes library XML and list every distinct artist.

    The artist is looked up by its ``<key>Artist</key>`` entry rather than
    by position among the ``<string>`` children, because the position
    depends on which optional fields a track happens to have.

    Args:
        xml_path: Path of the library XML file.  Defaults to the path
            returned by ``get_itunes_xml()``.

    Returns:
        A list of lower-cased artist names without duplicates, in the
        order they are first encountered.  Tracks without an artist are
        skipped.
    """
    if xml_path is None:
        xml_path = get_itunes_xml()
    tree = xml.etree.ElementTree.ElementTree()
    tree.parse(xml_path)
    top = tree.getroot().find('dict')
    # The first <dict> child of the top-level dict holds the tracks.
    tracks = top.find('dict')
    seen = set()
    artists = []
    for track in tracks.findall('dict'):
        artist = _track_artist(track)
        if not artist:
            continue
        artist = artist.lower()
        if artist not in seen:
            seen.add(artist)
            artists.append(artist)
    return artists
def make_list():
    """Print each iTunes artist that also appears on Showlist.

    For every match the artist name and its list of venues and dates are
    printed on one line, followed by a line containing two newlines.
    Nothing is returned.
    """
    scraped = scrape()
    library_artists = iTunes()
    shows_by_band = make_dict(scraped)
    for artist in library_artists:
        if artist not in shows_by_band:
            continue
        print('%s %s' % (artist, shows_by_band[artist]))
        print('\n\n')
# Run the iTunes/Showlist comparison only when executed as a script.
if __name__ == '__main__':
    make_list()