-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.py
57 lines (49 loc) · 2.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import requests
import json
import jsonlines
# Scrape eBay "sold" iPhone search results (pages 1-5) and append each
# page's extracted listings as one JSON Lines record to jsonout.jsonl.
for page in range(1, 6):
    print("Page :", page)
    # Update the link of the item: search URL for sold iPhone listings,
    # one results page per request.
    link = f"https://www.ebay.com/sch/i.html?_from=R40&_nkw=iphone&LH_Sold=1&_oac=1&_pgn={page}"
    # A browser-like User-Agent is required or eBay rejects the request.
    req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
    webpage = urlopen(req).read()
    soup = BeautifulSoup(webpage, 'html.parser')
    items = soup.find_all('li', attrs={'class': 's-item s-item__pl-on-bottom'})
    data = []
    print("Count :", len(items))
    for item in items:
        item_data = {}
        # find() returns None when an element is absent, so .text raises
        # AttributeError; catch exactly that (not a bare except, which
        # would also hide real bugs) and leave the field out of the dict.
        try:
            item_data["name"] = item.find('span', attrs={'role': 'heading'}).text
            item_data["date_sold"] = item.find('div', attrs={'class': 's-item__caption-section'}).find('span', attrs={'class': 'POSITIVE'}).text
            item_data["price"] = item.find('span', attrs={'class': 's-item__price'}).text
        except AttributeError:
            pass
        # Auction listings only: bid count, if present.
        try:
            item_data["bids"] = item.find('span', attrs={'class': 's-item__bids s-item__bidCount'}).text
        except AttributeError:
            pass
        # Fixed-price listings only: purchase option, if present.
        try:
            item_data["purchase_option"] = item.find('span', attrs={'class': 's-item__purchase-options s-item__purchaseOptions'}).text
        except AttributeError:
            pass
        item_data["page"] = page
        data.append(item_data)
    print("\n")
    # Append this page's list of item dicts as a single JSON Lines record.
    with jsonlines.open('jsonout.jsonl', mode='a') as writer:
        writer.write(data)