forked from tatiatib/flathunt-easy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathflathunt.py
93 lines (78 loc) · 2.56 KB
/
flathunt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import requests
import re
import time
import telegram_send
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Load the listing URLs that were already reported by earlier runs.
# Each entry keeps its trailing "\n", because the membership checks further
# down compare against `link + "\n"`.
try:
    with open('/home/a/Downloads/flats.txt') as f:
        old_flats = set(f)
except FileNotFoundError:
    # First run: the history file is only created when new flats are
    # appended at the end of the script, so treat every listing as new.
    old_flats = set()
# Search URLs, one per portal. Put your own search URLs here; each search
# is expected to be sorted by most recently created.
url_immonet = ""
url_immowelt = ""
url_wg_gesucht = ""
url_ebay = ""

# Listings found during this run that are not contained in old_flats.
new_flats = []

# Start a headless Chrome session that is shared by the Selenium-based checks.
options = webdriver.ChromeOptions()
for chrome_arg in ("headless", '--disable-blink-features=AutomationControlled'):
    options.add_argument(chrome_arg)
driver = webdriver.Chrome(options=options)
# check immonet (fetched through the Selenium browser)
driver.get(url_immonet)
# Fixed pause before the page source is read
# (presumably so the page can finish loading -- confirm).
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'html.parser')
# Every anchor whose href contains "expose" is treated as a listing link.
for anchor in soup.find_all('a', href=True):
    href = anchor["href"]
    if "expose" not in href:
        continue
    if href + "\n" in old_flats:
        # Already reported by an earlier run.
        continue
    print("NEW FLAT ", href)
    new_flats.append(href)
# check immowelt (plain HTTP request, no browser)
print("check immowelt")
response = requests.get(url_immowelt)
soup = BeautifulSoup(response.text, 'html.parser')
hrefs = (anchor["href"] for anchor in soup.find_all('a', href=True))
for link in hrefs:
    # Listing links contain "expose"; skip the ones already in the history.
    if "expose" in link and link + "\n" not in old_flats:
        print("NEW FLAT ", link)
        new_flats.append(link)
# check wg-gesucht (plain HTTP request, no browser)
print("check wg-gesucht")
r = requests.get(url_wg_gesucht)
soup = BeautifulSoup(r.text, 'html.parser')
# Listing links end in ".<at least three digits>.html".
# Raw string: "\." in a normal string literal is an invalid escape sequence.
# The dot before "html" is escaped so it only matches a literal dot.
regex = re.compile(r"\.[0-9]{3,}\.html$")
for flat in soup.find_all('a', href=True):
    if not regex.search(flat["href"]):
        continue
    # The href is appended to the site root unchanged; this exact string is
    # what gets stored in (and compared against) the history file.
    link = "https://www.wg-gesucht.de/" + flat["href"]
    if link + "\n" not in old_flats:
        print("NEW FLAT ", link)
        new_flats.append(link)
# check ebay-kleinanzeigen (fetched through the Selenium browser)
print("check ebay-kleinanzeigen")
driver.get(url_ebay)
# Pause BEFORE taking the snapshot, as the immonet check does; sleeping
# after page_source has been read cannot influence the parsed HTML.
time.sleep(2)
# Only the part of the page before the "Alternative Anzeigen in der Umgebung"
# marker is parsed; anchors after that marker are ignored.
html = driver.page_source.split("Alternative Anzeigen in der Umgebung")[0]
soup = BeautifulSoup(html, 'html.parser')
for flat in soup.find_all('a', href=True):
    if "/s-anzeige/" not in flat["href"]:
        continue
    link = "https://www.ebay-kleinanzeigen.de" + flat["href"]
    if link + "\n" not in old_flats:
        print("NEW FLAT ", link)
        new_flats.append(link)
# Report new listings via Telegram and record them in the history file so
# that they are not reported again by the next run.
if new_flats:
    print("sending telegram msg")
    telegram_send.send(messages=["Es wurden neue Wohnungen gefunden!"])
    telegram_send.send(messages=new_flats)
    # One URL per line; the trailing "\n" is what the membership checks
    # against old_flats rely on.
    with open('/home/a/Downloads/flats.txt', "a") as f:
        f.writelines(flat + "\n" for flat in new_flats)
else:
    print("No new flats")
# Shut down the headless browser in both branches. The previous
# `sdriver.quit()` raised NameError, so the browser was never closed.
driver.quit()