-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathordomedic.py
91 lines (75 loc) · 2.35 KB
/
ordomedic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf8 -*-
import requests
from BeautifulSoup import BeautifulSoup
import json
def generateSpecialities():
    """Fetch the specialities listed on the ordomedic.be doctor search form.

    Downloads the French search page, looks up the elements whose ``name``
    attribute is ``search_specialism`` and reads the ``<option>`` entries of
    the first match.

    Returns:
        A list of dicts, one per option, with the keys ``"value"`` (the
        option's ``value`` attribute) and ``"name"`` (the option's text).
    """
    page = requests.get("https://ordomedic.be/fr/rechercher-un-medecin/")
    document = BeautifulSoup(page.text)
    selectors = document.findAll(attrs={"name": "search_specialism"})
    # The first <option> is skipped (presumably a placeholder entry --
    # TODO confirm against the live form).
    options = selectors[0].findAll("option")[1:]
    return [
        {"value": option.get("value"), "name": option.string}
        for option in options
    ]
def RetrievePeople(page, speciality, people):
    """Scrape one page of search results and add the doctors found to *people*.

    The search URL is built from the module-level ``site``, ``language`` and
    ``town`` values plus the given speciality and page number.

    Args:
        page: Page number sent as the ``page`` query parameter.
        speciality: Mapping with a ``"value"`` key (sent as
            ``search_specialism``) and a ``"name"`` key (stored on every
            record), as produced by ``generateSpecialities``.
        people: List of already collected records; new records are appended
            to it in place, duplicates are skipped.

    Returns:
        The same *people* list.

    Side effects:
        Sets the module-level ``isEnd`` flag to ``"Y"`` when the request does
        not answer with HTTP 200, or when the page contributes no new record,
        so that the calling loop stops paging.
    """
    # The flag lives at module level; without this declaration the
    # assignments below would only create a local and never reach the caller.
    global isEnd

    query = "&".join([
        "search_specialism=" + speciality["value"],
        "search_place=" + town,
        "page=" + str(page),
    ])
    url = site + language + "/rechercher-un-medecin/?" + query

    response = requests.get(url)
    # Stop on a bad answer, leaving the collected records untouched.
    if response.status_code != 200:
        print(response.status_code)
        isEnd = "Y"
        return people

    count_before = len(people)
    soup = BeautifulSoup(response.text)
    for info in soup.findAll(attrs={"class": "result"}):
        names = info.findAll(attrs={"class": "name"})
        addresses = info.findAll(attrs={"class": "address"})
        if not names or not addresses:
            # Entry without the expected markup: nothing usable to record.
            continue
        fields = addresses[0].findAll("dd")
        med = {
            "name": names[0].string,
            # The first two <dd> entries are concatenated as the address.
            # Joining as unicode avoids the UnicodeEncodeError that str()
            # raises on accented text under Python 2.
            "Address": u"".join(dd.string or u"" for dd in fields[:2]),
            "Phone": _phone_from_fields(fields),
            "specialisation": speciality["name"],
        }
        # Do not add duplicates.
        if med not in people:
            people.append(med)

    # A page that adds nobody means there is nothing more to gather.
    if len(people) == count_before:
        isEnd = "Y"
        # Message text kept verbatim from the original script.
        print("too many people !")
    return people


def _phone_from_fields(fields):
    """Return the phone number found in the last ``<dd>``, or ``"None"``."""
    text = fields[-1].string if fields else None
    if text is None:
        return "None"
    tokens = text.strip().split()
    # Assumes the entry reads like "Tel <number>", so the number is the
    # second whitespace-separated token -- TODO confirm against the live page.
    if len(tokens) > 1 and tokens[0].startswith("Tel"):
        return tokens[1]
    return "None"
# Search configuration; RetrievePeople reads these module-level values.
site = "https://ordomedic.be/"
# Sent as the ``search_place`` parameter (presumably a postal code --
# TODO confirm).
town = "5000"
language = "fr"
# Every record collected so far, across all specialities.
people = []
numberOfPeople = len(people)
# Paging flag: "N" while more pages should be fetched, "Y" to stop.
isEnd = "N"
for spec in generateSpecialities():
    # Restart paging for each speciality; without this reset the flag stays
    # "Y" after the first speciality and every later one is skipped.
    isEnd = "N"
    numPage = 1
    while isEnd == "N":
        people = RetrievePeople(numPage, spec, people)
        # Move on to the next page.
        numPage = numPage + 1
        # Hard cap: pages 1 to 9 at most per speciality.
        if numPage >= 10:
            isEnd = "Y"
# Save in a JSON file
with open('medecins.json', 'w') as outfile:
    json.dump(people, outfile)