Skip to content

Commit

Permalink
Merge pull request #290 from kushagra1912/master
Browse files Browse the repository at this point in the history
Python Web Scraping
  • Loading branch information
fineanmol authored Oct 1, 2021
2 parents 49820d6 + 3f2b8ff commit c397258
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Contributors.html
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ <h1 class="animated rubberBand delay-4s">Contributors</h1>
<a class="box-item" href="https://github.com/christianwidjayaa"><span>Christian Widjaya</span></a>
<a class="box-item" href="https://github.com/kingketan9"><span>Ketan Goyal</span></a>
<a class="box-item" href="https://github.com/Davjot"><span>Davjot Singh</span></a>
<a class="box-item" href="https://github.com/kushagra1912"><span>Kushagra Agrawal</span></a>


<!-- Please maintain the alignment... -->

Expand Down
31 changes: 31 additions & 0 deletions Program's_Contributed_By_Contributors/Python_Programs/plwebsc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import json
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import urlopen

# Page to scrape and where to store the result.
SCRAPE_URL = 'https://understat.com/league/EPL'
# Position (0-based) of the <script> tag whose text carries the embedded data.
# NOTE(review): this is positional and will silently pick the wrong tag if the
# page layout changes -- confirm against the live page when updating.
SCRIPT_INDEX = 3
OUTPUT_CSV = 'PLPLAYERS.csv'


def extract_embedded_json(script_text):
    """Return the JSON document embedded as an escaped string in *script_text*.

    The payload is taken to start at the first backslash in the text and to
    end just before the first ``')`` that follows it (presumably the tail of
    a ``JSON.parse('...')`` call -- verify against the page source).

    Args:
        script_text: Text content of the ``<script>`` element.

    Returns:
        The decoded payload parsed with ``json.loads``.

    Raises:
        ValueError: If no backslash or no closing ``')`` is found, or if the
            decoded payload is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    try:
        start = script_text.index("\\")
        # Search for the terminator only after the payload start so an
        # earlier "')" elsewhere in the script cannot yield an empty slice.
        end = script_text.index("')", start)
    except ValueError:
        raise ValueError(
            "escaped JSON payload not found in script text"
        ) from None

    escaped = script_text[start:end]
    # Resolve the \xNN / \uNNNN escape sequences. Encoding via latin-1 with
    # backslashreplace (instead of utf-8) keeps any raw non-ASCII characters
    # intact; a utf-8 round trip through unicode_escape would turn them into
    # mojibake because unicode_escape decodes bytes as Latin-1.
    decoded = escaped.encode("latin-1", "backslashreplace").decode(
        "unicode_escape"
    )
    return json.loads(decoded)


def main():
    """Download the page, extract the embedded data and write it to CSV."""
    # The context manager closes the HTTP response once the HTML is parsed.
    with urlopen(SCRAPE_URL) as response:
        page = BeautifulSoup(response, "html.parser")

    raw_script = page.find_all(name="script")[SCRIPT_INDEX].string
    print(raw_script)

    # Parse once and flatten the records into a table.
    frame = pd.json_normalize(extract_embedded_json(raw_script))
    print(frame)

    frame.to_csv(OUTPUT_CSV)


if __name__ == "__main__":
    main()


0 comments on commit c397258

Please sign in to comment.