diff --git a/kenpompy/FanMatch.py b/kenpompy/FanMatch.py index e2ab36b..5893dc7 100644 --- a/kenpompy/FanMatch.py +++ b/kenpompy/FanMatch.py @@ -5,6 +5,7 @@ import mechanicalsoup import pandas as pd from bs4 import BeautifulSoup +from io import StringIO class FanMatch: """Object to hold FanMatch page scraping results. @@ -52,7 +53,7 @@ def __init__(self, browser, date = None): browser.open(self.url) fm = browser.get_current_page() table = fm.find_all("table")[0] - fm_df = pd.read_html(str(table)) + fm_df = pd.read_html(StringIO(str(table))) fm_df = fm_df[0] fm_df = fm_df.rename(columns={"Thrill Score": "ThrillScore", "Come back": "Comeback", "Excite ment": "Excitement"}) fm_df.ThrillScore = fm_df.ThrillScore.astype("str") diff --git a/kenpompy/conference.py b/kenpompy/conference.py index dfa6a55..4d14898 100644 --- a/kenpompy/conference.py +++ b/kenpompy/conference.py @@ -8,7 +8,7 @@ import re from bs4 import BeautifulSoup import datetime - +from io import StringIO def get_valid_conferences(browser, season=None): """ @@ -60,10 +60,10 @@ def get_aggregate_stats(browser, conf=None, season=None): confs = browser.get_current_page() #get first table table = confs.find_all('table')[-3] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] #get second table table = confs.find_all('table')[-2] - conf2_df = pd.read_html(str(table))[0] + conf2_df = pd.read_html(StringIO(str(table)))[0] conf2_df['Value'] = conf2_df['Value'].str.replace('%', '').astype(float) conf_df = pd.concat([conf_df, conf2_df]) #clean table @@ -80,7 +80,7 @@ def get_aggregate_stats(browser, conf=None, season=None): confs = browser.get_current_page() #get table table = confs.find_all('table')[0] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Clean table conf_df = conf_df.set_index('Conf') conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] @@ -107,7 +107,7 @@ def get_standings(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[0] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Parse out seed conf_df['Seed'] = conf_df['Team'].str.extract('([0-9]+)') conf_df['Team'] = conf_df['Team'].str.replace('([0-9]+)', '', regex=True).str.rstrip() @@ -139,7 +139,7 @@ def get_offense(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[1] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Rename Rank headers conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] @@ -168,7 +168,7 @@ def get_defense(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[2] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Rename Rank headers conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] diff --git a/kenpompy/misc.py b/kenpompy/misc.py index 15cd9e8..c4721a8 100644 --- a/kenpompy/misc.py +++ b/kenpompy/misc.py @@ -6,6 +6,7 @@ import mechanicalsoup import pandas as pd from bs4 import BeautifulSoup +from io import StringIO def get_pomeroy_ratings(browser, season=None): """ @@ -28,7 +29,7 @@ def get_pomeroy_ratings(browser, season=None): browser.open(url) page = browser.get_current_page() table = page.find_all('table')[0] - ratings_df = pd.read_html(str(table)) + ratings_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ratings_df = ratings_df[0] ratings_df.columns = ratings_df.columns.map(lambda x: x[1]) @@ -66,7 +67,7 @@ def get_trends(browser): browser.open(url) trends = browser.get_current_page() table = trends.find_all('table')[0] - trends_df = pd.read_html(str(table)) + trends_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. trends_df = trends_df[0] @@ -103,7 +104,7 @@ def get_refs(browser, season=None): browser.open(url) refs = browser.get_current_page() table = refs.find_all('table')[0] - refs_df = pd.read_html(str(table)) + refs_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. refs_df = refs_df[0] @@ -132,7 +133,7 @@ def get_hca(browser): browser.open(url) hca = browser.get_current_page() table = hca.find_all('table')[0] - hca_df = pd.read_html(str(table)) + hca_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. hca_df = hca_df[0] @@ -171,7 +172,7 @@ def get_arenas(browser, season=None): browser.open(url) arenas = browser.get_current_page() table = arenas.find_all('table')[0] - arenas_df = pd.read_html(str(table)) + arenas_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. arenas_df = arenas_df[0] @@ -234,7 +235,7 @@ def get_gameattribs(browser, season=None, metric='Excitement'): playerstats = browser.get_current_page() table = playerstats.find_all('table')[0] - ga_df = pd.read_html(str(table)) + ga_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ga_df = ga_df[0] @@ -263,7 +264,7 @@ def get_program_ratings(browser): browser.open(url) programs = browser.get_current_page() table = programs.find_all('table')[0] - programs_df = pd.read_html(str(table)) + programs_df = pd.read_html(StringIO(str(table))) programs_df = programs_df[0] programs_df.columns = ['Rank', 'Team', 'Rating', 'kenpom.Best.Rank', 'kenpom.Best.Season', 'kenpom.Worst.Rank', diff --git a/kenpompy/summary.py b/kenpompy/summary.py index f4c678b..af1cc01 100644 --- a/kenpompy/summary.py +++ b/kenpompy/summary.py @@ -7,7 +7,7 @@ import pandas as pd import re from bs4 import BeautifulSoup - +from io import StringIO def get_efficiency(browser, season=None): """ @@ -37,7 +37,7 @@ def get_efficiency(browser, season=None): browser.open(url) eff = browser.get_current_page() table = eff.find_all('table')[0] - eff_df = pd.read_html(str(table)) + eff_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. eff_df = eff_df[0] @@ -95,7 +95,7 @@ def get_fourfactors(browser, season=None): browser.open(url) ff = browser.get_current_page() table = ff.find_all('table')[0] - ff_df = pd.read_html(str(table)) + ff_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ff_df = ff_df[0] @@ -153,7 +153,7 @@ def get_teamstats(browser, defense=False, season=None): browser.open(url) ts = browser.get_current_page() table = ts.find_all('table')[0] - ts_df = pd.read_html(str(table)) + ts_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ts_df = ts_df[0] @@ -201,7 +201,7 @@ def get_pointdist(browser, season=None): browser.open(url) dist = browser.get_current_page() table = dist.find_all('table')[0] - dist_df = pd.read_html(str(table)) + dist_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. dist_df = dist_df[0] @@ -247,7 +247,7 @@ def get_height(browser, season=None): browser.open(url) height = browser.get_current_page() table = height.find_all('table')[0] - h_df = pd.read_html(str(table)) + h_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. h_df = h_df[0] @@ -346,7 +346,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal ps_dfs = [] tables = playerstats.find_all('table') for t in tables: - ps_df = pd.read_html(str(t)) + ps_df = pd.read_html(StringIO(str(t))) ps_df = ps_df[0] # Split ortg column. @@ -364,7 +364,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal if metric.upper() in perc_mets: metric = metric + '%' table = playerstats.find_all('table')[0] - ps_df = pd.read_html(str(table)) + ps_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ps_df = ps_df[0] @@ -415,7 +415,7 @@ def get_kpoy(browser, season=None): browser.open(url) kpoy = browser.get_current_page() table = kpoy.find_all('table')[0] - df = pd.read_html(str(table)) + df = pd.read_html(StringIO(str(table))) kpoy_df = df[0] kpoy_df.columns = ['Rank', 'Player', 'KPOY Rating'] @@ -431,7 +431,7 @@ def get_kpoy(browser, season=None): # Now the MVP table. if int(season) >= 2013: table = kpoy.find_all('table')[-1] - df = pd.read_html(str(table)) + df = pd.read_html(StringIO(str(table))) mvp_df = df[0] mvp_df.columns = ['Rank', 'Player', 'Game MVPs'] diff --git a/kenpompy/team.py b/kenpompy/team.py index d2addc8..54e7f55 100644 --- a/kenpompy/team.py +++ b/kenpompy/team.py @@ -5,7 +5,7 @@ import pandas as pd import datetime - +from io import StringIO def get_valid_teams(browser, season=None): """ @@ -26,7 +26,7 @@ def get_valid_teams(browser, season=None): browser.open(url) teams = browser.get_current_page() table = teams.find_all('table')[0] - team_df = pd.read_html(str(table)) + team_df = pd.read_html(StringIO(str(table))) # Get only the team column. team_df = team_df[0].iloc[:, 1] # Remove NCAA tourny seeds for previous seasons. @@ -89,7 +89,7 @@ def get_schedule(browser, team=None, season=None): browser.open(url) schedule = browser.get_current_page() table = schedule.find_all('table')[1] - schedule_df = pd.read_html(str(table)) + schedule_df = pd.read_html(StringIO(str(table))) # Dataframe Tidying schedule_df = schedule_df[0]