sportsipy
sportsipy copied to clipboard
Scoring and Penalty table pulls for NHL games
I wanted to pull the data in the Scoring and Penalty tables for a specific game into a pandas DataFrame. Since this wasn't available in the other parts of the package, I wrote it myself. It works for my needs, so I wanted to share it so that others can use it. It will need some work to be formalized and to fit better into the rest of the package, but at least this is a start.
The 'page' argument required by each function is an HTML file, so the functions would need to be altered in order to fetch a page from a URL.
import pandas as pd
from pyquery import PyQuery as pq
def Scoring(page):
    """Parse the scoring table of a game page into a pandas DataFrame.

    Parameters
    ----------
    page
        Object whose ``str()`` is the HTML of the game page. HTML comment
        markers (``<!--`` / ``-->``) are stripped before parsing so that
        markup wrapped in comments is visible to the selector.

    Returns
    -------
    pandas.DataFrame
        One row per scoring entry, with columns ``period``, ``time``,
        ``team``, ``goal_type``, ``scorer``, ``goal_number``,
        ``first_assist`` and ``second_assist``. ``period`` is an ``int``
        for periods 1-3 and the header text of the section otherwise.
        The frame is empty when no matching rows are found.

    Notes
    -----
    Fields are taken positionally from the space-separated text of the
    row's ``<td>`` cells, so the parsing assumes two-word player names
    (TODO confirm against the page markup for multi-word names).
    """
    columns = ['period', 'time', 'team', 'goal_type', 'scorer',
               'goal_number', 'first_assist', 'second_assist']

    html = str(page).replace('<!--', '').replace('-->', '')
    rows = pq(html)('div[id="all_scoring"]')('tbody')('tr[data-row]')

    records = []
    # Keep the numeric counter separate from the label: after the third
    # period the label becomes the header text (a string), and incrementing
    # that string on the next header row would raise TypeError.
    period_number = 1
    period = period_number
    for row in rows.items():
        cells = row('td')
        if len(cells) == 0:
            # A row without <td> cells is a section header.
            period_number += 1
            period = period_number if period_number <= 3 else row('th').text()
            continue

        # split(' ') (not split()) so empty cells keep their position.
        tokens = cells.text().split(' ')

        if period == 'Shootout':
            # Shootout rows: time, team, scorer (2 tokens), goal type.
            records.append((
                period,
                tokens[0],
                tokens[1],
                tokens[4],
                tokens[2] + ' ' + tokens[3],
                '',
                '',
                '',
            ))
            continue

        # Regular rows: time, team, goal type, scorer (2 tokens),
        # goal number, then optional assists.
        first_assist = ''
        second_assist = ''
        if len(tokens) > 7:
            # Last character of the first assist's second token is dropped
            # (presumably a separator such as a comma; verify against page).
            first_assist = tokens[6] + ' ' + tokens[7][:-1]
            if len(tokens) > 8:
                second_assist = tokens[8] + ' ' + tokens[9]

        records.append((
            period,
            tokens[0],
            tokens[1],
            tokens[2],
            tokens[3] + ' ' + tokens[4],
            tokens[5],
            first_assist,
            second_assist,
        ))

    return pd.DataFrame(records, columns=columns)
def Penalty(page):
    """Parse the penalty table of a game page into a pandas DataFrame.

    Parameters
    ----------
    page
        Object whose ``str()`` is the HTML of the game page. HTML comment
        markers (``<!--`` / ``-->``) are stripped before parsing so that
        markup wrapped in comments is visible to the selector.

    Returns
    -------
    pandas.DataFrame
        One row per penalty entry, with columns ``period``, ``time``,
        ``team``, ``player``, ``penalty_type`` and ``penalty_length(min)``.
        ``period`` is an ``int`` for periods 1-3 and the header text of
        the section otherwise. The frame is empty when no matching rows
        are found.

    Notes
    -----
    Fields are taken positionally from the space-separated text of the
    row's ``<td>`` cells, so the parsing assumes two-word player names
    (TODO confirm against the page markup for multi-word names).
    """
    columns = ['period', 'time', 'team', 'player', 'penalty_type',
               'penalty_length(min)']

    html = str(page).replace('<!--', '').replace('-->', '')
    rows = pq(html)('div[id="all_penalty"]')('tbody')('tr[data-row]')

    records = []
    # Keep the numeric counter separate from the label: after the third
    # period the label becomes the header text (a string), and incrementing
    # that string on the next header row would raise TypeError.
    period_number = 1
    period = period_number
    for row in rows.items():
        cells = row('td')
        if len(cells) == 0:
            # A row without <td> cells is a section header.
            period_number += 1
            period = period_number if period_number <= 3 else row('th').text()
            continue

        # split(' ') (not split()) so empty cells keep their position.
        tokens = cells.text().split(' ')

        # Layout: time, team, player (2 tokens), penalty name (1+ tokens),
        # length, and one trailing token that is not used here.
        penalty_name = ' '.join([tokens[4]] + tokens[5:-2])

        records.append((
            period,
            tokens[0],
            tokens[1],
            tokens[2] + ' ' + tokens[3],
            penalty_name,
            tokens[-2],
        ))

    return pd.DataFrame(records, columns=columns)