-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdatapull.py
35 lines (26 loc) · 1.04 KB
/
datapull.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding: utf-8 -*-
"""
@author: mpr
"""
# Currently trying this out with Joe Root's debut match. It can be extended to
# the full list once the wrapper is up and running.
import requests
from bs4 import BeautifulSoup
import re
# Captures the match number: the path segment that follows "/match/" and ends
# before the extension, e.g. "/ci/engine/match/12345.html" -> "12345".
_MATCH_NUMBER_RE = re.compile(r"/match/([^/.]+)")


def find_player_matches(playerid):
    """
    Find the list of ODI innings the batsman has played and return the
    match numbers as a list.

    The player's innings-by-innings batting results page is downloaded from
    stats.espncricinfo.com, and the match number is read from the ``href`` of
    every link titled "view the scorecard for this row".

    Args:
        playerid: The player's id as used in the stats URL. Any value that
            ``str()`` turns into that id is accepted (e.g. ``"303669"`` or
            ``303669``).

    Returns:
        A list of match-number strings, one per scorecard link, in page
        order. Duplicates are kept. Links without a recognisable
        ``/match/<number>`` target are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = (
        "http://stats.espncricinfo.com/ci/engine/player/"
        + str(playerid)
        + ".html?class=2;template=results;type=batting;view=innings"
    )
    # Without a timeout a stalled connection would block the caller forever.
    page = requests.get(url, timeout=30)
    # Fail loudly on an error page rather than silently returning no matches.
    page.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(page.text, "html.parser")
    scorecard_links = soup.find_all(
        name="a", attrs={"title": "view the scorecard for this row"}
    )

    # The match number is part of the link target: the part after /match/
    # and before .html. Read it from the href attribute itself instead of the
    # serialized tag, so attribute order and absolute URLs do not matter.
    match_list = []
    for link in scorecard_links:
        found = _MATCH_NUMBER_RE.search(link.get("href") or "")
        if found:
            match_list.append(found.group(1))
    return match_list