-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbeerscrapping.py
77 lines (65 loc) · 2.08 KB
/
beerscrapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from bs4 import BeautifulSoup
from selenium import webdriver
import time, math
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
# Column accumulators: one list per spreadsheet column, appended to while the
# rating pages are scraped.
names = []
styles = []
breweries = []
meanScores = []
myScores = []
dates = []

# NOTE(review): the driver path is passed positionally here; newer Selenium
# releases expect `service=Service(path)` instead -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = 'https://www.ratebeer.com/user/421236/beer-ratings/'
driver.get(url)
# Fixed pause so the page has time to render before its HTML is read.
time.sleep(1)
# Name the parser explicitly so the parse result does not depend on which
# optional parsers happen to be installed on the machine.
soup = BeautifulSoup(driver.page_source, "html.parser")

# The total number of ratings is read from the profile's counter element.
ratings_tag = soup.find("div", class_="stat-value", id="beer-ratings")
if ratings_tag is None:
    # Close the browser before failing so it is not left running.
    driver.quit()
    raise RuntimeError("Could not find the beer-ratings counter on " + url)
ratings = int(ratings_tag.text)
# The page count is derived assuming 50 ratings per page.
pages = math.ceil(ratings / 50)
# Visit every ratings page and append its table contents to the column lists.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost; the <a> loop is assumed to be a sibling of the <b>
# loop, and the link counter is assumed to restart on every page -- confirm.
try:
    for currentPage in range(pages):
        # Page numbers in the URL are 1-based.
        currentUrl = url + str(currentPage + 1) + '/'
        driver.get(currentUrl)
        # Fixed pause so the page has time to render before its HTML is read.
        time.sleep(1)
        # Explicit parser: avoids results that vary with the installed parsers.
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Cells with this class string are consumed as repeating triples:
        # style, average score, date.
        detail_cells = soup.find_all("td", class_="hidden-xs hidden-sm")
        for position, cell in enumerate(detail_cells):
            text = cell.get_text()
            column = position % 3
            if column == 0:
                styles.append(text)
            elif column == 1:
                meanScores.append(text)
            else:
                # The last character of the date cell is dropped.
                dates.append(text[:-1])

        # Links are also consumed as repeating triples, counted across all
        # cells of the page. The first triple is skipped entirely; after that
        # the second link of each triple is the beer name and the third is
        # the brewery.
        link_index = 0
        for cell in soup.find_all("td"):
            # Every bold element inside a cell is recorded as a personal score.
            for bold in cell.find_all("b"):
                myScores.append(bold.get_text())
            for link in cell.find_all("a"):
                if link_index >= 3:
                    slot = link_index % 3
                    if slot == 1:
                        names.append(link.get_text())
                    elif slot == 2:
                        breweries.append(link.get_text())
                link_index += 1
finally:
    # Always shut the browser down, even if a page fails to load or parse.
    driver.quit()
# Assemble the scraped columns into a table and save it as an Excel workbook.
column_order = ['Name', 'Brewery', 'Style', 'My score', 'Avg score', 'Date']
column_values = [names, breweries, styles, myScores, meanScores, dates]
# Pair each header with its list; the pairing follows column_order.
export = dict(zip(column_order, column_values))
df = pd.DataFrame(export, columns=column_order)
# Written without the index column and with a header row.
df.to_excel('Beerscrapping.xlsx', index=False, header=True)