-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathautobuild.py
executable file
·85 lines (75 loc) · 2.84 KB
/
autobuild.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/python3
'''Auto build files for release.
'''
from common import *
import urllib.request as request
import urllib.parse as parse
# Output directory where the mirrored site is assembled.
build_path = os.path.join('build', 'bit_move_dorm')
# Extra static assets fetched one by one, given as path components.
other_files = [('assets','css','style.css')]
# Files larger than this (3 MiB) are skipped when mode == 'size'.
threshold_size = 3 << 20
def download(url, filename, processor = None, encoding = 'utf-8'):
    '''Fetch *url* and save the body to *filename* (binary write).

    processor: optional str -> str transform applied to the decoded body
    before writing; when omitted the raw bytes are saved unchanged.
    encoding: text codec used only when *processor* is given.
    '''
    print(f'downloading {url}')
    # Context manager closes the connection even if read() raises
    # (the original leaked the response object).
    with request.urlopen(url) as response:
        html = response.read()
    if processor is not None:
        html = processor(html.decode(encoding)).encode(encoding)
    path, _ = os.path.split(filename)
    # os.makedirs('') raises FileNotFoundError; skip when *filename*
    # has no directory component.
    if path:
        os.makedirs(path, exist_ok = True)
    with open(filename, 'wb') as f:
        f.write(html)
def common_processor(html):
    '''Shared page cleanup applied to every mirrored page.

    Strips the cache-busting ``?v=<hex>`` query from the stylesheet link
    and removes the ``<footer ...>...</footer>`` element, returning the
    cleaned HTML string.
    '''
    html = re.sub(r'style\.css\?v\=[0-9a-f]+"', 'style.css"', html)
    start = html.find('<footer ')
    end = html.find('</footer>')
    # Only cut when both tags are present: find() returns -1 on a miss,
    # and the original unconditional slice then spliced the page at
    # bogus offsets, silently corrupting it.
    if start != -1 and end != -1:
        html = html[:start] + html[end + len('</footer>'):]
    return html
def processor_root(html):
    '''Rewrite absolute site links on the root page to relative ones.'''
    replacements = (
        ('"/bit_move_dorm/', '"./'),
        ('/">相关', '/index.html">相关'),
    )
    result = common_processor(html)
    for old, new in replacements:
        result = result.replace(old, new)
    return result
def processor_subdir(html):
    '''Rewrite absolute site links for a page one directory below root.'''
    return common_processor(html).replace('"/bit_move_dorm/', '"../')
def crawl_index(path, mode):
    '''Mirror the GitHub Pages index for directory *path* and copy its files.

    mode: 'all' copies every non-blocked file, 'size' additionally skips
    files larger than threshold_size, 'img' copies only files whose
    suffix is in suffix_img.
    '''
    webpage_path = webpage_root + parse.quote(path) + '/'
    local_path = os.path.join(build_path, path)
    filename = os.path.join(local_path,'index.html')
    # The rendered index page is cached once downloaded; delete the
    # local copy to force a refresh.
    if not os.path.exists(filename):
        download(webpage_path, filename, processor_subdir)
    for target in sorted(os.listdir(path)):
        filename_from = os.path.join(path, target)
        # Skip anything that is not a regular file — opening a
        # subdirectory with open(..., 'rb') would raise IsADirectoryError.
        if not os.path.isfile(filename_from):
            continue
        # splitext tolerates names without a dot (the original
        # rsplit('.', 1) unpacking raised ValueError on those); strip the
        # leading '.' to keep the bare-suffix convention of
        # suffix_block / suffix_img.
        suffix = os.path.splitext(target)[1][1:].lower()
        if suffix in suffix_block: continue
        filename_to = os.path.join(local_path, target)
        if os.path.exists(filename_to):
            os.remove(filename_to)
        if mode == 'size':
            filesize = os.path.getsize(filename_from)
            if filesize > threshold_size: continue
        if mode == 'img' and suffix not in suffix_img: continue
        with open(filename_from, 'rb') as f_from:
            with open(filename_to, 'wb') as f_to:
                f_to.write(f_from.read())
def main(mode = 'all'): # mode: all, size, img
    '''Build the whole site: root index page, extra static assets, then
    an index plus file copies for every top-level content directory.'''
    root_index = os.path.join(build_path, 'index.html')
    download(webpage_root, root_index, processor_root)
    for names in other_files:
        remote = webpage_root + '/'.join(parse.quote(part) for part in names)
        download(remote, os.path.join(build_path, *names))
    for entry in os.listdir():
        if os.path.isdir(entry) and entry not in path_block:
            crawl_index(entry, mode)
if __name__ == '__main__':
    # Forward the first CLI argument (the build mode) when given;
    # with no arguments main() falls back to its 'all' default.
    main(*sys.argv[1:2])