-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.py
40 lines (29 loc) · 946 Bytes
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python
import argparse
import collections
import operator
import os
import re
# Matches a double-quoted field that begins with "Mozilla"; group 1 is the
# field's text without the surrounding quotes.
UA_RE = re.compile(r'"(Mozilla[^"]*?)"')


def extract_log(file_obj, counts):
    """Tally the user-agent string found on each line of a log.

    Each line is searched with ``UA_RE``; only the first match on a line is
    counted, and lines without a match are skipped.

    Args:
        file_obj: Iterable of text lines (an open file, a list of strings).
        counts: Mapping from user-agent string to occurrence count, updated
            in place with ``counts[agent] += 1``. It must therefore supply a
            starting value for unseen keys (e.g. ``defaultdict(int)``).

    Returns:
        None. Results are accumulated in ``counts``.
    """
    for line in file_obj:
        match = UA_RE.search(line)
        if match is None:
            continue
        # group(1) reads the capture directly instead of building the full
        # groups() tuple just to index its first element.
        counts[match.group(1)] += 1
def main(argv=None):
    """Print the most frequent user agents found in access logs.

    Lists ``--directory``, passes every entry whose name starts with
    ``access.log`` to ``extract_log`` to accumulate per-agent counts, then
    prints the ``--count`` highest counts, one per line, as
    ``<count> <agent>``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) lets argparse read them from ``sys.argv``.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', default='.')
    # type=int is required: without it a count given on the command line
    # stays a string and the slice below raises TypeError.
    parser.add_argument('-c', '--count', type=int, default=10)
    args = parser.parse_args(argv)

    counts = collections.defaultdict(int)
    for fname in os.listdir(args.directory):
        if fname.startswith('access.log'):
            # os.listdir returns bare names, so the path must be rebuilt
            # relative to the scanned directory rather than the CWD.
            path = os.path.join(args.directory, fname)
            with open(path) as file_obj:
                extract_log(file_obj, counts)

    # Highest count first; sorted() is stable, so ties keep the order in
    # which the agents were first seen.
    agents = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    for agent, count in agents[:args.count]:
        print('{:<7d} {}'.format(count, agent))
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()