#!/usr/bin/python
'''
@author: Matthew C. Jones, CPA, CISA, OSCP
IS Audits & Consulting, LLC
TJS Deemer Dana LLP
Downloads a website into a format suitable for use with phishing frenzy
'''
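
# Note: this is a Python 2 script. It depends on BeautifulSoup 3 (imported from
# the 'BeautifulSoup' module) plus the Python 2 stdlib urllib2 and ConfigParser
# modules, none of which exist under these names in Python 3.
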
import sys
import argparse
import os
import shutil
from BeautifulSoup import BeautifulSoup, NavigableString
import urllib2
import ConfigParser

def main(argv):
    parser = argparse.ArgumentParser(description='Downloads a website into a format suitable for use with phishing frenzy')
    parser.add_argument("site_addr", action="store", help="Site address")
    args = parser.parse_args()

    site_addr = args.site_addr

    #########################################
    # Get stuff from config file
    #########################################
    config_file = "config/website_cloner.config"

    if not os.path.exists(config_file):
        # First run: seed a working config from the example file shipped with the tool
        try:
            print "Specified config file not found. Copying example config file..."
            shutil.copyfile("config/website_cloner.default", config_file)
        except:
            print "Error copying default config file...quitting execution..."
            sys.exit()

    config = ConfigParser.SafeConfigParser()
    config.read(config_file)

    try:
        working_dir = config.get("general", "working_dir")
        header_text = config.get("html", "header_text")
        body_text = config.get("html", "body_text")
    except:
        print "Missing required config file sections. Check running config file against provided example\n"
        sys.exit()

    # Each cloned site gets its own subdirectory of working_dir, named after the host
    site_path = site_addr.replace("http://", "").replace("https://", "")
    working_dir = os.path.join(working_dir, site_path, '')

    if not os.path.exists(working_dir):
        os.makedirs(working_dir)
    os.chdir(os.path.dirname(working_dir))

    #########################################
    # Get the site we are cloning
    #########################################
    if not site_addr[:4] == "http":
        site_addr = "http://" + site_addr

    try:
        site_text = urllib2.urlopen(site_addr).read()
    except:
        print "Could not open site...quitting..."
        sys.exit()

    #soup = BeautifulSoup(header_text+site_text)
    soup = BeautifulSoup(site_text)

    # Inject the configured snippets at the top of <head> and <body>
    head = soup.find('head')
    head.insert(0, NavigableString(header_text))
    body = soup.find('body')
    body.insert(0, NavigableString(body_text))

    ###############################################
    # Detect hyperlinked images and download locally
    ###############################################
    imageList = []
    for tag in soup.findAll('img', src=True):
        imageList.append(tag['src'])

    for url in imageList:
        try:
            # Reduce the URL to a bare filename (no path, query string, or fragment)
            filename = url.split('/')[-1].split('#')[0].split('?')[0]
            # Rewrite every reference in the page to point at the local copy
            soup = BeautifulSoup(str(soup).decode("UTF-8").replace(url, filename).encode("UTF-8"))
            if not url.startswith('http'):
                url = urllib2.urlparse.urljoin(site_addr, url)
            print "getting " + url + "..."
            open(filename, "wb").write(urllib2.urlopen(url, timeout=5).read())
        except:
            # Skip assets that fail to download rather than aborting the clone
            pass

    ###############################################
    # Detect linked stylesheets and download locally
    ###############################################
    cssList = []
    for tag in soup.findAll('link', {'rel': 'stylesheet'}):
        cssList.append(tag['href'])

    for url in cssList:
        try:
            filename = url.split('/')[-1].split('#')[0].split('?')[0]
            soup = BeautifulSoup(str(soup).decode("UTF-8").replace(url, filename).encode("UTF-8"))
            if not url.startswith('http'):
                url = urllib2.urlparse.urljoin(site_addr, url)
            print "getting " + url + "..."
            open(filename, "wb").write(urllib2.urlopen(url, timeout=5).read())
        except:
            pass

    ###############################################
    # Detect external scripts and download locally
    ###############################################
    scriptList = []
    for tag in soup.findAll('script', src=True):
        scriptList.append(tag['src'])

    for url in scriptList:
        try:
            filename = url.split('/')[-1].split('#')[0].split('?')[0]
            soup = BeautifulSoup(str(soup).decode("UTF-8").replace(url, filename).encode("UTF-8"))
            if not url.startswith('http'):
                url = urllib2.urlparse.urljoin(site_addr, url)
            print "getting " + url + "..."
            open(filename, "wb").write(urllib2.urlopen(url, timeout=5).read())
        except:
            pass

    ##########################################
    # Clean up html output and make it readable
    ##########################################
    mainpage = soup.prettify()
    # Convert escaped angle brackets back into literal ones so any markup in the
    # injected header/body text is emitted as real tags rather than as text
    mainpage = mainpage.replace('&lt;', '<')
    mainpage = mainpage.replace('&gt;', '>')

    open("index.php", "wb").write(mainpage)

if __name__ == "__main__":
    main(sys.argv[1:])
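
# Example usage and a minimal config sketch. The section and option names below
# come from the config.get() calls above; the values are illustrative only --
# see config/website_cloner.default for the shipped defaults.
#
#   $ ./website_cloner.py www.example.com
#
#   # config/website_cloner.config
#   [general]
#   working_dir = /var/www/clones
#
#   [html]
#   header_text = <!-- injected at the top of <head> -->
#   body_text = <!-- injected at the top of <body> -->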