-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy patheml_to_html.py
executable file
·152 lines (117 loc) · 6.21 KB
/
eml_to_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python
'''
@author: Matthew C. Jones, CPA, CISA, OSCP
IS Audits & Consulting, LLC
TJS Deemer Dana LLP
Parses an .eml file into separate files suitable for use with phishing frenzy
'''
import sys
import argparse
import email
import os
from BeautifulSoup import BeautifulSoup
import urllib
# encoding=utf8
# Python 2 only: reload the sys module and set the interpreter-wide default
# string encoding to UTF-8. This runs at import time, before main() is called.
# NOTE(review): presumably reload() is needed because setdefaultencoding() is
# not reachable on sys after interpreter start-up, and the str/unicode
# conversions further down rely on this default -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')
def main(argv):
    '''
    Parse the command line, ask how the output should be formatted, then
    extract every part of the given .eml file.

    Sets the module globals ``phishing_frenzy``, ``replace_links`` and
    ``imbed_tracker`` (always real booleans) that ``extract_payloads`` reads,
    and changes the working directory to the directory holding the input
    file so the extracted parts are written next to it.

    :param argv: command-line arguments without the program name.
    '''
    parser = argparse.ArgumentParser(description='Convert an .eml file into an html file suitable for use with phishing frenzy.')
    parser.add_argument("infile", action="store", help="Input file")
    args = parser.parse_args(argv)

    # Resolve the path up front: a bare file name has an empty dirname, and
    # os.chdir("") raises, so the directory is taken from the absolute path.
    infile_path = os.path.abspath(args.infile)

    # Read the message before prompting so a missing or unreadable file is
    # reported immediately, and close the handle as soon as it is parsed.
    with open(infile_path, "rb") as inputfile:
        message = email.message_from_file(inputfile)

    #See if this will be used for phishing frenzy or as a standalone attack
    #Yes option will use phishing-frenzy tags and check for additional options
    global phishing_frenzy, replace_links, imbed_tracker
    phishing_frenzy = False
    replace_links = False
    imbed_tracker = False

    phishing_frenzy = _answered_yes("\nShould links and images be formatted for use in phishing frenzy? [yes]")
    if phishing_frenzy:
        replace_links = _answered_yes("\nWould you like to replace all links with phishing-frenzy tags? [yes]")
        imbed_tracker = _answered_yes("\nWould you like to imbed the phishing-frenzy tracking image tag? [yes]")

    #change working directory so we are in same directory as input file!
    os.chdir(os.path.dirname(infile_path))

    extract_payloads(message)


def _answered_yes(prompt):
    '''Show *prompt* and return False only when the reply starts with "n" or "N".'''
    # An empty reply keeps the advertised [yes] default. Testing the first
    # letter (rather than "contains n") stops replies such as "definitely"
    # from being read as a refusal.
    reply = raw_input(prompt)
    return not reply.strip().lower().startswith("n")
def extract_payloads(msg):
    '''
    Recursively walk an email message and export each leaf part to disk.

    Multipart containers are descended into. Every other part is decoded and
    handled by content type:

    * text/html  - rewritten by ``_rewrite_html_part`` and written to
      ``email.html.erb`` (phishing-frenzy mode) or ``email.html``
    * text/plain - written unchanged to ``email.txt.erb`` or ``email.txt``
    * any other part that carries an attachment file name - written under
      the base name of that file name
    * anything else - reported and skipped

    Reads the module globals ``phishing_frenzy``, ``replace_links`` and
    ``imbed_tracker`` and writes files relative to the current working
    directory through ``export_part``.

    :param msg: message or message part to process.
    '''
    if msg.is_multipart():
        #message / section is multi-part; loop part back through the extraction module
        print("Multi-part section encountered; extracting individual parts from section...")
        for part in msg.get_payload():
            extract_payloads(part)
        return

    sectionText = msg.get_payload(decode=True)
    contentType = msg.get_content_type()
    filename = msg.get_filename()  #this is the filename of an attachment

    # The option globals are compared with "is True" because they may still
    # hold the user's raw answer string rather than a boolean.
    if contentType == "text/html":
        print("Processing HTML section...")
        sectionText = _rewrite_html_part(sectionText)
        print(sectionText)
        if phishing_frenzy is True:
            export_part(sectionText, "email.html.erb")
        else:
            export_part(sectionText, "email.html")
    elif contentType == "text/plain":
        ##TODO: Need to fix link cleanup of text section; beautiful soup doesn't replace hyperlinks in text file!
        print("Processing text section...")
        if phishing_frenzy is True:
            export_part(sectionText, "email.txt.erb")
        else:
            export_part(sectionText, "email.txt")
    elif filename and os.path.basename(filename):
        # The name comes from the email itself, so keep only its final path
        # component to stop an attachment from being written outside the
        # working directory.
        attachment_name = os.path.basename(filename)
        print("Processing attachment " + attachment_name + "...")
        export_part(sectionText, attachment_name)
    else:
        print("section is of unknown type (" + str(contentType) + ")...skipping...")


def _rewrite_html_part(raw_html):
    '''Return the HTML part as prettified markup with the selected rewrites applied.'''
    # Parse only HTML parts; attachments and plain text never need a soup.
    soup = BeautifulSoup(raw_html)

    ########################################
    #replace links with phishing frenzy tags
    ########################################
    if replace_links is True:
        for anchor in soup.findAll('a'):
            anchor['href'] = '<%= @url %>'

    ###############################################
    #Detect hyperlinked images and download locally
    ###############################################
    image_urls = [tag['src'] for tag in soup.findAll('img', src=True)]
    if image_urls:
        print("The following linked images were detected in the HTML:")
        for url in image_urls:
            print(url)
        answer = raw_input("\nWould you like to download these and store locally? [yes]")
        # Empty reply keeps the [yes] default; only a reply starting with
        # "n"/"N" declines (a reply merely containing "n" does not).
        if not answer.strip().lower().startswith("n"):
            print("Downloading images...")
            soup = _download_linked_images(soup, image_urls)

    if imbed_tracker is True:
        # HTML fragments without a <body> have no soup.body; append the
        # tracker to the document root in that case instead of crashing.
        container = soup.body
        if container is None:
            container = soup
        container.insert(len(container.contents), '<img src="<%= @image_url %>" alt="" />')

    ##########################################
    #Clean up html output and make it readable
    ##########################################
    html = soup.prettify()
    # The ERB tags inserted above are HTML-escaped on output; turn the
    # escaped angle brackets back into literal ones so the tags survive.
    html = html.replace('&lt;', '<')
    html = html.replace('&gt;', '>')
    return html


def _download_linked_images(soup, image_urls):
    '''Save each linked image locally and return a soup that references the local copies.'''
    for url in image_urls:
        try:
            # Local name = last path segment without fragment or query string.
            local_name = url.split('/')[-1].split('#')[0].split('?')[0]
            remote = urllib.urlopen(url)
            try:
                image_data = remote.read()
            finally:
                remote.close()
            with open(local_name, "wb") as image_file:
                image_file.write(image_data)
            # The plain file name is used as the new reference; no
            # phishing-frenzy attachment tag is generated.
            soup = BeautifulSoup(str(soup).decode("UTF-8").replace(url, local_name).encode("UTF-8"))
        except Exception:
            # Best effort: one bad image must not abort the conversion, but
            # Ctrl-C and interpreter exit are no longer swallowed.
            print("Error processing " + url + " - skipping...")
    return soup
def export_part(sectionText, filename):
    '''
    Write one extracted message part to *filename*, replacing any existing file.

    :param sectionText: content to write; text that is not already a byte
        string is encoded as UTF-8 first.
    :param filename: path of the file to create, relative to the current
        working directory unless absolute.
    '''
    # The file is opened in binary mode, so encode text explicitly instead
    # of relying on the interpreter's default encoding.
    if not isinstance(sectionText, bytes):
        sectionText = sectionText.encode("utf-8")
    # The context manager closes the handle even if the write fails.
    with open(filename, "wb") as out_file:
        out_file.write(sectionText)
# Run the converter only when this file is executed as a script; main()
# receives the command-line arguments without the program name.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)