diff --git a/EmailHarvester.py b/EmailHarvester.py index cff4d2f..01d7453 100644 --- a/EmailHarvester.py +++ b/EmailHarvester.py @@ -50,10 +50,13 @@ colorama.init() class myparser: - def __init__(self, results, word): + + def __init__(self): + self.temp = [] + + def extract(self, results, word): self.results = results self.word = word - self.temp = [] def genericClean(self): for e in '''

@@ -85,7 +88,7 @@ def __init__(self, userAgent, proxy): self.plugins = {} self.proxy = proxy self.userAgent = userAgent - + self.parser = myparser() path = "plugins/" plugins = {} @@ -94,7 +97,7 @@ def __init__(self, userAgent, proxy): fname, ext = os.path.splitext(f) if ext == '.py': mod = __import__(fname) - plugins[fname] = mod.Plugin(self) + plugins[fname] = mod.Plugin(self, {'useragent':userAgent, 'proxy':proxy}) def register_plugin(self, search_method, functions): self.plugins[search_method] = functions @@ -105,18 +108,18 @@ def get_plugins(self): def show_message(self, msg): print(green(msg)) - def init_search(self, urlPattern, word, limit, counterInit, counterStep): + def init_search(self, url, word, limit, counterInit, counterStep): self.results = "" self.totalresults = "" self.limit = int(limit) self.counter = int(counterInit) - self.urlPattern = urlPattern + self.url = url self.step = int(counterStep) self.word = word def do_search(self): try: - urly = self.urlPattern.format(counter=str(self.counter), word=self.word) + urly = self.url.format(counter=str(self.counter), word=self.word) headers = {'User-Agent': self.userAgent} if(self.proxy): proxies = {self.proxy.scheme: "http://" + self.proxy.netloc} @@ -139,8 +142,8 @@ def process(self): print("\tSearching " + str(self.counter) + " results...") def get_emails(self): - rawres = myparser(self.totalresults, self.word) - return rawres.emails() + self.parser.extract(self.totalresults, self.word) + return self.parser.emails() ################################################################### diff --git a/plugins/ask.py b/plugins/ask.py index 661468e..bca1fb9 100644 --- a/plugins/ask.py +++ b/plugins/ask.py @@ -21,22 +21,69 @@ For more see the file 'LICENSE' for copying permission. 
""" -#config = None +import requests +import time +import sys + +config = None app_emailharvester = None +class AskSearch(object): + + def __init__(self, url, word, limit): + self.results = "" + self.totalresults = "" + self.limit = int(limit) + self.page = 1 + self.url = url + self.word = word + self.proxy = config["proxy"] + self.userAgent = config["useragent"] + self.counter = 0 + + def do_search(self): + try: + urly = self.url.format(page=str(self.page), word=self.word) + headers = {'User-Agent': self.userAgent} + if(self.proxy): + proxies = {self.proxy.scheme: "http://" + self.proxy.netloc} + r=requests.get(urly, headers=headers, proxies=proxies) + else: + r=requests.get(urly, headers=headers) + + except Exception as e: + print(e) + sys.exit(4) + + self.results = r.content.decode(r.encoding) + self.totalresults += self.results + + def process(self): + while (self.counter < self.limit): + self.do_search() + time.sleep(1) + self.counter += 10 + self.page += 1 + print("\tSearching " + str(self.counter) + " results...") + + def get_emails(self): + app_emailharvester.parser.extract(self.totalresults, self.word) + return app_emailharvester.parser.emails() + + def search(domain, limit): app_emailharvester.show_message("\n[+] Searching in ASK..\n") - url = "http://www.ask.com/web?q=%40{word}" - app_emailharvester.init_search(url, domain, limit, 0, 100) - app_emailharvester.process() - return app_emailharvester.get_emails() + url = "http://www.ask.com/web?q=%40{word}&page={page}" + search = AskSearch(url, domain, limit) + search.process() + return search.get_emails() class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf): global app_emailharvester, config - #config = conf + config = conf app.register_plugin('ask', {'search': search}) app_emailharvester = app \ No newline at end of file diff --git a/plugins/baidu.py b/plugins/baidu.py index b601d63..218df41 100644 --- a/plugins/baidu.py +++ b/plugins/baidu.py @@ -34,7 +34,7 @@ def search(domain, 
limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('baidu', {'search': search}) diff --git a/plugins/bing.py b/plugins/bing.py index 40a392c..379f148 100644 --- a/plugins/bing.py +++ b/plugins/bing.py @@ -34,7 +34,7 @@ def search(domain, limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('bing', {'search': search}) diff --git a/plugins/dogpile.py b/plugins/dogpile.py index 7fd3d22..cb3702a 100644 --- a/plugins/dogpile.py +++ b/plugins/dogpile.py @@ -34,7 +34,7 @@ def search(domain, limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('dogpile', {'search': search}) diff --git a/plugins/exalead.py b/plugins/exalead.py index f6b2008..9ee83f6 100644 --- a/plugins/exalead.py +++ b/plugins/exalead.py @@ -34,7 +34,7 @@ def search(domain, limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('exalead', {'search': search}) diff --git a/plugins/google.py b/plugins/google.py index 47ce552..d5126f6 100644 --- a/plugins/google.py +++ b/plugins/google.py @@ -27,14 +27,14 @@ def search(domain, limit): app_emailharvester.show_message("\n[+] Searching in Google..\n") - url = 'http://www.google.com/search?num=100&start={counter}&hl=en&q="%40{word}"' + url = 'https://www.google.com/search?num=100&start={counter}&hl=en&q="%40{word}"' app_emailharvester.init_search(url, domain, limit, 0, 100) app_emailharvester.process() return app_emailharvester.get_emails() class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('google', {'search': search}) diff --git 
a/plugins/googleplus.py b/plugins/googleplus.py new file mode 100644 index 0000000..2685136 --- /dev/null +++ b/plugins/googleplus.py @@ -0,0 +1,44 @@ +""" + This file is part of EmailHarvester + Copyright (C) 2016 @maldevel + https://github.com/maldevel/EmailHarvester + + EmailHarvester - A tool to retrieve Domain email addresses from Search Engines. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + For more see the file 'LICENSE' for copying permission. +""" + +#config = None +app_emailharvester = None + + +def search(domain, limit): + app_emailharvester.show_message("\n[+] Searching in Google+..\n") + #search google+ only with google search engine + #who is gonna have google+ indexed better than google itself? 
+ url = 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Aplus.google.com+intext:"Works at"+-inurl:photos+-inurl:about+-inurl:posts+-inurl:plusones+%40{word}' + app_emailharvester.init_search(url, domain, limit, 0, 100) + app_emailharvester.process() + return app_emailharvester.get_emails() + + +class Plugin: + def __init__(self, app, conf):# + global app_emailharvester, config + #config = conf + app.register_plugin('googleplus', {'search': search}) + app_emailharvester = app + \ No newline at end of file diff --git a/plugins/linkedin.py b/plugins/linkedin.py index fcf5821..b905237 100644 --- a/plugins/linkedin.py +++ b/plugins/linkedin.py @@ -28,15 +28,9 @@ def search(domain, limit): all_emails = [] app_emailharvester.show_message("\n[+] Searching in Linkedin..\n") - - app_emailharvester.show_message("\n[+] Searching in ASK + Linkedin..\n") - askUrl = "http://www.ask.com/web?q=site%3Alinkedin.com+%40{word}" - app_emailharvester.init_search(askUrl, domain, limit, 0, 100) - app_emailharvester.process() - all_emails += app_emailharvester.get_emails() app_emailharvester.show_message("\n[+] Searching in Yahoo + Linkedin..\n") - yahooUrl = "http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}" + yahooUrl = "http://search.yahoo.com/search?p=site%3Alinkedin.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}" app_emailharvester.init_search(yahooUrl, domain, limit, 1, 100) app_emailharvester.process() all_emails += app_emailharvester.get_emails() @@ -48,7 +42,7 @@ def search(domain, limit): all_emails += app_emailharvester.get_emails() app_emailharvester.show_message("\n[+] Searching in Google + Linkedin..\n") - googleUrl = 'http://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Alinkedin.com+"%40{word}"' + googleUrl = 
'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Alinkedin.com+"%40{word}"' app_emailharvester.init_search(googleUrl, domain, limit, 0, 100) app_emailharvester.process() all_emails += app_emailharvester.get_emails() @@ -71,7 +65,7 @@ def search(domain, limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('linkedin', {'search': search}) diff --git a/plugins/twitter.py b/plugins/twitter.py new file mode 100644 index 0000000..e79ac7b --- /dev/null +++ b/plugins/twitter.py @@ -0,0 +1,73 @@ +""" + This file is part of EmailHarvester + Copyright (C) 2016 @maldevel + https://github.com/maldevel/EmailHarvester + + EmailHarvester - A tool to retrieve Domain email addresses from Search Engines. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + For more see the file 'LICENSE' for copying permission. 
+""" + +#config = None +app_emailharvester = None + + +def search(domain, limit): + all_emails = [] + app_emailharvester.show_message("\n[+] Searching in Twitter..\n") + + app_emailharvester.show_message("\n[+] Searching in Yahoo + Twitter..\n") + yahooUrl = 'http://search.yahoo.com/search?p=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}' + app_emailharvester.init_search(yahooUrl, domain, limit, 1, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Bing + Twitter..\n") + bingUrl = 'http://www.bing.com/search?q=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&count=50&first={counter}' + app_emailharvester.init_search(bingUrl, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Google + Twitter..\n") + googleUrl = 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Atwitter.com+intitle:"on Twitter"+"%40{word}"' + app_emailharvester.init_search(googleUrl, domain, limit, 0, 100) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Baidu + Twitter..\n") + url = 'http://www.baidu.com/search/s?wd=site%3Atwitter.com+intitle:"on Twitter"+"%40{word}"&pn={counter}' + app_emailharvester.init_search(url, domain, limit, 0, 10) + app_emailharvester.process() + all_emails += app_emailharvester.get_emails() + + app_emailharvester.show_message("\n[+] Searching in Exalead + Twitter..\n") + url = 'http://www.exalead.com/search/web/results/?q=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&elements_per_page=10&start_index={counter}' + app_emailharvester.init_search(url, domain, limit, 0, 50) + app_emailharvester.process() + all_emails += 
app_emailharvester.get_emails() + + #dogpile seems to not support site: + + return all_emails + + +class Plugin: + def __init__(self, app, conf):# + global app_emailharvester, config + #config = conf + app.register_plugin('twitter', {'search': search}) + app_emailharvester = app + \ No newline at end of file diff --git a/plugins/yahoo.py b/plugins/yahoo.py index 6cd39df..7bc4ba9 100644 --- a/plugins/yahoo.py +++ b/plugins/yahoo.py @@ -34,7 +34,7 @@ def search(domain, limit): class Plugin: - def __init__(self, app):#, conf + def __init__(self, app, conf):# global app_emailharvester, config #config = conf app.register_plugin('yahoo', {'search': search})