diff --git a/EmailHarvester.py b/EmailHarvester.py
index 560987c..4540831 100644
--- a/EmailHarvester.py
+++ b/EmailHarvester.py
@@ -28,23 +28,27 @@
__copyright__ = "Copyright (c) 2016 @maldevel"
__credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"]
__license__ = "GPLv3"
-__version__ = "1.3.0"
+__version__ = "1.3.1"
__maintainer__ = "maldevel"
################################
+
import argparse
import sys
import time
import requests
import re
import os
+import validators
from termcolor import colored
from argparse import RawTextHelpFormatter
from sys import platform as _platform
from urllib.parse import urlparse
+
################################
+
if _platform == 'win32':
import colorama
colorama.init()
@@ -173,6 +177,12 @@ def limit_type(x):
return x
raise argparse.ArgumentTypeError("Minimum results limit is 1.")
+def checkDomain(value):
+    """argparse ``type=`` callback that accepts only syntactically valid domains.
+
+    :param value: raw command-line string supplied for the domain option.
+    :return: ``value`` unchanged when ``validators.domain`` reports it as valid.
+    :raises argparse.ArgumentTypeError: when ``validators.domain`` yields a
+        falsy result for ``value``.
+    """
+    # validators.domain() gives back a falsy object for invalid input, so a
+    # plain truth test is enough to decide.
+    if validators.domain(value):
+        return value
+    raise argparse.ArgumentTypeError('Invalid {} domain.'.format(value))
+
###################################################################
if __name__ == '__main__':
@@ -192,7 +202,7 @@ def limit_type(x):
formatter_class=RawTextHelpFormatter)
parser.add_argument("-d", '--domain', action="store", metavar='DOMAIN', dest='domain',
- default=None, type=str, help="Domain to search.")
+ default=None, type=checkDomain, help="Domain to search.")
parser.add_argument("-s", '--save', action="store", metavar='FILE', dest='filename',
default=None, type=str, help="Save the results into a TXT and XML file (both).")
diff --git a/README.md b/README.md
index 408c504..65f9473 100644
--- a/README.md
+++ b/README.md
@@ -14,17 +14,18 @@ Requirements
* termcolor
* colorama
* requests
+* validators
Features
=====
-* Retrieve Domain email addresses from Search Engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead).
+* Retrieve domain email addresses from popular search engines (Google, Bing, Yahoo, ASK, Baidu, Dogpile, Exalead).
* Export results to txt and xml files.
* Limit search results.
* Define your own User-Agent string.
* Use proxy server.
* Plugins system.
-* Search in popular web sites using Search engines (Twitter, LinkedIn, Google+, Github).
+* Search popular websites through search engines (Twitter, LinkedIn, Google+, GitHub, Instagram, Reddit).
Download/Installation
@@ -47,7 +48,7 @@ usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT]
\____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
A tool to retrieve Domain email addresses from Search Engines | @maldevel
- Version: 1.3.0
+ Version: 1.3.1
optional arguments:
-h, --help show this help message and exit
diff --git a/plugins/instagram.py b/plugins/instagram.py
new file mode 100644
index 0000000..3ff0360
--- /dev/null
+++ b/plugins/instagram.py
@@ -0,0 +1,73 @@
+"""
+ This file is part of EmailHarvester
+ Copyright (C) 2016 @maldevel
+ https://github.com/maldevel/EmailHarvester
+
+ EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ For more see the file 'LICENSE' for copying permission.
+"""
+
+#config = None
+app_emailharvester = None
+
+
+def search(domain, limit):
+    """Collect emails for *domain* from instagram.com pages via several search engines.
+
+    Yahoo, Bing, Google, Baidu and Exalead are each queried with a
+    ``site:instagram.com`` search for ``@domain`` through the host
+    application's ``init_search`` / ``process`` / ``get_emails`` calls, and
+    the per-engine results are concatenated.
+
+    :param domain: value handed to ``init_search`` together with each URL template.
+    :param limit: results limit, handed to ``init_search`` unchanged.
+    :return: list holding the emails reported for every engine, in query
+             order; duplicates across engines are not removed here.
+    """
+    # One row per engine: (label, URL template, counter start, counter step).
+    # NOTE(review): the last two values are presumably the initial {counter}
+    # value and its per-page increment -- confirm against init_search().
+    engines = (
+        ("Yahoo",
+         "http://search.yahoo.com/search?p=site%3Ainstagram.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}",
+         1, 100),
+        ("Bing",
+         "http://www.bing.com/search?q=site%3Ainstagram.com+%40{word}&count=50&first={counter}",
+         0, 50),
+        ("Google",
+         'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Ainstagram.com+"%40{word}"',
+         0, 100),
+        ("Baidu",
+         'http://www.baidu.com/search/s?wd=site%3Ainstagram.com+"%40{word}"&pn={counter}',
+         0, 10),
+        # elements_per_page now equals the step of 50 (it was 10), matching the
+        # other engines where page size and step agree, so that consecutive
+        # pages do not leave gaps in the result list.
+        ("Exalead",
+         "http://www.exalead.com/search/web/results/?q=site%3Ainstagram.com+%40{word}&elements_per_page=50&start_index={counter}",
+         0, 50),
+        # Dogpile is left out: it does not seem to support the site: operator.
+    )
+
+    all_emails = []
+    app_emailharvester.show_message("\n[+] Searching in Instagram..\n")
+
+    for engine, url, start, step in engines:
+        app_emailharvester.show_message(
+            "\n[+] Searching in {} + Instagram..\n".format(engine))
+        app_emailharvester.init_search(url, domain, limit, start, step)
+        app_emailharvester.process()
+        all_emails += app_emailharvester.get_emails()
+
+    return all_emails
+
+
+class Plugin:
+    """Hooks the Instagram search into the EmailHarvester application."""
+
+    def __init__(self, app, conf):
+        """Register ``search`` under the name 'instagram' and remember *app*.
+
+        :param app: host application object; its ``register_plugin`` is called
+                    here and the object is stored module-wide so that
+                    ``search`` can use it later.
+        :param conf: currently unused by this plugin.
+        """
+        # Only app_emailharvester is rebound here; the former ``config``
+        # global was declared but never assigned.
+        global app_emailharvester
+        app.register_plugin('instagram', {'search': search})
+        app_emailharvester = app
+
\ No newline at end of file
diff --git a/plugins/reddit.py b/plugins/reddit.py
new file mode 100644
index 0000000..2705251
--- /dev/null
+++ b/plugins/reddit.py
@@ -0,0 +1,73 @@
+"""
+ This file is part of EmailHarvester
+ Copyright (C) 2016 @maldevel
+ https://github.com/maldevel/EmailHarvester
+
+ EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ For more see the file 'LICENSE' for copying permission.
+"""
+
+#config = None
+app_emailharvester = None
+
+
+def search(domain, limit):
+    """Collect emails for *domain* from reddit.com pages via several search engines.
+
+    Yahoo, Bing, Google, Baidu and Exalead are each queried with a
+    ``site:reddit.com`` search for ``@domain`` through the host
+    application's ``init_search`` / ``process`` / ``get_emails`` calls, and
+    the per-engine results are concatenated.
+
+    :param domain: value handed to ``init_search`` together with each URL template.
+    :param limit: results limit, handed to ``init_search`` unchanged.
+    :return: list holding the emails reported for every engine, in query
+             order; duplicates across engines are not removed here.
+    """
+    # One row per engine: (label, URL template, counter start, counter step).
+    # NOTE(review): the last two values are presumably the initial {counter}
+    # value and its per-page increment -- confirm against init_search().
+    engines = (
+        ("Yahoo",
+         "http://search.yahoo.com/search?p=site%3Areddit.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}",
+         1, 100),
+        ("Bing",
+         "http://www.bing.com/search?q=site%3Areddit.com+%40{word}&count=50&first={counter}",
+         0, 50),
+        ("Google",
+         'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Areddit.com+"%40{word}"',
+         0, 100),
+        ("Baidu",
+         'http://www.baidu.com/search/s?wd=site%3Areddit.com+"%40{word}"&pn={counter}',
+         0, 10),
+        # elements_per_page now equals the step of 50 (it was 10), matching the
+        # other engines where page size and step agree, so that consecutive
+        # pages do not leave gaps in the result list.
+        ("Exalead",
+         "http://www.exalead.com/search/web/results/?q=site%3Areddit.com+%40{word}&elements_per_page=50&start_index={counter}",
+         0, 50),
+        # Dogpile is left out: it does not seem to support the site: operator.
+    )
+
+    all_emails = []
+    app_emailharvester.show_message("\n[+] Searching in Reddit..\n")
+
+    for engine, url, start, step in engines:
+        app_emailharvester.show_message(
+            "\n[+] Searching in {} + Reddit..\n".format(engine))
+        app_emailharvester.init_search(url, domain, limit, start, step)
+        app_emailharvester.process()
+        all_emails += app_emailharvester.get_emails()
+
+    return all_emails
+
+
+class Plugin:
+    """Hooks the Reddit search into the EmailHarvester application."""
+
+    def __init__(self, app, conf):
+        """Register ``search`` under the name 'reddit' and remember *app*.
+
+        :param app: host application object; its ``register_plugin`` is called
+                    here and the object is stored module-wide so that
+                    ``search`` can use it later.
+        :param conf: currently unused by this plugin.
+        """
+        # Only app_emailharvester is rebound here; the former ``config``
+        # global was declared but never assigned.
+        global app_emailharvester
+        app.register_plugin('reddit', {'search': search})
+        app_emailharvester = app
+
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 63a54a1..529240e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
termcolor
colorama
-requests
\ No newline at end of file
+requests
+validators