Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for blacklisting hosts to the HTTP runner #4757

Merged
merged 5 commits into from
Aug 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ Changelog
in development
--------------

Added
~~~~~

* Add support for blacklisting / whitelisting hosts to the HTTP runner by adding new
``url_hosts_blacklist`` and ``url_hosts_whitelist`` runner attributes. (new feature)
#4757

Changed
~~~~~~~

Expand Down
79 changes: 77 additions & 2 deletions contrib/runners/http_runner/http_runner/http_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from __future__ import absolute_import

import ast
import copy
import json
Expand All @@ -21,6 +22,7 @@
import requests
from requests.auth import HTTPBasicAuth
from oslo_config import cfg
from six.moves.urllib import parse as urlparse # pylint: disable=import-error

from st2common.runners.base import ActionRunner
from st2common.runners.base import get_metadata as get_runner_metadata
Expand Down Expand Up @@ -55,6 +57,8 @@
RUNNER_VERIFY_SSL_CERT = 'verify_ssl_cert'
RUNNER_USERNAME = 'username'
RUNNER_PASSWORD = 'password'
RUNNER_URL_HOSTS_BLACKLIST = 'url_hosts_blacklist'
RUNNER_URL_HOSTS_WHITELIST = 'url_hosts_whitelist'

# Lookup constants for action params
ACTION_AUTH = 'auth'
Expand Down Expand Up @@ -93,10 +97,17 @@ def pre_run(self):
self._http_proxy = self.runner_parameters.get(RUNNER_HTTP_PROXY, None)
self._https_proxy = self.runner_parameters.get(RUNNER_HTTPS_PROXY, None)
self._verify_ssl_cert = self.runner_parameters.get(RUNNER_VERIFY_SSL_CERT, None)
self._url_hosts_blacklist = self.runner_parameters.get(RUNNER_URL_HOSTS_BLACKLIST, [])
self._url_hosts_whitelist = self.runner_parameters.get(RUNNER_URL_HOSTS_WHITELIST, [])

def run(self, action_parameters):
client = self._get_http_client(action_parameters)

if self._url_hosts_blacklist and self._url_hosts_whitelist:
msg = ('"url_hosts_blacklist" and "url_hosts_whitelist" parameters are mutually '
'exclusive. Only one should be provided.')
raise ValueError(msg)

try:
result = client.run()
except requests.exceptions.Timeout as e:
Expand Down Expand Up @@ -147,7 +158,9 @@ def _get_http_client(self, action_parameters):
headers=headers, cookies=self._cookies, auth=auth,
timeout=timeout, allow_redirects=self._allow_redirects,
proxies=proxies, files=files, verify=self._verify_ssl_cert,
username=self._username, password=self._password)
username=self._username, password=self._password,
url_hosts_blacklist=self._url_hosts_blacklist,
url_hosts_whitelist=self._url_hosts_whitelist)

@staticmethod
def _get_result_status(status_code):
Expand All @@ -158,7 +171,8 @@ def _get_result_status(status_code):
class HTTPClient(object):
def __init__(self, url=None, method=None, body='', params=None, headers=None, cookies=None,
auth=None, timeout=60, allow_redirects=False, proxies=None,
files=None, verify=False, username=None, password=None):
files=None, verify=False, username=None, password=None,
url_hosts_blacklist=None, url_hosts_whitelist=None):
if url is None:
raise Exception('URL must be specified.')

Expand Down Expand Up @@ -188,12 +202,30 @@ def __init__(self, url=None, method=None, body='', params=None, headers=None, co
self.verify = verify
self.username = username
self.password = password
self.url_hosts_blacklist = url_hosts_blacklist or []
self.url_hosts_whitelist = url_hosts_whitelist or []

if self.url_hosts_blacklist and self.url_hosts_whitelist:
msg = ('"url_hosts_blacklist" and "url_hosts_whitelist" parameters are mutually '
'exclusive. Only one should be provided.')
raise ValueError(msg)

def run(self):
results = {}
resp = None
json_content = self._is_json_content()

# Check if the provided URL is blacklisted
is_url_blacklisted = self._is_url_blacklisted(url=self.url)

if is_url_blacklisted:
raise ValueError('URL "%s" is blacklisted' % (self.url))

is_url_whitelisted = self._is_url_whitelisted(url=self.url)

if not is_url_whitelisted:
raise ValueError('URL "%s" is not whitelisted' % (self.url))

try:
if json_content:
# cast params (body) to dict
Expand Down Expand Up @@ -301,6 +333,49 @@ def _cast_object(self, value):
else:
return value

def _is_url_blacklisted(self, url):
"""
Verify if the provided URL is blacklisted via url_hosts_blacklist runner parameter.
"""
if not self.url_hosts_blacklist:
# Blacklist is empty
return False

host = self._get_host_from_url(url=url)

if host in self.url_hosts_blacklist:
return True

return False

def _is_url_whitelisted(self, url):
"""
Verify if the provided URL is whitelisted via url_hosts_whitelist runner parameter.
"""
if not self.url_hosts_whitelist:
return True

host = self._get_host_from_url(url=url)

if host in self.url_hosts_whitelist:
return True

return False

def _get_host_from_url(self, url):
"""
Return sanitized host (netloc) value from the provided url.
"""
parsed = urlparse.urlparse(url)

# Remove port and []
host = parsed.netloc.replace('[', '').replace(']', '')

if parsed.port is not None:
host = host.replace(':%s' % (parsed.port), '')

return host


def get_runner():
    """
    Return a new HttpRunner instance which uses a freshly generated UUID4 string as its id.
    """
    runner_id = str(uuid.uuid4())
    return HttpRunner(runner_id)
Expand Down
16 changes: 16 additions & 0 deletions contrib/runners/http_runner/http_runner/runner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@
CA bundle which comes from Mozilla. Verification using a custom CA bundle
is not yet supported. Set to False to skip verification.
type: boolean
url_hosts_blacklist:
description: Optional list of hosts (network locations) to blacklist (e.g. example.com,
127.0.0.1, ::1, etc.). If action will try to access that endpoint, an exception will be
thrown and action will be marked as failed.
required: false
type: array
items:
type: string
url_hosts_whitelist:
description: Optional list of hosts (network locations) to whitelist (e.g. example.com,
127.0.0.1, ::1, etc.). If specified, actions will only be able to hit hosts on this
whitelist.
required: false
type: array
items:
type: string
output_key: body
output_schema:
status_code:
Expand Down
172 changes: 171 additions & 1 deletion contrib/runners/http_runner/tests/unit/test_http_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,23 @@

from __future__ import absolute_import

import re

import six
import mock
import unittest2

from st2common.constants.action import LIVEACTION_STATUS_SUCCEEDED
from http_runner.http_runner import HTTPClient
from http_runner.http_runner import HttpRunner

import st2tests.config as tests_config

__all__ = [
'HTTPClientTestCase',
'HTTPRunnerTestCase'
]


if six.PY2:
EXPECTED_DATA = ''
Expand All @@ -33,7 +43,7 @@ class MockResult(object):
close = mock.Mock()


class HTTPRunnerTestCase(unittest2.TestCase):
class HTTPClientTestCase(unittest2.TestCase):
@classmethod
def setUpClass(cls):
tests_config.parse_args()
Expand Down Expand Up @@ -212,3 +222,163 @@ def test_http_unicode_body_data(self, mock_requests):
expected_data = body

self.assertEqual(call_kwargs['data'], expected_data)

@mock.patch('http_runner.http_runner.requests')
def test_blacklisted_url_url_hosts_blacklist_runner_parameter(self, mock_requests):
    """
    Verify that URLs whose host is on url_hosts_blacklist are refused with ValueError and
    that every other URL results in exactly one request being dispatched.
    """
    request_mock = mock_requests.request

    # 1. No blacklist provided - request should go through
    self.assertEqual(request_mock.call_count, 0)

    HTTPClient(url='http://www.example.com', method='GET').run()

    self.assertEqual(request_mock.call_count, 1)

    # 2. Blacklist provided
    blacklist = [
        'example.com',
        '127.0.0.1',
        '::1',
        '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
    ]

    # 2.1 URLs which point to a blacklisted host (different schemes, ports, IPv6 literals)
    blacklisted_urls = (
        'https://example.com',
        'http://example.com',
        'http://example.com:81',
        'http://example.com:80',
        'http://example.com:9000',
        'http://[::1]:80/',
        'http://[::1]',
        'http://[::1]:9000',
        'http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]',
        'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8000',
    )

    for blacklisted_url in blacklisted_urls:
        expected_msg = r'URL "%s" is blacklisted' % (re.escape(blacklisted_url))
        client = HTTPClient(url=blacklisted_url, method='GET', url_hosts_blacklist=blacklist)

        with self.assertRaisesRegexp(ValueError, expected_msg):
            client.run()

    # 2.2 URLs which point to a host which is not on the blacklist
    allowed_urls = (
        'https://example2.com',
        'http://example3.com',
        'http://example4.com:81',
    )

    for allowed_url in allowed_urls:
        # Start each iteration with a clean call counter
        request_mock.reset_mock()
        self.assertEqual(request_mock.call_count, 0)

        HTTPClient(url=allowed_url, method='GET', url_hosts_blacklist=blacklist).run()

        self.assertEqual(request_mock.call_count, 1)

@mock.patch('http_runner.http_runner.requests')
def test_whitelisted_url_url_hosts_whitelist_runner_parameter(self, mock_requests):
    """
    Verify that when url_hosts_whitelist is set, only URLs whose host is on the list result
    in a request and every other URL is refused with ValueError.
    """
    request_mock = mock_requests.request

    # 1. No whitelist provided - request should go through
    self.assertEqual(request_mock.call_count, 0)

    HTTPClient(url='http://www.example.com', method='GET').run()

    self.assertEqual(request_mock.call_count, 1)

    # 2. Whitelist provided
    whitelist = [
        'example.com',
        '127.0.0.1',
        '::1',
        '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
    ]

    # 2.1 URLs which point to a host which is not on the whitelist
    rejected_urls = (
        'https://www.google.com',
        'https://www.example2.com',
        'http://127.0.0.2',
    )

    for rejected_url in rejected_urls:
        expected_msg = r'URL "%s" is not whitelisted' % (re.escape(rejected_url))
        client = HTTPClient(url=rejected_url, method='GET', url_hosts_whitelist=whitelist)

        with self.assertRaisesRegexp(ValueError, expected_msg):
            client.run()

    # 2.2 URLs which point to a whitelisted host (different schemes, ports, IPv6 literals)
    whitelisted_urls = (
        'https://example.com',
        'http://example.com',
        'http://example.com:81',
        'http://example.com:80',
        'http://example.com:9000',
        'http://[::1]:80/',
        'http://[::1]',
        'http://[::1]:9000',
        'http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]',
        'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8000',
    )

    for whitelisted_url in whitelisted_urls:
        # Start each iteration with a clean call counter
        request_mock.reset_mock()
        self.assertEqual(request_mock.call_count, 0)

        HTTPClient(url=whitelisted_url, method='GET', url_hosts_whitelist=whitelist).run()

        self.assertEqual(request_mock.call_count, 1)
Copy link
Contributor

@m4dcoder m4dcoder Aug 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a unit test where both whitelist and blacklist are provided?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in c07f6d6.

While at it, I might also look at adding some integration tests (if it doesn't become too big of a rabbit hole), since we don't have any at the moment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the time being, I just decided to add some additional unit tests for the runner class itself - 25b43cb.


def test_url_host_blacklist_and_url_host_blacklist_params_are_mutually_exclusive(self):
    """
    Verify that HTTPClient can't be instantiated with both a blacklist and a whitelist.
    """
    target_url = 'http://www.example.com'
    expected_msg = (r'"url_hosts_blacklist" and "url_hosts_whitelist" parameters are mutually '
                    'exclusive.')

    # The error is expected from the constructor itself, before any request is attempted
    with self.assertRaisesRegexp(ValueError, expected_msg):
        HTTPClient(url=target_url, method='GET',
                   url_hosts_blacklist=[target_url], url_hosts_whitelist=[target_url])


class HTTPRunnerTestCase(unittest2.TestCase):
    """
    Tests which exercise the HttpRunner class itself (pre_run() followed by run()).
    """

    @mock.patch('http_runner.http_runner.requests')
    def test_get_success(self, mock_requests):
        """
        Verify that a 200 "text/html" response results in a succeeded status and a raw,
        unparsed body.
        """
        # Unknown content type, body should be returned raw
        response = MockResult()
        response.text = 'foo bar ponies'
        response.headers = {'Content-Type': 'text/html'}
        response.status_code = 200
        mock_requests.request.return_value = response

        runner = HttpRunner('id')
        runner.runner_parameters = {
            'url': 'http://www.example.com',
            'method': 'GET'
        }
        runner.pre_run()

        status, result, _ = runner.run({})

        self.assertEqual(status, LIVEACTION_STATUS_SUCCEEDED)
        self.assertEqual(result['body'], 'foo bar ponies')
        self.assertEqual(result['status_code'], 200)
        self.assertEqual(result['parsed'], False)

    def test_url_host_blacklist_and_url_host_blacklist_params_are_mutually_exclusive(self):
        """
        Verify that run() raises ValueError when both the blacklist and the whitelist runner
        parameters are provided.
        """
        runner = HttpRunner('id')
        runner.runner_parameters = {
            'url': 'http://www.example.com',
            'method': 'GET',
            'url_hosts_blacklist': ['http://127.0.0.1'],
            'url_hosts_whitelist': ['http://127.0.0.1'],
        }
        runner.pre_run()

        expected_msg = (r'"url_hosts_blacklist" and "url_hosts_whitelist" parameters are '
                        'mutually exclusive.')

        with self.assertRaisesRegexp(ValueError, expected_msg):
            runner.run({})