diff --git a/Dockerfile b/Dockerfile
index 3f3bc05c0..35a423850 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,7 +19,7 @@ RUN virtualenv --distribute venv \
     && venv/bin/pip freeze
 
 # Install front-end dependencies
-# JS depedencies
+# JS dependencies
 COPY yarn.lock package.json tsconfig.json webpack.config.js tslint.json ./
 RUN yarn install --ignore-engines
 
@@ -31,6 +31,10 @@ RUN yarn build \
 
 COPY . .
 
+# Update local copy of AWS IP Ranges. -f makes an HTTP error abort the build
+# instead of saving the error response as the data file.
+RUN curl -fsSL https://ip-ranges.amazonaws.com/ip-ranges.json -o util/ipresolver/aws-ip-ranges.json
+
 # Set up the init system
 RUN mkdir -p /etc/my_init.d /etc/systlog-ng /usr/local/bin /etc/monit static/fonts static/ldn /usr/local/nginx/logs/ \
     && cp $QUAYCONF/init/*.sh /etc/my_init.d/ \
diff --git a/conf/init/service/interactive/ipresolverupdateworker/log/run b/conf/init/service/interactive/ipresolverupdateworker/log/run
new file mode 100755
index 000000000..80bb40461
--- /dev/null
+++ b/conf/init/service/interactive/ipresolverupdateworker/log/run
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# Ensure dependencies start before the logger
+sv check syslog-ng > /dev/null || exit 1
+
+# Start the logger
+exec logger -i -t ipresolverupdateworker
diff --git a/conf/init/service/interactive/ipresolverupdateworker/run b/conf/init/service/interactive/ipresolverupdateworker/run
new file mode 100755
index 000000000..b5d95669b
--- /dev/null
+++ b/conf/init/service/interactive/ipresolverupdateworker/run
@@ -0,0 +1,9 @@
+#! /bin/bash
+
+echo 'Starting ip resolver update worker'
+
+QUAYPATH=${QUAYPATH:-"."}
+cd "${QUAYDIR:-/}" || exit 1
+PYTHONPATH="$QUAYPATH" venv/bin/python -m workers.ipresolverupdateworker 2>&1
+
+echo 'IP resolver update worker exited'
diff --git a/storage/test/test_cloudfront.py b/storage/test/test_cloudfront.py
index 9d5629e84..69c3ddbdc 100644
--- a/storage/test/test_cloudfront.py
+++ b/storage/test/test_cloudfront.py
@@ -1,13 +1,14 @@
 import pytest
 
-from httmock import urlmatch, HTTMock
+from contextlib import contextmanager
+from mock import patch
 from moto import mock_s3
 import boto
 
 from app import config_provider
 from storage import CloudFrontedS3Storage, StorageContext
 from util.ipresolver import IPResolver
-from util.ipresolver.test.test_ipresolver import http_client, test_aws_ip, aws_ip_range_handler
+from util.ipresolver.test.test_ipresolver import test_aws_ip, aws_ip_range_data
 from test.fixtures import *
 
 _TEST_CONTENT = os.urandom(1024)
@@ -20,37 +21,39 @@ _TEST_PATH = 'some/cool/path'
 def ipranges_populated(request):
   return request.param
 
-@pytest.fixture()
-def ipresolver(http_client, aws_ip_range_handler, ipranges_populated, app):
-  with HTTMock(aws_ip_range_handler):
-    ipresolver = IPResolver(app, client=http_client)
-    if ipranges_populated:
-      assert ipresolver._update_aws_ip_range()
-
-  return ipresolver
-
-@pytest.fixture()
-def storage_context(ipresolver, app):
-  return StorageContext('nyc', None, None, config_provider, ipresolver)
-
 
 @mock_s3
-def test_direct_download(storage_context, test_aws_ip, ipranges_populated, app):
-  # Create a test bucket and put some test content.
-  boto.connect_s3().create_bucket(_TEST_BUCKET)
-
-  engine = CloudFrontedS3Storage(storage_context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
-                                 _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
-  engine.put_content(_TEST_PATH, _TEST_CONTENT)
-  assert engine.exists(_TEST_PATH)
-
-  # Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
-  assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
-
-  if ipranges_populated:
-    # Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
-    assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
-  else:
-    # Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
-    # get back an S3 URL.
-    assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
+def test_direct_download(test_aws_ip, aws_ip_range_data, ipranges_populated, app):
+  ipresolver = IPResolver(app)
+
+  # Defined unconditionally: the patched loader below reads this document in
+  # exactly the case where ipranges_populated is False.
+  # NOTE(review): confirm that an empty prefix list (rather than None) is what the
+  # "IP Ranges isn't populated" assertions at the end of this test expect.
+  empty_range_data = {
+    'syncToken': 123456789,
+    'prefixes': [],
+  }
+  range_data = aws_ip_range_data if ipranges_populated else empty_range_data
+
+  with patch.object(ipresolver, '_get_aws_ip_ranges', lambda: range_data):
+    context = StorageContext('nyc', None, None, config_provider, ipresolver)
+
+    # Create a test bucket and put some test content.
+    boto.connect_s3().create_bucket(_TEST_BUCKET)
+
+    engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
+                                   _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
+    engine.put_content(_TEST_PATH, _TEST_CONTENT)
+    assert engine.exists(_TEST_PATH)
+
+    # Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
+    assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
+
+    if ipranges_populated:
+      # Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
+      assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
+    else:
+      # Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
+      # get back an S3 URL.
+      assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
diff --git a/util/ipresolver/__init__.py b/util/ipresolver/__init__.py
index 7ecd106cb..8daf130ec 100644
--- a/util/ipresolver/__init__.py
+++ b/util/ipresolver/__init__.py
@@ -29,7 +29,7 @@ def update_resolver_datafiles():
       logger.debug('Successfully wrote %s', filename)
 
 class IPResolver(object):
-  def __init__(self, app, *args, **kwargs):
+  def __init__(self, app):
     self.app = app
     self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')
 
@@ -43,11 +43,10 @@ class IPResolver(object):
 
     return location_function(ip_address)
 
-  @ttl_cache(maxsize=1, ttl=600)
-  def _get_location_function(self):
+  def _get_aws_ip_ranges(self):
     try:
       with open('util/ipresolver/aws-ip-ranges.json', 'r') as f:
-        aws_ip_range_json = json.loads(f.read())
+        return json.loads(f.read())
     except IOError:
       logger.exception('Could not load AWS IP Ranges')
       return None
@@ -57,6 +56,12 @@ class IPResolver(object):
     except TypeError:
       logger.exception('Could not load AWS IP Ranges')
       return None
+
+  @ttl_cache(maxsize=1, ttl=600)
+  def _get_location_function(self):
+    aws_ip_range_json = self._get_aws_ip_ranges()
+    if aws_ip_range_json is None:
+      return None
 
     sync_token = aws_ip_range_json['syncToken']
     all_amazon, regions, services = IPResolver._parse_amazon_ranges(aws_ip_range_json)
diff --git a/util/ipresolver/test/test_ipresolver.py b/util/ipresolver/test/test_ipresolver.py
index fea27364d..26e78d7fc 100644
--- a/util/ipresolver/test/test_ipresolver.py
+++ b/util/ipresolver/test/test_ipresolver.py
@@ -1,54 +1,40 @@
-import requests
 import pytest
-import json
 
-from httmock import urlmatch, HTTMock
+from mock import patch
 
-from config import build_requests_session
 from util.ipresolver import IPResolver, ResolvedLocation
 from test.fixtures import *
 
-@pytest.fixture(scope="module")
-def http_client():
-  sess = requests.Session()
-  adapter = requests.adapters.HTTPAdapter(pool_connections=100,
-                                          pool_maxsize=100)
-  sess.mount('http://', adapter)
-  sess.mount('https://', adapter)
-  return sess
-
 @pytest.fixture()
 def test_aws_ip():
   return '10.0.0.1'
 
 @pytest.fixture()
-def aws_ip_range_handler():
-  @urlmatch(netloc=r'ip-ranges.amazonaws.com')
-  def handler(_, request):
-    fake_range_doc = {
-      'syncToken': 123456789,
-      'prefixes': [
-        {
-          'ip_prefix': '10.0.0.0/8',
-          'region': 'GLOBAL',
-          'service': 'AMAZON',
-        }
-      ],
-    }
-    return {'status_code': 200, 'content': json.dumps(fake_range_doc)}
+def aws_ip_range_data():
+  fake_range_doc = {
+    'syncToken': 123456789,
+    'prefixes': [
+      {
+        'ip_prefix': '10.0.0.0/8',
+        'region': 'GLOBAL',
+        'service': 'AMAZON',
+      }
+    ],
+  }
+  return fake_range_doc
 
-  return handler
-
-def test_unstarted(app, test_aws_ip, http_client):
-  ipresolver = IPResolver(app, client=http_client)
+def test_unstarted(app, test_aws_ip):
+  ipresolver = IPResolver(app)
   assert ipresolver.resolve_ip(test_aws_ip) is None
 
-def test_resolved(aws_ip_range_handler, test_aws_ip, app, http_client):
-  with HTTMock(aws_ip_range_handler):
-    ipresolver = IPResolver(app, client=http_client)
-    assert ipresolver._update_aws_ip_range()
+def test_resolved(aws_ip_range_data, test_aws_ip, app):
+  ipresolver = IPResolver(app)
 
-  assert ipresolver.resolve_ip(test_aws_ip) == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
-  assert ipresolver.resolve_ip('10.0.0.2') == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
-  assert ipresolver.resolve_ip('1.2.3.4') == ResolvedLocation(provider='internet', region=u'NA', service=u'US', sync_token=123456789)
-  assert ipresolver.resolve_ip('127.0.0.1') == ResolvedLocation(provider='internet', region=None, service=None, sync_token=123456789)
+  def get_data():
+    return aws_ip_range_data
+
+  with patch.object(ipresolver, '_get_aws_ip_ranges', get_data):
+    assert ipresolver.resolve_ip(test_aws_ip) == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
+    assert ipresolver.resolve_ip('10.0.0.2') == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
+    assert ipresolver.resolve_ip('1.2.3.4') == ResolvedLocation(provider='internet', region=u'NA', service=u'US', sync_token=123456789)
+    assert ipresolver.resolve_ip('127.0.0.1') == ResolvedLocation(provider='internet', region=None, service=None, sync_token=123456789)
diff --git a/workers/ipresolverupdateworker.py b/workers/ipresolverupdateworker.py
new file mode 100644
index 000000000..4c808059a
--- /dev/null
+++ b/workers/ipresolverupdateworker.py
@@ -0,0 +1,50 @@
+import logging
+import time
+
+from app import app
+from util.ipresolver import update_resolver_datafiles
+from workers.worker import Worker
+
+logger = logging.getLogger(__name__)
+
+
+class IPResolverUpdateWorker(Worker):
+  """ Worker which refreshes the data files used by the IP resolver. """
+  def __init__(self):
+    super(IPResolverUpdateWorker, self).__init__()
+
+    # Update now. A failure is logged instead of raised so that the
+    # add_operation() call below still runs.
+    try:
+      self._update_resolver_datafiles()
+    except Exception:
+      logger.exception('Initial update of range data files failed')
+
+    # NOTE(review): the default already reads as two hours in seconds (60 * 60 * 2)
+    # and is then multiplied by 60; confirm the unit of IP_RESOLVER_DATAFILE_REFRESH
+    # and of the interval add_operation() expects.
+    self.add_operation(self._update_resolver_datafiles,
+                       app.config.get('IP_RESOLVER_DATAFILE_REFRESH', 60 * 60 * 2) * 60)
+
+  def _update_resolver_datafiles(self):
+    """ Runs one refresh of the IP resolver data files, logging start and finish. """
+    logger.debug('Starting refresh of IP resolver data files')
+    update_resolver_datafiles()
+    logger.debug('Finished refresh of IP resolver data files')
+
+
+if __name__ == "__main__":
+  # Only enable if CloudFronted storage is used.
+  storage_config = app.config.get('DISTRIBUTED_STORAGE_CONFIG', {})
+  requires_resolution = any(storage_type == 'CloudFrontedS3Storage'
+                            for storage_type, _ in storage_config.values())
+
+  if not requires_resolution:
+    logger.debug('Cloud fronted storage not used; skipping')
+    # Idle forever rather than exiting.
+    # NOTE(review): presumably so the service supervisor does not restart it in a loop.
+    while True:
+      time.sleep(10000)
+
+  worker = IPResolverUpdateWorker()
+  worker.start()