Add worker to update ipresolver data files every few hours

This commit is contained in:
Joseph Schorr 2017-09-27 17:14:04 -04:00
parent 52927de7f6
commit 05b4a7d457
7 changed files with 130 additions and 77 deletions

View File

@ -19,7 +19,7 @@ RUN virtualenv --distribute venv \
&& venv/bin/pip freeze
# Install front-end dependencies
# JS depedencies
# JS dependencies
COPY yarn.lock package.json tsconfig.json webpack.config.js tslint.json ./
RUN yarn install --ignore-engines
@ -31,6 +31,9 @@ RUN yarn build \
COPY . .
# Update local copy of AWS IP Ranges.
RUN curl https://ip-ranges.amazonaws.com/ip-ranges.json -o util/ipresolver/aws-ip-ranges.json
# Set up the init system
RUN mkdir -p /etc/my_init.d /etc/systlog-ng /usr/local/bin /etc/monit static/fonts static/ldn /usr/local/nginx/logs/ \
&& cp $QUAYCONF/init/*.sh /etc/my_init.d/ \

View File

@ -0,0 +1,7 @@
#!/bin/sh

# Log runner for the IP resolver update worker.

# Refuse to start (exit status 1) until the syslog-ng service checks out.
if ! sv check syslog-ng > /dev/null; then
  exit 1
fi

# Replace this shell with the logger; -i adds the PID to each entry and -t
# tags entries with the worker's name.
exec logger -i -t ipresolverupdateworker

View File

@ -0,0 +1,9 @@
#! /bin/bash

# Launches the IP resolver update worker from the Quay directory.
#
# Environment:
#   QUAYPATH  value placed on PYTHONPATH for the worker (default: ".")
#   QUAYDIR   directory to run the worker from (default: "/")

echo 'Starting ip resolver update worker'

QUAYPATH=${QUAYPATH:-"."}

# Quote the expansion so a path containing whitespace is not word-split, and
# abort instead of launching the worker from the wrong directory if the
# change of directory fails.
cd "${QUAYDIR:-/}" || exit 1

# stderr is merged into stdout so both streams reach whatever consumes this
# script's output.
PYTHONPATH="$QUAYPATH" venv/bin/python -m workers.ipresolverupdateworker 2>&1

echo 'IP resolver update worker exited'

View File

@ -1,13 +1,14 @@
import pytest
from httmock import urlmatch, HTTMock
from contextlib import contextmanager
from mock import patch
from moto import mock_s3
import boto
from app import config_provider
from storage import CloudFrontedS3Storage, StorageContext
from util.ipresolver import IPResolver
from util.ipresolver.test.test_ipresolver import http_client, test_aws_ip, aws_ip_range_handler
from util.ipresolver.test.test_ipresolver import test_aws_ip, aws_ip_range_data
from test.fixtures import *
_TEST_CONTENT = os.urandom(1024)
@ -20,37 +21,34 @@ _TEST_PATH = 'some/cool/path'
def ipranges_populated(request):
return request.param
@pytest.fixture()
def ipresolver(http_client, aws_ip_range_handler, ipranges_populated, app):
    """Build an IPResolver whose AWS range requests are served by the mock handler.

    When ``ipranges_populated`` is truthy the resolver's AWS IP ranges are
    loaded (and the load is asserted to succeed) before the resolver is
    handed to the test; otherwise the resolver is returned without ranges.
    """
    with HTTMock(aws_ip_range_handler):
        resolver = IPResolver(app, client=http_client)
        should_populate = bool(ipranges_populated)
        if should_populate:
            # Must run while the HTTP mock is active so no real request is made.
            assert resolver._update_aws_ip_range()
    return resolver
@pytest.fixture()
def storage_context(ipresolver, app):
    """Return a StorageContext for location 'nyc' wired to the ``ipresolver`` fixture.

    ``app`` is requested only so that the application fixture is set up; it
    is not otherwise used here.
    """
    context = StorageContext('nyc', None, None, config_provider, ipresolver)
    return context
@mock_s3
def test_direct_download(storage_context, test_aws_ip, ipranges_populated, app):
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = CloudFrontedS3Storage(storage_context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
assert engine.exists(_TEST_PATH)
# Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
def test_direct_download(test_aws_ip, aws_ip_range_data, ipranges_populated, app):
ipresolver = IPResolver(app)
if ipranges_populated:
# Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
else:
# Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
# get back an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
empty_range_data = {
'syncToken': 123456789,
'prefixes': [],
}
with patch.object(ipresolver, '_get_aws_ip_ranges', lambda: aws_ip_range_data if ipranges_populated else empty_range_data):
context = StorageContext('nyc', None, None, config_provider, ipresolver)
# Create a test bucket and put some test content.
boto.connect_s3().create_bucket(_TEST_BUCKET)
engine = CloudFrontedS3Storage(context, 'cloudfrontdomain', 'keyid', 'test/data/test.pem', 'some/path',
_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD)
engine.put_content(_TEST_PATH, _TEST_CONTENT)
assert engine.exists(_TEST_PATH)
# Request a direct download URL for a request from a known AWS IP, and ensure we are returned an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, test_aws_ip)
if ipranges_populated:
# Request a direct download URL for a request from a non-AWS IP, and ensure we are returned a CloudFront URL.
assert 'cloudfrontdomain' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')
else:
# Request a direct download URL for a request from a non-AWS IP, but since IP Ranges isn't populated, we still
# get back an S3 URL.
assert 's3.amazonaws.com' in engine.get_direct_download_url(_TEST_PATH, '1.2.3.4')

View File

@ -29,7 +29,7 @@ def update_resolver_datafiles():
logger.debug('Successfully wrote %s', filename)
class IPResolver(object):
def __init__(self, app, *args, **kwargs):
def __init__(self, app):
self.app = app
self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')
@ -43,11 +43,10 @@ class IPResolver(object):
return location_function(ip_address)
@ttl_cache(maxsize=1, ttl=600)
def _get_location_function(self):
def _get_aws_ip_ranges(self):
try:
with open('util/ipresolver/aws-ip-ranges.json', 'r') as f:
aws_ip_range_json = json.loads(f.read())
return json.loads(f.read())
except IOError:
logger.exception('Could not load AWS IP Ranges')
return None
@ -57,6 +56,12 @@ class IPResolver(object):
except TypeError:
logger.exception('Could not load AWS IP Ranges')
return None
@ttl_cache(maxsize=1, ttl=600)
def _get_location_function(self):
aws_ip_range_json = self._get_aws_ip_ranges()
if aws_ip_range_json is None:
return None
sync_token = aws_ip_range_json['syncToken']
all_amazon, regions, services = IPResolver._parse_amazon_ranges(aws_ip_range_json)

View File

@ -1,54 +1,40 @@
import requests
import pytest
import json
from httmock import urlmatch, HTTMock
from mock import patch
from config import build_requests_session
from util.ipresolver import IPResolver, ResolvedLocation
from test.fixtures import *
@pytest.fixture(scope="module")
def http_client():
    """Provide a module-scoped ``requests`` session backed by a pooled adapter.

    A single HTTPAdapter (100 connection pools, 100 connections per pool) is
    mounted for both the http:// and https:// prefixes.
    """
    pooled_adapter = requests.adapters.HTTPAdapter(pool_connections=100,
                                                   pool_maxsize=100)
    session = requests.Session()
    for prefix in ('http://', 'https://'):
        session.mount(prefix, pooled_adapter)
    return session
@pytest.fixture()
def test_aws_ip():
    """Return the IP address that the tests treat as belonging to AWS.

    The address falls inside the ``10.0.0.0/8`` prefix used by the fake AWS
    range document defined in this module.
    """
    return '10.0.0.1'
@pytest.fixture()
def aws_ip_range_handler():
@urlmatch(netloc=r'ip-ranges.amazonaws.com')
def handler(_, request):
fake_range_doc = {
'syncToken': 123456789,
'prefixes': [
{
'ip_prefix': '10.0.0.0/8',
'region': 'GLOBAL',
'service': 'AMAZON',
}
],
}
return {'status_code': 200, 'content': json.dumps(fake_range_doc)}
def aws_ip_range_data():
fake_range_doc = {
'syncToken': 123456789,
'prefixes': [
{
'ip_prefix': '10.0.0.0/8',
'region': 'GLOBAL',
'service': 'AMAZON',
}
],
}
return fake_range_doc
return handler
def test_unstarted(app, test_aws_ip, http_client):
ipresolver = IPResolver(app, client=http_client)
def test_unstarted(app, test_aws_ip):
ipresolver = IPResolver(app)
assert ipresolver.resolve_ip(test_aws_ip) is None
def test_resolved(aws_ip_range_handler, test_aws_ip, app, http_client):
with HTTMock(aws_ip_range_handler):
ipresolver = IPResolver(app, client=http_client)
assert ipresolver._update_aws_ip_range()
def test_resolved(aws_ip_range_data, test_aws_ip, app,):
ipresolver = IPResolver(app)
assert ipresolver.resolve_ip(test_aws_ip) == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
assert ipresolver.resolve_ip('10.0.0.2') == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
assert ipresolver.resolve_ip('1.2.3.4') == ResolvedLocation(provider='internet', region=u'NA', service=u'US', sync_token=123456789)
assert ipresolver.resolve_ip('127.0.0.1') == ResolvedLocation(provider='internet', region=None, service=None, sync_token=123456789)
def get_data():
return aws_ip_range_data
with patch.object(ipresolver, '_get_aws_ip_ranges', get_data):
assert ipresolver.resolve_ip(test_aws_ip) == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
assert ipresolver.resolve_ip('10.0.0.2') == ResolvedLocation(provider='aws', region=u'GLOBAL', service=None, sync_token=123456789)
assert ipresolver.resolve_ip('1.2.3.4') == ResolvedLocation(provider='internet', region=u'NA', service=u'US', sync_token=123456789)
assert ipresolver.resolve_ip('127.0.0.1') == ResolvedLocation(provider='internet', region=None, service=None, sync_token=123456789)

View File

@ -0,0 +1,45 @@
import logging
import time
from app import app
from util.ipresolver import update_resolver_datafiles
from workers.worker import Worker
logger = logging.getLogger(__name__)
class IPResolverUpdateWorker(Worker):
    """Worker that keeps the IP resolver data files up to date.

    The data files are refreshed once when the worker is constructed and then
    again on a recurring schedule registered through ``add_operation``.
    """

    # Default refresh period of two hours, expressed in minutes because the
    # configured value is multiplied by 60 before being scheduled. The
    # previous default (60 * 60 * 2) was already in seconds, so the extra
    # multiplication stretched the period to roughly five days.
    _DEFAULT_REFRESH_MINUTES = 60 * 2

    def __init__(self):
        super(IPResolverUpdateWorker, self).__init__()

        # Update now. A failed initial refresh is logged but must not prevent
        # the worker from starting; KeyboardInterrupt and SystemExit are
        # deliberately allowed to propagate.
        try:
            self._update_resolver_datafiles()
        except Exception:
            logger.exception('Initial update of range data files failed')

        # NOTE(review): presumably add_operation takes the period in seconds
        # and IP_RESOLVER_DATAFILE_REFRESH is given in minutes -- confirm
        # against workers.worker.Worker and the config documentation.
        refresh_minutes = app.config.get('IP_RESOLVER_DATAFILE_REFRESH',
                                         self._DEFAULT_REFRESH_MINUTES)
        self.add_operation(self._update_resolver_datafiles, refresh_minutes * 60)

    def _update_resolver_datafiles(self):
        """Refresh the IP resolver data files, logging the start and the end."""
        logger.debug('Starting refresh of IP resolver data files')
        update_resolver_datafiles()
        logger.debug('Finished refresh of IP resolver data files')
if __name__ == "__main__":
    # The worker is only useful when at least one configured storage engine
    # is CloudFronted storage.
    storage_config = app.config.get('DISTRIBUTED_STORAGE_CONFIG', {})
    requires_resolution = any(storage_type == 'CloudFrontedS3Storage'
                              for storage_type, _ in storage_config.values())

    if not requires_resolution:
        logger.debug('Cloud fronted storage not used; skipping')
        # Idle forever instead of exiting -- presumably so that the process
        # supervisor does not keep restarting the worker.
        while True:
            time.sleep(10000)

    worker = IPResolverUpdateWorker()
    worker.start()