This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/util/ipresolver/__init__.py
Kenny Lee Sin Cheong b6336393de Make IPResolver run the update in a separate thread
A separate thread will cache the results of parsing the range
file, and the IPResolver will hit the cache instead of blocking while recomputing the
ranges every time. The thread updates every 600s, and retries every 60s on
failures.
2018-08-31 14:00:53 -04:00

223 lines
6.9 KiB
Python

import logging
import json
import time
from collections import namedtuple, defaultdict
from threading import Thread, Lock
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from cachetools import ttl_cache, lru_cache
from netaddr import IPNetwork, IPAddress, IPSet, AddrFormatError
import geoip2.database
import geoip2.errors
import requests
from util.abchelpers import nooper
# Result of an IP lookup. `provider` is one of 'aws', 'internet' or 'invalid_ip'; `region` and
# `service` carry provider-specific details (or None); `sync_token` identifies the AWS range
# data that was in the cache when the lookup was made.
ResolvedLocation = namedtuple('ResolvedLocation', ['provider', 'region', 'service', 'sync_token'])
logger = logging.getLogger(__name__)
# Maps each local data file name (stored under util/ipresolver/) to the URL it is fetched from.
_DATA_FILES = {'aws-ip-ranges.json': 'https://ip-ranges.amazonaws.com/ip-ranges.json'}
# Seconds the background updater sleeps between refreshes of the cached ranges.
_UPDATE_INTERVAL = 600
# Seconds the background updater sleeps after a failed refresh attempt.
_FAILED_UPDATE_RETRY_SECS = 60
# Shared cache of the parsed AWS ranges; holds the keys 'sync_token', 'all_amazon' and 'regions'
# once the background updater has completed a refresh.
CACHE = {}
# Held by the background updater while it writes the three CACHE keys.
CACHE_LOCK = Lock()
def update_resolver_datafiles():
    """ Performs an update of the data file(s) used by the IP Resolver.

    Every entry of ``_DATA_FILES`` is downloaded and written to ``util/ipresolver/<filename>``.

    Raises:
      Exception: if a download returns a non-2XX status code. Errors raised by ``requests`` or by
        writing the file propagate unchanged.
    """
    # `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
    for filename, url in _DATA_FILES.items():
        logger.debug('Updating IP resolver data file "%s" from URL "%s"', filename, url)

        # Download first and only open the target once we have a good response: opening with 'w'
        # truncates, so a failed download must not wipe out the existing data file.
        response = requests.get(url)
        logger.debug('Got %s response for URL %s', response.status_code, url)

        # Any 2XX status is a success (the previous `status_code / 2 != 100` check was transposed).
        if response.status_code // 100 != 2:
            raise Exception('Got non-2XX status code for URL %s: %s' % (url, response.status_code))

        with open('util/ipresolver/%s' % filename, 'w') as f:
            f.write(response.text)

        logger.debug('Successfully wrote %s', filename)
def _get_aws_ip_ranges():
    """ Loads and decodes the locally stored AWS IP ranges JSON file.

    Returns the decoded JSON document, or None (after logging the exception) if the file could not
    be read or decoded.
    """
    try:
        with open('util/ipresolver/aws-ip-ranges.json', 'r') as ranges_file:
            raw_ranges = ranges_file.read()
        return json.loads(raw_ranges)
    except (IOError, ValueError, TypeError):
        # Unreadable file or undecodable contents: report it and fall through to None.
        logger.exception('Could not load AWS IP Ranges')
    return None
@add_metaclass(ABCMeta)
class IPResolverInterface(object):
    """ Abstract interface for objects that resolve information about an IP address. """

    @abstractmethod
    def resolve_ip(self, ip_address):
        """ Tries to resolve information about the given IP address.

        Implementations return the resolved information, or None when the attempt fails.
        """

    @abstractmethod
    def is_ip_possible_threat(self, ip_address):
        """ Tries to determine whether the given IP address is a possible abuser or spammer.

        Implementations return False when the IP address information could not be looked up.
        """
# NOTE(review): `nooper` comes from util.abchelpers; presumably it supplies do-nothing
# implementations of the interface's abstract methods -- confirm against that module.
@nooper
class NoopIPResolver(IPResolverInterface):
    """ No-op version of the IP resolver. """
    pass
class IPResolver(IPResolverInterface):
    """ Resolves IP addresses using the cached AWS IP ranges and a GeoIP country database.

    Constructing an instance opens the GeoLite2 country database and, unless the app is configured
    with TESTING, starts the background thread that keeps the module-level CACHE up to date.
    """

    def __init__(self, app):
        self.app = app
        self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')

        # Last location function built, together with the cache contents it was built from:
        # (sync_token, all_amazon, regions, function). See _get_location_function.
        self._location_memo = None

        self._worker = _UpdateIPRange(_UPDATE_INTERVAL)
        if not app.config.get('TESTING', False):
            self._worker.start()

    @ttl_cache(maxsize=100, ttl=600)
    def is_ip_possible_threat(self, ip_address):
        """ Returns whether the ipdata.co threat API flags the IP address as a threat or a bogon.

        Returns False when threat checking is not configured (THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT
        or IP_DATA_API_KEY missing), when no IP address is given, or when the lookup fails for any
        reason. Results, including those False fallbacks, are memoized by the ttl_cache decorator
        (maxsize=100, ttl=600).
        """
        config = self.app.config
        if config.get('THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT') is None:
            return False

        api_key = config.get('IP_DATA_API_KEY')
        if api_key is None:
            return False

        if not ip_address:
            return False

        try:
            logger.debug('Requesting IP data for IP %s', ip_address)
            r = requests.get('https://api.ipdata.co/%s/threat?api-key=%s' % (ip_address, api_key),
                             timeout=1)
            if r.status_code != 200:
                logger.debug('Got non-200 response for IP %s: %s', ip_address, r.status_code)
                return False

            # Decode the body once and reuse it for both the log line and the verdict.
            threat_data = r.json()
            logger.debug('Got IP data for IP %s: %s => %s', ip_address, r.status_code, threat_data)
            return threat_data.get('is_threat', False) or threat_data.get('is_bogon', False)
        except Exception:
            # Best effort: request errors, bad JSON and anything else all mean "not a known threat".
            logger.exception('Got exception when trying to lookup IP Address')

        return False

    def resolve_ip(self, ip_address):
        """ Attempts to return resolved information about the specified IP Address. If such an attempt
            fails, returns None.
        """
        # Nothing to resolve; skip the cache lookup entirely.
        if not ip_address:
            return None

        location_function = self._get_location_function()
        if not location_function:
            return None

        return location_function(ip_address)

    def _get_location_function(self):
        """ Returns a function mapping an IP address to a ResolvedLocation, built from the current
            contents of CACHE, or None if the AWS ranges have not been cached yet (or the cache
            could not be read).
        """
        try:
            # The updater writes the three keys under CACHE_LOCK; read them under the same lock so
            # the sync token always matches the ranges it was loaded with.
            with CACHE_LOCK:
                sync_token = CACHE.get('sync_token', None)
                if sync_token is not None:
                    all_amazon = CACHE['all_amazon']
                    regions = CACHE['regions']
        except Exception:
            logger.exception('Got exception trying to hit aws ip range cache')
            return None

        if sync_token is None:
            logger.debug('The aws ip range has not been cached from %s',
                         _DATA_FILES['aws-ip-ranges.json'])
            return None

        # Reuse the previously built function while the cache still holds the very same objects.
        # Building a new one per call would hand out a fresh, empty lru_cache every time.
        memo = getattr(self, '_location_memo', None)
        if memo is not None:
            memo_token, memo_amazon, memo_regions, memo_function = memo
            if memo_token == sync_token and memo_amazon is all_amazon and memo_regions is regions:
                return memo_function

        location_function = IPResolver._build_location_function(sync_token, all_amazon, regions,
                                                                self.geoip_db)
        self._location_memo = (sync_token, all_amazon, regions, location_function)
        return location_function

    @staticmethod
    def _build_location_function(sync_token, all_amazon, regions, country_db):
        """ Builds the (lru-cached) lookup function for one snapshot of the AWS ranges.

        Args:
          sync_token: token identifying the range data; copied into every ResolvedLocation.
          all_amazon: container of all AWS ranges, queried with `in`.
          regions: mapping of region name to the container of that region's ranges.
          country_db: object whose `country(ip)` method performs the GeoIP lookup.
        """
        @lru_cache(maxsize=4096)
        def _get_location(ip_address):
            try:
                parsed_ip = IPAddress(ip_address)
            except AddrFormatError:
                return ResolvedLocation('invalid_ip', None, None, sync_token)

            if parsed_ip not in all_amazon:
                # Not an AWS address: try geoip classification.
                try:
                    found = country_db.country(parsed_ip)
                    return ResolvedLocation(
                        'internet',
                        found.continent.code,
                        found.country.iso_code,
                        sync_token,
                    )
                except geoip2.errors.AddressNotFoundError:
                    return ResolvedLocation('internet', None, None, sync_token)

            # AWS address: report the first region whose ranges contain it (None if no region does).
            region = None
            for region_name, region_set in regions.items():
                if parsed_ip in region_set:
                    region = region_name
                    break

            return ResolvedLocation('aws', region, None, sync_token)

        return _get_location

    @staticmethod
    def _parse_amazon_ranges(ranges):
        """ Converts the decoded AWS ranges document into lookup sets.

        Reads the `ip_prefix` and `region` of every entry under `ranges['prefixes']`.

        Returns:
          A tuple `(all_amazon, regions)`: an IPSet of every prefix, and a mapping of region name
          to the IPSet of that region's prefixes.
        """
        all_amazon = IPSet()
        regions = defaultdict(IPSet)

        for service_description in ranges['prefixes']:
            cidr = IPNetwork(service_description['ip_prefix'])
            region = service_description['region']

            all_amazon.add(cidr)
            regions[region].add(cidr)

        return all_amazon, regions
class _UpdateIPRange(Thread):
    """ Helper thread that periodically loads the AWS IP ranges into the module-level CACHE.

    After a successful refresh the thread sleeps for `interval` seconds; after a failed one it
    sleeps for _FAILED_UPDATE_RETRY_SECS seconds and tries again.
    """
    # NOTE(review): the thread is not marked as a daemon and `run` never returns, so it may keep
    # the process alive at shutdown -- confirm whether that is intended.

    def __init__(self, interval):
        Thread.__init__(self)
        self.interval = interval

    def _refresh_cache(self):
        """ Performs a single refresh of CACHE. Returns True on success, False if the ranges file
            could not be loaded. Errors while parsing the ranges propagate to the caller.
        """
        aws_ip_range_json = _get_aws_ip_ranges()
        if aws_ip_range_json is None:
            # _get_aws_ip_ranges reports failure by returning None (it has already logged why).
            return False

        sync_token = aws_ip_range_json['syncToken']
        all_amazon, regions = IPResolver._parse_amazon_ranges(aws_ip_range_json)

        # Publish the three values together so readers holding the lock never see a mix of old
        # and new data.
        with CACHE_LOCK:
            CACHE['sync_token'] = sync_token
            CACHE['all_amazon'] = all_amazon
            CACHE['regions'] = regions

        return True

    def run(self):
        while True:
            try:
                logger.debug('Updating aws ip range from "%s"', 'util/ipresolver/aws-ip-ranges.json')
                refreshed = self._refresh_cache()
            except Exception:
                # A malformed ranges file must not kill the updater; log it and retry later.
                logger.exception('Failed trying to update aws ip range')
                refreshed = False

            if refreshed:
                time.sleep(self.interval)
            else:
                # Retry sooner after a failure instead of exiting the loop.
                time.sleep(_FAILED_UPDATE_RETRY_SECS)