import logging
import json
import time

from collections import namedtuple, defaultdict
from threading import Thread, Lock
from abc import ABCMeta, abstractmethod

from six import add_metaclass
from cachetools import ttl_cache, lru_cache
from netaddr import IPNetwork, IPAddress, IPSet, AddrFormatError

import geoip2.database
import geoip2.errors
import requests

from util.abchelpers import nooper

ResolvedLocation = namedtuple('ResolvedLocation', ['provider', 'region', 'service', 'sync_token'])

logger = logging.getLogger(__name__)

_DATA_FILES = {'aws-ip-ranges.json': 'https://ip-ranges.amazonaws.com/ip-ranges.json'}
_UPDATE_INTERVAL = 600
_FAILED_UPDATE_RETRY_SECS = 60

# Shared between the updater thread (writer) and IPResolver instances (readers).
# Holds the keys 'sync_token', 'all_amazon' and 'regions' once populated.
CACHE = {}
CACHE_LOCK = Lock()


def update_resolver_datafiles():
    """ Performs an update of the data file(s) used by the IP Resolver.

        Each entry of _DATA_FILES is downloaded and written under util/ipresolver/.
        Raises an Exception if a download returns a non-2XX status code.
    """
    for filename, url in _DATA_FILES.items():
        logger.debug('Updating IP resolver data file "%s" from URL "%s"', filename, url)

        # Download before opening the target file so that a failed request does not
        # leave a truncated data file behind.
        response = requests.get(url)
        logger.debug('Got %s response for URL %s', response.status_code, url)
        if response.status_code // 100 != 2:
            raise Exception('Got non-2XX status code for URL %s: %s' % (url, response.status_code))

        with open('util/ipresolver/%s' % filename, 'w') as f:
            f.write(response.text)

        logger.debug('Successfully wrote %s', filename)


def _get_aws_ip_ranges():
    """ Loads and returns the parsed AWS IP ranges JSON file, or None if the file could
        not be read or parsed.
    """
    try:
        with open('util/ipresolver/aws-ip-ranges.json', 'r') as f:
            return json.load(f)
    except (IOError, ValueError, TypeError):
        logger.exception('Could not load AWS IP Ranges')
        return None


@add_metaclass(ABCMeta)
class IPResolverInterface(object):
    """ Helper class for resolving information about an IP address. """

    @abstractmethod
    def resolve_ip(self, ip_address):
        """ Attempts to return resolved information about the specified IP Address. If such an
            attempt fails, returns None.
        """
        pass

    @abstractmethod
    def is_ip_possible_threat(self, ip_address):
        """ Attempts to return whether the given IP address is a possible abuser or spammer.
            Returns False if the IP address information could not be looked up.
        """
        pass


@nooper
class NoopIPResolver(IPResolverInterface):
    """ No-op version of the IP resolver. """
    pass


class IPResolver(IPResolverInterface):
    """ IP resolver which classifies addresses using the cached AWS IP ranges and a GeoIP
        country database, and checks threat status against the ipdata.co API.
    """

    def __init__(self, app):
        self.app = app
        self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')
        self._worker = _UpdateIPRange(_UPDATE_INTERVAL)
        self._worker_started = False

        # (sync_token, location function) pair, so the memoized lookup function is reused
        # until the cached AWS data changes.
        self._location_function_cache = (None, None)

    # NOTE: the cache key includes `self`, so entries keep the instance referenced until
    # they expire or are evicted.
    @ttl_cache(maxsize=100, ttl=600)
    def is_ip_possible_threat(self, ip_address):
        """ Returns whether the ipdata.co threat API reports the given IP address as a
            threat or a bogon. Returns False if threat lookups are not configured, the
            address is empty, or the lookup fails for any reason.
        """
        if self.app.config.get('THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT') is None:
            return False

        api_key = self.app.config.get('IP_DATA_API_KEY')
        if api_key is None:
            return False

        if not ip_address:
            return False

        try:
            logger.debug('Requesting IP data for IP %s', ip_address)
            r = requests.get('https://api.ipdata.co/%s/threat?api-key=%s' % (ip_address, api_key),
                             timeout=1)
            if r.status_code != 200:
                logger.debug('Got non-200 response for IP %s: %s', ip_address, r.status_code)
                return False

            threat_data = r.json()
            logger.debug('Got IP data for IP %s: %s => %s', ip_address, r.status_code, threat_data)
            return bool(threat_data.get('is_threat', False) or threat_data.get('is_bogon', False))
        except Exception:
            # Best effort: request errors, malformed JSON and unexpected payload shapes are
            # all treated as "not a known threat".
            logger.exception('Got exception when trying to lookup IP Address')

        return False

    def resolve_ip(self, ip_address):
        """ Attempts to return resolved information about the specified IP Address, as a
            ResolvedLocation. If such an attempt fails, returns None.
        """
        location_function = self._get_location_function()
        if not ip_address or not location_function:
            return None

        return location_function(ip_address)

    def _get_location_function(self):
        """ Returns the lookup function for the currently cached AWS IP range data, or None
            if that data has not been loaded yet. Starts the updater thread on first use
            unless the app is configured with TESTING.
        """
        if (not self.app.config.get('TESTING', False) and
                not self._worker_started and
                not self._worker.is_alive()):
            try:
                self._worker.start()
                self._worker_started = True
            except Exception:
                logger.exception('Got exception try to start ip resolver thread')

        try:
            # Read all three entries under the same lock the updater writes them with, so
            # the sync token always matches the range data.
            with CACHE_LOCK:
                sync_token = CACHE.get('sync_token', None)
                if sync_token is not None:
                    all_amazon = CACHE['all_amazon']
                    regions = CACHE['regions']
        except Exception:
            logger.exception('Got exception trying to hit aws ip range cache')
            return None

        if sync_token is None:
            logger.debug('The aws ip range has not been cached from %s',
                         _DATA_FILES['aws-ip-ranges.json'])
            return None

        # Reuse the previously built function while the sync token is unchanged; building a
        # new one on every call would discard its LRU cache each time.
        cached_token, location_function = self._location_function_cache
        if location_function is None or cached_token != sync_token:
            location_function = IPResolver._build_location_function(sync_token, all_amazon,
                                                                    regions, self.geoip_db)
            self._location_function_cache = (sync_token, location_function)

        return location_function

    @staticmethod
    def _build_location_function(sync_token, all_amazon, regions, country_db):
        """ Builds and returns a memoized function mapping an IP address to a
            ResolvedLocation, using the given AWS range sets and GeoIP country database.
        """
        @lru_cache(maxsize=4096)
        def _get_location(ip_address):
            try:
                parsed_ip = IPAddress(ip_address)
            except (AddrFormatError, ValueError):
                # netaddr rejects some malformed input (e.g. values carrying a prefix
                # length) with ValueError rather than AddrFormatError.
                return ResolvedLocation('invalid_ip', None, None, sync_token)

            if parsed_ip not in all_amazon:
                # Try geoip classification
                try:
                    # Pass the canonical string form of the address to the GeoIP reader.
                    found = country_db.country(str(parsed_ip))
                    return ResolvedLocation(
                        'internet',
                        found.continent.code,
                        found.country.iso_code,
                        sync_token,
                    )
                except geoip2.errors.AddressNotFoundError:
                    return ResolvedLocation('internet', None, None, sync_token)

            region = None
            for region_name, region_set in regions.items():
                if parsed_ip in region_set:
                    region = region_name
                    break

            return ResolvedLocation('aws', region, None, sync_token)

        return _get_location

    @staticmethod
    def _parse_amazon_ranges(ranges):
        """ Parses the 'prefixes' entries of the AWS IP ranges document and returns a tuple
            of (IPSet of all listed ranges, dict mapping region name to its IPSet).
        """
        all_amazon = IPSet()
        regions = defaultdict(IPSet)

        for service_description in ranges['prefixes']:
            cidr = IPNetwork(service_description['ip_prefix'])
            region = service_description['region']

            all_amazon.add(cidr)
            regions[region].add(cidr)

        return all_amazon, regions


class _UpdateIPRange(Thread):
    """ Helper class that uses a thread to load the IP ranges from Amazon. """

    def __init__(self, interval):
        Thread.__init__(self)
        self.interval = interval

    @staticmethod
    def _load_ranges():
        """ Loads and parses the AWS IP ranges file. Returns a tuple of
            (sync_token, all_amazon, regions), or None if the file could not be loaded.
        """
        aws_ip_range_json = _get_aws_ip_ranges()
        if aws_ip_range_json is None:
            return None

        sync_token = aws_ip_range_json['syncToken']
        all_amazon, regions = IPResolver._parse_amazon_ranges(aws_ip_range_json)
        return sync_token, all_amazon, regions

    def run(self):
        """ Refreshes CACHE from the AWS IP ranges file every `interval` seconds, retrying
            after _FAILED_UPDATE_RETRY_SECS when the file cannot be loaded or parsed.
        """
        while True:
            try:
                logger.debug('Updating aws ip range from "%s"', 'util/ipresolver/aws-ip-ranges.json')
                loaded = self._load_ranges()
            except Exception:
                # A malformed file must not kill the updater thread; retry later instead.
                logger.exception('Failed trying to update aws ip range')
                loaded = None

            if loaded is None:
                time.sleep(_FAILED_UPDATE_RETRY_SECS)
                continue

            sync_token, all_amazon, regions = loaded
            with CACHE_LOCK:
                CACHE['sync_token'] = sync_token
                CACHE['all_amazon'] = all_amazon
                CACHE['regions'] = regions

            time.sleep(self.interval)