b6336393de
A separate thread now caches the results of parsing the range file, and the IPResolver hits the cache instead of blocking while recomputing the ranges every time. The thread updates every 600s and retries every 60s on failures.
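A minimal usage sketch of the resulting behavior (illustrative only; it assumes the module is importable as util.ipresolver, that app is a Flask-style object exposing a config mapping, and that the GeoLite2 database file referenced below is present):

    from util.ipresolver import IPResolver

    resolver = IPResolver(app)  # starts the background update thread unless app.config['TESTING'] is set

    # resolve_ip() only reads the in-memory cache: it returns None until the background
    # thread has populated it, and a ResolvedLocation namedtuple afterwards.
    location = resolver.resolve_ip('203.0.113.7')  # placeholder address from the documentation range
    if location is not None:
      print(location.provider, location.region)  # 'aws'/'internet' plus a region or continent code
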
223 lines
6.9 KiB
Python
import logging
import json
import time

from collections import namedtuple, defaultdict

from threading import Thread, Lock
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from cachetools import ttl_cache, lru_cache
from netaddr import IPNetwork, IPAddress, IPSet, AddrFormatError

import geoip2.database
import geoip2.errors
import requests

from util.abchelpers import nooper

ResolvedLocation = namedtuple('ResolvedLocation', ['provider', 'region', 'service', 'sync_token'])

logger = logging.getLogger(__name__)

_DATA_FILES = {'aws-ip-ranges.json': 'https://ip-ranges.amazonaws.com/ip-ranges.json'}
_UPDATE_INTERVAL = 600
_FAILED_UPDATE_RETRY_SECS = 60

# Parsed AWS range data shared between the _UpdateIPRange background thread (writer)
# and IPResolver (reader); all writes happen under CACHE_LOCK.
CACHE = {}
CACHE_LOCK = Lock()

def update_resolver_datafiles():
  """ Performs an update of the data file(s) used by the IP Resolver. """
  for filename, url in _DATA_FILES.iteritems():
    logger.debug('Updating IP resolver data file "%s" from URL "%s"', filename, url)
    with open('util/ipresolver/%s' % filename, 'w') as f:
      response = requests.get(url)
      logger.debug('Got %s response for URL %s', response.status_code, url)
      if response.status_code // 100 != 2:
        raise Exception('Got non-2XX status code for URL %s: %s' % (url, response.status_code))

      f.write(response.text)
      logger.debug('Successfully wrote %s', filename)

def _get_aws_ip_ranges():
  try:
    with open('util/ipresolver/aws-ip-ranges.json', 'r') as f:
      return json.loads(f.read())
  except (IOError, ValueError, TypeError):
    logger.exception('Could not load AWS IP Ranges')
    return None


@add_metaclass(ABCMeta)
class IPResolverInterface(object):
  """ Helper class for resolving information about an IP address. """
  @abstractmethod
  def resolve_ip(self, ip_address):
    """ Attempts to return resolved information about the specified IP Address. If such an attempt
        fails, returns None.
    """
    pass

  @abstractmethod
  def is_ip_possible_threat(self, ip_address):
    """ Attempts to return whether the given IP address is a possible abuser or spammer.
        Returns False if the IP address information could not be looked up.
    """
    pass


@nooper
class NoopIPResolver(IPResolverInterface):
  """ No-op version of the IP resolver. """
  pass


class IPResolver(IPResolverInterface):
  def __init__(self, app):
    self.app = app
    self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')
    self._worker = _UpdateIPRange(_UPDATE_INTERVAL)
    if not app.config.get('TESTING', False):
      self._worker.start()

  @ttl_cache(maxsize=100, ttl=600)
  def is_ip_possible_threat(self, ip_address):
    if self.app.config.get('THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT') is None:
      return False

    if self.app.config.get('IP_DATA_API_KEY') is None:
      return False

    if not ip_address:
      return False

    api_key = self.app.config['IP_DATA_API_KEY']

    try:
      logger.debug('Requesting IP data for IP %s', ip_address)
      r = requests.get('https://api.ipdata.co/%s/threat?api-key=%s' % (ip_address, api_key),
                       timeout=1)
      if r.status_code != 200:
        logger.debug('Got non-200 response for IP %s: %s', ip_address, r.status_code)
        return False

      logger.debug('Got IP data for IP %s: %s => %s', ip_address, r.status_code, r.json())
      threat_data = r.json()
      return threat_data.get('is_threat', False) or threat_data.get('is_bogon', False)
    except Exception:
      # requests.RequestException from the HTTP call and ValueError from r.json() both land here.
      logger.exception('Got exception when trying to lookup IP Address')

    return False

  def resolve_ip(self, ip_address):
    """ Attempts to return resolved information about the specified IP Address. If such an attempt
        fails, returns None.
    """
    location_function = self._get_location_function()
    if not ip_address or not location_function:
      return None

    return location_function(ip_address)

  def _get_location_function(self):
    try:
      cache = CACHE
      sync_token = cache.get('sync_token', None)
      if sync_token is None:
        logger.debug('The aws ip range has not been cached from %s', _DATA_FILES['aws-ip-ranges.json'])
        return None

      all_amazon = cache['all_amazon']
      regions = cache['regions']
    except Exception:
      # A KeyError on a partially populated cache ends up here as well.
      logger.exception('Got exception trying to hit aws ip range cache')
      return None

    return IPResolver._build_location_function(sync_token, all_amazon, regions, self.geoip_db)

  @staticmethod
  def _build_location_function(sync_token, all_amazon, regions, country_db):
    @lru_cache(maxsize=4096)
    def _get_location(ip_address):
      try:
        parsed_ip = IPAddress(ip_address)
      except AddrFormatError:
        return ResolvedLocation('invalid_ip', None, None, sync_token)

      if parsed_ip not in all_amazon:
        # Try geoip classification
        try:
          found = country_db.country(str(parsed_ip))
          return ResolvedLocation(
            'internet',
            found.continent.code,
            found.country.iso_code,
            sync_token,
          )
        except geoip2.errors.AddressNotFoundError:
          return ResolvedLocation('internet', None, None, sync_token)

      region = None

      for region_name, region_set in regions.items():
        if parsed_ip in region_set:
          region = region_name
          break

      return ResolvedLocation('aws', region, None, sync_token)

    return _get_location

  @staticmethod
  def _parse_amazon_ranges(ranges):
    # `ranges` is the parsed aws-ip-ranges.json document: each entry under 'prefixes'
    # carries an 'ip_prefix' CIDR and the 'region' it belongs to.
    all_amazon = IPSet()
    regions = defaultdict(IPSet)

    for service_description in ranges['prefixes']:
      cidr = IPNetwork(service_description['ip_prefix'])
      region = service_description['region']

      all_amazon.add(cidr)
      regions[region].add(cidr)

    return all_amazon, regions


class _UpdateIPRange(Thread):
  """ Helper class that uses a thread to load the IP ranges from Amazon. """
  def __init__(self, interval):
    Thread.__init__(self)
    self.interval = interval
    # Run as a daemon so a pending sleep never blocks process shutdown.
    self.daemon = True

  def run(self):
    while True:
      try:
        logger.debug('Updating aws ip range from "%s"', 'util/ipresolver/aws-ip-ranges.json')
        aws_ip_range_json = _get_aws_ip_ranges()
      except Exception:
        logger.exception('Failed trying to update aws ip range')
        time.sleep(_FAILED_UPDATE_RETRY_SECS)
        continue

      if aws_ip_range_json is None:
        # _get_aws_ip_ranges logs and returns None on failure; retry shortly.
        time.sleep(_FAILED_UPDATE_RETRY_SECS)
        continue

      sync_token = aws_ip_range_json['syncToken']
      all_amazon, regions = IPResolver._parse_amazon_ranges(aws_ip_range_json)

      with CACHE_LOCK:
        CACHE['sync_token'] = sync_token
        CACHE['all_amazon'] = all_amazon
        CACHE['regions'] = regions

      time.sleep(self.interval)