2017-09-26 18:47:23 +00:00
|
|
|
import logging
|
2017-09-26 20:08:50 +00:00
|
|
|
import json
|
2018-08-23 15:49:51 +00:00
|
|
|
import time
|
2018-04-20 15:01:05 +00:00
|
|
|
|
|
|
|
from collections import namedtuple, defaultdict
|
2017-09-26 18:47:23 +00:00
|
|
|
|
2018-08-23 15:49:51 +00:00
|
|
|
from threading import Thread, Lock
|
2017-10-17 18:29:40 +00:00
|
|
|
from abc import ABCMeta, abstractmethod
|
|
|
|
from six import add_metaclass
|
2017-09-26 20:08:50 +00:00
|
|
|
from cachetools import ttl_cache, lru_cache
|
2017-09-26 18:47:23 +00:00
|
|
|
from netaddr import IPNetwork, IPAddress, IPSet, AddrFormatError
|
|
|
|
|
|
|
|
import geoip2.database
|
|
|
|
import geoip2.errors
|
2018-04-20 15:01:05 +00:00
|
|
|
import requests
|
2017-09-26 18:47:23 +00:00
|
|
|
|
2017-10-17 18:29:40 +00:00
|
|
|
from util.abchelpers import nooper
|
|
|
|
|
2018-12-05 20:19:37 +00:00
|
|
|
# Record produced for every resolved IP address. The location function built by
# IPResolver fills `provider` with 'aws', 'internet' or 'invalid_ip'.
ResolvedLocation = namedtuple(
    'ResolvedLocation',
    ['provider', 'region', 'service', 'sync_token', 'country_iso_code'],
)
|
2017-09-26 18:47:23 +00:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Data files used by the resolver: local file name -> URL it is downloaded from.
_DATA_FILES = {
    'aws-ip-ranges.json': 'https://ip-ranges.amazonaws.com/ip-ranges.json',
}

# Seconds the update worker sleeps between two successful refreshes of the AWS IP ranges.
_UPDATE_INTERVAL = 600

# Seconds the update worker sleeps before retrying after a failed refresh.
_FAILED_UPDATE_RETRY_SECS = 60

# Parsed AWS IP range data shared between the update worker (writer) and IPResolver (reader).
# The worker stores the keys 'sync_token', 'all_amazon' and 'regions' while holding CACHE_LOCK.
CACHE = {}
CACHE_LOCK = Lock()
|
2017-09-27 20:39:47 +00:00
|
|
|
|
|
|
|
def update_resolver_datafiles():
    """ Performs an update of the data file(s) used by the IP Resolver.

    Every entry of ``_DATA_FILES`` is downloaded from its URL and written to
    ``util/ipresolver/<filename>``.

    Raises:
        Exception: if a URL answers with a non-2XX status code. Errors raised by the HTTP
            request itself or by writing the file propagate unchanged.
    """
    for filename, url in _DATA_FILES.items():
        logger.debug('Updating IP resolver data file "%s" from URL "%s"', filename, url)

        # Download before opening the target: opening with 'w' truncates the file, so a failed
        # request must not be able to wipe the previously downloaded data.
        response = requests.get(url)
        logger.debug('Got %s response for URL %s', response.status_code, url)

        # Floor-dividing by 100 yields the status class, which is 2 for every 2XX code.
        if response.status_code // 100 != 2:
            raise Exception('Got non-2XX status code for URL %s: %s' % (url, response.status_code))

        with open('util/ipresolver/%s' % filename, 'w') as f:
            f.write(response.text)

        logger.debug('Successfully wrote %s', filename)
|
|
|
|
|
2018-08-23 15:49:51 +00:00
|
|
|
def _get_aws_ip_ranges():
    """ Loads the AWS IP ranges document from ``util/ipresolver/aws-ip-ranges.json``.

    Returns the decoded JSON document. If the file cannot be read or its contents cannot be
    decoded, the failure is logged and None is returned.
    """
    try:
        with open('util/ipresolver/aws-ip-ranges.json', 'r') as ranges_file:
            contents = ranges_file.read()
        return json.loads(contents)
    except (IOError, ValueError, TypeError):
        logger.exception('Could not load AWS IP Ranges')
        return None
|
|
|
|
|
2017-10-17 18:29:40 +00:00
|
|
|
|
|
|
|
@add_metaclass(ABCMeta)
class IPResolverInterface(object):
    """ Abstract contract for objects that look up information about an IP address. """

    @abstractmethod
    def resolve_ip(self, ip_address):
        """ Resolves information about the given IP address.

        Implementations return None when the address could not be resolved.
        """

    @abstractmethod
    def is_ip_possible_threat(self, ip_address):
        """ Reports whether the given IP address is a possible abuser or spammer.

        Implementations return False when the IP address information could not be looked up.
        """
|
|
|
|
|
2017-10-17 18:29:40 +00:00
|
|
|
|
|
|
|
@nooper
class NoopIPResolver(IPResolverInterface):
    """ No-op version of the IP resolver. """
    # NOTE(review): the interface's abstract methods are presumably supplied by @nooper
    # (util.abchelpers) -- confirm against that helper.
|
|
|
|
|
|
|
|
|
|
|
|
class IPResolver(IPResolverInterface):
    """ IP resolver backed by a GeoLite2 country database and the cached AWS IP ranges. """

    def __init__(self, app):
        """ Creates the resolver.

        Args:
            app: application object whose ``config`` mapping is read for ``TESTING``,
                ``THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT`` and ``IP_DATA_API_KEY``.
        """
        self.app = app
        self.geoip_db = geoip2.database.Reader('util/ipresolver/GeoLite2-Country.mmdb')
        self._worker = _UpdateIPRange(_UPDATE_INTERVAL)
        self._worker_started = False

        # Most recently built location function together with the inputs it was built from:
        # (sync_token, all_amazon, regions, country_db, function). Kept so that the LRU cache
        # inside the function survives across resolve_ip() calls.
        self._location_function_cache = None

    @ttl_cache(maxsize=100, ttl=600)
    def is_ip_possible_threat(self, ip_address):
        """ Attempts to return whether the given IP address is a possible abuser or spammer.

        The answer comes from the api.ipdata.co threat endpoint (1 second timeout). Returns
        False when threat checking is not configured, when no IP address is given, or when the
        lookup fails for any reason. Results, including those False answers, are memoized by
        the ttl_cache decorator.
        """
        config = self.app.config
        if config.get('THREAT_NAMESPACE_MAXIMUM_BUILD_COUNT') is None:
            return False

        api_key = config.get('IP_DATA_API_KEY')
        if api_key is None:
            return False

        if not ip_address:
            return False

        try:
            logger.debug('Requesting IP data for IP %s', ip_address)
            r = requests.get('https://api.ipdata.co/%s/threat?api-key=%s' % (ip_address, api_key),
                             timeout=1)
            if r.status_code != 200:
                logger.debug('Got non-200 response for IP %s: %s', ip_address, r.status_code)
                return False

            # Decode the body once and reuse it for both the log line and the decision.
            threat_data = r.json()
            logger.debug('Got IP data for IP %s: %s => %s', ip_address, r.status_code, threat_data)
            return threat_data.get('is_threat', False) or threat_data.get('is_bogon', False)
        except Exception:
            # Best effort: request failures, undecodable bodies and anything else unexpected
            # are logged and reported as "not a threat".
            logger.exception('Got exception when trying to lookup IP Address')

        return False

    def resolve_ip(self, ip_address):
        """ Attempts to return resolved information about the specified IP Address. If such an
        attempt fails, returns None.

        Returns:
            A ResolvedLocation, or None when no IP address was given or no location function
            could be obtained.
        """
        location_function = self._get_location_function()
        if not ip_address:
            return None

        if not location_function:
            logger.debug('No location function could be defined for IP address resolution')
            return None

        return location_function(ip_address)

    def _get_location_function(self):
        """ Returns a callable mapping an IP address to a ResolvedLocation, or None if the AWS
        IP range cache could not be read.

        Unless the ``TESTING`` config flag is set, the first call also starts the background
        worker that fills the cache.
        """
        if (not self.app.config.get('TESTING', False) and not self._worker_started and
                not self._worker.is_alive()):
            try:
                self._worker.start()
                self._worker_started = True
            except Exception:
                logger.exception('Got exception try to start ip resolver thread')

        try:
            # Take the snapshot under the lock the worker holds while writing, so the token and
            # the range sets always belong to the same update.
            with CACHE_LOCK:
                sync_token = CACHE.get('sync_token', None)
                if sync_token is not None:
                    all_amazon = CACHE['all_amazon']
                    regions = CACHE['regions']
        except Exception:
            logger.exception('Got exception trying to hit aws ip range cache')
            return None

        if sync_token is None:
            logger.debug('The aws ip range has not been cached from %s',
                         _DATA_FILES['aws-ip-ranges.json'])
            return IPResolver._build_location_function(sync_token, set(), {}, self.geoip_db)

        # Reuse the previously built function while its inputs are unchanged; building a new
        # one on every call would start with an empty LRU cache each time.
        cached = self._location_function_cache
        if (cached is not None and cached[0] == sync_token and cached[1] is all_amazon and
                cached[2] is regions and cached[3] is self.geoip_db):
            return cached[4]

        location_function = IPResolver._build_location_function(sync_token, all_amazon, regions,
                                                                self.geoip_db)
        self._location_function_cache = (sync_token, all_amazon, regions, self.geoip_db,
                                         location_function)
        return location_function

    @staticmethod
    def _build_location_function(sync_token, all_amazon, regions, country_db):
        """ Builds the function that classifies a single IP address.

        Args:
            sync_token: value stored in the ``sync_token`` field of every result.
            all_amazon: container of all AWS addresses (supports ``in``).
            regions: mapping of AWS region name to a container of that region's addresses.
            country_db: object whose ``country()`` method performs the GeoIP lookup.

        Returns:
            A function taking an IP address and returning a ResolvedLocation whose provider is
            'invalid_ip', 'internet' or 'aws'. Results are memoized with an LRU cache.
        """
        @lru_cache(maxsize=4096)
        def _get_location(ip_address):
            try:
                parsed_ip = IPAddress(ip_address)
            except AddrFormatError:
                return ResolvedLocation('invalid_ip', None, None, sync_token, None)

            # Try geoip classification
            try:
                geoinfo = country_db.country(parsed_ip)
            except geoip2.errors.AddressNotFoundError:
                geoinfo = None

            if parsed_ip not in all_amazon:
                if geoinfo:
                    # NOTE(review): the continent code lands in `region` and the country ISO
                    # code in `service` -- kept as-is; confirm callers rely on this.
                    return ResolvedLocation(
                        'internet',
                        geoinfo.continent.code,
                        geoinfo.country.iso_code,
                        sync_token,
                        geoinfo.country.iso_code,
                    )

                return ResolvedLocation('internet', None, None, sync_token, None)

            # An AWS address: report the first region whose range contains it, if any.
            region = None
            for region_name, region_set in regions.items():
                if parsed_ip in region_set:
                    region = region_name
                    break

            return ResolvedLocation('aws', region, None, sync_token,
                                    geoinfo.country.iso_code if geoinfo else None)

        return _get_location

    @staticmethod
    def _parse_amazon_ranges(ranges):
        """ Converts the AWS IP ranges document into IP sets.

        Args:
            ranges: decoded document; each entry of ``ranges['prefixes']`` must provide
                ``ip_prefix`` and ``region``.

        Returns:
            Tuple ``(all_amazon, regions)``: an IPSet of every prefix and a mapping of region
            name to the IPSet of that region's prefixes.
        """
        all_amazon = IPSet()
        regions = defaultdict(IPSet)

        for service_description in ranges['prefixes']:
            cidr = IPNetwork(service_description['ip_prefix'])
            region = service_description['region']

            all_amazon.add(cidr)
            regions[region].add(cidr)

        return all_amazon, regions
|
2018-08-23 15:49:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
class _UpdateIPRange(Thread):
    """ Helper thread that periodically loads the AWS IP ranges into the module-level CACHE. """

    def __init__(self, interval):
        """ Creates the (not yet started) worker.

        Args:
            interval: seconds to sleep after each successful refresh.
        """
        Thread.__init__(self)
        # run() never returns; as a non-daemon thread it would keep the interpreter from
        # exiting.
        self.daemon = True
        self.interval = interval

    def run(self):
        """ Reloads the ranges forever, retrying sooner after a failed attempt. """
        while True:
            logger.debug('Updating aws ip range from "%s"', 'util/ipresolver/aws-ip-ranges.json')
            aws_ip_range_json = _get_aws_ip_ranges()
            if aws_ip_range_json is None:
                # No exception is being handled at this point, so there is no traceback to
                # attach: log a plain error instead of using logger.exception.
                logger.error('Failed trying to update aws ip range')
                time.sleep(_FAILED_UPDATE_RETRY_SECS)
                continue

            try:
                sync_token = aws_ip_range_json['syncToken']
                all_amazon, regions = IPResolver._parse_amazon_ranges(aws_ip_range_json)
            except Exception:
                # A malformed document must not end the loop: nothing in this class would
                # start the refresh again.
                logger.exception('Failed trying to update aws ip range')
                time.sleep(_FAILED_UPDATE_RETRY_SECS)
                continue

            # Publish all three values together so they stay consistent with each other.
            with CACHE_LOCK:
                CACHE['sync_token'] = sync_token
                CACHE['all_amazon'] = all_amazon
                CACHE['regions'] = regions

            time.sleep(self.interval)
|