initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

View file

@@ -0,0 +1,27 @@
import logging

logger = logging.getLogger(__name__)


class LogSendException(Exception):
    """ A generic error raised when sending logs to their destination,
    e.g. Kinesis, Kafka, Elasticsearch, ...
    """
    pass


class LogProducerProxy(object):
    def __init__(self):
        self._model = None

    def initialize(self, model):
        self._model = model
        logger.info('===============================')
        logger.info('Using producer `%s`', self._model)
        logger.info('===============================')

    def __getattr__(self, attr):
        if not self._model:
            raise AttributeError("LogProducerProxy is not initialized")
        return getattr(self._model, attr)
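
For orientation, here is a minimal wiring sketch, not part of this commit: the import path is inferred from imports elsewhere in the diff, and the logs_producer global plus the configure_logs_producer helper are illustrative assumptions.

# Hypothetical wiring sketch: a module-level proxy is initialized once with a
# concrete producer; all later attribute access is delegated to that producer.
from data.logs_model.logs_producer import LogProducerProxy  # assumed import path

logs_producer = LogProducerProxy()


def configure_logs_producer(producer):
    # `producer` can be any object implementing LogProducerInterface.
    logs_producer.initialize(producer)

# After initialize(), calls such as logs_producer.send(logentry) are forwarded
# to the wrapped producer; before initialize(), they raise AttributeError.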

View file

@@ -0,0 +1,25 @@
import logging

from elasticsearch.exceptions import ElasticsearchException

from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer import LogSendException

logger = logging.getLogger(__name__)


class ElasticsearchLogsProducer(LogProducerInterface):
    """ Log producer writing log entries to Elasticsearch.
    This implementation writes directly to Elasticsearch without a streaming/queueing service.
    """
    def send(self, logentry):
        try:
            logentry.save()
        except ElasticsearchException as ex:
            logger.exception('ElasticsearchLogsProducer error sending log to Elasticsearch: %s', ex)
            raise LogSendException('ElasticsearchLogsProducer error sending log to Elasticsearch: %s' % ex)
        except Exception as e:
            logger.exception('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s', e)
            raise LogSendException('ElasticsearchLogsProducer exception sending log to Elasticsearch: %s' % e)
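
A hedged usage sketch: the module path for the producer is an assumption (file names are not shown in this view), the LogEntry field values are placeholders, and a real call also requires an Elasticsearch connection to be configured for the LogEntry document.

from datetime import datetime

from data.logs_model.elastic_logs import LogEntry
# Assumed module path for the producer defined above:
from data.logs_model.logs_producer.elasticsearch_logs_producer import ElasticsearchLogsProducer

producer = ElasticsearchLogsProducer()
entry = LogEntry(random_id='123-45', ip='0.0.0.0',
                 metadata_json='{"key": "value"}', datetime=datetime.utcnow())

# Writes the document directly to Elasticsearch; any failure surfaces as LogSendException.
producer.send(entry)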

View file

@@ -0,0 +1,8 @@
from abc import ABCMeta, abstractmethod

from six import add_metaclass


@add_metaclass(ABCMeta)
class LogProducerInterface(object):
    @abstractmethod
    def send(self, logentry):
        """ Send a log entry to the configured log infrastructure. """

View file

@@ -0,0 +1,45 @@
import logging

from kafka.errors import KafkaError, KafkaTimeoutError
from kafka import KafkaProducer

from data.logs_model.shared import epoch_ms
from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException

logger = logging.getLogger(__name__)

DEFAULT_MAX_BLOCK_SECONDS = 5


class KafkaLogsProducer(LogProducerInterface):
    """ Log producer writing log entries to a Kafka stream. """
    def __init__(self, bootstrap_servers=None, topic=None, client_id=None, max_block_seconds=None):
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        self.client_id = client_id
        self.max_block_ms = (max_block_seconds or DEFAULT_MAX_BLOCK_SECONDS) * 1000

        self._producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers,
                                       client_id=self.client_id,
                                       max_block_ms=self.max_block_ms,
                                       value_serializer=logs_json_serializer)

    def send(self, logentry):
        try:
            # send() has a max_block_ms timeout and get() has a max_block_ms timeout,
            # for an upper bound of 2x max_block_ms before delivery is guaranteed or fails.
            future = self._producer.send(self.topic, logentry.to_dict(),
                                         timestamp_ms=epoch_ms(logentry.datetime))
            record_metadata = future.get(timeout=self.max_block_ms)
            assert future.succeeded()
        except KafkaTimeoutError as kte:
            logger.exception('KafkaLogsProducer timeout sending log to Kafka: %s', kte)
            raise LogSendException('KafkaLogsProducer timeout sending log to Kafka: %s' % kte)
        except KafkaError as ke:
            logger.exception('KafkaLogsProducer error sending log to Kafka: %s', ke)
            raise LogSendException('KafkaLogsProducer error sending log to Kafka: %s' % ke)
        except Exception as e:
            logger.exception('KafkaLogsProducer exception sending log to Kafka: %s', e)
            raise LogSendException('KafkaLogsProducer exception sending log to Kafka: %s' % e)
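
A hedged configuration sketch: the broker address, topic, and client id are placeholders, the module path is assumed, and constructing the producer requires a reachable Kafka cluster.

from datetime import datetime

from data.logs_model.elastic_logs import LogEntry
# Assumed module path for the producer defined above:
from data.logs_model.logs_producer.kafka_logs_producer import KafkaLogsProducer

producer = KafkaLogsProducer(bootstrap_servers=['kafka:9092'],
                             topic='logentry',
                             client_id='quay-logs-producer',
                             max_block_seconds=5)

entry = LogEntry(random_id='123-45', ip='0.0.0.0',
                 metadata_json='{"key": "value"}', datetime=datetime.utcnow())

# Blocks for at most roughly 2x max_block_ms, then either returns or raises LogSendException.
producer.send(entry)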

View file

@@ -0,0 +1,75 @@
import logging
import hashlib
import random

import boto3
from botocore.exceptions import ClientError
from botocore.client import Config

from data.logs_model.logs_producer.interface import LogProducerInterface
from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.logs_producer import LogSendException

logger = logging.getLogger(__name__)

KINESIS_PARTITION_KEY_PREFIX = 'logentry_partition_key_'
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_READ_TIMEOUT = 5
MAX_RETRY_ATTEMPTS = 5
DEFAULT_MAX_POOL_CONNECTIONS = 10


def _partition_key(number_of_shards=None):
    """ Generate a partition key for an AWS Kinesis stream.
    If the number of shards is specified, generate keys where the size of the key space is
    the number of shards.
    """
    # Encode before hashing so this works under both Python 2 and Python 3.
    if number_of_shards is not None:
        shard_number = random.randrange(0, number_of_shards)
        key = hashlib.sha1((KINESIS_PARTITION_KEY_PREFIX + str(shard_number)).encode('utf-8')).hexdigest()
    else:
        key = hashlib.sha1((KINESIS_PARTITION_KEY_PREFIX + str(random.getrandbits(256))).encode('utf-8')).hexdigest()

    return key


class KinesisStreamLogsProducer(LogProducerInterface):
    """ Log producer writing log entries to an Amazon Kinesis Data Stream. """
    def __init__(self, stream_name, aws_region, aws_access_key=None, aws_secret_key=None,
                 connect_timeout=None, read_timeout=None, max_retries=None,
                 max_pool_connections=None):
        self._stream_name = stream_name
        self._aws_region = aws_region
        self._aws_access_key = aws_access_key
        self._aws_secret_key = aws_secret_key
        self._connect_timeout = connect_timeout or DEFAULT_CONNECT_TIMEOUT
        self._read_timeout = read_timeout or DEFAULT_READ_TIMEOUT
        self._max_retries = max_retries or MAX_RETRY_ATTEMPTS
        self._max_pool_connections = max_pool_connections or DEFAULT_MAX_POOL_CONNECTIONS

        client_config = Config(connect_timeout=self._connect_timeout,
                               read_timeout=self._read_timeout,
                               retries={'max_attempts': self._max_retries},
                               max_pool_connections=self._max_pool_connections)
        self._producer = boto3.client('kinesis', use_ssl=True,
                                      region_name=self._aws_region,
                                      aws_access_key_id=self._aws_access_key,
                                      aws_secret_access_key=self._aws_secret_key,
                                      config=client_config)

    def send(self, logentry):
        try:
            data = logs_json_serializer(logentry)
            self._producer.put_record(
                StreamName=self._stream_name,
                Data=data,
                PartitionKey=_partition_key()
            )
        except ClientError as ce:
            logger.exception('KinesisStreamLogsProducer client error sending log to Kinesis: %s', ce)
            raise LogSendException('KinesisStreamLogsProducer client error sending log to Kinesis: %s' % ce)
        except Exception as e:
            logger.exception('KinesisStreamLogsProducer exception sending log to Kinesis: %s', e)
            raise LogSendException('KinesisStreamLogsProducer exception sending log to Kinesis: %s' % e)
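
And a similar hedged sketch for Kinesis: the stream name, region, and credentials are placeholders that would normally come from deployment configuration, and the module path is assumed.

from datetime import datetime

from data.logs_model.elastic_logs import LogEntry
# Assumed module path for the producer defined above:
from data.logs_model.logs_producer.kinesis_stream_logs_producer import KinesisStreamLogsProducer

producer = KinesisStreamLogsProducer(stream_name='quay-logs',
                                     aws_region='us-east-1',
                                     aws_access_key='access-key-placeholder',
                                     aws_secret_key='secret-key-placeholder')

entry = LogEntry(random_id='123-45', ip='0.0.0.0',
                 metadata_json='{"key": "value"}', datetime=datetime.utcnow())

# Serializes the entry to JSON bytes and puts it on the stream under a random
# partition key; failures surface as LogSendException.
producer.send(entry)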

View file

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
import logging
import json
from datetime import datetime

import pytest

from data.logs_model.logs_producer.util import logs_json_serializer
from data.logs_model.elastic_logs import LogEntry

logger = logging.getLogger(__name__)

TEST_DATETIME = datetime.utcnow()

TEST_JSON_STRING = '{"a": "b", "c": "d"}'
TEST_JSON_STRING_WITH_UNICODE = u'{"éëê": "îôû"}'

VALID_LOGENTRY = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING, datetime=TEST_DATETIME)
VALID_LOGENTRY_WITH_UNICODE = LogEntry(random_id='123-45', ip='0.0.0.0', metadata_json=TEST_JSON_STRING_WITH_UNICODE, datetime=TEST_DATETIME)

VALID_LOGENTRY_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"a\\": \\"b\\", \\"c\\": \\"d\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()
VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT = '{"datetime": "%s", "ip": "0.0.0.0", "metadata_json": "{\\"\\u00e9\\u00eb\\u00ea\\": \\"\\u00ee\\u00f4\\u00fb\\"}", "random_id": "123-45"}' % TEST_DATETIME.isoformat()


@pytest.mark.parametrize(
    'is_valid, given_input, expected_output',
    [
        # Valid inputs
        pytest.param(True, VALID_LOGENTRY, VALID_LOGENTRY_EXPECTED_OUTPUT),
        # With unicode
        pytest.param(True, VALID_LOGENTRY_WITH_UNICODE, VALID_LOGENTRY_WITH_UNICODE_EXPECTED_OUTPUT),
    ])
def test_logs_json_serializer(is_valid, given_input, expected_output):
    if not is_valid:
        with pytest.raises(ValueError) as ve:
            data = logs_json_serializer(given_input)
    else:
        data = logs_json_serializer(given_input, sort_keys=True)
        assert data == expected_output

        # Make sure the datetime was serialized in the correct ISO8601
        datetime_str = json.loads(data)['datetime']
        assert datetime_str == TEST_DATETIME.isoformat()

View file

@@ -0,0 +1,15 @@
import json

from datetime import datetime


class LogEntryJSONEncoder(json.JSONEncoder):
    """ JSON encoder to encode datetimes to ISO8601 format. """
    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()

        return super(LogEntryJSONEncoder, self).default(obj)


def logs_json_serializer(logentry, sort_keys=False):
    """ Serializes a LogEntry to json bytes. """
    return json.dumps(logentry.to_dict(), cls=LogEntryJSONEncoder,
                      ensure_ascii=True, sort_keys=sort_keys).encode('ascii')
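
A quick illustration of the encoder's datetime handling (the payload below is arbitrary example data):

import json
from datetime import datetime

from data.logs_model.logs_producer.util import LogEntryJSONEncoder

payload = {'kind': 'push_repo', 'datetime': datetime(2019, 11, 12, 16, 9, 47)}
print(json.dumps(payload, cls=LogEntryJSONEncoder, sort_keys=True))
# Prints: {"datetime": "2019-11-12T16:09:47", "kind": "push_repo"}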