initial import for Open Source 🎉

Jimmy Zelinskie 2019-11-12 11:09:47 -05:00
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions

@@ -0,0 +1,390 @@
import json
import uuid
import fnmatch
from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime
import dateutil.parser
from httmock import urlmatch, HTTMock
FAKE_ES_HOST = 'fakees'
EMPTY_RESULT = {
'hits': {'hits': [], 'total': 0},
'_shards': {'successful': 1, 'total': 1},
}
def parse_query(query):
if not query:
return {}
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&")}
@contextmanager
def fake_elasticsearch(allow_wildcard=True):
templates = {}
docs = defaultdict(list)
scrolls = {}
id_counter = [1]
def transform(value, field_name):
# TODO: implement this using a real index template if we ever need more than a few
# fields here.
if field_name == 'datetime':
if isinstance(value, int):
return datetime.utcfromtimestamp(value / 1000)
parsed = dateutil.parser.parse(value)
return parsed
return value
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='GET')
def get_template(url, request):
template_name = url[len('/_template/'):]
if template_name in templates:
return {'status_code': 200}
return {'status_code': 404}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='PUT')
def put_template(url, request):
template_name = url[len('/_template/'):]
templates[template_name] = True
return {'status_code': 201}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_doc', method='POST')
def post_doc(url, request):
index_name, _ = url.path[1:].split('/')
item = json.loads(request.body)
item['_id'] = item['random_id']
id_counter[0] += 1
docs[index_name].append(item)
return {
'status_code': 204,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({
"result": "created",
}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='DELETE')
def index_delete(url, request):
index_name_or_pattern = url.path[1:]
to_delete = []
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
to_delete.append(index_name)
for index in to_delete:
docs.pop(index)
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': {'acknowledged': True}
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='GET')
def index_lookup(url, request):
index_name_or_pattern = url.path[1:]
found = {}
for index_name in docs.keys():
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found[index_name] = {}
if not found:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(found),
}
def _match_query(index_name_or_pattern, query):
found = []
found_index = False
for index_name in docs.keys():
if not allow_wildcard and index_name_or_pattern.find('*') >= 0:
break
if not fnmatch.fnmatch(index_name, index_name_or_pattern):
continue
found_index = True
def _is_match(doc, current_query):
if current_query is None:
return True
for filter_type, filter_params in current_query.iteritems():
for field_name, filter_props in filter_params.iteritems():
if filter_type == 'range':
lt = transform(filter_props['lt'], field_name)
gte = transform(filter_props['gte'], field_name)
doc_value = transform(doc[field_name], field_name)
if not (doc_value < lt and doc_value >= gte):
return False
elif filter_type == 'term':
doc_value = transform(doc[field_name], field_name)
return doc_value == filter_props
elif filter_type == 'terms':
doc_value = transform(doc[field_name], field_name)
return doc_value in filter_props
elif filter_type == 'bool':
assert not 'should' in filter_params, 'should is unsupported'
must = filter_params.get('must')
must_not = filter_params.get('must_not')
filter_bool = filter_params.get('filter')
if must:
for check in must:
if not _is_match(doc, check):
return False
if must_not:
for check in must_not:
if _is_match(doc, check):
return False
if filter_bool:
for check in filter_bool:
if not _is_match(doc, check):
return False
else:
raise Exception('Unimplemented query %s: %s' % (filter_type, query))
return True
for doc in docs[index_name]:
if not _is_match(doc, query):
continue
found.append({'_source': doc, '_index': index_name})
return found, found_index or (index_name_or_pattern.find('*') >= 0)
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_count$', method='GET')
def count_docs(url, request):
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
found, found_index = _match_query(index_name_or_pattern, request['query'])
if not found_index:
return {
'status_code': 404,
}
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps({'count': len(found)}),
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='GET')
def lookup_scroll(url, request):
request_obj = json.loads(request.body)
scroll_id = request_obj['scroll_id']
if scroll_id in scrolls:
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(scrolls[scroll_id]),
}
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='DELETE')
def delete_scroll(url, request):
request = json.loads(request.body)
for scroll_id in request['scroll_id']:
scrolls.pop(scroll_id, None)
return {
'status_code': 404,
}
@urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_search$', method='GET')
def lookup_docs(url, request):
query_params = parse_query(url.query)
request = json.loads(request.body)
index_name_or_pattern, _ = url.path[1:].split('/')
# Find matching docs.
query = request.get('query')
found, found_index = _match_query(index_name_or_pattern, query)
if not found_index:
return {
'status_code': 404,
}
# Sort.
sort = request.get('sort')
if sort:
if sort == ['_doc'] or sort == '_doc':
found.sort(key=lambda x: x['_source']['_id'])
else:
def get_sort_key(item):
source = item['_source']
key = ''
for sort_config in sort:
for sort_key, direction in sort_config.iteritems():
assert direction == 'desc'
sort_key = sort_key.replace('.keyword', '')
key += str(transform(source[sort_key], sort_key))
key += '|'
return key
found.sort(key=get_sort_key, reverse=True)
# Search after.
search_after = request.get('search_after')
if search_after:
sort_fields = []
for sort_config in sort:
if isinstance(sort_config, unicode):
sort_fields.append(sort_config)
continue
for sort_key, _ in sort_config.iteritems():
sort_key = sort_key.replace('.keyword', '')
sort_fields.append(sort_key)
for index, search_after_value in enumerate(search_after):
field_name = sort_fields[index]
value = transform(search_after_value, field_name)
if field_name == '_doc':
found = [f for f in found if transform(f['_source']['_id'], field_name) > value]
else:
found = [f for f in found if transform(f['_source'][field_name], field_name) < value]
if len(found) < 2:
break
if field_name == '_doc':
if found[0]['_source']['_id'] != found[1]['_source']['_id']:
break
else:
if found[0]['_source'][field_name] != found[1]['_source'][field_name]:
break
# Size.
size = request.get('size')
if size:
found = found[0:size]
# Aggregation.
# {u'query':
# {u'range':
# {u'datetime': {u'lt': u'2019-06-27T15:45:09.768085',
# u'gte': u'2019-06-27T15:35:09.768085'}}},
# u'aggs': {
# u'by_id': {
# u'terms': {u'field': u'kind_id'},
# u'aggs': {
# u'by_date': {u'date_histogram': {u'field': u'datetime', u'interval': u'day'}}}}},
# u'size': 0}
def _by_field(agg_field_params, results):
aggregated_by_field = defaultdict(list)
for agg_means, agg_means_params in agg_field_params.iteritems():
if agg_means == 'terms':
field_name = agg_means_params['field']
for result in results:
value = result['_source'][field_name]
aggregated_by_field[value].append(result)
elif agg_means == 'date_histogram':
field_name = agg_means_params['field']
interval = agg_means_params['interval']
for result in results:
value = transform(result['_source'][field_name], field_name)
aggregated_by_field[getattr(value, interval)].append(result)
elif agg_means == 'aggs':
# Skip. Handled below.
continue
else:
raise Exception('Unsupported aggregation method: %s' % agg_means)
# Invoke the aggregation recursively.
buckets = []
for field_value, field_results in aggregated_by_field.iteritems():
aggregated = _aggregate(agg_field_params, field_results)
if isinstance(aggregated, list):
aggregated = {'doc_count': len(aggregated)}
aggregated['key'] = field_value
buckets.append(aggregated)
return {'buckets': buckets}
def _aggregate(query_config, results):
agg_params = query_config.get(u'aggs')
if not agg_params:
return results
by_field_name = {}
for agg_field_name, agg_field_params in agg_params.iteritems():
by_field_name[agg_field_name] = _by_field(agg_field_params, results)
return by_field_name
final_result = {
'hits': {
'hits': found,
'total': len(found),
},
'_shards': {
'successful': 1,
'total': 1,
},
'aggregations': _aggregate(request, found),
}
if query_params.get('scroll'):
scroll_id = str(uuid.uuid4())
scrolls[scroll_id] = EMPTY_RESULT
final_result['_scroll_id'] = scroll_id
return {
'status_code': 200,
'headers': {
'Content-Type': 'application/json',
},
'content': json.dumps(final_result),
}
@urlmatch(netloc=FAKE_ES_HOST)
def catchall_handler(url, request):
print "Unsupported URL: %s %s" % (request.method, url, )
return {'status_code': 501}
handlers = [get_template, put_template, index_delete, index_lookup, post_doc, count_docs,
lookup_docs, lookup_scroll, delete_scroll, catchall_handler]
with HTTMock(*handlers):
yield
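# Usage sketch (illustrative addition, not part of the original commit): because the
# handlers above are registered with HTTMock, any call made through the `requests`
# library against FAKE_ES_HOST inside the context manager is answered from the
# in-memory `templates`/`docs` state instead of a real Elasticsearch cluster.
if __name__ == '__main__':
  import requests
  with fake_elasticsearch():
    requests.put('http://%s/_template/logentry' % FAKE_ES_HOST, data='{}')
    requests.post('http://%s/logentry_2019-01-01/_doc' % FAKE_ES_HOST,
                  data=json.dumps({'random_id': 1, 'kind_id': 1}))
    result = requests.get('http://%s/logentry_2019-01-01/_search' % FAKE_ES_HOST,
                          data=json.dumps({'query': None}))
    assert result.json()['hits']['total'] == 1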

@@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
import json
from datetime import datetime
from dateutil.parser import parse
from data.logs_model.datatypes import LogEntriesPage, Log, AggregatedLogCount
def _status(d, code=200):
return {"status_code": code, "content": json.dumps(d)}
def _shards(d, total=5, failed=0, successful=5):
d.update({"_shards": {"total": total, "failed": failed, "successful": successful}})
return d
def _hits(hits):
return {"hits": {"total": len(hits), "max_score": None, "hits": hits}}
INDEX_LIST_RESPONSE_HIT1_HIT2 = _status({
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE_HIT2 = _status({
"logentry_2018-04-02": {}
})
INDEX_LIST_RESPONSE = _status({
"logentry_2019-01-01": {},
"logentry_2017-03-08": {},
"logentry_2018-03-08": {},
"logentry_2018-04-02": {}
})
DEFAULT_TEMPLATE_RESPONSE = _status({"acknowledged": True})
INDEX_RESPONSE_2019_01_01 = _status(
_shards({
"_index": "logentry_2019-01-01",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
INDEX_RESPONSE_2017_03_08 = _status(
_shards({
"_index": "logentry_2017-03-08",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 0,
"_primary_term": 1,
"result": "created"
}))
FAILURE_400 = _status({}, 400)
INDEX_REQUEST_2019_01_01 = [
"logentry_2019-01-01", {
"account_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2019-01-01T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
1
}
]
INDEX_REQUEST_2017_03_08 = [
"logentry_2017-03-08", {
"repository_id":
1,
"account_id":
1,
"ip":
"192.168.1.1",
"random_id":
233,
"datetime":
"2017-03-08T03:30:00",
"metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
"performer_id":
1,
"kind_id":
2
}
]
_hit1 = {
"_index": "logentry_2018-03-08",
"_type": "doc",
"_id": "1",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
1,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.1",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"datetime":
"2018-03-08T03:30",
},
"sort": [1520479800000, 233]
}
_hit2 = {
"_index": "logentry_2018-04-02",
"_type": "doc",
"_id": "2",
"_score": None,
"_source": {
"random_id":
233,
"kind_id":
2,
"account_id":
1,
"performer_id":
1,
"repository_id":
1,
"ip":
"192.168.1.2",
"metadata_json":
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"datetime":
"2018-04-02T03:30",
},
"sort": [1522639800000, 233]
}
_log1 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
"192.168.1.1", parse("2018-03-08T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 1)
_log2 = Log(
"{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
"192.168.1.2", parse("2018-04-02T03:30"), "user1.email", "user1.username", "user1.robot",
"user1.organization", "user1.username", "user1.email", "user1.robot", 2)
SEARCH_RESPONSE_START = _status(_shards(_hits([_hit1, _hit2])))
SEARCH_RESPONSE_END = _status(_shards(_hits([_hit2])))
SEARCH_REQUEST_START = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"size": 2
}
SEARCH_REQUEST_END = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}]
}
},
"search_after": [1520479800000, 233],
"size": 2
}
SEARCH_REQUEST_FILTER = {
"sort": [{
"datetime": "desc"
}, {
"random_id.keyword": "desc"
}],
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [1]
}
}]
}
}]
}
},
"size": 2
}
SEARCH_PAGE_TOKEN = {
"datetime": datetime(2018, 3, 8, 3, 30).isoformat(),
"random_id": 233,
"page_number": 1
}
SEARCH_PAGE_START = LogEntriesPage(logs=[_log1], next_page_token=SEARCH_PAGE_TOKEN)
SEARCH_PAGE_END = LogEntriesPage(logs=[_log2], next_page_token=None)
SEARCH_PAGE_EMPTY = LogEntriesPage([], None)
AGGS_RESPONSE = _status(
_shards({
"hits": {
"total": 4,
"max_score": None,
"hits": []
},
"aggregations": {
"by_id": {
"doc_count_error_upper_bound":
0,
"sum_other_doc_count":
0,
"buckets": [{
"key": 2,
"doc_count": 3,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-12T00:00:00.000Z",
"key": 1257984000000,
"doc_count": 1
}, {
"key_as_string": "2009-11-13T00:00:00.000Z",
"key": 1258070400000,
"doc_count": 0
}, {
"key_as_string": "2009-11-14T00:00:00.000Z",
"key": 1258156800000,
"doc_count": 2
}]
}
}, {
"key": 1,
"doc_count": 1,
"by_date": {
"buckets": [{
"key_as_string": "2009-11-15T00:00:00.000Z",
"key": 1258243200000,
"doc_count": 1
}]
}
}]
}
}
}))
AGGS_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"performer_id": 1
}
}, {
"term": {
"repository_id": 1
}
}, {
"bool": {
"must_not": [{
"terms": {
"kind_id": [2]
}
}]
}
}],
"must": [{
"range": {
"datetime": {
"lt": "2018-04-08T03:30:00",
"gte": "2018-03-08T03:30:00"
}
}
}]
}
},
"aggs": {
"by_id": {
"terms": {
"field": "kind_id"
},
"aggs": {
"by_date": {
"date_histogram": {
"field": "datetime",
"interval": "day"
}
}
}
}
},
"size": 0
}
AGGS_COUNT = [
AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")),
AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")),
AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000"))
]
COUNT_REQUEST = {
"query": {
"bool": {
"filter": [{
"term": {
"repository_id": 1
}
}]
}
}
}
COUNT_RESPONSE = _status(_shards({
"count": 1,
}))
# assume there are 2 pages
_scroll_id = "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAACEmFkk1aGlTRzdSUWllejZmYTlEYTN3SVEAAAAAAAAhJRZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lRAAAAAAAAHtAWLWZpaFZXVzVSTy1OTXA5V3MwcHZrZwAAAAAAAB7RFi1maWhWV1c1Uk8tTk1wOVdzMHB2a2cAAAAAAAAhJxZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lR"
def _scroll(d):
d["_scroll_id"] = _scroll_id
return d
SCROLL_CREATE = _status(_shards(_scroll(_hits([_hit1]))))
SCROLL_GET = _status(_shards(_scroll(_hits([_hit2]))))
SCROLL_GET_2 = _status(_shards(_scroll(_hits([]))))
SCROLL_DELETE = _status({"succeeded": True, "num_freed": 5})
SCROLL_LOGS = [[_log1], [_log2]]
SCROLL_REQUESTS = [
[
"5m", 1, {
"sort": "_doc",
"query": {
"range": {
"datetime": {
"lt": "2018-04-02T00:00:00",
"gte": "2018-03-08T00:00:00"
}
}
}
}
],
[{"scroll": "5m", "scroll_id": _scroll_id}],
[{"scroll":"5m", "scroll_id": _scroll_id}],
[{"scroll_id": [_scroll_id]}],
]
SCROLL_RESPONSES = [SCROLL_CREATE, SCROLL_GET, SCROLL_GET_2, SCROLL_DELETE]
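# Composition sketch (illustrative): the helpers above simply nest, e.g.
# SCROLL_CREATE == _status(_shards(_scroll(_hits([_hit1])))) expands to
#   {"status_code": 200,
#    "content": json.dumps({
#      "hits": {"total": 1, "max_score": None, "hits": [_hit1]},
#      "_shards": {"total": 5, "failed": 0, "successful": 5},
#      "_scroll_id": _scroll_id})}
# which mirrors the body Elasticsearch returns when a scroll is first created.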

@@ -0,0 +1,130 @@
from datetime import date, datetime, timedelta
from freezegun import freeze_time
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import CombinedLogsModel
import pytest
from data import model
from test.fixtures import *
@pytest.fixture()
def first_model():
return InMemoryModel()
@pytest.fixture()
def second_model():
return InMemoryModel()
@pytest.fixture()
def combined_model(first_model, second_model, initialized_db):
return CombinedLogsModel(first_model, second_model)
def test_log_action(first_model, second_model, combined_model, initialized_db):
day = date(2019, 1, 1)
# Write to the combined model.
with freeze_time(day):
combined_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
simple_repo = model.repository.get_repository('devtable', 'simple')
# Make sure it is found in the first model but not the second.
assert combined_model.count_repository_actions(simple_repo, day) == 1
assert first_model.count_repository_actions(simple_repo, day) == 1
assert second_model.count_repository_actions(simple_repo, day) == 0
def test_count_repository_actions(first_model, second_model, combined_model, initialized_db):
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Ensure the counts match as expected.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
assert first_model.count_repository_actions(simple_repo, day) == 3
assert second_model.count_repository_actions(simple_repo, day) == 2
assert combined_model.count_repository_actions(simple_repo, day) == 5
def test_yield_logs_for_export(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
# Ensure the full set of logs is yielded.
first_logs = list(first_model.yield_logs_for_export(now, later))[0]
second_logs = list(second_model.yield_logs_for_export(now, later))[0]
combined = list(combined_model.yield_logs_for_export(now, later))
full_combined = []
for subset in combined:
full_combined.extend(subset)
assert len(full_combined) == len(first_logs) + len(second_logs)
assert full_combined == (first_logs + second_logs)
def test_lookup_logs(first_model, second_model, combined_model, initialized_db):
now = datetime.now()
# Write to each model.
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
later = datetime.now()
def _collect_logs(model):
page_token = None
all_logs = []
while True:
paginated_logs = model.lookup_logs(now, later, page_token=page_token)
page_token = paginated_logs.next_page_token
all_logs.extend(paginated_logs.logs)
if page_token is None:
break
return all_logs
first_logs = _collect_logs(first_model)
second_logs = _collect_logs(second_model)
combined = _collect_logs(combined_model)
assert len(combined) == len(first_logs) + len(second_logs)
assert combined == (first_logs + second_logs)
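# Taken together, these tests pin down the CombinedLogsModel behavior exercised here:
# writes (log_action) land only in the first model, while reads
# (count_repository_actions, yield_logs_for_export, lookup_logs) merge the results of
# both underlying models.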

@@ -0,0 +1,529 @@
# -*- coding: utf-8 -*-
# pylint: disable=redefined-outer-name, wildcard-import
import json
from datetime import datetime, timedelta
import pytest
from mock import patch, Mock
from dateutil.parser import parse
from httmock import urlmatch, HTTMock
from data.model.log import _json_serialize
from data.logs_model.elastic_logs import ElasticsearchLogs, INDEX_NAME_PREFIX, INDEX_DATE_FORMAT
from data.logs_model import configure, LogsModelProxy
from mock_elasticsearch import *
FAKE_ES_HOST = 'fakees'
FAKE_ES_HOST_PATTERN = r'fakees.*'
FAKE_ES_PORT = 443
FAKE_AWS_ACCESS_KEY = None
FAKE_AWS_SECRET_KEY = None
FAKE_AWS_REGION = None
@pytest.fixture()
def logs_model_config():
conf = {
'LOGS_MODEL': 'elasticsearch',
'LOGS_MODEL_CONFIG': {
'producer': 'elasticsearch',
'elasticsearch_config': {
'host': FAKE_ES_HOST,
'port': FAKE_ES_PORT,
'access_key': FAKE_AWS_ACCESS_KEY,
'secret_key': FAKE_AWS_SECRET_KEY,
'aws_region': FAKE_AWS_REGION
}
}
}
return conf
FAKE_LOG_ENTRY_KINDS = {'push_repo': 1, 'pull_repo': 2}
FAKE_NAMESPACES = {
'user1':
Mock(id=1, organization="user1.organization", username="user1.username", email="user1.email",
robot="user1.robot"),
'user2':
Mock(id=2, organization="user2.organization", username="user2.username", email="user2.email",
robot="user2.robot")
}
FAKE_REPOSITORIES = {
'user1/repo1': Mock(id=1, namespace_user=FAKE_NAMESPACES['user1']),
'user2/repo2': Mock(id=2, namespace_user=FAKE_NAMESPACES['user2']),
}
@pytest.fixture()
def logs_model():
# prevent logs model from changing
logs_model = LogsModelProxy()
with patch('data.logs_model.logs_model', logs_model):
yield logs_model
@pytest.fixture(scope='function')
def app_config(logs_model_config):
fake_config = {}
fake_config.update(logs_model_config)
with patch("data.logs_model.document_logs_model.config.app_config", fake_config):
yield fake_config
@pytest.fixture()
def mock_page_size():
with patch('data.logs_model.document_logs_model.PAGE_SIZE', 1):
yield
@pytest.fixture()
def mock_max_result_window():
with patch('data.logs_model.document_logs_model.DEFAULT_RESULT_WINDOW', 1):
yield
@pytest.fixture
def mock_random_id():
mock_random = Mock(return_value=233)
with patch('data.logs_model.document_logs_model._random_id', mock_random):
yield
@pytest.fixture()
def mock_db_model():
def get_user_map_by_ids(namespace_ids):
mapping = {}
for i in namespace_ids:
for name in FAKE_NAMESPACES:
if FAKE_NAMESPACES[name].id == i:
mapping[i] = FAKE_NAMESPACES[name]
return mapping
model = Mock(
user=Mock(
get_namespace_user=FAKE_NAMESPACES.get,
get_user_or_org=FAKE_NAMESPACES.get,
get_user=FAKE_NAMESPACES.get,
get_user_map_by_ids=get_user_map_by_ids,
),
repository=Mock(get_repository=lambda user_name, repo_name: FAKE_REPOSITORIES.get(
user_name + '/' + repo_name),
),
log=Mock(
_get_log_entry_kind=lambda name: FAKE_LOG_ENTRY_KINDS[name],
_json_serialize=_json_serialize,
get_log_entry_kinds=Mock(return_value=FAKE_LOG_ENTRY_KINDS),
),
)
with patch('data.logs_model.document_logs_model.model', model), patch(
'data.logs_model.datatypes.model', model):
yield
def parse_query(query):
return {s.split('=')[0]: s.split('=')[1] for s in query.split("&") if s != ""}
@pytest.fixture()
def mock_elasticsearch():
mock = Mock()
mock.template.side_effect = NotImplementedError
mock.index.side_effect = NotImplementedError
mock.count.side_effect = NotImplementedError
mock.scroll_get.side_effect = NotImplementedError
mock.scroll_delete.side_effect = NotImplementedError
mock.search_scroll_create.side_effect = NotImplementedError
mock.search_aggs.side_effect = NotImplementedError
mock.search_after.side_effect = NotImplementedError
mock.list_indices.side_effect = NotImplementedError
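# Every endpoint defaults to raising NotImplementedError, so a test that exercises a
# code path it did not explicitly stub (e.g. mock.search_after = Mock(return_value=...))
# fails loudly instead of silently getting a Mock back.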
@urlmatch(netloc=r'.*', path=r'.*')
def default(url, req):
raise Exception('\nurl={}\nmethod={}\nreq.url={}\nheaders={}\nbody={}'.format(
url, req.method, req.url, req.headers, req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_template/.*')
def template(url, req):
return mock.template(url.query.split('/')[-1], req.body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]+)')
def list_indices(url, req):
return mock.list_indices()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_[0-9\-]*/_doc')
def index(url, req):
index = url.path.split('/')[1]
body = json.loads(req.body)
body['metadata_json'] = json.loads(body['metadata_json'])
return mock.index(index, body)
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_([0-9\-]*|\*)/_count')
def count(_, req):
return mock.count(json.loads(req.body))
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_search/scroll')
def scroll(url, req):
if req.method == 'DELETE':
return mock.scroll_delete(json.loads(req.body))
elif req.method == 'GET':
request_obj = json.loads(req.body)
return mock.scroll_get(request_obj)
raise NotImplementedError()
@urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]*)/_search')
def search(url, req):
if "scroll" in url.query:
query = parse_query(url.query)
window_size = query['scroll']
maximum_result_size = int(query['size'])
return mock.search_scroll_create(window_size, maximum_result_size, json.loads(req.body))
elif "aggs" in req.body:
return mock.search_aggs(json.loads(req.body))
else:
return mock.search_after(json.loads(req.body))
with HTTMock(scroll, count, search, index, template, list_indices, default):
yield mock
@pytest.mark.parametrize(
"""
unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws
""",
[
# Invalid inputs
pytest.param(
False, 'non-existing', None, None, None,
None,
None, None, True,
id="Invalid Kind"
),
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), 'repo1',
None,
None, None, True,
id="Invalid Parameters"
),
# Remote exceptions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
None,
FAILURE_400, None, True,
id="Throw on pull log failure"
),
pytest.param(
True, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
FAILURE_400, INDEX_REQUEST_2017_03_08, False,
id="Ok on pull log failure"
),
# Success executions
pytest.param(
False, 'pull_repo', 'user1', Mock(id=1), None,
parse("2017-03-08T03:30"),
INDEX_RESPONSE_2017_03_08, INDEX_REQUEST_2017_03_08, False,
id="Log with namespace name and repository"
),
pytest.param(
False, 'push_repo', 'user1', None, 'repo1',
parse("2019-01-01T03:30"),
INDEX_RESPONSE_2019_01_01, INDEX_REQUEST_2019_01_01, False,
id="Log with namespace name and repository name"
),
])
def test_log_action(unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
timestamp,
index_response, expected_request, throws,
app_config, logs_model, mock_elasticsearch, mock_db_model, mock_random_id):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.index = Mock(return_value=index_response)
app_config['ALLOW_PULLS_WITHOUT_STRICT_LOGGING'] = unlogged_pulls_ok
configure(app_config)
performer = Mock(id=1)
ip = "192.168.1.1"
metadata = {'key': 'value', 'time': parse("2018-03-08T03:30"), '😂': '😂👌👌👌👌'}
if throws:
with pytest.raises(Exception):
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
else:
logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
repository_name, timestamp)
mock_elasticsearch.index.assert_called_with(*expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws
""",
[
# 1st page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
None,
None,
SEARCH_RESPONSE_START,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_START,
SEARCH_PAGE_START,
False,
id="1st page"
),
# Last page
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT1_HIT2,
SEARCH_REQUEST_END,
SEARCH_PAGE_END,
False,
id="Search using pagination token"
),
# Filter
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['push_repo'],
None,
None,
SEARCH_RESPONSE_END,
INDEX_LIST_RESPONSE_HIT2,
SEARCH_REQUEST_FILTER,
SEARCH_PAGE_END,
False,
id="Filtered search"
),
# Max page count
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
None,
SEARCH_PAGE_TOKEN,
1,
AssertionError, # Assert that it should not reach the ES server
None,
None,
SEARCH_PAGE_EMPTY,
False,
id="Page token reaches maximum page count",
),
])
def test_lookup_logs(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds,
page_token,
max_page_count,
search_response,
list_indices_response,
expected_request,
expected_page,
throws,
logs_model, mock_elasticsearch, mock_db_model, mock_page_size, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_after = Mock(return_value=search_response)
mock_elasticsearch.list_indices = Mock(return_value=list_indices_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
else:
page = logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
namespace_name, filter_kinds, page_token, max_page_count)
assert page == expected_page
if expected_request:
mock_elasticsearch.search_after.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws
""",
[
# Valid
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
'user1', 'repo1', 'user1',
['pull_repo'], AGGS_RESPONSE, AGGS_REQUEST, AGGS_COUNT, False,
id="Valid Counts"
),
# Invalid case: date range too big
pytest.param(
parse('2018-03-08T03:30'), parse('2018-04-09T03:30'),
'user1', 'repo1', 'user1',
[], None, None, None, True,
id="Throw on date range too big"
)
])
def test_get_aggregated_log_counts(start_datetime, end_datetime,
performer_name, repository_name, namespace_name,
filter_kinds, search_response, expected_request, expected_counts, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_aggs = Mock(return_value=search_response)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
else:
counts = logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
repository_name, namespace_name, filter_kinds)
assert set(counts) == set(expected_counts)
if expected_request:
mock_elasticsearch.search_aggs.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
repository,
day,
count_response, expected_request, expected_count, throws
""",
[
pytest.param(
FAKE_REPOSITORIES['user1/repo1'],
parse("2018-03-08").date(),
COUNT_RESPONSE, COUNT_REQUEST, 1, False,
id="Valid Count with 1 as result"),
])
def test_count_repository_actions(repository,
day,
count_response, expected_request, expected_count, throws,
logs_model, mock_elasticsearch, mock_db_model, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.count = Mock(return_value=count_response)
mock_elasticsearch.list_indices = Mock(return_value=INDEX_LIST_RESPONSE)
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.count_repository_actions(repository, day)
else:
count = logs_model.count_repository_actions(repository, day)
assert count == expected_count
if expected_request:
mock_elasticsearch.count.assert_called_with(expected_request)
@pytest.mark.parametrize(
"""
start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws
""",
[
pytest.param(
parse("2018-03-08"), parse("2018-04-02"),
1, 1,
timedelta(seconds=10), SCROLL_RESPONSES, SCROLL_REQUESTS, SCROLL_LOGS, False,
id="Scroll 3 pages with page size = 1"
),
])
def test_yield_logs_for_export(start_datetime, end_datetime,
repository_id, namespace_id,
max_query_time, scroll_responses, expected_requests, expected_logs, throws,
logs_model, mock_elasticsearch, mock_db_model, mock_max_result_window, app_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
mock_elasticsearch.search_scroll_create = Mock(return_value=scroll_responses[0])
mock_elasticsearch.scroll_get = Mock(side_effect=scroll_responses[1:-1])
mock_elasticsearch.scroll_delete = Mock(return_value=scroll_responses[-1])
configure(app_config)
if throws:
with pytest.raises(Exception):
logs_model.yield_logs_for_export(start_datetime, end_datetime, max_query_time=max_query_time)
else:
log_generator = logs_model.yield_logs_for_export(start_datetime, end_datetime,
max_query_time=max_query_time)
counter = 0
for logs in log_generator:
if counter == 0:
mock_elasticsearch.search_scroll_create.assert_called_with(*expected_requests[counter])
else:
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[counter])
assert expected_logs[counter] == logs
counter += 1
# The last two requests must be:
# 1. a scroll GET whose response contains 0 hits, which signals the termination condition
# 2. the scroll DELETE request
mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[-2])
mock_elasticsearch.scroll_delete.assert_called_with(*expected_requests[-1])
@pytest.mark.parametrize('prefix, is_valid', [
pytest.param('..', False, id='Invalid `..`'),
pytest.param('.', False, id='Invalid `.`'),
pytest.param('-prefix', False, id='Invalid prefix start -'),
pytest.param('_prefix', False, id='Invalid prefix start _'),
pytest.param('+prefix', False, id='Invalid prefix start +'),
pytest.param('prefix_with_UPPERCASES', False, id='Invalid uppercase'),
pytest.param('valid_index', True, id='Valid prefix'),
pytest.param('valid_index_with_numbers1234', True, id='Valid prefix with numbers'),
pytest.param('a'*256, False, id='Prefix too long')
])
def test_valid_index_prefix(prefix, is_valid):
assert ElasticsearchLogs._valid_index_prefix(prefix) == is_valid
@pytest.mark.parametrize('index, cutoff_date, expected_result', [
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 8),
True,
id="Index older than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 4),
False,
id="Index younger than cutoff"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 6, 23),
False,
id="Index older than cutoff but timedelta less than 1 day"
),
pytest.param(
INDEX_NAME_PREFIX+'2019-06-06',
datetime(2019, 6, 7),
True,
id="Index older than cutoff by exactly one day"
),
])
def test_can_delete_index(index, cutoff_date, expected_result):
es = ElasticsearchLogs(index_prefix=INDEX_NAME_PREFIX)
assert datetime.strptime(index.split(es._index_prefix, 1)[-1], INDEX_DATE_FORMAT)
assert es.can_delete_index(index, cutoff_date) == expected_result

@@ -0,0 +1,473 @@
from datetime import datetime, timedelta, date
from data.logs_model.datatypes import AggregatedLogCount
from data.logs_model.table_logs_model import TableLogsModel
from data.logs_model.combined_model import CombinedLogsModel
from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import _merge_aggregated_log_counts
from data.logs_model.document_logs_model import _date_range_in_single_index, DocumentLogsModel
from data.logs_model.interface import LogsIterationTimeout
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch
from data.database import LogEntry, LogEntry2, LogEntry3, LogEntryKind
from data import model
import os
import pytest
from mock import patch
from test.fixtures import *
@pytest.fixture()
def mock_page_size():
page_size = 2
with patch('data.logs_model.document_logs_model.PAGE_SIZE', page_size):
yield page_size
@pytest.fixture()
def clear_db_logs(initialized_db):
LogEntry.delete().execute()
LogEntry2.delete().execute()
LogEntry3.delete().execute()
def combined_model():
return CombinedLogsModel(TableLogsModel(), InMemoryModel())
def es_model():
return DocumentLogsModel(producer='elasticsearch', elasticsearch_config={
'host': FAKE_ES_HOST,
'port': 12345,
})
@pytest.fixture()
def fake_es():
with fake_elasticsearch():
yield
@pytest.fixture(params=[TableLogsModel, InMemoryModel, es_model, combined_model])
def logs_model(request, clear_db_logs, fake_es):
return request.param()
def _lookup_logs(logs_model, start_time, end_time, **kwargs):
logs_found = []
page_token = None
while True:
found = logs_model.lookup_logs(start_time, end_time, page_token=page_token, **kwargs)
logs_found.extend(found.logs)
page_token = found.next_page_token
if not found.logs or not page_token:
break
assert len(logs_found) == len(set(logs_found))
return logs_found
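# _lookup_logs pages through lookup_logs until next_page_token is exhausted and asserts
# that no duplicate log entries are returned across pages.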
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
@pytest.mark.parametrize('namespace_name, repo_name, performer_name, check_args, expect_results', [
pytest.param('devtable', 'simple', 'devtable', {}, True, id='no filters'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'devtable',
}, True, id='matching performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
}, True, id='matching namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'simple',
}, True, id='matching repository'),
pytest.param('devtable', 'simple', 'devtable', {
'performer_name': 'public',
}, False, id='different performer'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'public',
}, False, id='different namespace'),
pytest.param('devtable', 'simple', 'devtable', {
'namespace_name': 'devtable',
'repository_name': 'complex',
}, False, id='different repository'),
])
def test_logs(namespace_name, repo_name, performer_name, check_args, expect_results, logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user(performer_name)
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 3):
logs_model.log_action(kind.name, namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
found = _lookup_logs(logs_model, start_timestamp, start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(found) == len(kinds) * 3
else:
assert not found
aggregated_counts = logs_model.get_aggregated_log_counts(start_timestamp,
start_timestamp + timedelta(minutes=10),
**check_args)
if expect_results:
assert len(aggregated_counts) == len(kinds)
for ac in aggregated_counts:
assert ac.count == 3
else:
assert not aggregated_counts
@pytest.mark.parametrize('filter_kinds, expect_results', [
pytest.param(None, True),
pytest.param(['push_repo'], True, id='push_repo filter'),
pytest.param(['pull_repo'], True, id='pull_repo filter'),
pytest.param(['push_repo', 'pull_repo'], False, id='push and pull filters')
])
def test_lookup_latest_logs(filter_kinds, expect_results, logs_model):
kind_map = model.log.get_log_entry_kinds()
if filter_kinds:
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds if filter_kinds]
else:
ignore_ids = []
now = datetime.now()
namespace_name = 'devtable'
repo_name = 'simple'
performer_name = 'devtable'
user = model.user.get_user(performer_name)
size = 3
# Log some push actions
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=1, seconds=11))
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=7, seconds=33))
# Log some pull actions
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=0, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=3, seconds=55))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=5, seconds=3))
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=11, seconds=11))
# Get the latest logs
latest_logs = logs_model.lookup_latest_logs(performer_name, repo_name, namespace_name,
filter_kinds=filter_kinds, size=size)
# Test max lookup size
assert len(latest_logs) <= size
# Make sure that the latest logs returned are in decreasing order
assert all(x >= y for x, y in zip(latest_logs, latest_logs[1:]))
if expect_results:
assert latest_logs
# Lookup all logs filtered by kinds and sort them in reverse chronological order
all_logs = _lookup_logs(logs_model, now - timedelta(days=30), now + timedelta(days=30),
filter_kinds=filter_kinds, namespace_name=namespace_name,
repository_name=repo_name)
all_logs = sorted(all_logs, key=lambda l: l.datetime, reverse=True)
# Check that querying all logs does not return the filtered kinds
assert all([log.kind_id not in ignore_ids for log in all_logs])
# Check that the latest logs contain only the most recent ones
assert latest_logs == all_logs[:len(latest_logs)]
def test_count_repository_actions(logs_model):
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
def test_yield_log_rotation_context(logs_model):
cutoff_date = datetime.now()
min_logs_per_rotation = 3
# Log some actions to be archived
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=cutoff_date-timedelta(days=1, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='5.6.7.8', timestamp=cutoff_date-timedelta(days=1, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple3',
ip='9.10.11.12', timestamp=cutoff_date-timedelta(days=1, seconds=3))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple4',
ip='0.0.0.0', timestamp=cutoff_date-timedelta(days=1, seconds=4))
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=1))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=2))
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=3))
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert found is not None and len(found) == 7
# Iterate the logs using the log rotation contexts
all_logs = []
for log_rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
min_logs_per_rotation):
with log_rotation_context as context:
for logs, _ in context.yield_logs_batch():
all_logs.extend(logs)
assert len(all_logs) == 7
found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
assert not found
# After sorting by datetime, make sure the datetimes are strictly increasing, which also
# verifies that no duplicate logs were returned
all_logs.sort(key=lambda d: d.datetime)
assert all(x.datetime < y.datetime for x, y in zip(all_logs, all_logs[1:]))
def test_count_repository_actions_with_wildcard_disabled(initialized_db):
with fake_elasticsearch(allow_wildcard=False):
logs_model = es_model()
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different repo.
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
ip='1.2.3.4')
# Count the actions.
day = date.today()
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 3
complex_repo = model.repository.get_repository('devtable', 'complex')
count = logs_model.count_repository_actions(complex_repo, day)
assert count == 2
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
assert count == 0
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='Flaky on MySQL')
def test_yield_logs_for_export(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for index in range(0, 10):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
# Yield the logs.
simple_repo = model.repository.get_repository('devtable', 'simple')
logs_found = []
for logs in logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=10),
repository_id=simple_repo.id):
logs_found.extend(logs)
# Ensure we found all added logs.
assert len(logs_found) == len(kinds) * 10
def test_yield_logs_for_export_timeout(logs_model):
# Add some logs.
kinds = list(LogEntryKind.select())
user = model.user.get_user('devtable')
start_timestamp = datetime.utcnow()
timestamp = start_timestamp
for kind in kinds:
for _ in range(0, 2):
logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
performer=user, ip='1.2.3.4', timestamp=timestamp)
timestamp = timestamp + timedelta(seconds=1)
# Yield the logs. Since we set the timeout to nothing, it should immediately fail.
simple_repo = model.repository.get_repository('devtable', 'simple')
with pytest.raises(LogsIterationTimeout):
list(logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=1),
repository_id=simple_repo.id,
max_query_time=timedelta(seconds=0)))
def test_disabled_namespace(clear_db_logs):
logs_model = TableLogsModel(lambda kind, namespace, is_free: namespace == 'devtable')
# Log some actions.
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
ip='1.2.3.4')
# Log some actions to a different namespace.
logs_model.log_action('push_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
ip='1.2.3.4')
# Count the actions.
day = datetime.today() - timedelta(minutes=60)
simple_repo = model.repository.get_repository('devtable', 'simple')
count = logs_model.count_repository_actions(simple_repo, day)
assert count == 0
org_repo = model.repository.get_repository('buynlarge', 'orgrepo')
count = logs_model.count_repository_actions(org_repo, day)
assert count == 3
@pytest.mark.parametrize('aggregated_log_counts1, aggregated_log_counts2, expected_result', [
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)), # 3
],
[
AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)), # 2
AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)) # 3
]
),
pytest.param(
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
],
[
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
],
[
AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)), # 1
AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)), # 2
]
),
pytest.param(
[],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))],
[AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))]
),
])
def test_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2, expected_result):
assert (sorted(_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2)) ==
sorted(expected_result))
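# The parameters above pin down the merge semantics: counts that share the same
# (kind_id, datetime) key are summed (3+5=8, 3+7=10), while non-overlapping entries are
# passed through unchanged.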
@pytest.mark.parametrize('dt1, dt2, expected_result', [
# Valid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 18), True),
# Invalid dates
pytest.param(date(2019, 6, 17), date(2019, 6, 17), False),
pytest.param(date(2019, 6, 17), date(2019, 6, 19), False),
pytest.param(date(2019, 6, 18), date(2019, 6, 17), False),
# Valid datetimes
pytest.param(datetime(2019, 6, 17, 0, 1), datetime(2019, 6, 17, 0, 2), True),
# Invalid datetimes
pytest.param(datetime(2019, 6, 17, 0, 2), datetime(2019, 6, 17, 0, 1), False),
pytest.param(datetime(2019, 6, 17, 11), datetime(2019, 6, 17, 11) + timedelta(hours=14), False),
])
def test_date_range_in_single_index(dt1, dt2, expected_result):
assert _date_range_in_single_index(dt1, dt2) == expected_result
def test_pagination(logs_model, mock_page_size):
"""
Make sure that pagination does not stop early when searching across multiple per-day indices
and the number of logs found so far equals the page size while later indices still remain to be searched.
"""
day1 = datetime.now()
day2 = day1 + timedelta(days=1)
day3 = day2 + timedelta(days=1)
# Log some actions in day indices
# One day
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
ip='1.2.3.4', timestamp=day1)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple1',
ip='5.6.7.8', timestamp=day1)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == mock_page_size
# Another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day2)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day2)
# Yet another day
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='1.1.1.1', timestamp=day3)
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
ip='0.0.0.0', timestamp=day3)
found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
assert len(found) == 6

@@ -0,0 +1,77 @@
import logging
import pytest
from dateutil.parser import parse
from mock import patch, Mock
import botocore
from data.logs_model import configure
from test_elasticsearch import app_config, logs_model_config, logs_model, mock_elasticsearch, mock_db_model
from mock_elasticsearch import *
logger = logging.getLogger(__name__)
FAKE_KAFKA_BROKERS = ['fake_server1', 'fake_server2']
FAKE_KAFKA_TOPIC = 'sometopic'
FAKE_MAX_BLOCK_SECONDS = 1
@pytest.fixture()
def kafka_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kafka_config = {
'bootstrap_servers': FAKE_KAFKA_BROKERS,
'topic': FAKE_KAFKA_TOPIC,
'max_block_seconds': FAKE_MAX_BLOCK_SECONDS
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kafka'
producer_config['LOGS_MODEL_CONFIG']['kafka_config'] = kafka_config
return producer_config
@pytest.fixture()
def kinesis_logs_producer_config(app_config):
producer_config = {}
producer_config.update(app_config)
kinesis_stream_config = {
'stream_name': 'test-stream',
'aws_region': 'fake_region',
'aws_access_key': 'some_key',
'aws_secret_key': 'some_secret'
}
producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kinesis_stream'
producer_config['LOGS_MODEL_CONFIG']['kinesis_stream_config'] = kinesis_stream_config
return producer_config
def test_kafka_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kafka_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kafka_logs_producer_config
with patch('kafka.client_async.KafkaClient.check_version'), patch('kafka.KafkaProducer.send') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
mock_send.assert_called_once()
def test_kinesis_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kinesis_logs_producer_config):
mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
producer_config = kinesis_logs_producer_config
with patch('botocore.endpoint.EndpointCreator.create_endpoint'), \
patch('botocore.client.BaseClient._make_api_call') as mock_send:
configure(producer_config)
logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
None, 'repo1', parse("2019-01-01T03:30"))
# Check that a PutRecord api call is made.
# NOTE: The second arg of _make_api_call uses a randomized PartitionKey
mock_send.assert_called_once_with(u'PutRecord', mock_send.call_args_list[0][0][1])
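# Note (illustrative): both producer tests reuse the Elasticsearch-backed app_config and
# only swap in LOGS_MODEL_CONFIG['producer'] plus the producer-specific config block, which
# is presumably why the mock_elasticsearch fixture is still needed here to answer the
# template registration performed during configure().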