initial import for Open Source 🎉
parent 1898c361f3
commit 9c0dd3b722
2048 changed files with 218743 additions and 0 deletions
0
data/logs_model/test/__init__.py
Normal file
390
data/logs_model/test/fake_elasticsearch.py
Normal file
@@ -0,0 +1,390 @@
import json
import uuid
import fnmatch

from collections import defaultdict
from contextlib import contextmanager
from datetime import datetime

import dateutil.parser

from httmock import urlmatch, HTTMock

FAKE_ES_HOST = 'fakees'

EMPTY_RESULT = {
  'hits': {'hits': [], 'total': 0},
  '_shards': {'successful': 1, 'total': 1},
}


def parse_query(query):
  if not query:
    return {}

  return {s.split('=')[0]: s.split('=')[1] for s in query.split("&")}


@contextmanager
def fake_elasticsearch(allow_wildcard=True):
  templates = {}
  docs = defaultdict(list)
  scrolls = {}
  id_counter = [1]

  def transform(value, field_name):
    # TODO: implement this using a real index template if we ever need more than a few
    # fields here.
    if field_name == 'datetime':
      if isinstance(value, int):
        return datetime.utcfromtimestamp(value / 1000)

      parsed = dateutil.parser.parse(value)
      return parsed

    return value

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='GET')
  def get_template(url, request):
    template_name = url[len('/_template/'):]
    if template_name in templates:
      return {'status_code': 200}

    return {'status_code': 404}

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/_template/(.+)', method='PUT')
  def put_template(url, request):
    template_name = url[len('/_template/'):]
    templates[template_name] = True
    return {'status_code': 201}

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_doc', method='POST')
  def post_doc(url, request):
    index_name, _ = url.path[1:].split('/')
    item = json.loads(request.body)
    item['_id'] = item['random_id']
    id_counter[0] += 1
    docs[index_name].append(item)
    return {
      'status_code': 204,
      'headers': {'Content-Type': 'application/json'},
      'content': json.dumps({"result": "created"}),
    }

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='DELETE')
  def index_delete(url, request):
    index_name_or_pattern = url.path[1:]
    to_delete = []
    for index_name in docs.keys():
      if not fnmatch.fnmatch(index_name, index_name_or_pattern):
        continue

      to_delete.append(index_name)

    for index in to_delete:
      docs.pop(index)

    return {
      'status_code': 200,
      'headers': {'Content-Type': 'application/json'},
      'content': {'acknowledged': True}
    }

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)$', method='GET')
  def index_lookup(url, request):
    index_name_or_pattern = url.path[1:]
    found = {}
    for index_name in docs.keys():
      if not fnmatch.fnmatch(index_name, index_name_or_pattern):
        continue

      found[index_name] = {}

    if not found:
      return {'status_code': 404}

    return {
      'status_code': 200,
      'headers': {'Content-Type': 'application/json'},
      'content': json.dumps(found),
    }

  def _match_query(index_name_or_pattern, query):
    found = []
    found_index = False

    for index_name in docs.keys():
      if not allow_wildcard and index_name_or_pattern.find('*') >= 0:
        break

      if not fnmatch.fnmatch(index_name, index_name_or_pattern):
        continue

      found_index = True

      def _is_match(doc, current_query):
        if current_query is None:
          return True

        for filter_type, filter_params in current_query.iteritems():
          for field_name, filter_props in filter_params.iteritems():
            if filter_type == 'range':
              lt = transform(filter_props['lt'], field_name)
              gte = transform(filter_props['gte'], field_name)
              doc_value = transform(doc[field_name], field_name)
              if not (doc_value < lt and doc_value >= gte):
                return False
            elif filter_type == 'term':
              doc_value = transform(doc[field_name], field_name)
              return doc_value == filter_props
            elif filter_type == 'terms':
              doc_value = transform(doc[field_name], field_name)
              return doc_value in filter_props
            elif filter_type == 'bool':
              assert not 'should' in filter_params, 'should is unsupported'

              must = filter_params.get('must')
              must_not = filter_params.get('must_not')
              filter_bool = filter_params.get('filter')

              if must:
                for check in must:
                  if not _is_match(doc, check):
                    return False

              if must_not:
                for check in must_not:
                  if _is_match(doc, check):
                    return False

              if filter_bool:
                for check in filter_bool:
                  if not _is_match(doc, check):
                    return False
            else:
              raise Exception('Unimplemented query %s: %s' % (filter_type, query))

        return True

      for doc in docs[index_name]:
        if not _is_match(doc, query):
          continue

        found.append({'_source': doc, '_index': index_name})

    return found, found_index or (index_name_or_pattern.find('*') >= 0)

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_count$', method='GET')
  def count_docs(url, request):
    request = json.loads(request.body)
    index_name_or_pattern, _ = url.path[1:].split('/')

    found, found_index = _match_query(index_name_or_pattern, request['query'])
    if not found_index:
      return {'status_code': 404}

    return {
      'status_code': 200,
      'headers': {'Content-Type': 'application/json'},
      'content': json.dumps({'count': len(found)}),
    }

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='GET')
  def lookup_scroll(url, request):
    request_obj = json.loads(request.body)
    scroll_id = request_obj['scroll_id']
    if scroll_id in scrolls:
      return {
        'status_code': 200,
        'headers': {'Content-Type': 'application/json'},
        'content': json.dumps(scrolls[scroll_id]),
      }

    return {'status_code': 404}

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/_search/scroll$', method='DELETE')
  def delete_scroll(url, request):
    request = json.loads(request.body)
    for scroll_id in request['scroll_id']:
      scrolls.pop(scroll_id, None)

    return {'status_code': 404}

  @urlmatch(netloc=FAKE_ES_HOST, path=r'/([^/]+)/_search$', method='GET')
  def lookup_docs(url, request):
    query_params = parse_query(url.query)

    request = json.loads(request.body)
    index_name_or_pattern, _ = url.path[1:].split('/')

    # Find matching docs.
    query = request.get('query')
    found, found_index = _match_query(index_name_or_pattern, query)
    if not found_index:
      return {'status_code': 404}

    # Sort.
    sort = request.get('sort')
    if sort:
      if sort == ['_doc'] or sort == '_doc':
        found.sort(key=lambda x: x['_source']['_id'])
      else:
        def get_sort_key(item):
          source = item['_source']
          key = ''
          for sort_config in sort:
            for sort_key, direction in sort_config.iteritems():
              assert direction == 'desc'
              sort_key = sort_key.replace('.keyword', '')
              key += str(transform(source[sort_key], sort_key))
              key += '|'
          return key

        found.sort(key=get_sort_key, reverse=True)

    # Search after.
    search_after = request.get('search_after')
    if search_after:
      sort_fields = []
      for sort_config in sort:
        if isinstance(sort_config, unicode):
          sort_fields.append(sort_config)
          continue

        for sort_key, _ in sort_config.iteritems():
          sort_key = sort_key.replace('.keyword', '')
          sort_fields.append(sort_key)

      for index, search_after_value in enumerate(search_after):
        field_name = sort_fields[index]
        value = transform(search_after_value, field_name)
        if field_name == '_doc':
          found = [f for f in found if transform(f['_source']['_id'], field_name) > value]
        else:
          found = [f for f in found if transform(f['_source'][field_name], field_name) < value]
        if len(found) < 2:
          break

        if field_name == '_doc':
          if found[0]['_source']['_id'] != found[1]['_source']:
            break
        else:
          if found[0]['_source'][field_name] != found[1]['_source']:
            break

    # Size.
    size = request.get('size')
    if size:
      found = found[0:size]

    # Aggregation.
    # {u'query':
    #   {u'range':
    #     {u'datetime': {u'lt': u'2019-06-27T15:45:09.768085',
    #                    u'gte': u'2019-06-27T15:35:09.768085'}}},
    #  u'aggs': {
    #    u'by_id': {
    #      u'terms': {u'field': u'kind_id'},
    #      u'aggs': {
    #        u'by_date': {u'date_histogram': {u'field': u'datetime', u'interval': u'day'}}}}},
    #  u'size': 0}
    def _by_field(agg_field_params, results):
      aggregated_by_field = defaultdict(list)

      for agg_means, agg_means_params in agg_field_params.iteritems():
        if agg_means == 'terms':
          field_name = agg_means_params['field']
          for result in results:
            value = result['_source'][field_name]
            aggregated_by_field[value].append(result)
        elif agg_means == 'date_histogram':
          field_name = agg_means_params['field']
          interval = agg_means_params['interval']
          for result in results:
            value = transform(result['_source'][field_name], field_name)
            aggregated_by_field[getattr(value, interval)].append(result)
        elif agg_means == 'aggs':
          # Skip. Handled below.
          continue
        else:
          raise Exception('Unsupported aggregation method: %s' % agg_means)

      # Invoke the aggregation recursively.
      buckets = []
      for field_value, field_results in aggregated_by_field.iteritems():
        aggregated = _aggregate(agg_field_params, field_results)
        if isinstance(aggregated, list):
          aggregated = {'doc_count': len(aggregated)}

        aggregated['key'] = field_value
        buckets.append(aggregated)

      return {'buckets': buckets}

    def _aggregate(query_config, results):
      agg_params = query_config.get(u'aggs')
      if not agg_params:
        return results

      by_field_name = {}
      for agg_field_name, agg_field_params in agg_params.iteritems():
        by_field_name[agg_field_name] = _by_field(agg_field_params, results)

      return by_field_name

    final_result = {
      'hits': {
        'hits': found,
        'total': len(found),
      },
      '_shards': {
        'successful': 1,
        'total': 1,
      },
      'aggregations': _aggregate(request, found),
    }

    if query_params.get('scroll'):
      scroll_id = str(uuid.uuid4())
      scrolls[scroll_id] = EMPTY_RESULT
      final_result['_scroll_id'] = scroll_id

    return {
      'status_code': 200,
      'headers': {'Content-Type': 'application/json'},
      'content': json.dumps(final_result),
    }

  @urlmatch(netloc=FAKE_ES_HOST)
  def catchall_handler(url, request):
    print "Unsupported URL: %s %s" % (request.method, url, )
    return {'status_code': 501}

  handlers = [get_template, put_template, index_delete, index_lookup, post_doc, count_docs,
              lookup_docs, lookup_scroll, delete_scroll, catchall_handler]

  with HTTMock(*handlers):
    yield
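The fake_elasticsearch context manager above registers HTTMock handlers for FAKE_ES_HOST, so any Elasticsearch client pointed at that host is served from the in-memory docs dict instead of a real cluster. A minimal usage sketch, mirroring how test_logs_interface.py (further down in this commit) wires it up; the port value and the single log_action call are illustrative only:

from data.logs_model.document_logs_model import DocumentLogsModel
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch

def example():
  # All HTTP traffic to FAKE_ES_HOST is intercepted by the fake handlers.
  with fake_elasticsearch():
    logs_model = DocumentLogsModel(producer='elasticsearch',
                                   elasticsearch_config={'host': FAKE_ES_HOST, 'port': 12345})
    logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')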
400
data/logs_model/test/mock_elasticsearch.py
Normal file
@@ -0,0 +1,400 @@
# -*- coding: utf-8 -*-
import json

from datetime import datetime
from dateutil.parser import parse

from data.logs_model.datatypes import LogEntriesPage, Log, AggregatedLogCount


def _status(d, code=200):
  return {"status_code": code, "content": json.dumps(d)}


def _shards(d, total=5, failed=0, successful=5):
  d.update({"_shards": {"total": total, "failed": failed, "successful": successful}})
  return d


def _hits(hits):
  return {"hits": {"total": len(hits), "max_score": None, "hits": hits}}


INDEX_LIST_RESPONSE_HIT1_HIT2 = _status({
  "logentry_2018-03-08": {},
  "logentry_2018-04-02": {}
})


INDEX_LIST_RESPONSE_HIT2 = _status({
  "logentry_2018-04-02": {}
})


INDEX_LIST_RESPONSE = _status({
  "logentry_2019-01-01": {},
  "logentry_2017-03-08": {},
  "logentry_2018-03-08": {},
  "logentry_2018-04-02": {}
})


DEFAULT_TEMPLATE_RESPONSE = _status({"acknowledged": True})
INDEX_RESPONSE_2019_01_01 = _status(
  _shards({
    "_index": "logentry_2019-01-01",
    "_type": "_doc",
    "_id": "1",
    "_version": 1,
    "_seq_no": 0,
    "_primary_term": 1,
    "result": "created"
  }))

INDEX_RESPONSE_2017_03_08 = _status(
  _shards({
    "_index": "logentry_2017-03-08",
    "_type": "_doc",
    "_id": "1",
    "_version": 1,
    "_seq_no": 0,
    "_primary_term": 1,
    "result": "created"
  }))

FAILURE_400 = _status({}, 400)

INDEX_REQUEST_2019_01_01 = [
  "logentry_2019-01-01", {
    "account_id": 1,
    "repository_id": 1,
    "ip": "192.168.1.1",
    "random_id": 233,
    "datetime": "2019-01-01T03:30:00",
    "metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
    "performer_id": 1,
    "kind_id": 1
  }
]

INDEX_REQUEST_2017_03_08 = [
  "logentry_2017-03-08", {
    "repository_id": 1,
    "account_id": 1,
    "ip": "192.168.1.1",
    "random_id": 233,
    "datetime": "2017-03-08T03:30:00",
    "metadata_json": json.loads("{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}"),
    "performer_id": 1,
    "kind_id": 2
  }
]

_hit1 = {
  "_index": "logentry_2018-03-08",
  "_type": "doc",
  "_id": "1",
  "_score": None,
  "_source": {
    "random_id": 233,
    "kind_id": 1,
    "account_id": 1,
    "performer_id": 1,
    "repository_id": 1,
    "ip": "192.168.1.1",
    "metadata_json": "{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
    "datetime": "2018-03-08T03:30",
  },
  "sort": [1520479800000, 233]
}

_hit2 = {
  "_index": "logentry_2018-04-02",
  "_type": "doc",
  "_id": "2",
  "_score": None,
  "_source": {
    "random_id": 233,
    "kind_id": 2,
    "account_id": 1,
    "performer_id": 1,
    "repository_id": 1,
    "ip": "192.168.1.2",
    "metadata_json": "{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
    "datetime": "2018-04-02T03:30",
  },
  "sort": [1522639800000, 233]
}

_log1 = Log(
  "{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1520479800}",
  "192.168.1.1", parse("2018-03-08T03:30"), "user1.email", "user1.username", "user1.robot",
  "user1.organization", "user1.username", "user1.email", "user1.robot", 1)
_log2 = Log(
  "{\"\\ud83d\\ude02\": \"\\ud83d\\ude02\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\\ud83d\\udc4c\", \"key\": \"value\", \"time\": 1522639800}",
  "192.168.1.2", parse("2018-04-02T03:30"), "user1.email", "user1.username", "user1.robot",
  "user1.organization", "user1.username", "user1.email", "user1.robot", 2)

SEARCH_RESPONSE_START = _status(_shards(_hits([_hit1, _hit2])))
SEARCH_RESPONSE_END = _status(_shards(_hits([_hit2])))
SEARCH_REQUEST_START = {
  "sort": [{"datetime": "desc"}, {"random_id.keyword": "desc"}],
  "query": {
    "bool": {
      "filter": [{"term": {"performer_id": 1}}, {"term": {"repository_id": 1}}]
    }
  },
  "size": 2
}
SEARCH_REQUEST_END = {
  "sort": [{"datetime": "desc"}, {"random_id.keyword": "desc"}],
  "query": {
    "bool": {
      "filter": [{"term": {"performer_id": 1}}, {"term": {"repository_id": 1}}]
    }
  },
  "search_after": [1520479800000, 233],
  "size": 2
}
SEARCH_REQUEST_FILTER = {
  "sort": [{"datetime": "desc"}, {"random_id.keyword": "desc"}],
  "query": {
    "bool": {
      "filter": [{"term": {"performer_id": 1}}, {"term": {"repository_id": 1}},
                 {"bool": {"must_not": [{"terms": {"kind_id": [1]}}]}}]
    }
  },
  "size": 2
}
SEARCH_PAGE_TOKEN = {
  "datetime": datetime(2018, 3, 8, 3, 30).isoformat(),
  "random_id": 233,
  "page_number": 1
}
SEARCH_PAGE_START = LogEntriesPage(logs=[_log1], next_page_token=SEARCH_PAGE_TOKEN)
SEARCH_PAGE_END = LogEntriesPage(logs=[_log2], next_page_token=None)
SEARCH_PAGE_EMPTY = LogEntriesPage([], None)

AGGS_RESPONSE = _status(
  _shards({
    "hits": {"total": 4, "max_score": None, "hits": []},
    "aggregations": {
      "by_id": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [{
          "key": 2,
          "doc_count": 3,
          "by_date": {
            "buckets": [{
              "key_as_string": "2009-11-12T00:00:00.000Z",
              "key": 1257984000000,
              "doc_count": 1
            }, {
              "key_as_string": "2009-11-13T00:00:00.000Z",
              "key": 1258070400000,
              "doc_count": 0
            }, {
              "key_as_string": "2009-11-14T00:00:00.000Z",
              "key": 1258156800000,
              "doc_count": 2
            }]
          }
        }, {
          "key": 1,
          "doc_count": 1,
          "by_date": {
            "buckets": [{
              "key_as_string": "2009-11-15T00:00:00.000Z",
              "key": 1258243200000,
              "doc_count": 1
            }]
          }
        }]
      }
    }
  }))

AGGS_REQUEST = {
  "query": {
    "bool": {
      "filter": [{"term": {"performer_id": 1}}, {"term": {"repository_id": 1}},
                 {"bool": {"must_not": [{"terms": {"kind_id": [2]}}]}}],
      "must": [{
        "range": {
          "datetime": {"lt": "2018-04-08T03:30:00", "gte": "2018-03-08T03:30:00"}
        }
      }]
    }
  },
  "aggs": {
    "by_id": {
      "terms": {"field": "kind_id"},
      "aggs": {
        "by_date": {"date_histogram": {"field": "datetime", "interval": "day"}}
      }
    }
  },
  "size": 0
}

AGGS_COUNT = [
  AggregatedLogCount(1, 1, parse("2009-11-15T00:00:00.000")),
  AggregatedLogCount(2, 1, parse("2009-11-12T00:00:00.000")),
  AggregatedLogCount(2, 2, parse("2009-11-14T00:00:00.000"))
]

COUNT_REQUEST = {
  "query": {
    "bool": {
      "filter": [{"term": {"repository_id": 1}}]
    }
  }
}
COUNT_RESPONSE = _status(_shards({
  "count": 1,
}))

# assume there are 2 pages
_scroll_id = "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAACEmFkk1aGlTRzdSUWllejZmYTlEYTN3SVEAAAAAAAAhJRZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lRAAAAAAAAHtAWLWZpaFZXVzVSTy1OTXA5V3MwcHZrZwAAAAAAAB7RFi1maWhWV1c1Uk8tTk1wOVdzMHB2a2cAAAAAAAAhJxZJNWhpU0c3UlFpZXo2ZmE5RGEzd0lR"


def _scroll(d):
  d["_scroll_id"] = _scroll_id
  return d


SCROLL_CREATE = _status(_shards(_scroll(_hits([_hit1]))))
SCROLL_GET = _status(_shards(_scroll(_hits([_hit2]))))
SCROLL_GET_2 = _status(_shards(_scroll(_hits([]))))
SCROLL_DELETE = _status({"succeeded": True, "num_freed": 5})
SCROLL_LOGS = [[_log1], [_log2]]

SCROLL_REQUESTS = [
  [
    "5m", 1, {
      "sort": "_doc",
      "query": {
        "range": {
          "datetime": {"lt": "2018-04-02T00:00:00", "gte": "2018-03-08T00:00:00"}
        }
      }
    }
  ],
  [{"scroll": "5m", "scroll_id": _scroll_id}],
  [{"scroll": "5m", "scroll_id": _scroll_id}],
  [{"scroll_id": [_scroll_id]}],
]

SCROLL_RESPONSES = [SCROLL_CREATE, SCROLL_GET, SCROLL_GET_2, SCROLL_DELETE]
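SCROLL_REQUESTS and SCROLL_RESPONSES line up one-to-one with the scroll lifecycle the tests exercise: create the scroll, fetch two pages, then delete it. A small sketch of how these fixtures are wired into a mocked client, mirroring test_yield_logs_for_export in test_elasticsearch.py below (the mock_es name is illustrative):

from mock import Mock
from mock_elasticsearch import SCROLL_RESPONSES

mock_es = Mock()
# First canned response answers the scroll creation request.
mock_es.search_scroll_create = Mock(return_value=SCROLL_RESPONSES[0])
# Middle responses answer the page fetches, in order.
mock_es.scroll_get = Mock(side_effect=SCROLL_RESPONSES[1:-1])
# Last response answers the scroll cleanup request.
mock_es.scroll_delete = Mock(return_value=SCROLL_RESPONSES[-1])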
130
data/logs_model/test/test_combined_model.py
Normal file
@@ -0,0 +1,130 @@
from datetime import date, datetime, timedelta

from freezegun import freeze_time

from data.logs_model.inmemory_model import InMemoryModel
from data.logs_model.combined_model import CombinedLogsModel

from test.fixtures import *


@pytest.fixture()
def first_model():
  return InMemoryModel()


@pytest.fixture()
def second_model():
  return InMemoryModel()


@pytest.fixture()
def combined_model(first_model, second_model, initialized_db):
  return CombinedLogsModel(first_model, second_model)


def test_log_action(first_model, second_model, combined_model, initialized_db):
  day = date(2019, 1, 1)

  # Write to the combined model.
  with freeze_time(day):
    combined_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                              ip='1.2.3.4')

  simple_repo = model.repository.get_repository('devtable', 'simple')

  # Make sure it is found in the first model but not the second.
  assert combined_model.count_repository_actions(simple_repo, day) == 1
  assert first_model.count_repository_actions(simple_repo, day) == 1
  assert second_model.count_repository_actions(simple_repo, day) == 0


def test_count_repository_actions(first_model, second_model, combined_model, initialized_db):
  # Write to each model.
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')

  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')
  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')

  # Ensure the counts match as expected.
  day = datetime.today() - timedelta(minutes=60)
  simple_repo = model.repository.get_repository('devtable', 'simple')

  assert first_model.count_repository_actions(simple_repo, day) == 3
  assert second_model.count_repository_actions(simple_repo, day) == 2
  assert combined_model.count_repository_actions(simple_repo, day) == 5


def test_yield_logs_for_export(first_model, second_model, combined_model, initialized_db):
  now = datetime.now()

  # Write to each model.
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')

  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')
  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')

  later = datetime.now()

  # Ensure the full set of logs is yielded.
  first_logs = list(first_model.yield_logs_for_export(now, later))[0]
  second_logs = list(second_model.yield_logs_for_export(now, later))[0]

  combined = list(combined_model.yield_logs_for_export(now, later))
  full_combined = []
  for subset in combined:
    full_combined.extend(subset)

  assert len(full_combined) == len(first_logs) + len(second_logs)
  assert full_combined == (first_logs + second_logs)


def test_lookup_logs(first_model, second_model, combined_model, initialized_db):
  now = datetime.now()

  # Write to each model.
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')
  first_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                         ip='1.2.3.4')

  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')
  second_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')

  later = datetime.now()

  def _collect_logs(model):
    page_token = None
    all_logs = []
    while True:
      paginated_logs = model.lookup_logs(now, later, page_token=page_token)
      page_token = paginated_logs.next_page_token
      all_logs.extend(paginated_logs.logs)
      if page_token is None:
        break
    return all_logs

  first_logs = _collect_logs(first_model)
  second_logs = _collect_logs(second_model)
  combined = _collect_logs(combined_model)

  assert len(combined) == len(first_logs) + len(second_logs)
  assert combined == (first_logs + second_logs)
529
data/logs_model/test/test_elasticsearch.py
Normal file
@@ -0,0 +1,529 @@
# -*- coding: utf-8 -*-

# pylint: disable=redefined-outer-name, wildcard-import

import json
from datetime import datetime, timedelta

import pytest
from mock import patch, Mock
from dateutil.parser import parse

from httmock import urlmatch, HTTMock

from data.model.log import _json_serialize
from data.logs_model.elastic_logs import ElasticsearchLogs, INDEX_NAME_PREFIX, INDEX_DATE_FORMAT
from data.logs_model import configure, LogsModelProxy
from mock_elasticsearch import *

FAKE_ES_HOST = 'fakees'
FAKE_ES_HOST_PATTERN = r'fakees.*'
FAKE_ES_PORT = 443
FAKE_AWS_ACCESS_KEY = None
FAKE_AWS_SECRET_KEY = None
FAKE_AWS_REGION = None


@pytest.fixture()
def logs_model_config():
  conf = {
    'LOGS_MODEL': 'elasticsearch',
    'LOGS_MODEL_CONFIG': {
      'producer': 'elasticsearch',
      'elasticsearch_config': {
        'host': FAKE_ES_HOST,
        'port': FAKE_ES_PORT,
        'access_key': FAKE_AWS_ACCESS_KEY,
        'secret_key': FAKE_AWS_SECRET_KEY,
        'aws_region': FAKE_AWS_REGION
      }
    }
  }
  return conf


FAKE_LOG_ENTRY_KINDS = {'push_repo': 1, 'pull_repo': 2}
FAKE_NAMESPACES = {
  'user1':
    Mock(id=1, organization="user1.organization", username="user1.username", email="user1.email",
         robot="user1.robot"),
  'user2':
    Mock(id=2, organization="user2.organization", username="user2.username", email="user2.email",
         robot="user2.robot")
}
FAKE_REPOSITORIES = {
  'user1/repo1': Mock(id=1, namespace_user=FAKE_NAMESPACES['user1']),
  'user2/repo2': Mock(id=2, namespace_user=FAKE_NAMESPACES['user2']),
}


@pytest.fixture()
def logs_model():
  # prevent logs model from changing
  logs_model = LogsModelProxy()
  with patch('data.logs_model.logs_model', logs_model):
    yield logs_model


@pytest.fixture(scope='function')
def app_config(logs_model_config):
  fake_config = {}
  fake_config.update(logs_model_config)
  with patch("data.logs_model.document_logs_model.config.app_config", fake_config):
    yield fake_config


@pytest.fixture()
def mock_page_size():
  with patch('data.logs_model.document_logs_model.PAGE_SIZE', 1):
    yield


@pytest.fixture()
def mock_max_result_window():
  with patch('data.logs_model.document_logs_model.DEFAULT_RESULT_WINDOW', 1):
    yield


@pytest.fixture
def mock_random_id():
  mock_random = Mock(return_value=233)
  with patch('data.logs_model.document_logs_model._random_id', mock_random):
    yield


@pytest.fixture()
def mock_db_model():
  def get_user_map_by_ids(namespace_ids):
    mapping = {}
    for i in namespace_ids:
      for name in FAKE_NAMESPACES:
        if FAKE_NAMESPACES[name].id == i:
          mapping[i] = FAKE_NAMESPACES[name]
    return mapping

  model = Mock(
    user=Mock(
      get_namespace_user=FAKE_NAMESPACES.get,
      get_user_or_org=FAKE_NAMESPACES.get,
      get_user=FAKE_NAMESPACES.get,
      get_user_map_by_ids=get_user_map_by_ids,
    ),
    repository=Mock(get_repository=lambda user_name, repo_name: FAKE_REPOSITORIES.get(
      user_name + '/' + repo_name),
    ),
    log=Mock(
      _get_log_entry_kind=lambda name: FAKE_LOG_ENTRY_KINDS[name],
      _json_serialize=_json_serialize,
      get_log_entry_kinds=Mock(return_value=FAKE_LOG_ENTRY_KINDS),
    ),
  )

  with patch('data.logs_model.document_logs_model.model', model), patch(
      'data.logs_model.datatypes.model', model):
    yield


def parse_query(query):
  return {s.split('=')[0]: s.split('=')[1] for s in query.split("&") if s != ""}


@pytest.fixture()
def mock_elasticsearch():
  mock = Mock()
  mock.template.side_effect = NotImplementedError
  mock.index.side_effect = NotImplementedError
  mock.count.side_effect = NotImplementedError
  mock.scroll_get.side_effect = NotImplementedError
  mock.scroll_delete.side_effect = NotImplementedError
  mock.search_scroll_create.side_effect = NotImplementedError
  mock.search_aggs.side_effect = NotImplementedError
  mock.search_after.side_effect = NotImplementedError
  mock.list_indices.side_effect = NotImplementedError

  @urlmatch(netloc=r'.*', path=r'.*')
  def default(url, req):
    raise Exception('\nurl={}\nmethod={}\nreq.url={}\nheaders={}\nbody={}'.format(
      url, req.method, req.url, req.headers, req.body))

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_template/.*')
  def template(url, req):
    return mock.template(url.query.split('/')[-1], req.body)

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]+)')
  def list_indices(url, req):
    return mock.list_indices()

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_[0-9\-]*/_doc')
  def index(url, req):
    index = url.path.split('/')[1]
    body = json.loads(req.body)
    body['metadata_json'] = json.loads(body['metadata_json'])
    return mock.index(index, body)

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_([0-9\-]*|\*)/_count')
  def count(_, req):
    return mock.count(json.loads(req.body))

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/_search/scroll')
  def scroll(url, req):
    if req.method == 'DELETE':
      return mock.scroll_delete(json.loads(req.body))
    elif req.method == 'GET':
      request_obj = json.loads(req.body)
      return mock.scroll_get(request_obj)
    raise NotImplementedError()

  @urlmatch(netloc=FAKE_ES_HOST_PATTERN, path=r'/logentry_(\*|[0-9\-]*)/_search')
  def search(url, req):
    if "scroll" in url.query:
      query = parse_query(url.query)
      window_size = query['scroll']
      maximum_result_size = int(query['size'])
      return mock.search_scroll_create(window_size, maximum_result_size, json.loads(req.body))
    elif "aggs" in req.body:
      return mock.search_aggs(json.loads(req.body))
    else:
      return mock.search_after(json.loads(req.body))

  with HTTMock(scroll, count, search, index, template, list_indices, default):
    yield mock


@pytest.mark.parametrize(
  """
  unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
  timestamp,
  index_response, expected_request, throws
  """,
  [
    # Invalid inputs
    pytest.param(False, 'non-existing', None, None, None, None,
                 None, None, True,
                 id="Invalid Kind"),
    pytest.param(False, 'pull_repo', 'user1', Mock(id=1), 'repo1', None,
                 None, None, True,
                 id="Invalid Parameters"),

    # Remote exceptions
    pytest.param(False, 'pull_repo', 'user1', Mock(id=1), None, None,
                 FAILURE_400, None, True,
                 id="Throw on pull log failure"),
    pytest.param(True, 'pull_repo', 'user1', Mock(id=1), None, parse("2017-03-08T03:30"),
                 FAILURE_400, INDEX_REQUEST_2017_03_08, False,
                 id="Ok on pull log failure"),

    # Success executions
    pytest.param(False, 'pull_repo', 'user1', Mock(id=1), None, parse("2017-03-08T03:30"),
                 INDEX_RESPONSE_2017_03_08, INDEX_REQUEST_2017_03_08, False,
                 id="Log with namespace name and repository"),
    pytest.param(False, 'push_repo', 'user1', None, 'repo1', parse("2019-01-01T03:30"),
                 INDEX_RESPONSE_2019_01_01, INDEX_REQUEST_2019_01_01, False,
                 id="Log with namespace name and repository name"),
  ])
def test_log_action(unlogged_pulls_ok, kind_name, namespace_name, repository, repository_name,
                    timestamp,
                    index_response, expected_request, throws,
                    app_config, logs_model, mock_elasticsearch, mock_db_model, mock_random_id):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
  mock_elasticsearch.index = Mock(return_value=index_response)
  app_config['ALLOW_PULLS_WITHOUT_STRICT_LOGGING'] = unlogged_pulls_ok
  configure(app_config)

  performer = Mock(id=1)
  ip = "192.168.1.1"
  metadata = {'key': 'value', 'time': parse("2018-03-08T03:30"), '😂': '😂👌👌👌👌'}
  if throws:
    with pytest.raises(Exception):
      logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
                            repository_name, timestamp)
  else:
    logs_model.log_action(kind_name, namespace_name, performer, ip, metadata, repository,
                          repository_name, timestamp)
    mock_elasticsearch.index.assert_called_with(*expected_request)


@pytest.mark.parametrize(
  """
  start_datetime, end_datetime,
  performer_name, repository_name, namespace_name,
  filter_kinds,
  page_token,
  max_page_count,
  search_response,
  list_indices_response,
  expected_request,
  expected_page,
  throws
  """,
  [
    # 1st page
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
                 'user1', 'repo1', 'user1',
                 None, None, None,
                 SEARCH_RESPONSE_START, INDEX_LIST_RESPONSE_HIT1_HIT2,
                 SEARCH_REQUEST_START, SEARCH_PAGE_START, False,
                 id="1st page"),

    # Last page
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
                 'user1', 'repo1', 'user1',
                 None, SEARCH_PAGE_TOKEN, None,
                 SEARCH_RESPONSE_END, INDEX_LIST_RESPONSE_HIT1_HIT2,
                 SEARCH_REQUEST_END, SEARCH_PAGE_END, False,
                 id="Search using pagination token"),

    # Filter
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
                 'user1', 'repo1', 'user1',
                 ['push_repo'], None, None,
                 SEARCH_RESPONSE_END, INDEX_LIST_RESPONSE_HIT2,
                 SEARCH_REQUEST_FILTER, SEARCH_PAGE_END, False,
                 id="Filtered search"),

    # Max page count
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
                 'user1', 'repo1', 'user1',
                 None, SEARCH_PAGE_TOKEN, 1,
                 AssertionError,  # Assert that it should not reach the ES server
                 None, None, SEARCH_PAGE_EMPTY, False,
                 id="Page token reaches maximum page count"),
  ])
def test_lookup_logs(start_datetime, end_datetime,
                     performer_name, repository_name, namespace_name,
                     filter_kinds,
                     page_token,
                     max_page_count,
                     search_response,
                     list_indices_response,
                     expected_request,
                     expected_page,
                     throws,
                     logs_model, mock_elasticsearch, mock_db_model, mock_page_size, app_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
  mock_elasticsearch.search_after = Mock(return_value=search_response)
  mock_elasticsearch.list_indices = Mock(return_value=list_indices_response)

  configure(app_config)
  if throws:
    with pytest.raises(Exception):
      logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
                             namespace_name, filter_kinds, page_token, max_page_count)
  else:
    page = logs_model.lookup_logs(start_datetime, end_datetime, performer_name, repository_name,
                                  namespace_name, filter_kinds, page_token, max_page_count)
    assert page == expected_page
    if expected_request:
      mock_elasticsearch.search_after.assert_called_with(expected_request)


@pytest.mark.parametrize(
  """
  start_datetime, end_datetime,
  performer_name, repository_name, namespace_name,
  filter_kinds, search_response, expected_request, expected_counts, throws
  """,
  [
    # Valid
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-08T03:30'),
                 'user1', 'repo1', 'user1',
                 ['pull_repo'], AGGS_RESPONSE, AGGS_REQUEST, AGGS_COUNT, False,
                 id="Valid Counts"),

    # Invalid case: date range too big
    pytest.param(parse('2018-03-08T03:30'), parse('2018-04-09T03:30'),
                 'user1', 'repo1', 'user1',
                 [], None, None, None, True,
                 id="Throw on date range too big")
  ])
def test_get_aggregated_log_counts(start_datetime, end_datetime,
                                   performer_name, repository_name, namespace_name,
                                   filter_kinds, search_response, expected_request, expected_counts, throws,
                                   logs_model, mock_elasticsearch, mock_db_model, app_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
  mock_elasticsearch.search_aggs = Mock(return_value=search_response)

  configure(app_config)
  if throws:
    with pytest.raises(Exception):
      logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
                                           repository_name, namespace_name, filter_kinds)
  else:
    counts = logs_model.get_aggregated_log_counts(start_datetime, end_datetime, performer_name,
                                                  repository_name, namespace_name, filter_kinds)
    assert set(counts) == set(expected_counts)
    if expected_request:
      mock_elasticsearch.search_aggs.assert_called_with(expected_request)


@pytest.mark.parametrize(
  """
  repository,
  day,
  count_response, expected_request, expected_count, throws
  """,
  [
    pytest.param(FAKE_REPOSITORIES['user1/repo1'],
                 parse("2018-03-08").date(),
                 COUNT_RESPONSE, COUNT_REQUEST, 1, False,
                 id="Valid Count with 1 as result"),
  ])
def test_count_repository_actions(repository,
                                  day,
                                  count_response, expected_request, expected_count, throws,
                                  logs_model, mock_elasticsearch, mock_db_model, app_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
  mock_elasticsearch.count = Mock(return_value=count_response)
  mock_elasticsearch.list_indices = Mock(return_value=INDEX_LIST_RESPONSE)

  configure(app_config)
  if throws:
    with pytest.raises(Exception):
      logs_model.count_repository_actions(repository, day)
  else:
    count = logs_model.count_repository_actions(repository, day)
    assert count == expected_count
    if expected_request:
      mock_elasticsearch.count.assert_called_with(expected_request)


@pytest.mark.parametrize(
  """
  start_datetime, end_datetime,
  repository_id, namespace_id,
  max_query_time, scroll_responses, expected_requests, expected_logs, throws
  """,
  [
    pytest.param(parse("2018-03-08"), parse("2018-04-02"),
                 1, 1,
                 timedelta(seconds=10), SCROLL_RESPONSES, SCROLL_REQUESTS, SCROLL_LOGS, False,
                 id="Scroll 3 pages with page size = 1"),
  ])
def test_yield_logs_for_export(start_datetime, end_datetime,
                               repository_id, namespace_id,
                               max_query_time, scroll_responses, expected_requests, expected_logs, throws,
                               logs_model, mock_elasticsearch, mock_db_model, mock_max_result_window, app_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)
  mock_elasticsearch.search_scroll_create = Mock(return_value=scroll_responses[0])
  mock_elasticsearch.scroll_get = Mock(side_effect=scroll_responses[1:-1])
  mock_elasticsearch.scroll_delete = Mock(return_value=scroll_responses[-1])

  configure(app_config)
  if throws:
    with pytest.raises(Exception):
      logs_model.yield_logs_for_export(start_datetime, end_datetime, max_query_time=max_query_time)
  else:
    log_generator = logs_model.yield_logs_for_export(start_datetime, end_datetime,
                                                     max_query_time=max_query_time)
    counter = 0
    for logs in log_generator:
      if counter == 0:
        mock_elasticsearch.search_scroll_create.assert_called_with(*expected_requests[counter])
      else:
        mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[counter])
      assert expected_logs[counter] == logs
      counter += 1
    # the last two requests must be
    # 1. get with response scroll with 0 hits, which indicates the termination condition
    # 2. delete scroll request
    mock_elasticsearch.scroll_get.assert_called_with(*expected_requests[-2])
    mock_elasticsearch.scroll_delete.assert_called_with(*expected_requests[-1])


@pytest.mark.parametrize('prefix, is_valid', [
  pytest.param('..', False, id='Invalid `..`'),
  pytest.param('.', False, id='Invalid `.`'),
  pytest.param('-prefix', False, id='Invalid prefix start -'),
  pytest.param('_prefix', False, id='Invalid prefix start _'),
  pytest.param('+prefix', False, id='Invalid prefix start +'),
  pytest.param('prefix_with_UPPERCASES', False, id='Invalid uppercase'),
  pytest.param('valid_index', True, id='Valid prefix'),
  pytest.param('valid_index_with_numbers1234', True, id='Valid prefix with numbers'),
  pytest.param('a'*256, False, id='Prefix too long')
])
def test_valid_index_prefix(prefix, is_valid):
  assert ElasticsearchLogs._valid_index_prefix(prefix) == is_valid


@pytest.mark.parametrize('index, cutoff_date, expected_result', [
  pytest.param(INDEX_NAME_PREFIX + '2019-06-06', datetime(2019, 6, 8), True,
               id="Index older than cutoff"),
  pytest.param(INDEX_NAME_PREFIX + '2019-06-06', datetime(2019, 6, 4), False,
               id="Index younger than cutoff"),
  pytest.param(INDEX_NAME_PREFIX + '2019-06-06', datetime(2019, 6, 6, 23), False,
               id="Index older than cutoff but timedelta less than 1 day"),
  pytest.param(INDEX_NAME_PREFIX + '2019-06-06', datetime(2019, 6, 7), True,
               id="Index older than cutoff by exactly one day"),
])
def test_can_delete_index(index, cutoff_date, expected_result):
  es = ElasticsearchLogs(index_prefix=INDEX_NAME_PREFIX)
  assert datetime.strptime(index.split(es._index_prefix, 1)[-1], INDEX_DATE_FORMAT)
  assert es.can_delete_index(index, cutoff_date) == expected_result
473
data/logs_model/test/test_logs_interface.py
Normal file
|
@ -0,0 +1,473 @@
|
|||
from datetime import datetime, timedelta, date
|
||||
from data.logs_model.datatypes import AggregatedLogCount
|
||||
from data.logs_model.table_logs_model import TableLogsModel
|
||||
from data.logs_model.combined_model import CombinedLogsModel
|
||||
from data.logs_model.inmemory_model import InMemoryModel
|
||||
from data.logs_model.combined_model import _merge_aggregated_log_counts
|
||||
from data.logs_model.document_logs_model import _date_range_in_single_index, DocumentLogsModel
|
||||
from data.logs_model.interface import LogsIterationTimeout
|
||||
from data.logs_model.test.fake_elasticsearch import FAKE_ES_HOST, fake_elasticsearch
|
||||
|
||||
from data.database import LogEntry, LogEntry2, LogEntry3, LogEntryKind
|
||||
from data import model
|
||||
|
||||
from test.fixtures import *
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_page_size():
|
||||
page_size = 2
|
||||
with patch('data.logs_model.document_logs_model.PAGE_SIZE', page_size):
|
||||
yield page_size
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def clear_db_logs(initialized_db):
|
||||
LogEntry.delete().execute()
|
||||
LogEntry2.delete().execute()
|
||||
LogEntry3.delete().execute()
|
||||
|
||||
|
||||
def combined_model():
|
||||
return CombinedLogsModel(TableLogsModel(), InMemoryModel())
|
||||
|
||||
|
||||
def es_model():
|
||||
return DocumentLogsModel(producer='elasticsearch', elasticsearch_config={
|
||||
'host': FAKE_ES_HOST,
|
||||
'port': 12345,
|
||||
})
|
||||
|
||||
@pytest.fixture()
|
||||
def fake_es():
|
||||
with fake_elasticsearch():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(params=[TableLogsModel, InMemoryModel, es_model, combined_model])
|
||||
def logs_model(request, clear_db_logs, fake_es):
|
||||
return request.param()
|
||||
|
||||
|
||||
def _lookup_logs(logs_model, start_time, end_time, **kwargs):
|
||||
logs_found = []
|
||||
page_token = None
|
||||
while True:
|
||||
found = logs_model.lookup_logs(start_time, end_time, page_token=page_token, **kwargs)
|
||||
logs_found.extend(found.logs)
|
||||
page_token = found.next_page_token
|
||||
if not found.logs or not page_token:
|
||||
break
|
||||
|
||||
assert len(logs_found) == len(set(logs_found))
|
||||
return logs_found
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
|
||||
reason='Flaky on MySQL')
|
||||
@pytest.mark.parametrize('namespace_name, repo_name, performer_name, check_args, expect_results', [
|
||||
pytest.param('devtable', 'simple', 'devtable', {}, True, id='no filters'),
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'performer_name': 'devtable',
|
||||
}, True, id='matching performer'),
|
||||
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'namespace_name': 'devtable',
|
||||
}, True, id='matching namespace'),
|
||||
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'namespace_name': 'devtable',
|
||||
'repository_name': 'simple',
|
||||
}, True, id='matching repository'),
|
||||
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'performer_name': 'public',
|
||||
}, False, id='different performer'),
|
||||
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'namespace_name': 'public',
|
||||
}, False, id='different namespace'),
|
||||
|
||||
pytest.param('devtable', 'simple', 'devtable', {
|
||||
'namespace_name': 'devtable',
|
||||
'repository_name': 'complex',
|
||||
}, False, id='different repository'),
|
||||
])
|
||||
def test_logs(namespace_name, repo_name, performer_name, check_args, expect_results, logs_model):
|
||||
# Add some logs.
|
||||
kinds = list(LogEntryKind.select())
|
||||
user = model.user.get_user(performer_name)
|
||||
|
||||
start_timestamp = datetime.utcnow()
|
||||
timestamp = start_timestamp
|
||||
|
||||
for kind in kinds:
|
||||
for index in range(0, 3):
|
||||
logs_model.log_action(kind.name, namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='1.2.3.4', timestamp=timestamp)
|
||||
timestamp = timestamp + timedelta(seconds=1)
|
||||
|
||||
found = _lookup_logs(logs_model, start_timestamp, start_timestamp + timedelta(minutes=10),
|
||||
**check_args)
|
||||
if expect_results:
|
||||
assert len(found) == len(kinds) * 3
|
||||
else:
|
||||
assert not found
|
||||
|
||||
aggregated_counts = logs_model.get_aggregated_log_counts(start_timestamp,
|
||||
start_timestamp + timedelta(minutes=10),
|
||||
**check_args)
|
||||
if expect_results:
|
||||
assert len(aggregated_counts) == len(kinds)
|
||||
for ac in aggregated_counts:
|
||||
assert ac.count == 3
|
||||
else:
|
||||
assert not aggregated_counts
|
||||
|
||||
|
||||
@pytest.mark.parametrize('filter_kinds, expect_results', [
|
||||
pytest.param(None, True),
|
||||
pytest.param(['push_repo'], True, id='push_repo filter'),
|
||||
pytest.param(['pull_repo'], True, id='pull_repo filter'),
|
||||
pytest.param(['push_repo', 'pull_repo'], False, id='push and pull filters')
|
||||
])
|
||||
def test_lookup_latest_logs(filter_kinds, expect_results, logs_model):
|
||||
kind_map = model.log.get_log_entry_kinds()
|
||||
if filter_kinds:
|
||||
ignore_ids = [kind_map[kind_name] for kind_name in filter_kinds if filter_kinds]
|
||||
else:
|
||||
ignore_ids = []
|
||||
|
||||
now = datetime.now()
|
||||
namespace_name = 'devtable'
|
||||
repo_name = 'simple'
|
||||
performer_name = 'devtable'
|
||||
|
||||
user = model.user.get_user(performer_name)
|
||||
size = 3
|
||||
|
||||
# Log some push actions
|
||||
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=1, seconds=11))
|
||||
logs_model.log_action('push_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=7, seconds=33))
|
||||
|
||||
# Log some pull actions
|
||||
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=0, seconds=3))
|
||||
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=3, seconds=55))
|
||||
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=5, seconds=3))
|
||||
logs_model.log_action('pull_repo', namespace_name=namespace_name, repository_name=repo_name,
|
||||
performer=user, ip='0.0.0.0', timestamp=now-timedelta(days=11, seconds=11))
|
||||
|
||||
# Get the latest logs
|
||||
latest_logs = logs_model.lookup_latest_logs(performer_name, repo_name, namespace_name,
|
||||
filter_kinds=filter_kinds, size=size)
|
||||
|
||||
# Test max lookup size
|
||||
assert len(latest_logs) <= size
|
||||
|
||||
# Make sure that the latest logs returned are in decreasing order
|
||||
assert all(x >= y for x, y in zip(latest_logs, latest_logs[1:]))
|
||||
|
||||
if expect_results:
|
||||
assert latest_logs
|
||||
|
||||
# Lookup all logs filtered by kinds and sort them in reverse chronological order
|
||||
all_logs = _lookup_logs(logs_model, now - timedelta(days=30), now + timedelta(days=30),
|
||||
filter_kinds=filter_kinds, namespace_name=namespace_name,
|
||||
repository_name=repo_name)
|
||||
all_logs = sorted(all_logs, key=lambda l: l.datetime, reverse=True)
|
||||
|
||||
# Check that querying all logs does not return the filtered kinds
|
||||
assert all([log.kind_id not in ignore_ids for log in all_logs])
|
||||
|
||||
# Check that the latest logs contains only th most recent ones
|
||||
assert latest_logs == all_logs[:len(latest_logs)]
|
||||
|
||||
|
||||
def test_count_repository_actions(logs_model):
|
||||
# Log some actions.
|
||||
logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
|
||||
ip='1.2.3.4')
|
||||
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
|
||||
ip='1.2.3.4')
|
||||
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
|
||||
ip='1.2.3.4')
|
||||
|
||||
# Log some actions to a different repo.
|
||||
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
|
||||
ip='1.2.3.4')
|
||||
logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
|
||||
ip='1.2.3.4')
|
||||
|
||||
# Count the actions.
|
||||
day = date.today()
|
||||
simple_repo = model.repository.get_repository('devtable', 'simple')
|
||||
|
||||
count = logs_model.count_repository_actions(simple_repo, day)
|
||||
assert count == 3
|
||||
|
||||
complex_repo = model.repository.get_repository('devtable', 'complex')
|
||||
count = logs_model.count_repository_actions(complex_repo, day)
|
||||
assert count == 2
|
||||
|
||||
# Try counting actions for a few days in the future to ensure it doesn't raise an error.
|
||||
count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
|
||||
assert count == 0
|
||||
|
||||
|
||||
def test_yield_log_rotation_context(logs_model):
  cutoff_date = datetime.now()
  min_logs_per_rotation = 3

  # Log some actions to be archived
  # One day
  logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
                        ip='1.2.3.4', timestamp=cutoff_date-timedelta(days=1, seconds=1))
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
                        ip='5.6.7.8', timestamp=cutoff_date-timedelta(days=1, seconds=2))
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple3',
                        ip='9.10.11.12', timestamp=cutoff_date-timedelta(days=1, seconds=3))
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple4',
                        ip='0.0.0.0', timestamp=cutoff_date-timedelta(days=1, seconds=4))
  # Another day
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
                        ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=1))
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
                        ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=2))
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple5',
                        ip='1.1.1.1', timestamp=cutoff_date-timedelta(days=2, seconds=3))

  found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
  assert found is not None and len(found) == 7

  # Iterate the logs using the log rotation contexts
  all_logs = []
  for log_rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
                                                                    min_logs_per_rotation):
    with log_rotation_context as context:
      for logs, _ in context.yield_logs_batch():
        all_logs.extend(logs)

  assert len(all_logs) == 7
  found = _lookup_logs(logs_model, cutoff_date - timedelta(days=3), cutoff_date + timedelta(days=1))
  assert not found

  # Sort the logs by datetime and make sure they are strictly increasing, which also
  # guarantees that no duplicates were returned.
  all_logs.sort(key=lambda d: d.datetime)
  assert all(x.datetime < y.datetime for x, y in zip(all_logs, all_logs[1:]))

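# For reference, the rotation protocol exercised above is roughly the following (a sketch
# based on the calls made in this test, not a guaranteed API surface; archive() stands in
# for whatever the caller does with each batch):
#
#   for rotation_context in logs_model.yield_log_rotation_context(cutoff_date,
#                                                                 min_logs_per_rotation):
#     with rotation_context as context:
#       for batch, batch_id in context.yield_logs_batch():
#         archive(batch)
#     # logs covered by the context are removed once it exits cleanly, which is why the
#     # second _lookup_logs() above comes back empty
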
def test_count_repository_actions_with_wildcard_disabled(initialized_db):
  with fake_elasticsearch(allow_wildcard=False):
    logs_model = es_model()

    # Log some actions.
    logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')

    logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')
    logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
                          ip='1.2.3.4')

    # Log some actions to a different repo.
    logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
                          ip='1.2.3.4')
    logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='complex',
                          ip='1.2.3.4')

    # Count the actions.
    day = date.today()
    simple_repo = model.repository.get_repository('devtable', 'simple')

    count = logs_model.count_repository_actions(simple_repo, day)
    assert count == 3

    complex_repo = model.repository.get_repository('devtable', 'complex')
    count = logs_model.count_repository_actions(complex_repo, day)
    assert count == 2

    # Try counting actions for a few days in the future to ensure it doesn't raise an error.
    count = logs_model.count_repository_actions(simple_repo, day + timedelta(days=5))
    assert count == 0

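# NOTE on the test above: with allow_wildcard=False the fake Elasticsearch rejects
# wildcard index patterns, so this presumably exercises the code path where the model
# counts actions by addressing the relevant daily indices explicitly instead of querying
# a pattern such as 'logs-*'.
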
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
                    reason='Flaky on MySQL')
def test_yield_logs_for_export(logs_model):
  # Add some logs.
  kinds = list(LogEntryKind.select())
  user = model.user.get_user('devtable')

  start_timestamp = datetime.utcnow()
  timestamp = start_timestamp

  for kind in kinds:
    for index in range(0, 10):
      logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
                            performer=user, ip='1.2.3.4', timestamp=timestamp)
      timestamp = timestamp + timedelta(seconds=1)

  # Yield the logs.
  simple_repo = model.repository.get_repository('devtable', 'simple')
  logs_found = []
  for logs in logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=10),
                                               repository_id=simple_repo.id):
    logs_found.extend(logs)

  # Ensure we found all added logs.
  assert len(logs_found) == len(kinds) * 10

def test_yield_logs_for_export_timeout(logs_model):
  # Add some logs.
  kinds = list(LogEntryKind.select())
  user = model.user.get_user('devtable')

  start_timestamp = datetime.utcnow()
  timestamp = start_timestamp

  for kind in kinds:
    for _ in range(0, 2):
      logs_model.log_action(kind.name, namespace_name='devtable', repository_name='simple',
                            performer=user, ip='1.2.3.4', timestamp=timestamp)
      timestamp = timestamp + timedelta(seconds=1)

  # Yield the logs. Since the maximum query time is set to zero, it should fail immediately.
  simple_repo = model.repository.get_repository('devtable', 'simple')
  with pytest.raises(LogsIterationTimeout):
    list(logs_model.yield_logs_for_export(start_timestamp, timestamp + timedelta(minutes=1),
                                          repository_id=simple_repo.id,
                                          max_query_time=timedelta(seconds=0)))

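# NOTE on the two export tests above: yield_logs_for_export yields logs in batches (hence
# the extend() calls) and is expected to raise LogsIterationTimeout once a query exceeds
# max_query_time, which a zero timedelta triggers immediately.
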
def test_disabled_namespace(clear_db_logs):
  logs_model = TableLogsModel(lambda kind, namespace, is_free: namespace == 'devtable')

  # Log some actions.
  logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple',
                        ip='1.2.3.4')

  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
                        ip='1.2.3.4')
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple',
                        ip='1.2.3.4')

  # Log some actions to a different namespace.
  logs_model.log_action('push_repo', namespace_name='buynlarge', repository_name='orgrepo',
                        ip='1.2.3.4')

  logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
                        ip='1.2.3.4')
  logs_model.log_action('pull_repo', namespace_name='buynlarge', repository_name='orgrepo',
                        ip='1.2.3.4')

  # Count the actions.
  day = datetime.today() - timedelta(minutes=60)
  simple_repo = model.repository.get_repository('devtable', 'simple')
  count = logs_model.count_repository_actions(simple_repo, day)
  assert count == 0

  org_repo = model.repository.get_repository('buynlarge', 'orgrepo')
  count = logs_model.count_repository_actions(org_repo, day)
  assert count == 3

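# NOTE on test_disabled_namespace above: the callable passed to TableLogsModel appears to
# act as a "should skip logging" predicate; returning True for 'devtable' means that
# namespace's actions are dropped, which is why its count is 0 while buynlarge's is 3.
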
@pytest.mark.parametrize('aggregated_log_counts1, aggregated_log_counts2, expected_result', [
  pytest.param(
    [
      AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),   # 1
      AggregatedLogCount(1, 3, datetime(2019, 6, 7, 0, 0)),   # 2
    ],
    [
      AggregatedLogCount(1, 5, datetime(2019, 6, 6, 0, 0)),   # 1
      AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)),   # 2
      AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0)),   # 3
    ],
    [
      AggregatedLogCount(1, 8, datetime(2019, 6, 6, 0, 0)),   # 1
      AggregatedLogCount(1, 10, datetime(2019, 6, 7, 0, 0)),  # 2
      AggregatedLogCount(3, 3, datetime(2019, 6, 1, 0, 0))    # 3
    ]
  ),
  pytest.param(
    [
      AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),   # 1
    ],
    [
      AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)),   # 2
    ],
    [
      AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0)),   # 1
      AggregatedLogCount(1, 7, datetime(2019, 6, 7, 0, 0)),   # 2
    ]
  ),
  pytest.param(
    [],
    [AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))],
    [AggregatedLogCount(1, 3, datetime(2019, 6, 6, 0, 0))]
  ),
])
def test_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2, expected_result):
  assert (sorted(_merge_aggregated_log_counts(aggregated_log_counts1, aggregated_log_counts2)) ==
          sorted(expected_result))

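# The cases above assume merge semantics in which entries sharing (kind_id, datetime)
# have their counts summed and every other entry passes through unchanged. A minimal
# sketch of that behaviour (not the actual implementation):
#
#   from collections import Counter
#   from itertools import chain
#
#   def merge(counts1, counts2):
#     totals = Counter()
#     for entry in chain(counts1, counts2):
#       totals[(entry.kind_id, entry.datetime)] += entry.count
#     return [AggregatedLogCount(kind_id, count, dt)
#             for (kind_id, dt), count in totals.items()]
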
@pytest.mark.parametrize('dt1, dt2, expected_result', [
  # Valid dates
  pytest.param(date(2019, 6, 17), date(2019, 6, 18), True),

  # Invalid dates
  pytest.param(date(2019, 6, 17), date(2019, 6, 17), False),
  pytest.param(date(2019, 6, 17), date(2019, 6, 19), False),
  pytest.param(date(2019, 6, 18), date(2019, 6, 17), False),

  # Valid datetimes
  pytest.param(datetime(2019, 6, 17, 0, 1), datetime(2019, 6, 17, 0, 2), True),

  # Invalid datetimes
  pytest.param(datetime(2019, 6, 17, 0, 2), datetime(2019, 6, 17, 0, 1), False),
  pytest.param(datetime(2019, 6, 17, 11), datetime(2019, 6, 17, 11) + timedelta(hours=14), False),
])
def test_date_range_in_single_index(dt1, dt2, expected_result):
  assert _date_range_in_single_index(dt1, dt2) == expected_result

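# NOTE: _date_range_in_single_index above (and test_pagination below) presume that the
# Elasticsearch-backed model keeps one index per day, so a range maps to a single index
# only when it runs forward in time and stays within one day (or ends exactly at the next
# midnight, as in the date(2019, 6, 17)/date(2019, 6, 18) case).
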
def test_pagination(logs_model, mock_page_size):
  """
  Make sure that pagination does not stop when searching through multiple per-day indices
  and the number of logs found so far equals the page size while there are still indices
  left to be searched.
  """
  day1 = datetime.now()
  day2 = day1 + timedelta(days=1)
  day3 = day2 + timedelta(days=1)

  # Log some actions in day indices
  # One day
  logs_model.log_action('push_repo', namespace_name='devtable', repository_name='simple1',
                        ip='1.2.3.4', timestamp=day1)
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple1',
                        ip='5.6.7.8', timestamp=day1)

  found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
  assert len(found) == mock_page_size

  # Another day
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
                        ip='1.1.1.1', timestamp=day2)
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
                        ip='0.0.0.0', timestamp=day2)

  # Yet another day
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
                        ip='1.1.1.1', timestamp=day3)
  logs_model.log_action('pull_repo', namespace_name='devtable', repository_name='simple2',
                        ip='0.0.0.0', timestamp=day3)

  found = _lookup_logs(logs_model, day1-timedelta(seconds=1), day3+timedelta(seconds=1))
  assert len(found) == 6
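
# NOTE: the mock_page_size fixture presumably caps the search page size at 2, which is why
# the first lookup above (two logged actions) exactly fills one page and the final lookup
# must keep paginating across the day1/day2/day3 indices to return all 6 logs.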
77
data/logs_model/test/test_logs_producer.py
Normal file
@@ -0,0 +1,77 @@
import logging
import pytest

from dateutil.parser import parse
from mock import patch, Mock

import botocore

from data.logs_model import configure

from test_elasticsearch import app_config, logs_model_config, logs_model, mock_elasticsearch, mock_db_model
from mock_elasticsearch import *


logger = logging.getLogger(__name__)

FAKE_KAFKA_BROKERS = ['fake_server1', 'fake_server2']
FAKE_KAFKA_TOPIC = 'sometopic'
FAKE_MAX_BLOCK_SECONDS = 1

@pytest.fixture()
def kafka_logs_producer_config(app_config):
  producer_config = {}
  producer_config.update(app_config)

  kafka_config = {
    'bootstrap_servers': FAKE_KAFKA_BROKERS,
    'topic': FAKE_KAFKA_TOPIC,
    'max_block_seconds': FAKE_MAX_BLOCK_SECONDS
  }

  producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kafka'
  producer_config['LOGS_MODEL_CONFIG']['kafka_config'] = kafka_config
  return producer_config

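# Roughly, the fixture above produces an app config whose LOGS_MODEL_CONFIG looks like the
# sketch below (keys other than 'producer' and 'kafka_config' come from the shared
# app_config fixture and are assumptions here):
#
#   {
#     ...,
#     'producer': 'kafka',
#     'kafka_config': {
#       'bootstrap_servers': ['fake_server1', 'fake_server2'],
#       'topic': 'sometopic',
#       'max_block_seconds': 1,
#     },
#   }
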
@pytest.fixture()
def kinesis_logs_producer_config(app_config):
  producer_config = {}
  producer_config.update(app_config)

  kinesis_stream_config = {
    'stream_name': 'test-stream',
    'aws_region': 'fake_region',
    'aws_access_key': 'some_key',
    'aws_secret_key': 'some_secret'
  }

  producer_config['LOGS_MODEL_CONFIG']['producer'] = 'kinesis_stream'
  producer_config['LOGS_MODEL_CONFIG']['kinesis_stream_config'] = kinesis_stream_config
  return producer_config

def test_kafka_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kafka_logs_producer_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)

  producer_config = kafka_logs_producer_config
  with patch('kafka.client_async.KafkaClient.check_version'), patch('kafka.KafkaProducer.send') as mock_send:
    configure(producer_config)
    logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
                          None, 'repo1', parse("2019-01-01T03:30"))

    mock_send.assert_called_once()

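# NOTE on the test above: KafkaClient.check_version is patched out so no broker connection
# is attempted, and KafkaProducer.send is patched so the assertion only checks that exactly
# one record was handed to the producer for the logged action.
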
def test_kinesis_logs_producers(logs_model, mock_elasticsearch, mock_db_model, kinesis_logs_producer_config):
  mock_elasticsearch.template = Mock(return_value=DEFAULT_TEMPLATE_RESPONSE)

  producer_config = kinesis_logs_producer_config
  with patch('botocore.endpoint.EndpointCreator.create_endpoint'), \
       patch('botocore.client.BaseClient._make_api_call') as mock_send:
    configure(producer_config)
    logs_model.log_action('pull_repo', 'user1', Mock(id=1), '192.168.1.1', {'key': 'value'},
                          None, 'repo1', parse("2019-01-01T03:30"))

    # Check that a PutRecord api call is made.
    # NOTE: The second arg of _make_api_call uses a randomized PartitionKey
    mock_send.assert_called_once_with(u'PutRecord', mock_send.call_args_list[0][0][1])