Optimize listing of logs by changing to bucket by datetime, over which we have an index

This commit is contained in:
Joseph Schorr 2019-01-28 17:16:52 -05:00
parent 9f09d68ad8
commit d4c74bc1d3
3 changed files with 46 additions and 27 deletions

View file

@ -48,9 +48,11 @@ class TableLogsModel(ActionLogsDataInterface):
ignore=filter_kinds, model=m)
logs, next_page_token = model.modelutil.paginate(logs_query, m,
descending=True, page_token=page_token,
descending=True,
page_token=page_token,
limit=20,
max_page=max_page_count)
max_page=max_page_count,
sort_field_name='datetime')
return LogEntriesPage([Log.for_logentry(log) for log in logs], next_page_token)
# First check the LogEntry3 table for the most recent logs, unless we've been expressly told

View file

@ -1,31 +1,39 @@
import dateutil.parser
from datetime import datetime
from peewee import SQL
def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
max_page=None):
""" Paginates the given query using an ID range, starting at the optional page_token.
def paginate(query, model, descending=False, page_token=None, limit=50, sort_field_alias=None,
max_page=None, sort_field_name=None):
""" Paginates the given query using a field range, starting at the optional page_token.
Returns a *list* of matching results along with an unencrypted page_token for the
next page, if any. If descending is set to True, orders by the ID descending rather
next page, if any. If descending is set to True, orders by the field descending rather
than ascending.
"""
# Note: We use the id_alias for the order_by, but not the where below. The alias is necessary
# for certain queries that use unions in MySQL, as it gets confused on which ID to order by.
# The where clause, on the other hand, cannot use the alias because Postgres does not allow
# aliases in where clauses.
id_field = model.id
if id_alias is not None:
id_field = SQL(id_alias)
# Note: We use the sort_field_alias for the order_by, but not the where below. The alias is
# necessary for certain queries that use unions in MySQL, as it gets confused on which field
# to order by. The where clause, on the other hand, cannot use the alias because Postgres does
# not allow aliases in where clauses.
sort_field_name = sort_field_name or 'id'
sort_field = getattr(model, sort_field_name)
if sort_field_alias is not None:
sort_field_name = sort_field_alias
sort_field = SQL(sort_field_alias)
if descending:
query = query.order_by(id_field.desc())
query = query.order_by(sort_field.desc())
else:
query = query.order_by(id_field)
query = query.order_by(sort_field)
start_id = pagination_start(page_token)
if start_id is not None:
start_index = pagination_start(page_token)
if start_index is not None:
if descending:
query = query.where(model.id <= start_id)
query = query.where(sort_field <= start_index)
else:
query = query.where(model.id >= start_id)
query = query.where(sort_field >= start_index)
query = query.limit(limit + 1)
@ -33,28 +41,37 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
if page_number is not None and max_page is not None and page_number > max_page:
return [], None
return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)
return paginate_query(query, limit=limit, sort_field_name=sort_field_name,
page_number=page_number)
def pagination_start(page_token=None):
""" Returns the start ID for pagination for the given page token. Will return None if None. """
""" Returns the start index for pagination for the given page token. Will return None if None. """
if page_token is not None:
return page_token.get('start_id')
start_index = page_token.get('start_index')
if page_token.get('is_datetime'):
start_index = dateutil.parser.parse(start_index)
return start_index
return None
def paginate_query(query, limit=50, id_alias=None, page_number=None):
def paginate_query(query, limit=50, sort_field_name=None, page_number=None):
""" Executes the given query and returns a page's worth of results, as well as the page token
for the next page (if any).
"""
results = list(query)
page_token = None
if len(results) > limit:
start_id = getattr(results[limit], id_alias or 'id')
start_index = getattr(results[limit], sort_field_name or 'id')
is_datetime = False
if isinstance(start_index, datetime):
start_index = start_index.isoformat() + "Z"
is_datetime = True
page_token = {
'start_id': start_id,
'start_index': start_index,
'page_number': page_number + 1 if page_number else 1,
'is_datetime': is_datetime,
}
return results[0:limit], page_token

View file

@ -89,7 +89,7 @@ class PreOCIModel(RepositoryDataInterface):
kind_filter=repo_kind)
repos, next_page_token = model.modelutil.paginate_query(repo_query, limit=REPOS_PER_PAGE,
id_alias='rid')
sort_field_name='rid')
# Collect the IDs of the repositories found for subsequent lookup of popularity
# and/or last modified.