Optimize listing of logs by changing to bucket by datetime, over which we have an index

2019-01-28 17:16:52 -05:00 · 2019-01-28 17:16:52 -05:00 · d4c74bc1d3
commit d4c74bc1d3
parent 9f09d68ad8
3 changed files with 46 additions and 27 deletions
--- a/data/logs_model/table_logs_model.py
+++ b/data/logs_model/table_logs_model.py
@ -48,9 +48,11 @@ class TableLogsModel(ActionLogsDataInterface):
                                            ignore=filter_kinds, model=m)

      logs, next_page_token = model.modelutil.paginate(logs_query, m,
-                                                       descending=True, page_token=page_token,
+                                                       descending=True,
+                                                       page_token=page_token,
                                                       limit=20,
-                                                       max_page=max_page_count)
+                                                       max_page=max_page_count,
+                                                       sort_field_name='datetime')
      return LogEntriesPage([Log.for_logentry(log) for log in logs], next_page_token)

    # First check the LogEntry3 table for the most recent logs, unless we've been expressly told
--- a/data/model/modelutil.py
+++ b/data/model/modelutil.py
@ -1,31 +1,39 @@
+import dateutil.parser
+
+from datetime import datetime
+
 from peewee import SQL

-def paginate(query, model, descending=False, page_token=None, limit=50, id_alias=None,
-             max_page=None):
-  """ Paginates the given query using an ID range, starting at the optional page_token.
+
+def paginate(query, model, descending=False, page_token=None, limit=50, sort_field_alias=None,
+             max_page=None, sort_field_name=None):
+  """ Paginates the given query using a field range, starting at the optional page_token.
      Returns a *list* of matching results along with an unencrypted page_token for the
-      next page, if any. If descending is set to True, orders by the ID descending rather
+      next page, if any. If descending is set to True, orders by the field descending rather
      than ascending.
  """
-  # Note: We use the id_alias for the order_by, but not the where below. The alias is necessary
-  # for certain queries that use unions in MySQL, as it gets confused on which ID to order by.
-  # The where clause, on the other hand, cannot use the alias because Postgres does not allow
-  # aliases in where clauses.
-  id_field = model.id
-  if id_alias is not None:
-    id_field = SQL(id_alias)
+  # Note: The sort_field_alias is necessary in the order_by for certain queries that use unions
+  # in MySQL, as it gets confused on which field to order by. Postgres does not allow aliases in
+  # where clauses. NOTE(review): the where below reuses sort_field, so it also gets the alias
+  # when one is set (the old code always filtered on model.id) -- confirm this is intended.
+  sort_field_name = sort_field_name or 'id'
+  sort_field = getattr(model, sort_field_name)
+
+  if sort_field_alias is not None:
+    sort_field_name = sort_field_alias
+    sort_field = SQL(sort_field_alias)

  if descending:
-    query = query.order_by(id_field.desc())
+    query = query.order_by(sort_field.desc())
  else:
-    query = query.order_by(id_field)
+    query = query.order_by(sort_field)

-  start_id = pagination_start(page_token)
-  if start_id is not None:
+  start_index = pagination_start(page_token)
+  if start_index is not None:
    if descending:
-      query = query.where(model.id <= start_id)
+      query = query.where(sort_field <= start_index)
    else:
-      query = query.where(model.id >= start_id)
+      query = query.where(sort_field >= start_index)

  query = query.limit(limit + 1)

@ -33,28 +41,37 @@ def paginate(query, model, descending=False, page_token=None, limit=50, id_alias
  if page_number is not None and max_page is not None and page_number > max_page:
    return [], None

-  return paginate_query(query, limit=limit, id_alias=id_alias, page_number=page_number)
+  return paginate_query(query, limit=limit, sort_field_name=sort_field_name,
+                        page_number=page_number)


 def pagination_start(page_token=None):
-  """ Returns the start ID for pagination for the given page token. Will return None if None. """
+  """ Returns the start index for the given page token, or None if no page token is given. """
  if page_token is not None:
-    return page_token.get('start_id')
-
+    start_index = page_token.get('start_index')
+    if page_token.get('is_datetime'):
+      start_index = dateutil.parser.parse(start_index)
+    return start_index
  return None


-def paginate_query(query, limit=50, id_alias=None, page_number=None):
+def paginate_query(query, limit=50, sort_field_name=None, page_number=None):
  """ Executes the given query and returns a page's worth of results, as well as the page token
      for the next page (if any).
  """
  results = list(query)
  page_token = None
  if len(results) > limit:
-    start_id = getattr(results[limit], id_alias or 'id')
+    start_index = getattr(results[limit], sort_field_name or 'id')
+    is_datetime = False
+    if isinstance(start_index, datetime):
+      start_index = start_index.isoformat() + "Z"
+      is_datetime = True
+
    page_token = {
-      'start_id': start_id,
+      'start_index': start_index,
      'page_number': page_number + 1 if page_number else 1,
+      'is_datetime': is_datetime,
    }

  return results[0:limit], page_token
--- a/endpoints/api/repository_models_pre_oci.py
+++ b/endpoints/api/repository_models_pre_oci.py
@ -89,7 +89,7 @@ class PreOCIModel(RepositoryDataInterface):
        kind_filter=repo_kind)

      repos, next_page_token = model.modelutil.paginate_query(repo_query, limit=REPOS_PER_PAGE,
-                                                              id_alias='rid')
+                                                              sort_field_name='rid')

    # Collect the IDs of the repositories found for subequent lookup of popularity
    # and/or last modified.