Fix queries for repository list popularity and action count
Before this change, we used extremely inefficient outer joins as part of a single lookup query, which was spiking our CPU usage to nearly 100% on that query. We now issue two separate queries for popularity and action count, each doing a lookup by the previously found repository IDs. Interestingly enough, because of the way the queries are now written, MySQL can actually serve both queries *directly from the indices*, which means they each complete in approximately 20ms! Verified by local tests, Postgres tests, and testing on staging while monitoring our CPU usage during lookup.
This commit is contained in:
		
							parent
							
								
									d21251c910
								
							
						
					
					
						commit
						7a548ea101
					
				
					 2 changed files with 50 additions and 31 deletions
				
			
		|  | @ -984,19 +984,43 @@ def get_user_teams_within_org(username, organization): | |||
|                       User.username == username) | ||||
| 
 | ||||
| 
 | ||||
def get_when_last_modified(repository_ids):
  """ Returns a map from repository reference to the maximum `lifetime_start_ts` found among
      that repository's tags, for the repositories listed in `repository_ids`.

      Keys are the raw values of the `RepositoryTag.repository` column as returned by the
      query. Repositories that have no tag rows do not appear in the map. An empty
      `repository_ids` yields an empty map without touching the database.
  """
  # Short-circuit: an empty IN (...) clause can never match anything, and some databases
  # reject it outright as invalid SQL.
  if not repository_ids:
    return {}

  tuples = (RepositoryTag
            .select(RepositoryTag.repository, fn.Max(RepositoryTag.lifetime_start_ts))
            .where(RepositoryTag.repository << repository_ids)
            .group_by(RepositoryTag.repository)
            .tuples())

  # Each record is (repository, max lifetime_start_ts), in the order of the select() call.
  return {record[0]: record[1] for record in tuples}
| 
 | ||||
| 
 | ||||
def get_action_counts(repository_ids):
  """ Returns a map from repository reference to the sum of its `RepositoryActionCount.count`
      values over the last week, for the repositories listed in `repository_ids`.

      Keys are the raw values of the `RepositoryActionCount.repository` column as returned by
      the query. Repositories with no action count rows in the window do not appear in the
      map. An empty `repository_ids` yields an empty map without touching the database.
  """
  # Short-circuit: an empty IN (...) clause can never match anything, and some databases
  # reject it outright as invalid SQL.
  if not repository_ids:
    return {}

  # Only consider entries dated within the last week.
  last_week = datetime.now() - timedelta(weeks=1)
  tuples = (RepositoryActionCount
            .select(RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count))
            .where(RepositoryActionCount.repository << repository_ids)
            .where(RepositoryActionCount.date >= last_week)
            .group_by(RepositoryActionCount.repository)
            .tuples())

  # Each record is (repository, summed count), in the order of the select() call.
  return {record[0]: record[1] for record in tuples}
| 
 | ||||
| 
 | ||||
| def get_visible_repositories(username=None, include_public=True, page=None, | ||||
|                              limit=None, namespace=None, namespace_only=False, | ||||
|                              include_actions=False, include_latest_tag=False): | ||||
|                              limit=None, namespace=None, namespace_only=False): | ||||
| 
 | ||||
|   fields = [Repository.name, Repository.id, Repository.description, Visibility.name, | ||||
|             Namespace.username] | ||||
| 
 | ||||
|   if include_actions: | ||||
|     fields.append(fn.Max(RepositoryActionCount.count)) | ||||
| 
 | ||||
|   if include_latest_tag: | ||||
|     fields.append(fn.Max(RepositoryTag.lifetime_start_ts)) | ||||
| 
 | ||||
|   query = _visible_repository_query(username=username, include_public=include_public, page=page, | ||||
|                                     limit=limit, namespace=namespace, | ||||
|                                     select_models=fields) | ||||
|  | @ -1007,23 +1031,6 @@ def get_visible_repositories(username=None, include_public=True, page=None, | |||
|   if namespace and namespace_only: | ||||
|     query = query.where(Namespace.username == namespace) | ||||
| 
 | ||||
|   if include_actions: | ||||
|     # Filter the join to recent entries only. | ||||
|     last_week = datetime.now() - timedelta(weeks=1) | ||||
|     join_query = ((RepositoryActionCount.repository == Repository.id) & | ||||
|                   (RepositoryActionCount.date >= last_week)) | ||||
| 
 | ||||
|     query = (query.switch(Repository) | ||||
|                   .join(RepositoryActionCount, JOIN_LEFT_OUTER, on=join_query) | ||||
|                   .group_by(RepositoryActionCount.repository, Repository.name, Repository.id, | ||||
|                             Repository.description, Visibility.name, Namespace.username)) | ||||
| 
 | ||||
|   if include_latest_tag: | ||||
|     query = (query.switch(Repository) | ||||
|                   .join(RepositoryTag, JOIN_LEFT_OUTER) | ||||
|                   .group_by(RepositoryTag.repository, Repository.name, Repository.id, | ||||
|                             Repository.description, Visibility.name, Namespace.username)) | ||||
| 
 | ||||
|   return TupleSelector(query, fields) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Reference in a new issue