Add a sitemap.xml for popular public repos

and reference it from the robots.txt
This commit is contained in:
Jake Moshenko 2016-06-17 13:52:27 -04:00
parent c712be05e2
commit a1cf12e460
11 changed files with 146 additions and 52 deletions

View file

@ -2,7 +2,7 @@
# If the stack configuration supplies a robots.txt, copy it into the application.
if [ -e /conf/stack/robots.txt ]
then
# NOTE(review): this diff view carries no +/- markers; the /static copy appears to be the
# pre-change line and the /templates copy its replacement -- confirm against the raw diff.
cp /conf/stack/robots.txt /static/robots.txt
cp /conf/stack/robots.txt /templates/robots.txt
fi
if [ -e /conf/stack/favicon.ico ]

View file

@ -405,3 +405,15 @@ def confirm_email_authorization_for_repo(code):
return found
def list_popular_public_repos(action_count_threshold, time_span):
    """ Returns (namespace username, repository name) tuples for the public repositories
        whose summed action counts since the cutoff meet the given threshold.

        action_count_threshold: inclusive minimum for the sum of RepositoryActionCount.count.
        time_span: timedelta subtracted from the current time to compute the cutoff; only
                   RepositoryActionCount rows dated on or after the cutoff are summed.
    """
    cutoff = datetime.now() - time_span
    return (Repository
            .select(Namespace.username, Repository.name)
            .join(Namespace, on=(Repository.namespace_user == Namespace.id))
            .switch(Repository)
            .join(RepositoryActionCount)
            .where(RepositoryActionCount.date >= cutoff,
                   Repository.visibility == get_public_repo_visibility())
            # Every selected, non-aggregated column must be listed in the GROUP BY: databases
            # that enforce this (e.g. PostgreSQL) reject the query otherwise. Name and username
            # are determined by the repository, so the grouping itself is unchanged.
            .group_by(RepositoryActionCount.repository, Repository.name, Namespace.username)
            .having(fn.Sum(RepositoryActionCount.count) >= action_count_threshold)
            .tuples())

View file

@ -2,15 +2,17 @@ import json
import logging
from urlparse import urlparse
from datetime import timedelta
from cachetools import lru_cache
from flask import (abort, redirect, request, url_for, make_response, Response,
from flask import (abort, redirect, request, url_for, make_response, Response, render_template,
Blueprint, send_from_directory, jsonify, send_file)
from flask.ext.login import current_user
import features
from app import app, billing as stripe, build_logs, avatar, signer, log_archive, config_provider
from app import (app, billing as stripe, build_logs, avatar, signer, log_archive, config_provider,
get_app_url)
from auth import scopes
from auth.auth import require_session_login, process_oauth, has_basic_auth, process_auth_or_cookie
from auth.permissions import (AdministerOrganizationPermission, ReadRepositoryPermission,
@ -282,9 +284,19 @@ def disclaimer():
# Handler for GET /robots.txt; decorated with @no_cache.
@web.route('/robots.txt', methods=['GET'])
@no_cache
def robots():
# NOTE(review): this diff view carries no +/- markers; the send_from_directory return below
# appears to be the pre-change implementation that the following lines replace -- confirm.
return send_from_directory('static', 'robots.txt')
# Render the robots.txt template with the application's base URL and label it as plain text.
robots_txt = make_response(render_template('robots.txt', baseurl=get_app_url()))
robots_txt.headers['Content-Type'] = 'text/plain'
return robots_txt
@web.route('/sitemap.xml', methods=['GET'])
def sitemap():
    """ Renders the sitemap.xml template with the application's base URL and the public
        repositories that accumulated at least 50 counted actions over the past week.
    """
    repo_tuples = model.repository.list_popular_public_repos(50, timedelta(weeks=1))
    rendered = render_template('sitemap.xml', public_repos=repo_tuples, baseurl=get_app_url())

    response = make_response(rendered)
    response.headers['Content-Type'] = 'application/xml'
    return response
@web.route('/buildlogs/<build_uuid>', methods=['GET'])

View file

@ -6,7 +6,7 @@ import calendar
import os
import argparse
from datetime import datetime, timedelta
from datetime import datetime, timedelta, date
from peewee import (SqliteDatabase, create_model_tables, drop_model_tables, savepoint_sqlite,
savepoint)
from itertools import count
@ -506,9 +506,9 @@ def populate_database(minimal=False, with_storage=False):
(1, [(1, [], 'v5.0'), (1, [], 'v6.0')], None)],
None))
__generate_repository(with_storage, new_user_2, 'publicrepo',
'Public repository pullable by the world.', True,
[], (10, [], 'latest'))
publicrepo = __generate_repository(with_storage, new_user_2, 'publicrepo',
'Public repository pullable by the world.', True,
[], (10, [], 'latest'))
__generate_repository(with_storage, outside_org, 'coolrepo',
'Some cool repo.', False,
@ -653,6 +653,7 @@ def populate_database(minimal=False, with_storage=False):
week_ago = today - timedelta(6)
six_ago = today - timedelta(5)
four_ago = today - timedelta(4)
yesterday = datetime.combine(date.today(), datetime.min.time()) - timedelta(hours=6)
__generate_service_key('kid1', 'somesamplekey', new_user_1, today,
ServiceKeyApprovalType.SUPERUSER)
@ -740,6 +741,10 @@ def populate_database(minimal=False, with_storage=False):
timestamp=today,
metadata={'token_code': 'somecode', 'repo': 'orgrepo'})
model.log.log_action('pull_repo', new_user_2.username, repository=publicrepo,
timestamp=yesterday,
metadata={'token_code': 'somecode', 'repo': 'publicrepo'})
model.log.log_action('build_dockerfile', new_user_1.username, repository=building,
timestamp=today,
metadata={'repo': 'building', 'namespace': new_user_1.username,

View file

@ -1,43 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://quay.io/</loc>
<changefreq>hourly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://quay.io/plans/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>https://quay.io/organizations/</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>https://quay.io/repository/</loc>
<changefreq>always</changefreq>
</url>
<url>
<loc>https://quay.io/contact/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>https://quay.io/about/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>https://quay.io/security/</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>https://quay.io/tos</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>https://quay.io/privacy</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
</urlset>

58
templates/sitemap.xml Normal file
View file

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>{{ baseurl }}/</loc>
<changefreq>hourly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>{{ baseurl }}/plans/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/tour/</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/tour/organizations</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/tour/features</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/tour/enterprise</loc>
<changefreq>weekly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/contact/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/about/</loc>
<changefreq>monthly</changefreq>
</url>
<url>
<loc>{{ baseurl }}/security/</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>{{ baseurl }}/tos</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>{{ baseurl }}/privacy</loc>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
{% for namespace, reponame in public_repos -%}
<url>
<loc>{{ baseurl }}/repository/{{ namespace }}/{{ reponame }}</loc>
<changefreq>daily</changefreq>
<priority>0.3</priority>
</url>
{%- endfor %}
</urlset>

Binary file not shown.

View file

@ -111,6 +111,12 @@ class WebEndpointTestCase(EndpointTestCase):
# Requests the 'web.index' endpoint through the test case's getResponse helper.
def test_index(self):
self.getResponse('web.index')
# Requests the 'web.robots' endpoint through the test case's getResponse helper.
# NOTE(review): presumably getResponse asserts on the response status -- it is not visible here.
def test_robots(self):
self.getResponse('web.robots')
# Requests the 'web.sitemap' endpoint through the test case's getResponse helper.
# NOTE(review): presumably getResponse asserts on the response status -- it is not visible here.
def test_sitemap(self):
self.getResponse('web.sitemap')
# Requests the 'web.repository' endpoint for the path 'devtable/simple' via getResponse.
def test_repo_view(self):
self.getResponse('web.repository', path='devtable/simple')

37
test/test_repomodel.py Normal file
View file

@ -0,0 +1,37 @@
import unittest
from datetime import timedelta
from app import app
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model
PUBLIC_USERNAME = 'public'
PUBLIC_REPONAME = 'publicrepo'
class TestRepoModel(unittest.TestCase):
    """ Tests for repository model queries, run against the database prepared by
        setup_database_for_testing.
    """

    def setUp(self):
        setup_database_for_testing(self)
        self.app = app.test_client()
        # Enter a request context that stays active until tearDown exits it.
        self.ctx = app.test_request_context()
        self.ctx.__enter__()

    def tearDown(self):
        finished_database_for_testing(self)
        self.ctx.__exit__(True, None, None)

    def test_popular_repo_list(self):
        # Our repository action count table should have 1 event for the only public
        # repo.
        onlypublic = model.repository.list_popular_public_repos(0, timedelta(weeks=1))
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(len(onlypublic), 1)
        self.assertEqual(onlypublic[0], (PUBLIC_USERNAME, PUBLIC_REPONAME))

        # A threshold of 1 still matches the single event; a threshold of 50 matches nothing.
        self.assertEqual(len(model.repository.list_popular_public_repos(1, timedelta(weeks=1))), 1)
        self.assertEqual(len(model.repository.list_popular_public_repos(50, timedelta(weeks=1))), 0)
if __name__ == '__main__':
unittest.main()

View file

@ -10,6 +10,10 @@ POLL_PERIOD_SECONDS = 10
logger = logging.getLogger(__name__)
def count_repository_actions():
""" Aggregates repository actions from the LogEntry table and writes them to
the RepositoryActionCount table. Returns the number of repositories for
which actions were logged. Returns 0 when there is no more work.
"""
try:
# Get a random repository to count.
today = date.today()
@ -32,11 +36,14 @@ def count_repository_actions():
# Create the row.
try:
RepositoryActionCount.create(repository=to_count, date=yesterday, count=actions)
return 1
except:
logger.exception('Exception when writing count')
except Repository.DoesNotExist:
logger.debug('No further repositories to count')
return 0
class RepositoryActionCountWorker(Worker):
def __init__(self):