Merge branch 'master' into git

This commit is contained in:
Jimmy Zelinskie 2015-04-16 17:38:35 -04:00
commit ba2cb08904
268 changed files with 7008 additions and 1535 deletions

View file

@ -139,7 +139,7 @@ def uuid_generator():
return str(uuid.uuid4())
# Returns the current UNIX epoch time as an integer number of seconds.
# (Renamed from _get_epoch_timestamp so it can be imported by data.model.)
get_epoch_timestamp = lambda: int(time.time())
def close_db_filter(_):
@ -167,6 +167,17 @@ class BaseModel(ReadSlaveModel):
database = db
read_slaves = (read_slave,)
def __getattribute__(self, name):
  """ Adds _id accessors so that foreign key field IDs can be looked up without making
      a database roundtrip.
  """
  if name.endswith('_id'):
    field_name = name[0:len(name) - 3]
    # Only short-circuit for names that correspond to actual fields on the model;
    # anything else falls through to normal attribute lookup below.
    if field_name in self._meta.fields:
      return self._data.get(field_name)

  return super(BaseModel, self).__getattribute__(name)
class User(BaseModel):
uuid = CharField(default=uuid_generator, max_length=36, null=True)
@ -484,7 +495,7 @@ class RepositoryTag(BaseModel):
name = CharField()
image = ForeignKeyField(Image)
repository = ForeignKeyField(Repository)
lifetime_start_ts = IntegerField(default=_get_epoch_timestamp)
lifetime_start_ts = IntegerField(default=get_epoch_timestamp)
lifetime_end_ts = IntegerField(null=True, index=True)
hidden = BooleanField(default=False)
@ -493,6 +504,9 @@ class RepositoryTag(BaseModel):
read_slaves = (read_slave,)
indexes = (
(('repository', 'name'), False),
# This unique index prevents deadlocks when concurrently moving and deleting tags
(('repository', 'name', 'lifetime_end_ts'), True),
)

View file

@ -19,7 +19,7 @@ up_mysql() {
down_mysql() {
  docker kill mysql
  # -v also removes the container's data volumes so repeated runs start clean.
  docker rm -v mysql
}
up_mariadb() {
@ -36,24 +36,24 @@ up_mariadb() {
down_mariadb() {
  docker kill mariadb
  # -v also removes the container's data volumes so repeated runs start clean.
  docker rm -v mariadb
}
up_percona() {
  # Run a SQL database on port 3306 inside of Docker.
  docker run --name percona -p 3306:3306 -d percona

  # Sleep for 10s to give the server time to come up before we connect.
  echo 'Sleeping for 10...'
  sleep 10

  # Add the database to mysql.
  docker run --rm --link percona:percona percona sh -c 'echo "create database genschema" | mysql -h $PERCONA_PORT_3306_TCP_ADDR'
}
down_percona() {
  docker kill percona
  # -v also removes the container's data volumes so repeated runs start clean.
  docker rm -v percona
}
up_postgres() {
@ -70,7 +70,7 @@ up_postgres() {
down_postgres() {
  docker kill postgres
  # -v also removes the container's data volumes so repeated runs start clean.
  docker rm -v postgres
}
gen_migrate() {

View file

@ -0,0 +1,26 @@
"""Add a unique index to prevent deadlocks with tags.
Revision ID: 2b4dc0818a5e
Revises: 2b2529fd23ff
Create Date: 2015-03-20 23:37:10.558179
"""
# revision identifiers, used by Alembic.
revision = '2b4dc0818a5e'
down_revision = '2b2529fd23ff'
from alembic import op
import sqlalchemy as sa
def upgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.create_index('repositorytag_repository_id_name_lifetime_end_ts', 'repositorytag', ['repository_id', 'name', 'lifetime_end_ts'], unique=True)
### end Alembic commands ###
def downgrade(tables):
### commands auto generated by Alembic - please adjust! ###
op.drop_index('repositorytag_repository_id_name_lifetime_end_ts', table_name='repositorytag')
### end Alembic commands ###

View file

@ -18,7 +18,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
ImageStorageSignatureKind, validate_database_url, db_for_update,
AccessTokenKind, Star)
AccessTokenKind, Star, get_epoch_timestamp)
from peewee import JOIN_LEFT_OUTER, fn
from util.validation import (validate_username, validate_email, validate_password,
INVALID_PASSWORD_MESSAGE)
@ -310,11 +310,54 @@ def _list_entity_robots(entity_name):
.where(User.robot == True, User.username ** (entity_name + '+%')))
def list_entity_robot_tuples(entity_name):
  """ Returns (username, service_ident) tuples for the robot accounts under the
      given user or organization name.
  """
  return (_list_entity_robots(entity_name)
          .select(User.username, FederatedLogin.service_ident)
          .tuples())
class _TupleWrapper(object):
  """ Wraps one raw result tuple and exposes field-based access using the
      field keys computed by TupleSelector.
  """
  def __init__(self, data, fields):
    self._data = data
    self._fields = fields

  def get(self, field):
    # Fields are keyed as 'name:ModelClass' to disambiguate identically-named
    # columns selected from different models in the same query.
    return self._data[self._fields.index(field.name + ':' + field.model_class.__name__)]


class TupleSelector(object):
  """ Helper class for selecting tuples from a peewee query and easily accessing
      them as if they were objects.
  """
  def __init__(self, query, fields):
    self._query = query.select(*fields).tuples()
    self._fields = [field.name + ':' + field.model_class.__name__ for field in fields]

  def __iter__(self):
    return self._build_iterator()

  def _build_iterator(self):
    for tuple_data in self._query:
      yield _TupleWrapper(tuple_data, self._fields)
def list_entity_robot_permission_teams(entity_name):
  """ Returns a TupleSelector over the entity's robot accounts, joined (LEFT OUTER,
      so robots without permissions or teams still appear) with the repositories
      they have permissions on and the teams they belong to.
  """
  query = (_list_entity_robots(entity_name)
           .join(RepositoryPermission, JOIN_LEFT_OUTER,
                 on=(RepositoryPermission.user == FederatedLogin.user))
           .join(Repository, JOIN_LEFT_OUTER)
           .switch(User)
           .join(TeamMember, JOIN_LEFT_OUTER)
           .join(Team, JOIN_LEFT_OUTER))

  fields = [User.username, FederatedLogin.service_ident, Repository.name, Team.name]
  return TupleSelector(query, fields)
def list_robot_permissions(robot_name):
  """ Returns the repository permissions (with repository, visibility, role and
      user pre-selected) held by the robot account with the given username.
  """
  return (RepositoryPermission.select(RepositoryPermission, User, Repository)
          .join(Repository)
          .join(Visibility)
          .switch(RepositoryPermission)
          .join(Role)
          .switch(RepositoryPermission)
          .join(User)
          .where(User.username == robot_name, User.robot == True))
def convert_user_to_organization(user, admin_user):
# Change the user to an organization.
@ -636,6 +679,73 @@ def get_user_or_org_by_customer_id(customer_id):
except User.DoesNotExist:
return None
def get_matching_user_namespaces(namespace_prefix, username, limit=10):
  """ Returns up to `limit` distinct namespace users whose username starts with
      namespace_prefix and which own at least one repository visible to `username`.
  """
  query = (Repository
           .select()
           .join(Namespace, on=(Repository.namespace_user == Namespace.id))
           .switch(Repository)
           .join(Visibility)
           .switch(Repository)
           .join(RepositoryPermission, JOIN_LEFT_OUTER)
           .where(Namespace.username ** (namespace_prefix + '%'))
           .group_by(Repository.namespace_user, Repository))

  count = 0
  namespaces = {}
  for repo in _filter_to_repos_for_user(query, username):
    if repo.namespace_user.username not in namespaces:
      namespaces[repo.namespace_user.username] = repo.namespace_user
      count = count + 1

    if count >= limit:
      break

  return namespaces.values()
def get_matching_user_teams(team_prefix, user, limit=10):
  """ Returns up to `limit` distinct teams of which the user is a member and whose
      name starts with team_prefix.
  """
  query = (Team.select()
           .join(User)
           .switch(Team)
           .join(TeamMember)
           .where(TeamMember.user == user, Team.name ** (team_prefix + '%'))
           .distinct(Team.id)
           .limit(limit))

  return query
def get_matching_robots(name_prefix, username, limit=10):
  """ Returns up to `limit` robot accounts whose short name starts with name_prefix,
      searching under the user's own namespace and under every organization the
      user administers.
  """
  admined_orgs = (get_user_organizations(username)
                  .switch(Team)
                  .join(TeamRole)
                  .where(TeamRole.name == 'admin'))

  # Build one OR-ed prefix clause per candidate namespace (robot usernames are
  # of the form '<namespace>+<shortname>').
  prefix_checks = False

  for org in admined_orgs:
    prefix_checks = prefix_checks | (User.username ** (org.username + '+' + name_prefix + '%'))

  prefix_checks = prefix_checks | (User.username ** (username + '+' + name_prefix + '%'))

  return User.select().where(prefix_checks).limit(limit)
def get_matching_admined_teams(team_prefix, user, limit=10):
  """ Returns up to `limit` distinct teams whose name starts with team_prefix and
      which belong to an organization the user administers.
  """
  admined_orgs = (get_user_organizations(user.username)
                  .switch(Team)
                  .join(TeamRole)
                  .where(TeamRole.name == 'admin'))

  query = (Team.select()
           .join(User)
           .switch(Team)
           .join(TeamMember)
           .where(Team.name ** (team_prefix + '%'), Team.organization << (admined_orgs))
           .distinct(Team.id)
           .limit(limit))

  return query
def get_matching_teams(team_prefix, organization):
query = Team.select().where(Team.name ** (team_prefix + '%'),
Team.organization == organization)
@ -654,13 +764,13 @@ def get_matching_users(username_prefix, robot_namespace=None,
(User.robot == True)))
query = (User
.select(User.username, User.robot)
.group_by(User.username, User.robot)
.select(User.username, User.email, User.robot)
.group_by(User.username, User.email, User.robot)
.where(direct_user_query))
if organization:
query = (query
.select(User.username, User.robot, fn.Sum(Team.id))
.select(User.username, User.email, User.robot, fn.Sum(Team.id))
.join(TeamMember, JOIN_LEFT_OUTER)
.join(Team, JOIN_LEFT_OUTER, on=((Team.id == TeamMember.team) &
(Team.organization == organization))))
@ -669,9 +779,11 @@ def get_matching_users(username_prefix, robot_namespace=None,
class MatchingUserResult(object):
def __init__(self, *args):
self.username = args[0]
self.is_robot = args[1]
self.email = args[1]
self.robot = args[2]
if organization:
self.is_org_member = (args[2] != None)
self.is_org_member = (args[3] != None)
else:
self.is_org_member = None
@ -787,7 +899,7 @@ def get_visible_repository_count(username=None, include_public=True,
def get_visible_repositories(username=None, include_public=True, page=None,
limit=None, sort=False, namespace=None):
limit=None, sort=False, namespace=None, namespace_only=False):
query = _visible_repository_query(username=username, include_public=include_public, page=page,
limit=limit, namespace=namespace,
select_models=[Repository, Namespace, Visibility])
@ -798,6 +910,9 @@ def get_visible_repositories(username=None, include_public=True, page=None,
if limit:
query = query.limit(limit)
if namespace and namespace_only:
query = query.where(Namespace.username == namespace)
return query
@ -876,11 +991,73 @@ def _get_public_repo_visibility():
return _public_repo_visibility_cache
def get_matching_repositories(repo_term, username=None):
def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
  """ Returns repositories matching the given prefix string and passing the given checker
      function, sorted so that recently-active repositories come first.
  """
  last_week = datetime.now() - timedelta(weeks=1)
  results = []
  existing_ids = []

  def get_search_results(search_clause, with_count):
    # Accumulates matches for one search clause into `results`, skipping
    # repositories already found by an earlier (higher-precedence) pass.
    if len(results) >= limit:
      return

    selected = [Repository, Namespace]
    if with_count:
      selected.append(fn.Count(LogEntry.id).alias('count'))

    query = (Repository.select(*selected)
             .join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
             .switch(Repository)
             .where(search_clause)
             .group_by(Repository, Namespace))

    if only_public:
      query = query.where(Repository.visibility == _get_public_repo_visibility())

    if existing_ids:
      query = query.where(~(Repository.id << existing_ids))

    if with_count:
      # Order by activity over the last week when counting log entries.
      query = (query.join(LogEntry, JOIN_LEFT_OUTER)
               .where(LogEntry.datetime >= last_week)
               .order_by(fn.Count(LogEntry.id).desc()))

    for result in query:
      if len(results) >= limit:
        return results

      # Note: We compare IDs here, instead of objects, because calling .visibility on the
      # Repository will kick off a new SQL query to retrieve that visibility enum value. We don't
      # join the visibility table in SQL, as well, because it is ungodly slow in MySQL :-/
      result.is_public = result.visibility_id == _get_public_repo_visibility().id
      result.count = result.count if with_count else 0

      if not checker(result):
        continue

      results.append(result)
      existing_ids.append(result.id)

  # For performance reasons, we conduct the repo name and repo namespace searches on their
  # own, and with and without counts on their own. This also affords us the ability to give
  # higher precedence to repository names matching over namespaces, which is semantically correct.
  get_search_results((Repository.name ** (prefix + '%')), with_count=True)
  get_search_results((Repository.name ** (prefix + '%')), with_count=False)

  get_search_results((Namespace.username ** (prefix + '%')), with_count=True)
  get_search_results((Namespace.username ** (prefix + '%')), with_count=False)

  return results
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
namespace_term = repo_term
name_term = repo_term
visible = get_visible_repositories(username)
visible = get_visible_repositories(username, include_public=include_public)
search_clauses = (Repository.name ** ('%' + name_term + '%') |
Namespace.username ** ('%' + namespace_term + '%'))
@ -894,8 +1071,7 @@ def get_matching_repositories(repo_term, username=None):
search_clauses = (Repository.name ** ('%' + name_term + '%') &
Namespace.username ** ('%' + namespace_term + '%'))
final = visible.where(search_clauses).limit(10)
return list(final)
return visible.where(search_clauses).limit(limit)
def change_password(user, new_password):
@ -905,6 +1081,7 @@ def change_password(user, new_password):
pw_hash = hash_password(new_password)
user.invalid_login_attempts = 0
user.password_hash = pw_hash
user.uuid = str(uuid4())
user.save()
# Remove any password required notifications for the user.
@ -1038,7 +1215,8 @@ def get_all_repo_teams(namespace_name, repository_name):
def get_all_repo_users(namespace_name, repository_name):
return (RepositoryPermission.select(User.username, User.robot, Role.name, RepositoryPermission)
return (RepositoryPermission.select(User.username, User.email, User.robot, Role.name,
RepositoryPermission)
.join(User)
.switch(RepositoryPermission)
.join(Role)
@ -1577,9 +1755,21 @@ def get_repository_images(namespace_name, repository_name):
return _get_repository_images_base(namespace_name, repository_name, lambda q: q)
def _tag_alive(query, now_ts=None):
  """ Filters the given RepositoryTag query down to tags that are still alive:
      those with no end timestamp, or an end timestamp still in the future.
      now_ts may be passed so callers in a transaction use one consistent clock.
  """
  if now_ts is None:
    now_ts = get_epoch_timestamp()

  return query.where((RepositoryTag.lifetime_end_ts >> None) |
                     (RepositoryTag.lifetime_end_ts > now_ts))
def list_repository_tag_history(repository, limit=100):
  """ Returns up to `limit` tags (with their images) for the given repository,
      ordered most-recently-started first; expired tags are included.
  """
  query = (RepositoryTag
           .select(RepositoryTag, Image)
           .join(Image)
           .where(RepositoryTag.repository == repository)
           .order_by(RepositoryTag.lifetime_start_ts.desc())
           .limit(limit))

  return query
def list_repository_tags(namespace_name, repository_name, include_hidden=False,
@ -1610,14 +1800,19 @@ def list_repository_tags(namespace_name, repository_name, include_hidden=False,
def _garbage_collect_tags(namespace_name, repository_name):
  """ Deletes tags whose lifetime ended before the namespace's removed-tag
      expiration window.
  """
  # We do this without using a join to prevent holding read locks on the repository table
  repo = _get_repository(namespace_name, repository_name)
  expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s

  # Select the expired tag IDs first, then delete by ID, to keep the DELETE's
  # lock footprint small and predictable.
  tags_to_delete = list(RepositoryTag
                        .select(RepositoryTag.id)
                        .where(RepositoryTag.repository == repo,
                               ~(RepositoryTag.lifetime_end_ts >> None),
                               (RepositoryTag.lifetime_end_ts <= expired_time))
                        .order_by(RepositoryTag.id))
  if len(tags_to_delete) > 0:
    (RepositoryTag
     .delete()
     .where(RepositoryTag.id << tags_to_delete)
     .execute())
def garbage_collect_repository(namespace_name, repository_name):
@ -1713,46 +1908,39 @@ def _garbage_collect_storage(storage_id_whitelist):
logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist)
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Track all of the data that should be removed from blob storage
placements_to_remove = orphaned_storage_query(ImageStoragePlacement
.select(ImageStoragePlacement,
ImageStorage,
ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage),
storage_id_whitelist,
(ImageStorage, ImageStoragePlacement,
ImageStorageLocation))
placements_to_remove = list(orphaned_storage_query(ImageStoragePlacement
.select(ImageStoragePlacement,
ImageStorage,
ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage),
storage_id_whitelist,
(ImageStorage, ImageStoragePlacement,
ImageStorageLocation)))
paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone())
paths_to_remove = placements_query_to_paths_set(placements_to_remove)
# Remove the placements for orphaned storages
placements_subquery = (placements_to_remove
.clone()
.select(ImageStoragePlacement.id)
.alias('ps'))
inner = (ImageStoragePlacement
.select(placements_subquery.c.id)
.from_(placements_subquery))
placements_removed = (ImageStoragePlacement
.delete()
.where(ImageStoragePlacement.id << inner)
.execute())
logger.debug('Removed %s image storage placements', placements_removed)
if len(placements_to_remove) > 0:
placement_ids_to_remove = [placement.id for placement in placements_to_remove]
placements_removed = (ImageStoragePlacement
.delete()
.where(ImageStoragePlacement.id << placement_ids_to_remove)
.execute())
logger.debug('Removed %s image storage placements', placements_removed)
# Remove all orphaned storages
# The comma after ImageStorage.id is VERY important, it makes it a tuple, which is a sequence
orphaned_storages = orphaned_storage_query(ImageStorage.select(ImageStorage.id),
storage_id_whitelist,
(ImageStorage.id,)).alias('osq')
orphaned_storage_inner = (ImageStorage
.select(orphaned_storages.c.id)
.from_(orphaned_storages))
storages_removed = (ImageStorage
.delete()
.where(ImageStorage.id << orphaned_storage_inner)
.execute())
logger.debug('Removed %s image storage records', storages_removed)
orphaned_storages = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
storage_id_whitelist,
(ImageStorage.id,)).alias('osq'))
if len(orphaned_storages) > 0:
storages_removed = (ImageStorage
.delete()
.where(ImageStorage.id << orphaned_storages)
.execute())
logger.debug('Removed %s image storage records', storages_removed)
# We are going to make the conscious decision to not delete image storage blobs inside
# transactions.
@ -1803,40 +1991,34 @@ def get_parent_images(namespace_name, repository_name, image_obj):
def create_or_update_tag(namespace_name, repository_name, tag_name,
                         tag_docker_image_id):
  """ Points tag_name at the image with the given docker image ID, ending the
      lifetime of any existing live tag of that name. Returns the new
      RepositoryTag row. Raises DataModelException for an unknown repository
      or image.
  """
  now_ts = get_epoch_timestamp()
  with config.app_config['DB_TRANSACTION_FACTORY'](db):
    try:
      repo = _get_repository(namespace_name, repository_name)
    except Repository.DoesNotExist:
      raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))

    try:
      # When we move a tag, we really end the timeline of the old one and create a new one.
      tag = db_for_update(_tag_alive(RepositoryTag
                                     .select()
                                     .where(RepositoryTag.repository == repo,
                                            RepositoryTag.name == tag_name), now_ts)).get()
      tag.lifetime_end_ts = now_ts
      tag.save()
    except RepositoryTag.DoesNotExist:
      # No existing live tag that needs to be ended.
      pass

    try:
      image = Image.get(Image.docker_image_id == tag_docker_image_id, Image.repository == repo)
    except Image.DoesNotExist:
      raise DataModelException('Invalid image with id: %s' % tag_docker_image_id)

    return RepositoryTag.create(repository=repo, image=image, name=tag_name,
                                lifetime_start_ts=now_ts)
def delete_tag(namespace_name, repository_name, tag_name):
now_ts = get_epoch_timestamp()
with config.app_config['DB_TRANSACTION_FACTORY'](db):
try:
query = _tag_alive(RepositoryTag
@ -1845,21 +2027,21 @@ def delete_tag(namespace_name, repository_name, tag_name):
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.name == repository_name,
Namespace.username == namespace_name,
RepositoryTag.name == tag_name))
RepositoryTag.name == tag_name), now_ts)
found = db_for_update(query).get()
except RepositoryTag.DoesNotExist:
msg = ('Invalid repository tag \'%s\' on repository \'%s/%s\'' %
(tag_name, namespace_name, repository_name))
raise DataModelException(msg)
found.lifetime_end_ts = int(time.time())
found.lifetime_end_ts = now_ts
found.save()
def create_temporary_hidden_tag(repo, image, expiration_s):
""" Create a tag with a defined timeline, that will not appear in the UI or CLI. Returns the name
of the temporary tag. """
now_ts = int(time.time())
now_ts = get_epoch_timestamp()
expire_ts = now_ts + expiration_s
tag_name = str(uuid4())
RepositoryTag.create(repository=repo, image=image, name=tag_name, lifetime_start_ts=now_ts,

View file

@ -1,6 +1,9 @@
import redis
import json
import threading
import logging
logger = logging.getLogger(__name__)
class UserEventBuilder(object):
"""
@ -68,8 +71,9 @@ class UserEvent(object):
def conduct():
try:
self.publish_event_data_sync(event_id, data_obj)
except Exception as e:
print e
logger.debug('Published user event %s: %s', event_id, data_obj)
except Exception:
logger.exception('Could not publish user event')
thread = threading.Thread(target=conduct)
thread.start()

View file

@ -1,6 +1,11 @@
import ldap
import logging
import json
import itertools
import uuid
import struct
from util.aes import AESCipher
from util.validation import generate_valid_usernames
from data import model
@ -106,6 +111,7 @@ class LDAPUsers(object):
return found_user is not None
class UserAuthentication(object):
def __init__(self, app=None):
self.app = app
@ -138,5 +144,81 @@ class UserAuthentication(object):
app.extensions['authentication'] = users
return users
def _get_secret_key(self):
  """ Returns the secret key to use for encrypting and decrypting. """
  from app import app
  app_secret_key = app.config['SECRET_KEY']
  secret_key = None

  # First try parsing the key as an int.
  try:
    big_int = int(app_secret_key)
    secret_key = str(bytearray.fromhex('{:02x}'.format(big_int)))
  except ValueError:
    pass

  # Next try parsing it as an UUID.
  if secret_key is None:
    try:
      secret_key = uuid.UUID(app_secret_key).bytes
    except ValueError:
      pass

  # Otherwise, use the bytes of the configured key directly.
  if secret_key is None:
    secret_key = str(bytearray(map(ord, app_secret_key)))

  # Cycle the key material out to exactly 32 bytes of key.
  return ''.join(itertools.islice(itertools.cycle(secret_key), 32))
def encrypt_user_password(self, password):
  """ Returns an encrypted version of the user's password. """
  data = {
    'password': password
  }

  message = json.dumps(data)
  cipher = AESCipher(self._get_secret_key())
  return cipher.encrypt(message)
def _decrypt_user_password(self, encrypted):
  """ Attempts to decrypt the given password and returns it, or None if the
      ciphertext or its JSON payload cannot be decoded.
  """
  cipher = AESCipher(self._get_secret_key())

  try:
    message = cipher.decrypt(encrypted)
  except (ValueError, TypeError):
    return None

  try:
    data = json.loads(message)
  except ValueError:
    return None

  return data.get('password', encrypted)
def verify_user(self, username_or_email, password, basic_auth=False):
  """ Verifies the given credentials against the configured auth backend and
      returns a (user_or_None, error_message) tuple. For basic auth, the
      password may be an encrypted token, which is decrypted first.
  """
  # First try to decode the password as a signed token.
  if basic_auth:
    import features

    decrypted = self._decrypt_user_password(password)
    if decrypted is None:
      # This is a normal password.
      if features.REQUIRE_ENCRYPTED_BASIC_AUTH:
        # Fixed typo in the user-facing message: 'unecrypted' -> 'unencrypted'.
        msg = ('Client login with unencrypted passwords is disabled. Please generate an ' +
               'encrypted password in the user admin panel for use here.')
        return (None, msg)
    else:
      password = decrypted

  result = self.state.verify_user(username_or_email, password)
  if result:
    return (result, '')
  else:
    return (result, 'Invalid password.')
def __getattr__(self, name):
  # Delegate unknown attribute lookups to the configured auth backend
  # (self.state); returns None instead of raising when it is also missing there.
  return getattr(self.state, name, None)