From 42dba8655cbbdbe82a782ac690e8f1d65ff178b1 Mon Sep 17 00:00:00 2001
From: Joseph Schorr <josephschorr@users.noreply.github.com>
Date: Thu, 27 Aug 2015 14:55:33 -0400
Subject: [PATCH] Fix auth and add V2 tests!

---
 endpoints/v2/blob.py     |   2 +-
 endpoints/v2/manifest.py |   8 +-
 endpoints/v2/v2auth.py   |  22 +++--
 storage/fakestorage.py   |   2 +-
 test/registry_tests.py   | 197 +++++++++++++++++++++++++++++++--------
 5 files changed, 180 insertions(+), 51 deletions(-)

diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py
index 9867917a1..590a8421f 100644
--- a/endpoints/v2/blob.py
+++ b/endpoints/v2/blob.py
@@ -59,7 +59,7 @@ def _base_blob_fetch(namespace, repo_name, digest):
 
   # Add the Accept-Ranges header if the storage engine supports resumable
   # downloads.
-  if storage.get_supports_resumable_downloads(found.storage.locations):
+  if storage.get_supports_resumable_downloads(found.locations):
     logger.debug('Storage supports resumable downloads')
     headers['Accept-Ranges'] = 'bytes'
 
diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index 98172b815..8533d3be3 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -63,8 +63,10 @@ class SignedManifest(object):
 
   def __init__(self, manifest_bytes):
     self._bytes = manifest_bytes
-    self._parsed = yaml.safe_load(manifest_bytes)
 
+    # TODO(jakedt): If manifest_bytes is a plain scalar rather than a mapping, safe_load returns
+    # it back as a string instead of raising. We should throw some sort of exception.
+    self._parsed = yaml.safe_load(manifest_bytes)
     self._signatures = self._parsed[_SIGNATURES_KEY]
     self._namespace, self._repo_name = self._parsed[_REPO_NAME_KEY].split('/')
     self._tag = self._parsed[_REPO_TAG_KEY]
@@ -107,8 +109,10 @@ class SignedManifest(object):
                                                   self._parsed[_HISTORY_KEY])):
       image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY])
       metadata_string = history_obj[_V1_COMPAT_KEY]
-      v1_metadata = yaml.safe_load(metadata_string)
 
+      # TODO(jakedt): If metadata_string is a plain scalar rather than a mapping, safe_load
+      # returns it back as a string instead of raising. We should throw some sort of exception.
+      v1_metadata = yaml.safe_load(metadata_string)
       command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
       command = json.dumps(command_list) if command_list else None
 
diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py
index 76f11b042..3cb687128 100644
--- a/endpoints/v2/v2auth.py
+++ b/endpoints/v2/v2auth.py
@@ -18,13 +18,14 @@ from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermissi
 from endpoints.v2 import v2_bp
 from util.cache import no_cache
 from util.names import parse_namespace_repository
+from endpoints.decorators import anon_protect
 
 
 logger = logging.getLogger(__name__)
 
 
 SCOPE_REGEX = re.compile(
-    r'repository:([\.a-zA-Z0-9_\-]+/[\.a-zA-Z0-9_\-]+):(((push|pull|\*),)*(push|pull|\*))'
+    r'^repository:([\.a-zA-Z0-9_\-]+/[\.a-zA-Z0-9_\-]+):(((push|pull|\*),)*(push|pull|\*))$'
 )
 
 
@@ -43,6 +44,7 @@ def load_private_key(private_key_file_path):
 @v2_bp.route('/auth')
 @process_auth
 @no_cache
+@anon_protect
 def generate_registry_jwt():
   """ This endpoint will generate a JWT conforming to the Docker registry v2 auth spec:
       https://docs.docker.com/registry/spec/auth/token/
@@ -54,15 +56,11 @@ def generate_registry_jwt():
   logger.debug('Scope request: %s', scope_param)
 
   user = get_authenticated_user()
-  if user is None:
-    abort(404)
-
   access = []
   if scope_param is not None:
     match = SCOPE_REGEX.match(scope_param)
-    if match is None or match.end() != len(scope_param):
+    if match is None:
       logger.debug('Match: %s', match)
-      logger.debug('End: %s', match.end())
       logger.debug('len: %s', len(scope_param))
       logger.warning('Unable to decode repository and actions: %s', scope_param)
       abort(400)
@@ -74,17 +72,21 @@ def generate_registry_jwt():
 
     namespace, reponame = parse_namespace_repository(namespace_and_repo)
     if 'pull' in actions and 'push' in actions:
+      if user is None:
+        abort(401)
+
       repo = model.repository.get_repository(namespace, reponame)
       if repo:
-        if not ModifyRepositoryPermission(namespace, reponame):
+        if not ModifyRepositoryPermission(namespace, reponame).can():
           abort(403)
       else:
-        if not CreateRepositoryPermission(namespace):
+        if not CreateRepositoryPermission(namespace).can():
           abort(403)
         logger.debug('Creating repository: %s/%s', namespace, reponame)
         model.repository.create_repository(namespace, reponame, user)
     elif 'pull' in actions:
-      if not ReadRepositoryPermission(namespace, reponame):
+      if (not ReadRepositoryPermission(namespace, reponame).can() and
+          not model.repository.repository_is_public(namespace, reponame)):
         abort(403)
 
 
@@ -99,7 +101,7 @@ def generate_registry_jwt():
     'aud': audience_param,
     'nbf': int(time.time()),
     'exp': int(time.time() + 60),
-    'sub': user.username,
+    'sub': user.username if user else '(anonymous)',
     'access': access,
   }
 
diff --git a/storage/fakestorage.py b/storage/fakestorage.py
index d72b5ddc4..0d17d1a9b 100644
--- a/storage/fakestorage.py
+++ b/storage/fakestorage.py
@@ -54,7 +54,7 @@ class FakeStorage(BaseStorageV2):
   def stream_upload_chunk(self, uuid, offset, length, in_fp, _):
     upload_storage = _FAKE_STORAGE_MAP[uuid]
     upload_storage.seek(offset)
-    return self.stream_write_to_fp(in_fp, upload_storage, length)
+    return self.stream_write_to_fp(in_fp, upload_storage, length), {}
 
   def complete_chunked_upload(self, uuid, final_path, _):
     _FAKE_STORAGE_MAP[final_path] = _FAKE_STORAGE_MAP[uuid]
diff --git a/test/registry_tests.py b/test/registry_tests.py
index cba8fa87d..cf3c7e184 100644
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@@ -1,5 +1,6 @@
 import unittest
 import requests
+import os
 
 from flask import request, jsonify
 from flask.blueprints import Blueprint
@@ -7,6 +8,8 @@ from flask.ext.testing import LiveServerTestCase
 
 from app import app
 from endpoints.v1 import v1_bp
+from endpoints.v2 import v2_bp
+from endpoints.v2.manifest import SignedManifestBuilder
 from endpoints.api import api_bp
 from initdb import wipe_database, initialize_database, populate_database
 from endpoints.csrf import generate_csrf_token
@@ -14,14 +17,20 @@ from endpoints.csrf import generate_csrf_token
 import endpoints.decorated
 import json
 import features
+import hashlib
 
 import tarfile
 
+from jwkest.jws import SIGNER_ALGS
+from jwkest.jwk import RSAKey
+from Crypto.PublicKey import RSA
+
 from cStringIO import StringIO
 from digest.checksums import compute_simple
 
 try:
   app.register_blueprint(v1_bp, url_prefix='/v1')
+  app.register_blueprint(v2_bp, url_prefix='/v2')
   app.register_blueprint(api_bp, url_prefix='/api')
 except ValueError:
   # Blueprint was already registered
@@ -68,32 +77,8 @@ class TestFeature(object):
                            data=json.dumps(dict(value=self.old_value)),
                            headers={'Content-Type': 'application/json'})
 
-class RegistryTestCase(LiveServerTestCase):
-  maxDiff = None
-
-  def create_app(self):
-    app.config['TESTING'] = True
-    return app
-
-  def setUp(self):
-    # Note: We cannot use the normal savepoint-based DB setup here because we are accessing
-    # different app instances remotely via a live webserver, which is multiprocess. Therefore, we
-    # completely clear the database between tests.
-    wipe_database()
-    initialize_database()
-    populate_database()
-
-    self.clearSession()
-
-  def clearSession(self):
-    self.session = requests.Session()
-    self.signature = None
-    self.docker_token = 'true'
-
-    # Load the CSRF token.
-    self.csrf_token = ''
-    self.csrf_token = self.conduct('GET', '/__test/csrf').text
 
+class V1RegistryMixin(object):
   def conduct(self, method, url, headers=None, data=None, auth=None, expected_code=200):
     headers = headers or {}
     headers['X-Docker-Token'] = self.docker_token
@@ -118,17 +103,6 @@ class RegistryTestCase(LiveServerTestCase):
   def ping(self):
     self.conduct('GET', '/v1/_ping')
 
-  def do_login(self, username, password='password'):
-    self.ping()
-    result = self.conduct('POST', '/v1/users/',
-                           data=json.dumps(dict(username=username, password=password,
-                                                email='bar@example.com')),
-                           headers={"Content-Type": "application/json"},
-                           expected_code=400)
-
-    self.assertEquals(result.text, '"Username or email already exists"')
-    self.conduct('GET', '/v1/users/', auth=(username, password))
-
   def do_push(self, namespace, repository, username, password, images):
     auth = (username, password)
 
@@ -201,6 +175,147 @@ class RegistryTestCase(LiveServerTestCase):
       self.conduct('GET', image_prefix + 'json')
       self.conduct('GET', image_prefix + 'layer')
 
+  def clearSession(self):
+    self.signature = None
+    self.docker_token = 'true'
+
+
+class V2RegistryMixin(object):
+  """ Test mixin speaking the V2 registry protocol (bearer-token auth, blobs and manifests). """
+  def conduct(self, method, url, headers=None, params=None, data=None, auth=None, expected_code=200):
+    """ Issues a request to the live server, asserts the status code and returns the response. """
+    headers = headers or {}
+    params = params or {}
+    params['_csrf_token'] = self.csrf_token
+
+    if self.docker_token and not auth:
+      headers['Authorization'] = 'Bearer ' + self.docker_token
+
+    response = self.session.request(method, self.get_server_url() + url, headers=headers, data=data,
+                                    auth=auth, params=params)
+    if response.status_code != expected_code:
+      print response.text
+
+    self.assertEquals(response.status_code, expected_code)
+    return response
+
+  def ping(self):
+    """ Hits the V2 base endpoint; expects 200 when a bearer token is held and 401 otherwise. """
+    self.conduct('GET', '/v2/', expected_code=200 if self.docker_token else 401)
+
+  def do_auth(self, username, password, namespace, repository, expected_code=200, scopes=()):
+    """ Requests a token for the given repository scopes; on a 200 it is saved as docker_token. """
+    params = {
+      'account': username,
+      'scope': 'repository:%s/%s:%s' % (namespace, repository, ','.join(scopes)),
+      'service': 'quay'
+    }
+
+    # NOTE: username is None for anonymous pulls; the credentials tuple is still sent as-is.
+    response = self.conduct('GET', '/v2/auth', params=params, auth=(username, password),
+                            expected_code=expected_code)
+
+    if expected_code == 200:
+      response_json = json.loads(response.text)
+      self.assertIsNotNone(response_json.get('token'))
+      self.docker_token = response_json['token']
+
+  def do_push(self, namespace, repository, username, password, images):
+    """ Pushes one fixed fake layer and then its manifest under the `latest` tag. """
+    # Ping!
+    self.ping()
+
+    # Auth.
+    self.do_auth(username, password, namespace, repository, scopes=['push', 'pull'])
+
+    # Build a fake manifest.
+    # NOTE: The `images` argument is currently ignored; a fixed fake layer is pushed instead.
+    images = [('somelayer', 'some fake data')]
+
+    tag_name = 'latest'
+    builder = SignedManifestBuilder(namespace, repository, tag_name)
+    for image_id, contents in images:
+      checksum = 'sha256:' + hashlib.sha256(contents).hexdigest()
+      builder.add_layer(checksum, json.dumps({'id': image_id, 'data': contents}))
+
+    # Push the image's layers.
+    for image_id, contents in images:
+      # Layer data should not yet exist.
+      checksum = 'sha256:' + hashlib.sha256(contents).hexdigest()
+      self.conduct('HEAD', '/v2/%s/%s/blobs/%s' % (namespace, repository, checksum),
+                   expected_code=404)
+
+      # Start a new upload of the layer data.
+      response = self.conduct('POST', '/v2/%s/%s/blobs/uploads/' % (namespace, repository),
+                              expected_code=202)
+
+      location = response.headers['Location'][len(self.get_server_url()):]
+
+      # PATCH the image data into the layer.
+      self.conduct('PATCH', location, data=contents, expected_code=204)
+
+      # Finish the layer upload with a PUT.
+      self.conduct('PUT', location, params=dict(digest=checksum), expected_code=201)
+
+    # Write the manifest.
+    new_key = RSA.generate(2048)
+    jwk = RSAKey(key=new_key)
+    manifest = builder.build(jwk)
+
+    self.conduct('PUT', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name),
+                 data=manifest.bytes, expected_code=202,
+                 headers={'Content-Type': 'application/json'})
+
+  def do_pull(self, namespace, repository, username=None, password='password', expected_code=200):
+    """ Authorizes for pull, then fetches the `latest` manifest and every blob it references. """
+    # Ping!
+    self.ping()
+
+    # Auth.
+    self.do_auth(username, password, namespace, repository, scopes=['pull'],
+                 expected_code=expected_code)
+    if expected_code != 200:
+      return
+
+    # Retrieve the manifest for the tag.
+    tag_name = 'latest'
+    response = self.conduct('GET', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name))
+    manifest_data = json.loads(response.text)
+    for layer in manifest_data['fsLayers']:
+      blob_id = layer['blobSum']
+      self.conduct('GET', '/v2/%s/%s/blobs/%s' % (namespace, repository, blob_id), expected_code=200)
+
+  def clearSession(self):
+    """ Drops any bearer token so subsequent requests are made without one. """
+    self.docker_token = None
+
+
+class RegistryTestCaseMixin(object):
+  maxDiff = None
+
+  def create_app(self):
+    app.config['TESTING'] = True
+    app.config['DEBUG'] = True
+    return app
+
+  def setUp(self):
+    # Note: We cannot use the normal savepoint-based DB setup here because we are accessing
+    # different app instances remotely via a live webserver, which is multiprocess. Therefore, we
+    # completely clear the database between tests.
+    wipe_database()
+    initialize_database()
+    populate_database()
+
+    self.clearTestSession()
+
+  def clearTestSession(self):
+    self.session = requests.Session()
+    self.clearSession()
+
+    # Load the CSRF token.
+    self.csrf_token = ''
+    self.csrf_token = self.conduct('GET', '/__test/csrf').text
+
   def conduct_api_login(self, username, password):
     self.conduct('POST', '/api/v1/signin',
                  data=json.dumps(dict(username=username, password=password)),
@@ -212,7 +327,7 @@ class RegistryTestCase(LiveServerTestCase):
                  headers={'Content-Type': 'application/json'})
 
 
-class RegistryTests(RegistryTestCase):
+class RegistryTestsMixin(object):
   def test_pull_publicrepo_anonymous(self):
     # Add a new repository under the public user, so we have a real repository to pull.
     images = [{
@@ -385,5 +500,13 @@ class RegistryTests(RegistryTestCase):
     # org.
     self.do_pull('buynlarge', 'newrepo', 'devtable', 'password')
 
+
+class V1RegistryTests(V1RegistryMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase):
+  """ Tests for V1 registry. """
+
+class V2RegistryTests(V2RegistryMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase):
+  """ Tests for V2 registry. """
+
+
 if __name__ == '__main__':
   unittest.main()