Merge pull request #3250 from quay/joseph.schorr/QUAY-1030/interfacing-part-9

Implement blob uploader and change V1 to use it
2018-09-24 16:03:41 -04:00 · 2018-09-24 16:03:41 -04:00 · 468e5a8fc2
commit 468e5a8fc2
parent c10e3bf973 5422386de4
14 changed files with 717 additions and 84 deletions
--- a/data/registry_model/blobuploader.py
+++ b/data/registry_model/blobuploader.py
@ -0,0 +1,310 @@
+import logging
+import time
+
+from contextlib import contextmanager
+from collections import namedtuple
+
+import bitmath
+import resumablehashlib
+
+from data.registry_model import registry_model
+from data.database import CloseForLongOperation, db_transaction
+from digest import digest_tools
+from util.registry.filelike import wrap_with_handler, StreamSlice
+from util.registry.gzipstream import calculate_size_handler
+from util.registry.torrent import PieceHasher
+
+
+logger = logging.getLogger(__name__)
+
+
+BLOB_CONTENT_TYPE = 'application/octet-stream'
+
+
+class BlobUploadException(Exception):
+  """ Base for all exceptions raised when uploading blobs. """
+
+class BlobDigestMismatchException(BlobUploadException):
+  """ Exception raised if the digest requested does not match that of the contents uploaded. """
+
+class BlobTooLargeException(BlobUploadException):
+  """ Exception raised if the data uploaded exceeds the maximum_blob_size. """
+  def __init__(self, uploaded, max_allowed):
+    super(BlobTooLargeException, self).__init__()
+    self.uploaded = uploaded
+    self.max_allowed = max_allowed
+
+
+BlobUploadSettings = namedtuple('BlobUploadSettings', ['maximum_blob_size', 'bittorrent_piece_size',
+                                                       'committed_blob_expiration'])
+
+
+def create_blob_upload(repository_ref, storage, settings, extra_blob_stream_handlers=None):
+  """ Creates a new blob upload in the specified repository and returns a manager for interacting
+      with that upload. Returns None if a new blob upload could not be started.
+  """
+  location_name = storage.preferred_locations[0]
+  new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name)
+  blob_upload = registry_model.create_blob_upload(repository_ref, new_upload_uuid, location_name,
+                                                  upload_metadata)
+  if blob_upload is None:
+    return None
+
+  return _BlobUploadManager(repository_ref, blob_upload, settings, storage,
+                            extra_blob_stream_handlers)
+
+
+def retrieve_blob_upload_manager(repository_ref, blob_upload_id, storage, settings):
+  """ Retrieves the manager for an in-progress blob upload with the specified ID under the given
+      repository or None if none.
+  """
+  blob_upload = registry_model.lookup_blob_upload(repository_ref, blob_upload_id)
+  if blob_upload is None:
+    return None
+
+  return _BlobUploadManager(repository_ref, blob_upload, settings, storage)
+
+
+@contextmanager
+def upload_blob(repository_ref, storage, settings, extra_blob_stream_handlers=None):
+  """ Starts a new blob upload in the specified repository and yields a manager for interacting
+      with that upload. When the context manager completes, the blob upload is deleted, whether
+      committed to a blob or not. Yields None if a blob upload could not be started.
+  """
+  created = create_blob_upload(repository_ref, storage, settings, extra_blob_stream_handlers)
+  if not created:
+    yield None
+    return
+
+  try:
+    yield created
+  except Exception as ex:
+    logger.exception('Exception when uploading blob `%s`', created.blob_upload_id)
+    raise ex
+  finally:
+    # Cancel the upload if something went wrong or it was not commit to a blob.
+    if created.committed_blob is None:
+      created.cancel_upload()
+
+
+class _BlobUploadManager(object):
+  """ Defines a helper class for easily interacting with blob uploads in progress, including
+      handling of database and storage calls.
+  """
+  def __init__(self, repository_ref, blob_upload, settings, storage,
+               extra_blob_stream_handlers=None):
+    self.repository_ref = repository_ref
+    self.blob_upload = blob_upload
+    self.settings = settings
+    self.storage = storage
+    self.extra_blob_stream_handlers = extra_blob_stream_handlers
+    self.committed_blob = None
+
+  @property
+  def blob_upload_id(self):
+    """ Returns the unique ID for the blob upload. """
+    return self.blob_upload.upload_id
+
+  def upload_chunk(self, app_config, input_fp, start_offset=0, length=-1, metric_queue=None):
+    """ Uploads a chunk of data found in the given input file-like interface. start_offset and
+        length are optional and should match a range header if any was given.
+
+        If metric_queue is given, the upload time and chunk size are written into the metrics in
+        the queue.
+
+        Returns the total number of bytes uploaded after this upload has completed. Raises
+        a BlobUploadException if the upload failed.
+    """
+    assert start_offset is not None
+    assert length is not None
+
+    if start_offset > 0 and start_offset > self.blob_upload.byte_count:
+      logger.error('start_offset provided greater than blob_upload.byte_count')
+      return None
+
+    # Ensure that we won't go over the allowed maximum size for blobs.
+    max_blob_size = bitmath.parse_string_unsafe(self.settings.maximum_blob_size)
+    uploaded = bitmath.Byte(length + start_offset)
+    if length > -1 and uploaded > max_blob_size:
+      raise BlobTooLargeException(uploaded=uploaded.bytes, max_allowed=max_blob_size.bytes)
+
+    location_set = {self.blob_upload.location_name}
+    upload_error = None
+    with CloseForLongOperation(app_config):
+      if start_offset > 0 and start_offset < self.blob_upload.byte_count:
+        # Skip the bytes which were received on a previous push, which are already stored and
+        # included in the sha calculation
+        overlap_size = self.blob_upload.byte_count - start_offset
+        input_fp = StreamSlice(input_fp, overlap_size)
+
+        # Update our upload bounds to reflect the skipped portion of the overlap
+        start_offset = self.blob_upload.byte_count
+        length = max(length - overlap_size, 0)
+
+      # We use this to escape early in case we have already processed all of the bytes the user
+      # wants to upload.
+      if length == 0:
+        return self.blob_upload.byte_count
+
+      input_fp = wrap_with_handler(input_fp, self.blob_upload.sha_state.update)
+
+      if self.extra_blob_stream_handlers:
+        for handler in self.extra_blob_stream_handlers:
+          input_fp = wrap_with_handler(input_fp, handler)
+
+      # Add a hasher for calculating SHA1s for torrents if this is the first chunk and/or we have
+      # already calculated hash data for the previous chunk(s).
+      piece_hasher = None
+      if self.blob_upload.chunk_count == 0 or self.blob_upload.piece_sha_state:
+        initial_sha1_value = self.blob_upload.piece_sha_state or resumablehashlib.sha1()
+        initial_sha1_pieces_value = self.blob_upload.piece_hashes or ''
+
+        piece_hasher = PieceHasher(self.settings.bittorrent_piece_size, start_offset,
+                                   initial_sha1_pieces_value, initial_sha1_value)
+        input_fp = wrap_with_handler(input_fp, piece_hasher.update)
+
+      # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the
+      # stream so we can determine the uncompressed size. We'll throw out this data if another chunk
+      # comes in, but in the common case the docker client only sends one chunk.
+      size_info = None
+      if start_offset == 0 and self.blob_upload.chunk_count == 0:
+        size_info, fn = calculate_size_handler()
+        input_fp = wrap_with_handler(input_fp, fn)
+
+      start_time = time.time()
+      length_written, new_metadata, upload_error = self.storage.stream_upload_chunk(
+        location_set,
+        self.blob_upload.upload_id,
+        start_offset,
+        length,
+        input_fp,
+        self.blob_upload.storage_metadata,
+        content_type=BLOB_CONTENT_TYPE,
+      )
+
+      if upload_error is not None:
+        logger.error('storage.stream_upload_chunk returned error %s', upload_error)
+        raise BlobUploadException(upload_error)
+
+      # Update the chunk upload time metric.
+      if metric_queue is not None:
+        metric_queue.chunk_upload_time.Observe(time.time() - start_time, labelvalues=[
+          length_written, list(location_set)[0]])
+
+    # Ensure we have not gone beyond the max layer size.
+    new_blob_bytes = self.blob_upload.byte_count + length_written
+    new_blob_size = bitmath.Byte(new_blob_bytes)
+    if new_blob_size > max_blob_size:
+      raise BlobTooLargeException(uploaded=new_blob_size, max_allowed=max_blob_size.bytes)
+
+    # If we determined an uncompressed size and this is the first chunk, add it to the blob.
+    # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
+    uncompressed_byte_count = self.blob_upload.uncompressed_byte_count
+    if size_info is not None and self.blob_upload.chunk_count == 0 and size_info.is_valid:
+      uncompressed_byte_count = size_info.uncompressed_size
+    elif length_written > 0:
+      # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
+      # know the uncompressed size.
+      uncompressed_byte_count = None
+
+    piece_hashes = None
+    piece_sha_state = None
+    if piece_hasher is not None:
+      piece_hashes = piece_hasher.piece_hashes
+      piece_sha_state = piece_hasher.hash_fragment
+
+    self.blob_upload = registry_model.update_blob_upload(self.blob_upload,
+                                                         uncompressed_byte_count,
+                                                         piece_hashes,
+                                                         piece_sha_state,
+                                                         new_metadata,
+                                                         new_blob_bytes,
+                                                         self.blob_upload.chunk_count + 1,
+                                                         self.blob_upload.sha_state)
+    if self.blob_upload is None:
+      raise BlobUploadException('Could not complete upload of chunk')
+
+    return new_blob_bytes
+
+  def cancel_upload(self):
+    """ Cancels the blob upload, deleting any data uploaded and removing the upload itself. """
+    # Tell storage to cancel the chunked upload, deleting its contents.
+    self.storage.cancel_chunked_upload({self.blob_upload.location_name},
+                                       self.blob_upload.upload_id,
+                                       self.blob_upload.storage_metadata)
+
+    # Remove the blob upload record itself.
+    registry_model.delete_blob_upload(self.blob_upload)
+
+  def commit_to_blob(self, app_config, expected_digest=None):
+    """ Commits the blob upload to a blob under the repository. The resulting blob will be marked
+        to not be GCed for some period of time (as configured by `committed_blob_expiration`).
+
+        If expected_digest is specified, the content digest of the data uploaded for the blob is
+        compared to that given and, if it does not match, a BlobDigestMismatchException is
+        raised. The digest given must be of type `Digest` and not a string.
+    """
+    # Compare the content digest.
+    if expected_digest is not None:
+      self._validate_digest(expected_digest)
+
+    # Finalize the storage.
+    storage_already_existed = self._finalize_blob_storage(app_config)
+
+    # Convert the upload to a blob.
+    computed_digest_str = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)
+
+    with db_transaction():
+      blob = registry_model.commit_blob_upload(self.blob_upload, computed_digest_str,
+                                               self.settings.committed_blob_expiration)
+      if blob is None:
+        return None
+
+      # Save torrent hash information (if available).
+      if self.blob_upload.piece_sha_state is not None and not storage_already_existed:
+        piece_bytes = self.blob_upload.piece_hashes + self.blob_upload.piece_sha_state.digest()
+        registry_model.set_torrent_info(blob, self.settings.bittorrent_piece_size, piece_bytes)
+
+    self.committed_blob = blob
+    return blob
+
+  def _validate_digest(self, expected_digest):
+    """
+    Verifies that the digest's SHA matches that of the uploaded data.
+    """
+    computed_digest = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)
+    if not digest_tools.digests_equal(computed_digest, expected_digest):
+      logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
+                   self.blob_upload.upload_id, expected_digest, computed_digest)
+      raise BlobDigestMismatchException()
+
+  def _finalize_blob_storage(self, app_config):
+    """
+    When an upload is successful, this ends the uploading process from the
+    storage's perspective.
+
+    Returns True if the blob already existed.
+    """
+    computed_digest = digest_tools.sha256_digest_from_hashlib(self.blob_upload.sha_state)
+    final_blob_location = digest_tools.content_path(computed_digest)
+
+    # Close the database connection before we perform this operation, as it can take a while
+    # and we shouldn't hold the connection during that time.
+    with CloseForLongOperation(app_config):
+      # Move the storage into place, or if this was a re-upload, cancel it
+      already_existed = self.storage.exists({self.blob_upload.location_name}, final_blob_location)
+      if already_existed:
+        # It already existed, clean up our upload which served as proof that the
+        # uploader had the blob.
+        self.storage.cancel_chunked_upload({self.blob_upload.location_name},
+                                           self.blob_upload.upload_id,
+                                           self.blob_upload.storage_metadata)
+      else:
+        # We were the first ones to upload this image (at least to this location)
+        # Let's copy it into place
+        self.storage.complete_chunked_upload({self.blob_upload.location_name},
+                                             self.blob_upload.upload_id,
+                                             final_blob_location,
+                                             self.blob_upload.storage_metadata)
+
+    return already_existed
--- a/data/registry_model/datatypes.py
+++ b/data/registry_model/datatypes.py
@ -122,13 +122,14 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
                                           'image_size', 'aggregate_size', 'uploading'])):
  """ LegacyImage represents a Docker V1-style image found in a repository. """
  @classmethod
-  def for_image(cls, image, images_map=None, tags_map=None):
+  def for_image(cls, image, images_map=None, tags_map=None, blob=None):
    if image is None:
      return None

    return LegacyImage(db_id=image.id,
                       inputs=dict(images_map=images_map, tags_map=tags_map,
-                                   ancestor_id_list=image.ancestor_id_list()),
+                                   ancestor_id_list=image.ancestor_id_list(),
+                                   blob=blob),
                       docker_image_id=image.docker_image_id,
                       created=image.created,
                       comment=image.comment,
@ -148,6 +149,14 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen
            for ancestor_id in reversed(ancestor_id_list)
            if images_map.get(ancestor_id)]

+  @property
+  @requiresinput('blob')
+  def blob(self, blob):
+    """ Returns the blob for this image. Raises an exception if the blob has
+        not been loaded before this property is invoked.
+    """
+    return blob
+
  @property
  @requiresinput('tags_map')
  def tags(self, tags_map):
@ -240,3 +249,21 @@ class TorrentInfo(datatype('TorrentInfo', ['pieces', 'piece_length'])):
    return TorrentInfo(db_id=torrent_info.id,
                       pieces=torrent_info.pieces,
                       piece_length=torrent_info.piece_length)
+
+
+class BlobUpload(datatype('BlobUpload', ['upload_id', 'byte_count', 'uncompressed_byte_count',
+                                         'chunk_count', 'sha_state', 'location_name',
+                                         'storage_metadata', 'piece_sha_state', 'piece_hashes'])):
+  """ BlobUpload represents information about an in-progress upload to create a blob. """
+  @classmethod
+  def for_upload(cls, blob_upload):
+    return BlobUpload(db_id=blob_upload.id,
+                      upload_id=blob_upload.uuid,
+                      byte_count=blob_upload.byte_count,
+                      uncompressed_byte_count=blob_upload.uncompressed_byte_count,
+                      chunk_count=blob_upload.chunk_count,
+                      sha_state=blob_upload.sha_state,
+                      location_name=blob_upload.location.name,
+                      storage_metadata=blob_upload.storage_metadata,
+                      piece_sha_state=blob_upload.piece_sha_state,
+                      piece_hashes=blob_upload.piece_hashes)
--- a/data/registry_model/interface.py
+++ b/data/registry_model/interface.py
@ -42,7 +42,8 @@ class RegistryDataInterface(object):
    """

  @abstractmethod
-  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False):
+  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False,
+                       include_blob=False):
    """
    Returns the matching LegacyImages under the matching repository, if any. If none,
    returns None.
@ -196,9 +197,36 @@ class RegistryDataInterface(object):
    """

  @abstractmethod
-  def get_repo_blob_by_digest(self, repo_ref, blob_digest, include_placements=False):
+  def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
    """
    Returns the blob in the repository with the given digest, if any or None if none. Note that
    there may be multiple records in the same repository for the same blob digest, so the return
    value of this function may change.
    """
+
+  @abstractmethod
+  def create_blob_upload(self, repository_ref, upload_id, location_name, storage_metadata):
+    """ Creates a new blob upload and returns a reference. If the blob upload could not be
+        created, returns None. """
+
+  @abstractmethod
+  def lookup_blob_upload(self, repository_ref, blob_upload_id):
+    """ Looks up the blob upload withn the given ID under the specified repository and returns it
+        or None if none.
+    """
+
+  @abstractmethod
+  def update_blob_upload(self, blob_upload, uncompressed_byte_count, piece_hashes, piece_sha_state,
+                         storage_metadata, byte_count, chunk_count, sha_state):
+    """ Updates the fields of the blob upload to match those given. Returns the updated blob upload
+        or None if the record does not exists.
+    """
+
+  @abstractmethod
+  def delete_blob_upload(self, blob_upload):
+    """ Deletes a blob upload record. """
+
+  @abstractmethod
+  def commit_blob_upload(self, blob_upload, blob_digest_str, blob_expiration_seconds):
+    """ Commits the blob upload into a blob and sets an expiration before that blob will be GCed.
+    """
--- a/data/registry_model/registry_pre_oci_model.py
+++ b/data/registry_model/registry_pre_oci_model.py
@ -10,7 +10,7 @@ from data import model
 from data.registry_model.interface import RegistryDataInterface
 from data.registry_model.datatypes import (Tag, RepositoryReference, Manifest, LegacyImage, Label,
                                           SecurityScanStatus, ManifestLayer, Blob, DerivedImage,
-                                           TorrentInfo)
+                                           TorrentInfo, BlobUpload)
 from image.docker.schema1 import DockerSchema1ManifestBuilder, ManifestException


@ -99,7 +99,8 @@ class PreOCIModel(RegistryDataInterface):
    return [LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id)
            for image in all_images]

-  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False):
+  def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False,
+                       include_blob=False):
    """
    Returns the matching LegacyImages under the matching repository, if any. If none,
    returns None.
@ -117,7 +118,14 @@ class PreOCIModel(RegistryDataInterface):
      parent_images = model.image.get_parent_images(repo.namespace_user.username, repo.name, image)
      parent_images_map = {image.id: image for image in parent_images}

-    return LegacyImage.for_image(image, images_map=parent_images_map)
+    blob = None
+    if include_blob:
+      placements = list(model.storage.get_storage_locations(image.storage.uuid))
+      blob = Blob.for_image_storage(image.storage,
+                                    storage_path=model.storage.get_layer_path(image.storage),
+                                    placements=placements)
+
+    return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)

  def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None):
    """ Creates a label on the manifest with the given key and value. """
@ -547,14 +555,14 @@ class PreOCIModel(RegistryDataInterface):
    torrent_info = model.storage.save_torrent_info(image_storage, piece_length, pieces)
    return TorrentInfo.for_torrent_info(torrent_info)

-  def get_repo_blob_by_digest(self, repo_ref, blob_digest, include_placements=False):
+  def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
    """
    Returns the blob in the repository with the given digest, if any or None if none. Note that
    there may be multiple records in the same repository for the same blob digest, so the return
    value of this function may change.
    """
    try:
-      image_storage = model.blob.get_repository_blob_by_digest(repo_ref._db_id, blob_digest)
+      image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
    except model.BlobDoesNotExist:
      return None

@ -568,5 +576,76 @@ class PreOCIModel(RegistryDataInterface):
                                  storage_path=model.storage.get_layer_path(image_storage),
                                  placements=placements)

+  def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
+    """ Creates a new blob upload and returns a reference. If the blob upload could not be
+        created, returns None. """
+    repo = model.repository.lookup_repository(repository_ref._db_id)
+    if repo is None:
+      return None
+
+    try:
+      upload_record = model.blob.initiate_upload(repo.namespace_user.username, repo.name,
+                                                 new_upload_id, location_name, storage_metadata)
+      return BlobUpload.for_upload(upload_record)
+    except database.Repository.DoesNotExist:
+      return None
+
+  def lookup_blob_upload(self, repository_ref, blob_upload_id):
+    """ Looks up the blob upload withn the given ID under the specified repository and returns it
+        or None if none.
+    """
+    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload_id)
+    if upload_record is None:
+      return None
+
+    return BlobUpload.for_upload(upload_record)
+
+  def update_blob_upload(self, blob_upload, uncompressed_byte_count, piece_hashes, piece_sha_state,
+                         storage_metadata, byte_count, chunk_count, sha_state):
+    """ Updates the fields of the blob upload to match those given. Returns the updated blob upload
+        or None if the record does not exists.
+    """
+    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
+    if upload_record is None:
+      return None
+
+    upload_record.uncompressed_byte_count = uncompressed_byte_count
+    upload_record.piece_hashes = piece_hashes
+    upload_record.piece_sha_state = piece_sha_state
+    upload_record.storage_metadata = storage_metadata
+    upload_record.byte_count = byte_count
+    upload_record.chunk_count = chunk_count
+    upload_record.sha_state = sha_state
+    upload_record.save()
+    return BlobUpload.for_upload(upload_record)
+
+  def delete_blob_upload(self, blob_upload):
+    """ Deletes a blob upload record. """
+    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
+    if upload_record is not None:
+      upload_record.delete_instance()
+
+  def commit_blob_upload(self, blob_upload, blob_digest_str, blob_expiration_seconds):
+    """ Commits the blob upload into a blob and sets an expiration before that blob will be GCed.
+    """
+    upload_record = model.blob.get_blob_upload_by_uuid(blob_upload.upload_id)
+    if upload_record is None:
+      return None
+
+    repository = upload_record.repository
+    namespace_name = repository.namespace_user.username
+    repo_name = repository.name
+
+    # Create the blob and temporarily tag it.
+    location_obj = model.storage.get_image_location_for_name(blob_upload.location_name)
+    blob_record = model.blob.store_blob_record_and_temp_link(
+      namespace_name, repo_name, blob_digest_str, location_obj.id, blob_upload.byte_count,
+      blob_expiration_seconds, blob_upload.uncompressed_byte_count)
+
+    # Delete the blob upload.
+    upload_record.delete_instance()
+    return Blob.for_image_storage(blob_record,
+                                  storage_path=model.storage.get_layer_path(blob_record))
+

 pre_oci_model = PreOCIModel()
--- a/data/registry_model/test/test_blobuploader.py
+++ b/data/registry_model/test/test_blobuploader.py
@ -0,0 +1,115 @@
+import hashlib
+import os
+
+from io import BytesIO
+
+import pytest
+
+from data.registry_model.datatypes import RepositoryReference
+from data.registry_model.blobuploader import (create_blob_upload, retrieve_blob_upload_manager,
+                                              upload_blob, BlobUploadException,
+                                              BlobDigestMismatchException, BlobTooLargeException,
+                                              BlobUploadSettings)
+from data.registry_model.registry_pre_oci_model import PreOCIModel
+
+from storage.distributedstorage import DistributedStorage
+from storage.fakestorage import FakeStorage
+from test.fixtures import *
+
+@pytest.fixture()
+def pre_oci_model(initialized_db):
+  return PreOCIModel()
+
+@pytest.mark.parametrize('chunk_count', [
+  0,
+  1,
+  2,
+  10,
+])
+@pytest.mark.parametrize('subchunk', [
+  True,
+  False,
+])
+def test_basic_upload_blob(chunk_count, subchunk, pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
+  settings = BlobUploadSettings('2M', 512 * 1024, 3600)
+  app_config = {'TESTING': True}
+
+  data = ''
+  with upload_blob(repository_ref, storage, settings) as manager:
+    assert manager
+    assert manager.blob_upload_id
+
+    for index in range(0, chunk_count):
+      chunk_data = os.urandom(100)
+      data += chunk_data
+
+      if subchunk:
+        manager.upload_chunk(app_config, BytesIO(chunk_data))
+        manager.upload_chunk(app_config, BytesIO(chunk_data), (index * 100) + 50)
+      else:
+        manager.upload_chunk(app_config, BytesIO(chunk_data))
+
+    blob = manager.commit_to_blob(app_config)
+
+  # Check the blob.
+  assert blob.compressed_size == len(data)
+  assert not blob.uploading
+  assert blob.digest == 'sha256:' + hashlib.sha256(data).hexdigest()
+
+  # Ensure the blob exists in storage and has the expected data.
+  assert storage.get_content(['local_us'], blob.storage_path) == data
+
+
+def test_cancel_upload(pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
+  settings = BlobUploadSettings('2M', 512 * 1024, 3600)
+  app_config = {'TESTING': True}
+
+  blob_upload_id = None
+  with upload_blob(repository_ref, storage, settings) as manager:
+    blob_upload_id = manager.blob_upload_id
+    assert pre_oci_model.lookup_blob_upload(repository_ref, blob_upload_id) is not None
+
+    manager.upload_chunk(app_config, BytesIO('hello world'))
+
+  # Since the blob was not comitted, the upload should be deleted.
+  assert blob_upload_id
+  assert pre_oci_model.lookup_blob_upload(repository_ref, blob_upload_id) is None
+
+
+def test_too_large(pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
+  settings = BlobUploadSettings('1K', 512 * 1024, 3600)
+  app_config = {'TESTING': True}
+
+  with upload_blob(repository_ref, storage, settings) as manager:
+    with pytest.raises(BlobTooLargeException):
+      manager.upload_chunk(app_config, BytesIO(os.urandom(1024 * 1024 * 2)))
+
+
+def test_extra_blob_stream_handlers(pre_oci_model):
+  handler1_result = []
+  handler2_result = []
+
+  def handler1(bytes):
+    handler1_result.append(bytes)
+
+  def handler2(bytes):
+    handler2_result.append(bytes)
+
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
+  storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us'])
+  settings = BlobUploadSettings('1K', 512 * 1024, 3600)
+  app_config = {'TESTING': True}
+
+  with upload_blob(repository_ref, storage, settings,
+                   extra_blob_stream_handlers=[handler1, handler2]) as manager:
+    manager.upload_chunk(app_config, BytesIO('hello '))
+    manager.upload_chunk(app_config, BytesIO('world'))
+
+  assert ''.join(handler1_result) == 'hello world'
+  assert ''.join(handler2_result) == 'hello world'
--- a/data/registry_model/test/test_pre_oci_model.py
+++ b/data/registry_model/test/test_pre_oci_model.py
@ -1,3 +1,6 @@
+import hashlib
+import uuid
+
 from datetime import datetime, timedelta

 import pytest
@ -105,11 +108,13 @@ def test_legacy_images(repo_namespace, repo_name, pre_oci_model):
    found_image = pre_oci_model.get_legacy_image(repository_ref, image.docker_image_id,
                                                 include_parents=True)

-    with assert_query_count(4 if found_image.parents else 3):
+    with assert_query_count(5 if found_image.parents else 4):
      found_image = pre_oci_model.get_legacy_image(repository_ref, image.docker_image_id,
-                                                   include_parents=True)
+                                                   include_parents=True, include_blob=True)
      assert found_image.docker_image_id == image.docker_image_id
      assert found_image.parents == image.parents
+      assert found_image.blob
+      assert found_image.blob.placements

    # Check that the tags list can be retrieved.
    assert image.tags is not None
@ -523,3 +528,50 @@ def test_torrent_info(pre_oci_model):
  assert torrent_info is not None
  assert torrent_info.piece_length == 2
  assert torrent_info.pieces == 'foo'
+
+
+def test_blob_uploads(pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'simple')
+
+  blob_upload = pre_oci_model.create_blob_upload(repository_ref, str(uuid.uuid4()),
+                                                 'local_us', {'some': 'metadata'})
+  assert blob_upload
+  assert blob_upload.storage_metadata == {'some': 'metadata'}
+  assert blob_upload.location_name == 'local_us'
+
+  # Ensure we can find the blob upload.
+  assert pre_oci_model.lookup_blob_upload(repository_ref, blob_upload.upload_id) == blob_upload
+
+  # Update and ensure the changes are saved.
+  assert pre_oci_model.update_blob_upload(blob_upload, 1, 'the-pieces_hash',
+                                          blob_upload.piece_sha_state,
+                                          {'new': 'metadata'}, 2, 3,
+                                          blob_upload.sha_state)
+
+  updated = pre_oci_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
+  assert updated
+  assert updated.uncompressed_byte_count == 1
+  assert updated.piece_hashes == 'the-pieces_hash'
+  assert updated.storage_metadata == {'new': 'metadata'}
+  assert updated.byte_count == 2
+  assert updated.chunk_count == 3
+
+  # Delete the upload.
+  pre_oci_model.delete_blob_upload(blob_upload)
+
+  # Ensure it can no longer be found.
+  assert not pre_oci_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
+
+
+def test_commit_blob_upload(pre_oci_model):
+  repository_ref = pre_oci_model.lookup_repository('devtable', 'simple')
+  blob_upload = pre_oci_model.create_blob_upload(repository_ref, str(uuid.uuid4()),
+                                                 'local_us', {'some': 'metadata'})
+
+  # Commit the blob upload and make sure it is written as a blob.
+  digest = 'sha256:' + hashlib.sha256('hello').hexdigest()
+  blob = pre_oci_model.commit_blob_upload(blob_upload, digest, 60)
+  assert blob.digest == digest
+
+  # Ensure the upload can no longer be found.
+  assert not pre_oci_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)