Merge pull request #1906 from nwt/s3-multipart-copy
Use multipart upload API in S3 Move method
commit 010e063270

4 changed files with 305 additions and 61 deletions
@@ -99,6 +99,9 @@ information about each option that appears later in this page.
         secure: true
         v4auth: true
         chunksize: 5242880
+        multipartcopychunksize: 33554432
+        multipartcopymaxconcurrency: 100
+        multipartcopythresholdsize: 33554432
         rootdirectory: /s3/object/name/prefix
       swift:
         username: username
@@ -382,6 +385,9 @@ Permitted values are `error`, `warn`, `info` and `debug`. The default is
         secure: true
         v4auth: true
         chunksize: 5242880
+        multipartcopychunksize: 33554432
+        multipartcopymaxconcurrency: 100
+        multipartcopythresholdsize: 33554432
         rootdirectory: /s3/object/name/prefix
       swift:
         username: username
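Note on units: `multipartcopychunksize` and `multipartcopythresholdsize` are byte counts (33554432 bytes = 32 MB, alongside `chunksize`'s 5242880 = 5 MB), while `multipartcopymaxconcurrency` is a plain count of concurrent requests.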
@@ -138,6 +138,42 @@ An implementation of the `storagedriver.StorageDriver` interface which uses Amaz
       should be a number that is larger than 5*1024*1024.
     </td>
   </tr>
+  <tr>
+    <td>
+      <code>multipartcopychunksize</code>
+    </td>
+    <td>
+      no
+    </td>
+    <td>
+      Chunk size for all but the last Upload Part - Copy
+      operation of a copy that uses the multipart upload API.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <code>multipartcopymaxconcurrency</code>
+    </td>
+    <td>
+      no
+    </td>
+    <td>
+      Maximum number of concurrent Upload Part - Copy operations for a
+      copy that uses the multipart upload API.
+    </td>
+  </tr>
+  <tr>
+    <td>
+      <code>multipartcopythresholdsize</code>
+    </td>
+    <td>
+      no
+    </td>
+    <td>
+      Objects above this size will be copied using the multipart upload API.
+      PUT Object - Copy is used for objects at or below this size.
+    </td>
+  </tr>
   <tr>
     <td>
       <code>rootdirectory</code>
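Taken together, the three options choose between the two copy paths. The helper below is a hypothetical illustration of mine, not code from this PR; with the default values above (threshold and chunk size both 33554432 bytes, i.e. 32 MB), a 100 MB object is copied as ceil(104857600/33554432) = 4 Upload Part - Copy requests, of which at most `multipartcopymaxconcurrency` run at once, while a 16 MB object takes the single-request path.

```go
// copyStrategy is an illustrative (hypothetical) sketch of how the S3 driver
// picks a copy path for an object of the given size, in bytes.
func copyStrategy(size, thresholdSize, chunkSize int64) (multipart bool, numParts int64) {
	if size <= thresholdSize {
		return false, 1 // one PUT Object - Copy request
	}
	return true, (size + chunkSize - 1) / chunkSize // ceiling division into parts
}
```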
|  | @ -16,6 +16,7 @@ import ( | |||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"io/ioutil" | ||||
| 	"math" | ||||
| 	"net/http" | ||||
| 	"reflect" | ||||
| 	"sort" | ||||
|  | @ -45,8 +46,27 @@ const driverName = "s3aws" | |||
| // S3 API requires multipart upload chunks to be at least 5MB | ||||
| const minChunkSize = 5 << 20 | ||||
| 
 | ||||
| // maxChunkSize defines the maximum multipart upload chunk size allowed by S3. | ||||
| const maxChunkSize = 5 << 30 | ||||
| 
 | ||||
| const defaultChunkSize = 2 * minChunkSize | ||||
| 
 | ||||
| const ( | ||||
| 	// defaultMultipartCopyChunkSize defines the default chunk size for all | ||||
| 	// but the last Upload Part - Copy operation of a multipart copy. | ||||
| 	// Empirically, 32 MB is optimal. | ||||
| 	defaultMultipartCopyChunkSize = 32 << 20 | ||||
| 
 | ||||
| 	// defaultMultipartCopyMaxConcurrency defines the default maximum number | ||||
| 	// of concurrent Upload Part - Copy operations for a multipart copy. | ||||
| 	defaultMultipartCopyMaxConcurrency = 100 | ||||
| 
 | ||||
| 	// defaultMultipartCopyThresholdSize defines the default object size | ||||
| 	// above which multipart copy will be used. (PUT Object - Copy is used | ||||
| 	// for objects at or below this size.)  Empirically, 32 MB is optimal. | ||||
| 	defaultMultipartCopyThresholdSize = 32 << 20 | ||||
| ) | ||||
| 
 | ||||
| // listMax is the largest amount of objects you can request from S3 in a list call | ||||
| const listMax = 1000 | ||||
| 
 | ||||
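For reference on the shifted constants above: 5 << 20 = 5242880 bytes, the 5 MB minimum part size S3 imposes on multipart uploads; 5 << 30 = 5368709120 bytes, the 5 GB maximum part (and single-request copy) size; and 32 << 20 = 33554432 bytes, the value that appears in the sample configuration earlier on this page.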
@@ -67,6 +87,9 @@ type DriverParameters struct {
 	KeyID                       string
 	Secure                      bool
 	ChunkSize                   int64
+	MultipartCopyChunkSize      int64
+	MultipartCopyMaxConcurrency int64
+	MultipartCopyThresholdSize  int64
 	RootDirectory               string
 	StorageClass                string
 	UserAgent                   string
@@ -121,6 +144,9 @@ type driver struct {
 	ChunkSize                   int64
 	Encrypt                     bool
 	KeyID                       string
+	MultipartCopyChunkSize      int64
+	MultipartCopyMaxConcurrency int64
+	MultipartCopyThresholdSize  int64
 	RootDirectory               string
 	StorageClass                string
 	ObjectAcl                   string
@@ -217,27 +243,24 @@ func FromParameters(parameters map[string]interface{}) (*Driver, error) {
 		keyID = ""
 	}

-	chunkSize := int64(defaultChunkSize)
-	chunkSizeParam := parameters["chunksize"]
-	switch v := chunkSizeParam.(type) {
-	case string:
-		vv, err := strconv.ParseInt(v, 0, 64)
-		if err != nil {
-			return nil, fmt.Errorf("chunksize parameter must be an integer, %v invalid", chunkSizeParam)
-		}
-		chunkSize = vv
-	case int64:
-		chunkSize = v
-	case int, uint, int32, uint32, uint64:
-		chunkSize = reflect.ValueOf(v).Convert(reflect.TypeOf(chunkSize)).Int()
-	case nil:
-		// do nothing
-	default:
-		return nil, fmt.Errorf("invalid value for chunksize: %#v", chunkSizeParam)
-	}
-
-	if chunkSize < minChunkSize {
-		return nil, fmt.Errorf("The chunksize %#v parameter should be a number that is larger than or equal to %d", chunkSize, minChunkSize)
-	}
+	chunkSize, err := getParameterAsInt64(parameters, "chunksize", defaultChunkSize, minChunkSize, maxChunkSize)
+	if err != nil {
+		return nil, err
+	}
+
+	multipartCopyChunkSize, err := getParameterAsInt64(parameters, "multipartcopychunksize", defaultMultipartCopyChunkSize, minChunkSize, maxChunkSize)
+	if err != nil {
+		return nil, err
+	}
+
+	multipartCopyMaxConcurrency, err := getParameterAsInt64(parameters, "multipartcopymaxconcurrency", defaultMultipartCopyMaxConcurrency, 1, math.MaxInt64)
+	if err != nil {
+		return nil, err
+	}
+
+	multipartCopyThresholdSize, err := getParameterAsInt64(parameters, "multipartcopythresholdsize", defaultMultipartCopyThresholdSize, 0, maxChunkSize)
+	if err != nil {
+		return nil, err
+	}

 	rootDirectory := parameters["rootdirectory"]
@@ -289,6 +312,9 @@ func FromParameters(parameters map[string]interface{}) (*Driver, error) {
 		fmt.Sprint(keyID),
 		secureBool,
 		chunkSize,
+		multipartCopyChunkSize,
+		multipartCopyMaxConcurrency,
+		multipartCopyThresholdSize,
 		fmt.Sprint(rootDirectory),
 		storageClass,
 		fmt.Sprint(userAgent),
@@ -298,6 +324,35 @@ func FromParameters(parameters map[string]interface{}) (*Driver, error) {
 	return New(params)
 }

+// getParameterAsInt64 converts parameters[name] to an int64 value (using
+// defaultt if nil), verifies it is between min and max (inclusive), and
+// returns it.
+func getParameterAsInt64(parameters map[string]interface{}, name string, defaultt int64, min int64, max int64) (int64, error) {
+	rv := defaultt
+	param := parameters[name]
+	switch v := param.(type) {
+	case string:
+		vv, err := strconv.ParseInt(v, 0, 64)
+		if err != nil {
+			return 0, fmt.Errorf("%s parameter must be an integer, %v invalid", name, param)
+		}
+		rv = vv
+	case int64:
+		rv = v
+	case int, uint, int32, uint32, uint64:
+		rv = reflect.ValueOf(v).Convert(reflect.TypeOf(rv)).Int()
+	case nil:
+		// do nothing
+	default:
+		return 0, fmt.Errorf("invalid value for %s: %#v", name, param)
+	}
+
+	if rv < min || rv > max {
+		return 0, fmt.Errorf("The %s %#v parameter should be a number between %d and %d (inclusive)", name, rv, min, max)
+	}
+
+	return rv, nil
+}
+
 // New constructs a new Driver with the given AWS credentials, region, encryption flag, and
 // bucketName
 func New(params DriverParameters) (*Driver, error) {
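A hedged sketch of how the helper above behaves; the map literals and expected values are mine, not from the PR, and the snippet assumes the driver package's constants and imports. Note that strings are parsed with `strconv.ParseInt` base 0, so hex and octal spellings are accepted.

```go
// Illustrative only: getParameterAsInt64 accepts strings, common integer
// types, or nil (which selects the default), and range-checks the result.
func exampleParameterParsing() error {
	params := map[string]interface{}{
		"chunksize": "0x600000", // base-0 parsing accepts hex: 6291456 bytes
	}

	// String value: parsed, then checked against [minChunkSize, maxChunkSize].
	chunkSize, err := getParameterAsInt64(params, "chunksize", defaultChunkSize, minChunkSize, maxChunkSize)
	if err != nil {
		return err
	}
	fmt.Println(chunkSize) // 6291456

	// Absent key: the default is returned (100 here).
	concurrency, err := getParameterAsInt64(params, "multipartcopymaxconcurrency",
		defaultMultipartCopyMaxConcurrency, 1, math.MaxInt64)
	if err != nil {
		return err
	}
	fmt.Println(concurrency) // 100

	// Out-of-range value: 1024 < minChunkSize, so this returns an error.
	_, err = getParameterAsInt64(map[string]interface{}{"chunksize": 1024},
		"chunksize", defaultChunkSize, minChunkSize, maxChunkSize)
	return err // non-nil
}
```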
@@ -351,6 +406,9 @@ func New(params DriverParameters) (*Driver, error) {
 		ChunkSize: params.ChunkSize,
 		Encrypt:   params.Encrypt,
 		KeyID:     params.KeyID,
+		MultipartCopyChunkSize:      params.MultipartCopyChunkSize,
+		MultipartCopyMaxConcurrency: params.MultipartCopyMaxConcurrency,
+		MultipartCopyThresholdSize:  params.MultipartCopyThresholdSize,
 		RootDirectory:               params.RootDirectory,
 		StorageClass:                params.StorageClass,
 		ObjectAcl:                   params.ObjectAcl,
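Worth noting: `DriverParameters` is constructed positionally both in `FromParameters` above and in the test suite's `init` below, so the three new fields must be threaded through in exactly the same order in each place.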
@@ -565,6 +623,26 @@ func (d *driver) List(ctx context.Context, opath string) ([]string, error) {
 // object.
 func (d *driver) Move(ctx context.Context, sourcePath string, destPath string) error {
 	/* This is terrible, but aws doesn't have an actual move. */
+	if err := d.copy(ctx, sourcePath, destPath); err != nil {
+		return err
+	}
+	return d.Delete(ctx, sourcePath)
+}
+
+// copy copies an object stored at sourcePath to destPath.
+func (d *driver) copy(ctx context.Context, sourcePath string, destPath string) error {
+	// S3 can copy objects up to 5 GB in size with a single PUT Object - Copy
+	// operation. For larger objects, the multipart upload API must be used.
+	//
+	// Empirically, multipart copy is fastest with 32 MB parts and is faster
+	// than PUT Object - Copy for objects larger than 32 MB.
+
+	fileInfo, err := d.Stat(ctx, sourcePath)
+	if err != nil {
+		return parseError(sourcePath, err)
+	}
+
+	if fileInfo.Size() <= d.MultipartCopyThresholdSize {
 		_, err := d.S3.CopyObject(&s3.CopyObjectInput{
 			Bucket:               aws.String(d.Bucket),
 			Key:                  aws.String(d.s3Path(destPath)),
@@ -578,8 +656,73 @@ func (d *driver) Move(ctx context.Context, sourcePath string, destPath string) e
 		if err != nil {
 			return parseError(sourcePath, err)
 		}
+		return nil
+	}

-	return d.Delete(ctx, sourcePath)

+	// Even in the worst case, a multipart copy should take no more
+	// than a few minutes, so 30 minutes is very conservative.
+	expires := time.Now().Add(time.Duration(30) * time.Minute)
+	createResp, err := d.S3.CreateMultipartUpload(&s3.CreateMultipartUploadInput{
+		Bucket:               aws.String(d.Bucket),
+		Key:                  aws.String(d.s3Path(destPath)),
+		ContentType:          d.getContentType(),
+		ACL:                  d.getACL(),
+		Expires:              aws.Time(expires),
+		SSEKMSKeyId:          d.getSSEKMSKeyID(),
+		ServerSideEncryption: d.getEncryptionMode(),
+		StorageClass:         d.getStorageClass(),
+	})
+	if err != nil {
+		return err
+	}
+
+	numParts := (fileInfo.Size() + d.MultipartCopyChunkSize - 1) / d.MultipartCopyChunkSize
+	completedParts := make([]*s3.CompletedPart, numParts)
+	errChan := make(chan error, numParts)
+	limiter := make(chan struct{}, d.MultipartCopyMaxConcurrency)
+
+	for i := range completedParts {
+		i := int64(i)
+		go func() {
+			limiter <- struct{}{}
+			firstByte := i * d.MultipartCopyChunkSize
+			lastByte := firstByte + d.MultipartCopyChunkSize - 1
+			if lastByte >= fileInfo.Size() {
+				lastByte = fileInfo.Size() - 1
+			}
+			uploadResp, err := d.S3.UploadPartCopy(&s3.UploadPartCopyInput{
+				Bucket:          aws.String(d.Bucket),
+				CopySource:      aws.String(d.Bucket + "/" + d.s3Path(sourcePath)),
+				Key:             aws.String(d.s3Path(destPath)),
+				PartNumber:      aws.Int64(i + 1),
+				UploadId:        createResp.UploadId,
+				CopySourceRange: aws.String(fmt.Sprintf("bytes=%d-%d", firstByte, lastByte)),
+			})
+			if err == nil {
+				completedParts[i] = &s3.CompletedPart{
+					ETag:       uploadResp.CopyPartResult.ETag,
+					PartNumber: aws.Int64(i + 1),
+				}
+			}
+			errChan <- err
+			<-limiter
+		}()
+	}
+
+	for range completedParts {
+		err := <-errChan
+		if err != nil {
+			return err
+		}
+	}
+
+	_, err = d.S3.CompleteMultipartUpload(&s3.CompleteMultipartUploadInput{
+		Bucket:          aws.String(d.Bucket),
+		Key:             aws.String(d.s3Path(destPath)),
+		UploadId:        createResp.UploadId,
+		MultipartUpload: &s3.CompletedMultipartUpload{Parts: completedParts},
+	})
+	return err
+}

 func min(a, b int) int {
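The `limiter` channel in `copy` is a counting semaphore: a send acquires one of `MultipartCopyMaxConcurrency` slots and the receive after the request releases it. Note also that `errChan` is buffered with capacity `numParts`, so the early return on the first error cannot leave worker goroutines blocked on their sends. A minimal standalone sketch of the same pattern (hypothetical helper of mine, not from the PR):

```go
// runLimited runs tasks with at most maxConcurrency in flight and returns
// the first error received. Illustrative only.
func runLimited(maxConcurrency int, tasks []func() error) error {
	errChan := make(chan error, len(tasks))        // buffered: senders never block
	limiter := make(chan struct{}, maxConcurrency) // counting semaphore

	for _, task := range tasks {
		task := task // capture the loop variable (pre-Go 1.22 semantics)
		go func() {
			limiter <- struct{}{} // acquire a slot
			errChan <- task()
			<-limiter // release the slot
		}()
	}

	for range tasks {
		if err := <-errChan; err != nil {
			// Early return is safe: remaining sends land in the buffer.
			return err
		}
	}
	return nil
}
```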
|  | @ -1,19 +1,21 @@ | |||
| package s3 | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"io/ioutil" | ||||
| 	"math/rand" | ||||
| 	"os" | ||||
| 	"strconv" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"gopkg.in/check.v1" | ||||
| 
 | ||||
| 	"github.com/aws/aws-sdk-go/aws" | ||||
| 	"github.com/aws/aws-sdk-go/service/s3" | ||||
| 
 | ||||
| 	"github.com/docker/distribution/context" | ||||
| 	storagedriver "github.com/docker/distribution/registry/storage/driver" | ||||
| 	"github.com/docker/distribution/registry/storage/driver/testsuites" | ||||
| 
 | ||||
| 	"gopkg.in/check.v1" | ||||
| ) | ||||
| 
 | ||||
| // Hook up gocheck into the "go test" runner. | ||||
@@ -65,6 +67,9 @@ func init() {
 			keyID,
 			secureBool,
 			minChunkSize,
+			defaultMultipartCopyChunkSize,
+			defaultMultipartCopyMaxConcurrency,
+			defaultMultipartCopyThresholdSize,
 			rootDirectory,
 			storageClass,
 			driverName + "-test",
@@ -238,3 +243,57 @@ func TestOverThousandBlobs(t *testing.T) {
 		t.Fatalf("unexpected error deleting thousand files: %v", err)
 	}
 }
+
+func TestMoveWithMultipartCopy(t *testing.T) {
+	if skipS3() != "" {
+		t.Skip(skipS3())
+	}
+
+	rootDir, err := ioutil.TempDir("", "driver-")
+	if err != nil {
+		t.Fatalf("unexpected error creating temporary directory: %v", err)
+	}
+	defer os.Remove(rootDir)
+
+	d, err := s3DriverConstructor(rootDir, s3.StorageClassStandard)
+	if err != nil {
+		t.Fatalf("unexpected error creating driver: %v", err)
+	}
+
+	ctx := context.Background()
+	sourcePath := "/source"
+	destPath := "/dest"
+
+	defer d.Delete(ctx, sourcePath)
+	defer d.Delete(ctx, destPath)
+
+	// An object larger than d's MultipartCopyThresholdSize will cause d.Move() to perform a multipart copy.
+	multipartCopyThresholdSize := d.baseEmbed.Base.StorageDriver.(*driver).MultipartCopyThresholdSize
+	contents := make([]byte, 2*multipartCopyThresholdSize)
+	rand.Read(contents)
+
+	err = d.PutContent(ctx, sourcePath, contents)
+	if err != nil {
+		t.Fatalf("unexpected error creating content: %v", err)
+	}
+
+	err = d.Move(ctx, sourcePath, destPath)
+	if err != nil {
+		t.Fatalf("unexpected error moving file: %v", err)
+	}
+
+	received, err := d.GetContent(ctx, destPath)
+	if err != nil {
+		t.Fatalf("unexpected error getting content: %v", err)
+	}
+	if !bytes.Equal(contents, received) {
+		t.Fatal("content differs")
+	}
+
+	_, err = d.GetContent(ctx, sourcePath)
+	switch err.(type) {
+	case storagedriver.PathNotFoundError:
+	default:
+		t.Fatalf("unexpected error getting content: %v", err)
+	}
+}