Attempt to deal with eventual consistency by retrying

Rather than accept the resulting of a layer validation, we retry up to three
times, backing off 100ms after each try. The thought is that we allow s3 files
to make their way into the correct location increasing the liklihood the
verification can proceed, if possible.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
This commit is contained in:
Stephen J Day 2015-04-20 18:43:19 -07:00 committed by Richard
parent 96f1e85396
commit 4686b3c0f4
1 changed files with 25 additions and 4 deletions

View File

@ -46,16 +46,37 @@ func (lw *layerWriter) StartedAt() time.Time {
// uploaded layer. The final size and checksum are validated against the
// contents of the uploaded layer. The checksum should be provided in the
// format <algorithm>:<hex digest>.
func (lw *layerWriter) Finish(digest digest.Digest) (distribution.Layer, error) {
func (lw *layerWriter) Finish(dgst digest.Digest) (distribution.Layer, error) {
ctxu.GetLogger(lw.layerStore.repository.ctx).Debug("(*layerWriter).Finish")
if err := lw.bufferedFileWriter.Close(); err != nil {
return nil, err
}
canonical, err := lw.validateLayer(digest)
if err != nil {
var (
canonical digest.Digest
err error
)
// HACK(stevvooe): To deal with s3's lack of consistency, attempt to retry
// validation on failure. Three attempts are made, backing off 100ms each
// time.
for retries := 0; ; retries++ {
canonical, err = lw.validateLayer(dgst)
if err == nil {
break
}
ctxu.GetLoggerWithField(lw.layerStore.repository.ctx, "retries", retries).
Errorf("error validating layer: %v", err)
if retries < 3 {
time.Sleep(100 * time.Millisecond)
continue
}
return nil, err
}
if err := lw.moveLayer(canonical); err != nil {
@ -64,7 +85,7 @@ func (lw *layerWriter) Finish(digest digest.Digest) (distribution.Layer, error)
}
// Link the layer blob into the repository.
if err := lw.linkLayer(canonical, digest); err != nil {
if err := lw.linkLayer(canonical, dgst); err != nil {
return nil, err
}