/* Copyright 2015 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package storage import ( "fmt" "sort" "strconv" "sync" "time" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" "k8s.io/kubernetes/pkg/client/cache" "k8s.io/kubernetes/pkg/fields" "k8s.io/kubernetes/pkg/util" "k8s.io/kubernetes/pkg/util/clock" ) const ( // blockTimeout determines how long we're willing to block the request // to wait for a given resource version to be propagated to cache, // before terminating request and returning Timeout error with retry // after suggestion. blockTimeout = 3 * time.Second ) // watchCacheEvent is a single "watch event" that is send to users of // watchCache. Additionally to a typical "watch.Event" it contains // the previous value of the object to enable proper filtering in the // upper layers. type watchCacheEvent struct { Type watch.EventType Object runtime.Object ObjLabels labels.Set ObjFields fields.Set PrevObject runtime.Object PrevObjLabels labels.Set PrevObjFields fields.Set Key string ResourceVersion uint64 } // Computing a key of an object is generally non-trivial (it performs // e.g. validation underneath). To avoid computing it multiple times // (to serve the event in different List/Watch requests), in the // underlying store we are keeping pair (key, object). type storeElement struct { Key string Object runtime.Object } func storeElementKey(obj interface{}) (string, error) { elem, ok := obj.(*storeElement) if !ok { return "", fmt.Errorf("not a storeElement: %v", obj) } return elem.Key, nil } // watchCacheElement is a single "watch event" stored in a cache. // It contains the resource version of the object and the object // itself. type watchCacheElement struct { resourceVersion uint64 watchCacheEvent *watchCacheEvent } // watchCache implements a Store interface. // However, it depends on the elements implementing runtime.Object interface. // // watchCache is a "sliding window" (with a limited capacity) of objects // observed from a watch. type watchCache struct { sync.RWMutex // Condition on which lists are waiting for the fresh enough // resource version. cond *sync.Cond // Maximum size of history window. capacity int // keyFunc is used to get a key in the underlying storage for a given object. keyFunc func(runtime.Object) (string, error) // getAttrsFunc is used to get labels and fields of an object. getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error) // cache is used a cyclic buffer - its first element (with the smallest // resourceVersion) is defined by startIndex, its last element is defined // by endIndex (if cache is full it will be startIndex + capacity). // Both startIndex and endIndex can be greater than buffer capacity - // you should always apply modulo capacity to get an index in cache array. cache []watchCacheElement startIndex int endIndex int // store will effectively support LIST operation from the "end of cache // history" i.e. from the moment just after the newest cached watched event. // It is necessary to effectively allow clients to start watching at now. // NOTE: We assume that is thread-safe. store cache.Store // ResourceVersion up to which the watchCache is propagated. resourceVersion uint64 // This handler is run at the end of every successful Replace() method. onReplace func() // This handler is run at the end of every Add/Update/Delete method // and additionally gets the previous value of the object. onEvent func(*watchCacheEvent) // for testing timeouts. clock clock.Clock } func newWatchCache( capacity int, keyFunc func(runtime.Object) (string, error), getAttrsFunc func(runtime.Object) (labels.Set, fields.Set, error)) *watchCache { wc := &watchCache{ capacity: capacity, keyFunc: keyFunc, getAttrsFunc: getAttrsFunc, cache: make([]watchCacheElement, capacity), startIndex: 0, endIndex: 0, store: cache.NewStore(storeElementKey), resourceVersion: 0, clock: clock.RealClock{}, } wc.cond = sync.NewCond(wc.RLocker()) return wc } // Add takes runtime.Object as an argument. func (w *watchCache) Add(obj interface{}) error { object, resourceVersion, err := objectToVersionedRuntimeObject(obj) if err != nil { return err } event := watch.Event{Type: watch.Added, Object: object} f := func(elem *storeElement) error { return w.store.Add(elem) } return w.processEvent(event, resourceVersion, f) } // Update takes runtime.Object as an argument. func (w *watchCache) Update(obj interface{}) error { object, resourceVersion, err := objectToVersionedRuntimeObject(obj) if err != nil { return err } event := watch.Event{Type: watch.Modified, Object: object} f := func(elem *storeElement) error { return w.store.Update(elem) } return w.processEvent(event, resourceVersion, f) } // Delete takes runtime.Object as an argument. func (w *watchCache) Delete(obj interface{}) error { object, resourceVersion, err := objectToVersionedRuntimeObject(obj) if err != nil { return err } event := watch.Event{Type: watch.Deleted, Object: object} f := func(elem *storeElement) error { return w.store.Delete(elem) } return w.processEvent(event, resourceVersion, f) } func objectToVersionedRuntimeObject(obj interface{}) (runtime.Object, uint64, error) { object, ok := obj.(runtime.Object) if !ok { return nil, 0, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj) } meta, err := meta.Accessor(object) if err != nil { return nil, 0, err } resourceVersion, err := parseResourceVersion(meta.GetResourceVersion()) if err != nil { return nil, 0, err } return object, resourceVersion, nil } func parseResourceVersion(resourceVersion string) (uint64, error) { if resourceVersion == "" { return 0, nil } // Use bitsize being the size of int on the machine. return strconv.ParseUint(resourceVersion, 10, 0) } func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, updateFunc func(*storeElement) error) error { key, err := w.keyFunc(event.Object) if err != nil { return fmt.Errorf("couldn't compute key: %v", err) } elem := &storeElement{Key: key, Object: event.Object} // TODO: We should consider moving this lock below after the watchCacheEvent // is created. In such situation, the only problematic scenario is Replace( // happening after getting object from store and before acquiring a lock. // Maybe introduce another lock for this purpose. w.Lock() defer w.Unlock() previous, exists, err := w.store.Get(elem) if err != nil { return err } objLabels, objFields, err := w.getAttrsFunc(event.Object) if err != nil { return err } var prevObject runtime.Object var prevObjLabels labels.Set var prevObjFields fields.Set if exists { prevObject = previous.(*storeElement).Object prevObjLabels, prevObjFields, err = w.getAttrsFunc(prevObject) if err != nil { return err } } watchCacheEvent := &watchCacheEvent{ Type: event.Type, Object: event.Object, ObjLabels: objLabels, ObjFields: objFields, PrevObject: prevObject, PrevObjLabels: prevObjLabels, PrevObjFields: prevObjFields, Key: key, ResourceVersion: resourceVersion, } if w.onEvent != nil { w.onEvent(watchCacheEvent) } w.updateCache(resourceVersion, watchCacheEvent) w.resourceVersion = resourceVersion w.cond.Broadcast() return updateFunc(elem) } // Assumes that lock is already held for write. func (w *watchCache) updateCache(resourceVersion uint64, event *watchCacheEvent) { if w.endIndex == w.startIndex+w.capacity { // Cache is full - remove the oldest element. w.startIndex++ } w.cache[w.endIndex%w.capacity] = watchCacheElement{resourceVersion, event} w.endIndex++ } // List returns list of pointers to objects. func (w *watchCache) List() []interface{} { return w.store.List() } // waitUntilFreshAndBlock waits until cache is at least as fresh as given . // NOTE: This function acquired lock and doesn't release it. // You HAVE TO explicitly call w.RUnlock() after this function. func (w *watchCache) waitUntilFreshAndBlock(resourceVersion uint64, trace *util.Trace) error { startTime := w.clock.Now() go func() { // Wake us up when the time limit has expired. The docs // promise that time.After (well, NewTimer, which it calls) // will wait *at least* the duration given. Since this go // routine starts sometime after we record the start time, and // it will wake up the loop below sometime after the broadcast, // we don't need to worry about waking it up before the time // has expired accidentally. <-w.clock.After(blockTimeout) w.cond.Broadcast() }() w.RLock() if trace != nil { trace.Step("watchCache locked acquired") } for w.resourceVersion < resourceVersion { if w.clock.Since(startTime) >= blockTimeout { // Timeout with retry after 1 second. return errors.NewTimeoutError(fmt.Sprintf("Too large resource version: %v, current: %v", resourceVersion, w.resourceVersion), 1) } w.cond.Wait() } if trace != nil { trace.Step("watchCache fresh enough") } return nil } // WaitUntilFreshAndList returns list of pointers to objects. func (w *watchCache) WaitUntilFreshAndList(resourceVersion uint64, trace *util.Trace) ([]interface{}, uint64, error) { err := w.waitUntilFreshAndBlock(resourceVersion, trace) defer w.RUnlock() if err != nil { return nil, 0, err } return w.store.List(), w.resourceVersion, nil } // WaitUntilFreshAndGet returns a pointers to object. func (w *watchCache) WaitUntilFreshAndGet(resourceVersion uint64, key string, trace *util.Trace) (interface{}, bool, uint64, error) { err := w.waitUntilFreshAndBlock(resourceVersion, trace) defer w.RUnlock() if err != nil { return nil, false, 0, err } value, exists, err := w.store.GetByKey(key) return value, exists, w.resourceVersion, err } func (w *watchCache) ListKeys() []string { return w.store.ListKeys() } // Get takes runtime.Object as a parameter. However, it returns // pointer to . func (w *watchCache) Get(obj interface{}) (interface{}, bool, error) { object, ok := obj.(runtime.Object) if !ok { return nil, false, fmt.Errorf("obj does not implement runtime.Object interface: %v", obj) } key, err := w.keyFunc(object) if err != nil { return nil, false, fmt.Errorf("couldn't compute key: %v", err) } return w.store.Get(&storeElement{Key: key, Object: object}) } // GetByKey returns pointer to . func (w *watchCache) GetByKey(key string) (interface{}, bool, error) { return w.store.GetByKey(key) } // Replace takes slice of runtime.Object as a paramater. func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error { version, err := parseResourceVersion(resourceVersion) if err != nil { return err } toReplace := make([]interface{}, 0, len(objs)) for _, obj := range objs { object, ok := obj.(runtime.Object) if !ok { return fmt.Errorf("didn't get runtime.Object for replace: %#v", obj) } key, err := w.keyFunc(object) if err != nil { return fmt.Errorf("couldn't compute key: %v", err) } toReplace = append(toReplace, &storeElement{Key: key, Object: object}) } w.Lock() defer w.Unlock() w.startIndex = 0 w.endIndex = 0 if err := w.store.Replace(toReplace, resourceVersion); err != nil { return err } w.resourceVersion = version if w.onReplace != nil { w.onReplace() } w.cond.Broadcast() return nil } func (w *watchCache) SetOnReplace(onReplace func()) { w.Lock() defer w.Unlock() w.onReplace = onReplace } func (w *watchCache) SetOnEvent(onEvent func(*watchCacheEvent)) { w.Lock() defer w.Unlock() w.onEvent = onEvent } func (w *watchCache) GetAllEventsSinceThreadUnsafe(resourceVersion uint64) ([]*watchCacheEvent, error) { size := w.endIndex - w.startIndex oldest := w.resourceVersion if size > 0 { oldest = w.cache[w.startIndex%w.capacity].resourceVersion } if resourceVersion == 0 { // resourceVersion = 0 means that we don't require any specific starting point // and we would like to start watching from ~now. // However, to keep backward compatibility, we additionally need to return the // current state and only then start watching from that point. // // TODO: In v2 api, we should stop returning the current state - #13969. allItems := w.store.List() result := make([]*watchCacheEvent, len(allItems)) for i, item := range allItems { elem, ok := item.(*storeElement) if !ok { return nil, fmt.Errorf("not a storeElement: %v", elem) } objLabels, objFields, err := w.getAttrsFunc(elem.Object) if err != nil { return nil, err } result[i] = &watchCacheEvent{ Type: watch.Added, Object: elem.Object, ObjLabels: objLabels, ObjFields: objFields, Key: elem.Key, ResourceVersion: w.resourceVersion, } } return result, nil } if resourceVersion < oldest-1 { return nil, errors.NewGone(fmt.Sprintf("too old resource version: %d (%d)", resourceVersion, oldest-1)) } // Binary search the smallest index at which resourceVersion is greater than the given one. f := func(i int) bool { return w.cache[(w.startIndex+i)%w.capacity].resourceVersion > resourceVersion } first := sort.Search(size, f) result := make([]*watchCacheEvent, size-first) for i := 0; i < size-first; i++ { result[i] = w.cache[(w.startIndex+first+i)%w.capacity].watchCacheEvent } return result, nil } func (w *watchCache) GetAllEventsSince(resourceVersion uint64) ([]*watchCacheEvent, error) { w.RLock() defer w.RUnlock() return w.GetAllEventsSinceThreadUnsafe(resourceVersion) } func (w *watchCache) Resync() error { // Nothing to do return nil }