| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 | // Copyright 2015 The etcd Authors//// Licensed under the Apache License, Version 2.0 (the "License");// you may not use this file except in compliance with the License.// You may obtain a copy of the License at////     http://www.apache.org/licenses/LICENSE-2.0//// Unless required by applicable law or agreed to in writing, software// distributed under the License is distributed on an "AS IS" BASIS,// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.// See the License for the specific language governing permissions and// limitations under the License.package mvccimport (	"sort"	"sync"	"github.com/google/btree"	"go.uber.org/zap")type index interface {	Get(key []byte, atRev int64) (rev, created revision, ver int64, err error)	Range(key, end []byte, atRev int64) ([][]byte, []revision)	Revisions(key, end []byte, atRev int64) []revision	Put(key []byte, rev revision)	Tombstone(key []byte, rev revision) error	RangeSince(key, end []byte, rev int64) []revision	Compact(rev int64) map[revision]struct{}	Keep(rev int64) map[revision]struct{}	Equal(b index) bool	Insert(ki *keyIndex)	KeyIndex(ki *keyIndex) *keyIndex}type treeIndex struct {	sync.RWMutex	tree *btree.BTree	lg   *zap.Logger}func newTreeIndex(lg *zap.Logger) index {	return &treeIndex{		tree: btree.New(32),		lg:   lg,	}}func (ti *treeIndex) Put(key []byte, rev revision) {	keyi := &keyIndex{key: key}	ti.Lock()	defer ti.Unlock()	item := ti.tree.Get(keyi)	if item == nil {		keyi.put(ti.lg, rev.main, rev.sub)		ti.tree.ReplaceOrInsert(keyi)		return	}	okeyi := item.(*keyIndex)	okeyi.put(ti.lg, rev.main, rev.sub)}func (ti *treeIndex) Get(key []byte, atRev int64) (modified, created revision, ver int64, err error) {	keyi := &keyIndex{key: key}	ti.RLock()	defer ti.RUnlock()	if keyi = ti.keyIndex(keyi); keyi == nil {		return revision{}, revision{}, 0, ErrRevisionNotFound	}	return keyi.get(ti.lg, atRev)}func (ti *treeIndex) KeyIndex(keyi *keyIndex) *keyIndex {	ti.RLock()	defer ti.RUnlock()	return ti.keyIndex(keyi)}func (ti *treeIndex) keyIndex(keyi *keyIndex) *keyIndex {	if item := ti.tree.Get(keyi); item != nil {		return item.(*keyIndex)	}	return nil}func (ti *treeIndex) visit(key, end []byte, f func(ki *keyIndex)) {	keyi, endi := &keyIndex{key: key}, &keyIndex{key: end}	ti.RLock()	defer ti.RUnlock()	ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {		if len(endi.key) > 0 && !item.Less(endi) {			return false		}		f(item.(*keyIndex))		return true	})}func (ti *treeIndex) Revisions(key, end []byte, atRev int64) (revs []revision) {	if end == nil {		rev, _, _, err := ti.Get(key, atRev)		if err != nil {			return nil		}		return []revision{rev}	}	ti.visit(key, end, func(ki *keyIndex) {		if rev, _, _, err := ki.get(ti.lg, atRev); err == nil {			revs = append(revs, rev)		}	})	return revs}func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []revision) {	if end == nil {		rev, _, _, err := ti.Get(key, atRev)		if err != nil {			return nil, nil		}		return [][]byte{key}, []revision{rev}	}	ti.visit(key, end, func(ki *keyIndex) {		if rev, _, _, err := ki.get(ti.lg, atRev); err == nil {			revs = append(revs, rev)			keys = append(keys, ki.key)		}	})	return keys, revs}func (ti *treeIndex) Tombstone(key []byte, rev revision) error {	keyi := &keyIndex{key: key}	ti.Lock()	defer ti.Unlock()	item := ti.tree.Get(keyi)	if item == nil {		return ErrRevisionNotFound	}	ki := item.(*keyIndex)	return ki.tombstone(ti.lg, rev.main, rev.sub)}// RangeSince returns all revisions from key(including) to end(excluding)// at or after the given rev. The returned slice is sorted in the order// of revision.func (ti *treeIndex) RangeSince(key, end []byte, rev int64) []revision {	keyi := &keyIndex{key: key}	ti.RLock()	defer ti.RUnlock()	if end == nil {		item := ti.tree.Get(keyi)		if item == nil {			return nil		}		keyi = item.(*keyIndex)		return keyi.since(ti.lg, rev)	}	endi := &keyIndex{key: end}	var revs []revision	ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {		if len(endi.key) > 0 && !item.Less(endi) {			return false		}		curKeyi := item.(*keyIndex)		revs = append(revs, curKeyi.since(ti.lg, rev)...)		return true	})	sort.Sort(revisions(revs))	return revs}func (ti *treeIndex) Compact(rev int64) map[revision]struct{} {	available := make(map[revision]struct{})	if ti.lg != nil {		ti.lg.Info("compact tree index", zap.Int64("revision", rev))	} else {		plog.Printf("store.index: compact %d", rev)	}	ti.Lock()	clone := ti.tree.Clone()	ti.Unlock()	clone.Ascend(func(item btree.Item) bool {		keyi := item.(*keyIndex)		//Lock is needed here to prevent modification to the keyIndex while		//compaction is going on or revision added to empty before deletion		ti.Lock()		keyi.compact(ti.lg, rev, available)		if keyi.isEmpty() {			item := ti.tree.Delete(keyi)			if item == nil {				if ti.lg != nil {					ti.lg.Panic("failed to delete during compaction")				} else {					plog.Panic("store.index: unexpected delete failure during compaction")				}			}		}		ti.Unlock()		return true	})	return available}// Keep finds all revisions to be kept for a Compaction at the given rev.func (ti *treeIndex) Keep(rev int64) map[revision]struct{} {	available := make(map[revision]struct{})	ti.RLock()	defer ti.RUnlock()	ti.tree.Ascend(func(i btree.Item) bool {		keyi := i.(*keyIndex)		keyi.keep(rev, available)		return true	})	return available}func (ti *treeIndex) Equal(bi index) bool {	b := bi.(*treeIndex)	if ti.tree.Len() != b.tree.Len() {		return false	}	equal := true	ti.tree.Ascend(func(item btree.Item) bool {		aki := item.(*keyIndex)		bki := b.tree.Get(item).(*keyIndex)		if !aki.equal(bki) {			equal = false			return false		}		return true	})	return equal}func (ti *treeIndex) Insert(ki *keyIndex) {	ti.Lock()	defer ti.Unlock()	ti.tree.ReplaceOrInsert(ki)}
 |