tzeneto
/
custom-kube-scheduler


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
							package ebpf

import (
	"fmt"
	"unsafe"

	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/unix"

	"github.com/pkg/errors"
)

// MapSpec defines a Map.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name       string
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32
	Flags      uint32
	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
	InnerMap *MapSpec
}

func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}

// Copy returns a copy of the spec.
func (ms *MapSpec) Copy() *MapSpec {
	if ms == nil {
		return nil
	}

	cpy := *ms
	cpy.InnerMap = ms.InnerMap.Copy()
	return &cpy
}

// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	fd  *bpfFD
	abi MapABI
	// Per CPU maps return values larger than the size in the spec
	fullValueSize int
}

// NewMapFromFD creates a map from a raw fd.
//
// You should not use fd after calling this function.
func NewMapFromFD(fd int) (*Map, error) {
	if fd < 0 {
		return nil, errors.New("invalid fd")
	}
	bpfFd := newBPFFD(uint32(fd))

	abi, err := newMapABIFromFd(bpfFd)
	if err != nil {
		bpfFd.forget()
		return nil, err
	}
	return newMap(bpfFd, abi)
}

// NewMap creates a new Map.
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
func NewMap(spec *MapSpec) (*Map, error) {
	if spec.Type != ArrayOfMaps && spec.Type != HashOfMaps {
		return createMap(spec, nil)
	}

	if spec.InnerMap == nil {
		return nil, errors.Errorf("%s requires InnerMap", spec.Type)
	}

	template, err := createMap(spec.InnerMap, nil)
	if err != nil {
		return nil, err
	}
	defer template.Close()

	return createMap(spec, template.fd)
}

func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
	spec = spec.Copy()

	switch spec.Type {
	case ArrayOfMaps:
		fallthrough
	case HashOfMaps:
		if spec.ValueSize != 0 && spec.ValueSize != 4 {
			return nil, errors.Errorf("ValueSize must be zero or four for map of map")
		}
		spec.ValueSize = 4

	case PerfEventArray:
		if spec.KeySize != 0 {
			return nil, errors.Errorf("KeySize must be zero for perf event array")
		}
		if spec.ValueSize != 0 {
			return nil, errors.Errorf("ValueSize must be zero for perf event array")
		}
		if spec.MaxEntries == 0 {
			n, err := internal.OnlineCPUs()
			if err != nil {
				return nil, errors.Wrap(err, "perf event array")
			}
			spec.MaxEntries = uint32(n)
		}

		spec.KeySize = 4
		spec.ValueSize = 4
	}

	attr := bpfMapCreateAttr{
		mapType:    spec.Type,
		keySize:    spec.KeySize,
		valueSize:  spec.ValueSize,
		maxEntries: spec.MaxEntries,
		flags:      spec.Flags,
	}

	if inner != nil {
		var err error
		attr.innerMapFd, err = inner.value()
		if err != nil {
			return nil, errors.Wrap(err, "map create")
		}
	}

	name, err := newBPFObjName(spec.Name)
	if err != nil {
		return nil, errors.Wrap(err, "map create")
	}

	if haveObjName.Result() {
		attr.mapName = name
	}

	fd, err := bpfMapCreate(&attr)
	if err != nil {
		return nil, errors.Wrap(err, "map create")
	}

	return newMap(fd, newMapABIFromSpec(spec))
}

func newMap(fd *bpfFD, abi *MapABI) (*Map, error) {
	m := &Map{
		fd,
		*abi,
		int(abi.ValueSize),
	}

	if !abi.Type.hasPerCPUValue() {
		return m, nil
	}

	possibleCPUs, err := internal.PossibleCPUs()
	if err != nil {
		return nil, err
	}

	m.fullValueSize = align(int(abi.ValueSize), 8) * possibleCPUs
	return m, nil
}

func (m *Map) String() string {
	return fmt.Sprintf("%s#%d", m.abi.Type, m.fd)
}

// ABI gets the ABI of the Map
func (m *Map) ABI() MapABI {
	return m.abi
}

// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see IsNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
	valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)

	if err := m.lookup(key, valuePtr); err != nil {
		return err
	}

	if valueBytes == nil {
		return nil
	}

	if m.abi.Type.hasPerCPUValue() {
		return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), valueBytes)
	}

	switch value := valueOut.(type) {
	case **Map:
		m, err := unmarshalMap(valueBytes)
		if err != nil {
			return err
		}

		(*value).Close()
		*value = m
		return nil
	case *Map:
		return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))
	case Map:
		return errors.Errorf("can't unmarshal into %T, need %T", value, (**Map)(nil))

	case **Program:
		p, err := unmarshalProgram(valueBytes)
		if err != nil {
			return err
		}

		(*value).Close()
		*value = p
		return nil
	case *Program:
		return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))
	case Program:
		return errors.Errorf("can't unmarshal into %T, need %T", value, (**Program)(nil))

	default:
		return unmarshalBytes(valueOut, valueBytes)
	}
}

// LookupBytes gets a value from Map.
//
// Returns a nil value if a key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
	valueBytes := make([]byte, m.fullValueSize)
	valuePtr := newPtr(unsafe.Pointer(&valueBytes[0]))

	err := m.lookup(key, valuePtr)
	if IsNotExist(err) {
		return nil, nil
	}

	return valueBytes, err
}

func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
	if err != nil {
		return errors.WithMessage(err, "can't marshal key")
	}

	err = bpfMapLookupElem(m.fd, keyPtr, valueOut)
	return errors.WithMessage(err, "lookup failed")
}

// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
type MapUpdateFlags uint64

const (
	// UpdateAny creates a new element or update an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
)

// Put replaces or creates a value in map.
//
// It is equivalent to calling Update with UpdateAny.
func (m *Map) Put(key, value interface{}) error {
	return m.Update(key, value, UpdateAny)
}

// Update changes the value of a key.
func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
	if err != nil {
		return errors.WithMessage(err, "can't marshal key")
	}

	var valuePtr syscallPtr
	if m.abi.Type.hasPerCPUValue() {
		valuePtr, err = marshalPerCPUValue(value, int(m.abi.ValueSize))
	} else {
		valuePtr, err = marshalPtr(value, int(m.abi.ValueSize))
	}
	if err != nil {
		return errors.WithMessage(err, "can't marshal value")
	}

	return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags))
}

// Delete removes a value.
//
// Returns an error if the key does not exist, see IsNotExist.
func (m *Map) Delete(key interface{}) error {
	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
	if err != nil {
		return errors.WithMessage(err, "can't marshal key")
	}

	err = bpfMapDeleteElem(m.fd, keyPtr)
	return errors.WithMessage(err, "can't delete key")
}

// NextKey finds the key following an initial key.
//
// See NextKeyBytes for details.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
	nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize))

	if err := m.nextKey(key, nextKeyPtr); err != nil {
		return err
	}

	if nextKeyBytes == nil {
		return nil
	}

	err := unmarshalBytes(nextKeyOut, nextKeyBytes)
	return errors.WithMessage(err, "can't unmarshal next key")
}

// NextKeyBytes returns the key following an initial key as a byte slice.
//
// Passing nil will return the first key.
//
// Use Iterate if you want to traverse all entries in the map.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
	nextKey := make([]byte, m.abi.KeySize)
	nextKeyPtr := newPtr(unsafe.Pointer(&nextKey[0]))

	err := m.nextKey(key, nextKeyPtr)
	if IsNotExist(err) {
		return nil, nil
	}

	return nextKey, err
}

func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
	var (
		keyPtr syscallPtr
		err    error
	)

	if key != nil {
		keyPtr, err = marshalPtr(key, int(m.abi.KeySize))
		if err != nil {
			return errors.WithMessage(err, "can't marshal key")
		}
	}

	err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut)
	return errors.WithMessage(err, "can't get next key")
}

// Iterate traverses a map.
//
// It's safe to create multiple iterators at the same time.
//
// It's not possible to guarantee that all keys in a map will be
// returned if there are concurrent modifications to the map.
func (m *Map) Iterate() *MapIterator {
	return newMapIterator(m)
}

// Close removes a Map
func (m *Map) Close() error {
	if m == nil {
		// This makes it easier to clean up when iterating maps
		// of maps / programs.
		return nil
	}

	return m.fd.close()
}

// FD gets the file descriptor of the Map.
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
	fd, err := m.fd.value()
	if err != nil {
		// Best effort: -1 is the number most likely to be an
		// invalid file descriptor.
		return -1
	}

	return int(fd)
}

// Clone creates a duplicate of the Map.
//
// Closing the duplicate does not affect the original, and vice versa.
// Changes made to the map are reflected by both instances however.
//
// Cloning a nil Map returns nil.
func (m *Map) Clone() (*Map, error) {
	if m == nil {
		return nil, nil
	}

	dup, err := m.fd.dup()
	if err != nil {
		return nil, errors.Wrap(err, "can't clone map")
	}

	return newMap(dup, &m.abi)
}

// Pin persists the map past the lifetime of the process that created it.
//
// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
func (m *Map) Pin(fileName string) error {
	return bpfPinObject(fileName, m.fd)
}

// LoadPinnedMap load a Map from a BPF file.
//
// Requires at least Linux 4.13, and is not compatible with
// nested maps. Use LoadPinnedMapExplicit in these situations.
func LoadPinnedMap(fileName string) (*Map, error) {
	fd, err := bpfGetObject(fileName)
	if err != nil {
		return nil, err
	}
	abi, err := newMapABIFromFd(fd)
	if err != nil {
		_ = fd.close()
		return nil, err
	}
	return newMap(fd, abi)
}

// LoadPinnedMapExplicit loads a map with explicit parameters.
func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
	fd, err := bpfGetObject(fileName)
	if err != nil {
		return nil, err
	}
	return newMap(fd, abi)
}

func unmarshalMap(buf []byte) (*Map, error) {
	if len(buf) != 4 {
		return nil, errors.New("map id requires 4 byte value")
	}

	// Looking up an entry in a nested map or prog array returns an id,
	// not an fd.
	id := internal.NativeEndian.Uint32(buf)
	fd, err := bpfGetMapFDByID(id)
	if err != nil {
		return nil, err
	}

	abi, err := newMapABIFromFd(fd)
	if err != nil {
		_ = fd.close()
		return nil, err
	}

	return newMap(fd, abi)
}

// MarshalBinary implements BinaryMarshaler.
func (m *Map) MarshalBinary() ([]byte, error) {
	fd, err := m.fd.value()
	if err != nil {
		return nil, err
	}

	buf := make([]byte, 4)
	internal.NativeEndian.PutUint32(buf, fd)
	return buf, nil
}

// MapIterator iterates a Map.
//
// See Map.Iterate.
type MapIterator struct {
	target            *Map
	prevKey           interface{}
	prevBytes         []byte
	count, maxEntries uint32
	done              bool
	err               error
}

func newMapIterator(target *Map) *MapIterator {
	return &MapIterator{
		target:     target,
		maxEntries: target.abi.MaxEntries,
		prevBytes:  make([]byte, int(target.abi.KeySize)),
	}
}

var errIterationAborted = errors.New("iteration aborted")

// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
// safe. You may see the same key multiple times. Iteration may
// also abort with an error, see IsIterationAborted.
//
// Returns false if there are no more entries. You must check
// the result of Err afterwards.
//
// See Map.Get for further caveats around valueOut.
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
	if mi.err != nil || mi.done {
		return false
	}

	for ; mi.count < mi.maxEntries; mi.count++ {
		var nextBytes []byte
		nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
		if mi.err != nil {
			return false
		}

		if nextBytes == nil {
			mi.done = true
			return false
		}

		// The user can get access to nextBytes since unmarshalBytes
		// does not copy when unmarshaling into a []byte.
		// Make a copy to prevent accidental corruption of
		// iterator state.
		copy(mi.prevBytes, nextBytes)
		mi.prevKey = mi.prevBytes

		mi.err = mi.target.Lookup(nextBytes, valueOut)
		if IsNotExist(mi.err) {
			// Even though the key should be valid, we couldn't look up
			// its value. If we're iterating a hash map this is probably
			// because a concurrent delete removed the value before we
			// could get it. This means that the next call to NextKeyBytes
			// is very likely to restart iteration.
			// If we're iterating one of the fd maps like
			// ProgramArray it means that a given slot doesn't have
			// a valid fd associated. It's OK to continue to the next slot.
			continue
		}
		if mi.err != nil {
			return false
		}

		mi.err = unmarshalBytes(keyOut, nextBytes)
		return mi.err == nil
	}

	mi.err = errIterationAborted
	return false
}

// Err returns any encountered error.
//
// The method must be called after Next returns nil.
func (mi *MapIterator) Err() error {
	return mi.err
}

// IsNotExist returns true if the error indicates that a
// key doesn't exist.
func IsNotExist(err error) bool {
	return errors.Cause(err) == unix.ENOENT
}

// IsIterationAborted returns true if the iteration was aborted.
//
// This occurs when keys are deleted from a hash map during iteration.
func IsIterationAborted(err error) bool {
	return errors.Cause(err) == errIterationAborted
}