kubenet_linux.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689
  1. // +build linux
  2. /*
  3. Copyright 2014 The Kubernetes Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package kubenet
  15. import (
  16. "fmt"
  17. "io/ioutil"
  18. "net"
  19. "strings"
  20. "sync"
  21. "time"
  22. "github.com/containernetworking/cni/libcni"
  23. cnitypes "github.com/containernetworking/cni/pkg/types"
  24. cnitypes020 "github.com/containernetworking/cni/pkg/types/020"
  25. "github.com/vishvananda/netlink"
  26. "golang.org/x/sys/unix"
  27. utilerrors "k8s.io/apimachinery/pkg/util/errors"
  28. utilnet "k8s.io/apimachinery/pkg/util/net"
  29. utilsets "k8s.io/apimachinery/pkg/util/sets"
  30. "k8s.io/klog"
  31. kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
  32. kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
  33. "k8s.io/kubernetes/pkg/kubelet/dockershim/network"
  34. "k8s.io/kubernetes/pkg/kubelet/dockershim/network/hostport"
  35. "k8s.io/kubernetes/pkg/util/bandwidth"
  36. utildbus "k8s.io/kubernetes/pkg/util/dbus"
  37. utilebtables "k8s.io/kubernetes/pkg/util/ebtables"
  38. utiliptables "k8s.io/kubernetes/pkg/util/iptables"
  39. utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
  40. utilexec "k8s.io/utils/exec"
  41. )
  42. const (
  43. BridgeName = "cbr0"
  44. DefaultCNIDir = "/opt/cni/bin"
  45. sysctlBridgeCallIPTables = "net/bridge/bridge-nf-call-iptables"
  46. // fallbackMTU is used if an MTU is not specified, and we cannot determine the MTU
  47. fallbackMTU = 1460
  48. // ebtables Chain to store dedup rules
  49. dedupChain = utilebtables.Chain("KUBE-DEDUP")
  50. // defaultIPAMDir is the default location for the checkpoint files stored by host-local ipam
  51. // https://github.com/containernetworking/cni/tree/master/plugins/ipam/host-local#backends
  52. defaultIPAMDir = "/var/lib/cni/networks"
  53. )
  54. // CNI plugins required by kubenet in /opt/cni/bin or user-specified directory
  55. var requiredCNIPlugins = [...]string{"bridge", "host-local", "loopback"}
  56. type kubenetNetworkPlugin struct {
  57. network.NoopNetworkPlugin
  58. host network.Host
  59. netConfig *libcni.NetworkConfig
  60. loConfig *libcni.NetworkConfig
  61. cniConfig libcni.CNI
  62. bandwidthShaper bandwidth.Shaper
  63. mu sync.Mutex //Mutex for protecting podIPs map, netConfig, and shaper initialization
  64. podIPs map[kubecontainer.ContainerID]string
  65. mtu int
  66. execer utilexec.Interface
  67. nsenterPath string
  68. hairpinMode kubeletconfig.HairpinMode
  69. // kubenet can use either hostportSyncer and hostportManager to implement hostports
  70. // Currently, if network host supports legacy features, hostportSyncer will be used,
  71. // otherwise, hostportManager will be used.
  72. hostportSyncer hostport.HostportSyncer
  73. hostportManager hostport.HostPortManager
  74. iptables utiliptables.Interface
  75. sysctl utilsysctl.Interface
  76. ebtables utilebtables.Interface
  77. // binDirs is passed by kubelet cni-bin-dir parameter.
  78. // kubenet will search for CNI binaries in DefaultCNIDir first, then continue to binDirs.
  79. binDirs []string
  80. nonMasqueradeCIDR string
  81. podCidr string
  82. gateway net.IP
  83. }
  84. func NewPlugin(networkPluginDirs []string) network.NetworkPlugin {
  85. protocol := utiliptables.ProtocolIpv4
  86. execer := utilexec.New()
  87. dbus := utildbus.New()
  88. sysctl := utilsysctl.New()
  89. iptInterface := utiliptables.New(execer, dbus, protocol)
  90. return &kubenetNetworkPlugin{
  91. podIPs: make(map[kubecontainer.ContainerID]string),
  92. execer: utilexec.New(),
  93. iptables: iptInterface,
  94. sysctl: sysctl,
  95. binDirs: append([]string{DefaultCNIDir}, networkPluginDirs...),
  96. hostportSyncer: hostport.NewHostportSyncer(iptInterface),
  97. hostportManager: hostport.NewHostportManager(iptInterface),
  98. nonMasqueradeCIDR: "10.0.0.0/8",
  99. }
  100. }
  101. func (plugin *kubenetNetworkPlugin) Init(host network.Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error {
  102. plugin.host = host
  103. plugin.hairpinMode = hairpinMode
  104. plugin.nonMasqueradeCIDR = nonMasqueradeCIDR
  105. plugin.cniConfig = &libcni.CNIConfig{Path: plugin.binDirs}
  106. if mtu == network.UseDefaultMTU {
  107. if link, err := findMinMTU(); err == nil {
  108. plugin.mtu = link.MTU
  109. klog.V(5).Infof("Using interface %s MTU %d as bridge MTU", link.Name, link.MTU)
  110. } else {
  111. plugin.mtu = fallbackMTU
  112. klog.Warningf("Failed to find default bridge MTU, using %d: %v", fallbackMTU, err)
  113. }
  114. } else {
  115. plugin.mtu = mtu
  116. }
  117. // Since this plugin uses a Linux bridge, set bridge-nf-call-iptables=1
  118. // is necessary to ensure kube-proxy functions correctly.
  119. //
  120. // This will return an error on older kernel version (< 3.18) as the module
  121. // was built-in, we simply ignore the error here. A better thing to do is
  122. // to check the kernel version in the future.
  123. plugin.execer.Command("modprobe", "br-netfilter").CombinedOutput()
  124. err := plugin.sysctl.SetSysctl(sysctlBridgeCallIPTables, 1)
  125. if err != nil {
  126. klog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIPTables, err)
  127. }
  128. plugin.loConfig, err = libcni.ConfFromBytes([]byte(`{
  129. "cniVersion": "0.1.0",
  130. "name": "kubenet-loopback",
  131. "type": "loopback"
  132. }`))
  133. if err != nil {
  134. return fmt.Errorf("Failed to generate loopback config: %v", err)
  135. }
  136. plugin.nsenterPath, err = plugin.execer.LookPath("nsenter")
  137. if err != nil {
  138. return fmt.Errorf("Failed to find nsenter binary: %v", err)
  139. }
  140. // Need to SNAT outbound traffic from cluster
  141. if err = plugin.ensureMasqRule(); err != nil {
  142. return err
  143. }
  144. return nil
  145. }
  146. // TODO: move thic logic into cni bridge plugin and remove this from kubenet
  147. func (plugin *kubenetNetworkPlugin) ensureMasqRule() error {
  148. if plugin.nonMasqueradeCIDR != "0.0.0.0/0" {
  149. if _, err := plugin.iptables.EnsureRule(utiliptables.Append, utiliptables.TableNAT, utiliptables.ChainPostrouting,
  150. "-m", "comment", "--comment", "kubenet: SNAT for outbound traffic from cluster",
  151. "-m", "addrtype", "!", "--dst-type", "LOCAL",
  152. "!", "-d", plugin.nonMasqueradeCIDR,
  153. "-j", "MASQUERADE"); err != nil {
  154. return fmt.Errorf("Failed to ensure that %s chain %s jumps to MASQUERADE: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, err)
  155. }
  156. }
  157. return nil
  158. }
  159. func findMinMTU() (*net.Interface, error) {
  160. intfs, err := net.Interfaces()
  161. if err != nil {
  162. return nil, err
  163. }
  164. mtu := 999999
  165. defIntfIndex := -1
  166. for i, intf := range intfs {
  167. if ((intf.Flags & net.FlagUp) != 0) && (intf.Flags&(net.FlagLoopback|net.FlagPointToPoint) == 0) {
  168. if intf.MTU < mtu {
  169. mtu = intf.MTU
  170. defIntfIndex = i
  171. }
  172. }
  173. }
  174. if mtu >= 999999 || mtu < 576 || defIntfIndex < 0 {
  175. return nil, fmt.Errorf("no suitable interface: %v", BridgeName)
  176. }
  177. return &intfs[defIntfIndex], nil
  178. }
  179. const NET_CONFIG_TEMPLATE = `{
  180. "cniVersion": "0.1.0",
  181. "name": "kubenet",
  182. "type": "bridge",
  183. "bridge": "%s",
  184. "mtu": %d,
  185. "addIf": "%s",
  186. "isGateway": true,
  187. "ipMasq": false,
  188. "hairpinMode": %t,
  189. "ipam": {
  190. "type": "host-local",
  191. "subnet": "%s",
  192. "gateway": "%s",
  193. "routes": [
  194. { "dst": "0.0.0.0/0" }
  195. ]
  196. }
  197. }`
  198. func (plugin *kubenetNetworkPlugin) Event(name string, details map[string]interface{}) {
  199. if name != network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE {
  200. return
  201. }
  202. plugin.mu.Lock()
  203. defer plugin.mu.Unlock()
  204. podCIDR, ok := details[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR].(string)
  205. if !ok {
  206. klog.Warningf("%s event didn't contain pod CIDR", network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE)
  207. return
  208. }
  209. if plugin.netConfig != nil {
  210. klog.Warningf("Ignoring subsequent pod CIDR update to %s", podCIDR)
  211. return
  212. }
  213. klog.V(5).Infof("PodCIDR is set to %q", podCIDR)
  214. _, cidr, err := net.ParseCIDR(podCIDR)
  215. if err == nil {
  216. setHairpin := plugin.hairpinMode == kubeletconfig.HairpinVeth
  217. // Set bridge address to first address in IPNet
  218. cidr.IP[len(cidr.IP)-1] += 1
  219. json := fmt.Sprintf(NET_CONFIG_TEMPLATE, BridgeName, plugin.mtu, network.DefaultInterfaceName, setHairpin, podCIDR, cidr.IP.String())
  220. klog.V(2).Infof("CNI network config set to %v", json)
  221. plugin.netConfig, err = libcni.ConfFromBytes([]byte(json))
  222. if err == nil {
  223. klog.V(5).Infof("CNI network config:\n%s", json)
  224. // Ensure cbr0 has no conflicting addresses; CNI's 'bridge'
  225. // plugin will bail out if the bridge has an unexpected one
  226. plugin.clearBridgeAddressesExcept(cidr)
  227. }
  228. plugin.podCidr = podCIDR
  229. plugin.gateway = cidr.IP
  230. }
  231. if err != nil {
  232. klog.Warningf("Failed to generate CNI network config: %v", err)
  233. }
  234. }
  235. func (plugin *kubenetNetworkPlugin) clearBridgeAddressesExcept(keep *net.IPNet) {
  236. bridge, err := netlink.LinkByName(BridgeName)
  237. if err != nil {
  238. return
  239. }
  240. addrs, err := netlink.AddrList(bridge, unix.AF_INET)
  241. if err != nil {
  242. return
  243. }
  244. for _, addr := range addrs {
  245. if !utilnet.IPNetEqual(addr.IPNet, keep) {
  246. klog.V(2).Infof("Removing old address %s from %s", addr.IPNet.String(), BridgeName)
  247. netlink.AddrDel(bridge, &addr)
  248. }
  249. }
  250. }
  251. func (plugin *kubenetNetworkPlugin) Name() string {
  252. return KubenetPluginName
  253. }
  254. func (plugin *kubenetNetworkPlugin) Capabilities() utilsets.Int {
  255. return utilsets.NewInt()
  256. }
  257. // setup sets up networking through CNI using the given ns/name and sandbox ID.
  258. func (plugin *kubenetNetworkPlugin) setup(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
  259. // Disable DAD so we skip the kernel delay on bringing up new interfaces.
  260. if err := plugin.disableContainerDAD(id); err != nil {
  261. klog.V(3).Infof("Failed to disable DAD in container: %v", err)
  262. }
  263. // Bring up container loopback interface
  264. if _, err := plugin.addContainerToNetwork(plugin.loConfig, "lo", namespace, name, id); err != nil {
  265. return err
  266. }
  267. // Hook container up with our bridge
  268. resT, err := plugin.addContainerToNetwork(plugin.netConfig, network.DefaultInterfaceName, namespace, name, id)
  269. if err != nil {
  270. return err
  271. }
  272. // Coerce the CNI result version
  273. res, err := cnitypes020.GetResult(resT)
  274. if err != nil {
  275. return fmt.Errorf("unable to understand network config: %v", err)
  276. }
  277. if res.IP4 == nil {
  278. return fmt.Errorf("CNI plugin reported no IPv4 address for container %v.", id)
  279. }
  280. ip4 := res.IP4.IP.IP.To4()
  281. if ip4 == nil {
  282. return fmt.Errorf("CNI plugin reported an invalid IPv4 address for container %v: %+v.", id, res.IP4)
  283. }
  284. // Put the container bridge into promiscuous mode to force it to accept hairpin packets.
  285. // TODO: Remove this once the kernel bug (#20096) is fixed.
  286. if plugin.hairpinMode == kubeletconfig.PromiscuousBridge {
  287. link, err := netlink.LinkByName(BridgeName)
  288. if err != nil {
  289. return fmt.Errorf("failed to lookup %q: %v", BridgeName, err)
  290. }
  291. if link.Attrs().Promisc != 1 {
  292. // promiscuous mode is not on, then turn it on.
  293. err := netlink.SetPromiscOn(link)
  294. if err != nil {
  295. return fmt.Errorf("Error setting promiscuous mode on %s: %v", BridgeName, err)
  296. }
  297. }
  298. // configure the ebtables rules to eliminate duplicate packets by best effort
  299. plugin.syncEbtablesDedupRules(link.Attrs().HardwareAddr)
  300. }
  301. plugin.podIPs[id] = ip4.String()
  302. // The first SetUpPod call creates the bridge; get a shaper for the sake of initialization
  303. // TODO: replace with CNI traffic shaper plugin
  304. shaper := plugin.shaper()
  305. ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations)
  306. if err != nil {
  307. return fmt.Errorf("Error reading pod bandwidth annotations: %v", err)
  308. }
  309. if egress != nil || ingress != nil {
  310. if err := shaper.ReconcileCIDR(fmt.Sprintf("%s/32", ip4.String()), egress, ingress); err != nil {
  311. return fmt.Errorf("Failed to add pod to shaper: %v", err)
  312. }
  313. }
  314. // TODO: replace with CNI port-forwarding plugin
  315. portMappings, err := plugin.host.GetPodPortMappings(id.ID)
  316. if err != nil {
  317. return err
  318. }
  319. if portMappings != nil && len(portMappings) > 0 {
  320. if err := plugin.hostportManager.Add(id.ID, &hostport.PodPortMapping{
  321. Namespace: namespace,
  322. Name: name,
  323. PortMappings: portMappings,
  324. IP: ip4,
  325. HostNetwork: false,
  326. }, BridgeName); err != nil {
  327. return err
  328. }
  329. }
  330. return nil
  331. }
  332. func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
  333. plugin.mu.Lock()
  334. defer plugin.mu.Unlock()
  335. start := time.Now()
  336. defer func() {
  337. klog.V(4).Infof("SetUpPod took %v for %s/%s", time.Since(start), namespace, name)
  338. }()
  339. if err := plugin.Status(); err != nil {
  340. return fmt.Errorf("Kubenet cannot SetUpPod: %v", err)
  341. }
  342. if err := plugin.setup(namespace, name, id, annotations); err != nil {
  343. // Make sure everything gets cleaned up on errors
  344. podIP, _ := plugin.podIPs[id]
  345. if err := plugin.teardown(namespace, name, id, podIP); err != nil {
  346. // Not a hard error or warning
  347. klog.V(4).Infof("Failed to clean up %s/%s after SetUpPod failure: %v", namespace, name, err)
  348. }
  349. return err
  350. }
  351. // Need to SNAT outbound traffic from cluster
  352. if err := plugin.ensureMasqRule(); err != nil {
  353. klog.Errorf("Failed to ensure MASQ rule: %v", err)
  354. }
  355. return nil
  356. }
  357. // Tears down as much of a pod's network as it can even if errors occur. Returns
  358. // an aggregate error composed of all errors encountered during the teardown.
  359. func (plugin *kubenetNetworkPlugin) teardown(namespace string, name string, id kubecontainer.ContainerID, podIP string) error {
  360. errList := []error{}
  361. if podIP != "" {
  362. klog.V(5).Infof("Removing pod IP %s from shaper", podIP)
  363. // shaper wants /32
  364. if err := plugin.shaper().Reset(fmt.Sprintf("%s/32", podIP)); err != nil {
  365. // Possible bandwidth shaping wasn't enabled for this pod anyways
  366. klog.V(4).Infof("Failed to remove pod IP %s from shaper: %v", podIP, err)
  367. }
  368. delete(plugin.podIPs, id)
  369. }
  370. if err := plugin.delContainerFromNetwork(plugin.netConfig, network.DefaultInterfaceName, namespace, name, id); err != nil {
  371. // This is to prevent returning error when TearDownPod is called twice on the same pod. This helps to reduce event pollution.
  372. if podIP != "" {
  373. klog.Warningf("Failed to delete container from kubenet: %v", err)
  374. } else {
  375. errList = append(errList, err)
  376. }
  377. }
  378. portMappings, err := plugin.host.GetPodPortMappings(id.ID)
  379. if err != nil {
  380. errList = append(errList, err)
  381. } else if portMappings != nil && len(portMappings) > 0 {
  382. if err = plugin.hostportManager.Remove(id.ID, &hostport.PodPortMapping{
  383. Namespace: namespace,
  384. Name: name,
  385. PortMappings: portMappings,
  386. HostNetwork: false,
  387. }); err != nil {
  388. errList = append(errList, err)
  389. }
  390. }
  391. return utilerrors.NewAggregate(errList)
  392. }
  393. func (plugin *kubenetNetworkPlugin) TearDownPod(namespace string, name string, id kubecontainer.ContainerID) error {
  394. plugin.mu.Lock()
  395. defer plugin.mu.Unlock()
  396. start := time.Now()
  397. defer func() {
  398. klog.V(4).Infof("TearDownPod took %v for %s/%s", time.Since(start), namespace, name)
  399. }()
  400. if plugin.netConfig == nil {
  401. return fmt.Errorf("Kubenet needs a PodCIDR to tear down pods")
  402. }
  403. // no cached IP is Ok during teardown
  404. podIP, _ := plugin.podIPs[id]
  405. if err := plugin.teardown(namespace, name, id, podIP); err != nil {
  406. return err
  407. }
  408. // Need to SNAT outbound traffic from cluster
  409. if err := plugin.ensureMasqRule(); err != nil {
  410. klog.Errorf("Failed to ensure MASQ rule: %v", err)
  411. }
  412. return nil
  413. }
  414. // TODO: Use the addToNetwork function to obtain the IP of the Pod. That will assume idempotent ADD call to the plugin.
  415. // Also fix the runtime's call to Status function to be done only in the case that the IP is lost, no need to do periodic calls
  416. func (plugin *kubenetNetworkPlugin) GetPodNetworkStatus(namespace string, name string, id kubecontainer.ContainerID) (*network.PodNetworkStatus, error) {
  417. plugin.mu.Lock()
  418. defer plugin.mu.Unlock()
  419. // Assuming the ip of pod does not change. Try to retrieve ip from kubenet map first.
  420. if podIP, ok := plugin.podIPs[id]; ok {
  421. return &network.PodNetworkStatus{IP: net.ParseIP(podIP)}, nil
  422. }
  423. netnsPath, err := plugin.host.GetNetNS(id.ID)
  424. if err != nil {
  425. return nil, fmt.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
  426. }
  427. if netnsPath == "" {
  428. return nil, fmt.Errorf("Cannot find the network namespace, skipping pod network status for container %q", id)
  429. }
  430. ip, err := network.GetPodIP(plugin.execer, plugin.nsenterPath, netnsPath, network.DefaultInterfaceName)
  431. if err != nil {
  432. return nil, err
  433. }
  434. plugin.podIPs[id] = ip.String()
  435. return &network.PodNetworkStatus{IP: ip}, nil
  436. }
  437. func (plugin *kubenetNetworkPlugin) Status() error {
  438. // Can't set up pods if we don't have a PodCIDR yet
  439. if plugin.netConfig == nil {
  440. return fmt.Errorf("Kubenet does not have netConfig. This is most likely due to lack of PodCIDR")
  441. }
  442. if !plugin.checkRequiredCNIPlugins() {
  443. return fmt.Errorf("could not locate kubenet required CNI plugins %v at %q", requiredCNIPlugins, plugin.binDirs)
  444. }
  445. return nil
  446. }
  447. // checkRequiredCNIPlugins returns if all kubenet required cni plugins can be found at /opt/cni/bin or user specified NetworkPluginDir.
  448. func (plugin *kubenetNetworkPlugin) checkRequiredCNIPlugins() bool {
  449. for _, dir := range plugin.binDirs {
  450. if plugin.checkRequiredCNIPluginsInOneDir(dir) {
  451. return true
  452. }
  453. }
  454. return false
  455. }
  456. // checkRequiredCNIPluginsInOneDir returns true if all required cni plugins are placed in dir
  457. func (plugin *kubenetNetworkPlugin) checkRequiredCNIPluginsInOneDir(dir string) bool {
  458. files, err := ioutil.ReadDir(dir)
  459. if err != nil {
  460. return false
  461. }
  462. for _, cniPlugin := range requiredCNIPlugins {
  463. found := false
  464. for _, file := range files {
  465. if strings.TrimSpace(file.Name()) == cniPlugin {
  466. found = true
  467. break
  468. }
  469. }
  470. if !found {
  471. return false
  472. }
  473. }
  474. return true
  475. }
  476. func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID, needNetNs bool) (*libcni.RuntimeConf, error) {
  477. netnsPath, err := plugin.host.GetNetNS(id.ID)
  478. if needNetNs && err != nil {
  479. klog.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
  480. }
  481. return &libcni.RuntimeConf{
  482. ContainerID: id.ID,
  483. NetNS: netnsPath,
  484. IfName: ifName,
  485. }, nil
  486. }
  487. func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) {
  488. rt, err := plugin.buildCNIRuntimeConf(ifName, id, true)
  489. if err != nil {
  490. return nil, fmt.Errorf("Error building CNI config: %v", err)
  491. }
  492. klog.V(3).Infof("Adding %s/%s to '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt)
  493. // The network plugin can take up to 3 seconds to execute,
  494. // so yield the lock while it runs.
  495. plugin.mu.Unlock()
  496. res, err := plugin.cniConfig.AddNetwork(config, rt)
  497. plugin.mu.Lock()
  498. if err != nil {
  499. return nil, fmt.Errorf("Error adding container to network: %v", err)
  500. }
  501. return res, nil
  502. }
  503. func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error {
  504. rt, err := plugin.buildCNIRuntimeConf(ifName, id, false)
  505. if err != nil {
  506. return fmt.Errorf("Error building CNI config: %v", err)
  507. }
  508. klog.V(3).Infof("Removing %s/%s from '%s' with CNI '%s' plugin and runtime: %+v", namespace, name, config.Network.Name, config.Network.Type, rt)
  509. err = plugin.cniConfig.DelNetwork(config, rt)
  510. // The pod may not get deleted successfully at the first time.
  511. // Ignore "no such file or directory" error in case the network has already been deleted in previous attempts.
  512. if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
  513. return fmt.Errorf("Error removing container from network: %v", err)
  514. }
  515. return nil
  516. }
  517. // shaper retrieves the bandwidth shaper and, if it hasn't been fetched before,
  518. // initializes it and ensures the bridge is appropriately configured
  519. // This function should only be called while holding the `plugin.mu` lock
  520. func (plugin *kubenetNetworkPlugin) shaper() bandwidth.Shaper {
  521. if plugin.bandwidthShaper == nil {
  522. plugin.bandwidthShaper = bandwidth.NewTCShaper(BridgeName)
  523. plugin.bandwidthShaper.ReconcileInterface()
  524. }
  525. return plugin.bandwidthShaper
  526. }
  527. //TODO: make this into a goroutine and rectify the dedup rules periodically
  528. func (plugin *kubenetNetworkPlugin) syncEbtablesDedupRules(macAddr net.HardwareAddr) {
  529. if plugin.ebtables == nil {
  530. plugin.ebtables = utilebtables.New(plugin.execer)
  531. klog.V(3).Infof("Flushing dedup chain")
  532. if err := plugin.ebtables.FlushChain(utilebtables.TableFilter, dedupChain); err != nil {
  533. klog.Errorf("Failed to flush dedup chain: %v", err)
  534. }
  535. }
  536. _, err := plugin.ebtables.GetVersion()
  537. if err != nil {
  538. klog.Warningf("Failed to get ebtables version. Skip syncing ebtables dedup rules: %v", err)
  539. return
  540. }
  541. klog.V(3).Infof("Filtering packets with ebtables on mac address: %v, gateway: %v, pod CIDR: %v", macAddr.String(), plugin.gateway.String(), plugin.podCidr)
  542. _, err = plugin.ebtables.EnsureChain(utilebtables.TableFilter, dedupChain)
  543. if err != nil {
  544. klog.Errorf("Failed to ensure %v chain %v", utilebtables.TableFilter, dedupChain)
  545. return
  546. }
  547. _, err = plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, utilebtables.ChainOutput, "-j", string(dedupChain))
  548. if err != nil {
  549. klog.Errorf("Failed to ensure %v chain %v jump to %v chain: %v", utilebtables.TableFilter, utilebtables.ChainOutput, dedupChain, err)
  550. return
  551. }
  552. commonArgs := []string{"-p", "IPv4", "-s", macAddr.String(), "-o", "veth+"}
  553. _, err = plugin.ebtables.EnsureRule(utilebtables.Prepend, utilebtables.TableFilter, dedupChain, append(commonArgs, "--ip-src", plugin.gateway.String(), "-j", "ACCEPT")...)
  554. if err != nil {
  555. klog.Errorf("Failed to ensure packets from cbr0 gateway to be accepted")
  556. return
  557. }
  558. _, err = plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, dedupChain, append(commonArgs, "--ip-src", plugin.podCidr, "-j", "DROP")...)
  559. if err != nil {
  560. klog.Errorf("Failed to ensure packets from podCidr but has mac address of cbr0 to get dropped.")
  561. return
  562. }
  563. }
  564. // disableContainerDAD disables duplicate address detection in the container.
  565. // DAD has a negative affect on pod creation latency, since we have to wait
  566. // a second or more for the addresses to leave the "tentative" state. Since
  567. // we're sure there won't be an address conflict (since we manage them manually),
  568. // this is safe. See issue 54651.
  569. //
  570. // This sets net.ipv6.conf.default.dad_transmits to 0. It must be run *before*
  571. // the CNI plugins are run.
  572. func (plugin *kubenetNetworkPlugin) disableContainerDAD(id kubecontainer.ContainerID) error {
  573. key := "net/ipv6/conf/default/dad_transmits"
  574. sysctlBin, err := plugin.execer.LookPath("sysctl")
  575. if err != nil {
  576. return fmt.Errorf("Could not find sysctl binary: %s", err)
  577. }
  578. netnsPath, err := plugin.host.GetNetNS(id.ID)
  579. if err != nil {
  580. return fmt.Errorf("Failed to get netns: %v", err)
  581. }
  582. if netnsPath == "" {
  583. return fmt.Errorf("Pod has no network namespace")
  584. }
  585. // If the sysctl doesn't exist, it means ipv6 is disabled; log and move on
  586. if _, err := plugin.sysctl.GetSysctl(key); err != nil {
  587. return fmt.Errorf("Ipv6 not enabled: %v", err)
  588. }
  589. output, err := plugin.execer.Command(plugin.nsenterPath,
  590. fmt.Sprintf("--net=%s", netnsPath), "-F", "--",
  591. sysctlBin, "-w", fmt.Sprintf("%s=%s", key, "0"),
  592. ).CombinedOutput()
  593. if err != nil {
  594. return fmt.Errorf("Failed to write sysctl: output: %s error: %s",
  595. output, err)
  596. }
  597. return nil
  598. }