qdisc_linux.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. package netlink
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "strconv"
  6. "strings"
  7. "syscall"
  8. "github.com/vishvananda/netlink/nl"
  9. )
  10. // NOTE function is here because it uses other linux functions
  11. func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
  12. var limit uint32 = 1000
  13. var lossCorr, delayCorr, duplicateCorr uint32
  14. var reorderProb, reorderCorr uint32
  15. var corruptProb, corruptCorr uint32
  16. latency := nattrs.Latency
  17. loss := Percentage2u32(nattrs.Loss)
  18. gap := nattrs.Gap
  19. duplicate := Percentage2u32(nattrs.Duplicate)
  20. jitter := nattrs.Jitter
  21. // Correlation
  22. if latency > 0 && jitter > 0 {
  23. delayCorr = Percentage2u32(nattrs.DelayCorr)
  24. }
  25. if loss > 0 {
  26. lossCorr = Percentage2u32(nattrs.LossCorr)
  27. }
  28. if duplicate > 0 {
  29. duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
  30. }
  31. // FIXME should validate values(like loss/duplicate are percentages...)
  32. latency = time2Tick(latency)
  33. if nattrs.Limit != 0 {
  34. limit = nattrs.Limit
  35. }
  36. // Jitter is only value if latency is > 0
  37. if latency > 0 {
  38. jitter = time2Tick(jitter)
  39. }
  40. reorderProb = Percentage2u32(nattrs.ReorderProb)
  41. reorderCorr = Percentage2u32(nattrs.ReorderCorr)
  42. if reorderProb > 0 {
  43. // ERROR if lantency == 0
  44. if gap == 0 {
  45. gap = 1
  46. }
  47. }
  48. corruptProb = Percentage2u32(nattrs.CorruptProb)
  49. corruptCorr = Percentage2u32(nattrs.CorruptCorr)
  50. return &Netem{
  51. QdiscAttrs: attrs,
  52. Latency: latency,
  53. DelayCorr: delayCorr,
  54. Limit: limit,
  55. Loss: loss,
  56. LossCorr: lossCorr,
  57. Gap: gap,
  58. Duplicate: duplicate,
  59. DuplicateCorr: duplicateCorr,
  60. Jitter: jitter,
  61. ReorderProb: reorderProb,
  62. ReorderCorr: reorderCorr,
  63. CorruptProb: corruptProb,
  64. CorruptCorr: corruptCorr,
  65. }
  66. }
  // QdiscDel will delete a qdisc from the system.
  // Equivalent to: `tc qdisc del $qdisc`
  //
  // It forwards to the QdiscDel method of the package-level handle (pkgHandle)
  // and returns that method's error unchanged.
  func QdiscDel(qdisc Qdisc) error {
  	return pkgHandle.QdiscDel(qdisc)
  }
  // QdiscDel will delete a qdisc from the system.
  // Equivalent to: `tc qdisc del $qdisc`
  //
  // It issues an RTM_DELQDISC request with no extra netlink flags through
  // qdiscModify and returns the resulting error.
  func (h *Handle) QdiscDel(qdisc Qdisc) error {
  	return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
  }
  // QdiscChange will change a qdisc in place.
  // Equivalent to: `tc qdisc change $qdisc`
  // The parent and handle MUST NOT be changed.
  //
  // It forwards to the QdiscChange method of the package-level handle
  // (pkgHandle) and returns that method's error unchanged.
  func QdiscChange(qdisc Qdisc) error {
  	return pkgHandle.QdiscChange(qdisc)
  }
  // QdiscChange will change a qdisc in place.
  // Equivalent to: `tc qdisc change $qdisc`
  // The parent and handle MUST NOT be changed.
  //
  // It issues an RTM_NEWQDISC request with no create/replace flags through
  // qdiscModify and returns the resulting error.
  func (h *Handle) QdiscChange(qdisc Qdisc) error {
  	return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
  }
  // QdiscReplace will replace a qdisc in the system.
  // Equivalent to: `tc qdisc replace $qdisc`
  // The handle MUST change.
  //
  // It forwards to the QdiscReplace method of the package-level handle
  // (pkgHandle) and returns that method's error unchanged.
  func QdiscReplace(qdisc Qdisc) error {
  	return pkgHandle.QdiscReplace(qdisc)
  }
  // QdiscReplace will replace a qdisc in the system.
  // Equivalent to: `tc qdisc replace $qdisc`
  // The handle MUST change.
  //
  // It issues an RTM_NEWQDISC request with NLM_F_CREATE|NLM_F_REPLACE through
  // qdiscModify and returns the resulting error.
  func (h *Handle) QdiscReplace(qdisc Qdisc) error {
  	return h.qdiscModify(
  		syscall.RTM_NEWQDISC,
  		syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
  		qdisc)
  }
  // QdiscAdd will add a qdisc to the system.
  // Equivalent to: `tc qdisc add $qdisc`
  //
  // It forwards to the QdiscAdd method of the package-level handle (pkgHandle)
  // and returns that method's error unchanged.
  func QdiscAdd(qdisc Qdisc) error {
  	return pkgHandle.QdiscAdd(qdisc)
  }
  // QdiscAdd will add a qdisc to the system.
  // Equivalent to: `tc qdisc add $qdisc`
  //
  // It issues an RTM_NEWQDISC request with NLM_F_CREATE|NLM_F_EXCL through
  // qdiscModify and returns the resulting error.
  func (h *Handle) QdiscAdd(qdisc Qdisc) error {
  	return h.qdiscModify(
  		syscall.RTM_NEWQDISC,
  		syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
  		qdisc)
  }
  117. func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
  118. req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
  119. base := qdisc.Attrs()
  120. msg := &nl.TcMsg{
  121. Family: nl.FAMILY_ALL,
  122. Ifindex: int32(base.LinkIndex),
  123. Handle: base.Handle,
  124. Parent: base.Parent,
  125. }
  126. req.AddData(msg)
  127. // When deleting don't bother building the rest of the netlink payload
  128. if cmd != syscall.RTM_DELQDISC {
  129. if err := qdiscPayload(req, qdisc); err != nil {
  130. return err
  131. }
  132. }
  133. _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
  134. return err
  135. }
  136. func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
  137. req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
  138. options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
  139. switch qdisc := qdisc.(type) {
  140. case *Prio:
  141. tcmap := nl.TcPrioMap{
  142. Bands: int32(qdisc.Bands),
  143. Priomap: qdisc.PriorityMap,
  144. }
  145. options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
  146. case *Tbf:
  147. opt := nl.TcTbfQopt{}
  148. opt.Rate.Rate = uint32(qdisc.Rate)
  149. opt.Peakrate.Rate = uint32(qdisc.Peakrate)
  150. opt.Limit = qdisc.Limit
  151. opt.Buffer = qdisc.Buffer
  152. nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
  153. if qdisc.Rate >= uint64(1<<32) {
  154. nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate))
  155. }
  156. if qdisc.Peakrate >= uint64(1<<32) {
  157. nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate))
  158. }
  159. if qdisc.Peakrate > 0 {
  160. nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst))
  161. }
  162. case *Htb:
  163. opt := nl.TcHtbGlob{}
  164. opt.Version = qdisc.Version
  165. opt.Rate2Quantum = qdisc.Rate2Quantum
  166. opt.Defcls = qdisc.Defcls
  167. // TODO: Handle Debug properly. For now default to 0
  168. opt.Debug = qdisc.Debug
  169. opt.DirectPkts = qdisc.DirectPkts
  170. nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
  171. // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
  172. case *Netem:
  173. opt := nl.TcNetemQopt{}
  174. opt.Latency = qdisc.Latency
  175. opt.Limit = qdisc.Limit
  176. opt.Loss = qdisc.Loss
  177. opt.Gap = qdisc.Gap
  178. opt.Duplicate = qdisc.Duplicate
  179. opt.Jitter = qdisc.Jitter
  180. options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
  181. // Correlation
  182. corr := nl.TcNetemCorr{}
  183. corr.DelayCorr = qdisc.DelayCorr
  184. corr.LossCorr = qdisc.LossCorr
  185. corr.DupCorr = qdisc.DuplicateCorr
  186. if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
  187. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
  188. }
  189. // Corruption
  190. corruption := nl.TcNetemCorrupt{}
  191. corruption.Probability = qdisc.CorruptProb
  192. corruption.Correlation = qdisc.CorruptCorr
  193. if corruption.Probability > 0 {
  194. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
  195. }
  196. // Reorder
  197. reorder := nl.TcNetemReorder{}
  198. reorder.Probability = qdisc.ReorderProb
  199. reorder.Correlation = qdisc.ReorderCorr
  200. if reorder.Probability > 0 {
  201. nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
  202. }
  203. case *Ingress:
  204. // ingress filters must use the proper handle
  205. if qdisc.Attrs().Parent != HANDLE_INGRESS {
  206. return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
  207. }
  208. }
  209. req.AddData(options)
  210. return nil
  211. }
  // QdiscList gets a list of qdiscs in the system.
  // Equivalent to: `tc qdisc show`.
  // The list can be filtered by link.
  //
  // It forwards to the QdiscList method of the package-level handle
  // (pkgHandle) and returns that method's results unchanged.
  func QdiscList(link Link) ([]Qdisc, error) {
  	return pkgHandle.QdiscList(link)
  }
  218. // QdiscList gets a list of qdiscs in the system.
  219. // Equivalent to: `tc qdisc show`.
  220. // The list can be filtered by link.
  221. func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
  222. req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
  223. index := int32(0)
  224. if link != nil {
  225. base := link.Attrs()
  226. h.ensureIndex(base)
  227. index = int32(base.Index)
  228. }
  229. msg := &nl.TcMsg{
  230. Family: nl.FAMILY_ALL,
  231. Ifindex: index,
  232. }
  233. req.AddData(msg)
  234. msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
  235. if err != nil {
  236. return nil, err
  237. }
  238. var res []Qdisc
  239. for _, m := range msgs {
  240. msg := nl.DeserializeTcMsg(m)
  241. attrs, err := nl.ParseRouteAttr(m[msg.Len():])
  242. if err != nil {
  243. return nil, err
  244. }
  245. // skip qdiscs from other interfaces
  246. if link != nil && msg.Ifindex != index {
  247. continue
  248. }
  249. base := QdiscAttrs{
  250. LinkIndex: int(msg.Ifindex),
  251. Handle: msg.Handle,
  252. Parent: msg.Parent,
  253. Refcnt: msg.Info,
  254. }
  255. var qdisc Qdisc
  256. qdiscType := ""
  257. for _, attr := range attrs {
  258. switch attr.Attr.Type {
  259. case nl.TCA_KIND:
  260. qdiscType = string(attr.Value[:len(attr.Value)-1])
  261. switch qdiscType {
  262. case "pfifo_fast":
  263. qdisc = &PfifoFast{}
  264. case "prio":
  265. qdisc = &Prio{}
  266. case "tbf":
  267. qdisc = &Tbf{}
  268. case "ingress":
  269. qdisc = &Ingress{}
  270. case "htb":
  271. qdisc = &Htb{}
  272. case "netem":
  273. qdisc = &Netem{}
  274. default:
  275. qdisc = &GenericQdisc{QdiscType: qdiscType}
  276. }
  277. case nl.TCA_OPTIONS:
  278. switch qdiscType {
  279. case "pfifo_fast":
  280. // pfifo returns TcPrioMap directly without wrapping it in rtattr
  281. if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
  282. return nil, err
  283. }
  284. case "prio":
  285. // prio returns TcPrioMap directly without wrapping it in rtattr
  286. if err := parsePrioData(qdisc, attr.Value); err != nil {
  287. return nil, err
  288. }
  289. case "tbf":
  290. data, err := nl.ParseRouteAttr(attr.Value)
  291. if err != nil {
  292. return nil, err
  293. }
  294. if err := parseTbfData(qdisc, data); err != nil {
  295. return nil, err
  296. }
  297. case "htb":
  298. data, err := nl.ParseRouteAttr(attr.Value)
  299. if err != nil {
  300. return nil, err
  301. }
  302. if err := parseHtbData(qdisc, data); err != nil {
  303. return nil, err
  304. }
  305. case "netem":
  306. if err := parseNetemData(qdisc, attr.Value); err != nil {
  307. return nil, err
  308. }
  309. // no options for ingress
  310. }
  311. }
  312. }
  313. *qdisc.Attrs() = base
  314. res = append(res, qdisc)
  315. }
  316. return res, nil
  317. }
  318. func parsePfifoFastData(qdisc Qdisc, value []byte) error {
  319. pfifo := qdisc.(*PfifoFast)
  320. tcmap := nl.DeserializeTcPrioMap(value)
  321. pfifo.PriorityMap = tcmap.Priomap
  322. pfifo.Bands = uint8(tcmap.Bands)
  323. return nil
  324. }
  325. func parsePrioData(qdisc Qdisc, value []byte) error {
  326. prio := qdisc.(*Prio)
  327. tcmap := nl.DeserializeTcPrioMap(value)
  328. prio.PriorityMap = tcmap.Priomap
  329. prio.Bands = uint8(tcmap.Bands)
  330. return nil
  331. }
  332. func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  333. native = nl.NativeEndian()
  334. htb := qdisc.(*Htb)
  335. for _, datum := range data {
  336. switch datum.Attr.Type {
  337. case nl.TCA_HTB_INIT:
  338. opt := nl.DeserializeTcHtbGlob(datum.Value)
  339. htb.Version = opt.Version
  340. htb.Rate2Quantum = opt.Rate2Quantum
  341. htb.Defcls = opt.Defcls
  342. htb.Debug = opt.Debug
  343. htb.DirectPkts = opt.DirectPkts
  344. case nl.TCA_HTB_DIRECT_QLEN:
  345. // TODO
  346. //htb.DirectQlen = native.uint32(datum.Value)
  347. }
  348. }
  349. return nil
  350. }
  351. func parseNetemData(qdisc Qdisc, value []byte) error {
  352. netem := qdisc.(*Netem)
  353. opt := nl.DeserializeTcNetemQopt(value)
  354. netem.Latency = opt.Latency
  355. netem.Limit = opt.Limit
  356. netem.Loss = opt.Loss
  357. netem.Gap = opt.Gap
  358. netem.Duplicate = opt.Duplicate
  359. netem.Jitter = opt.Jitter
  360. data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
  361. if err != nil {
  362. return err
  363. }
  364. for _, datum := range data {
  365. switch datum.Attr.Type {
  366. case nl.TCA_NETEM_CORR:
  367. opt := nl.DeserializeTcNetemCorr(datum.Value)
  368. netem.DelayCorr = opt.DelayCorr
  369. netem.LossCorr = opt.LossCorr
  370. netem.DuplicateCorr = opt.DupCorr
  371. case nl.TCA_NETEM_CORRUPT:
  372. opt := nl.DeserializeTcNetemCorrupt(datum.Value)
  373. netem.CorruptProb = opt.Probability
  374. netem.CorruptCorr = opt.Correlation
  375. case nl.TCA_NETEM_REORDER:
  376. opt := nl.DeserializeTcNetemReorder(datum.Value)
  377. netem.ReorderProb = opt.Probability
  378. netem.ReorderCorr = opt.Correlation
  379. }
  380. }
  381. return nil
  382. }
  383. func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  384. native = nl.NativeEndian()
  385. tbf := qdisc.(*Tbf)
  386. for _, datum := range data {
  387. switch datum.Attr.Type {
  388. case nl.TCA_TBF_PARMS:
  389. opt := nl.DeserializeTcTbfQopt(datum.Value)
  390. tbf.Rate = uint64(opt.Rate.Rate)
  391. tbf.Peakrate = uint64(opt.Peakrate.Rate)
  392. tbf.Limit = opt.Limit
  393. tbf.Buffer = opt.Buffer
  394. case nl.TCA_TBF_RATE64:
  395. tbf.Rate = native.Uint64(datum.Value[0:8])
  396. case nl.TCA_TBF_PRATE64:
  397. tbf.Peakrate = native.Uint64(datum.Value[0:8])
  398. case nl.TCA_TBF_PBURST:
  399. tbf.Minburst = native.Uint32(datum.Value[0:4])
  400. }
  401. }
  402. return nil
  403. }
  const (
  	// TIME_UNITS_PER_SEC is the number of time units in one second used by
  	// the clock conversion helpers in this file. At 1000000 units per
  	// second, one unit is one microsecond.
  	TIME_UNITS_PER_SEC = 1000000
  )
  // Clock parameters cached from /proc/net/psched by initClock. All three are
  // zero until initClock has parsed the file successfully.
  var (
  	// tickInUsec is (field 0 / field 1) * clockFactor; time2Tick multiplies
  	// by it and tick2Time divides by it.
  	tickInUsec float64
  	// clockFactor is field 2 divided by TIME_UNITS_PER_SEC; time2Ktime
  	// multiplies by it and ktime2Time divides by it.
  	clockFactor float64
  	// hz is field 0 after initClock's compatibility adjustment.
  	hz float64
  )
  412. func initClock() {
  413. data, err := ioutil.ReadFile("/proc/net/psched")
  414. if err != nil {
  415. return
  416. }
  417. parts := strings.Split(strings.TrimSpace(string(data)), " ")
  418. if len(parts) < 3 {
  419. return
  420. }
  421. var vals [3]uint64
  422. for i := range vals {
  423. val, err := strconv.ParseUint(parts[i], 16, 32)
  424. if err != nil {
  425. return
  426. }
  427. vals[i] = val
  428. }
  429. // compatibility
  430. if vals[2] == 1000000000 {
  431. vals[0] = vals[1]
  432. }
  433. clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
  434. tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
  435. hz = float64(vals[0])
  436. }
  // TickInUsec returns the cached tickInUsec value, calling initClock first
  // when the cached value is still zero. The result stays zero if initClock
  // could not load the clock parameters.
  func TickInUsec() float64 {
  	if tickInUsec == 0.0 {
  		initClock()
  	}
  	return tickInUsec
  }
  // ClockFactor returns the cached clockFactor value, calling initClock first
  // when the cached value is still zero. The result stays zero if initClock
  // could not load the clock parameters.
  func ClockFactor() float64 {
  	if clockFactor == 0.0 {
  		initClock()
  	}
  	return clockFactor
  }
  // Hz returns the cached hz value, calling initClock first when the cached
  // value is still zero. The result stays zero if initClock could not load
  // the clock parameters.
  func Hz() float64 {
  	if hz == 0.0 {
  		initClock()
  	}
  	return hz
  }
  455. func time2Tick(time uint32) uint32 {
  456. return uint32(float64(time) * TickInUsec())
  457. }
  458. func tick2Time(tick uint32) uint32 {
  459. return uint32(float64(tick) / TickInUsec())
  460. }
  461. func time2Ktime(time uint32) uint32 {
  462. return uint32(float64(time) * ClockFactor())
  463. }
  464. func ktime2Time(ktime uint32) uint32 {
  465. return uint32(float64(ktime) / ClockFactor())
  466. }
  467. func burst(rate uint64, buffer uint32) uint32 {
  468. return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
  469. }
  470. func latency(rate uint64, limit, buffer uint32) float64 {
  471. return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
  472. }
  473. func Xmittime(rate uint64, size uint32) float64 {
  474. return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
  475. }