
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Also see many other "TODO: opt" suggestions in the code.
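
// To make the transformation concrete, here is an illustrative
// before/after sketch (register and block names are hypothetical;
// the printed forms only approximate this package's output):
//
//	b0: t0 = local int (x)      ; Alloc
//	    *t0 = 1                 ; Store
//	    if cond goto b1 else b2
//	b1: *t0 = 2
//	    jump b3
//	b2: jump b3
//	b3: t1 = *t0                ; Load
//
// After lifting, t0 and all its loads and stores are gone, and b3
// instead begins with a φ-node merging the dominating stores:
//
//	b3: t1 = phi [b1: 2:int, b2: 1:int]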
import (
	"fmt"
	"go/token"
	"go/types"
	"math/big"
	"os"
)

// If true, show diagnostic information at each step of lifting.
// Very verbose.
const debugLifting = false
// domFrontier maps each block to the set of blocks in its dominance
// frontier. The outer slice is conceptually a map keyed by
// Block.Index. The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
//
type domFrontier [][]*BasicBlock
func (df domFrontier) add(u, v *BasicBlock) {
	p := &df[u.Index]
	*p = append(*p, v)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *BasicBlock) {
	// Encounter each node u in postorder of dom tree.
	for _, child := range u.dom.children {
		df.build(child)
	}
	for _, vb := range u.Succs {
		if v := vb.dom; v.idom != u {
			df.add(u, vb)
		}
	}
	for _, w := range u.dom.children {
		for _, vb := range df[w.Index] {
			// TODO(adonovan): opt: use word-parallel bitwise union.
			if v := vb.dom; v.idom != u {
				df.add(u, vb)
			}
		}
	}
}
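
// For intuition, consider a diamond CFG (illustrative block names):
//
//	b0 -> b1, b2;  b1 -> b3;  b2 -> b3
//
// b0 dominates all four blocks, but neither b1 nor b2 dominates b3,
// so DF(b1) = DF(b2) = {b3}: b3 is the first point where definitions
// made under b1 or b2 cease to dominate, hence where φ-nodes may be
// needed. DF(b0) is empty, since every block is dominated by b0.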
func buildDomFrontier(fn *Function) domFrontier {
	df := make(domFrontier, len(fn.Blocks))
	df.build(fn.Blocks[0])
	if fn.Recover != nil {
		df.build(fn.Recover)
	}
	return df
}
// removeInstr removes all occurrences of instr from refs, compacting
// the slice in place, and returns the shortened slice.
func removeInstr(refs []Instruction, instr Instruction) []Instruction {
	i := 0
	for _, ref := range refs {
		if ref == instr {
			continue
		}
		refs[i] = ref
		i++
	}
	for j := i; j != len(refs); j++ {
		refs[j] = nil // aid GC
	}
	return refs[:i]
}
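
// For example (illustrative values), removeInstr([i1, i2, i1, i3], i1)
// compacts the slice in place and returns [i2, i3].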
// lift replaces local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
// - The dominator tree is up-to-date.
//
func lift(fn *Function) {
	// TODO(adonovan): opt: lots of little optimizations may be
	// worthwhile here, especially if they cause us to avoid
	// buildDomFrontier. For example:
	//
	// - Alloc never loaded? Eliminate.
	// - Alloc never stored? Replace all loads with a zero constant.
	// - Alloc stored once? Replace loads with dominating store;
	//   don't forget that an Alloc is itself an effective store
	//   of zero.
	// - Alloc used only within a single block?
	//   Use degenerate algorithm avoiding φ-nodes.
	// - Consider synergy with scalar replacement of aggregates (SRA).
	//   e.g. *(&x.f) where x is an Alloc.
	//   Perhaps we'd get better results if we generated this as x.f
	//   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
	//   Unclear.
	//
	// But we will start with the simplest correct code.
	df := buildDomFrontier(fn)

	if debugLifting {
		title := false
		for i, blocks := range df {
			if blocks != nil {
				if !title {
					fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
					title = true
				}
				fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
			}
		}
	}

	newPhis := make(newPhiMap)

	// During this pass we will replace some BasicBlock.Instrs
	// (allocs, loads and stores) with nil, keeping a count in
	// BasicBlock.gaps. At the end we will reset Instrs to the
	// concatenation of all non-dead newPhis and non-nil Instrs
	// for the block, reusing the original array if space permits.

	// While we're here, we also eliminate 'rundefers'
	// instructions in functions that contain no 'defer'
	// instructions.
	usesDefer := false

	// A counter used to generate ~unique ids for Phi nodes, as an
	// aid to debugging. We use large numbers to make them highly
	// visible. All nodes are renumbered later.
	fresh := 1000

	// Determine which allocs we can lift and number them densely.
	// The renaming phase uses this numbering for compact maps.
	numAllocs := 0
	for _, b := range fn.Blocks {
		b.gaps = 0
		b.rundefers = 0
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				index := -1
				if liftAlloc(df, instr, newPhis, &fresh) {
					index = numAllocs
					numAllocs++
				}
				instr.index = index
			case *Defer:
				usesDefer = true
			case *RunDefers:
				b.rundefers++
			}
		}
	}
	// renaming maps an alloc (keyed by index) to its replacement
	// value. Initially the renaming contains nil, signifying the
	// zero constant of the appropriate type; we construct the
	// Const lazily at most once on each path through the domtree.
	// TODO(adonovan): opt: cache per-function not per subtree.
	renaming := make([]Value, numAllocs)

	// Renaming.
	rename(fn.Blocks[0], renaming, newPhis)

	// Eliminate dead φ-nodes.
	removeDeadPhis(fn.Blocks, newPhis)

	// Prepend remaining live φ-nodes to each block.
	for _, b := range fn.Blocks {
		nps := newPhis[b]
		j := len(nps)

		rundefersToKill := b.rundefers
		if usesDefer {
			rundefersToKill = 0
		}

		if j+b.gaps+rundefersToKill == 0 {
			continue // fast path: no new phis or gaps
		}

		// Compact nps + non-nil Instrs into a new slice.
		// TODO(adonovan): opt: compact in situ (rightwards)
		// if Instrs has sufficient space or slack.
		dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
		for i, np := range nps {
			dst[i] = np.phi
		}
		for _, instr := range b.Instrs {
			if instr == nil {
				continue
			}
			if !usesDefer {
				if _, ok := instr.(*RunDefers); ok {
					continue
				}
			}
			dst[j] = instr
			j++
		}
		b.Instrs = dst
	}

	// Remove any fn.Locals that were lifted.
	j := 0
	for _, l := range fn.Locals {
		if l.index < 0 {
			fn.Locals[j] = l
			j++
		}
	}
	// Nil out fn.Locals[j:] to aid GC.
	for i := j; i < len(fn.Locals); i++ {
		fn.Locals[i] = nil
	}
	fn.Locals = fn.Locals[:j]
}
// removeDeadPhis removes φ-nodes not transitively needed by a
// non-Phi, non-DebugRef instruction.
func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
	// First pass: find the set of "live" φ-nodes: those reachable
	// from some non-Phi instruction.
	//
	// We compute reachability in reverse, starting from each φ,
	// rather than forwards, starting from each live non-Phi
	// instruction, because this way visits much less of the
	// Value graph.
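	//
	// For example (hypothetical): two φ-nodes in a loop that refer
	// only to each other form a dead cycle; neither has a non-Phi
	// referrer, so neither is marked live here, and both are
	// discarded by the second pass.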
	livePhis := make(map[*Phi]bool)
	for _, npList := range newPhis {
		for _, np := range npList {
			phi := np.phi
			if !livePhis[phi] && phiHasDirectReferrer(phi) {
				markLivePhi(livePhis, phi)
			}
		}
	}

	// Existing φ-nodes due to && and || operators
	// are all considered live (see Go issue 19622).
	for _, b := range blocks {
		for _, phi := range b.phis() {
			markLivePhi(livePhis, phi.(*Phi))
		}
	}

	// Second pass: eliminate unused phis from newPhis.
	for block, npList := range newPhis {
		j := 0
		for _, np := range npList {
			if livePhis[np.phi] {
				npList[j] = np
				j++
			} else {
				// discard it, first removing it from referrers
				for _, val := range np.phi.Edges {
					if refs := val.Referrers(); refs != nil {
						*refs = removeInstr(*refs, np.phi)
					}
				}
				np.phi.block = nil
			}
		}
		newPhis[block] = npList[:j]
	}
}
// markLivePhi marks phi, and all φ-nodes transitively reachable via
// its Operands, live.
func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
	livePhis[phi] = true
	for _, rand := range phi.Operands(nil) {
		if q, ok := (*rand).(*Phi); ok {
			if !livePhis[q] {
				markLivePhi(livePhis, q)
			}
		}
	}
}

// phiHasDirectReferrer reports whether phi is directly referred to by
// a non-Phi instruction. Such instructions are the
// roots of the liveness traversal.
func phiHasDirectReferrer(phi *Phi) bool {
	for _, instr := range *phi.Referrers() {
		if _, ok := instr.(*Phi); !ok {
			return true
		}
	}
	return false
}
type BlockSet struct{ big.Int } // (inherit methods from Int)

// Add adds b to the set and returns true if the set changed.
func (s *BlockSet) Add(b *BasicBlock) bool {
	i := b.Index
	if s.Bit(i) != 0 {
		return false
	}
	s.SetBit(&s.Int, i, 1)
	return true
}

// Has reports whether b is in the set.
func (s *BlockSet) Has(b *BasicBlock) bool {
	return s.Bit(b.Index) == 1
}

// Take removes an arbitrary element from the set s and
// returns its index, or returns -1 if the set is empty.
func (s *BlockSet) Take() int {
	l := s.BitLen()
	for i := 0; i < l; i++ {
		if s.Bit(i) == 1 {
			s.SetBit(&s.Int, i, 0)
			return i
		}
	}
	return -1
}
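
// For example (illustrative): after Add-ing blocks with Index 1 and 3,
// successive Take calls return 1, then 3, then -1, clearing each bit
// as it is returned.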
// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
	phi   *Phi
	alloc *Alloc
}

// newPhiMap records for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
// fresh is a source of fresh ids for phi nodes.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
	// Don't lift aggregates into registers, because we don't have
	// a way to express their zero-constants.
	switch deref(alloc.Type()).Underlying().(type) {
	case *types.Array, *types.Struct:
		return false
	}

	// Don't lift named return values in functions that defer
	// calls that may recover from panic.
	if fn := alloc.Parent(); fn.Recover != nil {
		for _, nr := range fn.namedResults {
			if nr == alloc {
				return false
			}
		}
	}

	// Compute defblocks, the set of blocks containing a
	// definition of the alloc cell.
	var defblocks BlockSet
	for _, instr := range *alloc.Referrers() {
		// Bail out if we discover the alloc is not liftable;
		// the only operations permitted to use the alloc are
		// loads/stores into the cell, and DebugRef.
		switch instr := instr.(type) {
		case *Store:
			if instr.Val == alloc {
				return false // address used as value
			}
			if instr.Addr != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
			defblocks.Add(instr.Block())
		case *UnOp:
			if instr.Op != token.MUL {
				return false // not a load
			}
			if instr.X != alloc {
				panic("Alloc.Referrers is inconsistent")
			}
		case *DebugRef:
			// ok
		default:
			return false // some other instruction
		}
	}
	// The Alloc itself counts as a (zero) definition of the cell.
	defblocks.Add(alloc.Block())

	if debugLifting {
		fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
	}

	fn := alloc.Parent()

	// Φ-insertion.
	//
	// What follows is the body of the main loop of the insert-φ
	// function described by Cytron et al, but instead of using
	// counter tricks, we just reset the 'hasAlready' and 'work'
	// sets each iteration. These are bitmaps so it's pretty cheap.
	//
	// TODO(adonovan): opt: recycle slice storage for W,
	// hasAlready, defBlocks across liftAlloc calls.
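	//
	// Sketch of the iteration (hypothetical block names): for a
	// diamond b0 -> {b1, b2} -> b3 where the cell is stored in b1
	// and b2, defblocks = {b0, b1, b2} (the Alloc in b0 is the
	// implicit zero store), and DF(b1) = DF(b2) = {b3}, so one
	// φ-node is placed at b3. Placing it makes b3 a definition
	// site in turn, so b3 is added back to W and its own frontier
	// is examined; here DF(b3) is empty, so the loop terminates.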
	var hasAlready BlockSet

	// Initialize W and work to defblocks.
	var work BlockSet = defblocks // blocks seen
	var W BlockSet                // blocks to do
	W.Set(&defblocks.Int)

	// Traverse iterated dominance frontier, inserting φ-nodes.
	for i := W.Take(); i != -1; i = W.Take() {
		u := fn.Blocks[i]
		for _, v := range df[u.Index] {
			if hasAlready.Add(v) {
				// Create φ-node.
				// It will be prepended to v.Instrs later, if needed.
				phi := &Phi{
					Edges:   make([]Value, len(v.Preds)),
					Comment: alloc.Comment,
				}
				// This is merely a debugging aid:
				phi.setNum(*fresh)
				*fresh++

				phi.pos = alloc.Pos()
				phi.setType(deref(alloc.Type()))
				phi.block = v
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
				}
				newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

				if work.Add(v) {
					W.Add(v)
				}
			}
		}
	}

	return true
}
// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) {
	var rands []*Value
	pxrefs := x.Referrers()
	pyrefs := y.Referrers()
	for _, instr := range *pxrefs {
		rands = instr.Operands(rands[:0]) // recycle storage
		for _, rand := range rands {
			if *rand != nil {
				if *rand == x {
					*rand = y
				}
			}
		}
		if pyrefs != nil {
			*pyrefs = append(*pyrefs, instr) // dups ok
		}
	}
	*pxrefs = nil // x is now unreferenced
}
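
// For example (illustrative): replaceAll(t1, t2) rewrites every operand
// slot in the function that holds t1 to hold t2, appends each such
// instruction to t2's referrer list, and empties t1's referrer list.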
// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value {
	v := renaming[alloc.index]
	if v == nil {
		v = zeroConst(deref(alloc.Type()))
		renaming[alloc.index] = v
	}
	return v
}
// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction. For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
	// Each φ-node becomes the new name for its associated Alloc.
	for _, np := range newPhis[u] {
		phi := np.phi
		alloc := np.alloc
		renaming[alloc.index] = phi
	}

	// Rename loads and stores of allocs.
	for i, instr := range u.Instrs {
		switch instr := instr.(type) {
		case *Alloc:
			if instr.index >= 0 { // store of zero to Alloc cell
				// Replace dominated loads by the zero value.
				renaming[instr.index] = nil
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
				}
				// Delete the Alloc.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *Store:
			if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
				// Replace dominated loads by the stored value.
				renaming[alloc.index] = instr.Val
				if debugLifting {
					fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
						instr, instr.Val.Name())
				}
				// Remove the store from the referrer list of the stored value.
				if refs := instr.Val.Referrers(); refs != nil {
					*refs = removeInstr(*refs, instr)
				}
				// Delete the Store.
				u.Instrs[i] = nil
				u.gaps++
			}

		case *UnOp:
			if instr.Op == token.MUL {
				if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
					newval := renamed(renaming, alloc)
					if debugLifting {
						fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
							instr.Name(), instr, newval.Name())
					}
					// Replace all references to
					// the loaded value by the
					// dominating stored value.
					replaceAll(instr, newval)
					// Delete the Load.
					u.Instrs[i] = nil
					u.gaps++
				}
			}

		case *DebugRef:
			if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
				if instr.IsAddr {
					instr.X = renamed(renaming, alloc)
					instr.IsAddr = false

					// Add DebugRef to instr.X's referrers.
					if refs := instr.X.Referrers(); refs != nil {
						*refs = append(*refs, instr)
					}
				} else {
					// A source expression denotes the address
					// of an Alloc that was optimized away.
					instr.X = nil

					// Delete the DebugRef.
					u.Instrs[i] = nil
					u.gaps++
				}
			}
		}
	}

	// For each φ-node in a CFG successor, rename the edge.
	for _, v := range u.Succs {
		phis := newPhis[v]
		if len(phis) == 0 {
			continue
		}
		i := v.predIndex(u)
		for _, np := range phis {
			phi := np.phi
			alloc := np.alloc
			newval := renamed(renaming, alloc)
			if debugLifting {
				fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
					phi.Name(), u, v, i, alloc.Name(), newval.Name())
			}
			phi.Edges[i] = newval
			if prefs := newval.Referrers(); prefs != nil {
				*prefs = append(*prefs, phi)
			}
		}
	}

	// Continue depth-first recursion over domtree, pushing a
	// fresh copy of the renaming map for each subtree.
	for i, v := range u.dom.children {
		r := renaming
		if i < len(u.dom.children)-1 {
			// On all but the final iteration, we must make
			// a copy to avoid destructive update.
			r = make([]Value, len(renaming))
			copy(r, renaming)
		}
		rename(v, r, newPhis)
	}
}
  597. }