- // Copyright 2013 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package ssa
- // This file defines the lifting pass which tries to "lift" Alloc
- // cells (new/local variables) into SSA registers, replacing loads
- // with the dominating stored value, eliminating loads and stores, and
- // inserting φ-nodes as needed.
- // Cited papers and resources:
- //
- // Ron Cytron et al. 1991. Efficiently computing SSA form...
- // http://doi.acm.org/10.1145/115372.115320
- //
- // Cooper, Harvey, Kennedy. 2001. A Simple, Fast Dominance Algorithm.
- // Software Practice and Experience 2001, 4:1-10.
- // http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
- //
- // Daniel Berlin, llvmdev mailing list, 2012.
- // http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
- // (Be sure to expand the whole thread.)
- // TODO(adonovan): opt: there are many optimizations worth evaluating, and
- // the conventional wisdom for SSA construction is that a simple,
- // well-engineered algorithm often beats those of better asymptotic
- // complexity on all but the most egregious inputs.
- //
- // Danny Berlin suggests that the Cooper et al. algorithm for
- // computing the dominance frontier is superior to Cytron et al.'s.
- // Furthermore, he recommends that rather than computing the DF for
- // the whole function and then renaming all alloc cells, it may be
- // cheaper to compute the DF for each alloc cell separately and
- // throw it away afterwards.
- //
- // Consider exploiting liveness information to avoid creating dead
- // φ-nodes which we then immediately remove.
- //
- // Also see many other "TODO: opt" suggestions in the code.
- import (
- "fmt"
- "go/token"
- "go/types"
- "math/big"
- "os"
- )
- // If true, show diagnostic information at each step of lifting.
- // Very verbose.
- const debugLifting = false
- // domFrontier maps each block to the set of blocks in its dominance
- // frontier. The outer slice is conceptually a map keyed by
- // Block.Index. The inner slice is conceptually a set, possibly
- // containing duplicates.
- //
- // TODO(adonovan): opt: measure impact of dups; consider a packed bit
- // representation, e.g. big.Int, and bitwise parallel operations for
- // the union step in the Children loop.
- //
- // domFrontier's methods mutate the slice's elements but not its
- // length, so their receivers needn't be pointers.
- //
- type domFrontier [][]*BasicBlock
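- // As a usage sketch (the loop variable names are illustrative),
- // the members of DF(b) are enumerated by indexing with b.Index:
- //
- //	for _, v := range df[b.Index] {
- //		// v is in b's dominance frontier, possibly
- //		// more than once, since duplicates may occur.
- //	}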
- func (df domFrontier) add(u, v *BasicBlock) {
- p := &df[u.Index]
- *p = append(*p, v)
- }
- // build builds the dominance frontier df for the dominator (sub)tree
- // rooted at u, using the Cytron et al. algorithm.
- //
- // TODO(adonovan): opt: consider Berlin's approach, computing pruned SSA
- // by pruning the entire IDF computation, rather than merely pruning
- // the DF -> IDF step.
- func (df domFrontier) build(u *BasicBlock) {
- // Encounter each node u in postorder of dom tree.
- for _, child := range u.dom.children {
- df.build(child)
- }
- for _, vb := range u.Succs {
- if v := vb.dom; v.idom != u {
- df.add(u, vb)
- }
- }
- for _, w := range u.dom.children {
- for _, vb := range df[w.Index] {
- // TODO(adonovan): opt: use word-parallel bitwise union.
- if v := vb.dom; v.idom != u {
- df.add(u, vb)
- }
- }
- }
- }
- func buildDomFrontier(fn *Function) domFrontier {
- df := make(domFrontier, len(fn.Blocks))
- df.build(fn.Blocks[0])
- if fn.Recover != nil {
- df.build(fn.Recover)
- }
- return df
- }
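- // To illustrate (a sketch, not part of the pass): in a diamond CFG
- // where B0 branches to B1 and B2, both of which jump to B3, B0 is
- // the immediate dominator of the other three blocks, so build
- // computes
- //
- //	DF(B0) = {}       // B0 idoms both of its successors
- //	DF(B1) = {B3}     // idom(B3) is B0, not B1
- //	DF(B2) = {B3}     // likewise
- //	DF(B3) = {}       // no successors
- //
- // and a cell defined in both B1 and B2 will need a φ-node at the
- // join point B3.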
- // removeInstr removes all occurrences of instr from refs,
- // compacting the slice in place, and returns the shortened slice.
- func removeInstr(refs []Instruction, instr Instruction) []Instruction {
- i := 0
- for _, ref := range refs {
- if ref == instr {
- continue
- }
- refs[i] = ref
- i++
- }
- for j := i; j != len(refs); j++ {
- refs[j] = nil // aid GC
- }
- return refs[:i]
- }
- // lift replaces local and new Allocs accessed only with
- // load/store by SSA registers, inserting φ-nodes where necessary.
- // The result is a program in classical pruned SSA form.
- //
- // Preconditions:
- // - fn has no dead blocks (blockopt has run).
- // - Def/use info (Operands and Referrers) is up-to-date.
- // - The dominator tree is up-to-date.
- //
- func lift(fn *Function) {
- // TODO(adonovan): opt: lots of little optimizations may be
- // worthwhile here, especially if they cause us to avoid
- // buildDomFrontier. For example:
- //
- // - Alloc never loaded? Eliminate.
- // - Alloc never stored? Replace all loads with a zero constant.
- // - Alloc stored once? Replace loads with dominating store;
- // don't forget that an Alloc is itself an effective store
- // of zero.
- // - Alloc used only within a single block?
- // Use degenerate algorithm avoiding φ-nodes.
- // - Consider synergy with scalar replacement of aggregates (SRA).
- // e.g. *(&x.f) where x is an Alloc.
- // Perhaps we'd get better results if we generated this as x.f
- // i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
- // Unclear.
- //
- // But we will start with the simplest correct code.
- df := buildDomFrontier(fn)
- if debugLifting {
- title := false
- for i, blocks := range df {
- if blocks != nil {
- if !title {
- fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
- title = true
- }
- fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
- }
- }
- }
- newPhis := make(newPhiMap)
- // During this pass we will replace some BasicBlock.Instrs
- // (allocs, loads and stores) with nil, keeping a count in
- // BasicBlock.gaps. At the end we will reset Instrs to the
- // concatenation of all non-dead newPhis and non-nil Instrs
- // for the block, reusing the original array if space permits.
- // While we're here, we also eliminate 'rundefers'
- // instructions in functions that contain no 'defer'
- // instructions.
- usesDefer := false
- // A counter used to generate ~unique ids for Phi nodes, as an
- // aid to debugging. We use large numbers to make them highly
- // visible. All nodes are renumbered later.
- fresh := 1000
- // Determine which allocs we can lift and number them densely.
- // The renaming phase uses this numbering for compact maps.
- numAllocs := 0
- for _, b := range fn.Blocks {
- b.gaps = 0
- b.rundefers = 0
- for _, instr := range b.Instrs {
- switch instr := instr.(type) {
- case *Alloc:
- index := -1
- if liftAlloc(df, instr, newPhis, &fresh) {
- index = numAllocs
- numAllocs++
- }
- instr.index = index
- case *Defer:
- usesDefer = true
- case *RunDefers:
- b.rundefers++
- }
- }
- }
- // renaming maps an alloc (keyed by index) to its replacement
- // value. Initially the renaming contains nil, signifying the
- // zero constant of the appropriate type; we construct the
- // Const lazily at most once on each path through the domtree.
- // TODO(adonovan): opt: cache per-function not per subtree.
- renaming := make([]Value, numAllocs)
- // Renaming.
- rename(fn.Blocks[0], renaming, newPhis)
- // Eliminate dead φ-nodes.
- removeDeadPhis(fn.Blocks, newPhis)
- // Prepend remaining live φ-nodes to each block.
- for _, b := range fn.Blocks {
- nps := newPhis[b]
- j := len(nps)
- rundefersToKill := b.rundefers
- if usesDefer {
- rundefersToKill = 0
- }
- if j+b.gaps+rundefersToKill == 0 {
- continue // fast path: no new phis or gaps
- }
- // Compact nps + non-nil Instrs into a new slice.
- // TODO(adonovan): opt: compact in situ (rightwards)
- // if Instrs has sufficient space or slack.
- dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
- for i, np := range nps {
- dst[i] = np.phi
- }
- for _, instr := range b.Instrs {
- if instr == nil {
- continue
- }
- if !usesDefer {
- if _, ok := instr.(*RunDefers); ok {
- continue
- }
- }
- dst[j] = instr
- j++
- }
- b.Instrs = dst
- }
- // Remove any fn.Locals that were lifted.
- j := 0
- for _, l := range fn.Locals {
- if l.index < 0 {
- fn.Locals[j] = l
- j++
- }
- }
- // Nil out fn.Locals[j:] to aid GC.
- for i := j; i < len(fn.Locals); i++ {
- fn.Locals[i] = nil
- }
- fn.Locals = fn.Locals[:j]
- }
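- // As a sketch of the overall effect (names and numbering are
- // illustrative; the builder's actual output differs in detail),
- // consider:
- //
- //	func f(c bool) int {
- //		x := 0
- //		if c {
- //			x = 1
- //		}
- //		return x
- //	}
- //
- // Before lifting, x is an Alloc cell accessed by explicit stores
- // and a load. After lifting, the Alloc, stores, and load are all
- // deleted, and the block containing the return begins with a
- // φ-node roughly like
- //
- //	t0 = phi [0: 1:int, 1: 0:int] #x
- //	return t0
- //
- // whose edges are the values stored to x on each incoming path.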
- // removeDeadPhis removes φ-nodes not transitively needed by a
- // non-Phi, non-DebugRef instruction.
- func removeDeadPhis(blocks []*BasicBlock, newPhis newPhiMap) {
- // First pass: find the set of "live" φ-nodes: those reachable
- // from some non-Phi instruction.
- //
- // We compute reachability in reverse, starting from each φ,
- // rather than forwards, starting from each live non-Phi
- // instruction, because this typically visits a much smaller
- // portion of the Value graph.
- livePhis := make(map[*Phi]bool)
- for _, npList := range newPhis {
- for _, np := range npList {
- phi := np.phi
- if !livePhis[phi] && phiHasDirectReferrer(phi) {
- markLivePhi(livePhis, phi)
- }
- }
- }
- // Existing φ-nodes due to && and || operators
- // are all considered live (see Go issue 19622).
- for _, b := range blocks {
- for _, phi := range b.phis() {
- markLivePhi(livePhis, phi.(*Phi))
- }
- }
- // Second pass: eliminate unused phis from newPhis.
- for block, npList := range newPhis {
- j := 0
- for _, np := range npList {
- if livePhis[np.phi] {
- npList[j] = np
- j++
- } else {
- // discard it, first removing it from referrers
- for _, val := range np.phi.Edges {
- if refs := val.Referrers(); refs != nil {
- *refs = removeInstr(*refs, np.phi)
- }
- }
- np.phi.block = nil
- }
- }
- newPhis[block] = npList[:j]
- }
- }
- // markLivePhi marks phi as live, along with all φ-nodes
- // transitively reachable via its Operands.
- func markLivePhi(livePhis map[*Phi]bool, phi *Phi) {
- livePhis[phi] = true
- for _, rand := range phi.Operands(nil) {
- if q, ok := (*rand).(*Phi); ok {
- if !livePhis[q] {
- markLivePhi(livePhis, q)
- }
- }
- }
- }
- // phiHasDirectReferrer reports whether phi is directly referred to by
- // a non-Phi instruction. Such instructions are the
- // roots of the liveness traversal.
- func phiHasDirectReferrer(phi *Phi) bool {
- for _, instr := range *phi.Referrers() {
- if _, ok := instr.(*Phi); !ok {
- return true
- }
- }
- return false
- }
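- // For example, after all loads of a cell have been replaced, two
- // φ-nodes may be left feeding only each other (a sketch, not the
- // printer's exact syntax):
- //
- //	p = phi [..., q]
- //	q = phi [..., p]
- //
- // Neither has a direct non-Phi referrer, so neither becomes a root
- // of the traversal, and removeDeadPhis discards both.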
- // A BlockSet is a set of basic blocks, represented as a bit set
- // indexed by BasicBlock.Index.
- type BlockSet struct{ big.Int } // (inherit methods from Int)
- // Add adds b to the set and returns true if the set changed.
- func (s *BlockSet) Add(b *BasicBlock) bool {
- i := b.Index
- if s.Bit(i) != 0 {
- return false
- }
- s.SetBit(&s.Int, i, 1)
- return true
- }
- // Has reports whether b is a member of the set.
- func (s *BlockSet) Has(b *BasicBlock) bool {
- return s.Bit(b.Index) == 1
- }
- // Take removes an arbitrary element from the set and returns
- // its index, or -1 if the set is empty.
- func (s *BlockSet) Take() int {
- l := s.BitLen()
- for i := 0; i < l; i++ {
- if s.Bit(i) == 1 {
- s.SetBit(&s.Int, i, 0)
- return i
- }
- }
- return -1
- }
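- // A typical worklist loop (an illustrative sketch; visit is
- // hypothetical):
- //
- //	var W BlockSet
- //	W.Add(b1)
- //	W.Add(b2)
- //	for i := W.Take(); i != -1; i = W.Take() {
- //		visit(fn.Blocks[i])
- //	}
- //
- // This is the pattern liftAlloc uses below to traverse the
- // iterated dominance frontier.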
- // newPhi is a pair of a newly introduced φ-node and the lifted Alloc
- // it replaces.
- type newPhi struct {
- phi *Phi
- alloc *Alloc
- }
- // newPhiMap records, for each basic block, the set of newPhis
- // that must be prepended to the block.
- type newPhiMap map[*BasicBlock][]newPhi
- // liftAlloc determines whether alloc can be lifted into registers,
- // and if so, it populates newPhis with all the φ-nodes it may require
- // and returns true.
- //
- // fresh is a source of fresh ids for phi nodes.
- //
- func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap, fresh *int) bool {
- // Don't lift aggregates into registers, because we don't have
- // a way to express their zero-constants.
- switch deref(alloc.Type()).Underlying().(type) {
- case *types.Array, *types.Struct:
- return false
- }
- // Don't lift named return values in functions that defer
- // calls that may recover from panic.
- if fn := alloc.Parent(); fn.Recover != nil {
- for _, nr := range fn.namedResults {
- if nr == alloc {
- return false
- }
- }
- }
- // Compute defblocks, the set of blocks containing a
- // definition of the alloc cell.
- var defblocks BlockSet
- for _, instr := range *alloc.Referrers() {
- // Bail out if we discover the alloc is not liftable;
- // the only operations permitted to use the alloc are
- // loads/stores into the cell, and DebugRef.
- switch instr := instr.(type) {
- case *Store:
- if instr.Val == alloc {
- return false // address used as value
- }
- if instr.Addr != alloc {
- panic("Alloc.Referrers is inconsistent")
- }
- defblocks.Add(instr.Block())
- case *UnOp:
- if instr.Op != token.MUL {
- return false // not a load
- }
- if instr.X != alloc {
- panic("Alloc.Referrers is inconsistent")
- }
- case *DebugRef:
- // ok
- default:
- return false // some other instruction
- }
- }
- // The Alloc itself counts as a (zero) definition of the cell.
- defblocks.Add(alloc.Block())
- if debugLifting {
- fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
- }
- fn := alloc.Parent()
- // Φ-insertion.
- //
- // What follows is the body of the main loop of the insert-φ
- // function described by Cytron et al., but instead of using
- // counter tricks, we just reset the 'hasAlready' and 'work'
- // sets each iteration. These are bitmaps so it's pretty cheap.
- //
- // TODO(adonovan): opt: recycle slice storage for W,
- // hasAlready, defblocks across liftAlloc calls.
- var hasAlready BlockSet
- // Initialize W and work to defblocks.
- var work BlockSet = defblocks // blocks seen
- var W BlockSet // blocks to do
- W.Set(&defblocks.Int)
- // Traverse iterated dominance frontier, inserting φ-nodes.
- for i := W.Take(); i != -1; i = W.Take() {
- u := fn.Blocks[i]
- for _, v := range df[u.Index] {
- if hasAlready.Add(v) {
- // Create φ-node.
- // It will be prepended to v.Instrs later, if needed.
- phi := &Phi{
- Edges: make([]Value, len(v.Preds)),
- Comment: alloc.Comment,
- }
- // This is merely a debugging aid:
- phi.setNum(*fresh)
- *fresh++
- phi.pos = alloc.Pos()
- phi.setType(deref(alloc.Type()))
- phi.block = v
- if debugLifting {
- fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
- }
- newPhis[v] = append(newPhis[v], newPhi{phi, alloc})
- if work.Add(v) {
- W.Add(v)
- }
- }
- }
- }
- return true
- }
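- // Continuing the diamond sketch above: suppose the cell is stored
- // in B1 and B2 and the Alloc itself lives in B0, so defblocks =
- // {B0, B1, B2}. Draining W in index order: B0 contributes nothing
- // (DF(B0) is empty); B1 places a φ at B3, its dominance frontier,
- // and adds B3 to W; B2 finds B3 already in hasAlready; and B3
- // itself contributes nothing (DF(B3) is empty). The result is a
- // single φ-node at the join point B3.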
- // replaceAll replaces all intraprocedural uses of x with y,
- // updating x.Referrers and y.Referrers.
- // Precondition: x.Referrers() != nil, i.e. x must be local to some function.
- //
- func replaceAll(x, y Value) {
- var rands []*Value
- pxrefs := x.Referrers()
- pyrefs := y.Referrers()
- for _, instr := range *pxrefs {
- rands = instr.Operands(rands[:0]) // recycle storage
- for _, rand := range rands {
- if *rand != nil {
- if *rand == x {
- *rand = y
- }
- }
- }
- if pyrefs != nil {
- *pyrefs = append(*pyrefs, instr) // dups ok
- }
- }
- *pxrefs = nil // x is now unreferenced
- }
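- // For instance, the renaming pass below redirects every use of a
- // deleted load to the value that reaches it (load and newval name
- // the instruction and value involved; see rename):
- //
- //	replaceAll(load, newval) // all uses of load now use newval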
- // renamed returns the value to which alloc is being renamed,
- // constructing it lazily if it's the implicit zero initialization.
- //
- func renamed(renaming []Value, alloc *Alloc) Value {
- v := renaming[alloc.index]
- if v == nil {
- v = zeroConst(deref(alloc.Type()))
- renaming[alloc.index] = v
- }
- return v
- }
- // rename implements the (Cytron et al.) SSA renaming algorithm, a
- // preorder traversal of the dominator tree replacing all loads of
- // Alloc cells with the value stored to that cell by the dominating
- // store instruction. For lifting, we need only consider loads,
- // stores and φ-nodes.
- //
- // renaming is a map from *Alloc (keyed by index number) to its
- // dominating stored value; newPhis[x] is the set of new φ-nodes to be
- // prepended to block x.
- //
- func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
- // Each φ-node becomes the new name for its associated Alloc.
- for _, np := range newPhis[u] {
- phi := np.phi
- alloc := np.alloc
- renaming[alloc.index] = phi
- }
- // Rename loads and stores of allocs.
- for i, instr := range u.Instrs {
- switch instr := instr.(type) {
- case *Alloc:
- if instr.index >= 0 { // store of zero to Alloc cell
- // Replace dominated loads by the zero value.
- renaming[instr.index] = nil
- if debugLifting {
- fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
- }
- // Delete the Alloc.
- u.Instrs[i] = nil
- u.gaps++
- }
- case *Store:
- if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
- // Replace dominated loads by the stored value.
- renaming[alloc.index] = instr.Val
- if debugLifting {
- fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
- instr, instr.Val.Name())
- }
- // Remove the store from the referrer list of the stored value.
- if refs := instr.Val.Referrers(); refs != nil {
- *refs = removeInstr(*refs, instr)
- }
- // Delete the Store.
- u.Instrs[i] = nil
- u.gaps++
- }
- case *UnOp:
- if instr.Op == token.MUL {
- if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
- newval := renamed(renaming, alloc)
- if debugLifting {
- fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
- instr.Name(), instr, newval.Name())
- }
- // Replace all references to
- // the loaded value by the
- // dominating stored value.
- replaceAll(instr, newval)
- // Delete the Load.
- u.Instrs[i] = nil
- u.gaps++
- }
- }
- case *DebugRef:
- if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
- if instr.IsAddr {
- instr.X = renamed(renaming, alloc)
- instr.IsAddr = false
- // Add DebugRef to instr.X's referrers.
- if refs := instr.X.Referrers(); refs != nil {
- *refs = append(*refs, instr)
- }
- } else {
- // A source expression denotes the address
- // of an Alloc that was optimized away.
- instr.X = nil
- // Delete the DebugRef.
- u.Instrs[i] = nil
- u.gaps++
- }
- }
- }
- }
- // For each φ-node in a CFG successor, rename the edge.
- for _, v := range u.Succs {
- phis := newPhis[v]
- if len(phis) == 0 {
- continue
- }
- i := v.predIndex(u)
- for _, np := range phis {
- phi := np.phi
- alloc := np.alloc
- newval := renamed(renaming, alloc)
- if debugLifting {
- fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
- phi.Name(), u, v, i, alloc.Name(), newval.Name())
- }
- phi.Edges[i] = newval
- if prefs := newval.Referrers(); prefs != nil {
- *prefs = append(*prefs, phi)
- }
- }
- }
- // Continue depth-first recursion over domtree, pushing a
- // fresh copy of the renaming map for each subtree.
- for i, v := range u.dom.children {
- r := renaming
- if i < len(u.dom.children)-1 {
- // On all but the final iteration, we must make
- // a copy to avoid destructive update.
- r = make([]Value, len(renaming))
- copy(r, renaming)
- }
- rename(v, r, newPhis)
- }
- }
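- // A short trace for the diamond sketch with lifted cell x
- // (illustrative): rename(B0) deletes the Alloc and records nil,
- // i.e. zero, as x's value; descending to B1, the store "x = 1" is
- // deleted and 1 becomes x's value, so the B1→B3 edge of B3's
- // φ-node is filled with 1; in B2 there is no store, so the B2→B3
- // edge is lazily filled with the zero constant via renamed; and in
- // B3 the φ-node itself becomes x's name, so the load of x is
- // replaced by the φ via replaceAll.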
|