remote.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /* Copyright 2018 The Bazel Authors. All rights reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. */
  12. package repos
  13. import (
  14. "bytes"
  15. "fmt"
  16. "os/exec"
  17. "path"
  18. "regexp"
  19. "strings"
  20. "sync"
  21. "github.com/bazelbuild/bazel-gazelle/internal/label"
  22. "github.com/bazelbuild/bazel-gazelle/internal/pathtools"
  23. "golang.org/x/tools/go/vcs"
  24. )
  25. // UpdateRepo returns an object describing a repository at the most recent
  26. // commit or version tag.
  27. //
  28. // This function uses RemoteCache to retrieve information about the repository.
  29. // Depending on how the RemoteCache was initialized and used earlier, some
  30. // information may already be locally available. Frequently though, information
  31. // will be fetched over the network, so this function may be slow.
  32. func UpdateRepo(rc *RemoteCache, importPath string) (Repo, error) {
  33. root, name, err := rc.Root(importPath)
  34. if err != nil {
  35. return Repo{}, err
  36. }
  37. remote, vcs, err := rc.Remote(root)
  38. if err != nil {
  39. return Repo{}, err
  40. }
  41. commit, tag, err := rc.Head(remote, vcs)
  42. if err != nil {
  43. return Repo{}, err
  44. }
  45. repo := Repo{
  46. Name: name,
  47. GoPrefix: root,
  48. Commit: commit,
  49. Tag: tag,
  50. Remote: remote,
  51. VCS: vcs,
  52. }
  53. return repo, nil
  54. }
  55. // RemoteCache stores information about external repositories. The cache may
  56. // be initialized with information about known repositories, i.e., those listed
  57. // in the WORKSPACE file and mentioned on the command line. Other information
  58. // is retrieved over the network.
  59. //
  60. // Public methods of RemoteCache may be slow in cases where a network fetch
  61. // is needed. Public methods may be called concurrently.
  62. type RemoteCache struct {
  63. // RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may
  64. // be overridden so that tests may avoid accessing the network.
  65. RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error)
  66. // HeadCmd returns the latest commit on the default branch in the given
  67. // repository. This is used by Head. It may be stubbed out for tests.
  68. HeadCmd func(remote, vcs string) (string, error)
  69. root, remote, head remoteCacheMap
  70. }
  71. // remoteCacheMap is a thread-safe, idempotent cache. It is used to store
  72. // information which should be fetched over the network no more than once.
  73. // This follows the Memo pattern described in The Go Programming Language,
  74. // section 9.7.
  75. type remoteCacheMap struct {
  76. mu sync.Mutex
  77. cache map[string]*remoteCacheEntry
  78. }
  79. type remoteCacheEntry struct {
  80. value interface{}
  81. err error
  82. // ready is nil for entries that were added when the cache was initialized.
  83. // It is non-nil for other entries. It is closed when an entry is ready,
  84. // i.e., the operation loading the entry completed.
  85. ready chan struct{}
  86. }
  87. type rootValue struct {
  88. root, name string
  89. }
  90. type remoteValue struct {
  91. remote, vcs string
  92. }
  93. type headValue struct {
  94. commit, tag string
  95. }
  96. // NewRemoteCache creates a new RemoteCache with a set of known repositories.
  97. // The Root and Remote methods will return information about repositories listed
  98. // here without accessing the network. However, the Head method will still
  99. // access the network for these repositories to retrieve information about new
  100. // versions.
  101. func NewRemoteCache(knownRepos []Repo) *RemoteCache {
  102. r := &RemoteCache{
  103. RepoRootForImportPath: vcs.RepoRootForImportPath,
  104. HeadCmd: defaultHeadCmd,
  105. root: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  106. remote: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  107. head: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  108. }
  109. for _, repo := range knownRepos {
  110. r.root.cache[repo.GoPrefix] = &remoteCacheEntry{
  111. value: rootValue{
  112. root: repo.GoPrefix,
  113. name: repo.Name,
  114. },
  115. }
  116. if repo.Remote != "" {
  117. r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{
  118. value: remoteValue{
  119. remote: repo.Remote,
  120. vcs: repo.VCS,
  121. },
  122. }
  123. }
  124. }
  125. return r
  126. }
  127. var gopkginPattern = regexp.MustCompile("^(gopkg.in/(?:[^/]+/)?[^/]+\\.v\\d+)(?:/|$)")
  128. var knownPrefixes = []struct {
  129. prefix string
  130. missing int
  131. }{
  132. {prefix: "golang.org/x", missing: 1},
  133. {prefix: "google.golang.org", missing: 1},
  134. {prefix: "cloud.google.com", missing: 1},
  135. {prefix: "github.com", missing: 2},
  136. }
  137. // Root returns the portion of an import path that corresponds to the root
  138. // directory of the repository containing the given import path. For example,
  139. // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools".
  140. // The workspace name of the repository is also returned. This may be a custom
  141. // name set in WORKSPACE, or it may be a generated name based on the root path.
  142. func (r *RemoteCache) Root(importPath string) (root, name string, err error) {
  143. // Try prefixes of the import path in the cache, but don't actually go out
  144. // to vcs yet. We do this before handling known special cases because
  145. // the cache is pre-populated with repository rules, and we want to use their
  146. // names if we can.
  147. prefix := importPath
  148. for {
  149. v, ok, err := r.root.get(prefix)
  150. if ok {
  151. if err != nil {
  152. return "", "", err
  153. }
  154. value := v.(rootValue)
  155. return value.root, value.name, nil
  156. }
  157. prefix = path.Dir(prefix)
  158. if prefix == "." || prefix == "/" {
  159. break
  160. }
  161. }
  162. // Try known prefixes.
  163. for _, p := range knownPrefixes {
  164. if pathtools.HasPrefix(importPath, p.prefix) {
  165. rest := pathtools.TrimPrefix(importPath, p.prefix)
  166. var components []string
  167. if rest != "" {
  168. components = strings.Split(rest, "/")
  169. }
  170. if len(components) < p.missing {
  171. return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix)
  172. }
  173. root = p.prefix
  174. for _, c := range components[:p.missing] {
  175. root = path.Join(root, c)
  176. }
  177. name = label.ImportPathToBazelRepoName(root)
  178. return root, name, nil
  179. }
  180. }
  181. // gopkg.in is special, and might have either one or two levels of
  182. // missing paths. See http://labix.org/gopkg.in for URL patterns.
  183. if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 {
  184. root = match[1]
  185. name = label.ImportPathToBazelRepoName(root)
  186. return root, name, nil
  187. }
  188. // Find the prefix using vcs and cache the result.
  189. v, err := r.root.ensure(importPath, func() (interface{}, error) {
  190. res, err := r.RepoRootForImportPath(importPath, false)
  191. if err != nil {
  192. return nil, err
  193. }
  194. return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil
  195. })
  196. if err != nil {
  197. return "", "", err
  198. }
  199. value := v.(rootValue)
  200. return value.root, value.name, nil
  201. }
  202. // Remote returns the VCS name and the remote URL for a repository with the
  203. // given root import path. This is suitable for creating new repository rules.
  204. func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) {
  205. v, err := r.remote.ensure(root, func() (interface{}, error) {
  206. repo, err := r.RepoRootForImportPath(root, false)
  207. if err != nil {
  208. return nil, err
  209. }
  210. return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil
  211. })
  212. if err != nil {
  213. return "", "", err
  214. }
  215. value := v.(remoteValue)
  216. return value.remote, value.vcs, nil
  217. }
  218. // Head returns the most recent commit id on the default branch and latest
  219. // version tag for the given remote repository. The tag "" is returned if
  220. // no latest version was found.
  221. //
  222. // TODO(jayconrod): support VCS other than git.
  223. // TODO(jayconrod): support version tags. "" is always returned.
  224. func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) {
  225. if vcs != "git" {
  226. return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs)
  227. }
  228. v, err := r.head.ensure(remote, func() (interface{}, error) {
  229. commit, err := r.HeadCmd(remote, vcs)
  230. if err != nil {
  231. return nil, err
  232. }
  233. return headValue{commit: commit}, nil
  234. })
  235. if err != nil {
  236. return "", "", err
  237. }
  238. value := v.(headValue)
  239. return value.commit, value.tag, nil
  240. }
  241. func defaultHeadCmd(remote, vcs string) (string, error) {
  242. switch vcs {
  243. case "local":
  244. return "", nil
  245. case "git":
  246. // Old versions of git ls-remote exit with code 129 when "--" is passed.
  247. // We'll try to validate the argument here instead.
  248. if strings.HasPrefix(remote, "-") {
  249. return "", fmt.Errorf("remote must not start with '-': %q", remote)
  250. }
  251. cmd := exec.Command("git", "ls-remote", remote, "HEAD")
  252. out, err := cmd.Output()
  253. if err != nil {
  254. return "", err
  255. }
  256. ix := bytes.IndexByte(out, '\t')
  257. if ix < 0 {
  258. return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote)
  259. }
  260. return string(out[:ix]), nil
  261. default:
  262. return "", fmt.Errorf("unknown version control system: %s", vcs)
  263. }
  264. }
  265. // get retrieves a value associated with the given key from the cache. ok will
  266. // be true if the key exists in the cache, even if it's in the process of
  267. // being fetched.
  268. func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) {
  269. m.mu.Lock()
  270. e, ok := m.cache[key]
  271. m.mu.Unlock()
  272. if !ok {
  273. return nil, ok, nil
  274. }
  275. if e.ready != nil {
  276. <-e.ready
  277. }
  278. return e.value, ok, e.err
  279. }
  280. // ensure retreives a value associated with the given key from the cache. If
  281. // the key does not exist in the cache, the load function will be called,
  282. // and its result will be associated with the key. The load function will not
  283. // be called more than once for any key.
  284. func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) {
  285. m.mu.Lock()
  286. e, ok := m.cache[key]
  287. if !ok {
  288. e = &remoteCacheEntry{ready: make(chan struct{})}
  289. m.cache[key] = e
  290. m.mu.Unlock()
  291. e.value, e.err = load()
  292. close(e.ready)
  293. } else {
  294. m.mu.Unlock()
  295. if e.ready != nil {
  296. <-e.ready
  297. }
  298. }
  299. return e.value, e.err
  300. }