remote.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. /* Copyright 2018 The Bazel Authors. All rights reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. */
  12. package repo
  13. import (
  14. "bytes"
  15. "encoding/json"
  16. "fmt"
  17. "io/ioutil"
  18. "os"
  19. "os/exec"
  20. "path"
  21. "path/filepath"
  22. "regexp"
  23. "runtime"
  24. "strings"
  25. "sync"
  26. "github.com/bazelbuild/bazel-gazelle/label"
  27. "github.com/bazelbuild/bazel-gazelle/pathtools"
  28. "golang.org/x/tools/go/vcs"
  29. )
  30. // RemoteCache stores information about external repositories. The cache may
  31. // be initialized with information about known repositories, i.e., those listed
  32. // in the WORKSPACE file and mentioned on the command line. Other information
  33. // is retrieved over the network.
  34. //
  35. // Public methods of RemoteCache may be slow in cases where a network fetch
  36. // is needed. Public methods may be called concurrently.
  37. //
  38. // TODO(jayconrod): this is very Go-centric. It should be moved to language/go.
  39. // Unfortunately, doing so would break the resolve.Resolver interface.
  40. type RemoteCache struct {
  41. // RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may
  42. // be overridden so that tests may avoid accessing the network.
  43. RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error)
  44. // HeadCmd returns the latest commit on the default branch in the given
  45. // repository. This is used by Head. It may be stubbed out for tests.
  46. HeadCmd func(remote, vcs string) (string, error)
  47. // ModInfo returns the module path and version that provides the package
  48. // with the given import path. This is used by Mod. It may be stubbed
  49. // out for tests.
  50. ModInfo func(importPath string) (modPath string, err error)
  51. // ModVersionInfo returns the module path, true version, and sum for
  52. // the module that provides the package with the given import path.
  53. // This is used by ModVersion. It may be stubbed out for tests.
  54. ModVersionInfo func(modPath, query string) (version, sum string, err error)
  55. root, remote, head, mod, modVersion remoteCacheMap
  56. tmpOnce sync.Once
  57. tmpDir string
  58. tmpErr error
  59. }
  // remoteCacheMap is a thread-safe, idempotent cache. It is used to store
  // information which should be fetched over the network no more than once.
  // This follows the Memo pattern described in The Go Programming Language,
  // section 9.7.
  type remoteCacheMap struct {
  	// mu guards reads and writes of the cache map itself. It is not held
  	// while an entry is being loaded.
  	mu sync.Mutex
  	// cache maps a key to its entry. An entry may still be loading; see
  	// remoteCacheEntry.ready.
  	cache map[string]*remoteCacheEntry
  }
  // remoteCacheEntry is a single slot in a remoteCacheMap. It records either
  // the loaded value or the error produced while loading it.
  type remoteCacheEntry struct {
  	// value is the cached result. Its concrete type depends on which map the
  	// entry lives in (rootValue, remoteValue, headValue, modValue, or
  	// modVersionValue).
  	value interface{}
  	// err is the error returned by the load function, if any.
  	err error
  	// ready is nil for entries that were added when the cache was initialized.
  	// It is non-nil for other entries. It is closed when an entry is ready,
  	// i.e., the operation loading the entry completed.
  	ready chan struct{}
  }
  // rootValue is the value type stored in RemoteCache.root.
  type rootValue struct {
  	// root is the import path of the repository root.
  	root string
  	// name is the Bazel workspace name of the repository.
  	name string
  }
  // remoteValue is the value type stored in RemoteCache.remote.
  type remoteValue struct {
  	// remote is the URL of the remote repository.
  	remote string
  	// vcs is the name of the version control command (for example "git").
  	vcs string
  }
  // headValue is the value type stored in RemoteCache.head.
  type headValue struct {
  	// commit is the commit id reported by HeadCmd.
  	commit string
  	// tag is the latest version tag. Head currently never sets it.
  	tag string
  }
  // modValue is the value type stored in RemoteCache.mod.
  type modValue struct {
  	// path is the module path.
  	path string
  	// name is the name of the repository rule for the module.
  	name string
  	// known is true for entries seeded by NewRemoteCache from the list of
  	// known repositories, and false for entries discovered through ModInfo.
  	known bool
  }
  // modVersionValue is the value type stored in RemoteCache.modVersion.
  type modVersionValue struct {
  	// path is the module path.
  	path string
  	// name is a repository rule name. ModVersion does not populate it.
  	name string
  	// version is the resolved module version.
  	version string
  	// sum is the module sum reported alongside the version.
  	sum string
  }
  // Repo carries the details of a Go repository that is known ahead of time.
  // A slice of these seeds RemoteCache so that lookups for those repositories
  // can be answered without going to the network.
  //
  // DEPRECATED: Go-specific details should be removed from RemoteCache, and
  // lookup logic should be moved to language/go. This means RemoteCache will
  // need to be initialized in a different way.
  type Repo struct {
  	// Name is the workspace name of the repository.
  	Name string
  	// GoPrefix is the import path prefix the repository provides.
  	GoPrefix string
  	// Remote is the URL of the remote repository. It may be empty.
  	Remote string
  	// VCS is the name of the version control system used with Remote.
  	VCS string
  }
  101. // NewRemoteCache creates a new RemoteCache with a set of known repositories.
  102. // The Root and Remote methods will return information about repositories listed
  103. // here without accessing the network. However, the Head method will still
  104. // access the network for these repositories to retrieve information about new
  105. // versions.
  106. //
  107. // A cleanup function is also returned. The caller must call this when
  108. // RemoteCache is no longer needed. RemoteCache may write files to a temporary
  109. // directory. This will delete them.
  110. func NewRemoteCache(knownRepos []Repo) (r *RemoteCache, cleanup func() error) {
  111. r = &RemoteCache{
  112. RepoRootForImportPath: vcs.RepoRootForImportPath,
  113. HeadCmd: defaultHeadCmd,
  114. root: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  115. remote: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  116. head: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  117. mod: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  118. modVersion: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
  119. }
  120. r.ModInfo = func(importPath string) (string, error) {
  121. return defaultModInfo(r, importPath)
  122. }
  123. r.ModVersionInfo = func(modPath, query string) (string, string, error) {
  124. return defaultModVersionInfo(r, modPath, query)
  125. }
  126. for _, repo := range knownRepos {
  127. r.root.cache[repo.GoPrefix] = &remoteCacheEntry{
  128. value: rootValue{
  129. root: repo.GoPrefix,
  130. name: repo.Name,
  131. },
  132. }
  133. if repo.Remote != "" {
  134. r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{
  135. value: remoteValue{
  136. remote: repo.Remote,
  137. vcs: repo.VCS,
  138. },
  139. }
  140. }
  141. r.mod.cache[repo.GoPrefix] = &remoteCacheEntry{
  142. value: modValue{
  143. path: repo.GoPrefix,
  144. name: repo.Name,
  145. known: true,
  146. },
  147. }
  148. }
  149. // Augment knownRepos with additional prefixes for
  150. // minimal module compatibility. For example, if repo "com_example_foo_v2"
  151. // has prefix "example.com/foo/v2", map "example.com/foo" to the same
  152. // entry.
  153. // TODO(jayconrod): there should probably be some control over whether
  154. // callers can use these mappings: packages within modules should not be
  155. // allowed to use them. However, we'll return the same result nearly all
  156. // the time, and simpler is better.
  157. for _, repo := range knownRepos {
  158. path := pathWithoutSemver(repo.GoPrefix)
  159. if path == "" || r.root.cache[path] != nil {
  160. continue
  161. }
  162. r.root.cache[path] = r.root.cache[repo.GoPrefix]
  163. if e := r.remote.cache[repo.GoPrefix]; e != nil {
  164. r.remote.cache[path] = e
  165. }
  166. r.mod.cache[path] = r.mod.cache[repo.GoPrefix]
  167. }
  168. return r, r.cleanup
  169. }
  170. func (r *RemoteCache) cleanup() error {
  171. if r.tmpDir == "" {
  172. return nil
  173. }
  174. return os.RemoveAll(r.tmpDir)
  175. }
  // gopkginPattern matches gopkg.in import paths and captures the repository
  // root in group 1. The root has either one level ("gopkg.in/pkg.v3") or two
  // levels ("gopkg.in/user/pkg.v3") after the host. The dot in the host name is
  // escaped so that look-alike hosts such as "gopkgxin" are not matched.
  var gopkginPattern = regexp.MustCompile(`^(gopkg\.in/(?:[^/]+/)?[^/]+\.v\d+)(?:/|$)`)
  // knownPrefixes lists hosting prefixes whose repository root can be derived
  // from the import path alone. For each prefix, missing is the number of path
  // components that must follow the prefix to form the repository root.
  var knownPrefixes = []struct {
  	prefix  string
  	missing int
  }{
  	{"golang.org/x", 1},
  	{"google.golang.org", 1},
  	{"cloud.google.com", 1},
  	{"github.com", 2},
  }
  186. // Root returns the portion of an import path that corresponds to the root
  187. // directory of the repository containing the given import path. For example,
  188. // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools".
  189. // The workspace name of the repository is also returned. This may be a custom
  190. // name set in WORKSPACE, or it may be a generated name based on the root path.
  191. func (r *RemoteCache) Root(importPath string) (root, name string, err error) {
  192. // Try prefixes of the import path in the cache, but don't actually go out
  193. // to vcs yet. We do this before handling known special cases because
  194. // the cache is pre-populated with repository rules, and we want to use their
  195. // names if we can.
  196. prefix := importPath
  197. for {
  198. v, ok, err := r.root.get(prefix)
  199. if ok {
  200. if err != nil {
  201. return "", "", err
  202. }
  203. value := v.(rootValue)
  204. return value.root, value.name, nil
  205. }
  206. prefix = path.Dir(prefix)
  207. if prefix == "." || prefix == "/" {
  208. break
  209. }
  210. }
  211. // Try known prefixes.
  212. for _, p := range knownPrefixes {
  213. if pathtools.HasPrefix(importPath, p.prefix) {
  214. rest := pathtools.TrimPrefix(importPath, p.prefix)
  215. var components []string
  216. if rest != "" {
  217. components = strings.Split(rest, "/")
  218. }
  219. if len(components) < p.missing {
  220. return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix)
  221. }
  222. root = p.prefix
  223. for _, c := range components[:p.missing] {
  224. root = path.Join(root, c)
  225. }
  226. name = label.ImportPathToBazelRepoName(root)
  227. return root, name, nil
  228. }
  229. }
  230. // gopkg.in is special, and might have either one or two levels of
  231. // missing paths. See http://labix.org/gopkg.in for URL patterns.
  232. if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 {
  233. root = match[1]
  234. name = label.ImportPathToBazelRepoName(root)
  235. return root, name, nil
  236. }
  237. // Find the prefix using vcs and cache the result.
  238. v, err := r.root.ensure(importPath, func() (interface{}, error) {
  239. res, err := r.RepoRootForImportPath(importPath, false)
  240. if err != nil {
  241. return nil, err
  242. }
  243. return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil
  244. })
  245. if err != nil {
  246. return "", "", err
  247. }
  248. value := v.(rootValue)
  249. return value.root, value.name, nil
  250. }
  251. // Remote returns the VCS name and the remote URL for a repository with the
  252. // given root import path. This is suitable for creating new repository rules.
  253. func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) {
  254. v, err := r.remote.ensure(root, func() (interface{}, error) {
  255. repo, err := r.RepoRootForImportPath(root, false)
  256. if err != nil {
  257. return nil, err
  258. }
  259. return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil
  260. })
  261. if err != nil {
  262. return "", "", err
  263. }
  264. value := v.(remoteValue)
  265. return value.remote, value.vcs, nil
  266. }
  267. // Head returns the most recent commit id on the default branch and latest
  268. // version tag for the given remote repository. The tag "" is returned if
  269. // no latest version was found.
  270. //
  271. // TODO(jayconrod): support VCS other than git.
  272. // TODO(jayconrod): support version tags. "" is always returned.
  273. func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) {
  274. if vcs != "git" {
  275. return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs)
  276. }
  277. v, err := r.head.ensure(remote, func() (interface{}, error) {
  278. commit, err := r.HeadCmd(remote, vcs)
  279. if err != nil {
  280. return nil, err
  281. }
  282. return headValue{commit: commit}, nil
  283. })
  284. if err != nil {
  285. return "", "", err
  286. }
  287. value := v.(headValue)
  288. return value.commit, value.tag, nil
  289. }
  // defaultHeadCmd is the default implementation of RemoteCache.HeadCmd.
  //
  // For vcs "local" it returns an empty commit and no error. For vcs "git" it
  // runs "git ls-remote <remote> HEAD" and returns the text before the first
  // tab of the output. Any other vcs is an error.
  func defaultHeadCmd(remote, vcs string) (string, error) {
  	if vcs == "local" {
  		return "", nil
  	}
  	if vcs != "git" {
  		return "", fmt.Errorf("unknown version control system: %s", vcs)
  	}

  	// Old versions of git ls-remote exit with code 129 when "--" is passed,
  	// so the argument is validated here instead of being separated with "--".
  	if strings.HasPrefix(remote, "-") {
  		return "", fmt.Errorf("remote must not start with '-': %q", remote)
  	}

  	stdout, err := exec.Command("git", "ls-remote", remote, "HEAD").Output()
  	if err != nil {
  		// Include git's stderr in the message when it was captured.
  		var stderr []byte
  		if exitErr, ok := err.(*exec.ExitError); ok {
  			stderr = exitErr.Stderr
  		}
  		return "", fmt.Errorf("git ls-remote for %s : %v : %s", remote, err, stderr)
  	}

  	tab := bytes.IndexByte(stdout, '\t')
  	if tab < 0 {
  		return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote)
  	}
  	return string(stdout[:tab]), nil
  }
  318. // Mod returns the module path for the module that contains the package
  319. // named by importPath. The name of the go_repository rule for the module
  320. // is also returned. For example, calling Mod on "github.com/foo/bar/v2/baz"
  321. // would give the module path "github.com/foo/bar/v2" and the name
  322. // "com_github_foo_bar_v2".
  323. //
  324. // If a known repository *could* provide importPath (because its "importpath"
  325. // is a prefix of importPath), Mod will assume that it does. This may give
  326. // inaccurate results if importPath is in an undeclared nested module. Run
  327. // "gazelle update-repos -from_file=go.mod" first for best results.
  328. //
  329. // If no known repository could provide importPath, Mod will run "go list" to
  330. // find the module. The special patterns that Root uses are ignored. Results are
  331. // cached. Use GOPROXY for faster results.
  332. func (r *RemoteCache) Mod(importPath string) (modPath, name string, err error) {
  333. // Check if any of the known repositories is a prefix.
  334. prefix := importPath
  335. for {
  336. v, ok, err := r.mod.get(prefix)
  337. if ok {
  338. if err != nil {
  339. return "", "", err
  340. }
  341. value := v.(modValue)
  342. if value.known {
  343. return value.path, value.name, nil
  344. } else {
  345. break
  346. }
  347. }
  348. prefix = path.Dir(prefix)
  349. if prefix == "." || prefix == "/" {
  350. break
  351. }
  352. }
  353. // Ask "go list".
  354. v, err := r.mod.ensure(importPath, func() (interface{}, error) {
  355. modPath, err := r.ModInfo(importPath)
  356. if err != nil {
  357. return nil, err
  358. }
  359. return modValue{
  360. path: modPath,
  361. name: label.ImportPathToBazelRepoName(modPath),
  362. }, nil
  363. })
  364. if err != nil {
  365. return "", "", err
  366. }
  367. value := v.(modValue)
  368. return value.path, value.name, nil
  369. }
  370. func defaultModInfo(rc *RemoteCache, importPath string) (modPath string, err error) {
  371. rc.initTmp()
  372. if rc.tmpErr != nil {
  373. return "", rc.tmpErr
  374. }
  375. goTool := findGoTool()
  376. cmd := exec.Command(goTool, "list", "-find", "-f", "{{.Module.Path}}", "--", importPath)
  377. cmd.Dir = rc.tmpDir
  378. cmd.Env = append(os.Environ(), "GO111MODULE=on")
  379. out, err := cmd.Output()
  380. if err != nil {
  381. var stdErr []byte
  382. if e, ok := err.(*exec.ExitError); ok {
  383. stdErr = e.Stderr
  384. }
  385. return "", fmt.Errorf("finding module path for import %s: %v: %s", importPath, err, stdErr)
  386. }
  387. return strings.TrimSpace(string(out)), nil
  388. }
  389. // ModVersion looks up information about a module at a given version.
  390. // The path must be the module path, not a package within the module.
  391. // The version may be a canonical semantic version, a query like "latest",
  392. // or a branch, tag, or revision name. ModVersion returns the name of
  393. // the repository rule providing the module (if any), the true version,
  394. // and the sum.
  395. func (r *RemoteCache) ModVersion(modPath, query string) (name, version, sum string, err error) {
  396. // Ask "go list".
  397. arg := modPath + "@" + query
  398. v, err := r.modVersion.ensure(arg, func() (interface{}, error) {
  399. version, sum, err := r.ModVersionInfo(modPath, query)
  400. if err != nil {
  401. return nil, err
  402. }
  403. return modVersionValue{
  404. path: modPath,
  405. version: version,
  406. sum: sum,
  407. }, nil
  408. })
  409. if err != nil {
  410. return "", "", "", err
  411. }
  412. value := v.(modVersionValue)
  413. // Try to find the repository name for the module, if there's already
  414. // a repository rule that provides it.
  415. v, ok, err := r.mod.get(modPath)
  416. if ok && err == nil {
  417. name = v.(modValue).name
  418. } else {
  419. name = label.ImportPathToBazelRepoName(modPath)
  420. }
  421. return name, value.version, value.sum, nil
  422. }
  423. func defaultModVersionInfo(rc *RemoteCache, modPath, query string) (version, sum string, err error) {
  424. rc.initTmp()
  425. if rc.tmpErr != nil {
  426. return "", "", rc.tmpErr
  427. }
  428. goTool := findGoTool()
  429. cmd := exec.Command(goTool, "mod", "download", "-json", "--", modPath+"@"+query)
  430. cmd.Dir = rc.tmpDir
  431. cmd.Env = append(os.Environ(), "GO111MODULE=on")
  432. out, err := cmd.Output()
  433. if err != nil {
  434. var stdErr []byte
  435. if e, ok := err.(*exec.ExitError); ok {
  436. stdErr = e.Stderr
  437. }
  438. return "", "", fmt.Errorf("finding module version and sum for %s@%s: %v: %s", modPath, query, err, stdErr)
  439. }
  440. var result struct{ Version, Sum string }
  441. if err := json.Unmarshal(out, &result); err != nil {
  442. fmt.Println(out)
  443. return "", "", fmt.Errorf("finding module version and sum for %s@%s: invalid output from 'go mod download': %v", modPath, query, err)
  444. }
  445. return result.Version, result.Sum, nil
  446. }
  447. // get retrieves a value associated with the given key from the cache. ok will
  448. // be true if the key exists in the cache, even if it's in the process of
  449. // being fetched.
  450. func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) {
  451. m.mu.Lock()
  452. e, ok := m.cache[key]
  453. m.mu.Unlock()
  454. if !ok {
  455. return nil, ok, nil
  456. }
  457. if e.ready != nil {
  458. <-e.ready
  459. }
  460. return e.value, ok, e.err
  461. }
  462. // ensure retreives a value associated with the given key from the cache. If
  463. // the key does not exist in the cache, the load function will be called,
  464. // and its result will be associated with the key. The load function will not
  465. // be called more than once for any key.
  466. func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) {
  467. m.mu.Lock()
  468. e, ok := m.cache[key]
  469. if !ok {
  470. e = &remoteCacheEntry{ready: make(chan struct{})}
  471. m.cache[key] = e
  472. m.mu.Unlock()
  473. e.value, e.err = load()
  474. close(e.ready)
  475. } else {
  476. m.mu.Unlock()
  477. if e.ready != nil {
  478. <-e.ready
  479. }
  480. }
  481. return e.value, e.err
  482. }
  483. func (rc *RemoteCache) initTmp() {
  484. rc.tmpOnce.Do(func() {
  485. rc.tmpDir, rc.tmpErr = ioutil.TempDir("", "gazelle-remotecache-")
  486. if rc.tmpErr != nil {
  487. return
  488. }
  489. rc.tmpErr = ioutil.WriteFile(filepath.Join(rc.tmpDir, "go.mod"), []byte(`module gazelle_remote_cache__\n`), 0666)
  490. })
  491. }
  // semverRex locates the first "/vN" path element (N being one or more
  // digits) that is followed by "/" or the end of the path. Group 1 is that
  // element, including its leading slash.
  var semverRex = regexp.MustCompile(`^.*?(/v\d+)(?:/.*)?$`)

  // pathWithoutSemver strips the semantic import version element from path.
  // For example, "example.com/foo/v2/bar" becomes "example.com/foo/bar".
  // It returns "" when path has no such element, or when the element is
  // "v0" or "v1".
  // TODO(jayconrod): copied from language/go. This whole type should be
  // migrated there.
  func pathWithoutSemver(path string) string {
  	loc := semverRex.FindStringSubmatchIndex(path)
  	if loc == nil {
  		return ""
  	}
  	// loc[2] and loc[3] bound group 1, i.e. the "/vN" element.
  	start, end := loc[2], loc[3]
  	// Skip the leading "/v" to get just the digits.
  	switch major := path[start+2 : end]; major {
  	case "0", "1":
  		return ""
  	}
  	return path[:start] + path[end:]
  }
  // findGoTool attempts to locate the go executable. If GOROOT is set to a
  // non-empty value, the binary inside it is preferred; otherwise PATH is
  // relied on. If the wrapper script generated by the gazelle rule is invoked
  // by Bazel, it will set GOROOT to the configured SDK. We don't want to rely
  // on the host SDK in that situation.
  //
  // An empty GOROOT is treated the same as an unset one, so that it does not
  // produce the relative path "bin/go". On Windows, ".exe" is appended to the
  // result.
  //
  // TODO(jayconrod): copied from language/go (though it was originally in this
  // package). Go-specific details should be removed from RemoteCache, and
  // this copy should be deleted.
  func findGoTool() string {
  	tool := "go" // rely on PATH by default
  	if goroot := os.Getenv("GOROOT"); goroot != "" {
  		tool = filepath.Join(goroot, "bin", "go")
  	}
  	if runtime.GOOS == "windows" {
  		tool += ".exe"
  	}
  	return tool
  }