fileinfo.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. /* Copyright 2018 The Bazel Authors. All rights reserved.
  2. Licensed under the Apache License, Version 2.0 (the "License");
  3. you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at
  5. http://www.apache.org/licenses/LICENSE-2.0
  6. Unless required by applicable law or agreed to in writing, software
  7. distributed under the License is distributed on an "AS IS" BASIS,
  8. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. See the License for the specific language governing permissions and
  10. limitations under the License.
  11. */
  12. package golang
  13. import (
  14. "bufio"
  15. "bytes"
  16. "errors"
  17. "fmt"
  18. "go/ast"
  19. "go/parser"
  20. "go/token"
  21. "log"
  22. "os"
  23. "path"
  24. "path/filepath"
  25. "strconv"
  26. "strings"
  27. "unicode"
  28. "unicode/utf8"
  29. "github.com/bazelbuild/bazel-gazelle/internal/config"
  30. "github.com/bazelbuild/bazel-gazelle/internal/language/proto"
  31. "github.com/bazelbuild/bazel-gazelle/internal/rule"
  32. )
  33. // fileInfo holds information used to decide how to build a file. This
  34. // information comes from the file's name, from package and import declarations
  35. // (in .go files), and from +build and cgo comments.
  36. type fileInfo struct {
  37. path string
  38. name string
  39. // ext is the type of file, based on extension.
  40. ext ext
  41. // packageName is the Go package name of a .go file, without the
  42. // "_test" suffix if it was present. It is empty for non-Go files.
  43. packageName string
  44. // importPath is the canonical import path for this file's package.
  45. // This may be read from a package comment (in Go) or a go_package
  46. // option (in proto). This field is empty for files that don't specify
  47. // an import path.
  48. importPath string
  49. // isTest is true if the file stem (the part before the extension)
  50. // ends with "_test.go". This is never true for non-Go files.
  51. isTest bool
  52. // imports is a list of packages imported by a file. It does not include
  53. // "C" or anything from the standard library.
  54. imports []string
  55. // isCgo is true for .go files that import "C".
  56. isCgo bool
  57. // goos and goarch contain the OS and architecture suffixes in the filename,
  58. // if they were present.
  59. goos, goarch string
  60. // tags is a list of build tag lines. Each entry is the trimmed text of
  61. // a line after a "+build" prefix.
  62. tags []tagLine
  63. // copts and clinkopts contain flags that are part of CFLAGS, CPPFLAGS,
  64. // CXXFLAGS, and LDFLAGS directives in cgo comments.
  65. copts, clinkopts []taggedOpts
  66. // hasServices indicates whether a .proto file has service definitions.
  67. hasServices bool
  68. }
  69. // tagLine represents the space-separated disjunction of build tag groups
  70. // in a line comment.
  71. type tagLine []tagGroup
  72. // check returns true if at least one of the tag groups is satisfied.
  73. func (l tagLine) check(c *config.Config, os, arch string) bool {
  74. if len(l) == 0 {
  75. return false
  76. }
  77. for _, g := range l {
  78. if g.check(c, os, arch) {
  79. return true
  80. }
  81. }
  82. return false
  83. }
  84. // tagGroup represents a comma-separated conjuction of build tags.
  85. type tagGroup []string
  86. // check returns true if all of the tags are true. Tags that start with
  87. // "!" are negated (but "!!") is not allowed. Go release tags (e.g., "go1.8")
  88. // are ignored. If the group contains an os or arch tag, but the os or arch
  89. // parameters are empty, check returns false even if the tag is negated.
  90. func (g tagGroup) check(c *config.Config, os, arch string) bool {
  91. goConf := getGoConfig(c)
  92. for _, t := range g {
  93. if strings.HasPrefix(t, "!!") { // bad syntax, reject always
  94. return false
  95. }
  96. not := strings.HasPrefix(t, "!")
  97. if not {
  98. t = t[1:]
  99. }
  100. if isIgnoredTag(t) {
  101. // Release tags are treated as "unknown" and are considered true,
  102. // whether or not they are negated.
  103. continue
  104. }
  105. var match bool
  106. if _, ok := rule.KnownOSSet[t]; ok {
  107. if os == "" {
  108. return false
  109. }
  110. match = os == t
  111. } else if _, ok := rule.KnownArchSet[t]; ok {
  112. if arch == "" {
  113. return false
  114. }
  115. match = arch == t
  116. } else {
  117. match = goConf.genericTags[t]
  118. }
  119. if not {
  120. match = !match
  121. }
  122. if !match {
  123. return false
  124. }
  125. }
  126. return true
  127. }
  128. // taggedOpts a list of compile or link options which should only be applied
  129. // if the given set of build tags are satisfied. These options have already
  130. // been tokenized using the same algorithm that "go build" uses, then joined
  131. // with OptSeparator.
  132. type taggedOpts struct {
  133. tags tagLine
  134. opts string
  135. }
  136. // optSeparator is a special character inserted between options that appeared
  137. // together in a #cgo directive. This allows options to be split, modified,
  138. // and escaped by other packages.
  139. //
  140. // It's important to keep options grouped together in the same string. For
  141. // example, if we have "-framework IOKit" together in a #cgo directive,
  142. // "-framework" shouldn't be treated as a separate string for the purposes of
  143. // sorting and de-duplicating.
  144. const optSeparator = "\x1D"
  145. // ext indicates how a file should be treated, based on extension.
  146. type ext int
  147. const (
  148. // unknownExt is applied files that aren't buildable with Go.
  149. unknownExt ext = iota
  150. // goExt is applied to .go files.
  151. goExt
  152. // cExt is applied to C and C++ files.
  153. cExt
  154. // hExt is applied to header files. If cgo code is present, these may be
  155. // C or C++ headers. If not, they are treated as Go assembly headers.
  156. hExt
  157. // sExt is applied to Go assembly files, ending with .s.
  158. sExt
  159. // csExt is applied to other assembly files, ending with .S. These are built
  160. // with the C compiler if cgo code is present.
  161. csExt
  162. // protoExt is applied to .proto files.
  163. protoExt
  164. )
  165. // fileNameInfo returns information that can be inferred from the name of
  166. // a file. It does not read data from the file.
  167. func fileNameInfo(path_ string) fileInfo {
  168. name := filepath.Base(path_)
  169. var ext ext
  170. switch path.Ext(name) {
  171. case ".go":
  172. ext = goExt
  173. case ".c", ".cc", ".cpp", ".cxx", ".m", ".mm":
  174. ext = cExt
  175. case ".h", ".hh", ".hpp", ".hxx":
  176. ext = hExt
  177. case ".s":
  178. ext = sExt
  179. case ".S":
  180. ext = csExt
  181. case ".proto":
  182. ext = protoExt
  183. default:
  184. ext = unknownExt
  185. }
  186. // Determine test, goos, and goarch. This is intended to match the logic
  187. // in goodOSArchFile in go/build.
  188. var isTest bool
  189. var goos, goarch string
  190. l := strings.Split(name[:len(name)-len(path.Ext(name))], "_")
  191. if len(l) >= 2 && l[len(l)-1] == "test" {
  192. isTest = ext == goExt
  193. l = l[:len(l)-1]
  194. }
  195. switch {
  196. case len(l) >= 3 && rule.KnownOSSet[l[len(l)-2]] && rule.KnownArchSet[l[len(l)-1]]:
  197. goos = l[len(l)-2]
  198. goarch = l[len(l)-1]
  199. case len(l) >= 2 && rule.KnownOSSet[l[len(l)-1]]:
  200. goos = l[len(l)-1]
  201. case len(l) >= 2 && rule.KnownArchSet[l[len(l)-1]]:
  202. goarch = l[len(l)-1]
  203. }
  204. return fileInfo{
  205. path: path_,
  206. name: name,
  207. ext: ext,
  208. isTest: isTest,
  209. goos: goos,
  210. goarch: goarch,
  211. }
  212. }
  213. // otherFileInfo returns information about a non-.go file. It will parse
  214. // part of the file to determine build tags. If the file can't be read, an
  215. // error will be logged, and partial information will be returned.
  216. func otherFileInfo(path string) fileInfo {
  217. info := fileNameInfo(path)
  218. if info.ext == unknownExt {
  219. return info
  220. }
  221. tags, err := readTags(info.path)
  222. if err != nil {
  223. log.Printf("%s: error reading file: %v", info.path, err)
  224. return info
  225. }
  226. info.tags = tags
  227. return info
  228. }
  229. // goFileInfo returns information about a .go file. It will parse part of the
  230. // file to determine the package name, imports, and build constraints.
  231. // If the file can't be read, an error will be logged, and partial information
  232. // will be returned.
  233. // This function is intended to match go/build.Context.Import.
  234. // TODD(#53): extract canonical import path
  235. func goFileInfo(path, rel string) fileInfo {
  236. info := fileNameInfo(path)
  237. fset := token.NewFileSet()
  238. pf, err := parser.ParseFile(fset, info.path, nil, parser.ImportsOnly|parser.ParseComments)
  239. if err != nil {
  240. log.Printf("%s: error reading go file: %v", info.path, err)
  241. return info
  242. }
  243. info.packageName = pf.Name.Name
  244. if info.isTest && strings.HasSuffix(info.packageName, "_test") {
  245. info.packageName = info.packageName[:len(info.packageName)-len("_test")]
  246. }
  247. for _, decl := range pf.Decls {
  248. d, ok := decl.(*ast.GenDecl)
  249. if !ok {
  250. continue
  251. }
  252. for _, dspec := range d.Specs {
  253. spec, ok := dspec.(*ast.ImportSpec)
  254. if !ok {
  255. continue
  256. }
  257. quoted := spec.Path.Value
  258. path, err := strconv.Unquote(quoted)
  259. if err != nil {
  260. log.Printf("%s: error reading go file: %v", info.path, err)
  261. continue
  262. }
  263. if path == "C" {
  264. if info.isTest {
  265. log.Printf("%s: warning: use of cgo in test not supported", info.path)
  266. }
  267. info.isCgo = true
  268. cg := spec.Doc
  269. if cg == nil && len(d.Specs) == 1 {
  270. cg = d.Doc
  271. }
  272. if cg != nil {
  273. if err := saveCgo(&info, rel, cg); err != nil {
  274. log.Printf("%s: error reading go file: %v", info.path, err)
  275. }
  276. }
  277. continue
  278. }
  279. info.imports = append(info.imports, path)
  280. }
  281. }
  282. tags, err := readTags(info.path)
  283. if err != nil {
  284. log.Printf("%s: error reading go file: %v", info.path, err)
  285. return info
  286. }
  287. info.tags = tags
  288. return info
  289. }
  290. // saveCgo extracts CFLAGS, CPPFLAGS, CXXFLAGS, and LDFLAGS directives
  291. // from a comment above a "C" import. This is intended to match logic in
  292. // go/build.Context.saveCgo.
  293. func saveCgo(info *fileInfo, rel string, cg *ast.CommentGroup) error {
  294. text := cg.Text()
  295. for _, line := range strings.Split(text, "\n") {
  296. orig := line
  297. // Line is
  298. // #cgo [GOOS/GOARCH...] LDFLAGS: stuff
  299. //
  300. line = strings.TrimSpace(line)
  301. if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') {
  302. continue
  303. }
  304. // Split at colon.
  305. line = strings.TrimSpace(line[4:])
  306. i := strings.Index(line, ":")
  307. if i < 0 {
  308. return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig)
  309. }
  310. line, optstr := strings.TrimSpace(line[:i]), strings.TrimSpace(line[i+1:])
  311. // Parse tags and verb.
  312. f := strings.Fields(line)
  313. if len(f) < 1 {
  314. return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig)
  315. }
  316. verb := f[len(f)-1]
  317. tags := parseTagsInGroups(f[:len(f)-1])
  318. // Parse options.
  319. opts, err := splitQuoted(optstr)
  320. if err != nil {
  321. return fmt.Errorf("%s: invalid #cgo line: %s", info.path, orig)
  322. }
  323. var ok bool
  324. for i, opt := range opts {
  325. if opt, ok = expandSrcDir(opt, rel); !ok {
  326. return fmt.Errorf("%s: malformed #cgo argument: %s", info.path, orig)
  327. }
  328. opts[i] = opt
  329. }
  330. joinedStr := strings.Join(opts, optSeparator)
  331. // Add tags to appropriate list.
  332. switch verb {
  333. case "CFLAGS", "CPPFLAGS", "CXXFLAGS":
  334. info.copts = append(info.copts, taggedOpts{tags, joinedStr})
  335. case "LDFLAGS":
  336. info.clinkopts = append(info.clinkopts, taggedOpts{tags, joinedStr})
  337. case "pkg-config":
  338. return fmt.Errorf("%s: pkg-config not supported: %s", info.path, orig)
  339. default:
  340. return fmt.Errorf("%s: invalid #cgo verb: %s", info.path, orig)
  341. }
  342. }
  343. return nil
  344. }
  345. // splitQuoted splits the string s around each instance of one or more consecutive
  346. // white space characters while taking into account quotes and escaping, and
  347. // returns an array of substrings of s or an empty list if s contains only white space.
  348. // Single quotes and double quotes are recognized to prevent splitting within the
  349. // quoted region, and are removed from the resulting substrings. If a quote in s
  350. // isn't closed err will be set and r will have the unclosed argument as the
  351. // last element. The backslash is used for escaping.
  352. //
  353. // For example, the following string:
  354. //
  355. // a b:"c d" 'e''f' "g\""
  356. //
  357. // Would be parsed as:
  358. //
  359. // []string{"a", "b:c d", "ef", `g"`}
  360. //
  361. // Copied from go/build.splitQuoted
  362. func splitQuoted(s string) (r []string, err error) {
  363. var args []string
  364. arg := make([]rune, len(s))
  365. escaped := false
  366. quoted := false
  367. quote := '\x00'
  368. i := 0
  369. for _, rune := range s {
  370. switch {
  371. case escaped:
  372. escaped = false
  373. case rune == '\\':
  374. escaped = true
  375. continue
  376. case quote != '\x00':
  377. if rune == quote {
  378. quote = '\x00'
  379. continue
  380. }
  381. case rune == '"' || rune == '\'':
  382. quoted = true
  383. quote = rune
  384. continue
  385. case unicode.IsSpace(rune):
  386. if quoted || i > 0 {
  387. quoted = false
  388. args = append(args, string(arg[:i]))
  389. i = 0
  390. }
  391. continue
  392. }
  393. arg[i] = rune
  394. i++
  395. }
  396. if quoted || i > 0 {
  397. args = append(args, string(arg[:i]))
  398. }
  399. if quote != 0 {
  400. err = errors.New("unclosed quote")
  401. } else if escaped {
  402. err = errors.New("unfinished escaping")
  403. }
  404. return args, err
  405. }
  406. // expandSrcDir expands any occurrence of ${SRCDIR}, making sure
  407. // the result is safe for the shell.
  408. //
  409. // Copied from go/build.expandSrcDir
  410. func expandSrcDir(str string, srcdir string) (string, bool) {
  411. // "\" delimited paths cause safeCgoName to fail
  412. // so convert native paths with a different delimiter
  413. // to "/" before starting (eg: on windows).
  414. srcdir = filepath.ToSlash(srcdir)
  415. // Spaces are tolerated in ${SRCDIR}, but not anywhere else.
  416. chunks := strings.Split(str, "${SRCDIR}")
  417. if len(chunks) < 2 {
  418. return str, safeCgoName(str, false)
  419. }
  420. ok := true
  421. for _, chunk := range chunks {
  422. ok = ok && (chunk == "" || safeCgoName(chunk, false))
  423. }
  424. ok = ok && (srcdir == "" || safeCgoName(srcdir, true))
  425. res := strings.Join(chunks, srcdir)
  426. return res, ok && res != ""
  427. }
  428. // NOTE: $ is not safe for the shell, but it is allowed here because of linker options like -Wl,$ORIGIN.
  429. // We never pass these arguments to a shell (just to programs we construct argv for), so this should be okay.
  430. // See golang.org/issue/6038.
  431. // The @ is for OS X. See golang.org/issue/13720.
  432. // The % is for Jenkins. See golang.org/issue/16959.
  433. const safeString = "+-.,/0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz:$@%"
  434. const safeSpaces = " "
  435. var safeBytes = []byte(safeSpaces + safeString)
  436. // Copied from go/build.safeCgoName
  437. func safeCgoName(s string, spaces bool) bool {
  438. if s == "" {
  439. return false
  440. }
  441. safe := safeBytes
  442. if !spaces {
  443. safe = safe[len(safeSpaces):]
  444. }
  445. for i := 0; i < len(s); i++ {
  446. if c := s[i]; c < utf8.RuneSelf && bytes.IndexByte(safe, c) < 0 {
  447. return false
  448. }
  449. }
  450. return true
  451. }
  452. // readTags reads and extracts build tags from the block of comments
  453. // and blank lines at the start of a file which is separated from the
  454. // rest of the file by a blank line. Each string in the returned slice
  455. // is the trimmed text of a line after a "+build" prefix.
  456. // Based on go/build.Context.shouldBuild.
  457. func readTags(path string) ([]tagLine, error) {
  458. f, err := os.Open(path)
  459. if err != nil {
  460. return nil, err
  461. }
  462. defer f.Close()
  463. scanner := bufio.NewScanner(f)
  464. // Pass 1: Identify leading run of // comments and blank lines,
  465. // which must be followed by a blank line.
  466. var lines []string
  467. end := 0
  468. for scanner.Scan() {
  469. line := strings.TrimSpace(scanner.Text())
  470. if line == "" {
  471. end = len(lines)
  472. continue
  473. }
  474. if strings.HasPrefix(line, "//") {
  475. lines = append(lines, line[len("//"):])
  476. continue
  477. }
  478. break
  479. }
  480. if err := scanner.Err(); err != nil {
  481. return nil, err
  482. }
  483. lines = lines[:end]
  484. // Pass 2: Process each line in the run.
  485. var tagLines []tagLine
  486. for _, line := range lines {
  487. fields := strings.Fields(line)
  488. if len(fields) > 0 && fields[0] == "+build" {
  489. tagLines = append(tagLines, parseTagsInGroups(fields[1:]))
  490. }
  491. }
  492. return tagLines, nil
  493. }
  494. func parseTagsInGroups(groups []string) tagLine {
  495. var l tagLine
  496. for _, g := range groups {
  497. l = append(l, tagGroup(strings.Split(g, ",")))
  498. }
  499. return l
  500. }
  501. func isOSArchSpecific(info fileInfo, cgoTags tagLine) (osSpecific, archSpecific bool) {
  502. if info.goos != "" {
  503. osSpecific = true
  504. }
  505. if info.goarch != "" {
  506. archSpecific = true
  507. }
  508. lines := info.tags
  509. if len(cgoTags) > 0 {
  510. lines = append(lines, cgoTags)
  511. }
  512. for _, line := range lines {
  513. for _, group := range line {
  514. for _, tag := range group {
  515. if strings.HasPrefix(tag, "!") {
  516. tag = tag[1:]
  517. }
  518. _, osOk := rule.KnownOSSet[tag]
  519. if osOk {
  520. osSpecific = true
  521. }
  522. _, archOk := rule.KnownArchSet[tag]
  523. if archOk {
  524. archSpecific = true
  525. }
  526. }
  527. }
  528. }
  529. return osSpecific, archSpecific
  530. }
  531. // checkConstraints determines whether build constraints are satisfied on
  532. // a given platform.
  533. //
  534. // The first few arguments describe the platform. genericTags is the set
  535. // of build tags that are true on all platforms. os and arch are the platform
  536. // GOOS and GOARCH strings. If os or arch is empty, checkConstraints will
  537. // return false in the presence of OS and architecture constraints, even
  538. // if they are negated.
  539. //
  540. // The remaining arguments describe the file being tested. All of these may
  541. // be empty or nil. osSuffix and archSuffix are filename suffixes. fileTags
  542. // is a list tags from +build comments found near the top of the file. cgoTags
  543. // is an extra set of tags in a #cgo directive.
  544. func checkConstraints(c *config.Config, os, arch, osSuffix, archSuffix string, fileTags []tagLine, cgoTags tagLine) bool {
  545. if osSuffix != "" && osSuffix != os || archSuffix != "" && archSuffix != arch {
  546. return false
  547. }
  548. for _, l := range fileTags {
  549. if !l.check(c, os, arch) {
  550. return false
  551. }
  552. }
  553. if len(cgoTags) > 0 && !cgoTags.check(c, os, arch) {
  554. return false
  555. }
  556. return true
  557. }
  558. // isIgnoredTag returns whether the tag is "cgo" or is a release tag.
  559. // Release tags match the pattern "go[0-9]\.[0-9]+".
  560. // Gazelle won't consider whether an ignored tag is satisfied when evaluating
  561. // build constraints for a file.
  562. func isIgnoredTag(tag string) bool {
  563. if tag == "cgo" || tag == "race" || tag == "msan" {
  564. return true
  565. }
  566. if len(tag) < 5 || !strings.HasPrefix(tag, "go") {
  567. return false
  568. }
  569. if tag[2] < '0' || tag[2] > '9' || tag[3] != '.' {
  570. return false
  571. }
  572. for _, c := range tag[4:] {
  573. if c < '0' || c > '9' {
  574. return false
  575. }
  576. }
  577. return true
  578. }
  579. // protoFileInfo extracts metadata from a proto file. The proto extension
  580. // already "parses" these and stores metadata in proto.FileInfo, so this is
  581. // just processing relevant options.
  582. func protoFileInfo(path_ string, protoInfo proto.FileInfo) fileInfo {
  583. info := fileNameInfo(path_)
  584. // Look for "option go_package". If there's no / in the package option, then
  585. // it's just a simple package name, not a full import path.
  586. for _, opt := range protoInfo.Options {
  587. if opt.Key != "go_package" {
  588. continue
  589. }
  590. if strings.LastIndexByte(opt.Value, '/') == -1 {
  591. info.packageName = opt.Value
  592. } else {
  593. if i := strings.LastIndexByte(opt.Value, ';'); i != -1 {
  594. info.importPath = opt.Value[:i]
  595. info.packageName = opt.Value[i+1:]
  596. } else {
  597. info.importPath = opt.Value
  598. info.packageName = path.Base(opt.Value)
  599. }
  600. }
  601. }
  602. // Set the Go package name from the proto package name if there was no
  603. // option go_package.
  604. if info.packageName == "" && protoInfo.PackageName != "" {
  605. info.packageName = strings.Replace(protoInfo.PackageName, ".", "_", -1)
  606. }
  607. info.imports = protoInfo.Imports
  608. info.hasServices = protoInfo.HasServices
  609. return info
  610. }