123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247 |
- package misspell
- import (
- "bufio"
- "bytes"
- "io"
- "regexp"
- "strings"
- "text/scanner"
- )
// max returns the larger of x and y. When the two are equal the shared
// value is returned.
func max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
// inArray reports whether needle is exactly equal (byte-for-byte, so
// case-sensitively) to at least one element of haystack. A nil or empty
// haystack never matches.
func inArray(haystack []string, needle string) bool {
	for i := range haystack {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}
// wordRegexp matches maximal runs of ASCII letters, ASCII digits and
// apostrophes. recheckLine uses it to locate the candidate words of a line.
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
// Diff is a data structure describing one correction made within a single
// line.
type Diff struct {
	// Filename is left empty by the functions in this file; it is
	// available for callers to fill in.
	Filename string
	// FullLine is the complete original line the word was found in,
	// including its trailing newline when the line had one.
	FullLine string
	// Line is the 1-based line number. Column is the 0-based byte offset
	// of Original within FullLine.
	Line, Column int
	// Original is the word as it appeared in the input; Corrected is the
	// text it was replaced with.
	Original, Corrected string
}
- // Replacer is the main struct for spelling correction
- type Replacer struct {
- Replacements []string
- Debug bool
- engine *StringReplacer
- corrected map[string]string
- }
- // New creates a new default Replacer using the main rule list
- func New() *Replacer {
- r := Replacer{
- Replacements: DictMain,
- }
- r.Compile()
- return &r
- }
- // RemoveRule deletes existings rules.
- // TODO: make inplace to save memory
- func (r *Replacer) RemoveRule(ignore []string) {
- newwords := make([]string, 0, len(r.Replacements))
- for i := 0; i < len(r.Replacements); i += 2 {
- if inArray(ignore, r.Replacements[i]) {
- continue
- }
- newwords = append(newwords, r.Replacements[i:i+2]...)
- }
- r.engine = nil
- r.Replacements = newwords
- }
- // AddRuleList appends new rules.
- // Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
- // Note: does not check for duplictes
- func (r *Replacer) AddRuleList(additions []string) {
- r.engine = nil
- r.Replacements = append(r.Replacements, additions...)
- }
- // Compile compiles the rules. Required before using the Replace functions
- func (r *Replacer) Compile() {
- r.corrected = make(map[string]string, len(r.Replacements)/2)
- for i := 0; i < len(r.Replacements); i += 2 {
- r.corrected[r.Replacements[i]] = r.Replacements[i+1]
- }
- r.engine = NewStringReplacer(r.Replacements...)
- }
- /*
- line1 and line2 are different
- extract words from each line1
- replace word -> newword
- if word == new-word
- continue
- if new-word in list of replacements
- continue
- new word not original, and not in list of replacements
- some substring got mixed up. UNdo
- */
- func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
- first := 0
- redacted := RemoveNotWords(s)
- idx := wordRegexp.FindAllStringIndex(redacted, -1)
- for _, ab := range idx {
- word := s[ab[0]:ab[1]]
- newword := r.engine.Replace(word)
- if newword == word {
- // no replacement done
- continue
- }
- // ignore camelCase words
- // https://github.com/client9/misspell/issues/113
- if CaseStyle(word) == CaseUnknown {
- continue
- }
- if StringEqualFold(r.corrected[strings.ToLower(word)], newword) {
- // word got corrected into something we know
- io.WriteString(buf, s[first:ab[0]])
- io.WriteString(buf, newword)
- first = ab[1]
- next(Diff{
- FullLine: s,
- Line: lineNum,
- Original: word,
- Corrected: newword,
- Column: ab[0],
- })
- continue
- }
- // Word got corrected into something unknown. Ignore it
- }
- io.WriteString(buf, s[first:])
- }
- // ReplaceGo is a specialized routine for correcting Golang source
- // files. Currently only checks comments, not identifiers for
- // spelling.
- func (r *Replacer) ReplaceGo(input string) (string, []Diff) {
- var s scanner.Scanner
- s.Init(strings.NewReader(input))
- s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
- lastPos := 0
- output := ""
- Loop:
- for {
- switch s.Scan() {
- case scanner.Comment:
- origComment := s.TokenText()
- newComment := r.engine.Replace(origComment)
- if origComment != newComment {
- // s.Pos().Offset is the end of the current token
- // subtract len(origComment) to get the start of the token
- offset := s.Pos().Offset
- output = output + input[lastPos:offset-len(origComment)] + newComment
- lastPos = offset
- }
- case scanner.EOF:
- break Loop
- }
- }
- if lastPos == 0 {
- // no changes, no copies
- return input, nil
- }
- if lastPos < len(input) {
- output = output + input[lastPos:]
- }
- diffs := make([]Diff, 0, 8)
- buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
- // faster that making a bytes.Buffer and bufio.ReadString
- outlines := strings.SplitAfter(output, "\n")
- inlines := strings.SplitAfter(input, "\n")
- for i := 0; i < len(inlines); i++ {
- if inlines[i] == outlines[i] {
- buf.WriteString(outlines[i])
- continue
- }
- r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
- diffs = append(diffs, d)
- })
- }
- return buf.String(), diffs
- }
- // Replace is corrects misspellings in input, returning corrected version
- // along with a list of diffs.
- func (r *Replacer) Replace(input string) (string, []Diff) {
- output := r.engine.Replace(input)
- if input == output {
- return input, nil
- }
- diffs := make([]Diff, 0, 8)
- buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
- // faster that making a bytes.Buffer and bufio.ReadString
- outlines := strings.SplitAfter(output, "\n")
- inlines := strings.SplitAfter(input, "\n")
- for i := 0; i < len(inlines); i++ {
- if inlines[i] == outlines[i] {
- buf.WriteString(outlines[i])
- continue
- }
- r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
- diffs = append(diffs, d)
- })
- }
- return buf.String(), diffs
- }
- // ReplaceReader applies spelling corrections to a reader stream. Diffs are
- // emitted through a callback.
- func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
- var (
- err error
- line string
- lineNum int
- )
- reader := bufio.NewReader(raw)
- for err == nil {
- lineNum++
- line, err = reader.ReadString('\n')
- // if it's EOF, then line has the last line
- // don't like the check of err here and
- // in for loop
- if err != nil && err != io.EOF {
- return err
- }
- // easily 5x faster than regexp+map
- if line == r.engine.Replace(line) {
- io.WriteString(w, line)
- continue
- }
- // but it can be inaccurate, so we need to double check
- r.recheckLine(line, lineNum, w, next)
- }
- return nil
- }
|