camelcase.go 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Package camelcase is a micro package to split the words of a camelcase type
  2. // string into a slice of words.
  3. package camelcase
  4. import (
  5. "unicode"
  6. "unicode/utf8"
  7. )
  8. // Split splits the camelcase word and returns a list of words. It also
  9. // supports digits. Both lower camel case and upper camel case are supported.
  10. // For more info please check: http://en.wikipedia.org/wiki/CamelCase
  11. //
  12. // Examples
  13. //
  14. // "" => [""]
  15. // "lowercase" => ["lowercase"]
  16. // "Class" => ["Class"]
  17. // "MyClass" => ["My", "Class"]
  18. // "MyC" => ["My", "C"]
  19. // "HTML" => ["HTML"]
  20. // "PDFLoader" => ["PDF", "Loader"]
  21. // "AString" => ["A", "String"]
  22. // "SimpleXMLParser" => ["Simple", "XML", "Parser"]
  23. // "vimRPCPlugin" => ["vim", "RPC", "Plugin"]
  24. // "GL11Version" => ["GL", "11", "Version"]
  25. // "99Bottles" => ["99", "Bottles"]
  26. // "May5" => ["May", "5"]
  27. // "BFG9000" => ["BFG", "9000"]
  28. // "BöseÜberraschung" => ["Böse", "Überraschung"]
  29. // "Two spaces" => ["Two", " ", "spaces"]
  30. // "BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"]
  31. //
  32. // Splitting rules
  33. //
  34. // 1) If string is not valid UTF-8, return it without splitting as
  35. // single item array.
  36. // 2) Assign all unicode characters into one of 4 sets: lower case
  37. // letters, upper case letters, numbers, and all other characters.
  38. // 3) Iterate through characters of string, introducing splits
  39. // between adjacent characters that belong to different sets.
  40. // 4) Iterate through array of split strings, and if a given string
  41. // is upper case:
  42. // if subsequent string is lower case:
  43. // move last character of upper case string to beginning of
  44. // lower case string
  45. func Split(src string) (entries []string) {
  46. // don't split invalid utf8
  47. if !utf8.ValidString(src) {
  48. return []string{src}
  49. }
  50. entries = []string{}
  51. var runes [][]rune
  52. lastClass := 0
  53. class := 0
  54. // split into fields based on class of unicode character
  55. for _, r := range src {
  56. switch true {
  57. case unicode.IsLower(r):
  58. class = 1
  59. case unicode.IsUpper(r):
  60. class = 2
  61. case unicode.IsDigit(r):
  62. class = 3
  63. default:
  64. class = 4
  65. }
  66. if class == lastClass {
  67. runes[len(runes)-1] = append(runes[len(runes)-1], r)
  68. } else {
  69. runes = append(runes, []rune{r})
  70. }
  71. lastClass = class
  72. }
  73. // handle upper case -> lower case sequences, e.g.
  74. // "PDFL", "oader" -> "PDF", "Loader"
  75. for i := 0; i < len(runes)-1; i++ {
  76. if unicode.IsUpper(runes[i][0]) && unicode.IsLower(runes[i+1][0]) {
  77. runes[i+1] = append([]rune{runes[i][len(runes[i])-1]}, runes[i+1]...)
  78. runes[i] = runes[i][:len(runes[i])-1]
  79. }
  80. }
  81. // construct []string from results
  82. for _, s := range runes {
  83. if len(s) > 0 {
  84. entries = append(entries, string(s))
  85. }
  86. }
  87. return
  88. }