profile_gatherer.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. /*
  2. Copyright 2018 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package framework
  14. import (
  15. "bytes"
  16. "fmt"
  17. "os"
  18. "os/exec"
  19. "path"
  20. "strings"
  21. "sync"
  22. "time"
  23. e2elog "k8s.io/kubernetes/test/e2e/framework/log"
  24. e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
  25. )
  // DefaultCPUProfileSeconds is the number of seconds a CPU profile is gathered
  // for when the caller does not specify a duration.
  const DefaultCPUProfileSeconds = 30
  30. func getProfilesDirectoryPath() string {
  31. return path.Join(TestContext.ReportDir, "profiles")
  32. }
  33. func createProfilesDirectoryIfNeeded() error {
  34. profileDirPath := getProfilesDirectoryPath()
  35. if _, err := os.Stat(profileDirPath); os.IsNotExist(err) {
  36. if mkdirErr := os.Mkdir(profileDirPath, 0777); mkdirErr != nil {
  37. return fmt.Errorf("Failed to create profiles dir: %v", mkdirErr)
  38. }
  39. } else if err != nil {
  40. return fmt.Errorf("Failed to check existence of profiles dir: %v", err)
  41. }
  42. return nil
  43. }
  44. func checkProfileGatheringPrerequisites() error {
  45. if !TestContext.AllowGatheringProfiles {
  46. return fmt.Errorf("Can't gather profiles as --allow-gathering-profiles is false")
  47. }
  48. if TestContext.ReportDir == "" {
  49. return fmt.Errorf("Can't gather profiles as --report-dir is empty")
  50. }
  51. if err := createProfilesDirectoryIfNeeded(); err != nil {
  52. return fmt.Errorf("Failed to ensure profiles dir: %v", err)
  53. }
  54. return nil
  55. }
  // getPortForComponent maps a master component name to the local port queried
  // for its pprof data. For a component that is not in the table it returns -1
  // together with an error naming the component.
  func getPortForComponent(componentName string) (int, error) {
  	knownPorts := map[string]int{
  		"kube-apiserver":          8080,
  		"kube-scheduler":          10251,
  		"kube-controller-manager": 10252,
  	}
  	if port, found := knownPorts[componentName]; found {
  		return port, nil
  	}
  	return -1, fmt.Errorf("Port for component %v unknown", componentName)
  }
  67. // Gathers profiles from a master component through SSH. E.g usages:
  68. // - gatherProfile("kube-apiserver", "someTest", "heap")
  69. // - gatherProfile("kube-scheduler", "someTest", "profile")
  70. // - gatherProfile("kube-controller-manager", "someTest", "profile?seconds=20")
  71. //
  72. // We don't export this method but wrappers around it (see below).
  73. func gatherProfile(componentName, profileBaseName, profileKind string) error {
  74. if err := checkProfileGatheringPrerequisites(); err != nil {
  75. return fmt.Errorf("Profile gathering pre-requisite failed: %v", err)
  76. }
  77. profilePort, err := getPortForComponent(componentName)
  78. if err != nil {
  79. return fmt.Errorf("Profile gathering failed finding component port: %v", err)
  80. }
  81. if profileBaseName == "" {
  82. profileBaseName = time.Now().Format(time.RFC3339)
  83. }
  84. // Get the profile data over SSH.
  85. getCommand := fmt.Sprintf("curl -s localhost:%v/debug/pprof/%s", profilePort, profileKind)
  86. sshResult, err := e2essh.SSH(getCommand, GetMasterHost()+":22", TestContext.Provider)
  87. if err != nil {
  88. return fmt.Errorf("Failed to execute curl command on master through SSH: %v", err)
  89. }
  90. profilePrefix := componentName
  91. switch {
  92. case profileKind == "heap":
  93. profilePrefix += "_MemoryProfile_"
  94. case strings.HasPrefix(profileKind, "profile"):
  95. profilePrefix += "_CPUProfile_"
  96. default:
  97. return fmt.Errorf("Unknown profile kind provided: %s", profileKind)
  98. }
  99. // Write the profile data to a file.
  100. rawprofilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pprof")
  101. rawprofile, err := os.Create(rawprofilePath)
  102. if err != nil {
  103. return fmt.Errorf("Failed to create file for the profile graph: %v", err)
  104. }
  105. defer rawprofile.Close()
  106. if _, err := rawprofile.Write([]byte(sshResult.Stdout)); err != nil {
  107. return fmt.Errorf("Failed to write file with profile data: %v", err)
  108. }
  109. if err := rawprofile.Close(); err != nil {
  110. return fmt.Errorf("Failed to close file: %v", err)
  111. }
  112. // Create a graph from the data and write it to a pdf file.
  113. var cmd *exec.Cmd
  114. switch {
  115. // TODO: Support other profile kinds if needed (e.g inuse_space, alloc_objects, mutex, etc)
  116. case profileKind == "heap":
  117. cmd = exec.Command("go", "tool", "pprof", "-pdf", "-symbolize=none", "--alloc_space", rawprofile.Name())
  118. case strings.HasPrefix(profileKind, "profile"):
  119. cmd = exec.Command("go", "tool", "pprof", "-pdf", "-symbolize=none", rawprofile.Name())
  120. default:
  121. return fmt.Errorf("Unknown profile kind provided: %s", profileKind)
  122. }
  123. outfilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pdf")
  124. outfile, err := os.Create(outfilePath)
  125. if err != nil {
  126. return fmt.Errorf("Failed to create file for the profile graph: %v", err)
  127. }
  128. defer outfile.Close()
  129. cmd.Stdout = outfile
  130. stderr := bytes.NewBuffer(nil)
  131. cmd.Stderr = stderr
  132. if err := cmd.Run(); nil != err {
  133. return fmt.Errorf("Failed to run 'go tool pprof': %v, stderr: %#v", err, stderr.String())
  134. }
  135. return nil
  136. }
  137. // The below exposed functions can take a while to execute as they SSH to the master,
  138. // collect and copy the profile over and then graph it. To allow waiting for these to
  139. // finish before the parent goroutine itself finishes, we accept a sync.WaitGroup
  140. // argument in these functions. Typically you would use the following pattern:
  141. //
  142. // func TestFoo() {
  143. // var wg sync.WaitGroup
  144. // wg.Add(3)
  145. // go framework.GatherCPUProfile("kube-apiserver", "before_foo", &wg)
  146. // go framework.GatherMemoryProfile("kube-apiserver", "before_foo", &wg)
  147. // <<<< some code doing foo >>>>>>
  148. // go framework.GatherCPUProfile("kube-scheduler", "after_foo", &wg)
  149. // wg.Wait()
  150. // }
  151. //
  152. // If you do not wish to exercise the waiting logic, pass a nil value for the
  153. // waitgroup argument instead. However, then you would be responsible for ensuring
  154. // that the function finishes. There's also a polling-based gatherer utility for
  155. // CPU profiles available below.
  156. // GatherCPUProfile gathers CPU profile.
  157. func GatherCPUProfile(componentName string, profileBaseName string, wg *sync.WaitGroup) {
  158. GatherCPUProfileForSeconds(componentName, profileBaseName, DefaultCPUProfileSeconds, wg)
  159. }
  160. // GatherCPUProfileForSeconds gathers CPU profile for specified seconds.
  161. func GatherCPUProfileForSeconds(componentName string, profileBaseName string, seconds int, wg *sync.WaitGroup) {
  162. if wg != nil {
  163. defer wg.Done()
  164. }
  165. if err := gatherProfile(componentName, profileBaseName, fmt.Sprintf("profile?seconds=%v", seconds)); err != nil {
  166. e2elog.Logf("Failed to gather %v CPU profile: %v", componentName, err)
  167. }
  168. }
  169. // GatherMemoryProfile gathers memory profile.
  170. func GatherMemoryProfile(componentName string, profileBaseName string, wg *sync.WaitGroup) {
  171. if wg != nil {
  172. defer wg.Done()
  173. }
  174. if err := gatherProfile(componentName, profileBaseName, "heap"); err != nil {
  175. e2elog.Logf("Failed to gather %v memory profile: %v", componentName, err)
  176. }
  177. }
  178. // StartCPUProfileGatherer performs polling-based gathering of the component's CPU
  179. // profile. It takes the interval b/w consecutive gatherings as an argument and
  180. // starts the gathering goroutine. To stop the gatherer, close the returned channel.
  181. func StartCPUProfileGatherer(componentName string, profileBaseName string, interval time.Duration) chan struct{} {
  182. stopCh := make(chan struct{})
  183. go func() {
  184. for {
  185. select {
  186. case <-time.After(interval):
  187. GatherCPUProfile(componentName, profileBaseName+"_"+time.Now().Format(time.RFC3339), nil)
  188. case <-stopCh:
  189. return
  190. }
  191. }
  192. }()
  193. return stopCh
  194. }