server.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package services
  14. import (
  15. "flag"
  16. "fmt"
  17. "net/http"
  18. "os"
  19. "os/exec"
  20. "path"
  21. "reflect"
  22. "strconv"
  23. "strings"
  24. "syscall"
  25. "time"
  26. "k8s.io/klog"
  27. "k8s.io/kubernetes/test/e2e/framework"
  28. )
  // serverStartTimeout is the value of the -server-start-timeout flag: how long
  // to wait for each server to become healthy. It defaults to two minutes.
  var serverStartTimeout = flag.Duration(
  	"server-start-timeout",
  	2*time.Minute,
  	"Time to wait for each server to become healthy.",
  )
  // server describes one external server process together with the commands
  // used to start, restart and stop it and the endpoints used to probe it.
  type server struct {
  	// name identifies the server in log messages and error strings.
  	name string

  	// startCommand launches the server process.
  	startCommand *exec.Cmd

  	// killCommand stops the server. It is optional: when nil, kill() falls
  	// back to signalling the process started by startCommand.
  	killCommand *exec.Cmd

  	// restartCommand is optional. When set, the restart loop runs it instead
  	// of re-running startCommand.
  	restartCommand *exec.Cmd

  	// healthCheckUrls lists the URLs probed to decide whether the server is
  	// ready.
  	healthCheckUrls []string

  	// outFilename is the log file name; the server's stdout and stderr are
  	// both redirected to it.
  	outFilename string

  	// monitorParent asks for the server to be terminated when its parent
  	// process goes away.
  	monitorParent bool

  	// restartOnExit enables the restart loop that brings the server back up
  	// once its health checks start failing.
  	restartOnExit bool

  	// stopRestartingCh, when non-nil, stops the restart loop on write.
  	// Teardown code should check for the channel and write to it if present.
  	stopRestartingCh chan<- bool

  	// ackStopRestartingCh yields a value once the restart loop has stopped.
  	ackStopRestartingCh <-chan bool
  }
  59. // newServer returns a new server with the given name, commands, health check
  60. // URLs, etc.
  61. func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, monitorParent, restartOnExit bool) *server {
  62. return &server{
  63. name: name,
  64. startCommand: start,
  65. killCommand: kill,
  66. restartCommand: restart,
  67. healthCheckUrls: urls,
  68. outFilename: outputFileName,
  69. monitorParent: monitorParent,
  70. restartOnExit: restartOnExit,
  71. }
  72. }
  // commandToString renders a command as a single space-separated string: the
  // resolved executable path followed by the arguments after Args[0].
  //
  // A nil command yields "". A command whose Args is nil or empty (which is a
  // valid exec.Cmd when only Path is set) yields just the path instead of
  // panicking on the Args[1:] slice.
  func commandToString(c *exec.Cmd) string {
  	if c == nil {
  		return ""
  	}
  	parts := []string{c.Path}
  	if len(c.Args) > 1 {
  		// Args[0] is the command name; Path already stands in for it.
  		parts = append(parts, c.Args[1:]...)
  	}
  	return strings.Join(parts, " ")
  }
  80. func (s *server) String() string {
  81. return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name,
  82. commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename)
  83. }
  84. // start starts the server by running its commands, monitors it with a health
  85. // check, and ensures that it is restarted if applicable.
  86. //
  87. // Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly.
  88. func (s *server) start() error {
  89. klog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand))
  90. errCh := make(chan error)
  91. // Set up restart channels if the server is configured for restart on exit.
  92. var stopRestartingCh, ackStopRestartingCh chan bool
  93. if s.restartOnExit {
  94. if len(s.healthCheckUrls) == 0 {
  95. return fmt.Errorf("tried to start %s which has s.restartOnExit == true, but no health check urls provided", s)
  96. }
  97. stopRestartingCh = make(chan bool)
  98. ackStopRestartingCh = make(chan bool)
  99. s.stopRestartingCh = stopRestartingCh
  100. s.ackStopRestartingCh = ackStopRestartingCh
  101. }
  102. // This goroutine actually runs the start command for the server.
  103. go func() {
  104. defer close(errCh)
  105. // Create the output filename
  106. outPath := path.Join(framework.TestContext.ReportDir, s.outFilename)
  107. outfile, err := os.Create(outPath)
  108. if err != nil {
  109. errCh <- fmt.Errorf("failed to create file %q for `%s` %v", outPath, s, err)
  110. return
  111. }
  112. klog.Infof("Output file for server %q: %v", s.name, outfile.Name())
  113. defer outfile.Close()
  114. defer outfile.Sync()
  115. // Set the command to write the output file
  116. s.startCommand.Stdout = outfile
  117. s.startCommand.Stderr = outfile
  118. // If monitorParent is set, set Pdeathsig when starting the server.
  119. if s.monitorParent {
  120. // Death of this test process should kill the server as well.
  121. attrs := &syscall.SysProcAttr{}
  122. // Hack to set linux-only field without build tags.
  123. deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig")
  124. if deathSigField.IsValid() {
  125. deathSigField.Set(reflect.ValueOf(syscall.SIGTERM))
  126. } else {
  127. errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)")
  128. return
  129. }
  130. s.startCommand.SysProcAttr = attrs
  131. }
  132. // Start the command
  133. err = s.startCommand.Start()
  134. if err != nil {
  135. errCh <- fmt.Errorf("failed to run %s: %v", s, err)
  136. return
  137. }
  138. if !s.restartOnExit {
  139. klog.Infof("Waiting for server %q start command to complete", s.name)
  140. // If we aren't planning on restarting, ok to Wait() here to release resources.
  141. // Otherwise, we Wait() in the restart loop.
  142. err = s.startCommand.Wait()
  143. if err != nil {
  144. errCh <- fmt.Errorf("failed to run start command for server %q: %v", s.name, err)
  145. return
  146. }
  147. } else {
  148. usedStartCmd := true
  149. for {
  150. klog.Infof("Running health check for service %q", s.name)
  151. // Wait for an initial health check to pass, so that we are sure the server started.
  152. err := readinessCheck(s.name, s.healthCheckUrls, nil)
  153. if err != nil {
  154. if usedStartCmd {
  155. klog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name)
  156. s.startCommand.Wait() // Release resources if necessary.
  157. }
  158. // This should not happen, immediately stop the e2eService process.
  159. klog.Fatalf("Restart loop readinessCheck failed for %s", s)
  160. } else {
  161. klog.Infof("Initial health check passed for service %q", s.name)
  162. }
  163. // Initial health check passed, wait until a health check fails again.
  164. stillAlive:
  165. for {
  166. select {
  167. case <-stopRestartingCh:
  168. ackStopRestartingCh <- true
  169. return
  170. case <-time.After(time.Second):
  171. for _, url := range s.healthCheckUrls {
  172. resp, err := http.Head(url)
  173. if err != nil || resp.StatusCode != http.StatusOK {
  174. break stillAlive
  175. }
  176. }
  177. }
  178. }
  179. if usedStartCmd {
  180. s.startCommand.Wait() // Release resources from last cmd
  181. usedStartCmd = false
  182. }
  183. if s.restartCommand != nil {
  184. // Always make a fresh copy of restartCommand before
  185. // running, we may have to restart multiple times
  186. s.restartCommand = &exec.Cmd{
  187. Path: s.restartCommand.Path,
  188. Args: s.restartCommand.Args,
  189. Env: s.restartCommand.Env,
  190. Dir: s.restartCommand.Dir,
  191. Stdin: s.restartCommand.Stdin,
  192. Stdout: s.restartCommand.Stdout,
  193. Stderr: s.restartCommand.Stderr,
  194. ExtraFiles: s.restartCommand.ExtraFiles,
  195. SysProcAttr: s.restartCommand.SysProcAttr,
  196. }
  197. // Run and wait for exit. This command is assumed to have
  198. // short duration, e.g. systemctl restart
  199. klog.Infof("Restarting server %q with restart command", s.name)
  200. err = s.restartCommand.Run()
  201. if err != nil {
  202. // This should not happen, immediately stop the e2eService process.
  203. klog.Fatalf("Restarting server %s with restartCommand failed. Error: %v.", s, err)
  204. }
  205. } else {
  206. s.startCommand = &exec.Cmd{
  207. Path: s.startCommand.Path,
  208. Args: s.startCommand.Args,
  209. Env: s.startCommand.Env,
  210. Dir: s.startCommand.Dir,
  211. Stdin: s.startCommand.Stdin,
  212. Stdout: s.startCommand.Stdout,
  213. Stderr: s.startCommand.Stderr,
  214. ExtraFiles: s.startCommand.ExtraFiles,
  215. SysProcAttr: s.startCommand.SysProcAttr,
  216. }
  217. klog.Infof("Restarting server %q with start command", s.name)
  218. err = s.startCommand.Start()
  219. usedStartCmd = true
  220. if err != nil {
  221. // This should not happen, immediately stop the e2eService process.
  222. klog.Fatalf("Restarting server %s with startCommand failed. Error: %v.", s, err)
  223. }
  224. }
  225. }
  226. }
  227. }()
  228. return readinessCheck(s.name, s.healthCheckUrls, errCh)
  229. }
  230. // kill runs the server's kill command.
  231. func (s *server) kill() error {
  232. klog.Infof("Kill server %q", s.name)
  233. name := s.name
  234. cmd := s.startCommand
  235. // If s has a restart loop, turn it off.
  236. if s.restartOnExit {
  237. s.stopRestartingCh <- true
  238. <-s.ackStopRestartingCh
  239. }
  240. if s.killCommand != nil {
  241. return s.killCommand.Run()
  242. }
  243. if cmd == nil {
  244. return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name)
  245. }
  246. if cmd.Process == nil {
  247. klog.V(2).Infof("%q not running", name)
  248. return nil
  249. }
  250. pid := cmd.Process.Pid
  251. if pid <= 1 {
  252. return fmt.Errorf("invalid PID %d for %q", pid, name)
  253. }
  254. // Attempt to shut down the process in a friendly manner before forcing it.
  255. waitChan := make(chan error)
  256. go func() {
  257. _, err := cmd.Process.Wait()
  258. waitChan <- err
  259. close(waitChan)
  260. }()
  261. const timeout = 10 * time.Second
  262. for _, signal := range []string{"-TERM", "-KILL"} {
  263. klog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal)
  264. cmd := exec.Command("kill", signal, strconv.Itoa(pid))
  265. _, err := cmd.Output()
  266. if err != nil {
  267. klog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err)
  268. continue
  269. }
  270. select {
  271. case err := <-waitChan:
  272. if err != nil {
  273. return fmt.Errorf("error stopping %q: %v", name, err)
  274. }
  275. // Success!
  276. return nil
  277. case <-time.After(timeout):
  278. // Continue.
  279. }
  280. }
  281. return fmt.Errorf("unable to stop %q", name)
  282. }