remote.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package remote
  14. import (
  15. "flag"
  16. "fmt"
  17. "io/ioutil"
  18. "os"
  19. "os/exec"
  20. "path/filepath"
  21. "regexp"
  22. "strings"
  23. "time"
  24. utilerrors "k8s.io/apimachinery/pkg/util/errors"
  25. "k8s.io/klog"
  26. )
  27. var testTimeout = flag.Duration("test-timeout", 45*time.Minute, "How long (in golang duration format) to wait for ginkgo tests to complete.")
  28. var resultsDir = flag.String("results-dir", "/tmp/", "Directory to scp test results to.")
  29. const archiveName = "e2e_node_test.tar.gz"
  30. // CreateTestArchive creates the archive package for the node e2e test.
  31. func CreateTestArchive(suite TestSuite, systemSpecName string) (string, error) {
  32. klog.V(2).Infof("Building archive...")
  33. tardir, err := ioutil.TempDir("", "node-e2e-archive")
  34. if err != nil {
  35. return "", fmt.Errorf("failed to create temporary directory %v", err)
  36. }
  37. defer os.RemoveAll(tardir)
  38. // Call the suite function to setup the test package.
  39. err = suite.SetupTestPackage(tardir, systemSpecName)
  40. if err != nil {
  41. return "", fmt.Errorf("failed to setup test package %q: %v", tardir, err)
  42. }
  43. // Build the tar
  44. out, err := exec.Command("tar", "-zcvf", archiveName, "-C", tardir, ".").CombinedOutput()
  45. if err != nil {
  46. return "", fmt.Errorf("failed to build tar %v. Output:\n%s", err, out)
  47. }
  48. dir, err := os.Getwd()
  49. if err != nil {
  50. return "", fmt.Errorf("failed to get working directory %v", err)
  51. }
  52. return filepath.Join(dir, archiveName), nil
  53. }
  54. // RunRemote returns the command output, whether the exit was ok, and any errors
  55. // TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. Change the variable name.
  56. func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string, extraEnvs string) (string, bool, error) {
  57. // Create the temp staging directory
  58. klog.V(2).Infof("Staging test binaries on %q", host)
  59. workspace := newWorkspaceDir()
  60. // Do not sudo here, so that we can use scp to copy test archive to the directory.
  61. if output, err := SSHNoSudo(host, "mkdir", workspace); err != nil {
  62. // Exit failure with the error
  63. return "", false, fmt.Errorf("failed to create workspace directory %q on host %q: %v output: %q", workspace, host, err, output)
  64. }
  65. if cleanup {
  66. defer func() {
  67. output, err := SSH(host, "rm", "-rf", workspace)
  68. if err != nil {
  69. klog.Errorf("failed to cleanup workspace %q on host %q: %v. Output:\n%s", workspace, host, err, output)
  70. }
  71. }()
  72. }
  73. // Copy the archive to the staging directory
  74. if output, err := runSSHCommand("scp", archive, fmt.Sprintf("%s:%s/", GetHostnameOrIP(host), workspace)); err != nil {
  75. // Exit failure with the error
  76. return "", false, fmt.Errorf("failed to copy test archive: %v, output: %q", err, output)
  77. }
  78. // Extract the archive
  79. cmd := getSSHCommand(" && ",
  80. fmt.Sprintf("cd %s", workspace),
  81. fmt.Sprintf("tar -xzvf ./%s", archiveName),
  82. )
  83. klog.V(2).Infof("Extracting tar on %q", host)
  84. // Do not use sudo here, because `sudo tar -x` will recover the file ownership inside the tar ball, but
  85. // we want the extracted files to be owned by the current user.
  86. if output, err := SSHNoSudo(host, "sh", "-c", cmd); err != nil {
  87. // Exit failure with the error
  88. return "", false, fmt.Errorf("failed to extract test archive: %v, output: %q", err, output)
  89. }
  90. // Create the test result directory.
  91. resultDir := filepath.Join(workspace, "results")
  92. if output, err := SSHNoSudo(host, "mkdir", resultDir); err != nil {
  93. // Exit failure with the error
  94. return "", false, fmt.Errorf("failed to create test result directory %q on host %q: %v output: %q", resultDir, host, err, output)
  95. }
  96. klog.V(2).Infof("Running test on %q", host)
  97. output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs, *testTimeout)
  98. aggErrs := []error{}
  99. // Do not log the output here, let the caller deal with the test output.
  100. if err != nil {
  101. aggErrs = append(aggErrs, err)
  102. collectSystemLog(host)
  103. }
  104. klog.V(2).Infof("Copying test artifacts from %q", host)
  105. scpErr := getTestArtifacts(host, workspace)
  106. if scpErr != nil {
  107. aggErrs = append(aggErrs, scpErr)
  108. }
  109. return output, len(aggErrs) == 0, utilerrors.NewAggregate(aggErrs)
  110. }
  111. const (
  112. // workspaceDirPrefix is the string prefix used in the workspace directory name.
  113. workspaceDirPrefix = "node-e2e-"
  114. // timestampFormat is the timestamp format used in the node e2e directory name.
  115. timestampFormat = "20060102T150405"
  116. )
  117. func getTimestamp() string {
  118. return fmt.Sprint(time.Now().Format(timestampFormat))
  119. }
  120. func newWorkspaceDir() string {
  121. return filepath.Join("/tmp", workspaceDirPrefix+getTimestamp())
  122. }
  123. // GetTimestampFromWorkspaceDir parses the workspace directory name and gets the timestamp part of it.
  124. // This can later be used to name other artifacts (such as the
  125. // kubelet-${instance}.service systemd transient service used to launch
  126. // Kubelet) so that they can be matched to each other.
  127. func GetTimestampFromWorkspaceDir(dir string) string {
  128. dirTimestamp := strings.TrimPrefix(filepath.Base(dir), workspaceDirPrefix)
  129. re := regexp.MustCompile("^\\d{8}T\\d{6}$")
  130. if re.MatchString(dirTimestamp) {
  131. return dirTimestamp
  132. }
  133. // Fallback: if we can't find that timestamp, default to using Now()
  134. return getTimestamp()
  135. }
  136. func getTestArtifacts(host, testDir string) error {
  137. logPath := filepath.Join(*resultsDir, host)
  138. if err := os.MkdirAll(logPath, 0755); err != nil {
  139. return fmt.Errorf("failed to create log directory %q: %v", logPath, err)
  140. }
  141. // Copy logs to artifacts/hostname
  142. if _, err := runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.log", GetHostnameOrIP(host), testDir), logPath); err != nil {
  143. return err
  144. }
  145. // Copy json files (if any) to artifacts.
  146. if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
  147. if _, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIP(host), testDir), *resultsDir); err != nil {
  148. return err
  149. }
  150. }
  151. if _, err := SSH(host, "ls", fmt.Sprintf("%s/results/junit*", testDir)); err == nil {
  152. // Copy junit (if any) to the top of artifacts
  153. if _, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIP(host), testDir), *resultsDir); err != nil {
  154. return err
  155. }
  156. }
  157. return nil
  158. }
  159. // collectSystemLog is a temporary hack to collect system log when encountered on
  160. // unexpected error.
  161. func collectSystemLog(host string) {
  162. // Encountered an unexpected error. The remote test harness may not
  163. // have finished retrieved and stored all the logs in this case. Try
  164. // to get some logs for debugging purposes.
  165. // TODO: This is a best-effort, temporary hack that only works for
  166. // journald nodes. We should have a more robust way to collect logs.
  167. var (
  168. logName = "system.log"
  169. logPath = fmt.Sprintf("/tmp/%s-%s", getTimestamp(), logName)
  170. destPath = fmt.Sprintf("%s/%s-%s", *resultsDir, host, logName)
  171. )
  172. klog.V(2).Infof("Test failed unexpectedly. Attempting to retrieving system logs (only works for nodes with journald)")
  173. // Try getting the system logs from journald and store it to a file.
  174. // Don't reuse the original test directory on the remote host because
  175. // it could've be been removed if the node was rebooted.
  176. if output, err := SSH(host, "sh", "-c", fmt.Sprintf("'journalctl --system --all > %s'", logPath)); err == nil {
  177. klog.V(2).Infof("Got the system logs from journald; copying it back...")
  178. if output, err := runSSHCommand("scp", fmt.Sprintf("%s:%s", GetHostnameOrIP(host), logPath), destPath); err != nil {
  179. klog.V(2).Infof("Failed to copy the log: err: %v, output: %q", err, output)
  180. }
  181. } else {
  182. klog.V(2).Infof("Failed to run journactl (normal if it doesn't exist on the node): %v, output: %q", err, output)
  183. }
  184. }
  185. // WriteLog is a temporary function to make it possible to write log
  186. // in the runner. This is used to collect serial console log.
  187. // TODO(random-liu): Use the log-dump script in cluster e2e.
  188. func WriteLog(host, filename, content string) error {
  189. logPath := filepath.Join(*resultsDir, host)
  190. if err := os.MkdirAll(logPath, 0755); err != nil {
  191. return fmt.Errorf("failed to create log directory %q: %v", logPath, err)
  192. }
  193. f, err := os.Create(filepath.Join(logPath, filename))
  194. if err != nil {
  195. return err
  196. }
  197. defer f.Close()
  198. _, err = f.WriteString(content)
  199. return err
  200. }