123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- /*
- Copyright 2014 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- // A tiny web server for checking networking connectivity.
- //
- // Will dial out to, and expect to hear from, every pod that is a member of
- // the service passed in the flag -service.
- //
- // Will serve a webserver on given -port.
- //
- // Visit /read to see the current state, or /quit to shut down.
- //
- // Visit /status to see pass/running/fail determination. (literally, it will
- // return one of those words.)
- //
- // /write is used by other network test pods to register connectivity.
- package main
- import (
- "bytes"
- "encoding/json"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "net"
- "net/http"
- "os"
- "os/signal"
- "sync"
- "syscall"
- "time"
- v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/util/sets"
- "k8s.io/apimachinery/pkg/version"
- clientset "k8s.io/client-go/kubernetes"
- restclient "k8s.io/client-go/rest"
- )
- var (
- port = flag.Int("port", 8080, "Port number to serve at.")
- peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.")
- service = flag.String("service", "nettest", "Service to find other network test pods in.")
- namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.")
- delayShutdown = flag.Int("delay-shutdown", 0, "Number of seconds to delay shutdown when receiving SIGTERM.")
- )
- // State tracks the internal state of our little http server.
- // It's returned verbatim over the /read endpoint.
- type State struct {
- // Hostname is set once and never changed-- it's always safe to read.
- Hostname string
- // The below fields require that lock is held before reading or writing.
- Sent map[string]int
- Received map[string]int
- Errors []string
- Log []string
- StillContactingPeers bool
- lock sync.Mutex
- }
- func (s *State) doneContactingPeers() {
- s.lock.Lock()
- defer s.lock.Unlock()
- s.StillContactingPeers = false
- }
- // serveStatus returns "pass", "running", or "fail".
- func (s *State) serveStatus(w http.ResponseWriter, r *http.Request) {
- s.lock.Lock()
- defer s.lock.Unlock()
- if len(s.Sent) >= *peerCount && len(s.Received) >= *peerCount {
- fmt.Fprintf(w, "pass")
- return
- }
- if s.StillContactingPeers {
- fmt.Fprintf(w, "running")
- return
- }
- // Logf can't be called while holding the lock, so defer using a goroutine
- go s.Logf("Declaring failure for %s/%s with %d sent and %d received and %d peers", *namespace, *service, len(s.Sent), len(s.Received), *peerCount)
- fmt.Fprintf(w, "fail")
- }
- // serveRead writes our json encoded state
- func (s *State) serveRead(w http.ResponseWriter, r *http.Request) {
- s.lock.Lock()
- defer s.lock.Unlock()
- w.WriteHeader(http.StatusOK)
- b, err := json.MarshalIndent(s, "", "\t")
- s.appendErr(err)
- _, err = w.Write(b)
- s.appendErr(err)
- }
- // WritePost is the format that (json encoded) requests to the /write handler should take.
- type WritePost struct {
- Source string
- Dest string
- }
- // WriteResp is returned by /write
- type WriteResp struct {
- Hostname string
- }
- // serveWrite records the contact in our state.
- func (s *State) serveWrite(w http.ResponseWriter, r *http.Request) {
- defer r.Body.Close()
- s.lock.Lock()
- defer s.lock.Unlock()
- w.WriteHeader(http.StatusOK)
- var wp WritePost
- s.appendErr(json.NewDecoder(r.Body).Decode(&wp))
- if wp.Source == "" {
- s.appendErr(fmt.Errorf("%v: Got request with no source", s.Hostname))
- } else {
- if s.Received == nil {
- s.Received = map[string]int{}
- }
- s.Received[wp.Source]++
- }
- s.appendErr(json.NewEncoder(w).Encode(&WriteResp{Hostname: s.Hostname}))
- }
- // appendErr adds err to the list, if err is not nil. s must be locked.
- func (s *State) appendErr(err error) {
- if err != nil {
- s.Errors = append(s.Errors, err.Error())
- }
- }
- // Logf writes to the log message list. s must not be locked.
- // s's Log member will drop an old message if it would otherwise
- // become longer than 500 messages.
- func (s *State) Logf(format string, args ...interface{}) {
- s.lock.Lock()
- defer s.lock.Unlock()
- s.Log = append(s.Log, fmt.Sprintf(format, args...))
- if len(s.Log) > 500 {
- s.Log = s.Log[1:]
- }
- }
- // s must not be locked
- func (s *State) appendSuccessfulSend(toHostname string) {
- s.lock.Lock()
- defer s.lock.Unlock()
- if s.Sent == nil {
- s.Sent = map[string]int{}
- }
- s.Sent[toHostname]++
- }
- var (
- // Our one and only state object
- state State
- )
- func main() {
- flag.Parse()
- if *service == "" {
- log.Fatal("Must provide -service flag.")
- }
- hostname, err := os.Hostname()
- if err != nil {
- log.Fatalf("Error getting hostname: %v", err)
- }
- if *delayShutdown > 0 {
- termCh := make(chan os.Signal)
- signal.Notify(termCh, syscall.SIGTERM)
- go func() {
- <-termCh
- log.Printf("Sleeping %d seconds before exit ...", *delayShutdown)
- time.Sleep(time.Duration(*delayShutdown) * time.Second)
- os.Exit(0)
- }()
- }
- state := State{
- Hostname: hostname,
- StillContactingPeers: true,
- }
- go contactOthers(&state)
- http.HandleFunc("/quit", func(w http.ResponseWriter, r *http.Request) {
- os.Exit(0)
- })
- http.HandleFunc("/read", state.serveRead)
- http.HandleFunc("/write", state.serveWrite)
- http.HandleFunc("/status", state.serveStatus)
- go log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", *port), nil))
- select {}
- }
- // Find all sibling pods in the service and post to their /write handler.
- func contactOthers(state *State) {
- var (
- versionInfo *version.Info
- err error
- )
- sleepTime := 5 * time.Second
- // In large cluster getting all endpoints is pretty expensive.
- // Thus, we will limit ourselves to send on average at most 10 such
- // requests per second
- if sleepTime < time.Duration(*peerCount/10)*time.Second {
- sleepTime = time.Duration(*peerCount/10) * time.Second
- }
- timeout := 5 * time.Minute
- // Similarly we need to bump timeout so that it is reasonable in large
- // clusters.
- if timeout < time.Duration(*peerCount)*time.Second {
- timeout = time.Duration(*peerCount) * time.Second
- }
- defer state.doneContactingPeers()
- config, err := restclient.InClusterConfig()
- if err != nil {
- log.Fatalf("Unable to create config; error: %v\n", err)
- }
- config.ContentType = "application/vnd.kubernetes.protobuf"
- client, err := clientset.NewForConfig(config)
- if err != nil {
- log.Fatalf("Unable to create client; error: %v\n", err)
- }
- // Try to get the server version until <timeout>; we use a timeout because
- // the pod might not have immediate network connectivity.
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(sleepTime) {
- // Double check that worked by getting the server version.
- if versionInfo, err = client.Discovery().ServerVersion(); err != nil {
- log.Printf("Unable to get server version: %v; retrying.\n", err)
- } else {
- log.Printf("Server version: %#v\n", versionInfo)
- break
- }
- time.Sleep(1 * time.Second)
- }
- if err != nil {
- log.Fatalf("Unable to contact Kubernetes: %v\n", err)
- }
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(sleepTime) {
- eps := getWebserverEndpoints(client)
- if eps.Len() >= *peerCount {
- break
- }
- state.Logf("%v/%v has %v endpoints (%v), which is less than %v as expected. Waiting for all endpoints to come up.", *namespace, *service, len(eps), eps.List(), *peerCount)
- }
- // Do this repeatedly, in case there's some propagation delay with getting
- // newly started pods into the endpoints list.
- for i := 0; i < 15; i++ {
- eps := getWebserverEndpoints(client)
- for ep := range eps {
- state.Logf("Attempting to contact %s", ep)
- contactSingle(ep, state)
- }
- time.Sleep(sleepTime)
- }
- }
- //getWebserverEndpoints returns the webserver endpoints as a set of String, each in the format like "http://{ip}:{port}"
- func getWebserverEndpoints(client clientset.Interface) sets.String {
- endpoints, err := client.CoreV1().Endpoints(*namespace).Get(*service, v1.GetOptions{})
- eps := sets.String{}
- if err != nil {
- state.Logf("Unable to read the endpoints for %v/%v: %v.", *namespace, *service, err)
- return eps
- }
- for _, ss := range endpoints.Subsets {
- for _, a := range ss.Addresses {
- for _, p := range ss.Ports {
- ipPort := net.JoinHostPort(a.IP, fmt.Sprint(p.Port))
- eps.Insert(fmt.Sprintf("http://%s", ipPort))
- }
- }
- }
- return eps
- }
- // contactSingle dials the address 'e' and tries to POST to its /write address.
- func contactSingle(e string, state *State) {
- body, err := json.Marshal(&WritePost{
- Dest: e,
- Source: state.Hostname,
- })
- if err != nil {
- log.Fatalf("json marshal error: %v", err)
- }
- resp, err := http.Post(e+"/write", "application/json", bytes.NewReader(body))
- if err != nil {
- state.Logf("Warning: unable to contact the endpoint %q: %v", e, err)
- return
- }
- defer resp.Body.Close()
- body, err = ioutil.ReadAll(resp.Body)
- if err != nil {
- state.Logf("Warning: unable to read response from '%v': '%v'", e, err)
- return
- }
- var wr WriteResp
- err = json.Unmarshal(body, &wr)
- if err != nil {
- state.Logf("Warning: unable to unmarshal response (%v) from '%v': '%v'", string(body), e, err)
- return
- }
- state.appendSuccessfulSend(wr.Hostname)
- }
|