perfcounter_nodestats.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. // +build windows
  2. /*
  3. Copyright 2017 The Kubernetes Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package winstats
  15. import (
  16. "errors"
  17. "fmt"
  18. "os"
  19. "os/exec"
  20. "runtime"
  21. "strings"
  22. "sync"
  23. "time"
  24. "unsafe"
  25. cadvisorapi "github.com/google/cadvisor/info/v1"
  26. "golang.org/x/sys/windows"
  27. "k8s.io/apimachinery/pkg/util/wait"
  28. "k8s.io/klog"
  29. )
  // MemoryStatusEx mirrors the Windows MEMORYSTATUSEX structure. A pointer to
  // it is handed to GlobalMemoryStatusEx, so the order and width of the fields
  // must stay exactly as declared here.
  // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx
  type MemoryStatusEx struct {
  	// Length is filled in by the caller with the size of this struct before
  	// the system call is made.
  	Length     uint32
  	MemoryLoad uint32
  	// TotalPhys is the value this package reports as physical memory capacity.
  	TotalPhys, AvailPhys         uint64
  	TotalPageFile, AvailPageFile uint64
  	TotalVirtual, AvailVirtual   uint64
  	AvailExtendedVirtual         uint64
  }
  43. var (
  44. modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
  45. procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
  46. )
  47. // NewPerfCounterClient creates a client using perf counters
  48. func NewPerfCounterClient() (Client, error) {
  49. // Initialize the cache
  50. initCache := cpuUsageCoreNanoSecondsCache{0, 0}
  51. return newClient(&perfCounterNodeStatsClient{
  52. cpuUsageCoreNanoSecondsCache: initCache,
  53. })
  54. }
  55. // perfCounterNodeStatsClient is a client that provides Windows Stats via PerfCounters
  56. type perfCounterNodeStatsClient struct {
  57. nodeMetrics
  58. mu sync.RWMutex // mu protects nodeMetrics
  59. nodeInfo
  60. // cpuUsageCoreNanoSecondsCache caches the cpu usage for nodes.
  61. cpuUsageCoreNanoSecondsCache
  62. }
  63. func (p *perfCounterNodeStatsClient) startMonitoring() error {
  64. memory, err := getPhysicallyInstalledSystemMemoryBytes()
  65. if err != nil {
  66. return err
  67. }
  68. osInfo, err := GetOSInfo()
  69. if err != nil {
  70. return err
  71. }
  72. p.nodeInfo = nodeInfo{
  73. kernelVersion: osInfo.GetPatchVersion(),
  74. osImageVersion: osInfo.ProductName,
  75. memoryPhysicalCapacityBytes: memory,
  76. startTime: time.Now(),
  77. }
  78. cpuCounter, err := newPerfCounter(cpuQuery)
  79. if err != nil {
  80. return err
  81. }
  82. memWorkingSetCounter, err := newPerfCounter(memoryPrivWorkingSetQuery)
  83. if err != nil {
  84. return err
  85. }
  86. memCommittedBytesCounter, err := newPerfCounter(memoryCommittedBytesQuery)
  87. if err != nil {
  88. return err
  89. }
  90. networkAdapterCounter, err := newNetworkCounters()
  91. if err != nil {
  92. return err
  93. }
  94. go wait.Forever(func() {
  95. p.collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter, networkAdapterCounter)
  96. }, perfCounterUpdatePeriod)
  97. // Cache the CPU usage every defaultCachePeriod
  98. go wait.Forever(func() {
  99. newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds
  100. p.mu.Lock()
  101. defer p.mu.Unlock()
  102. p.cpuUsageCoreNanoSecondsCache = cpuUsageCoreNanoSecondsCache{
  103. previousValue: p.cpuUsageCoreNanoSecondsCache.latestValue,
  104. latestValue: newValue,
  105. }
  106. }, defaultCachePeriod)
  107. return nil
  108. }
  109. func (p *perfCounterNodeStatsClient) getMachineInfo() (*cadvisorapi.MachineInfo, error) {
  110. hostname, err := os.Hostname()
  111. if err != nil {
  112. return nil, err
  113. }
  114. systemUUID, err := getSystemUUID()
  115. if err != nil {
  116. return nil, err
  117. }
  118. return &cadvisorapi.MachineInfo{
  119. NumCores: runtime.NumCPU(),
  120. MemoryCapacity: p.nodeInfo.memoryPhysicalCapacityBytes,
  121. MachineID: hostname,
  122. SystemUUID: systemUUID,
  123. }, nil
  124. }
  125. func (p *perfCounterNodeStatsClient) getVersionInfo() (*cadvisorapi.VersionInfo, error) {
  126. return &cadvisorapi.VersionInfo{
  127. KernelVersion: p.nodeInfo.kernelVersion,
  128. ContainerOsVersion: p.nodeInfo.osImageVersion,
  129. }, nil
  130. }
  131. func (p *perfCounterNodeStatsClient) getNodeMetrics() (nodeMetrics, error) {
  132. p.mu.RLock()
  133. defer p.mu.RUnlock()
  134. return p.nodeMetrics, nil
  135. }
  136. func (p *perfCounterNodeStatsClient) getNodeInfo() nodeInfo {
  137. return p.nodeInfo
  138. }
  139. func (p *perfCounterNodeStatsClient) collectMetricsData(cpuCounter, memWorkingSetCounter, memCommittedBytesCounter *perfCounter, networkAdapterCounter *networkCounter) {
  140. cpuValue, err := cpuCounter.getData()
  141. cpuCores := runtime.NumCPU()
  142. if err != nil {
  143. klog.Errorf("Unable to get cpu perf counter data; err: %v", err)
  144. return
  145. }
  146. memWorkingSetValue, err := memWorkingSetCounter.getData()
  147. if err != nil {
  148. klog.Errorf("Unable to get memWorkingSet perf counter data; err: %v", err)
  149. return
  150. }
  151. memCommittedBytesValue, err := memCommittedBytesCounter.getData()
  152. if err != nil {
  153. klog.Errorf("Unable to get memCommittedBytes perf counter data; err: %v", err)
  154. return
  155. }
  156. networkAdapterStats, err := networkAdapterCounter.getData()
  157. if err != nil {
  158. klog.Errorf("Unable to get network adapter perf counter data; err: %v", err)
  159. return
  160. }
  161. p.mu.Lock()
  162. defer p.mu.Unlock()
  163. p.nodeMetrics = nodeMetrics{
  164. cpuUsageCoreNanoSeconds: p.convertCPUValue(cpuCores, cpuValue),
  165. cpuUsageNanoCores: p.getCPUUsageNanoCores(),
  166. memoryPrivWorkingSetBytes: memWorkingSetValue,
  167. memoryCommittedBytes: memCommittedBytesValue,
  168. interfaceStats: networkAdapterStats,
  169. timeStamp: time.Now(),
  170. }
  171. }
  172. func (p *perfCounterNodeStatsClient) convertCPUValue(cpuCores int, cpuValue uint64) uint64 {
  173. // This converts perf counter data which is cpu percentage for all cores into nanoseconds.
  174. // The formula is (cpuPercentage / 100.0) * #cores * 1e+9 (nano seconds). More info here:
  175. // https://github.com/kubernetes/heapster/issues/650
  176. newValue := p.nodeMetrics.cpuUsageCoreNanoSeconds + uint64((float64(cpuValue)/100.0)*float64(cpuCores)*1e9)
  177. return newValue
  178. }
  179. func (p *perfCounterNodeStatsClient) getCPUUsageNanoCores() uint64 {
  180. cachePeriodSeconds := uint64(defaultCachePeriod / time.Second)
  181. cpuUsageNanoCores := (p.cpuUsageCoreNanoSecondsCache.latestValue - p.cpuUsageCoreNanoSecondsCache.previousValue) / cachePeriodSeconds
  182. return cpuUsageNanoCores
  183. }
  // getSystemUUID runs `wmic csproduct get UUID` and returns the second
  // whitespace-separated token of its output. Any output that does not consist
  // of exactly two tokens is rejected with an error that quotes the raw output.
  func getSystemUUID() (string, error) {
  	output, err := exec.Command("wmic", "csproduct", "get", "UUID").Output()
  	if err != nil {
  		return "", err
  	}

  	if tokens := strings.Fields(string(output)); len(tokens) == 2 {
  		return tokens[1], nil
  	}
  	return "", fmt.Errorf("received unexpected value retrieving vm uuid: %q", string(output))
  }
  195. func getPhysicallyInstalledSystemMemoryBytes() (uint64, error) {
  196. // We use GlobalMemoryStatusEx instead of GetPhysicallyInstalledSystemMemory
  197. // on Windows node for the following reasons:
  198. // 1. GetPhysicallyInstalledSystemMemory retrieves the amount of physically
  199. // installed RAM from the computer's SMBIOS firmware tables.
  200. // https://msdn.microsoft.com/en-us/library/windows/desktop/cc300158(v=vs.85).aspx
  201. // On some VM, it is unable to read data from SMBIOS and fails with ERROR_INVALID_DATA.
  202. // 2. On Linux node, total physical memory is read from MemTotal in /proc/meminfo.
  203. // GlobalMemoryStatusEx returns the amount of physical memory that is available
  204. // for the operating system to use. The amount returned by GlobalMemoryStatusEx
  205. // is closer in parity with Linux
  206. // https://www.kernel.org/doc/Documentation/filesystems/proc.txt
  207. var statex MemoryStatusEx
  208. statex.Length = uint32(unsafe.Sizeof(statex))
  209. ret, _, _ := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(&statex)))
  210. if ret == 0 {
  211. return 0, errors.New("unable to read physical memory")
  212. }
  213. return statex.TotalPhys, nil
  214. }