k8s-node-setup.psm1 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343
  1. # Copyright 2019 The Kubernetes Authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. <#
  15. .SYNOPSIS
  16. Library for configuring Windows nodes and joining them to the cluster.
  17. .NOTES
  18. This module depends on common.psm1.
  19. Some portions copied / adapted from
  20. https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
  21. .EXAMPLE
  22. Suggested usage for dev/test:
  23. [Net.ServicePointManager]::SecurityProtocol = `
  24. [Net.SecurityProtocolType]::Tls12
  25. Invoke-WebRequest `
  26. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 `
  27. -OutFile C:\k8s-node-setup.psm1
  28. Invoke-WebRequest `
  29. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 `
  30. -OutFile C:\configure.ps1
  31. Import-Module -Force C:\k8s-node-setup.psm1 # -Force to override existing
  32. # Execute functions manually or run configure.ps1.
  33. #>
  34. # IMPORTANT PLEASE NOTE:
  35. # Any time the file structure in the `windows` directory changes, `windows/BUILD`
  36. # and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes.
  37. # We HIGHLY recommend not changing the file structure, because consumers of
  38. # Kubernetes releases depend on the release structure remaining stable.
  39. # TODO: update scripts for these style guidelines:
  40. # - Remove {} around variable references unless actually needed for clarity.
  41. # - Always use single-quoted strings unless actually interpolating variables
  42. # or using escape characters.
  43. # - Use "approved verbs":
  44. # https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
  45. # - Document functions using proper syntax:
  46. # https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx
  47. $INFRA_CONTAINER = 'mcr.microsoft.com/k8s/core/pause:1.0.0'
  48. $GCE_METADATA_SERVER = "169.254.169.254"
  49. # The "management" interface is used by the kubelet and by Windows pods to talk
  50. # to the rest of the Kubernetes cluster *without NAT*. This interface does not
  51. # exist until an initial HNS network has been created on the Windows node - see
  52. # Add_InitialHnsNetwork().
  53. $MGMT_ADAPTER_NAME = "vEthernet (Ethernet*"
  54. Import-Module -Force C:\common.psm1
  55. # Writes a TODO with $Message to the console.
  56. function Log_Todo {
  57. param (
  58. [parameter(Mandatory=$true)] [string]$Message
  59. )
  60. Log-Output "TODO: ${Message}"
  61. }
  62. # Writes a not-implemented warning with $Message to the console and exits the
  63. # script.
  64. function Log_NotImplemented {
  65. param (
  66. [parameter(Mandatory=$true)] [string]$Message
  67. )
  68. Log-Output "Not implemented yet: ${Message}" -Fatal
  69. }
  70. # Fails and exits if the route to the GCE metadata server is not present,
  71. # otherwise does nothing and emits nothing.
  72. function Verify_GceMetadataServerRouteIsPresent {
  73. Try {
  74. Get-NetRoute `
  75. -ErrorAction "Stop" `
  76. -AddressFamily IPv4 `
  77. -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
  78. } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
  79. Log-Output -Fatal `
  80. ("GCE metadata server route is not present as expected.`n" +
  81. "$(Get-NetRoute -AddressFamily IPv4 | Out-String)")
  82. }
  83. }
  84. # Checks if the route to the GCE metadata server is present. Returns when the
  85. # route is NOT present or after a timeout has expired.
  86. function WaitFor_GceMetadataServerRouteToBeRemoved {
  87. $elapsed = 0
  88. $timeout = 60
  89. Log-Output ("Waiting up to ${timeout} seconds for GCE metadata server " +
  90. "route to be removed")
  91. while (${elapsed} -lt ${timeout}) {
  92. Try {
  93. Get-NetRoute `
  94. -ErrorAction "Stop" `
  95. -AddressFamily IPv4 `
  96. -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
  97. } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
  98. break
  99. }
  100. $sleeptime = 2
  101. Start-Sleep ${sleeptime}
  102. ${elapsed} += ${sleeptime}
  103. }
  104. }
  105. # Adds a route to the GCE metadata server to every network interface.
  106. function Add_GceMetadataServerRoute {
  107. # Before setting up HNS the Windows VM has a "vEthernet (nat)" interface and
  108. # a "Ethernet" interface, and the route to the metadata server exists on the
  109. # Ethernet interface. After adding the HNS network a "vEthernet (Ethernet)"
  110. # interface is added, and it seems to subsume the routes of the "Ethernet"
  111. # interface (trying to add routes on the Ethernet interface at this point just
  112. # results in "New-NetRoute : Element not found" errors). I don't know what's
  113. # up with that, but since it's hard to know what's the right thing to do here
  114. # we just try to add the route on all of the network adapters.
  115. Get-NetAdapter | ForEach-Object {
  116. $adapter_index = $_.InterfaceIndex
  117. New-NetRoute `
  118. -ErrorAction Ignore `
  119. -DestinationPrefix "${GCE_METADATA_SERVER}/32" `
  120. -InterfaceIndex ${adapter_index} | Out-Null
  121. }
  122. }
  123. # Writes debugging information, such as Windows version and patch info, to the
  124. # console.
  125. function Dump-DebugInfoToConsole {
  126. Try {
  127. $version = "$([System.Environment]::OSVersion.Version | Out-String)"
  128. $hotfixes = "$(Get-Hotfix | Out-String)"
  129. $image = "$(Get-InstanceMetadata 'image' | Out-String)"
  130. Log-Output "Windows version:`n$version"
  131. Log-Output "Installed hotfixes:`n$hotfixes"
  132. Log-Output "GCE Windows image:`n$image"
  133. } Catch { }
  134. }
  135. # Fetches the kube-env from the instance metadata.
  136. #
  137. # Returns: a PowerShell Hashtable object containing the key-value pairs from
  138. # kube-env.
  139. function Fetch-KubeEnv {
  140. # Testing / debugging:
  141. # First:
  142. # ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
  143. # or:
  144. # ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
  145. # ${kube_env_table} = ConvertFrom-Yaml ${kube_env}
  146. # ${kube_env_table}
  147. # ${kube_env_table}.GetType()
  148. # The type of kube_env is a powershell String.
  149. $kube_env = Get-InstanceMetadataAttribute 'kube-env'
  150. $kube_env_table = ConvertFrom-Yaml ${kube_env}
  151. return ${kube_env_table}
  152. }
  153. # Sets the environment variable $Key to $Value at the Machine scope (will
  154. # be present in the environment for all new shells after a reboot).
  155. function Set_MachineEnvironmentVar {
  156. param (
  157. [parameter(Mandatory=$true)] [string]$Key,
  158. [parameter(Mandatory=$true)] [string]$Value
  159. )
  160. [Environment]::SetEnvironmentVariable($Key, $Value, "Machine")
  161. }
  162. # Sets the environment variable $Key to $Value in the current shell.
  163. function Set_CurrentShellEnvironmentVar {
  164. param (
  165. [parameter(Mandatory=$true)] [string]$Key,
  166. [parameter(Mandatory=$true)] [string]$Value
  167. )
  168. $expression = '$env:' + $Key + ' = "' + $Value + '"'
  169. Invoke-Expression ${expression}
  170. }
  171. # Sets environment variables used by Kubernetes binaries and by other functions
  172. # in this module. Depends on numerous ${kube_env} keys.
  173. function Set-EnvironmentVars {
  174. # Turning the kube-env values into environment variables is not required but
  175. # it makes debugging this script easier, and it also makes the syntax a lot
  176. # easier (${env:K8S_DIR} can be expanded within a string but
  177. # ${kube_env}['K8S_DIR'] cannot be afaik).
  178. $env_vars = @{
  179. "K8S_DIR" = ${kube_env}['K8S_DIR']
  180. "NODE_DIR" = ${kube_env}['NODE_DIR']
  181. "CNI_DIR" = ${kube_env}['CNI_DIR']
  182. "CNI_CONFIG_DIR" = ${kube_env}['CNI_CONFIG_DIR']
  183. "PKI_DIR" = ${kube_env}['PKI_DIR']
  184. "KUBELET_CONFIG" = ${kube_env}['KUBELET_CONFIG_FILE']
  185. "BOOTSTRAP_KUBECONFIG" = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
  186. "KUBEPROXY_KUBECONFIG" = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
  187. "Path" = ${env:Path} + ";" + ${kube_env}['NODE_DIR']
  188. "KUBE_NETWORK" = "l2bridge".ToLower()
  189. "CA_CERT_BUNDLE_PATH" = ${kube_env}['PKI_DIR'] + '\ca-certificates.crt'
  190. "KUBELET_CERT_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.crt'
  191. "KUBELET_KEY_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.key'
  192. # TODO(pjh): these are only in flags, can be removed from env once flags are
  193. # moved to util.sh:
  194. "LOGS_DIR" = ${kube_env}['LOGS_DIR']
  195. "MANIFESTS_DIR" = ${kube_env}['MANIFESTS_DIR']
  196. "KUBECONFIG" = ${kube_env}['KUBECONFIG_FILE']
  197. }
  198. # Set the environment variables in two ways: permanently on the machine (only
  199. # takes effect after a reboot), and in the current shell.
  200. $env_vars.GetEnumerator() | ForEach-Object{
  201. $message = "Setting environment variable: " + $_.key + " = " + $_.value
  202. Log-Output ${message}
  203. Set_MachineEnvironmentVar $_.key $_.value
  204. Set_CurrentShellEnvironmentVar $_.key $_.value
  205. }
  206. }
  207. # Configures various settings and prerequisites needed for the rest of the
  208. # functions in this module and the Kubernetes binaries to operate properly.
  209. function Set-PrerequisiteOptions {
  210. # Windows updates cause the node to reboot at arbitrary times.
  211. Log-Output "Disabling Windows Update service"
  212. sc.exe config wuauserv start=disabled
  213. sc.exe stop wuauserv
  214. # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
  215. [Net.ServicePointManager]::SecurityProtocol = `
  216. [Net.SecurityProtocolType]::Tls12
  217. # https://github.com/cloudbase/powershell-yaml
  218. Log-Output "Installing powershell-yaml module from external repo"
  219. Install-Module -Name powershell-yaml -Force
  220. }
  221. # Disables Windows Defender realtime scanning.
  222. # TODO: remove this workaround once the fix is rolled out the Windows image
  223. # https://github.com/kubernetes/kubernetes/issues/75148
  224. function Disable-WindowsDefender {
  225. # Windows Defender periodically consumes 100% of the CPU, so disable realtime
  226. # scanning. Uninstalling the Windows Feature will prevent the service from
  227. # starting after a reboot.
  228. # TODO(pjh): move this step to image preparation, since we don't want to do a
  229. # full reboot here.
  230. if ((Get-WindowsFeature -Name 'Windows-Defender').Installed) {
  231. Log-Output "Disabling Windows Defender service"
  232. Set-MpPreference -DisableRealtimeMonitoring $true
  233. Uninstall-WindowsFeature -Name 'Windows-Defender'
  234. }
  235. }
  236. # Creates directories where other functions in this module will read and write
  237. # data.
  238. # Note: C:\tmp is required for running certain kubernetes tests.
  239. # C:\var\log is used by kubelet to stored container logs and also
  240. # hard-coded in the fluentd/stackdriver config for log collection.
  241. function Create-Directories {
  242. Log-Output "Creating ${env:K8S_DIR} and its subdirectories."
  243. ForEach ($dir in ("${env:K8S_DIR}", "${env:NODE_DIR}", "${env:LOGS_DIR}",
  244. "${env:CNI_DIR}", "${env:CNI_CONFIG_DIR}", "${env:MANIFESTS_DIR}",
  245. "${env:PKI_DIR}"), "C:\tmp", "C:\var\log") {
  246. mkdir -Force $dir
  247. }
  248. }
  249. # Downloads some external helper scripts needed by other functions in this
  250. # module.
  251. function Download-HelperScripts {
  252. if (-not (ShouldWrite-File ${env:K8S_DIR}\hns.psm1)) {
  253. return
  254. }
  255. MustDownload-File -OutFile ${env:K8S_DIR}\hns.psm1 `
  256. -URLs "https://github.com/Microsoft/SDN/raw/master/Kubernetes/windows/hns.psm1"
  257. }
  258. # Takes the Windows version string from the cluster bash scripts (e.g.
  259. # 'win1809') and returns the correct label to use for containers on this
  260. # version of Windows. Returns $null if $WinVersion is unknown.
  261. function Get_ContainerVersionLabel {
  262. param (
  263. [parameter(Mandatory=$true)] [string]$WinVersion
  264. )
  265. # -match does regular expression matching.
  266. if ($WinVersion -match '1809') {
  267. return '1809'
  268. }
  269. elseif ($WinVersion -match '2019') {
  270. return 'ltsc2019'
  271. }
  272. Throw ("Unknown Windows version $WinVersion, don't know its container " +
  273. "version label")
  274. }
  275. # Downloads the Kubernetes binaries from kube-env's NODE_BINARY_TAR_URL and
  276. # puts them in a subdirectory of $env:K8S_DIR.
  277. #
  278. # Required ${kube_env} keys:
  279. # NODE_BINARY_TAR_URL
  280. function DownloadAndInstall-KubernetesBinaries {
  281. # Assume that presence of kubelet.exe indicates that the kubernetes binaries
  282. # were already previously downloaded to this node.
  283. if (-not (ShouldWrite-File ${env:NODE_DIR}\kubelet.exe)) {
  284. return
  285. }
  286. $tmp_dir = 'C:\k8s_tmp'
  287. New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
  288. $urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
  289. $filename = Split-Path -leaf $urls[0]
  290. $hash = $null
  291. if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
  292. $hash = ${kube_env}['NODE_BINARY_TAR_HASH']
  293. }
  294. MustDownload-File -Hash $hash -OutFile ${tmp_dir}\${filename} -URLs $urls
  295. # Change the directory to the parent directory of ${env:K8S_DIR} and untar.
  296. # This (over-)writes ${dest_dir}/kubernetes/node/bin/*.exe files.
  297. $dest_dir = (Get-Item ${env:K8S_DIR}).Parent.Fullname
  298. tar xzf ${tmp_dir}\${filename} -C ${dest_dir}
  299. # Clean up the temporary directory
  300. Remove-Item -Force -Recurse $tmp_dir
  301. }
  302. # TODO(pjh): this is copied from
  303. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  304. # See if there's a way to fetch or construct the "management subnet" so that
  305. # this is not needed.
  306. function ConvertTo_DecimalIP
  307. {
  308. param(
  309. [parameter(Mandatory = $true, Position = 0)]
  310. [Net.IPAddress] $IPAddress
  311. )
  312. $i = 3; $decimal_ip = 0;
  313. $IPAddress.GetAddressBytes() | % {
  314. $decimal_ip += $_ * [Math]::Pow(256, $i); $i--
  315. }
  316. return [UInt32]$decimal_ip
  317. }
  318. # TODO(pjh): this is copied from
  319. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  320. # See if there's a way to fetch or construct the "management subnet" so that
  321. # this is not needed.
  322. function ConvertTo_DottedDecimalIP
  323. {
  324. param(
  325. [parameter(Mandatory = $true, Position = 0)]
  326. [Uint32] $IPAddress
  327. )
  328. $dotted_ip = $(for ($i = 3; $i -gt -1; $i--) {
  329. $remainder = $IPAddress % [Math]::Pow(256, $i)
  330. ($IPAddress - $remainder) / [Math]::Pow(256, $i)
  331. $IPAddress = $remainder
  332. })
  333. return [String]::Join(".", $dotted_ip)
  334. }
  335. # TODO(pjh): this is copied from
  336. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  337. # See if there's a way to fetch or construct the "management subnet" so that
  338. # this is not needed.
  339. function ConvertTo_MaskLength
  340. {
  341. param(
  342. [parameter(Mandatory = $True, Position = 0)]
  343. [Net.IPAddress] $SubnetMask
  344. )
  345. $bits = "$($SubnetMask.GetAddressBytes() | % {
  346. [Convert]::ToString($_, 2)
  347. } )" -replace "[\s0]"
  348. return $bits.Length
  349. }
  350. # Returns the "management" subnet on which the Windows pods+kubelet will
  351. # communicate with the rest of the Kubernetes cluster without NAT. In GCE this
  352. # is the subnet that VM internal IPs are allocated from.
  353. #
  354. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  355. function Get_MgmtSubnet {
  356. $net_adapter = Get_MgmtNetAdapter
  357. # TODO(pjh): applying the primary interface's subnet mask to its IP address
  358. # *should* give us the GCE network subnet that VM IP addresses are being
  359. # allocated from... however it might be more accurate or straightforward to
  360. # just fetch the IP address range for the VPC subnet that the kube-up script
  361. # creates (kubernetes-subnet-default).
  362. $addr = (Get-NetIPAddress `
  363. -InterfaceAlias ${net_adapter}.ifAlias `
  364. -AddressFamily IPv4).IPAddress
  365. $mask = (Get-WmiObject Win32_NetworkAdapterConfiguration |
  366. Where-Object InterfaceIndex -eq $(${net_adapter}.ifIndex)).IPSubnet[0]
  367. $mgmt_subnet = `
  368. (ConvertTo_DecimalIP ${addr}) -band (ConvertTo_DecimalIP ${mask})
  369. $mgmt_subnet = ConvertTo_DottedDecimalIP ${mgmt_subnet}
  370. return "${mgmt_subnet}/$(ConvertTo_MaskLength $mask)"
  371. }
  372. # Returns a network adapter object for the "management" interface via which the
  373. # Windows pods+kubelet will communicate with the rest of the Kubernetes cluster.
  374. #
  375. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  376. function Get_MgmtNetAdapter {
  377. $net_adapter = Get-NetAdapter | Where-Object Name -like ${MGMT_ADAPTER_NAME}
  378. if (-not ${net_adapter}) {
  379. Throw ("Failed to find a suitable network adapter, check your network " +
  380. "settings.")
  381. }
  382. return $net_adapter
  383. }
  384. # Decodes the base64 $Data string and writes it as binary to $File. Does
  385. # nothing if $File already exists and $REDO_STEPS is not set.
  386. function Write_PkiData {
  387. param (
  388. [parameter(Mandatory=$true)] [string] $Data,
  389. [parameter(Mandatory=$true)] [string] $File
  390. )
  391. if (-not (ShouldWrite-File $File)) {
  392. return
  393. }
  394. # This command writes out a PEM certificate file, analogous to "base64
  395. # --decode" on Linux. See https://stackoverflow.com/a/51914136/1230197.
  396. [IO.File]::WriteAllBytes($File, [Convert]::FromBase64String($Data))
  397. Log_Todo ("need to set permissions correctly on ${File}; not sure what the " +
  398. "Windows equivalent of 'umask 077' is")
  399. # Linux: owned by root, rw by user only.
  400. # -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
  401. # -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
  402. # -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
  403. # Windows:
  404. # https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
  405. # https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
  406. }
  407. # Creates the node PKI files in $env:PKI_DIR.
  408. #
  409. # Required ${kube_env} keys:
  410. # CA_CERT
  411. # KUBELET_CERT
  412. # KUBELET_KEY
  413. function Create-NodePki {
  414. Log-Output "Creating node pki files"
  415. $CA_CERT_BUNDLE = ${kube_env}['CA_CERT']
  416. $KUBELET_CERT = ${kube_env}['KUBELET_CERT']
  417. $KUBELET_KEY = ${kube_env}['KUBELET_KEY']
  418. Write_PkiData "${CA_CERT_BUNDLE}" ${env:CA_CERT_BUNDLE_PATH}
  419. Write_PkiData "${KUBELET_CERT}" ${env:KUBELET_CERT_PATH}
  420. Write_PkiData "${KUBELET_KEY}" ${env:KUBELET_KEY_PATH}
  421. Get-ChildItem ${env:PKI_DIR}
  422. }
  423. # Creates the kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
  424. #
  425. # Create-NodePki() must be called first.
  426. #
  427. # Required ${kube_env} keys:
  428. # KUBERNETES_MASTER_NAME: the apiserver IP address.
  429. function Create-KubeletKubeconfig {
  430. # The API server IP address comes from KUBERNETES_MASTER_NAME in kube-env, I
  431. # think. cluster/gce/gci/configure-helper.sh?l=2801
  432. $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']
  433. # TODO(pjh): set these using kube-env values.
  434. $createBootstrapConfig = $true
  435. $fetchBootstrapConfig = $false
  436. if (${createBootstrapConfig}) {
  437. if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
  438. return
  439. }
  440. New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
  441. # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
  442. # "system:node:$(hostname)".
  443. Set-Content ${env:BOOTSTRAP_KUBECONFIG} `
  444. 'apiVersion: v1
  445. kind: Config
  446. users:
  447. - name: kubelet
  448. user:
  449. client-certificate: KUBELET_CERT_PATH
  450. client-key: KUBELET_KEY_PATH
  451. clusters:
  452. - name: local
  453. cluster:
  454. server: https://APISERVER_ADDRESS
  455. certificate-authority: CA_CERT_BUNDLE_PATH
  456. contexts:
  457. - context:
  458. cluster: local
  459. user: kubelet
  460. name: service-account-context
  461. current-context: service-account-context'.`
  462. replace('KUBELET_CERT_PATH', ${env:KUBELET_CERT_PATH}).`
  463. replace('KUBELET_KEY_PATH', ${env:KUBELET_KEY_PATH}).`
  464. replace('APISERVER_ADDRESS', ${apiserverAddress}).`
  465. replace('CA_CERT_BUNDLE_PATH', ${env:CA_CERT_BUNDLE_PATH})
  466. Log-Output ("kubelet bootstrap kubeconfig:`n" +
  467. "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
  468. }
  469. elseif (${fetchBootstrapConfig}) {
  470. Log_NotImplemented `
  471. "fetching kubelet bootstrap-kubeconfig file from metadata"
  472. # get-metadata-value "instance/attributes/bootstrap-kubeconfig" >
  473. # /var/lib/kubelet/bootstrap-kubeconfig
  474. Log-Output ("kubelet bootstrap kubeconfig:`n" +
  475. "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
  476. }
  477. else {
  478. Log_NotImplemented "fetching kubelet kubeconfig file from metadata"
  479. }
  480. }
  481. # Creates the kube-proxy user kubeconfig file at $env:KUBEPROXY_KUBECONFIG.
  482. #
  483. # Create-NodePki() must be called first.
  484. #
  485. # Required ${kube_env} keys:
  486. # CA_CERT
  487. # KUBE_PROXY_TOKEN
  488. function Create-KubeproxyKubeconfig {
  489. if (-not (ShouldWrite-File ${env:KUBEPROXY_KUBECONFIG})) {
  490. return
  491. }
  492. New-Item -Force -ItemType file ${env:KUBEPROXY_KUBECONFIG} | Out-Null
  493. # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
  494. # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
  495. # Use just one or the other for consistency?
  496. Set-Content ${env:KUBEPROXY_KUBECONFIG} `
  497. 'apiVersion: v1
  498. kind: Config
  499. users:
  500. - name: kube-proxy
  501. user:
  502. token: KUBEPROXY_TOKEN
  503. clusters:
  504. - name: local
  505. cluster:
  506. server: https://APISERVER_ADDRESS
  507. certificate-authority-data: CA_CERT
  508. contexts:
  509. - context:
  510. cluster: local
  511. user: kube-proxy
  512. name: service-account-context
  513. current-context: service-account-context'.`
  514. replace('KUBEPROXY_TOKEN', ${kube_env}['KUBE_PROXY_TOKEN']).`
  515. replace('CA_CERT', ${kube_env}['CA_CERT']).`
  516. replace('APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])
  517. Log-Output ("kubeproxy kubeconfig:`n" +
  518. "$(Get-Content -Raw ${env:KUBEPROXY_KUBECONFIG})")
  519. }
  520. # Returns the IP alias range configured for this GCE instance.
  521. function Get_IpAliasRange {
  522. $url = ("http://${GCE_METADATA_SERVER}/computeMetadata/v1/instance/" +
  523. "network-interfaces/0/ip-aliases/0")
  524. $client = New-Object Net.WebClient
  525. $client.Headers.Add('Metadata-Flavor', 'Google')
  526. return ($client.DownloadString($url)).Trim()
  527. }
  528. # Retrieves the pod CIDR and sets it in $env:POD_CIDR.
  529. function Set-PodCidr {
  530. while($true) {
  531. $pod_cidr = Get_IpAliasRange
  532. if (-not $?) {
  533. Log-Output ${pod_cIDR}
  534. Log-Output "Retrying Get_IpAliasRange..."
  535. Start-Sleep -sec 1
  536. continue
  537. }
  538. break
  539. }
  540. Log-Output "fetched pod CIDR (same as IP alias range): ${pod_cidr}"
  541. Set_MachineEnvironmentVar "POD_CIDR" ${pod_cidr}
  542. Set_CurrentShellEnvironmentVar "POD_CIDR" ${pod_cidr}
  543. }
  544. # Adds an initial HNS network on the Windows node which forces the creation of
  545. # a virtual switch and the "management" interface that will be used to
  546. # communicate with the rest of the Kubernetes cluster without NAT.
  547. #
  548. # Note that adding the initial HNS network may cause connectivity to the GCE
  549. # metadata server to be lost due to a Windows bug.
  550. # Configure-HostNetworkingService() restores connectivity, look there for
  551. # details.
  552. #
  553. # Download-HelperScripts() must have been called first.
  554. function Add_InitialHnsNetwork {
  555. $INITIAL_HNS_NETWORK = 'External'
  556. # This comes from
  557. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
  558. # (or
  559. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
  560. #
  561. # daschott noted on Slack: "L2bridge networks require an external vSwitch.
  562. # The first network ("External") with hardcoded values in the script is just
  563. # a placeholder to create an external vSwitch. This is purely for convenience
  564. # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
  565. # nodes without a network blip. Creating a vSwitch takes time, causes network
  566. # blips, and it makes it more likely to hit the issue where flanneld is
  567. # stuck, so we want to do this as rarely as possible."
  568. $hns_network = Get-HnsNetwork | Where-Object Name -eq $INITIAL_HNS_NETWORK
  569. if ($hns_network) {
  570. if ($REDO_STEPS) {
  571. Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
  572. "already exists, removing it and recreating it")
  573. $hns_network | Remove-HnsNetwork
  574. $hns_network = $null
  575. }
  576. else {
  577. Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
  578. "already exists, not recreating it")
  579. return
  580. }
  581. }
  582. Log-Output ("Creating initial HNS network to force creation of " +
  583. "${MGMT_ADAPTER_NAME} interface")
  584. # Note: RDP connection will hiccup when running this command.
  585. New-HNSNetwork `
  586. -Type "L2Bridge" `
  587. -AddressPrefix "192.168.255.0/30" `
  588. -Gateway "192.168.255.1" `
  589. -Name $INITIAL_HNS_NETWORK `
  590. -Verbose
  591. }
  592. # Configures HNS on the Windows node to enable Kubernetes networking:
  593. # - Creates the "management" interface associated with an initial HNS network.
  594. # - Creates the HNS network $env:KUBE_NETWORK for pod networking.
  595. # - Creates an HNS endpoint for pod networking.
  596. # - Adds necessary routes on the management interface.
  597. # - Verifies that the GCE metadata server connection remains intact.
  598. #
  599. # Prerequisites:
  600. # $env:POD_CIDR is set (by Set-PodCidr).
  601. # Download-HelperScripts() has been called.
  602. function Configure-HostNetworkingService {
  603. Import-Module -Force ${env:K8S_DIR}\hns.psm1
  604. Add_InitialHnsNetwork
  605. # For Windows nodes the pod gateway IP address is the .1 address in the pod
  606. # CIDR for the host, but from inside containers it's the .2 address.
  607. $pod_gateway = `
  608. ${env:POD_CIDR}.substring(0, ${env:POD_CIDR}.lastIndexOf('.')) + '.1'
  609. $pod_endpoint_gateway = `
  610. ${env:POD_CIDR}.substring(0, ${env:POD_CIDR}.lastIndexOf('.')) + '.2'
  611. Log-Output ("Setting up Windows node HNS networking: " +
  612. "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
  613. "podEndpointGateway = ${pod_endpoint_gateway}")
  614. $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
  615. if ($hns_network) {
  616. if ($REDO_STEPS) {
  617. Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
  618. "removing it and recreating it")
  619. $hns_network | Remove-HnsNetwork
  620. $hns_network = $null
  621. }
  622. else {
  623. Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
  624. }
  625. }
  626. $created_hns_network = $false
  627. if (-not $hns_network) {
  628. # Note: RDP connection will hiccup when running this command.
  629. $hns_network = New-HNSNetwork `
  630. -Type "L2Bridge" `
  631. -AddressPrefix ${env:POD_CIDR} `
  632. -Gateway ${pod_gateway} `
  633. -Name ${env:KUBE_NETWORK} `
  634. -Verbose
  635. $created_hns_network = $true
  636. }
  637. $endpoint_name = "cbr0"
  638. $vnic_name = "vEthernet (${endpoint_name})"
  639. $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
  640. # Note: we don't expect to ever enter this block currently - while the HNS
  641. # network does seem to persist across reboots, the HNS endpoints do not.
  642. if ($hns_endpoint) {
  643. if ($REDO_STEPS) {
  644. Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
  645. "removing it and recreating it")
  646. $hns_endpoint | Remove-HnsEndpoint
  647. $hns_endpoint = $null
  648. }
  649. else {
  650. Log-Output "Skip: HNS endpoint $endpoint_name already exists"
  651. }
  652. }
  653. if (-not $hns_endpoint) {
  654. $hns_endpoint = New-HnsEndpoint `
  655. -NetworkId ${hns_network}.Id `
  656. -Name ${endpoint_name} `
  657. -IPAddress ${pod_endpoint_gateway} `
  658. -Gateway "0.0.0.0" `
  659. -Verbose
  660. # TODO(pjh): find out: why is this always CompartmentId 1?
  661. Attach-HnsHostEndpoint `
  662. -EndpointID ${hns_endpoint}.Id `
  663. -CompartmentID 1 `
  664. -Verbose
  665. netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
  666. }
  667. Get-HNSPolicyList | Remove-HnsPolicyList
  668. # Add a route from the management NIC to the pod CIDR.
  669. #
  670. # When a packet from a Kubernetes service backend arrives on the destination
  671. # Windows node, the reverse SNAT will be applied and the source address of
  672. # the packet gets replaced from the pod IP to the service VIP. The packet
  673. # will then leave the VM and return back through hairpinning.
  674. #
  675. # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
  676. # the packet with the service VIP will get blocked and be lost. With this
  677. # route, the packet will be routed to the pod subnetwork, and not leave the
  678. # VM.
  679. $mgmt_net_adapter = Get_MgmtNetAdapter
  680. New-NetRoute `
  681. -ErrorAction Ignore `
  682. -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
  683. -DestinationPrefix ${env:POD_CIDR} `
  684. -NextHop "0.0.0.0" `
  685. -Verbose
  686. if ($created_hns_network) {
  687. # There is an HNS bug where the route to the GCE metadata server will be
  688. # removed when the HNS network is created:
  689. # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
  690. # The behavior here is very unpredictable: the route may only be removed
  691. # after some delay, or it may appear to be removed then you'll add it back
  692. # but then it will be removed once again. So, we first wait a long
  693. # unfortunate amount of time to ensure that things have quiesced, then we
  694. # wait until we're sure the route is really gone before re-adding it again.
  695. Log-Output "Waiting 45 seconds for host network state to quiesce"
  696. Start-Sleep 45
  697. WaitFor_GceMetadataServerRouteToBeRemoved
  698. Log-Output "Re-adding the GCE metadata server route"
  699. Add_GceMetadataServerRoute
  700. }
  701. Verify_GceMetadataServerRouteIsPresent
  702. Log-Output "Host network setup complete"
  703. }
  704. function Configure-GcePdTools {
  705. if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
  706. MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
  707. -URLs "https://github.com/pjh/gce-tools/raw/master/GceTools/GetGcePdName/GetGcePdName.dll"
  708. }
  709. if (-not (Test-Path $PsHome\profile.ps1)) {
  710. New-Item -path $PsHome\profile.ps1 -type file
  711. }
  712. Add-Content $PsHome\profile.ps1 `
  713. '$modulePath = "K8S_DIR\GetGcePdName.dll"
  714. Unblock-File $modulePath
  715. Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})
  716. }
  717. # Downloads the Windows CNI binaries and writes a CNI config file under
  718. # $env:CNI_CONFIG_DIR.
  719. #
  720. # Prerequisites:
  721. # $env:POD_CIDR is set (by Set-PodCidr).
  722. # The "management" interface exists (Configure-HostNetworkingService).
  723. # The HNS network for pod networking has been configured
  724. # (Configure-HostNetworkingService).
  725. #
  726. # Required ${kube_env} keys:
  727. # DNS_SERVER_IP
  728. # DNS_DOMAIN
  729. # CLUSTER_IP_RANGE
  730. # SERVICE_CLUSTER_IP_RANGE
  731. function Configure-CniNetworking {
  732. $CNI_RELEASE_VERSION = 'v0.8.0'
  733. if ((ShouldWrite-File ${env:CNI_DIR}\win-bridge.exe) -or
  734. (ShouldWrite-File ${env:CNI_DIR}\host-local.exe)) {
  735. $tmp_dir = 'C:\cni_tmp'
  736. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  737. $release_url = ('https://github.com/containernetworking/plugins/releases/' +
  738. 'download/' + $CNI_RELEASE_VERSION + '/')
  739. $sha_url = ($release_url +
  740. "cni-plugins-windows-amd64-$CNI_RELEASE_VERSION.tgz.sha1")
  741. $tgz_url = ($release_url +
  742. "cni-plugins-windows-amd64-$CNI_RELEASE_VERSION.tgz")
  743. MustDownload-File -URLs $sha_url -OutFile $tmp_dir\cni-plugins.sha1
  744. $sha1_val = ($(Get-Content $tmp_dir\cni-plugins.sha1) -split ' ',2)[0]
  745. MustDownload-File `
  746. -URLs $tgz_url `
  747. -OutFile $tmp_dir\cni-plugins.tgz `
  748. -Hash $sha1_val
  749. Push-Location $tmp_dir
  750. # tar can only extract in the current directory.
  751. tar -xvf $tmp_dir\cni-plugins.tgz
  752. Move-Item -Force host-local.exe ${env:CNI_DIR}\
  753. Move-Item -Force win-bridge.exe ${env:CNI_DIR}\
  754. Pop-Location
  755. Remove-Item -Force -Recurse $tmp_dir
  756. }
  757. if (-not ((Test-Path ${env:CNI_DIR}\win-bridge.exe) -and `
  758. (Test-Path ${env:CNI_DIR}\host-local.exe))) {
  759. Log-Output `
  760. "win-bridge.exe and host-local.exe not found in ${env:CNI_DIR}" `
  761. -Fatal
  762. }
  763. $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  764. if (-not (ShouldWrite-File ${l2bridge_conf})) {
  765. return
  766. }
  767. $mgmt_ip = (Get_MgmtNetAdapter |
  768. Get-NetIPAddress -AddressFamily IPv4).IPAddress
  769. $mgmt_subnet = Get_MgmtSubnet
  770. Log-Output ("using mgmt IP ${mgmt_ip} and mgmt subnet ${mgmt_subnet} for " +
  771. "CNI config")
  772. # We reserve .1 and .2 for gateways. Start the CIDR range from ".3" so that
  773. # IPAM does not allocate those IPs to pods.
  774. $cidr_range_start = `
  775. ${env:POD_CIDR}.substring(0, ${env:POD_CIDR}.lastIndexOf('.')) + '.3'
  776. # Explanation of the CNI config values:
  777. # CLUSTER_CIDR: the cluster CIDR from which pod CIDRs are allocated.
  778. # POD_CIDR: the pod CIDR assigned to this node.
  779. # CIDR_RANGE_START: start of the pod CIDR range.
  780. # MGMT_SUBNET: the subnet on which the Windows pods + kubelet will
  781. # communicate with the rest of the cluster without NAT (i.e. the subnet
  782. # that VM internal IPs are allocated from).
  783. # MGMT_IP: the IP address assigned to the node's primary network interface
  784. # (i.e. the internal IP of the GCE VM).
  785. # SERVICE_CIDR: the CIDR used for kubernetes services.
  786. # DNS_SERVER_IP: the cluster's DNS server IP address.
  787. # DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  788. New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null
  789. Set-Content ${l2bridge_conf} `
  790. '{
  791. "cniVersion": "0.2.0",
  792. "name": "l2bridge",
  793. "type": "win-bridge",
  794. "capabilities": {
  795. "portMappings": true
  796. },
  797. "ipam": {
  798. "type": "host-local",
  799. "subnet": "POD_CIDR",
  800. "rangeStart": "CIDR_RANGE_START"
  801. },
  802. "dns": {
  803. "Nameservers": [
  804. "DNS_SERVER_IP"
  805. ],
  806. "Search": [
  807. "DNS_DOMAIN"
  808. ]
  809. },
  810. "Policies": [
  811. {
  812. "Name": "EndpointPolicy",
  813. "Value": {
  814. "Type": "OutBoundNAT",
  815. "ExceptionList": [
  816. "CLUSTER_CIDR",
  817. "SERVICE_CIDR",
  818. "MGMT_SUBNET"
  819. ]
  820. }
  821. },
  822. {
  823. "Name": "EndpointPolicy",
  824. "Value": {
  825. "Type": "ROUTE",
  826. "DestinationPrefix": "SERVICE_CIDR",
  827. "NeedEncap": true
  828. }
  829. },
  830. {
  831. "Name": "EndpointPolicy",
  832. "Value": {
  833. "Type": "ROUTE",
  834. "DestinationPrefix": "MGMT_IP/32",
  835. "NeedEncap": true
  836. }
  837. }
  838. ]
  839. }'.replace('POD_CIDR', ${env:POD_CIDR}).`
  840. replace('CIDR_RANGE_START', ${cidr_range_start}).`
  841. replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP']).`
  842. replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN']).`
  843. replace('MGMT_IP', ${mgmt_ip}).`
  844. replace('CLUSTER_CIDR', ${kube_env}['CLUSTER_IP_RANGE']).`
  845. replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE']).`
  846. replace('MGMT_SUBNET', ${mgmt_subnet})
  847. Log-Output "CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  848. }
  849. # Fetches the kubelet config from the instance metadata and puts it at
  850. # $env:KUBELET_CONFIG.
  851. function Configure-Kubelet {
  852. if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
  853. return
  854. }
  855. # The Kubelet config is built by build-kubelet-config() in
  856. # cluster/gce/util.sh, and stored in the metadata server under the
  857. # 'kubelet-config' key.
  858. $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
  859. Set-Content ${env:KUBELET_CONFIG} $kubelet_config
  860. Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
  861. }
  862. # Sets up the kubelet and kube-proxy arguments and starts them as native
  863. # Windows services.
  864. #
  865. # Required ${kube_env} keys:
  866. # KUBELET_ARGS
  867. # KUBEPROXY_ARGS
  868. # CLUSTER_IP_RANGE
  869. function Start-WorkerServices {
  870. # Compute kubelet args
  871. $kubelet_args_str = ${kube_env}['KUBELET_ARGS']
  872. $kubelet_args = $kubelet_args_str.Split(" ")
  873. Log-Output "kubelet_args from metadata: ${kubelet_args}"
  874. $default_kubelet_args = @(`
  875. "--pod-infra-container-image=${INFRA_CONTAINER}"
  876. )
  877. $kubelet_args = ${default_kubelet_args} + ${kubelet_args}
  878. Log-Output "Final kubelet_args: ${kubelet_args}"
  879. # Compute kube-proxy args
  880. $kubeproxy_args_str = ${kube_env}['KUBEPROXY_ARGS']
  881. $kubeproxy_args = $kubeproxy_args_str.Split(" ")
  882. Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"
  883. # kubeproxy is started on Linux nodes using
  884. # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
  885. # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
  886. # kube-proxy --master=https://35.239.84.171
  887. # --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
  888. # --resource-container="" --oom-score-adj=-998 --v=2
  889. # --feature-gates=ExperimentalCriticalPodAnnotation=true
  890. # --iptables-sync-period=1m --iptables-min-sync-period=10s
  891. # --ipvs-sync-period=1m --ipvs-min-sync-period=10s
  892. # And also with various volumeMounts and "securityContext: privileged: true".
  893. $default_kubeproxy_args = @(`
  894. "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
  895. "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])"
  896. )
  897. $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args}
  898. Log-Output "Final kubeproxy_args: ${kubeproxy_args}"
  899. # TODO(pjh): kubelet is emitting these messages:
  900. # I1023 23:44:11.761915 2468 kubelet.go:274] Adding pod path:
  901. # C:\etc\kubernetes
  902. # I1023 23:44:11.775601 2468 file.go:68] Watching path
  903. # "C:\\etc\\kubernetes"
  904. # ...
  905. # E1023 23:44:31.794327 2468 file.go:182] Can't process manifest file
  906. # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
  907. # as pod(yaml: line 10: did not find expected <document start>), please check
  908. # config file.
  909. #
  910. # Figure out how to change the directory that the kubelet monitors for new
  911. # pod manifests.
  912. # We configure the service to restart on failure, after 10s wait. We reset
  913. # the restart count to 0 each time, so we re-use our restart/10000 action on
  914. # each failure. Note it currently restarts even when explicitly stopped, you
  915. # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
  916. # kubelet`). See issue #72900.
  917. if (Get-Process | Where-Object Name -eq "kubelet") {
  918. Log-Output -Fatal `
  919. "A kubelet process is already running, don't know what to do"
  920. }
  921. Log-Output "Creating kubelet service"
  922. sc.exe create kubelet binPath= "${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
  923. sc.exe failure kubelet reset= 0 actions= restart/10000
  924. Log-Output "Starting kubelet service"
  925. sc.exe start kubelet
  926. Log-Output "Waiting 10 seconds for kubelet to stabilize"
  927. Start-Sleep 10
  928. if (Get-Process | Where-Object Name -eq "kube-proxy") {
  929. Log-Output -Fatal `
  930. "A kube-proxy process is already running, don't know what to do"
  931. }
  932. Log-Output "Creating kube-proxy service"
  933. sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
  934. sc.exe failure kube-proxy reset= 0 actions= restart/10000
  935. Log-Output "Starting kube-proxy service"
  936. sc.exe start kube-proxy
  937. # F1020 23:08:52.000083 9136 server.go:361] unable to load in-cluster
  938. # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
  939. # defined
  940. # TODO(pjh): still getting errors like these in kube-proxy log:
  941. # E1023 04:03:58.143449 4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Endpoints: Get https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  942. # E1023 04:03:58.150266 4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Service: Get https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  943. Log_Todo ("verify that jobs are still running; print more details about " +
  944. "the background jobs.")
  945. Log-Output "$(Get-Service kube* | Out-String)"
  946. Verify_GceMetadataServerRouteIsPresent
  947. Log-Output "Kubernetes components started successfully"
  948. }
  949. # Runs 'kubectl get nodes'.
  950. # TODO(pjh): run more verification commands.
  951. function Verify-WorkerServices {
  952. Log-Output ("kubectl get nodes:`n" +
  953. "$(& ${env:NODE_DIR}\kubectl.exe get nodes | Out-String)")
  954. Verify_GceMetadataServerRouteIsPresent
  955. Log_Todo "run more verification commands."
  956. }
  957. # Pulls the infra/pause container image onto the node so that it will be
  958. # immediately available when the kubelet tries to run pods.
  959. # TODO(pjh): downloading the container container image may take a few minutes;
  960. # figure out how to run this in the background while perform the rest of the
  961. # node startup steps!
  962. function Pull-InfraContainer {
  963. $name, $label = $INFRA_CONTAINER -split ':',2
  964. if (-not ("$(& docker image list)" -match "$name.*$label")) {
  965. & docker pull $INFRA_CONTAINER
  966. if (!$?) {
  967. throw "Error running 'docker pull $INFRA_CONTAINER'"
  968. }
  969. }
  970. $inspect = "$(& docker inspect $INFRA_CONTAINER | Out-String)"
  971. Log-Output "Infra/pause container:`n$inspect"
  972. }
  973. # Add a registry key for docker in EventLog so that log messages are mapped
  974. # correctly. This is a workaround since the key is missing in the base image.
  975. # https://github.com/MicrosoftDocs/Virtualization-Documentation/pull/503
  976. # TODO: Fix this in the base image.
  977. function Create-DockerRegistryKey {
  978. $tmp_dir = 'C:\tmp_docker_reg'
  979. New-Item -Force -ItemType 'directory' ${tmp_dir} | Out-Null
  980. $reg_file = 'docker.reg'
  981. Set-Content ${tmp_dir}\${reg_file} `
  982. 'Windows Registry Editor Version 5.00
  983. [HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\EventLog\Application\docker]
  984. "CustomSource"=dword:00000001
  985. "EventMessageFile"="C:\\Program Files\\docker\\dockerd.exe"
  986. "TypesSupported"=dword:00000007'
  987. Log-Output "Importing registry key for Docker"
  988. reg import ${tmp_dir}\${reg_file}
  989. Remove-Item -Force -Recurse ${tmp_dir}
  990. }
  991. # Configure Docker daemon and restart the service.
  992. function Configure-Dockerd {
  993. Set-Content "C:\ProgramData\docker\config\daemon.json" @'
  994. {
  995. "log-driver": "json-file",
  996. "log-opts": {
  997. "max-size": "1m",
  998. "max-file": "5"
  999. }
  1000. }
  1001. '@
  1002. Restart-Service Docker
  1003. }
  1004. # TODO(pjh): move the Stackdriver logging agent code below into a separate
  1005. # module; it was put here temporarily to avoid disrupting the file layout in
  1006. # the K8s release machinery.
  1007. $STACKDRIVER_VERSION = 'v1-9'
  1008. $STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
  1009. # Restart the Stackdriver logging agent
  1010. # `Restart-Service StackdriverLogging` may fail because StackdriverLogging
  1011. # sometimes is unstoppable, so we work around it by killing the processes.
  1012. function Restart-StackdriverLoggingAgent {
  1013. Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
  1014. # TODO: check periodically to lower the wait time
  1015. Start-Sleep 10
  1016. if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
  1017. # Force kill the processes.
  1018. Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  1019. Where CommandLine -Like '*Stackdriver/logging*').ProcessId
  1020. }
  1021. Start-Service StackdriverLogging
  1022. }
  1023. # Install and start the Stackdriver logging agent according to
  1024. # https://cloud.google.com/logging/docs/agent/installation.
  1025. # TODO(yujuhong): Update to a newer Stackdriver agent once it is released to
  1026. # support kubernetes metadata properly. The current version does not recognizes
  1027. # the local resource key "logging.googleapis.com/local_resource_id", and fails
  1028. # to label namespace, pod and container names on the logs.
  1029. function InstallAndStart-LoggingAgent {
  1030. # Remove the existing storage.json file if it exists. This is a workaround
  1031. # for the bug where the logging agent cannot start up if the file is
  1032. # corrupted.
  1033. Remove-Item `
  1034. -Force `
  1035. -ErrorAction Ignore `
  1036. ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
  1037. "storage.json")
  1038. if (Test-Path $STACKDRIVER_ROOT) {
  1039. # Note: we should reinstall the Stackdriver agent if $REDO_STEPS is true
  1040. # here, but we don't know how to run the installer without it prompting
  1041. # when Stackdriver is already installed. We dumped the strings in the
  1042. # installer binary and searched for flags to do this but found nothing. Oh
  1043. # well.
  1044. Log-Output ("Skip: $STACKDRIVER_ROOT is already present, assuming that " +
  1045. "Stackdriver logging agent is already installed")
  1046. # Restart-Service restarts a running service or starts a not-running
  1047. # service.
  1048. Restart-StackdriverLoggingAgent
  1049. return
  1050. }
  1051. $url = ("https://dl.google.com/cloudagents/windows/" +
  1052. "StackdriverLogging-${STACKDRIVER_VERSION}.exe")
  1053. $tmp_dir = 'C:\stackdriver_tmp'
  1054. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1055. $installer_file = "${tmp_dir}\StackdriverLogging-${STACKDRIVER_VERSION}.exe"
  1056. MustDownload-File -OutFile $installer_file -URLs $url
  1057. # Start the installer silently. This automatically starts the
  1058. # "StackdriverLogging" service.
  1059. Log-Output 'Invoking Stackdriver installer'
  1060. Start-Process $installer_file -ArgumentList "/S" -Wait
  1061. Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
  1062. -ArgumentList "install","fluent-plugin-record-reformer" `
  1063. -Wait
  1064. # Create a configuration file for kubernetes containers.
  1065. # The config.d directory should have already been created automatically, but
  1066. # try creating again just in case.
  1067. New-Item "$STACKDRIVER_ROOT\LoggingAgent\config.d" `
  1068. -ItemType 'directory' `
  1069. -Force | Out-Null
  1070. $FLUENTD_CONFIG | Out-File `
  1071. -FilePath "$STACKDRIVER_ROOT\LoggingAgent\config.d\k8s_containers.conf" `
  1072. -Encoding ASCII
  1073. # Restart the service to pick up the new configurations.
  1074. Restart-StackdriverLoggingAgent
  1075. Remove-Item -Force -Recurse $tmp_dir
  1076. }
# Fluentd configuration for the Stackdriver logging agent, held as a string.
# InstallAndStart-LoggingAgent writes it to the agent's
# config.d\k8s_containers.conf. Everything between @' and '@ is a literal
# (single-quoted) here-string, so the '#' lines inside it are Fluentd
# comments that end up in the written file, not PowerShell comments. When this
# module is loaded, every occurrence of the NODE_NAME placeholder in the
# string is replaced with the output of the `hostname` command.
$FLUENTD_CONFIG = @'
# This configuration file for Fluentd is used to watch changes to kubernetes
# container logs in the directory /var/lib/docker/containers/ and submit the
# log records to Google Cloud Logging using the cloud-logging plugin.
#
# Example
# =======
# A line in the Docker log file might look like this JSON:
#
# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
# "stream":"stderr",
# "time":"2014-09-25T21:15:03.499185026Z"}
#
# The original tag is derived from the log file's location.
# For example a Docker container's logs might be in the directory:
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
# and in the file:
# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# where 997599971ee6... is the Docker ID of the running container.
# The Kubernetes kubelet makes a symbolic link to this file on the host
# machine in the /var/log/containers directory which includes the pod name,
# the namespace name and the Kubernetes container name:
# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# ->
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# The /var/log directory on the host is mapped to the /var/log directory in the container
# running this instance of Fluentd and we end up collecting the file:
# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# This results in the tag:
# var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
# the container ID.
# The record reformer is used to extract pod_name, namespace_name and
# container_name from the tag and set them in a local_resource_id in the
# format of:
# 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
# The reformer also changes the tags to 'stderr' or 'stdout' based on the
# value of 'stream'.
# local_resource_id is later used by google_cloud plugin to determine the
# monitored resource to ingest logs against.
# Json Log Example:
# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
# TODO: Support CRI log format, which requires the multi_format plugin.
<source>
@type tail
path /var/log/containers/*.log
pos_file /var/log/gcp-containers.log.pos
# Tags at this point are in the format of:
# reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
tag reform.*
format json
time_key time
time_format %Y-%m-%dT%H:%M:%S.%NZ
read_from_head true
</source>
# Example:
# I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
<source>
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
time_format %m%d %H:%M:%S.%N
path /etc/kubernetes/logs/kubelet.log
pos_file /etc/kubernetes/logs/gcp-kubelet.log.pos
tag kubelet
</source>
# Example:
# I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
<source>
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
time_format %m%d %H:%M:%S.%N
path /etc/kubernetes/logs/kube-proxy.log
pos_file /etc/kubernetes/logs/gcp-kube-proxy.log.pos
tag kube-proxy
</source>
<match reform.**>
@type record_reformer
enable_ruby true
<record>
# Extract local_resource_id from tag for 'k8s_container' monitored
# resource. The format is:
# 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
"logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
# Rename the field 'log' to a more generic field 'message'. This way the
# fluent-plugin-google-cloud knows to flatten the field as textPayload
# instead of jsonPayload after extracting 'time', 'severity' and
# 'stream' from the record.
message ${record['log']}
# If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
</record>
tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
remove_keys stream,log
</match>
# TODO: detect exceptions and forward them as one log entry using the
# detect_exceptions plugin
# This section is exclusive for k8s_container logs. These logs come with
# 'raw.stderr' or 'raw.stdout' tags.
<match {raw.stderr,raw.stdout}>
@type google_cloud
# Try to detect JSON formatted log entries.
detect_json true
# Allow log entries from multiple containers to be sent in the same request.
split_logs_by_tag false
# Set the buffer type to file to improve the reliability and reduce the memory consumption
buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
# Set queue_full action to block because we want to pause gracefully
# in case of the off-the-limits load instead of throwing an exception
buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the recommended
# chunk size of 5MB per write request.
buffer_chunk_limit 512k
# Cap the combined memory usage of this buffer and the one below to
# 512KiB/chunk * (6 + 2) chunks = 4 MiB
buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
# Never wait longer than 30 seconds between retries.
max_retry_wait 30
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
# Use multiple threads for processing.
num_threads 2
use_grpc true
# Skip timestamp adjustment as this is in a controlled environment with
# known timestamp format. This helps with CPU usage.
adjust_invalid_timestamps false
</match>
# Attach local_resource_id for 'k8s_node' monitored resource.
<filter **>
@type record_transformer
enable_ruby true
<record>
"logging.googleapis.com/local_resource_id" ${"k8s_node.NODE_NAME"}
</record>
</filter>
'@.replace('NODE_NAME', (hostname))
  1222. # Export all public functions:
  1223. Export-ModuleMember -Function *-*