k8s-node-setup.psm1 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787
  1. # Copyright 2019 The Kubernetes Authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. <#
  15. .SYNOPSIS
  16. Library for configuring Windows nodes and joining them to the cluster.
  17. .NOTES
  18. This module depends on common.psm1.
  19. Some portions copied / adapted from
  20. https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
  21. .EXAMPLE
  22. Suggested usage for dev/test:
  23. [Net.ServicePointManager]::SecurityProtocol = `
  24. [Net.SecurityProtocolType]::Tls12
  25. Invoke-WebRequest `
  26. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 `
  27. -OutFile C:\k8s-node-setup.psm1
  28. Invoke-WebRequest `
  29. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 `
  30. -OutFile C:\configure.ps1
  31. Import-Module -Force C:\k8s-node-setup.psm1 # -Force to override existing
  32. # Execute functions manually or run configure.ps1.
  33. #>
  34. # IMPORTANT PLEASE NOTE:
  35. # Any time the file structure in the `windows` directory changes, `windows/BUILD`
  36. # and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes.
  37. # We HIGHLY recommend not changing the file structure, because consumers of
  38. # Kubernetes releases depend on the release structure remaining stable.
  39. # TODO: update scripts for these style guidelines:
  40. # - Remove {} around variable references unless actually needed for clarity.
  41. # - Always use single-quoted strings unless actually interpolating variables
  42. # or using escape characters.
  43. # - Use "approved verbs":
  44. # https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
  45. # - Document functions using proper syntax:
  46. # https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx
  47. $INFRA_CONTAINER = 'gcr.io/gke-release/pause-win:1.1.0'
  48. $GCE_METADATA_SERVER = "169.254.169.254"
  49. # The "management" interface is used by the kubelet and by Windows pods to talk
  50. # to the rest of the Kubernetes cluster *without NAT*. This interface does not
  51. # exist until an initial HNS network has been created on the Windows node - see
  52. # Add_InitialHnsNetwork().
  53. $MGMT_ADAPTER_NAME = "vEthernet (Ethernet*"
  54. $CRICTL_VERSION = 'v1.17.0'
  55. $CRICTL_SHA256 = '781fd3bd15146a924c6fc2428b11d8a0f20fa04a0c8e00a9a5808f2cc37e0569'
  56. Import-Module -Force C:\common.psm1
  57. # Writes a TODO with $Message to the console.
  58. function Log_Todo {
  59. param (
  60. [parameter(Mandatory=$true)] [string]$Message
  61. )
  62. Log-Output "TODO: ${Message}"
  63. }
  64. # Writes a not-implemented warning with $Message to the console and exits the
  65. # script.
  66. function Log_NotImplemented {
  67. param (
  68. [parameter(Mandatory=$true)] [string]$Message
  69. )
  70. Log-Output "Not implemented yet: ${Message}" -Fatal
  71. }
  72. # Fails and exits if the route to the GCE metadata server is not present,
  73. # otherwise does nothing and emits nothing.
  74. function Verify_GceMetadataServerRouteIsPresent {
  75. Try {
  76. Get-NetRoute `
  77. -ErrorAction "Stop" `
  78. -AddressFamily IPv4 `
  79. -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
  80. } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
  81. Log-Output -Fatal `
  82. ("GCE metadata server route is not present as expected.`n" +
  83. "$(Get-NetRoute -AddressFamily IPv4 | Out-String)")
  84. }
  85. }
  86. # Checks if the route to the GCE metadata server is present. Returns when the
  87. # route is NOT present or after a timeout has expired.
  88. function WaitFor_GceMetadataServerRouteToBeRemoved {
  89. $elapsed = 0
  90. $timeout = 60
  91. Log-Output ("Waiting up to ${timeout} seconds for GCE metadata server " +
  92. "route to be removed")
  93. while (${elapsed} -lt ${timeout}) {
  94. Try {
  95. Get-NetRoute `
  96. -ErrorAction "Stop" `
  97. -AddressFamily IPv4 `
  98. -DestinationPrefix ${GCE_METADATA_SERVER}/32 | Out-Null
  99. } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
  100. break
  101. }
  102. $sleeptime = 2
  103. Start-Sleep ${sleeptime}
  104. ${elapsed} += ${sleeptime}
  105. }
  106. }
  107. # Adds a route to the GCE metadata server to every network interface.
  108. function Add_GceMetadataServerRoute {
  109. # Before setting up HNS the Windows VM has a "vEthernet (nat)" interface and
  110. # a "Ethernet" interface, and the route to the metadata server exists on the
  111. # Ethernet interface. After adding the HNS network a "vEthernet (Ethernet)"
  112. # interface is added, and it seems to subsume the routes of the "Ethernet"
  113. # interface (trying to add routes on the Ethernet interface at this point just
  114. # results in "New-NetRoute : Element not found" errors). I don't know what's
  115. # up with that, but since it's hard to know what's the right thing to do here
  116. # we just try to add the route on all of the network adapters.
  117. Get-NetAdapter | ForEach-Object {
  118. $adapter_index = $_.InterfaceIndex
  119. New-NetRoute `
  120. -ErrorAction Ignore `
  121. -DestinationPrefix "${GCE_METADATA_SERVER}/32" `
  122. -InterfaceIndex ${adapter_index} | Out-Null
  123. }
  124. }
  125. # Writes debugging information, such as Windows version and patch info, to the
  126. # console.
  127. function Dump-DebugInfoToConsole {
  128. Try {
  129. $version = "$([System.Environment]::OSVersion.Version | Out-String)"
  130. $hotfixes = "$(Get-Hotfix | Out-String)"
  131. $image = "$(Get-InstanceMetadata 'image' | Out-String)"
  132. Log-Output "Windows version:`n$version"
  133. Log-Output "Installed hotfixes:`n$hotfixes"
  134. Log-Output "GCE Windows image:`n$image"
  135. } Catch { }
  136. }
  137. # Converts the kube-env string in Yaml
  138. #
  139. # Returns: a PowerShell Hashtable object containing the key-value pairs from
  140. # kube-env.
  141. function ConvertFrom_Yaml_KubeEnv {
  142. param (
  143. [parameter(Mandatory=$true)] [string]$kube_env_str
  144. )
  145. $kube_env_table = @{}
  146. $currentLine = $null
  147. switch -regex (${kube_env_str} -split '\r?\n') {
  148. '^(\S.*)' {
  149. # record start pattern, line that doesn't start with a whitespace
  150. if ($null -ne $currentLine) {
  151. $key, $val = $currentLine -split ":",2
  152. $kube_env_table[$key] = $val.Trim("'", " ", "`"")
  153. }
  154. $currentLine = $matches.1
  155. continue
  156. }
  157. '^(\s+.*)' {
  158. # line that start with whitespace
  159. $currentLine += $matches.1
  160. continue
  161. }
  162. }
  163. # Handle the last line if any
  164. if ($currentLine) {
  165. $key, $val = $currentLine -split ":",2
  166. $kube_env_table[$key] = $val.Trim("'", " ", "`"")
  167. }
  168. return ${kube_env_table}
  169. }
  170. # Fetches the kube-env from the instance metadata.
  171. #
  172. # Returns: a PowerShell Hashtable object containing the key-value pairs from
  173. # kube-env.
  174. function Fetch-KubeEnv {
  175. # Testing / debugging:
  176. # First:
  177. # ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
  178. # or:
  179. # ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
  180. # ${kube_env_table} = ConvertFrom_Yaml_KubeEnv ${kube_env}
  181. # ${kube_env_table}
  182. # ${kube_env_table}.GetType()
  183. # The type of kube_env is a powershell String.
  184. $kube_env = Get-InstanceMetadataAttribute 'kube-env'
  185. $kube_env_table = ConvertFrom_Yaml_KubeEnv ${kube_env}
  186. return ${kube_env_table}
  187. }
  188. # Sets the environment variable $Key to $Value at the Machine scope (will
  189. # be present in the environment for all new shells after a reboot).
  190. function Set_MachineEnvironmentVar {
  191. param (
  192. [parameter(Mandatory=$true)] [string]$Key,
  193. [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
  194. )
  195. [Environment]::SetEnvironmentVariable($Key, $Value, "Machine")
  196. }
  197. # Sets the environment variable $Key to $Value in the current shell.
  198. function Set_CurrentShellEnvironmentVar {
  199. param (
  200. [parameter(Mandatory=$true)] [string]$Key,
  201. [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
  202. )
  203. $expression = '$env:' + $Key + ' = "' + $Value + '"'
  204. Invoke-Expression ${expression}
  205. }
  206. # Sets environment variables used by Kubernetes binaries and by other functions
  207. # in this module. Depends on numerous ${kube_env} keys.
  208. function Set-EnvironmentVars {
  209. # Turning the kube-env values into environment variables is not required but
  210. # it makes debugging this script easier, and it also makes the syntax a lot
  211. # easier (${env:K8S_DIR} can be expanded within a string but
  212. # ${kube_env}['K8S_DIR'] cannot be afaik).
  213. $env_vars = @{
  214. "K8S_DIR" = ${kube_env}['K8S_DIR']
  215. # Typically 'C:\etc\kubernetes\node\bin' (not just 'C:\etc\kubernetes\node')
  216. "NODE_DIR" = ${kube_env}['NODE_DIR']
  217. "CNI_DIR" = ${kube_env}['CNI_DIR']
  218. "CNI_CONFIG_DIR" = ${kube_env}['CNI_CONFIG_DIR']
  219. "WINDOWS_CNI_STORAGE_PATH" = ${kube_env}['WINDOWS_CNI_STORAGE_PATH']
  220. "WINDOWS_CNI_VERSION" = ${kube_env}['WINDOWS_CNI_VERSION']
  221. "PKI_DIR" = ${kube_env}['PKI_DIR']
  222. "CA_FILE_PATH" = ${kube_env}['CA_FILE_PATH']
  223. "KUBELET_CONFIG" = ${kube_env}['KUBELET_CONFIG_FILE']
  224. "BOOTSTRAP_KUBECONFIG" = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
  225. "KUBECONFIG" = ${kube_env}['KUBECONFIG_FILE']
  226. "KUBEPROXY_KUBECONFIG" = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
  227. "LOGS_DIR" = ${kube_env}['LOGS_DIR']
  228. "MANIFESTS_DIR" = ${kube_env}['MANIFESTS_DIR']
  229. "Path" = ${env:Path} + ";" + ${kube_env}['NODE_DIR']
  230. "KUBE_NETWORK" = "l2bridge".ToLower()
  231. "KUBELET_CERT_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.crt'
  232. "KUBELET_KEY_PATH" = ${kube_env}['PKI_DIR'] + '\kubelet.key'
  233. "CONTAINER_RUNTIME" = ${kube_env}['CONTAINER_RUNTIME']
  234. "CONTAINER_RUNTIME_ENDPOINT" = ${kube_env}['CONTAINER_RUNTIME_ENDPOINT']
  235. 'LICENSE_DIR' = 'C:\Program Files\Google\Compute Engine\THIRD_PARTY_NOTICES'
  236. }
  237. # Set the environment variables in two ways: permanently on the machine (only
  238. # takes effect after a reboot), and in the current shell.
  239. $env_vars.GetEnumerator() | ForEach-Object{
  240. $message = "Setting environment variable: " + $_.key + " = " + $_.value
  241. Log-Output ${message}
  242. Set_MachineEnvironmentVar $_.key $_.value
  243. Set_CurrentShellEnvironmentVar $_.key $_.value
  244. }
  245. }
  246. # Configures various settings and prerequisites needed for the rest of the
  247. # functions in this module and the Kubernetes binaries to operate properly.
  248. function Set-PrerequisiteOptions {
  249. # Windows updates cause the node to reboot at arbitrary times.
  250. Log-Output "Disabling Windows Update service"
  251. & sc.exe config wuauserv start=disabled
  252. & sc.exe stop wuauserv
  253. # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
  254. [Net.ServicePointManager]::SecurityProtocol = `
  255. [Net.SecurityProtocolType]::Tls12
  256. }
  257. # Creates directories where other functions in this module will read and write
  258. # data.
  259. # Note: C:\tmp is required for running certain kubernetes tests.
  260. # C:\var\log is used by kubelet to stored container logs and also
  261. # hard-coded in the fluentd/stackdriver config for log collection.
  262. function Create-Directories {
  263. Log-Output "Creating ${env:K8S_DIR} and its subdirectories."
  264. ForEach ($dir in ("${env:K8S_DIR}", "${env:NODE_DIR}", "${env:LOGS_DIR}",
  265. "${env:CNI_DIR}", "${env:CNI_CONFIG_DIR}", "${env:MANIFESTS_DIR}",
  266. "${env:PKI_DIR}", "${env:LICENSE_DIR}"), "C:\tmp", "C:\var\log") {
  267. mkdir -Force $dir
  268. }
  269. }
  270. # Downloads some external helper scripts needed by other functions in this
  271. # module.
  272. function Download-HelperScripts {
  273. if (ShouldWrite-File ${env:K8S_DIR}\hns.psm1) {
  274. MustDownload-File `
  275. -OutFile ${env:K8S_DIR}\hns.psm1 `
  276. -URLs 'https://storage.googleapis.com/gke-release/winnode/config/sdn/master/hns.psm1'
  277. }
  278. }
  279. # Downloads the gke-exec-auth-plugin for TPM-based authentication to the
  280. # master, if auth plugin support has been requested for this node (see
  281. # Test-NodeUsesAuthPlugin).
  282. # https://github.com/kubernetes/cloud-provider-gcp/tree/master/cmd/gke-exec-auth-plugin
  283. #
  284. # Required ${kube_env} keys:
  285. # EXEC_AUTH_PLUGIN_LICENSE_URL
  286. # EXEC_AUTH_PLUGIN_SHA1
  287. # EXEC_AUTH_PLUGIN_URL
  288. function DownloadAndInstall-AuthPlugin {
  289. if (-not (Test-NodeUsesAuthPlugin ${kube_env})) {
  290. Log-Output 'Skipping download of auth plugin'
  291. return
  292. }
  293. if (-not (ShouldWrite-File "${env:NODE_DIR}\gke-exec-auth-plugin.exe")) {
  294. return
  295. }
  296. if (-not ($kube_env.ContainsKey('EXEC_AUTH_PLUGIN_LICENSE_URL') -and
  297. $kube_env.ContainsKey('EXEC_AUTH_PLUGIN_SHA1') -and
  298. $kube_env.ContainsKey('EXEC_AUTH_PLUGIN_URL'))) {
  299. Log-Output -Fatal ("Missing one or more kube-env keys needed for " +
  300. "downloading auth plugin: $(Out-String $kube_env)")
  301. }
  302. MustDownload-File `
  303. -URLs ${kube_env}['EXEC_AUTH_PLUGIN_URL'] `
  304. -Hash ${kube_env}['EXEC_AUTH_PLUGIN_SHA1'] `
  305. -OutFile "${env:NODE_DIR}\gke-exec-auth-plugin.exe"
  306. MustDownload-File `
  307. -URLs ${kube_env}['EXEC_AUTH_PLUGIN_LICENSE_URL'] `
  308. -OutFile "${env:LICENSE_DIR}\LICENSE_gke-exec-auth-plugin.txt"
  309. }
  310. # Downloads the Kubernetes binaries from kube-env's NODE_BINARY_TAR_URL and
  311. # puts them in a subdirectory of $env:K8S_DIR.
  312. #
  313. # Required ${kube_env} keys:
  314. # NODE_BINARY_TAR_URL
  315. function DownloadAndInstall-KubernetesBinaries {
  316. # Assume that presence of kubelet.exe indicates that the kubernetes binaries
  317. # were already previously downloaded to this node.
  318. if (-not (ShouldWrite-File ${env:NODE_DIR}\kubelet.exe)) {
  319. return
  320. }
  321. $tmp_dir = 'C:\k8s_tmp'
  322. New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
  323. $urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
  324. $filename = Split-Path -leaf $urls[0]
  325. $hash = $null
  326. if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
  327. $hash = ${kube_env}['NODE_BINARY_TAR_HASH']
  328. }
  329. MustDownload-File -Hash $hash -OutFile $tmp_dir\$filename -URLs $urls
  330. tar xzvf $tmp_dir\$filename -C $tmp_dir
  331. Move-Item -Force $tmp_dir\kubernetes\node\bin\* ${env:NODE_DIR}\
  332. Move-Item -Force `
  333. $tmp_dir\kubernetes\LICENSES ${env:LICENSE_DIR}\LICENSES_kubernetes
  334. # Clean up the temporary directory
  335. Remove-Item -Force -Recurse $tmp_dir
  336. }
  337. # TODO(pjh): this is copied from
  338. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  339. # See if there's a way to fetch or construct the "management subnet" so that
  340. # this is not needed.
  341. function ConvertTo_DecimalIP
  342. {
  343. param(
  344. [parameter(Mandatory = $true, Position = 0)]
  345. [Net.IPAddress] $IPAddress
  346. )
  347. $i = 3; $decimal_ip = 0;
  348. $IPAddress.GetAddressBytes() | % {
  349. $decimal_ip += $_ * [Math]::Pow(256, $i); $i--
  350. }
  351. return [UInt32]$decimal_ip
  352. }
  353. # TODO(pjh): this is copied from
  354. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  355. # See if there's a way to fetch or construct the "management subnet" so that
  356. # this is not needed.
  357. function ConvertTo_DottedDecimalIP
  358. {
  359. param(
  360. [parameter(Mandatory = $true, Position = 0)]
  361. [Uint32] $IPAddress
  362. )
  363. $dotted_ip = $(for ($i = 3; $i -gt -1; $i--) {
  364. $remainder = $IPAddress % [Math]::Pow(256, $i)
  365. ($IPAddress - $remainder) / [Math]::Pow(256, $i)
  366. $IPAddress = $remainder
  367. })
  368. return [String]::Join(".", $dotted_ip)
  369. }
  370. # TODO(pjh): this is copied from
  371. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  372. # See if there's a way to fetch or construct the "management subnet" so that
  373. # this is not needed.
  374. function ConvertTo_MaskLength
  375. {
  376. param(
  377. [parameter(Mandatory = $True, Position = 0)]
  378. [Net.IPAddress] $SubnetMask
  379. )
  380. $bits = "$($SubnetMask.GetAddressBytes() | % {
  381. [Convert]::ToString($_, 2)
  382. } )" -replace "[\s0]"
  383. return $bits.Length
  384. }
  385. # Returns the "management" subnet on which the Windows pods+kubelet will
  386. # communicate with the rest of the Kubernetes cluster without NAT. In GCE this
  387. # is the subnet that VM internal IPs are allocated from.
  388. #
  389. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  390. function Get_MgmtSubnet {
  391. $net_adapter = Get_MgmtNetAdapter
  392. # TODO(pjh): applying the primary interface's subnet mask to its IP address
  393. # *should* give us the GCE network subnet that VM IP addresses are being
  394. # allocated from... however it might be more accurate or straightforward to
  395. # just fetch the IP address range for the VPC subnet that the kube-up script
  396. # creates (kubernetes-subnet-default).
  397. $addr = (Get-NetIPAddress `
  398. -InterfaceAlias ${net_adapter}.ifAlias `
  399. -AddressFamily IPv4).IPAddress
  400. # Get the adapter's mask from the registry rather than WMI or some other
  401. # approach: this is compatible with Windows' forthcoming LWVNICs (lightweight
  402. # VNICs).
  403. # https://github.com/kubernetes-sigs/sig-windows-tools/pull/16/commits/c5b5c67d5da6c23ad870cb16146eaa58131caf29
  404. $adapter_registry = Get-Item `
  405. -Path ("HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\" +
  406. "Parameters\Interfaces\$($net_adapter.InterfaceGuid)")
  407. # In this command the value name is 'DhcpSubnetMask' for current network
  408. # interfaces but could be different for "LWVNIC" interfaces.
  409. $mask = ($adapter_registry.GetValueNames() -like "*SubnetMask" |
  410. % { $adapter_registry.GetValue($_) })
  411. $mgmt_subnet = `
  412. (ConvertTo_DecimalIP ${addr}) -band (ConvertTo_DecimalIP ${mask})
  413. $mgmt_subnet = ConvertTo_DottedDecimalIP ${mgmt_subnet}
  414. return "${mgmt_subnet}/$(ConvertTo_MaskLength $mask)"
  415. }
  416. # Returns a network adapter object for the "management" interface via which the
  417. # Windows pods+kubelet will communicate with the rest of the Kubernetes cluster.
  418. #
  419. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  420. function Get_MgmtNetAdapter {
  421. $net_adapter = Get-NetAdapter | Where-Object Name -like ${MGMT_ADAPTER_NAME}
  422. if (-not ${net_adapter}) {
  423. Throw ("Failed to find a suitable network adapter, check your network " +
  424. "settings.")
  425. }
  426. return $net_adapter
  427. }
  428. # Decodes the base64 $Data string and writes it as binary to $File. Does
  429. # nothing if $File already exists and $REDO_STEPS is not set.
  430. function Write_PkiData {
  431. param (
  432. [parameter(Mandatory=$true)] [string] $Data,
  433. [parameter(Mandatory=$true)] [string] $File
  434. )
  435. if (-not (ShouldWrite-File $File)) {
  436. return
  437. }
  438. # This command writes out a PEM certificate file, analogous to "base64
  439. # --decode" on Linux. See https://stackoverflow.com/a/51914136/1230197.
  440. [IO.File]::WriteAllBytes($File, [Convert]::FromBase64String($Data))
  441. Log_Todo ("need to set permissions correctly on ${File}; not sure what the " +
  442. "Windows equivalent of 'umask 077' is")
  443. # Linux: owned by root, rw by user only.
  444. # -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
  445. # -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
  446. # -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
  447. # Windows:
  448. # https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
  449. # https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
  450. }
  451. # Creates the node PKI files in $env:PKI_DIR.
  452. #
  453. # Required ${kube_env} keys:
  454. # CA_CERT
  455. # ${kube_env} keys that can be omitted for nodes that do not use an
  456. # authentication plugin:
  457. # KUBELET_CERT
  458. # KUBELET_KEY
  459. function Create-NodePki {
  460. Log-Output 'Creating node pki files'
  461. if ($kube_env.ContainsKey('CA_CERT')) {
  462. $CA_CERT_BUNDLE = ${kube_env}['CA_CERT']
  463. Write_PkiData "${CA_CERT_BUNDLE}" ${env:CA_FILE_PATH}
  464. }
  465. else {
  466. Log-Output -Fatal 'CA_CERT not present in kube-env'
  467. }
  468. # On nodes that use a plugin to support authentication, KUBELET_CERT and
  469. # KUBELET_KEY will not be present - TPM_BOOTSTRAP_CERT and TPM_BOOTSTRAP_KEY
  470. # should be set instead.
  471. if (Test-NodeUsesAuthPlugin ${kube_env}) {
  472. Log-Output ('Skipping KUBELET_CERT and KUBELET_KEY, plugin will be used ' +
  473. 'for authentication')
  474. return
  475. }
  476. if ($kube_env.ContainsKey('KUBELET_CERT')) {
  477. $KUBELET_CERT = ${kube_env}['KUBELET_CERT']
  478. Write_PkiData "${KUBELET_CERT}" ${env:KUBELET_CERT_PATH}
  479. }
  480. else {
  481. Log-Output -Fatal 'KUBELET_CERT not present in kube-env'
  482. }
  483. if ($kube_env.ContainsKey('KUBELET_KEY')) {
  484. $KUBELET_KEY = ${kube_env}['KUBELET_KEY']
  485. Write_PkiData "${KUBELET_KEY}" ${env:KUBELET_KEY_PATH}
  486. }
  487. else {
  488. Log-Output -Fatal 'KUBELET_KEY not present in kube-env'
  489. }
  490. Get-ChildItem ${env:PKI_DIR}
  491. }
  492. # Creates the bootstrap kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
  493. # https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
  494. #
  495. # Create-NodePki() must be called first.
  496. #
  497. # Required ${kube_env} keys:
  498. # KUBERNETES_MASTER_NAME: the apiserver IP address.
  499. function Write_BootstrapKubeconfig {
  500. if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
  501. return
  502. }
  503. # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
  504. # "system:node:$(hostname)".
  505. $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']
  506. New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
  507. Set-Content ${env:BOOTSTRAP_KUBECONFIG} `
  508. 'apiVersion: v1
  509. kind: Config
  510. users:
  511. - name: kubelet
  512. user:
  513. client-certificate: KUBELET_CERT_PATH
  514. client-key: KUBELET_KEY_PATH
  515. clusters:
  516. - name: local
  517. cluster:
  518. server: https://APISERVER_ADDRESS
  519. certificate-authority: CA_FILE_PATH
  520. contexts:
  521. - context:
  522. cluster: local
  523. user: kubelet
  524. name: service-account-context
  525. current-context: service-account-context'.`
  526. replace('KUBELET_CERT_PATH', ${env:KUBELET_CERT_PATH}).`
  527. replace('KUBELET_KEY_PATH', ${env:KUBELET_KEY_PATH}).`
  528. replace('APISERVER_ADDRESS', ${apiserverAddress}).`
  529. replace('CA_FILE_PATH', ${env:CA_FILE_PATH})
  530. Log-Output ("kubelet bootstrap kubeconfig:`n" +
  531. "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
  532. }
  533. # Fetches the kubelet kubeconfig from the metadata server and writes it to
  534. # $env:KUBECONFIG.
  535. #
  536. # Create-NodePki() must be called first.
  537. function Write_KubeconfigFromMetadata {
  538. if (-not (ShouldWrite-File ${env:KUBECONFIG})) {
  539. return
  540. }
  541. $kubeconfig = Get-InstanceMetadataAttribute 'kubeconfig'
  542. if ($kubeconfig -eq $null) {
  543. Log-Output `
  544. "kubeconfig metadata key not found, can't write ${env:KUBECONFIG}" `
  545. -Fatal
  546. }
  547. Set-Content ${env:KUBECONFIG} $kubeconfig
  548. Log-Output ("kubelet kubeconfig from metadata (non-bootstrap):`n" +
  549. "$(Get-Content -Raw ${env:KUBECONFIG})")
  550. }
  551. # Creates the kubelet kubeconfig at $env:KUBECONFIG for nodes that use an
  552. # authentication plugin, or at $env:BOOTSTRAP_KUBECONFIG for nodes that do not.
  553. #
  554. # Create-NodePki() must be called first.
  555. #
  556. # Required ${kube_env} keys:
  557. # KUBERNETES_MASTER_NAME: the apiserver IP address.
  558. function Create-KubeletKubeconfig {
  559. if (Test-NodeUsesAuthPlugin ${kube_env}) {
  560. Write_KubeconfigFromMetadata
  561. } else {
  562. Write_BootstrapKubeconfig
  563. }
  564. }
  565. # Creates the kube-proxy user kubeconfig file at $env:KUBEPROXY_KUBECONFIG.
  566. #
  567. # Create-NodePki() must be called first.
  568. #
  569. # Required ${kube_env} keys:
  570. # CA_CERT
  571. # KUBE_PROXY_TOKEN
  572. function Create-KubeproxyKubeconfig {
  573. if (-not (ShouldWrite-File ${env:KUBEPROXY_KUBECONFIG})) {
  574. return
  575. }
  576. New-Item -Force -ItemType file ${env:KUBEPROXY_KUBECONFIG} | Out-Null
  577. # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
  578. # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
  579. # Use just one or the other for consistency?
  580. Set-Content ${env:KUBEPROXY_KUBECONFIG} `
  581. 'apiVersion: v1
  582. kind: Config
  583. users:
  584. - name: kube-proxy
  585. user:
  586. token: KUBEPROXY_TOKEN
  587. clusters:
  588. - name: local
  589. cluster:
  590. server: https://APISERVER_ADDRESS
  591. certificate-authority-data: CA_CERT
  592. contexts:
  593. - context:
  594. cluster: local
  595. user: kube-proxy
  596. name: service-account-context
  597. current-context: service-account-context'.`
  598. replace('KUBEPROXY_TOKEN', ${kube_env}['KUBE_PROXY_TOKEN']).`
  599. replace('CA_CERT', ${kube_env}['CA_CERT']).`
  600. replace('APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])
  601. Log-Output ("kubeproxy kubeconfig:`n" +
  602. "$(Get-Content -Raw ${env:KUBEPROXY_KUBECONFIG})")
  603. }
  604. # Returns the IP alias range configured for this GCE instance.
  605. function Get_IpAliasRange {
  606. $url = ("http://${GCE_METADATA_SERVER}/computeMetadata/v1/instance/" +
  607. "network-interfaces/0/ip-aliases/0")
  608. $client = New-Object Net.WebClient
  609. $client.Headers.Add('Metadata-Flavor', 'Google')
  610. return ($client.DownloadString($url)).Trim()
  611. }
  612. # Retrieves the pod CIDR and sets it in $env:POD_CIDR.
  613. function Set-PodCidr {
  614. while($true) {
  615. $pod_cidr = Get_IpAliasRange
  616. if (-not $?) {
  617. Log-Output ${pod_cIDR}
  618. Log-Output "Retrying Get_IpAliasRange..."
  619. Start-Sleep -sec 1
  620. continue
  621. }
  622. break
  623. }
  624. Log-Output "fetched pod CIDR (same as IP alias range): ${pod_cidr}"
  625. Set_MachineEnvironmentVar "POD_CIDR" ${pod_cidr}
  626. Set_CurrentShellEnvironmentVar "POD_CIDR" ${pod_cidr}
  627. }
  628. # Adds an initial HNS network on the Windows node which forces the creation of
  629. # a virtual switch and the "management" interface that will be used to
  630. # communicate with the rest of the Kubernetes cluster without NAT.
  631. #
  632. # Note that adding the initial HNS network may cause connectivity to the GCE
  633. # metadata server to be lost due to a Windows bug.
  634. # Configure-HostNetworkingService() restores connectivity, look there for
  635. # details.
  636. #
  637. # Download-HelperScripts() must have been called first.
  638. function Add_InitialHnsNetwork {
  639. $INITIAL_HNS_NETWORK = 'External'
  640. # This comes from
  641. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
  642. # (or
  643. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
  644. #
  645. # daschott noted on Slack: "L2bridge networks require an external vSwitch.
  646. # The first network ("External") with hardcoded values in the script is just
  647. # a placeholder to create an external vSwitch. This is purely for convenience
  648. # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
  649. # nodes without a network blip. Creating a vSwitch takes time, causes network
  650. # blips, and it makes it more likely to hit the issue where flanneld is
  651. # stuck, so we want to do this as rarely as possible."
  652. $hns_network = Get-HnsNetwork | Where-Object Name -eq $INITIAL_HNS_NETWORK
  653. if ($hns_network) {
  654. if ($REDO_STEPS) {
  655. Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
  656. "already exists, removing it and recreating it")
  657. $hns_network | Remove-HnsNetwork
  658. $hns_network = $null
  659. }
  660. else {
  661. Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
  662. "already exists, not recreating it")
  663. return
  664. }
  665. }
  666. Log-Output ("Creating initial HNS network to force creation of " +
  667. "${MGMT_ADAPTER_NAME} interface")
  668. # Note: RDP connection will hiccup when running this command.
  669. New-HNSNetwork `
  670. -Type "L2Bridge" `
  671. -AddressPrefix "192.168.255.0/30" `
  672. -Gateway "192.168.255.1" `
  673. -Name $INITIAL_HNS_NETWORK `
  674. -Verbose
  675. }
  676. # Get the network in uint32 for the given cidr
  677. function Get_NetworkDecimal_From_CIDR([string] $cidr) {
  678. $network, [int]$subnetlen = $cidr.Split('/')
  679. $decimal_network = ConvertTo_DecimalIP($network)
  680. return $decimal_network
  681. }
  682. # Get gateway ip string (the first address) based on pod cidr.
  683. # For Windows nodes the pod gateway IP address is the first address in the pod
  684. # CIDR for the host.
  685. function Get_Gateway_From_CIDR([string] $cidr) {
  686. $network=Get_NetworkDecimal_From_CIDR($cidr)
  687. $gateway=ConvertTo_DottedDecimalIP($network+1)
  688. return $gateway
  689. }
  690. # Get endpoint gateway ip string (the second address) based on pod cidr.
  691. # For Windows nodes the pod gateway IP address is the first address in the pod
  692. # CIDR for the host, but from inside containers it's the second address.
  693. function Get_Endpoint_Gateway_From_CIDR([string] $cidr) {
  694. $network=Get_NetworkDecimal_From_CIDR($cidr)
  695. $gateway=ConvertTo_DottedDecimalIP($network+2)
  696. return $gateway
  697. }
  698. # Get pod IP range start based (the third address) on pod cidr
  699. # We reserve the first two in the cidr range for gateways. Start the cidr
  700. # range from the third so that IPAM does not allocate those IPs to pods.
  701. function Get_PodIP_Range_Start([string] $cidr) {
  702. $network=Get_NetworkDecimal_From_CIDR($cidr)
  703. $start=ConvertTo_DottedDecimalIP($network+3)
  704. return $start
  705. }
  706. # Configures HNS on the Windows node to enable Kubernetes networking:
  707. # - Creates the "management" interface associated with an initial HNS network.
  708. # - Creates the HNS network $env:KUBE_NETWORK for pod networking.
  709. # - Creates an HNS endpoint for pod networking.
  710. # - Adds necessary routes on the management interface.
  711. # - Verifies that the GCE metadata server connection remains intact.
  712. #
  713. # Prerequisites:
  714. # $env:POD_CIDR is set (by Set-PodCidr).
  715. # Download-HelperScripts() has been called.
  716. function Configure-HostNetworkingService {
  717. Import-Module -Force ${env:K8S_DIR}\hns.psm1
  718. Add_InitialHnsNetwork
  719. $pod_gateway = Get_Gateway_From_CIDR(${env:POD_CIDR})
  720. $pod_endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
  721. Log-Output ("Setting up Windows node HNS networking: " +
  722. "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
  723. "podEndpointGateway = ${pod_endpoint_gateway}")
  724. $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
  725. if ($hns_network) {
  726. if ($REDO_STEPS) {
  727. Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
  728. "removing it and recreating it")
  729. $hns_network | Remove-HnsNetwork
  730. $hns_network = $null
  731. }
  732. else {
  733. Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
  734. }
  735. }
  736. $created_hns_network = $false
  737. if (-not $hns_network) {
  738. # Note: RDP connection will hiccup when running this command.
  739. $hns_network = New-HNSNetwork `
  740. -Type "L2Bridge" `
  741. -AddressPrefix ${env:POD_CIDR} `
  742. -Gateway ${pod_gateway} `
  743. -Name ${env:KUBE_NETWORK} `
  744. -Verbose
  745. $created_hns_network = $true
  746. }
  747. $endpoint_name = "cbr0"
  748. $vnic_name = "vEthernet (${endpoint_name})"
  749. $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
  750. # Note: we don't expect to ever enter this block currently - while the HNS
  751. # network does seem to persist across reboots, the HNS endpoints do not.
  752. if ($hns_endpoint) {
  753. if ($REDO_STEPS) {
  754. Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
  755. "removing it and recreating it")
  756. $hns_endpoint | Remove-HnsEndpoint
  757. $hns_endpoint = $null
  758. }
  759. else {
  760. Log-Output "Skip: HNS endpoint $endpoint_name already exists"
  761. }
  762. }
  763. if (-not $hns_endpoint) {
  764. $hns_endpoint = New-HnsEndpoint `
  765. -NetworkId ${hns_network}.Id `
  766. -Name ${endpoint_name} `
  767. -IPAddress ${pod_endpoint_gateway} `
  768. -Gateway "0.0.0.0" `
  769. -Verbose
  770. # TODO(pjh): find out: why is this always CompartmentId 1?
  771. Attach-HnsHostEndpoint `
  772. -EndpointID ${hns_endpoint}.Id `
  773. -CompartmentID 1 `
  774. -Verbose
  775. netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
  776. }
  777. Try {
  778. Get-HNSPolicyList | Remove-HnsPolicyList
  779. } Catch { }
  780. # Add a route from the management NIC to the pod CIDR.
  781. #
  782. # When a packet from a Kubernetes service backend arrives on the destination
  783. # Windows node, the reverse SNAT will be applied and the source address of
  784. # the packet gets replaced from the pod IP to the service VIP. The packet
  785. # will then leave the VM and return back through hairpinning.
  786. #
  787. # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
  788. # the packet with the service VIP will get blocked and be lost. With this
  789. # route, the packet will be routed to the pod subnetwork, and not leave the
  790. # VM.
  791. $mgmt_net_adapter = Get_MgmtNetAdapter
  792. New-NetRoute `
  793. -ErrorAction Ignore `
  794. -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
  795. -DestinationPrefix ${env:POD_CIDR} `
  796. -NextHop "0.0.0.0" `
  797. -Verbose
  798. if ($created_hns_network) {
  799. # There is an HNS bug where the route to the GCE metadata server will be
  800. # removed when the HNS network is created:
  801. # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
  802. # The behavior here is very unpredictable: the route may only be removed
  803. # after some delay, or it may appear to be removed then you'll add it back
  804. # but then it will be removed once again. So, we first wait a long
  805. # unfortunate amount of time to ensure that things have quiesced, then we
  806. # wait until we're sure the route is really gone before re-adding it again.
  807. Log-Output "Waiting 45 seconds for host network state to quiesce"
  808. Start-Sleep 45
  809. WaitFor_GceMetadataServerRouteToBeRemoved
  810. Log-Output "Re-adding the GCE metadata server route"
  811. Add_GceMetadataServerRoute
  812. }
  813. Verify_GceMetadataServerRouteIsPresent
  814. Log-Output "Host network setup complete"
  815. }
  816. function Configure-GcePdTools {
  817. if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
  818. MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
  819. -URLs "https://storage.googleapis.com/gke-release/winnode/config/gce-tools/master/GetGcePdName/GetGcePdName.dll"
  820. }
  821. if (-not (Test-Path $PsHome\profile.ps1)) {
  822. New-Item -path $PsHome\profile.ps1 -type file
  823. }
  824. Add-Content $PsHome\profile.ps1 `
  825. '$modulePath = "K8S_DIR\GetGcePdName.dll"
  826. Unblock-File $modulePath
  827. Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})
  828. }
  829. # Setup cni network. This function supports both Docker and containerd.
  830. function Prepare-CniNetworking {
  831. if (${env:CONTAINER_RUNTIME} -eq "containerd") {
  832. # For containerd the CNI binaries have already been installed along with
  833. # the runtime.
  834. Configure_Containerd_CniNetworking
  835. } else {
  836. Install_Cni_Binaries
  837. Configure_Dockerd_CniNetworking
  838. }
  839. }
  840. # Downloads the Windows CNI binaries and puts them in $env:CNI_DIR.
  841. function Install_Cni_Binaries {
  842. if (-not (ShouldWrite-File ${env:CNI_DIR}\win-bridge.exe) -and
  843. -not (ShouldWrite-File ${env:CNI_DIR}\host-local.exe)) {
  844. return
  845. }
  846. $tmp_dir = 'C:\cni_tmp'
  847. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  848. $release_url = "${env:WINDOWS_CNI_STORAGE_PATH}/${env:WINDOWS_CNI_VERSION}/"
  849. $tgz_url = ($release_url +
  850. "cni-plugins-windows-amd64-${env:WINDOWS_CNI_VERSION}.tgz")
  851. $sha_url = ($tgz_url + ".sha1")
  852. MustDownload-File -URLs $sha_url -OutFile $tmp_dir\cni-plugins.sha1
  853. $sha1_val = ($(Get-Content $tmp_dir\cni-plugins.sha1) -split ' ',2)[0]
  854. MustDownload-File `
  855. -URLs $tgz_url `
  856. -OutFile $tmp_dir\cni-plugins.tgz `
  857. -Hash $sha1_val
  858. tar xzvf $tmp_dir\cni-plugins.tgz -C $tmp_dir
  859. Move-Item -Force $tmp_dir\host-local.exe ${env:CNI_DIR}\
  860. Move-Item -Force $tmp_dir\win-bridge.exe ${env:CNI_DIR}\
  861. Remove-Item -Force -Recurse $tmp_dir
  862. if (-not ((Test-Path ${env:CNI_DIR}\win-bridge.exe) -and `
  863. (Test-Path ${env:CNI_DIR}\host-local.exe))) {
  864. Log-Output `
  865. "win-bridge.exe and host-local.exe not found in ${env:CNI_DIR}" `
  866. -Fatal
  867. }
  868. }
  869. # Writes a CNI config file under $env:CNI_CONFIG_DIR.
  870. #
  871. # Prerequisites:
  872. # $env:POD_CIDR is set (by Set-PodCidr).
  873. # The "management" interface exists (Configure-HostNetworkingService).
  874. # The HNS network for pod networking has been configured
  875. # (Configure-HostNetworkingService).
  876. #
  877. # Required ${kube_env} keys:
  878. # DNS_SERVER_IP
  879. # DNS_DOMAIN
  880. # CLUSTER_IP_RANGE
  881. # SERVICE_CLUSTER_IP_RANGE
  882. function Configure_Dockerd_CniNetworking {
  883. $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  884. if (-not (ShouldWrite-File ${l2bridge_conf})) {
  885. return
  886. }
  887. $mgmt_ip = (Get_MgmtNetAdapter |
  888. Get-NetIPAddress -AddressFamily IPv4).IPAddress
  889. $mgmt_subnet = Get_MgmtSubnet
  890. Log-Output ("using mgmt IP ${mgmt_ip} and mgmt subnet ${mgmt_subnet} for " +
  891. "CNI config")
  892. $cidr_range_start = Get_PodIP_Range_Start(${env:POD_CIDR})
  893. # Explanation of the CNI config values:
  894. # CLUSTER_CIDR: the cluster CIDR from which pod CIDRs are allocated.
  895. # POD_CIDR: the pod CIDR assigned to this node.
  896. # CIDR_RANGE_START: start of the pod CIDR range.
  897. # MGMT_SUBNET: the subnet on which the Windows pods + kubelet will
  898. # communicate with the rest of the cluster without NAT (i.e. the subnet
  899. # that VM internal IPs are allocated from).
  900. # MGMT_IP: the IP address assigned to the node's primary network interface
  901. # (i.e. the internal IP of the GCE VM).
  902. # SERVICE_CIDR: the CIDR used for kubernetes services.
  903. # DNS_SERVER_IP: the cluster's DNS server IP address.
  904. # DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  905. New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null
  906. Set-Content ${l2bridge_conf} `
  907. '{
  908. "cniVersion": "0.2.0",
  909. "name": "l2bridge",
  910. "type": "win-bridge",
  911. "capabilities": {
  912. "portMappings": true,
  913. "dns": true
  914. },
  915. "ipam": {
  916. "type": "host-local",
  917. "subnet": "POD_CIDR",
  918. "rangeStart": "CIDR_RANGE_START"
  919. },
  920. "dns": {
  921. "Nameservers": [
  922. "DNS_SERVER_IP"
  923. ],
  924. "Search": [
  925. "DNS_DOMAIN"
  926. ]
  927. },
  928. "Policies": [
  929. {
  930. "Name": "EndpointPolicy",
  931. "Value": {
  932. "Type": "OutBoundNAT",
  933. "ExceptionList": [
  934. "CLUSTER_CIDR",
  935. "SERVICE_CIDR",
  936. "MGMT_SUBNET"
  937. ]
  938. }
  939. },
  940. {
  941. "Name": "EndpointPolicy",
  942. "Value": {
  943. "Type": "ROUTE",
  944. "DestinationPrefix": "SERVICE_CIDR",
  945. "NeedEncap": true
  946. }
  947. },
  948. {
  949. "Name": "EndpointPolicy",
  950. "Value": {
  951. "Type": "ROUTE",
  952. "DestinationPrefix": "MGMT_IP/32",
  953. "NeedEncap": true
  954. }
  955. }
  956. ]
  957. }'.replace('POD_CIDR', ${env:POD_CIDR}).`
  958. replace('CIDR_RANGE_START', ${cidr_range_start}).`
  959. replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP']).`
  960. replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN']).`
  961. replace('MGMT_IP', ${mgmt_ip}).`
  962. replace('CLUSTER_CIDR', ${kube_env}['CLUSTER_IP_RANGE']).`
  963. replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE']).`
  964. replace('MGMT_SUBNET', ${mgmt_subnet})
  965. Log-Output "CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  966. }
  967. # Obtain the host dns conf and save it to a file so that kubelet/CNI
  968. # can use it to configure dns suffix search list for pods.
  969. # The value of DNS server is ignored right now because the pod will
  970. # always only use cluster DNS service, but for consistency, we still
  971. # parsed them here in the same format as Linux resolv.conf.
  972. # This function must be called after Configure-HostNetworkingService.
  973. function Configure-HostDnsConf {
  974. $net_adapter = Get_MgmtNetAdapter
  975. $server_ips = (Get-DnsClientServerAddress `
  976. -InterfaceAlias ${net_adapter}.Name).ServerAddresses
  977. $search_list = (Get-DnsClient).ConnectionSpecificSuffixSearchList
  978. $conf = ""
  979. ForEach ($ip in $server_ips) {
  980. $conf = $conf + "nameserver $ip`r`n"
  981. }
  982. $conf = $conf + "search $search_list"
  983. # Do not put hostdns.conf into the CNI config directory so as to
  984. # avoid the container runtime treating it as CNI config.
  985. $hostdns_conf = "${env:CNI_DIR}\hostdns.conf"
  986. New-Item -Force -ItemType file ${hostdns_conf} | Out-Null
  987. Set-Content ${hostdns_conf} $conf
  988. Log-Output "HOST dns conf:`n$(Get-Content -Raw ${hostdns_conf})"
  989. }
  990. # Fetches the kubelet config from the instance metadata and puts it at
  991. # $env:KUBELET_CONFIG.
  992. function Configure-Kubelet {
  993. if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
  994. return
  995. }
  996. # The Kubelet config is built by build-kubelet-config() in
  997. # cluster/gce/util.sh, and stored in the metadata server under the
  998. # 'kubelet-config' key.
  999. $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
  1000. Set-Content ${env:KUBELET_CONFIG} $kubelet_config
  1001. Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
  1002. }
  1003. # Sets up the kubelet and kube-proxy arguments and starts them as native
  1004. # Windows services.
  1005. #
  1006. # Required ${kube_env} keys:
  1007. # KUBELET_ARGS
  1008. # KUBEPROXY_ARGS
  1009. # CLUSTER_IP_RANGE
  1010. function Start-WorkerServices {
  1011. # Compute kubelet args
  1012. $kubelet_args_str = ${kube_env}['KUBELET_ARGS']
  1013. $kubelet_args = $kubelet_args_str.Split(" ")
  1014. Log-Output "kubelet_args from metadata: ${kubelet_args}"
  1015. $default_kubelet_args = @(`
  1016. "--pod-infra-container-image=${INFRA_CONTAINER}"
  1017. )
  1018. $kubelet_args = ${default_kubelet_args} + ${kubelet_args}
  1019. if (-not (Test-NodeUsesAuthPlugin ${kube_env})) {
  1020. Log-Output 'Using bootstrap kubeconfig for authentication'
  1021. $kubelet_args = (${kubelet_args} +
  1022. "--bootstrap-kubeconfig=${env:BOOTSTRAP_KUBECONFIG}")
  1023. }
  1024. Log-Output "Final kubelet_args: ${kubelet_args}"
  1025. # Compute kube-proxy args
  1026. $kubeproxy_args_str = ${kube_env}['KUBEPROXY_ARGS']
  1027. $kubeproxy_args = $kubeproxy_args_str.Split(" ")
  1028. Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"
  1029. # kubeproxy is started on Linux nodes using
  1030. # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
  1031. # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
  1032. # kube-proxy --master=https://35.239.84.171
  1033. # --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
  1034. # --oom-score-adj=-998 --v=2
  1035. # --iptables-sync-period=1m --iptables-min-sync-period=10s
  1036. # --ipvs-sync-period=1m --ipvs-min-sync-period=10s
  1037. # And also with various volumeMounts and "securityContext: privileged: true".
  1038. $default_kubeproxy_args = @(`
  1039. "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
  1040. "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])"
  1041. )
  1042. $kubeproxy_args = ${default_kubeproxy_args} + ${kubeproxy_args}
  1043. Log-Output "Final kubeproxy_args: ${kubeproxy_args}"
  1044. # TODO(pjh): kubelet is emitting these messages:
  1045. # I1023 23:44:11.761915 2468 kubelet.go:274] Adding pod path:
  1046. # C:\etc\kubernetes
  1047. # I1023 23:44:11.775601 2468 file.go:68] Watching path
  1048. # "C:\\etc\\kubernetes"
  1049. # ...
  1050. # E1023 23:44:31.794327 2468 file.go:182] Can't process manifest file
  1051. # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
  1052. # as pod(yaml: line 10: did not find expected <document start>), please check
  1053. # config file.
  1054. #
  1055. # Figure out how to change the directory that the kubelet monitors for new
  1056. # pod manifests.
  1057. # We configure the service to restart on failure, after 10s wait. We reset
  1058. # the restart count to 0 each time, so we re-use our restart/10000 action on
  1059. # each failure. Note it currently restarts even when explicitly stopped, you
  1060. # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
  1061. # kubelet`). See issue #72900.
  1062. if (Get-Process | Where-Object Name -eq "kubelet") {
  1063. Log-Output -Fatal `
  1064. "A kubelet process is already running, don't know what to do"
  1065. }
  1066. Log-Output "Creating kubelet service"
  1067. & sc.exe create kubelet binPath= "${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
  1068. & sc.exe failure kubelet reset= 0 actions= restart/10000
  1069. Log-Output "Starting kubelet service"
  1070. & sc.exe start kubelet
  1071. Log-Output "Waiting 10 seconds for kubelet to stabilize"
  1072. Start-Sleep 10
  1073. if (Get-Process | Where-Object Name -eq "kube-proxy") {
  1074. Log-Output -Fatal `
  1075. "A kube-proxy process is already running, don't know what to do"
  1076. }
  1077. Log-Output "Creating kube-proxy service"
  1078. & sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
  1079. & sc.exe failure kube-proxy reset= 0 actions= restart/10000
  1080. Log-Output "Starting kube-proxy service"
  1081. & sc.exe start kube-proxy
  1082. # F1020 23:08:52.000083 9136 server.go:361] unable to load in-cluster
  1083. # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
  1084. # defined
  1085. # TODO(pjh): still getting errors like these in kube-proxy log:
  1086. # E1023 04:03:58.143449 4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Endpoints: Get https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1087. # E1023 04:03:58.150266 4840 reflector.go:205] k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129: Failed to list *core.Service: Get https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial tcp 35.239.84.171:443: connectex: A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond.
  1088. WaitFor_KubeletAndKubeProxyReady
  1089. Verify_GceMetadataServerRouteIsPresent
  1090. Log-Output "Kubernetes components started successfully"
  1091. }
  1092. # Wait for kubelet and kube-proxy to be ready within 10s.
  1093. function WaitFor_KubeletAndKubeProxyReady {
  1094. $waited = 0
  1095. $timeout = 10
  1096. while (((Get-Service kube-proxy).Status -ne 'Running' -or (Get-Service kubelet).Status -ne 'Running') -and $waited -lt $timeout) {
  1097. Start-Sleep 1
  1098. $waited++
  1099. }
  1100. # Timeout occurred
  1101. if ($waited -ge $timeout) {
  1102. Log-Output "$(Get-Service kube* | Out-String)"
  1103. Throw ("Timeout while waiting ${timeout} seconds for kubelet and kube-proxy services to start")
  1104. }
  1105. }
  1106. # Runs 'kubectl get nodes'.
  1107. # TODO(pjh): run more verification commands.
  1108. function Verify-WorkerServices {
  1109. Log-Output ("kubectl get nodes:`n" +
  1110. $(& "${env:NODE_DIR}\kubectl.exe" get nodes | Out-String))
  1111. Verify_GceMetadataServerRouteIsPresent
  1112. Log_Todo "run more verification commands."
  1113. }
  1114. # Downloads crictl.exe and installs it in $env:NODE_DIR.
  1115. function DownloadAndInstall-Crictl {
  1116. if (-not (ShouldWrite-File ${env:NODE_DIR}\crictl.exe)) {
  1117. return
  1118. }
  1119. $url = ('https://storage.googleapis.com/kubernetes-release/crictl/' +
  1120. 'crictl-' + $CRICTL_VERSION + '-windows-amd64.exe')
  1121. MustDownload-File `
  1122. -URLs $url `
  1123. -OutFile ${env:NODE_DIR}\crictl.exe `
  1124. -Hash $CRICTL_SHA256 `
  1125. -Algorithm SHA256
  1126. }
  1127. # Sets crictl configuration values.
  1128. function Configure-Crictl {
  1129. if (${env:CONTAINER_RUNTIME_ENDPOINT}) {
  1130. & "${env:NODE_DIR}\crictl.exe" config runtime-endpoint `
  1131. ${env:CONTAINER_RUNTIME_ENDPOINT}
  1132. }
  1133. }
  1134. # Pulls the infra/pause container image onto the node so that it will be
  1135. # immediately available when the kubelet tries to run pods.
  1136. # TODO(pjh): downloading the container container image may take a few minutes;
  1137. # figure out how to run this in the background while perform the rest of the
  1138. # node startup steps!
  1139. # Pull-InfraContainer must be called AFTER Verify-WorkerServices.
  1140. function Pull-InfraContainer {
  1141. $name, $label = $INFRA_CONTAINER -split ':',2
  1142. if (-not ("$(& crictl images)" -match "$name.*$label")) {
  1143. & crictl pull $INFRA_CONTAINER
  1144. if (!$?) {
  1145. throw "Error running 'crictl pull $INFRA_CONTAINER'"
  1146. }
  1147. }
  1148. $inspect = "$(& crictl inspecti $INFRA_CONTAINER | Out-String)"
  1149. Log-Output "Infra/pause container:`n$inspect"
  1150. }
  1151. # Setup the container runtime on the node. It supports both
  1152. # Docker and containerd.
  1153. function Setup-ContainerRuntime {
  1154. if (${env:CONTAINER_RUNTIME} -eq "containerd") {
  1155. Install_Containerd
  1156. Configure_Containerd
  1157. Start_Containerd
  1158. } else {
  1159. Create_DockerRegistryKey
  1160. Configure_Dockerd
  1161. }
  1162. }
  1163. # Add a registry key for docker in EventLog so that log messages are mapped
  1164. # correctly. This is a workaround since the key is missing in the base image.
  1165. # https://github.com/MicrosoftDocs/Virtualization-Documentation/pull/503
  1166. # TODO: Fix this in the base image.
  1167. # TODO(random-liu): Figure out whether we need this for containerd.
  1168. function Create_DockerRegistryKey {
  1169. $tmp_dir = 'C:\tmp_docker_reg'
  1170. New-Item -Force -ItemType 'directory' ${tmp_dir} | Out-Null
  1171. $reg_file = 'docker.reg'
  1172. Set-Content ${tmp_dir}\${reg_file} `
  1173. 'Windows Registry Editor Version 5.00
  1174. [HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\EventLog\Application\docker]
  1175. "CustomSource"=dword:00000001
  1176. "EventMessageFile"="C:\\Program Files\\docker\\dockerd.exe"
  1177. "TypesSupported"=dword:00000007'
  1178. Log-Output "Importing registry key for Docker"
  1179. reg import ${tmp_dir}\${reg_file}
  1180. Remove-Item -Force -Recurse ${tmp_dir}
  1181. }
  1182. # Configure Docker daemon and restart the service.
  1183. function Configure_Dockerd {
  1184. Set-Content "C:\ProgramData\docker\config\daemon.json" @'
  1185. {
  1186. "log-driver": "json-file",
  1187. "log-opts": {
  1188. "max-size": "1m",
  1189. "max-file": "5"
  1190. }
  1191. }
  1192. '@
  1193. Restart-Service Docker
  1194. }
  1195. # Writes a CNI config file under $env:CNI_CONFIG_DIR for containerd.
  1196. #
  1197. # Prerequisites:
  1198. # $env:POD_CIDR is set (by Set-PodCidr).
  1199. # The "management" interface exists (Configure-HostNetworkingService).
  1200. # The HNS network for pod networking has been configured
  1201. # (Configure-HostNetworkingService).
  1202. # Containerd is installed (Install_Containerd).
  1203. #
  1204. # Required ${kube_env} keys:
  1205. # DNS_SERVER_IP
  1206. # DNS_DOMAIN
  1207. # CLUSTER_IP_RANGE
  1208. # SERVICE_CLUSTER_IP_RANGE
  1209. function Configure_Containerd_CniNetworking {
  1210. $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
  1211. if (-not (ShouldWrite-File ${l2bridge_conf})) {
  1212. return
  1213. }
  1214. $mgmt_ip = (Get_MgmtNetAdapter |
  1215. Get-NetIPAddress -AddressFamily IPv4).IPAddress
  1216. $mgmt_subnet = Get_MgmtSubnet
  1217. Log-Output ("using mgmt IP ${mgmt_ip} and mgmt subnet ${mgmt_subnet} for " +
  1218. "CNI config")
  1219. $pod_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
  1220. # Explanation of the CNI config values:
  1221. # CLUSTER_CIDR: the cluster CIDR from which pod CIDRs are allocated.
  1222. # POD_CIDR: the pod CIDR assigned to this node.
  1223. # POD_GATEWAY: the gateway IP.
  1224. # MGMT_SUBNET: the subnet on which the Windows pods + kubelet will
  1225. # communicate with the rest of the cluster without NAT (i.e. the subnet
  1226. # that VM internal IPs are allocated from).
  1227. # MGMT_IP: the IP address assigned to the node's primary network interface
  1228. # (i.e. the internal IP of the GCE VM).
  1229. # SERVICE_CIDR: the CIDR used for kubernetes services.
  1230. # DNS_SERVER_IP: the cluster's DNS server IP address.
  1231. # DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
  1232. New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null
  1233. Set-Content ${l2bridge_conf} `
  1234. '{
  1235. "cniVersion": "0.2.0",
  1236. "name": "l2bridge",
  1237. "type": "sdnbridge",
  1238. "master": "Ethernet",
  1239. "capabilities": {
  1240. "portMappings": true,
  1241. "dns": true
  1242. },
  1243. "ipam": {
  1244. "subnet": "POD_CIDR",
  1245. "routes": [
  1246. {
  1247. "GW": "POD_GATEWAY"
  1248. }
  1249. ]
  1250. },
  1251. "dns": {
  1252. "Nameservers": [
  1253. "DNS_SERVER_IP"
  1254. ],
  1255. "Search": [
  1256. "DNS_DOMAIN"
  1257. ]
  1258. },
  1259. "AdditionalArgs": [
  1260. {
  1261. "Name": "EndpointPolicy",
  1262. "Value": {
  1263. "Type": "OutBoundNAT",
  1264. "Settings": {
  1265. "Exceptions": [
  1266. "CLUSTER_CIDR",
  1267. "SERVICE_CIDR",
  1268. "MGMT_SUBNET"
  1269. ]
  1270. }
  1271. }
  1272. },
  1273. {
  1274. "Name": "EndpointPolicy",
  1275. "Value": {
  1276. "Type": "SDNRoute",
  1277. "Settings": {
  1278. "DestinationPrefix": "SERVICE_CIDR",
  1279. "NeedEncap": true
  1280. }
  1281. }
  1282. },
  1283. {
  1284. "Name": "EndpointPolicy",
  1285. "Value": {
  1286. "Type": "SDNRoute",
  1287. "Settings": {
  1288. "DestinationPrefix": "MGMT_IP/32",
  1289. "NeedEncap": true
  1290. }
  1291. }
  1292. }
  1293. ]
  1294. }'.replace('POD_CIDR', ${env:POD_CIDR}).`
  1295. replace('POD_GATEWAY', ${pod_gateway}).`
  1296. replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP']).`
  1297. replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN']).`
  1298. replace('MGMT_IP', ${mgmt_ip}).`
  1299. replace('CLUSTER_CIDR', ${kube_env}['CLUSTER_IP_RANGE']).`
  1300. replace('SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE']).`
  1301. replace('MGMT_SUBNET', ${mgmt_subnet})
  1302. Log-Output "containerd CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  1303. }
  1304. # Download and install containerd and CNI binaries into $env:NODE_DIR.
  1305. function Install_Containerd {
  1306. # Assume that presence of containerd.exe indicates that all containerd
  1307. # binaries were already previously downloaded to this node.
  1308. if (-not (ShouldWrite-File ${env:NODE_DIR}\containerd.exe)) {
  1309. return
  1310. }
  1311. # TODO(random-liu): Change this to official release path after testing.
  1312. $CONTAINERD_GCS_BUCKET = "cri-containerd-staging/windows"
  1313. $tmp_dir = 'C:\containerd_tmp'
  1314. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1315. $version_url = "https://storage.googleapis.com/$CONTAINERD_GCS_BUCKET/latest"
  1316. MustDownload-File -URLs $version_url -OutFile $tmp_dir\version
  1317. $version = $(Get-Content $tmp_dir\version)
  1318. $tar_url = ("https://storage.googleapis.com/$CONTAINERD_GCS_BUCKET/" +
  1319. "cri-containerd-cni-$version.windows-amd64.tar.gz")
  1320. $sha_url = $tar_url + ".sha256"
  1321. MustDownload-File -URLs $sha_url -OutFile $tmp_dir\sha256
  1322. $sha = $(Get-Content $tmp_dir\sha256)
  1323. MustDownload-File `
  1324. -URLs $tar_url `
  1325. -OutFile $tmp_dir\containerd.tar.gz `
  1326. -Hash $sha `
  1327. -Algorithm SHA256
  1328. tar xzvf $tmp_dir\containerd.tar.gz -C $tmp_dir
  1329. Move-Item -Force $tmp_dir\cni\*.exe ${env:CNI_DIR}\
  1330. Move-Item -Force $tmp_dir\*.exe ${env:NODE_DIR}\
  1331. Remove-Item -Force -Recurse $tmp_dir
  1332. }
  1333. # Generates the containerd config.toml file.
  1334. function Configure_Containerd {
  1335. $config_dir = 'C:\Program Files\containerd'
  1336. New-Item $config_dir -ItemType 'directory' -Force | Out-Null
  1337. Set-Content "$config_dir\config.toml" @"
  1338. [plugins.cri]
  1339. sandbox_image = 'INFRA_CONTAINER_IMAGE'
  1340. [plugins.cri.cni]
  1341. bin_dir = 'CNI_BIN_DIR'
  1342. conf_dir = 'CNI_CONF_DIR'
  1343. "@.replace('INFRA_CONTAINER_IMAGE', $INFRA_CONTAINER).`
  1344. replace('CNI_BIN_DIR', ${env:CNI_DIR}).`
  1345. replace('CNI_CONF_DIR', ${env:CNI_CONFIG_DIR})
  1346. }
  1347. # Register and start containerd service.
  1348. function Start_Containerd {
  1349. Log-Output "Creating containerd service"
  1350. & containerd.exe --register-service --log-file ${env:LOGS_DIR}/containerd.log
  1351. Log-Output "Starting containerd service"
  1352. Start-Service containerd
  1353. }
# TODO(pjh): move the Stackdriver logging agent code below into a separate
# module; it was put here temporarily to avoid disrupting the file layout in
# the K8s release machinery.

# Version tag of the Stackdriver logging agent installer. It is interpolated
# into the installer file name (StackdriverLogging-<version>.exe).
$STACKDRIVER_VERSION = 'v1-9'
# Root directory of the Stackdriver installation. Its presence is treated as
# "agent already installed", and the agent's binaries, position files and
# config.d directory are addressed relative to it.
$STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
  1359. # Restarts the Stackdriver logging agent, or starts it if it is not currently
  1360. # running. A standard `Restart-Service StackdriverLogging` may fail because
  1361. # StackdriverLogging sometimes is unstoppable, so this function works around it
  1362. # by killing the processes.
  1363. function Restart-LoggingAgent {
  1364. Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
  1365. # Wait (if necessary) for service to stop.
  1366. $timeout = 10
  1367. $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  1368. for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  1369. Start-Sleep 1
  1370. $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  1371. }
  1372. if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
  1373. # Force kill the processes.
  1374. Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  1375. Where CommandLine -Like '*Stackdriver/logging*').ProcessId
  1376. # Wait until process has stopped.
  1377. $waited = 0
  1378. $log_period = 10
  1379. $timeout = 60
  1380. while ((Get-service StackdriverLogging).Status -ne 'Stopped' -and $waited -lt $timeout) {
  1381. Start-Sleep 1
  1382. $waited++
  1383. if ($waited % $log_period -eq 0) {
  1384. Log-Output "Waiting for StackdriverLogging service to stop"
  1385. }
  1386. }
  1387. # Timeout occurred
  1388. if ($waited -ge $timeout) {
  1389. Throw ("Timeout while waiting for StackdriverLogging service to stop")
  1390. }
  1391. }
  1392. Start-Service StackdriverLogging
  1393. }
  1394. # Installs the Stackdriver logging agent according to
  1395. # https://cloud.google.com/logging/docs/agent/installation.
  1396. # TODO(yujuhong): Update to a newer Stackdriver agent once it is released to
  1397. # support kubernetes metadata properly. The current version does not recognizes
  1398. # the local resource key "logging.googleapis.com/local_resource_id", and fails
  1399. # to label namespace, pod and container names on the logs.
  1400. function Install-LoggingAgent {
  1401. # Remove the existing storage.json file if it exists. This is a workaround
  1402. # for the bug where the logging agent cannot start up if the file is
  1403. # corrupted.
  1404. Remove-Item `
  1405. -Force `
  1406. -ErrorAction Ignore `
  1407. ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
  1408. "storage.json")
  1409. if (Test-Path $STACKDRIVER_ROOT) {
  1410. # Note: we should reinstall the Stackdriver agent if $REDO_STEPS is true
  1411. # here, but we don't know how to run the installer without it prompting
  1412. # when Stackdriver is already installed. We dumped the strings in the
  1413. # installer binary and searched for flags to do this but found nothing. Oh
  1414. # well.
  1415. Log-Output ("Skip: $STACKDRIVER_ROOT is already present, assuming that " +
  1416. "Stackdriver logging agent is already installed")
  1417. Restart-LoggingAgent
  1418. return
  1419. }
  1420. $url = ("https://storage.googleapis.com/gke-release/winnode/stackdriver/" +
  1421. "StackdriverLogging-${STACKDRIVER_VERSION}.exe")
  1422. $tmp_dir = 'C:\stackdriver_tmp'
  1423. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1424. $installer_file = "${tmp_dir}\StackdriverLogging-${STACKDRIVER_VERSION}.exe"
  1425. MustDownload-File -OutFile $installer_file -URLs $url
  1426. # Start the installer silently. This automatically starts the
  1427. # "StackdriverLogging" service.
  1428. Log-Output 'Invoking Stackdriver installer'
  1429. Start-Process $installer_file -ArgumentList "/S" -Wait
  1430. # Install the record-reformer plugin.
  1431. Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
  1432. -ArgumentList "install","fluent-plugin-record-reformer" `
  1433. -Wait
  1434. # Install the multi-format-parser plugin.
  1435. Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
  1436. -ArgumentList "install","fluent-plugin-multi-format-parser" `
  1437. -Wait
  1438. Remove-Item -Force -Recurse $tmp_dir
  1439. }
  1440. # Writes the logging configuration file for Stackdriver. Restart-LoggingAgent
  1441. # should then be called to pick up the new configuration.
  1442. function Configure-LoggingAgent {
  1443. $fluentd_config_dir = "$STACKDRIVER_ROOT\LoggingAgent\config.d"
  1444. $fluentd_config_file = "$fluentd_config_dir\k8s_containers.conf"
  1445. # Create a configuration file for kubernetes containers.
  1446. # The config.d directory should have already been created automatically, but
  1447. # try creating again just in case.
  1448. New-Item $fluentd_config_dir -ItemType 'directory' -Force | Out-Null
  1449. $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname))
  1450. $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII
  1451. Log-Output "Wrote fluentd logging config to $fluentd_config_file"
  1452. }
# Fluentd configuration template that Configure-LoggingAgent writes to
# config.d\k8s_containers.conf. This is a literal (single-quoted) here-string,
# so PowerShell performs no variable expansion inside it; the '#' lines within
# it are part of the string and end up in the written file.
# The NODE_NAME placeholder must be replaced with the node's name (hostname).
$FLUENTD_CONFIG = @'
# This configuration file for Fluentd is used to watch changes to kubernetes
# container logs in the directory /var/lib/docker/containers/ and submit the
# log records to Google Cloud Logging using the cloud-logging plugin.
#
# Example
# =======
# A line in the Docker log file might look like this JSON:
#
# {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
# "stream":"stderr",
# "time":"2014-09-25T21:15:03.499185026Z"}
#
# The original tag is derived from the log file's location.
# For example a Docker container's logs might be in the directory:
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
# and in the file:
# 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# where 997599971ee6... is the Docker ID of the running container.
# The Kubernetes kubelet makes a symbolic link to this file on the host
# machine in the /var/log/containers directory which includes the pod name,
# the namespace name and the Kubernetes container name:
# synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# ->
# /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
# The /var/log directory on the host is mapped to the /var/log directory in the container
# running this instance of Fluentd and we end up collecting the file:
# /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# This results in the tag:
# var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
# where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
# namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
# the container ID.
# The record reformer is used to extract pod_name, namespace_name and
# container_name from the tag and set them in a local_resource_id in the
# format of:
# 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
# The reformer also changes the tags to 'stderr' or 'stdout' based on the
# value of 'stream'.
# local_resource_id is later used by google_cloud plugin to determine the
# monitored resource to ingest logs against.
# Json Log Example:
# {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
# CRI Log Example:
# 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
<source>
@type tail
path /var/log/containers/*.log
pos_file /var/log/gcp-containers.log.pos
# Tags at this point are in the format of:
# reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
tag reform.*
read_from_head true
<parse>
@type multi_format
<pattern>
format json
time_key time
time_format %Y-%m-%dT%H:%M:%S.%NZ
</pattern>
<pattern>
format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
time_format %Y-%m-%dT%H:%M:%S.%N%:z
</pattern>
</parse>
</source>
# Example:
# I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
<source>
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
time_format %m%d %H:%M:%S.%N
path /etc/kubernetes/logs/kubelet.log
pos_file /etc/kubernetes/logs/gcp-kubelet.log.pos
tag kubelet
</source>
# Example:
# I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
<source>
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^\w\d{4}/
format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
time_format %m%d %H:%M:%S.%N
path /etc/kubernetes/logs/kube-proxy.log
pos_file /etc/kubernetes/logs/gcp-kube-proxy.log.pos
tag kube-proxy
</source>
# Example:
# time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
<source>
@type tail
format multiline
multiline_flush_interval 5s
format_firstline /^time=/
format1 /^time="(?<time>[^ ]*)" level=(?<severity>\w*) (?<message>.*)/
time_format %Y-%m-%dT%H:%M:%S.%N%z
path /etc/kubernetes/logs/containerd.log
pos_file /etc/kubernetes/logs/gcp-containerd.log.pos
tag container-runtime
</source>
<match reform.**>
@type record_reformer
enable_ruby true
<record>
# Extract local_resource_id from tag for 'k8s_container' monitored
# resource. The format is:
# 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
"logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
# Rename the field 'log' to a more generic field 'message'. This way the
# fluent-plugin-google-cloud knows to flatten the field as textPayload
# instead of jsonPayload after extracting 'time', 'severity' and
# 'stream' from the record.
message ${record['log']}
# If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
</record>
tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
remove_keys stream,log
</match>
# TODO: detect exceptions and forward them as one log entry using the
# detect_exceptions plugin
# This section is exclusive for k8s_container logs. These logs come with
# 'raw.stderr' or 'raw.stdout' tags.
<match {raw.stderr,raw.stdout}>
@type google_cloud
# Try to detect JSON formatted log entries.
detect_json true
# Allow log entries from multiple containers to be sent in the same request.
split_logs_by_tag false
# Set the buffer type to file to improve the reliability and reduce the memory consumption
buffer_type file
buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
# Set queue_full action to block because we want to pause gracefully
# in case of the off-the-limits load instead of throwing an exception
buffer_queue_full_action block
# Set the chunk limit conservatively to avoid exceeding the recommended
# chunk size of 5MB per write request.
buffer_chunk_limit 512k
# Cap the combined memory usage of this buffer and the one below to
# 512KiB/chunk * (6 + 2) chunks = 4 MiB
buffer_queue_limit 6
# Never wait more than 5 seconds before flushing logs in the non-error case.
flush_interval 5s
# Never wait longer than 30 seconds between retries.
max_retry_wait 30
# Disable the limit on the number of retries (retry forever).
disable_retry_limit
# Use multiple threads for processing.
num_threads 2
use_grpc true
# Skip timestamp adjustment as this is in a controlled environment with
# known timestamp format. This helps with CPU usage.
adjust_invalid_timestamps false
</match>
# Attach local_resource_id for 'k8s_node' monitored resource.
<filter **>
@type record_transformer
enable_ruby true
<record>
"logging.googleapis.com/local_resource_id" ${"k8s_node.NODE_NAME"}
</record>
</filter>
'@
# Export all public functions: only functions whose names contain a hyphen
# match the *-* wildcard. Helpers named with underscores instead (such as
# Install_Containerd) do not match and are therefore not exported by this call.
Export-ModuleMember -Function *-*