k8s-node-setup.psm1 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807
  1. # Copyright 2019 The Kubernetes Authors.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. <#
  15. .SYNOPSIS
  16. Library for configuring Windows nodes and joining them to the cluster.
  17. .NOTES
  18. This module depends on common.psm1.
  19. Some portions copied / adapted from
  20. https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1.
  21. .EXAMPLE
  22. Suggested usage for dev/test:
  23. [Net.ServicePointManager]::SecurityProtocol = `
  24. [Net.SecurityProtocolType]::Tls12
  25. Invoke-WebRequest `
  26. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/k8s-node-setup.psm1 `
  27. -OutFile C:\k8s-node-setup.psm1
  28. Invoke-WebRequest `
  29. https://github.com/kubernetes/kubernetes/raw/master/cluster/gce/windows/configure.ps1 `
  30. -OutFile C:\configure.ps1
  31. Import-Module -Force C:\k8s-node-setup.psm1 # -Force to override existing
  32. # Execute functions manually or run configure.ps1.
  33. #>
  34. # IMPORTANT PLEASE NOTE:
  35. # Any time the file structure in the `windows` directory changes, `windows/BUILD`
  36. # and `k8s.io/release/lib/releaselib.sh` must be manually updated with the changes.
  37. # We HIGHLY recommend not changing the file structure, because consumers of
  38. # Kubernetes releases depend on the release structure remaining stable.
  39. # TODO: update scripts for these style guidelines:
  40. # - Remove {} around variable references unless actually needed for clarity.
  41. # - Always use single-quoted strings unless actually interpolating variables
  42. # or using escape characters.
  43. # - Use "approved verbs":
  44. # https://docs.microsoft.com/en-us/powershell/developer/cmdlet/approved-verbs-for-windows-powershell-commands
  45. # - Document functions using proper syntax:
  46. # https://technet.microsoft.com/en-us/library/hh847834(v=wps.620).aspx
  # Container image reference for the Windows "pause" image.
  $INFRA_CONTAINER = 'gcr.io/gke-release/pause-win:1.1.0'

  # Address of the GCE metadata server; used below to build route prefixes and
  # metadata URLs.
  $GCE_METADATA_SERVER = '169.254.169.254'

  # Wildcard pattern matching the name of the "management" interface, which the
  # kubelet and Windows pods use to talk to the rest of the Kubernetes cluster
  # *without NAT*. This interface does not exist until an initial HNS network
  # has been created on the Windows node - see Add_InitialHnsNetwork().
  $MGMT_ADAPTER_NAME = 'vEthernet (Ethernet*'

  Import-Module -Force C:\common.psm1
  55. # Writes a TODO with $Message to the console.
  function Log_Todo {
    # Forwards $Message to Log-Output, prefixed with "TODO: ".
    param (
      [parameter(Mandatory=$true)] [string]$Message
    )

    $todo_text = 'TODO: ' + $Message
    Log-Output $todo_text
  }
  62. # Writes a not-implemented warning with $Message to the console and exits the
  63. # script.
  function Log_NotImplemented {
    # Reports "Not implemented yet: <Message>" through Log-Output with the
    # -Fatal switch set.
    param (
      [parameter(Mandatory=$true)] [string]$Message
    )

    $warning_text = 'Not implemented yet: ' + $Message
    Log-Output -Fatal $warning_text
  }
  70. # Fails and exits if the route to the GCE metadata server is not present,
  71. # otherwise does nothing and emits nothing.
  function Verify_GceMetadataServerRouteIsPresent {
    # Looks up the IPv4 /32 route to the GCE metadata server. When the lookup
    # raises a CimJobException, the current IPv4 routing table is logged with
    # -Fatal. Emits nothing on success.
    $route_query = @{
      ErrorAction       = 'Stop'
      AddressFamily     = 'IPv4'
      DestinationPrefix = "${GCE_METADATA_SERVER}/32"
    }

    Try {
      # -ErrorAction Stop turns "route not found" into a catchable exception.
      Get-NetRoute @route_query | Out-Null
    } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
      $route_table = Get-NetRoute -AddressFamily IPv4 | Out-String
      $failure = "GCE metadata server route is not present as expected.`n" +
        "${route_table}"
      Log-Output -Fatal $failure
    }
  }
  84. # Checks if the route to the GCE metadata server is present. Returns when the
  85. # route is NOT present or after a timeout has expired.
  function WaitFor_GceMetadataServerRouteToBeRemoved {
    # Polls every 2 seconds, for up to 60 seconds, until the IPv4 /32 route to
    # the GCE metadata server can no longer be found. Returns silently both when
    # the route disappears and when the timeout is reached.
    $timeout = 60
    $poll_interval = 2
    $route_query = @{
      ErrorAction       = 'Stop'
      AddressFamily     = 'IPv4'
      DestinationPrefix = "${GCE_METADATA_SERVER}/32"
    }

    Log-Output "Waiting up to ${timeout} seconds for GCE metadata server route to be removed"

    for ($waited = 0; $waited -lt $timeout; $waited += $poll_interval) {
      Try {
        Get-NetRoute @route_query | Out-Null
      } Catch [Microsoft.PowerShell.Cmdletization.Cim.CimJobException] {
        # The lookup failed, i.e. the route is gone: stop waiting.
        return
      }
      Start-Sleep $poll_interval
    }
  }
  105. # Adds a route to the GCE metadata server to every network interface.
  function Add_GceMetadataServerRoute {
    # Attempts to add a /32 route to the GCE metadata server on every network
    # adapter, ignoring per-adapter failures.
    #
    # Background: before HNS is set up the Windows VM has a "vEthernet (nat)"
    # interface and an "Ethernet" interface, and the metadata route lives on the
    # Ethernet interface. Once the HNS network is added, a
    # "vEthernet (Ethernet)" interface appears and seems to subsume the routes
    # of the "Ethernet" interface (adding routes on "Ethernet" at that point
    # fails with "New-NetRoute : Element not found"). Since it is hard to know
    # which adapter is the right one, the route is attempted on all of them.
    $metadata_prefix = "${GCE_METADATA_SERVER}/32"

    foreach ($adapter in Get-NetAdapter) {
      New-NetRoute `
        -ErrorAction Ignore `
        -DestinationPrefix $metadata_prefix `
        -InterfaceIndex $adapter.InterfaceIndex | Out-Null
    }
  }
  123. # Writes debugging information, such as Windows version and patch info, to the
  124. # console.
  function Dump-DebugInfoToConsole {
    # Logs the OS version, the installed hotfixes and the GCE 'image' instance
    # metadata value. Any terminating error is swallowed, so a failure here
    # never interrupts the caller.
    Try {
      $os_version = [System.Environment]::OSVersion.Version | Out-String
      $installed_patches = Get-Hotfix | Out-String
      $gce_image = Get-InstanceMetadata 'image' | Out-String

      Log-Output ("Windows version:`n" + $os_version)
      Log-Output ("Installed hotfixes:`n" + $installed_patches)
      Log-Output ("GCE Windows image:`n" + $gce_image)
    } Catch { }
  }
  135. # Converts the kube-env string, which is in YAML format, into a hashtable.
  136. #
  137. # Returns: a PowerShell Hashtable object containing the key-value pairs from
  138. # kube-env.
  function ConvertFrom-Yaml-KubeEnv {
    # Parses the flat "KEY: value" text of kube-env into a hashtable.
    #
    # A record starts on a line whose first character is not whitespace; lines
    # that start with whitespace are appended verbatim to the current record.
    # Empty lines are ignored. Each record is split on its first ':'; the value
    # has surrounding spaces, single quotes and double quotes trimmed.
    #
    # Records that contain no ':' are skipped with a warning. Previously they
    # triggered a "method on a null-valued expression" error because the value
    # half of the split was $null.
    #
    # Returns: a Hashtable mapping each key to its trimmed string value.
    param (
      [parameter(Mandatory=$true)] [string]$kube_env_str
    )

    # Pass 1: fold continuation lines into their records.
    $records = @()
    $current_record = $null
    foreach ($line in (${kube_env_str} -split '\r?\n')) {
      if ($line -match '^\S') {
        # A new record begins; flush the one collected so far.
        if ($null -ne $current_record) {
          $records += $current_record
        }
        $current_record = $line
      }
      elseif ($line -match '^\s') {
        $current_record += $line
      }
    }
    if ($current_record) {
      $records += $current_record
    }

    # Pass 2: split each record into key and value.
    $kube_env_table = @{}
    foreach ($record in $records) {
      $key, $val = $record -split ':', 2
      if ($null -eq $val) {
        Write-Warning "Ignoring kube-env line without a ':' separator: ${record}"
        continue
      }
      $kube_env_table[$key] = $val.Trim("'", ' ', '"')
    }
    return ${kube_env_table}
  }
  168. # Fetches the kube-env from the instance metadata.
  169. #
  170. # Returns: a PowerShell Hashtable object containing the key-value pairs from
  171. # kube-env.
  function Fetch-KubeEnv {
    # Reads the 'kube-env' instance metadata attribute (a string) and returns it
    # parsed into a Hashtable by ConvertFrom-Yaml-KubeEnv.
    #
    # Testing / debugging tip - obtain the raw text with either
    #   ${kube_env} = Get-InstanceMetadataAttribute 'kube-env'
    # or
    #   ${kube_env} = [IO.File]::ReadAllText(".\kubeEnv.txt")
    # then inspect the result:
    #   ${kube_env_table} = ConvertFrom-Yaml-KubeEnv ${kube_env}
    #   ${kube_env_table}
    #   ${kube_env_table}.GetType()
    $raw_kube_env = Get-InstanceMetadataAttribute 'kube-env'
    return (ConvertFrom-Yaml-KubeEnv ${raw_kube_env})
  }
  186. # Sets the environment variable $Key to $Value at the Machine scope (will
  187. # be present in the environment for all new shells after a reboot).
  function Set_MachineEnvironmentVar {
    # Stores $Key = $Value as a Machine-scoped environment variable.
    param (
      [parameter(Mandatory=$true)] [string]$Key,
      [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
    )

    $scope = [System.EnvironmentVariableTarget]::Machine
    [System.Environment]::SetEnvironmentVariable($Key, $Value, $scope)
  }
  195. # Sets the environment variable $Key to $Value in the current shell.
  function Set_CurrentShellEnvironmentVar {
    # Sets the environment variable $Key to $Value for the current process, so
    # it is visible as $env:<Key> in this shell.
    #
    # The value is stored literally. The previous implementation assembled a
    # PowerShell statement by string concatenation and ran it through
    # Invoke-Expression, which corrupted (or executed) values containing
    # double quotes, '$' or backticks.
    #
    # As with an assignment to $env:<Key>, an empty $Value removes the variable.
    param (
      [parameter(Mandatory=$true)] [string]$Key,
      [parameter(Mandatory=$true)] [AllowEmptyString()] [string]$Value
    )

    [Environment]::SetEnvironmentVariable($Key, $Value, 'Process')
  }
  204. # Sets environment variables used by Kubernetes binaries and by other functions
  205. # in this module. Depends on numerous ${kube_env} keys.
  function Set-EnvironmentVars {
    # Publishes a set of kube-env values (plus a few derived ones) as
    # environment variables, both at Machine scope and in the current shell.
    # Reads ${kube_env} from the caller's scope.
    #
    # Turning the kube-env values into environment variables is not required
    # but it makes debugging this script easier, and it also makes the syntax a
    # lot easier (${env:K8S_DIR} can be expanded within a string but
    # ${kube_env}['K8S_DIR'] cannot be afaik).
    #
    # NODE_DIR is appended to Path only when it is not already one of its
    # entries; appending unconditionally made the persisted Path grow by one
    # duplicate entry every time this function was re-run.
    $node_dir = ${kube_env}['NODE_DIR']
    $pki_dir = ${kube_env}['PKI_DIR']

    $path_value = ${env:Path}
    if (($path_value -split ';') -notcontains $node_dir) {
      $path_value = $path_value + ';' + $node_dir
    }

    $env_vars = @{
      'K8S_DIR' = ${kube_env}['K8S_DIR']
      'NODE_DIR' = $node_dir
      'CNI_DIR' = ${kube_env}['CNI_DIR']
      'CNI_CONFIG_DIR' = ${kube_env}['CNI_CONFIG_DIR']
      'WINDOWS_CNI_STORAGE_PATH' = ${kube_env}['WINDOWS_CNI_STORAGE_PATH']
      'WINDOWS_CNI_VERSION' = ${kube_env}['WINDOWS_CNI_VERSION']
      'PKI_DIR' = $pki_dir
      'CA_FILE_PATH' = ${kube_env}['CA_FILE_PATH']
      'KUBELET_CONFIG' = ${kube_env}['KUBELET_CONFIG_FILE']
      'BOOTSTRAP_KUBECONFIG' = ${kube_env}['BOOTSTRAP_KUBECONFIG_FILE']
      'KUBECONFIG' = ${kube_env}['KUBECONFIG_FILE']
      'KUBEPROXY_KUBECONFIG' = ${kube_env}['KUBEPROXY_KUBECONFIG_FILE']
      'LOGS_DIR' = ${kube_env}['LOGS_DIR']
      'MANIFESTS_DIR' = ${kube_env}['MANIFESTS_DIR']
      'Path' = $path_value
      'KUBE_NETWORK' = 'l2bridge'
      'KUBELET_CERT_PATH' = $pki_dir + '\kubelet.crt'
      'KUBELET_KEY_PATH' = $pki_dir + '\kubelet.key'
      'CONTAINER_RUNTIME' = ${kube_env}['CONTAINER_RUNTIME']
      'CONTAINER_RUNTIME_ENDPOINT' = ${kube_env}['CONTAINER_RUNTIME_ENDPOINT']
      'LICENSE_DIR' = 'C:\Program Files\Google\Compute Engine\THIRD_PARTY_NOTICES'
    }

    # Set the environment variables in two ways: permanently on the machine
    # (only takes effect after a reboot), and in the current shell.
    foreach ($entry in $env_vars.GetEnumerator()) {
      $message = "Setting environment variable: " + $entry.Key + " = " + $entry.Value
      Log-Output ${message}
      Set_MachineEnvironmentVar $entry.Key $entry.Value
      Set_CurrentShellEnvironmentVar $entry.Key $entry.Value
    }
  }
  243. # Configures various settings and prerequisites needed for the rest of the
  244. # functions in this module and the Kubernetes binaries to operate properly.
  function Set-PrerequisiteOptions {
    # Applies machine settings that the rest of this module relies on:
    # disables and stops the Windows Update service, and switches .NET web
    # requests in this session to TLS 1.2.

    # Windows updates cause the node to reboot at arbitrary times.
    Log-Output 'Disabling Windows Update service'
    & sc.exe config wuauserv start=disabled
    & sc.exe stop wuauserv

    # Use TLS 1.2: needed for Invoke-WebRequest downloads from github.com.
    $tls12 = [Net.SecurityProtocolType]::Tls12
    [Net.ServicePointManager]::SecurityProtocol = $tls12
  }
  254. # Creates directories where other functions in this module will read and write
  255. # data.
  256. # Note: C:\tmp is required for running certain kubernetes tests.
  257. # C:\var\log is used by kubelet to store container logs and is also
  258. # hard-coded in the fluentd/stackdriver config for log collection.
  function Create-Directories {
    # Creates every directory this module reads from or writes to: the eight
    # directories named by environment variables set in Set-EnvironmentVars,
    # plus C:\tmp and C:\var\log. Existing directories are left in place
    # (-Force). The objects emitted by mkdir are passed through to the caller.
    #
    # The list is a flat array. It used to be written with a misplaced closing
    # parenthesis that nested the first eight paths as a single sub-array, so
    # the loop ran three times and worked only because mkdir accepts an array
    # of paths.
    Log-Output "Creating ${env:K8S_DIR} and its subdirectories."

    $directories = @(
      "${env:K8S_DIR}",
      "${env:NODE_DIR}",
      "${env:LOGS_DIR}",
      "${env:CNI_DIR}",
      "${env:CNI_CONFIG_DIR}",
      "${env:MANIFESTS_DIR}",
      "${env:PKI_DIR}",
      "${env:LICENSE_DIR}",
      'C:\tmp',
      'C:\var\log'
    )

    foreach ($dir in $directories) {
      mkdir -Force $dir
    }
  }
  267. # Downloads some external helper scripts needed by other functions in this
  268. # module.
  function Download-HelperScripts {
    # Downloads hns.psm1 into $env:K8S_DIR, unless ShouldWrite-File reports
    # that the file should not be (re)written.
    $hns_module_path = "${env:K8S_DIR}\hns.psm1"

    if (ShouldWrite-File $hns_module_path) {
      MustDownload-File `
        -OutFile $hns_module_path `
        -URLs 'https://storage.googleapis.com/gke-release/winnode/config/sdn/master/hns.psm1'
    }
  }
  276. # Takes the Windows version string from the cluster bash scripts (e.g.
  277. # 'win1809') and returns the correct label to use for containers on this
  278. # version of Windows. Throws if $WinVersion is unknown.
  function Get_ContainerVersionLabel {
    # Maps a Windows version string to its container version label:
    #   contains '1809' -> '1809'
    #   contains '2019' -> 'ltsc2019'
    # '1809' is checked first. Throws for any other input.
    param (
      [parameter(Mandatory=$true)] [string]$WinVersion
    )

    # switch -regex performs the same case-insensitive regex test as -match.
    switch -regex ($WinVersion) {
      '1809' { return '1809' }
      '2019' { return 'ltsc2019' }
    }

    Throw "Unknown Windows version $WinVersion, don't know its container version label"
  }
  293. # Downloads the gke-exec-auth-plugin for TPM-based authentication to the
  294. # master, if auth plugin support has been requested for this node (see
  295. # Test-NodeUsesAuthPlugin).
  296. # https://github.com/kubernetes/cloud-provider-gcp/tree/master/cmd/gke-exec-auth-plugin
  297. #
  298. # Required ${kube_env} keys:
  299. # EXEC_AUTH_PLUGIN_LICENSE_URL
  300. # EXEC_AUTH_PLUGIN_SHA1
  301. # EXEC_AUTH_PLUGIN_URL
  function DownloadAndInstall-AuthPlugin {
    # Downloads gke-exec-auth-plugin.exe into $env:NODE_DIR and its license
    # into $env:LICENSE_DIR. Does nothing when Test-NodeUsesAuthPlugin reports
    # that the node does not use the plugin, or when ShouldWrite-File declines
    # to (re)write the executable. Reads ${kube_env} from the caller's scope.
    #
    # If any of EXEC_AUTH_PLUGIN_LICENSE_URL, EXEC_AUTH_PLUGIN_SHA1 or
    # EXEC_AUTH_PLUGIN_URL is absent, a fatal message naming the missing keys
    # is logged. The earlier message tried to append the whole kube-env table
    # through `Out-String $kube_env`, but Out-String does not take its input
    # positionally, and a full dump would have put every kube-env value into
    # the log.
    if (-not (Test-NodeUsesAuthPlugin ${kube_env})) {
      Log-Output 'Skipping download of auth plugin'
      return
    }

    $plugin_path = "${env:NODE_DIR}\gke-exec-auth-plugin.exe"
    if (-not (ShouldWrite-File $plugin_path)) {
      return
    }

    $required_keys = @(
      'EXEC_AUTH_PLUGIN_LICENSE_URL',
      'EXEC_AUTH_PLUGIN_SHA1',
      'EXEC_AUTH_PLUGIN_URL'
    )
    $missing_keys = @($required_keys |
      Where-Object { -not $kube_env.ContainsKey($_) })
    if ($missing_keys.Count -gt 0) {
      Log-Output -Fatal ("Missing one or more kube-env keys needed for " +
        "downloading auth plugin: $($missing_keys -join ', ')")
    }

    MustDownload-File `
      -URLs ${kube_env}['EXEC_AUTH_PLUGIN_URL'] `
      -Hash ${kube_env}['EXEC_AUTH_PLUGIN_SHA1'] `
      -OutFile $plugin_path
    MustDownload-File `
      -URLs ${kube_env}['EXEC_AUTH_PLUGIN_LICENSE_URL'] `
      -OutFile "${env:LICENSE_DIR}\LICENSE_gke-exec-auth-plugin.txt"
  }
  324. # Downloads the Kubernetes binaries from kube-env's NODE_BINARY_TAR_URL and
  325. # puts them in a subdirectory of $env:K8S_DIR.
  326. #
  327. # Required ${kube_env} keys:
  328. # NODE_BINARY_TAR_URL
  function DownloadAndInstall-KubernetesBinaries {
    # Downloads the node binary tarball named by kube-env's NODE_BINARY_TAR_URL
    # (a comma-separated list of URLs) and extracts it into the parent
    # directory of $env:K8S_DIR. Reads ${kube_env} from the caller's scope.
    # NODE_BINARY_TAR_HASH, when present, is passed to MustDownload-File.

    # Presence of kubelet.exe is taken to mean that the kubernetes binaries
    # were already downloaded to this node.
    $kubelet_path = "${env:NODE_DIR}\kubelet.exe"
    if (-not (ShouldWrite-File $kubelet_path)) {
      return
    }

    $staging_dir = 'C:\k8s_tmp'
    New-Item -Force -ItemType 'directory' $staging_dir | Out-Null

    $tar_urls = ${kube_env}['NODE_BINARY_TAR_URL'].Split(",")
    # The archive is saved under the file name of the first URL.
    $archive_name = Split-Path -leaf $tar_urls[0]
    $archive_path = "${staging_dir}\${archive_name}"

    $expected_hash = $null
    if ($kube_env.ContainsKey('NODE_BINARY_TAR_HASH')) {
      $expected_hash = ${kube_env}['NODE_BINARY_TAR_HASH']
    }
    MustDownload-File -Hash $expected_hash -OutFile $archive_path -URLs $tar_urls

    # Untar into the parent directory of ${env:K8S_DIR}. This (over-)writes
    # <parent>/kubernetes/node/bin/*.exe files.
    # TODO(pjh): clean this up, files not guaranteed to end up in NODE_DIR
    $extract_root = (Get-Item ${env:K8S_DIR}).Parent.Fullname
    tar xzf $archive_path -C $extract_root

    # Clean up the temporary directory
    Remove-Item -Force -Recurse $staging_dir
  }
  352. # TODO(pjh): this is copied from
  353. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  354. # See if there's a way to fetch or construct the "management subnet" so that
  355. # this is not needed.
  function ConvertTo_DecimalIP
  {
    # Converts an IP address to its numeric value: the address bytes are
    # weighted by 256^3, 256^2, 256^1 and 256^0 in order, summed, and the sum
    # is returned as a UInt32.
    param(
      [parameter(Mandatory = $true, Position = 0)]
      [Net.IPAddress] $IPAddress
    )

    $octets = $IPAddress.GetAddressBytes()
    $total = 0
    for ($pos = 0; $pos -lt $octets.Length; $pos++) {
      # The first byte is the most significant one.
      $total += $octets[$pos] * [Math]::Pow(256, 3 - $pos)
    }
    return [UInt32]$total
  }
  368. # TODO(pjh): this is copied from
  369. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  370. # See if there's a way to fetch or construct the "management subnet" so that
  371. # this is not needed.
  function ConvertTo_DottedDecimalIP
  {
    # Converts a 32-bit numeric IP address to dotted-decimal text, e.g.
    # 16909060 -> "1.2.3.4".
    param(
      [parameter(Mandatory = $true, Position = 0)]
      [Uint32] $IPAddress
    )

    $parts = @()
    $rest = $IPAddress
    foreach ($exponent in 3..0) {
      # Peel off one octet per step, most significant first.
      $weight = [Math]::Pow(256, $exponent)
      $lower = $rest % $weight
      $parts += ($rest - $lower) / $weight
      $rest = [UInt32]$lower
    }
    return [String]::Join(".", $parts)
  }
  385. # TODO(pjh): this is copied from
  386. # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L98.
  387. # See if there's a way to fetch or construct the "management subnet" so that
  388. # this is not needed.
  function ConvertTo_MaskLength
  {
    # Returns the number of 1 bits in $SubnetMask, which for a contiguous
    # netmask is its prefix length (255.255.240.0 -> 20).
    param(
      [parameter(Mandatory = $True, Position = 0)]
      [Net.IPAddress] $SubnetMask
    )

    $set_bits = 0
    foreach ($octet in $SubnetMask.GetAddressBytes()) {
      # Binary text of the byte with the zeros removed leaves only the ones.
      $binary_text = [Convert]::ToString($octet, 2)
      $set_bits += ($binary_text -replace '0').Length
    }
    return $set_bits
  }
  400. # Returns the "management" subnet on which the Windows pods+kubelet will
  401. # communicate with the rest of the Kubernetes cluster without NAT. In GCE this
  402. # is the subnet that VM internal IPs are allocated from.
  403. #
  404. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  function Get_MgmtSubnet {
    # Computes the subnet of the management adapter in CIDR form
    # ("<network address>/<prefix length>") by AND-ing the adapter's IPv4
    # address with its subnet mask.
    $adapter = Get_MgmtNetAdapter

    # TODO(pjh): applying the primary interface's subnet mask to its IP address
    # *should* give us the GCE network subnet that VM IP addresses are being
    # allocated from... however it might be more accurate or straightforward to
    # just fetch the IP address range for the VPC subnet that the kube-up
    # script creates (kubernetes-subnet-default).
    $ip_config = Get-NetIPAddress `
      -InterfaceAlias ${adapter}.ifAlias `
      -AddressFamily IPv4
    $ip_address = $ip_config.IPAddress

    # The mask is read from the registry rather than WMI or some other
    # approach: this is compatible with Windows' forthcoming LWVNICs
    # (lightweight VNICs).
    # https://github.com/kubernetes-sigs/sig-windows-tools/pull/16/commits/c5b5c67d5da6c23ad870cb16146eaa58131caf29
    $registry_path = "HKLM:\SYSTEM\CurrentControlSet\Services\Tcpip\" +
      "Parameters\Interfaces\$($adapter.InterfaceGuid)"
    $interface_key = Get-Item -Path $registry_path

    # The value name is 'DhcpSubnetMask' for current network interfaces but
    # could be different for "LWVNIC" interfaces, hence the wildcard.
    $mask_value_names = $interface_key.GetValueNames() -like "*SubnetMask"
    $subnet_mask = $mask_value_names |
      ForEach-Object { $interface_key.GetValue($_) }

    $network_number = `
      (ConvertTo_DecimalIP ${ip_address}) -band (ConvertTo_DecimalIP ${subnet_mask})
    $network_address = ConvertTo_DottedDecimalIP ${network_number}
    $prefix_length = ConvertTo_MaskLength $subnet_mask
    return "${network_address}/${prefix_length}"
  }
  431. # Returns a network adapter object for the "management" interface via which the
  432. # Windows pods+kubelet will communicate with the rest of the Kubernetes cluster.
  433. #
  434. # This function will fail if Add_InitialHnsNetwork() has not been called first.
  function Get_MgmtNetAdapter {
    # Returns the network adapter(s) whose Name matches the wildcard pattern in
    # ${MGMT_ADAPTER_NAME}. Throws when nothing matches.
    $matching_adapter = Get-NetAdapter |
      Where-Object { $_.Name -like ${MGMT_ADAPTER_NAME} }

    if (${matching_adapter}) {
      return $matching_adapter
    }

    Throw "Failed to find a suitable network adapter, check your network settings."
  }
  443. # Decodes the base64 $Data string and writes it as binary to $File. Does
  444. # nothing if $File already exists and $REDO_STEPS is not set.
  function Write_PkiData {
    # Base64-decodes $Data and writes the resulting bytes to $File. Skipped
    # entirely when ShouldWrite-File declines to (re)write $File.
    param (
      [parameter(Mandatory=$true)] [string] $Data,
      [parameter(Mandatory=$true)] [string] $File
    )

    if (ShouldWrite-File $File) {
      # Analogous to "base64 --decode" on Linux, this produces the PEM file.
      # See https://stackoverflow.com/a/51914136/1230197.
      $decoded_bytes = [Convert]::FromBase64String($Data)
      [IO.File]::WriteAllBytes($File, $decoded_bytes)

      # File permissions are still an open item. On Linux these files are
      # owned by root and rw by the user only:
      #   -rw------- 1 root root 1.2K Oct 12 00:56 ca-certificates.crt
      #   -rw------- 1 root root 1.3K Oct 12 00:56 kubelet.crt
      #   -rw------- 1 root root 1.7K Oct 12 00:56 kubelet.key
      # Windows:
      #   https://docs.microsoft.com/en-us/dotnet/api/system.io.fileattributes
      $todo_text = "need to set permissions correctly on ${File}; not sure what the " +
        "Windows equivalent of 'umask 077' is"
      Log_Todo $todo_text
    }
  }
  466. # Creates the node PKI files in $env:PKI_DIR.
  467. #
  468. # Required ${kube_env} keys:
  469. # CA_CERT
  470. # ${kube_env} keys that can be omitted for nodes that do not use an
  471. # authentication plugin:
  472. # KUBELET_CERT
  473. # KUBELET_KEY
  function Create-NodePki {
    # Writes the node's PKI files: always the CA bundle (kube-env CA_CERT) to
    # $env:CA_FILE_PATH and, for nodes that do not use an authentication
    # plugin, the kubelet certificate and key (KUBELET_CERT / KUBELET_KEY) to
    # $env:KUBELET_CERT_PATH / $env:KUBELET_KEY_PATH. A missing key is reported
    # through Log-Output -Fatal. Finishes by listing $env:PKI_DIR.
    # Reads ${kube_env} from the caller's scope.
    Log-Output 'Creating node pki files'

    if (-not $kube_env.ContainsKey('CA_CERT')) {
      Log-Output -Fatal 'CA_CERT not present in kube-env'
    }
    else {
      $ca_bundle = ${kube_env}['CA_CERT']
      Write_PkiData "${ca_bundle}" ${env:CA_FILE_PATH}
    }

    # On nodes that use a plugin to support authentication, KUBELET_CERT and
    # KUBELET_KEY will not be present - TPM_BOOTSTRAP_CERT and
    # TPM_BOOTSTRAP_KEY should be set instead.
    if (Test-NodeUsesAuthPlugin ${kube_env}) {
      Log-Output ('Skipping KUBELET_CERT and KUBELET_KEY, plugin will be used ' +
        'for authentication')
      return
    }

    # Certificate first, then key; each entry pairs a kube-env key with the
    # file it is written to.
    $kubelet_files = @(
      @{ EnvKey = 'KUBELET_CERT'; Target = ${env:KUBELET_CERT_PATH} },
      @{ EnvKey = 'KUBELET_KEY'; Target = ${env:KUBELET_KEY_PATH} }
    )
    foreach ($item in $kubelet_files) {
      $env_key = $item.EnvKey
      if ($kube_env.ContainsKey($env_key)) {
        $pki_payload = ${kube_env}[$env_key]
        Write_PkiData "${pki_payload}" $item.Target
      }
      else {
        Log-Output -Fatal ($env_key + ' not present in kube-env')
      }
    }

    Get-ChildItem ${env:PKI_DIR}
  }
  507. # Creates the bootstrap kubelet kubeconfig at $env:BOOTSTRAP_KUBECONFIG.
  508. # https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/
  509. #
  510. # Create-NodePki() must be called first.
  511. #
  512. # Required ${kube_env} keys:
  513. # KUBERNETES_MASTER_NAME: the apiserver IP address.
  function Write_BootstrapKubeconfig {
    # Writes the kubelet bootstrap kubeconfig to $env:BOOTSTRAP_KUBECONFIG and
    # logs the resulting file. Skipped when ShouldWrite-File declines to
    # (re)write the file. Reads ${kube_env} from the caller's scope.
    #
    # Values used:
    #   kube-env KUBERNETES_MASTER_NAME - apiserver address
    #   $env:KUBELET_CERT_PATH, $env:KUBELET_KEY_PATH, $env:CA_FILE_PATH
    #
    # The file is assembled line by line so that the YAML nesting a kubeconfig
    # requires (user / cluster / context entries indented under their list
    # items) is explicit and does not depend on how this script is indented.
    # Values are interpolated directly instead of being patched in with chained
    # .replace() calls, so one substituted value can never be rewritten by a
    # later placeholder replacement.
    if (-not (ShouldWrite-File ${env:BOOTSTRAP_KUBECONFIG})) {
      return
    }

    # TODO(mtaufen): is user "kubelet" correct? Other examples use e.g.
    # "system:node:$(hostname)".
    $apiserverAddress = ${kube_env}['KUBERNETES_MASTER_NAME']

    $kubeconfig_lines = @(
      'apiVersion: v1',
      'kind: Config',
      'users:',
      '- name: kubelet',
      '  user:',
      "    client-certificate: ${env:KUBELET_CERT_PATH}",
      "    client-key: ${env:KUBELET_KEY_PATH}",
      'clusters:',
      '- name: local',
      '  cluster:',
      "    server: https://${apiserverAddress}",
      "    certificate-authority: ${env:CA_FILE_PATH}",
      'contexts:',
      '- context:',
      '    cluster: local',
      '    user: kubelet',
      '  name: service-account-context',
      'current-context: service-account-context'
    )

    New-Item -Force -ItemType file ${env:BOOTSTRAP_KUBECONFIG} | Out-Null
    Set-Content ${env:BOOTSTRAP_KUBECONFIG} $kubeconfig_lines
    Log-Output ("kubelet bootstrap kubeconfig:`n" +
      "$(Get-Content -Raw ${env:BOOTSTRAP_KUBECONFIG})")
  }
  548. # Fetches the kubelet kubeconfig from the metadata server and writes it to
  549. # $env:KUBECONFIG.
  550. #
  551. # Create-NodePki() must be called first.
  function Write_KubeconfigFromMetadata {
    # Reads the 'kubeconfig' instance metadata attribute, writes it to
    # $env:KUBECONFIG and logs the resulting file. Skipped when
    # ShouldWrite-File declines to (re)write the file. A missing attribute is
    # reported through Log-Output -Fatal.
    if (-not (ShouldWrite-File ${env:KUBECONFIG})) {
      return
    }

    $kubeconfig = Get-InstanceMetadataAttribute 'kubeconfig'
    # $null goes on the left: with $null on the right, -eq acts as a filter
    # whenever the left operand is a collection, and the test silently stops
    # being a null check.
    if ($null -eq $kubeconfig) {
      Log-Output `
        "kubeconfig metadata key not found, can't write ${env:KUBECONFIG}" `
        -Fatal
    }

    Set-Content ${env:KUBECONFIG} $kubeconfig
    Log-Output ("kubelet kubeconfig from metadata (non-bootstrap):`n" +
      "$(Get-Content -Raw ${env:KUBECONFIG})")
  }
  566. # Creates the kubelet kubeconfig at $env:KUBECONFIG for nodes that use an
  567. # authentication plugin, or at $env:BOOTSTRAP_KUBECONFIG for nodes that do not.
  568. #
  569. # Create-NodePki() must be called first.
  570. #
  571. # Required ${kube_env} keys:
  572. # KUBERNETES_MASTER_NAME: the apiserver IP address.
  function Create-KubeletKubeconfig {
    # Picks the kubeconfig writer for this node: nodes that use an
    # authentication plugin take the kubeconfig from instance metadata, all
    # other nodes get a bootstrap kubeconfig.
    # Reads ${kube_env} from the caller's scope.
    if (-not (Test-NodeUsesAuthPlugin ${kube_env})) {
      Write_BootstrapKubeconfig
      return
    }

    Write_KubeconfigFromMetadata
  }
  # Creates the kube-proxy user kubeconfig file at $env:KUBEPROXY_KUBECONFIG
  # and logs its contents. Returns without doing anything when ShouldWrite-File
  # says the file should not be written.
  #
  # Create-NodePki() must be called first.
  #
  # Required ${kube_env} keys:
  #   CA_CERT
  #   KUBE_PROXY_TOKEN
  #   KUBERNETES_MASTER_NAME
  function Create-KubeproxyKubeconfig {
    if (-not (ShouldWrite-File ${env:KUBEPROXY_KUBECONFIG})) {
      return
    }
    New-Item -Force -ItemType file ${env:KUBEPROXY_KUBECONFIG} | Out-Null

    # In configure-helper.sh kubelet kubeconfig uses certificate-authority while
    # kubeproxy kubeconfig uses certificate-authority-data, ugh. Does it matter?
    # Use just one or the other for consistency?
    #
    # The leading whitespace inside this literal is part of the YAML document:
    # 'user', 'cluster' and 'context' must be nested under their list entries
    # for the result to be a valid kubeconfig.
    $kubeconfig = 'apiVersion: v1
kind: Config
users:
- name: kube-proxy
  user:
    token: KUBEPROXY_TOKEN
clusters:
- name: local
  cluster:
    server: https://APISERVER_ADDRESS
    certificate-authority-data: CA_CERT
contexts:
- context:
    cluster: local
    user: kube-proxy
  name: service-account-context
current-context: service-account-context'

    # Placeholders are substituted in the same order as before.
    $kubeconfig = $kubeconfig.replace(
        'KUBEPROXY_TOKEN', ${kube_env}['KUBE_PROXY_TOKEN'])
    $kubeconfig = $kubeconfig.replace('CA_CERT', ${kube_env}['CA_CERT'])
    $kubeconfig = $kubeconfig.replace(
        'APISERVER_ADDRESS', ${kube_env}['KUBERNETES_MASTER_NAME'])

    Set-Content ${env:KUBEPROXY_KUBECONFIG} $kubeconfig
    Log-Output ("kubeproxy kubeconfig:`n" +
                "$(Get-Content -Raw ${env:KUBEPROXY_KUBECONFIG})")
  }
  # Asks the GCE metadata server for the first IP alias range of this
  # instance's first network interface and returns the response with
  # surrounding whitespace trimmed.
  function Get_IpAliasRange {
    $metadata_path = 'computeMetadata/v1/instance/network-interfaces/0/ip-aliases/0'
    $request_url = "http://${GCE_METADATA_SERVER}/${metadata_path}"
    $web_client = New-Object System.Net.WebClient
    # The request carries the 'Metadata-Flavor: Google' header.
    $web_client.Headers.Add('Metadata-Flavor', 'Google')
    # Download and trim in one statement so a failed download yields no output.
    return $web_client.DownloadString($request_url).Trim()
  }
  # Fetches the pod CIDR with Get_IpAliasRange, retrying once per second until
  # the call succeeds, then stores it in the POD_CIDR environment variable for
  # both the machine and the current shell.
  function Set-PodCidr {
    while ($true) {
      $alias_range = Get_IpAliasRange
      # $? has to be read immediately after the call it reports on.
      if ($?) {
        break
      }
      Log-Output ${alias_range}
      Log-Output "Retrying Get_IpAliasRange..."
      Start-Sleep -Seconds 1
    }

    Log-Output "fetched pod CIDR (same as IP alias range): ${alias_range}"
    Set_MachineEnvironmentVar "POD_CIDR" ${alias_range}
    Set_CurrentShellEnvironmentVar "POD_CIDR" ${alias_range}
  }
  # Creates the initial 'External' HNS network on the Windows node. Doing so
  # forces the creation of a virtual switch and of the "management" interface
  # used to talk to the rest of the Kubernetes cluster without NAT.
  #
  # If the network already exists it is left alone, unless $REDO_STEPS is set,
  # in which case it is removed and created again.
  #
  # Note that adding the initial HNS network may cause connectivity to the GCE
  # metadata server to be lost due to a Windows bug.
  # Configure-HostNetworkingService() restores connectivity, look there for
  # details.
  #
  # Download-HelperScripts() must have been called first.
  function Add_InitialHnsNetwork {
    $INITIAL_HNS_NETWORK = 'External'

    # This comes from
    # https://github.com/Microsoft/SDN/blob/master/Kubernetes/flannel/l2bridge/start.ps1#L74
    # (or
    # https://github.com/Microsoft/SDN/blob/master/Kubernetes/windows/start-kubelet.ps1#L206).
    #
    # daschott noted on Slack: "L2bridge networks require an external vSwitch.
    # The first network ("External") with hardcoded values in the script is just
    # a placeholder to create an external vSwitch. This is purely for convenience
    # to be able to remove/modify the actual HNS network ("cbr0") or rejoin the
    # nodes without a network blip. Creating a vSwitch takes time, causes network
    # blips, and it makes it more likely to hit the issue where flanneld is
    # stuck, so we want to do this as rarely as possible."
    $existing_network = Get-HnsNetwork |
        Where-Object { $_.Name -eq $INITIAL_HNS_NETWORK }

    # Already present and no redo requested: nothing to do.
    if ($existing_network -and (-not $REDO_STEPS)) {
      Log-Output ("Skip: initial '$INITIAL_HNS_NETWORK' HNS network " +
                  "already exists, not recreating it")
      return
    }

    # Already present and a redo was requested: drop it before recreating.
    if ($existing_network) {
      Log-Output ("Warning: initial '$INITIAL_HNS_NETWORK' HNS network " +
                  "already exists, removing it and recreating it")
      $existing_network | Remove-HnsNetwork
    }

    Log-Output ("Creating initial HNS network to force creation of " +
                "${MGMT_ADAPTER_NAME} interface")
    # Note: RDP connection will hiccup when running this command.
    New-HNSNetwork `
        -Type "L2Bridge" `
        -AddressPrefix "192.168.255.0/30" `
        -Gateway "192.168.255.1" `
        -Name $INITIAL_HNS_NETWORK `
        -Verbose
  }
  # Returns the decimal form (as produced by ConvertTo_DecimalIP) of the
  # address part of the given CIDR, i.e. of the text before the '/'.
  function Get_NetworkDecimal_From_CIDR([string] $cidr) {
    # The prefix length is converted to [int] here but is not used afterwards.
    $address, [int]$prefix_length = $cidr.Split('/')
    return ConvertTo_DecimalIP $address
  }
  # Returns the gateway IP string for the given pod CIDR: the first address
  # after the network address.
  # For Windows nodes the pod gateway IP address is the first address in the pod
  # CIDR for the host.
  function Get_Gateway_From_CIDR([string] $cidr) {
    $base_address = Get_NetworkDecimal_From_CIDR $cidr
    return ConvertTo_DottedDecimalIP ($base_address + 1)
  }
  # Returns the endpoint gateway IP string for the given pod CIDR: the second
  # address after the network address.
  # For Windows nodes the pod gateway IP address is the first address in the pod
  # CIDR for the host, but from inside containers it's the second address.
  function Get_Endpoint_Gateway_From_CIDR([string] $cidr) {
    $base_address = Get_NetworkDecimal_From_CIDR $cidr
    return ConvertTo_DottedDecimalIP ($base_address + 2)
  }
  # Returns the start of the pod IP range for the given pod CIDR: the third
  # address after the network address.
  # The first two addresses in the CIDR range are reserved for gateways, so the
  # range starts at the third one to keep IPAM from allocating them to pods.
  function Get_PodIP_Range_Start([string] $cidr) {
    $base_address = Get_NetworkDecimal_From_CIDR $cidr
    return ConvertTo_DottedDecimalIP ($base_address + 3)
  }
  # Configures HNS on the Windows node to enable Kubernetes networking:
  #   - Creates the "management" interface associated with an initial HNS
  #     network (Add_InitialHnsNetwork).
  #   - Creates the HNS network $env:KUBE_NETWORK for pod networking.
  #   - Creates and attaches the "cbr0" HNS endpoint for pod networking.
  #   - Removes any existing HNS policy lists (best effort).
  #   - Adds a route to the pod CIDR on the management interface.
  #   - Re-adds the GCE metadata server route if the HNS network was created,
  #     and checks it with Verify_GceMetadataServerRouteIsPresent.
  #
  # Existing HNS networks/endpoints are reused, unless $REDO_STEPS is set, in
  # which case they are removed and recreated.
  #
  # Prerequisites:
  #   $env:POD_CIDR is set (by Set-PodCidr).
  #   Download-HelperScripts() has been called.
  function Configure-HostNetworkingService {
    Import-Module -Force ${env:K8S_DIR}\hns.psm1

    Add_InitialHnsNetwork

    $pod_gateway = Get_Gateway_From_CIDR(${env:POD_CIDR})
    $pod_endpoint_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})
    Log-Output ("Setting up Windows node HNS networking: " +
                "podCidr = ${env:POD_CIDR}, podGateway = ${pod_gateway}, " +
                "podEndpointGateway = ${pod_endpoint_gateway}")

    $hns_network = Get-HnsNetwork | Where-Object Name -eq ${env:KUBE_NETWORK}
    if ($hns_network) {
      if ($REDO_STEPS) {
        Log-Output ("Warning: ${env:KUBE_NETWORK} HNS network already exists, " +
                    "removing it and recreating it")
        $hns_network | Remove-HnsNetwork
        $hns_network = $null
      }
      else {
        Log-Output "Skip: ${env:KUBE_NETWORK} HNS network already exists"
      }
    }
    # Remembered so that the metadata server route is only repaired when a new
    # HNS network was actually created.
    $created_hns_network = $false
    if (-not $hns_network) {
      # Note: RDP connection will hiccup when running this command.
      $hns_network = New-HNSNetwork `
          -Type "L2Bridge" `
          -AddressPrefix ${env:POD_CIDR} `
          -Gateway ${pod_gateway} `
          -Name ${env:KUBE_NETWORK} `
          -Verbose
      $created_hns_network = $true
    }

    $endpoint_name = "cbr0"
    $vnic_name = "vEthernet (${endpoint_name})"

    $hns_endpoint = Get-HnsEndpoint | Where-Object Name -eq $endpoint_name
    # Note: we don't expect to ever enter this block currently - while the HNS
    # network does seem to persist across reboots, the HNS endpoints do not.
    if ($hns_endpoint) {
      if ($REDO_STEPS) {
        Log-Output ("Warning: HNS endpoint $endpoint_name already exists, " +
                    "removing it and recreating it")
        $hns_endpoint | Remove-HnsEndpoint
        $hns_endpoint = $null
      }
      else {
        Log-Output "Skip: HNS endpoint $endpoint_name already exists"
      }
    }
    if (-not $hns_endpoint) {
      $hns_endpoint = New-HnsEndpoint `
          -NetworkId ${hns_network}.Id `
          -Name ${endpoint_name} `
          -IPAddress ${pod_endpoint_gateway} `
          -Gateway "0.0.0.0" `
          -Verbose
      # TODO(pjh): find out: why is this always CompartmentId 1?
      Attach-HnsHostEndpoint `
          -EndpointID ${hns_endpoint}.Id `
          -CompartmentID 1 `
          -Verbose
      netsh interface ipv4 set interface "${vnic_name}" forwarding=enabled
    }

    Try {
      Get-HNSPolicyList | Remove-HnsPolicyList
    } Catch {
      # Removing stale policy lists is best effort: keep going, but leave a
      # trace in the log instead of discarding the error silently.
      Log-Output "Warning: failed to remove existing HNS policy lists: $_"
    }

    # Add a route from the management NIC to the pod CIDR.
    #
    # When a packet from a Kubernetes service backend arrives on the destination
    # Windows node, the reverse SNAT will be applied and the source address of
    # the packet gets replaced from the pod IP to the service VIP. The packet
    # will then leave the VM and return back through hairpinning.
    #
    # When IP alias is enabled, IP forwarding is disabled for anti-spoofing;
    # the packet with the service VIP will get blocked and be lost. With this
    # route, the packet will be routed to the pod subnetwork, and not leave the
    # VM.
    $mgmt_net_adapter = Get_MgmtNetAdapter
    New-NetRoute `
        -ErrorAction Ignore `
        -InterfaceAlias ${mgmt_net_adapter}.ifAlias `
        -DestinationPrefix ${env:POD_CIDR} `
        -NextHop "0.0.0.0" `
        -Verbose

    if ($created_hns_network) {
      # There is an HNS bug where the route to the GCE metadata server will be
      # removed when the HNS network is created:
      # https://github.com/Microsoft/hcsshim/issues/299#issuecomment-425491610.
      # The behavior here is very unpredictable: the route may only be removed
      # after some delay, or it may appear to be removed then you'll add it back
      # but then it will be removed once again. So, we first wait a long
      # unfortunate amount of time to ensure that things have quiesced, then we
      # wait until we're sure the route is really gone before re-adding it again.
      Log-Output "Waiting 45 seconds for host network state to quiesce"
      Start-Sleep 45
      WaitFor_GceMetadataServerRouteToBeRemoved
      Log-Output "Re-adding the GCE metadata server route"
      Add_GceMetadataServerRoute
    }
    Verify_GceMetadataServerRouteIsPresent

    Log-Output "Host network setup complete"
  }
  # Downloads GetGcePdName.dll into $env:K8S_DIR (when ShouldWrite-File allows
  # it) and makes sure $PsHome\profile.ps1 unblocks and imports that module.
  #
  # The profile is created if it does not exist. The import snippet is only
  # appended when the profile does not already contain it, so calling this
  # function repeatedly does not pile up duplicate entries.
  function Configure-GcePdTools {
    if (ShouldWrite-File ${env:K8S_DIR}\GetGcePdName.dll) {
      MustDownload-File -OutFile ${env:K8S_DIR}\GetGcePdName.dll `
          -URLs "https://storage.googleapis.com/gke-release/winnode/config/gce-tools/master/GetGcePdName/GetGcePdName.dll"
    }

    $profile_path = "$PsHome\profile.ps1"
    if (-not (Test-Path $profile_path)) {
      # Out-Null keeps the created file object out of this function's output.
      New-Item -Path $profile_path -ItemType file | Out-Null
    }

    $profile_snippet = '$modulePath = "K8S_DIR\GetGcePdName.dll"
Unblock-File $modulePath
Import-Module -Name $modulePath'.replace('K8S_DIR', ${env:K8S_DIR})

    # The first line of the snippet identifies it; look for it literally
    # (-SimpleMatch) because the path contains regex metacharacters.
    $marker_line = ($profile_snippet -split "`r?`n")[0]
    $already_configured = Select-String `
        -Path $profile_path `
        -Pattern $marker_line `
        -SimpleMatch `
        -Quiet
    if ($already_configured) {
      return
    }

    Add-Content $profile_path $profile_snippet
  }
  # Sets up CNI networking for the container runtime selected by
  # $env:CONTAINER_RUNTIME: the containerd variant when it is "containerd",
  # the Docker variant otherwise.
  function Configure-CniNetworking {
    if (${env:CONTAINER_RUNTIME} -ne "containerd") {
      Configure_Dockerd_CniNetworking
      return
    }
    Configure_Containerd_CniNetworking
  }
  # Docker variant of the CNI setup: downloads win-bridge.exe and
  # host-local.exe into $env:CNI_DIR (when ShouldWrite-File allows it) and
  # writes the CNI config file $env:CNI_CONFIG_DIR\l2bridge.conf.
  #
  # Prerequisites:
  #   $env:POD_CIDR is set (by Set-PodCidr).
  #   The "management" interface exists (Configure-HostNetworkingService).
  #   The HNS network for pod networking has been configured
  #     (Configure-HostNetworkingService).
  #
  # Required ${kube_env} keys:
  #   DNS_SERVER_IP
  #   DNS_DOMAIN
  #   CLUSTER_IP_RANGE
  #   SERVICE_CLUSTER_IP_RANGE
  function Configure_Dockerd_CniNetworking {
    $win_bridge_exe = "${env:CNI_DIR}\win-bridge.exe"
    $host_local_exe = "${env:CNI_DIR}\host-local.exe"

    if ((ShouldWrite-File $win_bridge_exe) -or
        (ShouldWrite-File $host_local_exe)) {
      $tmp_dir = 'C:\cni_tmp'
      New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null

      $release_url = "${env:WINDOWS_CNI_STORAGE_PATH}/${env:WINDOWS_CNI_VERSION}/"
      $tgz_url = "${release_url}cni-plugins-windows-amd64-${env:WINDOWS_CNI_VERSION}.tgz"
      $sha_url = "${tgz_url}.sha1"

      # Fetch the published checksum first; the first space-separated field of
      # the downloaded file is used as the expected hash of the archive.
      MustDownload-File -URLs $sha_url -OutFile $tmp_dir\cni-plugins.sha1
      $sha1_val = ($(Get-Content $tmp_dir\cni-plugins.sha1) -split ' ',2)[0]
      MustDownload-File `
          -URLs $tgz_url `
          -OutFile $tmp_dir\cni-plugins.tgz `
          -Hash $sha1_val

      Push-Location $tmp_dir
      # tar can only extract in the current directory.
      tar -xvf $tmp_dir\cni-plugins.tgz
      Move-Item -Force host-local.exe ${env:CNI_DIR}\
      Move-Item -Force win-bridge.exe ${env:CNI_DIR}\
      Pop-Location
      Remove-Item -Force -Recurse $tmp_dir
    }

    # Both binaries have to be present at this point.
    if ((-not (Test-Path $win_bridge_exe)) -or
        (-not (Test-Path $host_local_exe))) {
      Log-Output `
          "win-bridge.exe and host-local.exe not found in ${env:CNI_DIR}" `
          -Fatal
    }

    $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
    if (-not (ShouldWrite-File ${l2bridge_conf})) {
      return
    }

    $mgmt_ip = (Get_MgmtNetAdapter |
                Get-NetIPAddress -AddressFamily IPv4).IPAddress
    $mgmt_subnet = Get_MgmtSubnet
    Log-Output ("using mgmt IP ${mgmt_ip} and mgmt subnet ${mgmt_subnet} for " +
                "CNI config")

    $cidr_range_start = Get_PodIP_Range_Start(${env:POD_CIDR})

    # Explanation of the CNI config values:
    #   CLUSTER_CIDR: the cluster CIDR from which pod CIDRs are allocated.
    #   POD_CIDR: the pod CIDR assigned to this node.
    #   CIDR_RANGE_START: start of the pod CIDR range.
    #   MGMT_SUBNET: the subnet on which the Windows pods + kubelet will
    #     communicate with the rest of the cluster without NAT (i.e. the subnet
    #     that VM internal IPs are allocated from).
    #   MGMT_IP: the IP address assigned to the node's primary network interface
    #     (i.e. the internal IP of the GCE VM).
    #   SERVICE_CIDR: the CIDR used for kubernetes services.
    #   DNS_SERVER_IP: the cluster's DNS server IP address.
    #   DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
    New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null

    # The literal below is written to the config file verbatim (after
    # placeholder substitution), including its whitespace.
    $conf_template = '{
"cniVersion": "0.2.0",
"name": "l2bridge",
"type": "win-bridge",
"capabilities": {
"portMappings": true,
"dns": true
},
"ipam": {
"type": "host-local",
"subnet": "POD_CIDR",
"rangeStart": "CIDR_RANGE_START"
},
"dns": {
"Nameservers": [
"DNS_SERVER_IP"
],
"Search": [
"DNS_DOMAIN"
]
},
"Policies": [
{
"Name": "EndpointPolicy",
"Value": {
"Type": "OutBoundNAT",
"ExceptionList": [
"CLUSTER_CIDR",
"SERVICE_CIDR",
"MGMT_SUBNET"
]
}
},
{
"Name": "EndpointPolicy",
"Value": {
"Type": "ROUTE",
"DestinationPrefix": "SERVICE_CIDR",
"NeedEncap": true
}
},
{
"Name": "EndpointPolicy",
"Value": {
"Type": "ROUTE",
"DestinationPrefix": "MGMT_IP/32",
"NeedEncap": true
}
}
]
}'

    # Substitute the placeholders one at a time, in a fixed order.
    $conf_text = $conf_template.replace('POD_CIDR', ${env:POD_CIDR})
    $conf_text = $conf_text.replace('CIDR_RANGE_START', ${cidr_range_start})
    $conf_text = $conf_text.replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP'])
    $conf_text = $conf_text.replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN'])
    $conf_text = $conf_text.replace('MGMT_IP', ${mgmt_ip})
    $conf_text = $conf_text.replace('CLUSTER_CIDR', ${kube_env}['CLUSTER_IP_RANGE'])
    $conf_text = $conf_text.replace(
        'SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE'])
    $conf_text = $conf_text.replace('MGMT_SUBNET', ${mgmt_subnet})

    Set-Content ${l2bridge_conf} $conf_text
    Log-Output "CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  }
  # Saves the host's DNS configuration to $env:CNI_DIR\hostdns.conf so that
  # kubelet/CNI can use it to configure the DNS suffix search list for pods.
  # The file has one "nameserver <ip>" line per DNS server of the management
  # adapter, followed by a "search <suffixes>" line, i.e. the same format as a
  # Linux resolv.conf.
  # The value of DNS server is ignored right now because the pod will always
  # only use the cluster DNS service; it is recorded for consistency.
  # This function must be called after Configure-HostNetworkingService.
  function Configure-HostDnsConf {
    $mgmt_adapter = Get_MgmtNetAdapter
    $dns_servers = (Get-DnsClientServerAddress `
        -InterfaceAlias $mgmt_adapter.Name).ServerAddresses
    $search_list = (Get-DnsClient).ConnectionSpecificSuffixSearchList

    $conf = ""
    foreach ($dns_server in $dns_servers) {
      $conf += "nameserver $dns_server`r`n"
    }
    $conf += "search $search_list"

    # Do not put hostdns.conf into the CNI config directory so as to
    # avoid the container runtime treating it as CNI config.
    $hostdns_conf = "${env:CNI_DIR}\hostdns.conf"
    New-Item -Force -ItemType file ${hostdns_conf} | Out-Null
    Set-Content ${hostdns_conf} $conf
    Log-Output "HOST dns conf:`n$(Get-Content -Raw ${hostdns_conf})"
  }
  # Fetches the kubelet config from the instance metadata ('kubelet-config'
  # key), writes it to $env:KUBELET_CONFIG and logs the written file.
  #
  # Returns without doing anything when ShouldWrite-File says that
  # $env:KUBELET_CONFIG should not be written. A missing metadata key is
  # reported through Log-Output -Fatal, in the same way as
  # Write_KubeconfigFromMetadata does for the 'kubeconfig' key.
  function Configure-Kubelet {
    if (-not (ShouldWrite-File ${env:KUBELET_CONFIG})) {
      return
    }

    # The Kubelet config is built by build-kubelet-config() in
    # cluster/gce/util.sh, and stored in the metadata server under the
    # 'kubelet-config' key.
    $kubelet_config = Get-InstanceMetadataAttribute 'kubelet-config'
    if ($null -eq $kubelet_config) {
      Log-Output `
          "kubelet-config metadata key not found, can't write ${env:KUBELET_CONFIG}" `
          -Fatal
    }
    Set-Content ${env:KUBELET_CONFIG} $kubelet_config
    Log-Output "Kubelet config:`n$(Get-Content -Raw ${env:KUBELET_CONFIG})"
  }
  # Builds the kubelet and kube-proxy command lines, registers both binaries
  # as native Windows services with sc.exe and starts them, then waits for the
  # services to be ready and checks the GCE metadata server route.
  #
  # Required ${kube_env} keys:
  #   KUBELET_ARGS
  #   KUBEPROXY_ARGS
  #   CLUSTER_IP_RANGE
  function Start-WorkerServices {
    # Compute kubelet args: defaults first, then the args from metadata, then
    # (for nodes without an auth plugin) the bootstrap kubeconfig.
    $kubelet_args = (${kube_env}['KUBELET_ARGS']).Split(" ")
    Log-Output "kubelet_args from metadata: ${kubelet_args}"
    $kubelet_args = @(
        "--pod-infra-container-image=${INFRA_CONTAINER}"
    ) + $kubelet_args
    if (-not (Test-NodeUsesAuthPlugin ${kube_env})) {
      Log-Output 'Using bootstrap kubeconfig for authentication'
      $kubelet_args += "--bootstrap-kubeconfig=${env:BOOTSTRAP_KUBECONFIG}"
    }
    Log-Output "Final kubelet_args: ${kubelet_args}"

    # Compute kube-proxy args: defaults first, then the args from metadata.
    $kubeproxy_args = (${kube_env}['KUBEPROXY_ARGS']).Split(" ")
    Log-Output "kubeproxy_args from metadata: ${kubeproxy_args}"
    # kubeproxy is started on Linux nodes using
    # kube-manifests/kubernetes/gci-trusty/kube-proxy.manifest, which is
    # generated by start-kube-proxy in configure-helper.sh and contains e.g.:
    #   kube-proxy --master=https://35.239.84.171
    #   --kubeconfig=/var/lib/kube-proxy/kubeconfig --cluster-cidr=10.64.0.0/14
    #   --oom-score-adj=-998 --v=2
    #   --iptables-sync-period=1m --iptables-min-sync-period=10s
    #   --ipvs-sync-period=1m --ipvs-min-sync-period=10s
    # And also with various volumeMounts and "securityContext: privileged: true".
    $kubeproxy_args = @(
        "--kubeconfig=${env:KUBEPROXY_KUBECONFIG}",
        "--cluster-cidr=$(${kube_env}['CLUSTER_IP_RANGE'])"
    ) + $kubeproxy_args
    Log-Output "Final kubeproxy_args: ${kubeproxy_args}"

    # TODO(pjh): kubelet is emitting these messages:
    # I1023 23:44:11.761915    2468 kubelet.go:274] Adding pod path:
    # C:\etc\kubernetes
    # I1023 23:44:11.775601    2468 file.go:68] Watching path
    # "C:\\etc\\kubernetes"
    # ...
    # E1023 23:44:31.794327    2468 file.go:182] Can't process manifest file
    # "C:\\etc\\kubernetes\\hns.psm1": C:\etc\kubernetes\hns.psm1: couldn't parse
    # as pod(yaml: line 10: did not find expected <document start>), please check
    # config file.
    #
    # Figure out how to change the directory that the kubelet monitors for new
    # pod manifests.

    # We configure the service to restart on failure, after 10s wait. We reset
    # the restart count to 0 each time, so we re-use our restart/10000 action on
    # each failure. Note it currently restarts even when explicitly stopped, you
    # have to delete the service entry to *really* kill it (e.g. `sc.exe delete
    # kubelet`). See issue #72900.
    $running_kubelet = Get-Process | Where-Object { $_.Name -eq "kubelet" }
    if ($running_kubelet) {
      Log-Output -Fatal `
          "A kubelet process is already running, don't know what to do"
    }
    Log-Output "Creating kubelet service"
    sc.exe create kubelet binPath= "${env:NODE_DIR}\kubelet.exe ${kubelet_args}" start= demand
    sc.exe failure kubelet reset= 0 actions= restart/10000
    Log-Output "Starting kubelet service"
    sc.exe start kubelet

    Log-Output "Waiting 10 seconds for kubelet to stabilize"
    Start-Sleep 10

    $running_kubeproxy = Get-Process | Where-Object { $_.Name -eq "kube-proxy" }
    if ($running_kubeproxy) {
      Log-Output -Fatal `
          "A kube-proxy process is already running, don't know what to do"
    }
    Log-Output "Creating kube-proxy service"
    sc.exe create kube-proxy binPath= "${env:NODE_DIR}\kube-proxy.exe ${kubeproxy_args}" start= demand
    sc.exe failure kube-proxy reset= 0 actions= restart/10000
    Log-Output "Starting kube-proxy service"
    sc.exe start kube-proxy

    # F1020 23:08:52.000083    9136 server.go:361] unable to load in-cluster
    # configuration, KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT must be
    # defined

    # TODO(pjh): still getting errors like these in kube-proxy log:
    # E1023 04:03:58.143449    4840 reflector.go:205]
    # k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129:
    # Failed to list *core.Endpoints: Get
    # https://35.239.84.171/api/v1/endpoints?limit=500&resourceVersion=0: dial
    # tcp 35.239.84.171:443: connectex: A connection attempt failed because the
    # connected party did not properly respond after a period of time, or
    # established connection failed because connected host has failed to
    # respond.
    # E1023 04:03:58.150266    4840 reflector.go:205]
    # k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion/factory.go:129:
    # Failed to list *core.Service: Get
    # https://35.239.84.171/api/v1/services?limit=500&resourceVersion=0: dial
    # tcp 35.239.84.171:443: connectex: A connection attempt failed because the
    # connected party did not properly respond after a period of time, or
    # established connection failed because connected host has failed to
    # respond.
    WaitFor_KubeletAndKubeProxyReady
    Verify_GceMetadataServerRouteIsPresent
    Log-Output "Kubernetes components started successfully"
  }
  # Waits up to 10 seconds, polling once per second, for both the kubelet and
  # the kube-proxy services to report the 'Running' status.
  #
  # Throws if either service is still not running once the wait is over; the
  # state of the kube* services is logged first. The decision is based on the
  # actual service status rather than on the elapsed time, so services that
  # come up on the very last poll are not reported as a timeout.
  function WaitFor_KubeletAndKubeProxyReady {
    $timeout = 10
    # True when both services are running.
    $services_running = {
      ((Get-Service kube-proxy).Status -eq 'Running') -and
          ((Get-Service kubelet).Status -eq 'Running')
    }

    $waited = 0
    while ((-not (& $services_running)) -and ($waited -lt $timeout)) {
      Start-Sleep 1
      $waited++
    }

    # Timeout occurred
    if (-not (& $services_running)) {
      Log-Output "$(Get-Service kube* | Out-String)"
      Throw ("Timeout while waiting ${timeout} seconds for kubelet & kube-proxy services to start")
    }
  }
  # Logs the output of 'kubectl get nodes' (using kubectl.exe from
  # $env:NODE_DIR) and checks the GCE metadata server route.
  # TODO(pjh): run more verification commands.
  function Verify-WorkerServices {
    $nodes_listing = & ${env:NODE_DIR}\kubectl.exe get nodes | Out-String
    Log-Output ("kubectl get nodes:`n" + "$nodes_listing")
    Verify_GceMetadataServerRouteIsPresent
    Log_Todo "run more verification commands."
  }
  # Downloads crictl.exe (pinned version and SHA256) into $env:NODE_DIR and,
  # when $env:CONTAINER_RUNTIME_ENDPOINT is set, configures it as crictl's
  # runtime endpoint.
  #
  # Returns without doing anything when ShouldWrite-File says that
  # $env:NODE_DIR\crictl.exe should not be written.
  function DownloadAndInstall-Crictl {
    $CRICTL_VERSION = "v1.17.0"
    $CRICTL_SHA256 = "781fd3bd15146a924c6fc2428b11d8a0f20fa04a0c8e00a9a5808f2cc37e0569"

    # Assume that presence of crictl.exe indicates that the crictl binaries
    # were already previously downloaded to this node.
    if (-not (ShouldWrite-File ${env:NODE_DIR}\crictl.exe)) {
      return
    }
    $tmp_dir = 'C:\crictl_tmp'
    New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
    $url = ('https://storage.googleapis.com/kubernetes-release/crictl/' +
        'crictl-' + $CRICTL_VERSION + '-windows-amd64.exe')
    MustDownload-File `
        -URLs $url `
        -OutFile $tmp_dir\crictl.exe `
        -Hash $CRICTL_SHA256 `
        -Algorithm SHA256

    Move-Item -Force $tmp_dir\crictl.exe ${env:NODE_DIR}\
    if (${env:CONTAINER_RUNTIME_ENDPOINT}) {
      # Run the binary that was just installed by its full path, so this step
      # does not depend on $env:NODE_DIR being on the PATH.
      & ${env:NODE_DIR}\crictl.exe config runtime-endpoint ${env:CONTAINER_RUNTIME_ENDPOINT}
    }
    Remove-Item -Force -Recurse $tmp_dir
  }
  # Pulls the infra/pause container image ($INFRA_CONTAINER) onto the node,
  # unless 'crictl images' already lists it, so that it is immediately
  # available when the kubelet tries to run pods. Logs the output of
  # 'crictl inspecti' for the image afterwards.
  #
  # Throws if 'crictl pull' fails.
  #
  # TODO(pjh): downloading the container image may take a few minutes;
  # figure out how to run this in the background while performing the rest of
  # the node startup steps!
  # Pull-InfraContainer must be called AFTER Verify-WorkerServices.
  function Pull-InfraContainer {
    $name, $label = $INFRA_CONTAINER -split ':',2

    # Image names and tags contain regex metacharacters (e.g. '.'), so both
    # parts are escaped before being used in a pattern. The string
    # interpolation turns a missing tag into an empty string.
    $image_pattern = [regex]::Escape("$name") + '.*' + [regex]::Escape("$label")
    # Match line by line, so that the name and the tag have to appear on the
    # same line of the image listing.
    $already_present = [bool](@(& crictl images) -match $image_pattern)

    if (-not $already_present) {
      & crictl pull $INFRA_CONTAINER
      if (!$?) {
        throw "Error running 'crictl pull $INFRA_CONTAINER'"
      }
    }
    $inspect = "$(& crictl inspecti $INFRA_CONTAINER | Out-String)"
    Log-Output "Infra/pause container:`n$inspect"
  }
  # Sets up the container runtime selected by $env:CONTAINER_RUNTIME:
  # containerd is installed and started when the value is "containerd";
  # otherwise the Docker registry key is created and the Docker daemon is
  # configured.
  function Setup-ContainerRuntime {
    if (${env:CONTAINER_RUNTIME} -ne "containerd") {
      Create_DockerRegistryKey
      Configure_Dockerd
      return
    }
    Install_Containerd
    Start_Containerd
  }
  # Imports an EventLog registry key for docker so that log messages are mapped
  # correctly. The key is written to a temporary .reg file, imported with
  # 'reg import', and the temporary directory is removed afterwards.
  # This is a workaround since the key is missing in the base image.
  # https://github.com/MicrosoftDocs/Virtualization-Documentation/pull/503
  # TODO: Fix this in the base image.
  # TODO(random-liu): Figure out whether we need this for containerd.
  function Create_DockerRegistryKey {
    $tmp_dir = 'C:\tmp_docker_reg'
    $reg_path = "${tmp_dir}\docker.reg"
    $reg_content = 'Windows Registry Editor Version 5.00
[HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\EventLog\Application\docker]
"CustomSource"=dword:00000001
"EventMessageFile"="C:\\Program Files\\docker\\dockerd.exe"
"TypesSupported"=dword:00000007'

    New-Item -Force -ItemType 'directory' $tmp_dir | Out-Null
    Set-Content $reg_path $reg_content

    Log-Output "Importing registry key for Docker"
    reg import $reg_path
    Remove-Item -Force -Recurse $tmp_dir
  }
  # Writes the Docker daemon configuration (json-file log driver, log files
  # limited to 1m each and 5 files) to
  # C:\ProgramData\docker\config\daemon.json and restarts the Docker service.
  function Configure_Dockerd {
    $daemon_config_path = "C:\ProgramData\docker\config\daemon.json"
    # The here-string content and its terminator must stay at the start of the
    # line.
    $daemon_config = @'
{
"log-driver": "json-file",
"log-opts": {
"max-size": "1m",
"max-file": "5"
}
}
'@
    Set-Content -Path $daemon_config_path -Value $daemon_config
    Restart-Service Docker
  }
  # Containerd variant of the CNI setup: writes the CNI config file
  # $env:CNI_CONFIG_DIR\l2bridge.conf (when ShouldWrite-File allows it) and
  # logs the written file.
  #
  # Prerequisites:
  #   $env:POD_CIDR is set (by Set-PodCidr).
  #   The "management" interface exists (Configure-HostNetworkingService).
  #   The HNS network for pod networking has been configured
  #     (Configure-HostNetworkingService).
  #   Containerd is installed (Install_Containerd).
  #
  # Required ${kube_env} keys:
  #   DNS_SERVER_IP
  #   DNS_DOMAIN
  #   CLUSTER_IP_RANGE
  #   SERVICE_CLUSTER_IP_RANGE
  function Configure_Containerd_CniNetworking {
    $l2bridge_conf = "${env:CNI_CONFIG_DIR}\l2bridge.conf"
    if (-not (ShouldWrite-File ${l2bridge_conf})) {
      return
    }

    $mgmt_ip = (Get_MgmtNetAdapter |
                Get-NetIPAddress -AddressFamily IPv4).IPAddress
    $mgmt_subnet = Get_MgmtSubnet
    Log-Output ("using mgmt IP ${mgmt_ip} and mgmt subnet ${mgmt_subnet} for " +
                "CNI config")

    $pod_gateway = Get_Endpoint_Gateway_From_CIDR(${env:POD_CIDR})

    # Explanation of the CNI config values:
    #   CLUSTER_CIDR: the cluster CIDR from which pod CIDRs are allocated.
    #   POD_CIDR: the pod CIDR assigned to this node.
    #   POD_GATEWAY: the gateway IP.
    #   MGMT_SUBNET: the subnet on which the Windows pods + kubelet will
    #     communicate with the rest of the cluster without NAT (i.e. the subnet
    #     that VM internal IPs are allocated from).
    #   MGMT_IP: the IP address assigned to the node's primary network interface
    #     (i.e. the internal IP of the GCE VM).
    #   SERVICE_CIDR: the CIDR used for kubernetes services.
    #   DNS_SERVER_IP: the cluster's DNS server IP address.
    #   DNS_DOMAIN: the cluster's DNS domain, e.g. "cluster.local".
    New-Item -Force -ItemType file ${l2bridge_conf} | Out-Null

    # The literal below is written to the config file verbatim (after
    # placeholder substitution), including its whitespace.
    $conf_template = '{
"cniVersion": "0.2.0",
"name": "l2bridge",
"type": "sdnbridge",
"master": "Ethernet",
"capabilities": {
"portMappings": true,
"dns": true
},
"ipam": {
"subnet": "POD_CIDR",
"routes": [
{
"GW": "POD_GATEWAY"
}
]
},
"dns": {
"Nameservers": [
"DNS_SERVER_IP"
],
"Search": [
"DNS_DOMAIN"
]
},
"AdditionalArgs": [
{
"Name": "EndpointPolicy",
"Value": {
"Type": "OutBoundNAT",
"Settings": {
"Exceptions": [
"CLUSTER_CIDR",
"SERVICE_CIDR",
"MGMT_SUBNET"
]
}
}
},
{
"Name": "EndpointPolicy",
"Value": {
"Type": "SDNRoute",
"Settings": {
"DestinationPrefix": "SERVICE_CIDR",
"NeedEncap": true
}
}
},
{
"Name": "EndpointPolicy",
"Value": {
"Type": "SDNRoute",
"Settings": {
"DestinationPrefix": "MGMT_IP/32",
"NeedEncap": true
}
}
}
]
}'

    # Substitute the placeholders one at a time, in a fixed order.
    $conf_text = $conf_template.replace('POD_CIDR', ${env:POD_CIDR})
    $conf_text = $conf_text.replace('POD_GATEWAY', ${pod_gateway})
    $conf_text = $conf_text.replace('DNS_SERVER_IP', ${kube_env}['DNS_SERVER_IP'])
    $conf_text = $conf_text.replace('DNS_DOMAIN', ${kube_env}['DNS_DOMAIN'])
    $conf_text = $conf_text.replace('MGMT_IP', ${mgmt_ip})
    $conf_text = $conf_text.replace('CLUSTER_CIDR', ${kube_env}['CLUSTER_IP_RANGE'])
    $conf_text = $conf_text.replace(
        'SERVICE_CIDR', ${kube_env}['SERVICE_CLUSTER_IP_RANGE'])
    $conf_text = $conf_text.replace('MGMT_SUBNET', ${mgmt_subnet})

    Set-Content ${l2bridge_conf} $conf_text
    Log-Output "CNI config:`n$(Get-Content -Raw ${l2bridge_conf})"
  }
  1321. # Download and install containerd and CNI binaries.
  1322. function Install_Containerd {
  1323. # Assume that presence of containerd.exe indicates that all containerd binaries
  1324. # were already previously downloaded to this node.
  1325. if (-not (ShouldWrite-File ${env:NODE_DIR}\containerd.exe)) {
  1326. return
  1327. }
  1328. # https://storage.googleapis.com/cri-containerd-staging/cri-containerd-9f79be1b.windows-amd64.tar.gz
  1329. # TODO(random-liu): Change this to official release path after testing.
  1330. $CONTAINERD_GCS_BUCKET = "cri-containerd-staging/windows"
  1331. $tmp_dir = 'C:\containerd_tmp'
  1332. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1333. $version_url = "https://storage.googleapis.com/$CONTAINERD_GCS_BUCKET/latest"
  1334. MustDownload-File -URLs $version_url -OutFile $tmp_dir\version
  1335. $version = $(Get-Content $tmp_dir\version)
  1336. $tar_url = "https://storage.googleapis.com/$CONTAINERD_GCS_BUCKET/cri-containerd-cni-$version.windows-amd64.tar.gz"
  1337. $sha_url = $tar_url + ".sha256"
  1338. MustDownload-File -URLs $sha_url -OutFile $tmp_dir\sha256
  1339. $sha = $(Get-Content $tmp_dir\sha256)
  1340. MustDownload-File `
  1341. -URLs $tar_url `
  1342. -OutFile $tmp_dir\containerd.tar.gz `
  1343. -Hash $sha `
  1344. -Algorithm SHA256
  1345. Push-Location $tmp_dir
  1346. # tar can only extract in the current directory.
  1347. tar -xvf $tmp_dir\containerd.tar.gz
  1348. Move-Item -Force cni\*.exe ${env:CNI_DIR}\
  1349. Move-Item -Force *.exe ${env:NODE_DIR}\
  1350. Pop-Location
  1351. Remove-Item -Force -Recurse $tmp_dir
  1352. # Generate containerd config
  1353. $config_dir = 'C:\Program Files\containerd'
  1354. New-Item $config_dir -ItemType 'directory' -Force | Out-Null
  1355. Set-Content "$config_dir\config.toml" @"
  1356. [plugins.cri]
  1357. sandbox_image = 'INFRA_CONTAINER_IMAGE'
  1358. [plugins.cri.cni]
  1359. bin_dir = 'CNI_BIN_DIR'
  1360. conf_dir = 'CNI_CONF_DIR'
  1361. "@.replace('INFRA_CONTAINER_IMAGE', $INFRA_CONTAINER).`
  1362. replace('CNI_BIN_DIR', ${env:CNI_DIR}).`
  1363. replace('CNI_CONF_DIR', ${env:CNI_CONFIG_DIR})
  1364. }
  1365. # Register and start containerd service.
  1366. function Start_Containerd {
  1367. Log-Output "Creating containerd service"
  1368. containerd.exe --register-service --log-file ${env:LOGS_DIR}/containerd.log
  1369. Log-Output "Starting containerd service"
  1370. Start-Service containerd
  1371. }
  1372. # TODO(pjh): move the Stackdriver logging agent code below into a separate
  1373. # module; it was put here temporarily to avoid disrupting the file layout in
  1374. # the K8s release machinery.
  1375. $STACKDRIVER_VERSION = 'v1-9'
  1376. $STACKDRIVER_ROOT = 'C:\Program Files (x86)\Stackdriver'
  1377. # Restarts the Stackdriver logging agent, or starts it if it is not currently
  1378. # running. A standard `Restart-Service StackdriverLogging` may fail because
  1379. # StackdriverLogging sometimes is unstoppable, so this function works around it
  1380. # by killing the processes.
  1381. function Restart-LoggingAgent {
  1382. Stop-Service -NoWait -ErrorAction Ignore StackdriverLogging
  1383. # Wait (if necessary) for service to stop.
  1384. $timeout = 10
  1385. $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  1386. for ($i = 0; $i -lt $timeout -and !($stopped); $i++) {
  1387. Start-Sleep 1
  1388. $stopped = (Get-service StackdriverLogging).Status -eq 'Stopped'
  1389. }
  1390. if ((Get-service StackdriverLogging).Status -ne 'Stopped') {
  1391. # Force kill the processes.
  1392. Stop-Process -Force -PassThru -Id (Get-WmiObject win32_process |
  1393. Where CommandLine -Like '*Stackdriver/logging*').ProcessId
  1394. # Wait until process has stopped.
  1395. $waited = 0
  1396. $log_period = 10
  1397. $timeout = 60
  1398. while ((Get-service StackdriverLogging).Status -ne 'Stopped' -and $waited -lt $timeout) {
  1399. Start-Sleep 1
  1400. $waited++
  1401. if ($waited % $log_period -eq 0) {
  1402. Log-Output "Waiting for StackdriverLogging service to stop"
  1403. }
  1404. }
  1405. # Timeout occurred
  1406. if ($waited -ge $timeout) {
  1407. Throw ("Timeout while waiting for StackdriverLogging service to stop")
  1408. }
  1409. }
  1410. Start-Service StackdriverLogging
  1411. }
  1412. # Installs the Stackdriver logging agent according to
  1413. # https://cloud.google.com/logging/docs/agent/installation.
  1414. # TODO(yujuhong): Update to a newer Stackdriver agent once it is released to
  1415. # support kubernetes metadata properly. The current version does not recognizes
  1416. # the local resource key "logging.googleapis.com/local_resource_id", and fails
  1417. # to label namespace, pod and container names on the logs.
  1418. function Install-LoggingAgent {
  1419. # Remove the existing storage.json file if it exists. This is a workaround
  1420. # for the bug where the logging agent cannot start up if the file is
  1421. # corrupted.
  1422. Remove-Item `
  1423. -Force `
  1424. -ErrorAction Ignore `
  1425. ("$STACKDRIVER_ROOT\LoggingAgent\Main\pos\winevtlog.pos\worker0\" +
  1426. "storage.json")
  1427. if (Test-Path $STACKDRIVER_ROOT) {
  1428. # Note: we should reinstall the Stackdriver agent if $REDO_STEPS is true
  1429. # here, but we don't know how to run the installer without it prompting
  1430. # when Stackdriver is already installed. We dumped the strings in the
  1431. # installer binary and searched for flags to do this but found nothing. Oh
  1432. # well.
  1433. Log-Output ("Skip: $STACKDRIVER_ROOT is already present, assuming that " +
  1434. "Stackdriver logging agent is already installed")
  1435. Restart-LoggingAgent
  1436. return
  1437. }
  1438. $url = ("https://storage.googleapis.com/gke-release/winnode/stackdriver/" +
  1439. "StackdriverLogging-${STACKDRIVER_VERSION}.exe")
  1440. $tmp_dir = 'C:\stackdriver_tmp'
  1441. New-Item $tmp_dir -ItemType 'directory' -Force | Out-Null
  1442. $installer_file = "${tmp_dir}\StackdriverLogging-${STACKDRIVER_VERSION}.exe"
  1443. MustDownload-File -OutFile $installer_file -URLs $url
  1444. # Start the installer silently. This automatically starts the
  1445. # "StackdriverLogging" service.
  1446. Log-Output 'Invoking Stackdriver installer'
  1447. Start-Process $installer_file -ArgumentList "/S" -Wait
  1448. # Install the record-reformer plugin.
  1449. Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
  1450. -ArgumentList "install","fluent-plugin-record-reformer" `
  1451. -Wait
  1452. # Install the multi-format-parser plugin.
  1453. Start-Process "$STACKDRIVER_ROOT\LoggingAgent\Main\bin\fluent-gem" `
  1454. -ArgumentList "install","fluent-plugin-multi-format-parser" `
  1455. -Wait
  1456. Remove-Item -Force -Recurse $tmp_dir
  1457. }
  1458. # Writes the logging configuration file for Stackdriver. Restart-LoggingAgent
  1459. # should then be called to pick up the new configuration.
  1460. function Configure-LoggingAgent {
  1461. $fluentd_config_dir = "$STACKDRIVER_ROOT\LoggingAgent\config.d"
  1462. $fluentd_config_file = "$fluentd_config_dir\k8s_containers.conf"
  1463. # Create a configuration file for kubernetes containers.
  1464. # The config.d directory should have already been created automatically, but
  1465. # try creating again just in case.
  1466. New-Item $fluentd_config_dir -ItemType 'directory' -Force | Out-Null
  1467. $config = $FLUENTD_CONFIG.replace('NODE_NAME', (hostname))
  1468. $config | Out-File -FilePath $fluentd_config_file -Encoding ASCII
  1469. Log-Output "Wrote fluentd logging config to $fluentd_config_file"
  1470. }
  1471. # The NODE_NAME placeholder must be replaced with the node's name (hostname).
  1472. $FLUENTD_CONFIG = @'
  1473. # This configuration file for Fluentd is used to watch changes to kubernetes
  1474. # container logs in the directory /var/lib/docker/containers/ and submit the
  1475. # log records to Google Cloud Logging using the cloud-logging plugin.
  1476. #
  1477. # Example
  1478. # =======
  1479. # A line in the Docker log file might look like this JSON:
  1480. #
  1481. # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
  1482. # "stream":"stderr",
  1483. # "time":"2014-09-25T21:15:03.499185026Z"}
  1484. #
  1485. # The original tag is derived from the log file's location.
  1486. # For example a Docker container's logs might be in the directory:
  1487. # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
  1488. # and in the file:
  1489. # 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
  1490. # where 997599971ee6... is the Docker ID of the running container.
  1491. # The Kubernetes kubelet makes a symbolic link to this file on the host
  1492. # machine in the /var/log/containers directory which includes the pod name,
  1493. # the namespace name and the Kubernetes container name:
  1494. # synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  1495. # ->
  1496. # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
  1497. # The /var/log directory on the host is mapped to the /var/log directory in the container
  1498. # running this instance of Fluentd and we end up collecting the file:
  1499. # /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  1500. # This results in the tag:
  1501. # var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
  1502. # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
  1503. # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
  1504. # the container ID.
  1505. # The record reformer is used to extract pod_name, namespace_name and
  1506. # container_name from the tag and set them in a local_resource_id in the
  1507. # format of:
  1508. # 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
  1509. # The reformer also changes the tags to 'stderr' or 'stdout' based on the
  1510. # value of 'stream'.
  1511. # local_resource_id is later used by google_cloud plugin to determine the
  1512. # monitored resource to ingest logs against.
  1513. # Json Log Example:
  1514. # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
  1515. # CRI Log Example:
  1516. # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
  1517. <source>
  1518. @type tail
  1519. path /var/log/containers/*.log
  1520. pos_file /var/log/gcp-containers.log.pos
  1521. # Tags at this point are in the format of:
  1522. # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
  1523. tag reform.*
  1524. read_from_head true
  1525. <parse>
  1526. @type multi_format
  1527. <pattern>
  1528. format json
  1529. time_key time
  1530. time_format %Y-%m-%dT%H:%M:%S.%NZ
  1531. </pattern>
  1532. <pattern>
  1533. format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
  1534. time_format %Y-%m-%dT%H:%M:%S.%N%:z
  1535. </pattern>
  1536. </parse>
  1537. </source>
  1538. # Example:
  1539. # I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
  1540. <source>
  1541. @type tail
  1542. format multiline
  1543. multiline_flush_interval 5s
  1544. format_firstline /^\w\d{4}/
  1545. format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  1546. time_format %m%d %H:%M:%S.%N
  1547. path /etc/kubernetes/logs/kubelet.log
  1548. pos_file /etc/kubernetes/logs/gcp-kubelet.log.pos
  1549. tag kubelet
  1550. </source>
  1551. # Example:
  1552. # I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
  1553. <source>
  1554. @type tail
  1555. format multiline
  1556. multiline_flush_interval 5s
  1557. format_firstline /^\w\d{4}/
  1558. format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
  1559. time_format %m%d %H:%M:%S.%N
  1560. path /etc/kubernetes/logs/kube-proxy.log
  1561. pos_file /etc/kubernetes/logs/gcp-kube-proxy.log.pos
  1562. tag kube-proxy
  1563. </source>
  1564. # Example:
  1565. # time="2019-12-10T21:27:59.836946700Z" level=info msg="loading plugin \"io.containerd.grpc.v1.cri\"..." type=io.containerd.grpc.v1
  1566. <source>
  1567. @type tail
  1568. format multiline
  1569. multiline_flush_interval 5s
  1570. format_firstline /^time=/
  1571. format1 /^time="(?<time>[^ ]*)" level=(?<severity>\w*) (?<message>.*)/
  1572. time_format %Y-%m-%dT%H:%M:%S.%N%z
  1573. path /etc/kubernetes/logs/containerd.log
  1574. pos_file /etc/kubernetes/logs/gcp-containerd.log.pos
  1575. tag container-runtime
  1576. </source>
  1577. <match reform.**>
  1578. @type record_reformer
  1579. enable_ruby true
  1580. <record>
  1581. # Extract local_resource_id from tag for 'k8s_container' monitored
  1582. # resource. The format is:
  1583. # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
  1584. "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
  1585. # Rename the field 'log' to a more generic field 'message'. This way the
  1586. # fluent-plugin-google-cloud knows to flatten the field as textPayload
  1587. # instead of jsonPayload after extracting 'time', 'severity' and
  1588. # 'stream' from the record.
  1589. message ${record['log']}
  1590. # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
  1591. severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
  1592. </record>
  1593. tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
  1594. remove_keys stream,log
  1595. </match>
  1596. # TODO: detect exceptions and forward them as one log entry using the
  1597. # detect_exceptions plugin
  1598. # This section is exclusive for k8s_container logs. These logs come with
  1599. # 'raw.stderr' or 'raw.stdout' tags.
  1600. <match {raw.stderr,raw.stdout}>
  1601. @type google_cloud
  1602. # Try to detect JSON formatted log entries.
  1603. detect_json true
  1604. # Allow log entries from multiple containers to be sent in the same request.
  1605. split_logs_by_tag false
  1606. # Set the buffer type to file to improve the reliability and reduce the memory consumption
  1607. buffer_type file
  1608. buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
  1609. # Set queue_full action to block because we want to pause gracefully
  1610. # in case of the off-the-limits load instead of throwing an exception
  1611. buffer_queue_full_action block
  1612. # Set the chunk limit conservatively to avoid exceeding the recommended
  1613. # chunk size of 5MB per write request.
  1614. buffer_chunk_limit 512k
  1615. # Cap the combined memory usage of this buffer and the one below to
  1616. # 512KiB/chunk * (6 + 2) chunks = 4 MiB
  1617. buffer_queue_limit 6
  1618. # Never wait more than 5 seconds before flushing logs in the non-error case.
  1619. flush_interval 5s
  1620. # Never wait longer than 30 seconds between retries.
  1621. max_retry_wait 30
  1622. # Disable the limit on the number of retries (retry forever).
  1623. disable_retry_limit
  1624. # Use multiple threads for processing.
  1625. num_threads 2
  1626. use_grpc true
  1627. # Skip timestamp adjustment as this is in a controlled environment with
  1628. # known timestamp format. This helps with CPU usage.
  1629. adjust_invalid_timestamps false
  1630. </match>
  1631. # Attach local_resource_id for 'k8s_node' monitored resource.
  1632. <filter **>
  1633. @type record_transformer
  1634. enable_ruby true
  1635. <record>
  1636. "logging.googleapis.com/local_resource_id" ${"k8s_node.NODE_NAME"}
  1637. </record>
  1638. </filter>
  1639. '@
  1640. # Export all public functions:
  1641. Export-ModuleMember -Function *-*