update_owners.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. #!/usr/bin/env python
  2. # Copyright 2016 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import argparse
  16. import collections
  17. import csv
  18. import json
  19. import os
  20. import random
  21. import re
  22. import subprocess
  23. import sys
  24. import time
  25. import urllib2
  26. import zlib
  # Directory that contains this script; the paths below are resolved from it.
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))

  # CSV file with one row per test: name, owner, auto-assigned flag, sig.
  OWNERS_PATH = os.path.abspath(
      os.path.join(BASE_DIR, '..', 'test', 'test_owners.csv'))

  # JSON file holding the SIG prefixes, stored next to the CSV.  Only the file
  # extension is swapped; str.replace() would also rewrite a '.csv' that
  # happens to appear in a directory name of the absolute path.
  OWNERS_JSON_PATH = os.path.splitext(OWNERS_PATH)[0] + '.json'

  # Base URL of the test-history bucket; get_test_history() appends the
  # per-day log path to it.
  GCS_URL_BASE = 'https://storage.googleapis.com/kubernetes-test-history/'

  # Accounts that get_maintainers() removes from its result.
  SKIP_MAINTAINERS = {
      'a-robinson', 'aronchick', 'bgrant0607-nocc', 'david-mcmahon',
      'goltermann', 'sarahnovotny'}
  def normalize(name):
      """Return the canonical form of a test name.

      Segments wrapped in square brackets or curly braces are removed, each
      run of whitespace is collapsed into a single space, and surrounding
      whitespace is stripped.
      """
      without_tags = re.sub(r'\[.*?\]|\{.*?\}', '', name)
      single_spaced = re.sub(r'\s+', ' ', without_tags)
      return single_spaced.strip()
  def get_test_history(days_ago):
      """Download and decode the test-history log for a past UTC day.

      Args:
          days_ago: how many days before the current time the requested log
              is dated (0 means today, in UTC).

      Returns:
          The parsed JSON document served at
          GCS_URL_BASE + 'logs/YYYY-MM-DD.json'.

      Raises:
          Whatever urllib2.urlopen, zlib.decompress or json.loads raise on a
          failed request or a malformed body.
      """
      day = time.gmtime(time.time() - days_ago * 24 * 60 * 60)
      url = time.strftime(GCS_URL_BASE + 'logs/%Y-%m-%d.json', day)
      resp = urllib2.urlopen(url)
      try:
          content = resp.read()
          encoding = resp.headers.get('content-encoding')
      finally:
          # Always release the connection, even when the read fails.
          resp.close()
      if encoding == 'gzip':
          # wbits of 15 | 16 makes zlib expect a gzip header and trailer.
          content = zlib.decompress(content, 15 | 16)
      return json.loads(content)
  def get_test_names_from_test_history():
      """Collect the normalized test names seen in the last four daily logs.

      Fetches the history for today and the three preceding days and returns
      the set of normalized entries from each log's 'test_names' list.
      """
      return {
          normalize(raw_name)
          for age in range(4)
          for raw_name in get_test_history(age)['test_names']
      }
  def get_test_names_from_local_files():
      """Return the normalized names of the tests found in the local tree.

      Runs `go run test/list/main.go -json` (the path is relative to the
      current working directory) and parses its JSON output.  For entries
      whose 'Name' does not contain 'k8s.io/', the 'TestName' field is
      appended to the name, separated by a space.
      """
      listing = subprocess.check_output(
          ['go', 'run', 'test/list/main.go', '-json'])
      names = set()
      for entry in json.loads(listing):
          full_name = entry['Name']
          if 'k8s.io/' not in full_name:
              full_name = full_name + ' ' + entry['TestName']
          names.add(normalize(full_name))
      return names
  def load_owners(fname):
      """Read the owners CSV and return it as a dictionary.

      Args:
          fname: path of the CSV file; its first row is treated as a header.

      Returns:
          A dict mapping each normalized test name to a tuple
          (owner, auto_assigned_as_int, sig).  Three-column rows, the format
          used before the sig column existed, get an empty sig.
      """
      owners = {}
      with open(fname) as csv_file:
          rows = csv.reader(csv_file)
          next(rows, None)  # discard the header row
          for row in rows:
              if len(row) == 3:
                  # Older file layout without the sig column.
                  test, owner, auto_assigned = row
                  sig = ""
              else:
                  test, owner, auto_assigned, sig = row
              owners[normalize(test)] = (owner, int(auto_assigned), sig)
      return owners
  def write_owners(fname, owners):
      """Write the owners mapping to a CSV file, sorted by test name.

      Args:
          fname: path of the file to (over)write.
          owners: dict mapping test name to (owner, auto_assigned, sig).

      The first row is the header; the auto-assigned flag is stored as an
      integer and rows are terminated with a bare newline.
      """
      with open(fname, 'w') as handle:
          writer = csv.writer(handle, lineterminator='\n')
          writer.writerow(['name', 'owner', 'auto-assigned', 'sig'])
          for test in sorted(owners):
              owner, auto_assigned, sig = owners[test]
              writer.writerow([test, owner, int(auto_assigned), sig])
  def get_maintainers():
      """Return the sorted maintainer logins, minus SKIP_MAINTAINERS.

      The roster is maintained by hand because GitHub does not appear to
      allow listing team members without an org-admin key.  To refresh it:
        1. Open https://github.com/orgs/kubernetes/teams/kubernetes-maintainers
        2. Run this in the JS console:
           [].slice.call(document.querySelectorAll('.team-member-username a')).map(
               e => e.textContent.trim())
      """
      roster = {
          "alex-mohr", "apelisse", "aronchick",
          "bgrant0607", "bgrant0607-nocc", "bprashanth", "brendandburns",
          "caesarxuchao", "childsb", "cjcullen",
          "david-mcmahon", "davidopp", "dchen1107", "deads2k", "derekwaynecarr",
          "eparis", "erictune",
          "fabioy", "fejta", "fgrzadkowski", "freehan",
          "gmarek", "grodrigues3",
          "ingvagabund", "ixdy",
          "janetkuo", "jbeda", "jessfraz", "jingxu97", "jlowdermilk", "jsafrane",
          "jszczepkowski", "justinsb",
          "Kashomon", "kevin-wangzefeng", "krousey",
          "lavalamp", "liggitt", "luxas",
          "madhusudancs", "maisem", "matchstick", "mbohlool", "mikedanese", "mml",
          "mtaufen", "mwielgus",
          "ncdc", "nikhiljindal",
          "piosz", "pmorie", "pwittrock",
          "Q-Lee", "quinton-hoole",
          "Random-Liu", "rmmh", "roberthbailey",
          "saad-ali", "smarterclayton", "soltysh", "spxtr", "sttts",
          "thelinuxfoundation", "thockin", "timothysc", "tallclair",
          "vishh",
          "wojtek-t",
          "xiang90",
          "yifan-gu", "yujuhong",
          "zmerlynn",
      }
      return sorted(login for login in roster if login not in SKIP_MAINTAINERS)
  def detect_github_username():
      """Guess the GitHub user from the URL of the `origin` git remote.

      Returns:
          The path component that directly follows `github.com:` or
          `github.com/` in the output of `git config remote.origin.url`.

      Raises:
          ValueError: if the URL does not match, or the component is
              'kubernetes' (the upstream org rather than a personal fork).
          subprocess.CalledProcessError: if the git command itself fails.
      """
      origin_url = subprocess.check_output(['git', 'config', 'remote.origin.url'])
      # The dot is escaped so only the literal host matches, and the capture
      # stops at the first slash so a URL ending in '/' cannot yield
      # 'user/repo' as the user name.
      m = re.search(r'github\.com[:/]([^/]+)/', origin_url)
      if m and m.group(1) != 'kubernetes':
          return m.group(1)
      raise ValueError('unable to determine GitHub user from '
                       '`git config remote.origin.url` output, run with --user instead')
  def sig_prefixes(owners):
      """Compute the shortest test-name prefixes that identify each SIG.

      Args:
          owners: dict mapping test name to (owner, auto_assigned, sig).

      Returns:
          A JSON string (sorted keys, indented) mapping each sig to the list
          of name prefixes it owns.  Tests whose name contains 'k8s.io/' and
          tests without a sig are ignored.
      """
      # TODO(rmmh): make sig prefixes the only thing in test_owners!
      # Precise test names aren't very interesting.
      owns = []
      for test, (_owner, _random_assignment, sig) in owners.items():
          if 'k8s.io/' in test or not sig:
              continue
          owns.append([test, sig])

      while True:
          owns.sort()
          for name, sig in owns:
              # Try removing the last word of the name; use the shorter name if
              # every entry beginning with it shares the same sig.
              maybe_prefix = ' '.join(name.split()[:-1])
              if not maybe_prefix:
                  # An empty prefix matches every entry.  Accepting it would
                  # replace the list with a single '' entry that is then
                  # re-added on every pass, so the loop would never finish.
                  continue
              matches = [other_sig == sig
                         for other_name, other_sig in owns
                         if other_name.startswith(maybe_prefix)]
              if matches and all(matches):
                  owns = [[n, s] for n, s in owns
                          if not n.startswith(maybe_prefix)]
                  owns.append([maybe_prefix, sig])
                  break  # the list changed: re-sort and scan again
          else:  # iterated completely through owns without any changes
              break

      sigs = {}
      for name, sig in owns:
          sigs.setdefault(sig, []).append(name)
      return json.dumps(sigs, sort_keys=True, indent=True)
  def main():
      """Synchronize the test owners CSV with the current list of tests.

      Steps, in order:
        1. Build the test list from the result history (--history) or from
           the local tree.
        2. Load the owners CSV, rewrite the SIG-prefix JSON file, and stop
           if --print_sig_prefixes was given.
        3. Report outdated and new tests; with --check, exit with status 1
           if either list is non-empty and 0 otherwise.
        4. Drop outdated tests and, unless --addonly is set, drop tests
           that were auto-assigned to someone who is not a maintainer.
        5. Assign every test without an owner to --user (default: the
           detected GitHub user) or, for RANDOM, to one of the four
           least-loaded maintainers, then write the CSV back.

      Every print below takes a single argument, so the output is the same
      whether it runs as a Python 2 print statement or as a function call.
      """
      parser = argparse.ArgumentParser()
      parser.add_argument('--history', action='store_true',
                          help='Generate test list from result history.')
      parser.add_argument('--user',
                          help='User to assign new tests to (or RANDOM, default: current GitHub user).')
      parser.add_argument('--addonly', action='store_true',
                          help='Only add missing tests, do not change existing.')
      parser.add_argument('--check', action='store_true',
                          help='Exit with a nonzero status if the test list has changed.')
      parser.add_argument('--print_sig_prefixes', action='store_true',
                          help='Emit SIG prefixes for matching.')
      options = parser.parse_args()

      if options.history:
          test_names = get_test_names_from_test_history()
      else:
          test_names = get_test_names_from_local_files()
      test_names = sorted(test_names)
      owners = load_owners(OWNERS_PATH)

      # The prefix file is refreshed on every run, whichever mode is selected.
      prefixes = sig_prefixes(owners)
      with open(OWNERS_JSON_PATH, 'w') as f:
          f.write(prefixes + '\n')

      if options.print_sig_prefixes:
          print(prefixes)
          return

      outdated_tests = sorted(set(owners) - set(test_names))
      new_tests = sorted(set(test_names) - set(owners))
      maintainers = get_maintainers()
      maintainer_set = set(maintainers)

      print('# OUTDATED TESTS (%d):' % len(outdated_tests))
      print('\n'.join(
          '%s -- %s%s' % (t, owners[t][0], ' (random)' if owners[t][1] else '')
          for t in outdated_tests))
      print('# NEW TESTS (%d):' % len(new_tests))
      print('\n'.join(new_tests))

      if options.check:
          if new_tests or outdated_tests:
              print('')
              print('ERROR: the test list has changed')
              sys.exit(1)
          sys.exit(0)

      if not options.user:
          options.user = detect_github_username()

      for name in outdated_tests:
          owners.pop(name)

      if not options.addonly:
          # Two spaces after MAINTAINERS: the original pair of print
          # statements (the first ending in a comma) emitted exactly this.
          print('# UNEXPECTED MAINTAINERS  '
                '(randomly assigned, but not in kubernetes-maintainers)')
          # sorted() copies the items, so popping while looping is safe.
          for name, (owner, random_assignment, _) in sorted(owners.items()):
              if random_assignment and owner not in maintainer_set:
                  print('%-16s %s' % (owner, name))
                  owners.pop(name)
          print('')

      # Start every maintainer at zero so that people without any tests are
      # eligible for random assignment, and so that random.choice() never
      # receives an empty list when no current owner is a maintainer.
      owner_counts = collections.Counter(dict.fromkeys(maintainers, 0))
      owner_counts.update(
          owner for _name, (owner, _auto, _sig) in owners.items()
          if owner in maintainer_set)

      assign_randomly = options.user.lower() == 'random'
      for test_name in set(test_names) - set(owners):
          if assign_randomly:
              # Pick among the four maintainers with the fewest tests.
              new_owner, _count = random.choice(owner_counts.most_common()[-4:])
          else:
              new_owner = options.user
          owner_counts[new_owner] += 1
          owners[test_name] = (new_owner, assign_randomly, "")

      if assign_randomly:
          print('# Tests per maintainer:')
          for owner, count in owner_counts.most_common():
              print('%-20s %3d' % (owner, count))

      write_owners(OWNERS_PATH, owners)
  # Run the updater only when this file is executed directly as a script.
  if '__main__' == __name__:
      main()