From 4cb523cdc9b75df38c8f5743ac76027a7f82c8b1 Mon Sep 17 00:00:00 2001 From: Jeremy Stanley Date: Tue, 9 Jan 2018 21:27:06 +0000 Subject: [PATCH] Drop tools/owners.py Now that the tools/owners.py script is a module in the openstack_election package within the openstack/election repository, we can stop providing a copy here. Change-Id: I39efbad539790687646c1d76159894e9e997ff72 Depends-On: I180ef0e5ec880b46f0427c1c952b640a780b5732 --- tools/owners.py | 719 ------------------------------------------------ 1 file changed, 719 deletions(-) delete mode 100644 tools/owners.py diff --git a/tools/owners.py b/tools/owners.py deleted file mode 100644 index f9f1a7f127..0000000000 --- a/tools/owners.py +++ /dev/null @@ -1,719 +0,0 @@ -#!/usr/bin/env python - -# Copyright (c) 2016 OpenStack Foundation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS -# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language -# governing permissions and limitations under the License. - -# Description: When run using OpenStack's Gerrit server, this builds -# YAML representations of aggregate change owner details and change -# counts for each governance project-team, as well as a combined set -# for all teams. - -# Rationale: The OpenStack Technical Committee and Project Team Lead -# elections need electorate rolls taken from "Active Technical -# Contributors" to any repos under official project-teams over a -# particular timeframe. Similarly, the OpenStack Foundation gives -# summit registration discount codes to contributors meeting similar -# criteria. The Gerrit REST API provides access to all the data -# necessary to identify these individuals. - -# Use: The results end up in files named for each -# official governance project-team (or "all") ending with a .yaml -# extension. At the time of writing, it takes approximately 30 -# minutes to run on a well-connected machine with 70-80ms round-trip -# latency to review.opendev.org. - -# An example for generating the March 2016 technical election rolls: -# -# $ virtualenv venv -# [...] -# $ ./venv/bin/pip install pyyaml requests -# [...] -# $ ./venv/bin/python tools/owners.py -a 2015-03-04 \ -# -b 2016-03-04 -i 11131 -i 22816 -o owners \ -# -r march-2016-elections -# MISSING: ansible-build-image -# MERGING DUPLICATE ACCOUNT: 8074 into 2467 -# [...blah, blah, blah...wait for completion...] -# -# TODO(fungi): Add a pass which will correctly generate the -# stable_branch_maintenance.* files. In the meantime, to properly -# generate the SBM PTL electorate, run a second time with a -# different -o of sbm, adding the -n and -s options, and then copy -# the full electorate over like: -# -# $ ./venv/bin/python tools/owners.py -a 2015-03-04 \ -# -b 2016-03-04 -i 11131 -i 22816 -o sbm \ -# -r march-2016-elections -n -s 'branch:^stable/.*' -# [...wait for completion again...] -# $ cp sbm/_electorate.txt owners/stable_branch_maintenance.txt -# $ cp sbm/_all_owners.yaml owners/stable_branch_maintenance.yaml -# -# Once complete, make a compressed tarball of the owners directory -# and send it attached to a PGP/MIME signed message to the appointed -# election officials. The various *.txt files are lists of the -# preferred addresses of all valid voters for the various PTL -# elections (whose team names correspond to the file names), -# suitable for passing directly to CIVS. The similarly named *.yaml -# files are detailed structured data about the same sets of voters, -# for use in validating the address lists. The _electorate.txt file -# is the equivalent address list for the TC election voters, and its -# corresponding structured data is in _all_owners.yaml. - -# You can also do interesting analysis on _all_owners.yaml, for -# example: -# -# $ ./venv/bin/python -# >>> import yaml -# >>> -# >>> o = yaml.load(open('owners/_all_owners.yaml')) -# >>> for c in range(5): -# ... print('Owners of at least %s changes: %s' % ( -# ... c+1, -# ... len({k: v for k, v in o.iteritems() if v['count'] > c}))) -# ... -# Owners of at least 1 changes: 3239 -# Owners of at least 2 changes: 2352 -# Owners of at least 3 changes: 1924 -# Owners of at least 4 changes: 1682 -# Owners of at least 5 changes: 1504 - - -from __future__ import print_function -import argparse -import csv -import datetime -import json -import os -import sys - -import requests -import yaml - -try: - from string import maketrans -except ImportError: # Python3 - maketrans = bytes.maketrans - - -def dumper(data, stream): - """Convenience wrapper to consistently set YAML formatting""" - return yaml.safe_dump(data, allow_unicode=True, default_flow_style=False, - encoding='utf-8', stream=stream) - - -def normalize_email(email): - """Lower-case the domain part of E-mail addresses to better spot - duplicate entries, since the domain part is case-insensitive - courtesy of DNS while the local part is not necessarily""" - local, domain = email.split('@') - domain = domain.lower() - return '%s@%s' % (local, domain) - - -def normalize_project(project): - """Replace spaces and hyphens with underscores in project teams - and then lower-case them, for more convenient filenames""" - return project.translate(maketrans(' -', '__')).lower() - - -def date_merged(change, after=None, before=None): - """Determine the date and time a specific change merged""" - - date = change.get('submitted', None) - - if not date: - # Something's terribly wrong with any changes matching this now - print( - 'SKIPPING DATELESS MERGE: change %s for account %s' - % (change['_number'], change['owner']['_account_id']), - file=sys.stderr) - return None - - # Strip superfluous subsecond values as Gerrit always just - # reports .000000000 for them anyway - date = date.split('.')[0] - - # Pass back an invalid result if it falls after the requested - # cutoff - if before and date >= before: - return None - - # Sanity check for completeness, but since "after" is also used - # in the Gerrit query this shouldn't ever actually be reached - if after and date < after: - return None - - return date - - -def requester(url, params={}, headers={}): - """A requests wrapper to consistently retry HTTPS queries""" - - # Try up to 3 times - retry = requests.Session() - retry.mount("https://", requests.adapters.HTTPAdapter(max_retries=3)) - return retry.get(url=url, params=params, headers=headers) - - -def decode_json(raw): - """Trap JSON decoding failures and provide more detailed errors""" - - # Gerrit's REST API prepends a JSON-breaker to avoid XSS vulnerabilities - if raw.text.startswith(")]}'"): - trimmed = raw.text[4:] - else: - trimmed = raw.text - - # Try to decode and bail with much detail if it fails - try: - decoded = json.loads(trimmed) - except: - print('\nrequest returned %s error to query:\n\n %s\n' - '\nwith detail:\n\n %s\n' % (raw, raw.url, trimmed), - file=sys.stderr) - raise - return decoded - - -def query_gerrit(method, params={}): - """Query the Gerrit REST API""" - - # The base URL to Gerrit REST API - GERRIT_API_URL = 'https://review.opendev.org/' - - raw = requester(GERRIT_API_URL + method, params=params, - headers={'Accept': 'application/json'}) - return decode_json(raw) - - -def get_from_cgit(project, obj, params={}): - """Retrieve a file from the cgit interface""" - - url = 'http://git.openstack.org/cgit/' + project + '/plain/' + obj - raw = requester(url, params=params, - headers={'Accept': 'application/json'}) - return yaml.safe_load(raw.text) - - -def lookup_member(email): - """A requests wrapper to querying the OSF member directory API""" - - # The OpenStack foundation member directory lookup API endpoint - MEMBER_LOOKUP_URL = 'https://openstackid-resources.openstack.org/' - - # URL pattern for querying foundation members by E-mail address - raw = requester( - MEMBER_LOOKUP_URL + '/api/public/v1/members', - params={'filter[]': [ - 'group_slug==foundation-members', - 'email==' + email, - ]}, - headers={'Accept': 'application/json'}, - ) - - return decode_json(raw) - - -def usage(argv): - """Parse command line argument""" - parser = argparse.ArgumentParser( - description="When run using OpenDev's Gerrit server, this builds " - "YAML representations of aggregate change owner details and change " - "counts for each governance project-team, as well as a combined set " - "for all teams. Before and after dates/times should be supplied in " - "formats Gerrit accepts: https://review.opendev.org/Documentation/" - "user-search.html#search-operators") - parser.add_argument("-a", "--after", help="Start date for matching merges") - parser.add_argument("-b", "--before", help="End date for matching merges") - parser.add_argument("-c", "--config", help="Path to script configuration") - parser.add_argument("-i", "--ignore", help="Account Id numbers to skip", - action='append') - parser.add_argument("-n", "--no-extra-atcs", help='Omit "extra ATCs"', - dest='no_extra_atcs', action='store_true') - parser.add_argument("-o", "--outdir", help="Create an output directory") - parser.add_argument("-r", "--ref", help="Specify a Governance refname") - parser.add_argument("-s", "--sieve", help="Add Gerrit query parameters") - return parser.parse_args(argv[1:]) - - -def main(argv=sys.argv): - """The giant pile of spaghetti which does everything else""" - - # Record the start time for use later - start = datetime.datetime.utcnow() - - options = usage(argv) - - # If we're supplied a configuration file, use it - if options.config: - config = yaml.safe_load(open(options.config)) - # Otherwise, use nothing - else: - config = {} - - # Start of the match timeframe for change merges - if options.after: - after = options.after - elif 'after' in config: - after = config['after'] - else: - after = None - - # End of the match timeframe for change merges - if options.before: - before = options.before - elif 'before' in config: - before = config['before'] - else: - before = None - - # Owner Ids for whom to ignore changes - if options.ignore: - ignore = [int(i) for i in options.ignore] - elif 'ignore' in config: - ignore = config['ignore'] - else: - ignore = [] - - # Whether to omit "extra ATCs" - if options.no_extra_atcs: - no_extra_atcs = options.no_extra_atcs - elif 'no-extra-atcs' in config: - no_extra_atcs = config['no-extra-atcs'] - else: - no_extra_atcs = False - - # Output file directory - if options.outdir: - outdir = options.outdir - elif 'outdir' in config: - outdir = config['outdir'] - else: - outdir = '.' - if not os.path.isdir(outdir): - os.makedirs(outdir) - - # Governance Git repository ref object for reference lists - if options.ref: - ref = options.ref - elif 'ref' in config: - ref = config['ref'] - else: - ref = 'refs/heads/master' - - # Gerrit change query additions - if options.sieve: - sieve = options.sieve - elif 'sieve' in config: - sieve = config['sieve'] - else: - sieve = None - - # The query identifying relevant changes - match = 'status:merged' - if after: - match = '%s after:"%s"' % (match, after) - if sieve: - match = '%s %s' % (match, sieve) - - # Retrieve the governance projects list, needs a Git refname as a - # parameter - # TODO(fungi): make this a configurable option so that you can - # for example supply a custom project list for running elections - # in unofficial teams - gov_projects = get_from_cgit('openstack/governance', - 'reference/projects.yaml', - {'h': ref}) - - # The set of retired or removed "legacy" projects from governance - # are merged into the main dict if their retired-on date falls - # later than the after parameter for the qualifying time period - # TODO(fungi): make this a configurable option - old_projects = get_from_cgit('openstack/governance', - 'reference/legacy.yaml', - {'h': ref}) - for project in old_projects: - for deliverable in old_projects[project]['deliverables']: - if 'retired-on' in old_projects[project]['deliverables'][deliverable]: - retired = old_projects[project]['deliverables'][deliverable]['retired-on'] - elif 'retired-on' in old_projects[project]: - retired = old_projects[project]['retired-on'] - else: - retired = None - if retired: - retired = retired.isoformat() - if after and after > retired: - continue - if project not in gov_projects: - gov_projects[project] = {'deliverables': {}} - if deliverable in gov_projects[project]['deliverables']: - print('Skipping duplicate/partially retired deliverable: %s' % deliverable, file=sys.stderr) - continue - gov_projects[project]['deliverables'][deliverable] = old_projects[project]['deliverables'][deliverable] - - # A mapping of short (no prefix) to full repo names existing in - # Gerrit, used to handle repos which have a different namespace - # in governance during transitions and also to filter out repos - # listed in governance which don't actually exist - ger_repos = dict( - [(x.split('/')[-1], x) for x in query_gerrit('projects/')]) - - # This will be populated with change owners mapped to the - # project-teams maintaining their respective Git repositories - projects = {} - - # This will be populated with all change owners and their - # account details - owners = {} - - # This will be populated with discovered duplicate owners - duplicates = {} - - # This will be populated with all individual E-mail addresses of - # change owners, to facilitate finding and merging duplicate - # accounts - all_emails = {} - - # Iterate over all governance project-teams only at filename - # generation time - for project in gov_projects: - # This will be populated with change owner Ids and counts - projects[project] = {} - - # Governance project-teams have one or more deliverables - for deliverable in gov_projects[project]['deliverables']: - # Each deliverable can have multiple repos - repos = gov_projects[project]['deliverables'][deliverable]['repos'] - - # Operate on repo short-names (no namespace) to avoid - # potential namespace mismatches between governance - # and Gerrit - for repo in [r.split('/')[-1] for r in repos]: - # Only process repos which actually exist in Gerrit, - # otherwise spew a warning if skipping - if repo not in ger_repos: - print('MISSING: %s' % repo, file=sys.stderr) - else: - # Query for an arbitrary change set and get - # detailed account information about the most - # recent patchset, paginating at 100 changes - offset = 0 - changes = [] - while offset >= 0: - changes += query_gerrit('changes/', params={ - 'q': 'project:%s %s' % (ger_repos[repo], match), - 'n': '100', - 'start': offset, - 'o': [ - 'CURRENT_COMMIT', - 'CURRENT_REVISION', - 'DETAILED_ACCOUNTS', - ], - }) - if changes and changes[-1].get('_more_changes', False): - offset += 100 - else: - offset = -1 - - # Iterate over each matched change in the repo - for change in changes: - # Get the merge date and skip if it's - # outside any requested date range - merged = date_merged(change, after, before) - if not merged: - continue - - # We index owners by their unique Gerrit - # account Id numbers - owner = change['owner']['_account_id'] - - # If this owner is in the blacklist of Ids - # to skip, then move on to the next change - if owner in ignore: - continue - - # Seen this owner already? - new_owner = owner - new = False - if owner in duplicates: - owner = duplicates[owner] - elif owner not in owners: - new = True - - # For new additions, initialize this as - # their first and record specific account - # details - if new: - # Get the set of all E-mail addresses - # Gerrit knows for this owner's account - emails = query_gerrit( - 'accounts/%s/emails' - % change['owner']['_account_id']) - - # Find duplicate addresses and merge - # accounts when that happens - for email in emails: - address = normalize_email(email['email']) - if address in all_emails: - owner = all_emails[address] - duplicates[new_owner] = owner - print( - 'MERGING DUPLICATE ACCOUNT: %s into %s' - % (new_owner, owner), file=sys.stderr) - break - - # For newly found non-duplicate owners, - # initialize the global change count, - # newest/oldest merged dates, and an empty - # list where extra E-mail addresses can be - # added; also track their full name and - # Gerrit username - if new and owner == new_owner: - # TODO(fungi): this is a prime candidate - # to become a struct, or maybe a class - owners[owner] = { - 'count': 1, - 'extra': [], - 'name': change['owner'].get('name'), - 'newest': merged, - 'oldest': merged, - 'username': change['owner'].get('username'), - } - - # If we've seen this owner on another change - # in any repo then just iterate their global - # change counter and update newest/oldest - # dates - else: - owners[owner]['count'] += 1 - if merged > owners[owner]['newest']: - owners[owner]['newest'] = merged - elif merged < owners[owner]['oldest']: - owners[owner]['oldest'] = merged - - # We only want to add addresses if this is a - # new owner or a new duplicate - if new: - # Iterate over each E-mail address - for email in emails: - # Normalize the address before - # performing any matching since - # Gerrit doesn't do a great job of - # this on its own - address = normalize_email(email['email']) - - # Track this in the full list of all - # known E-mail addresses - all_emails[address] = owner - - # Whether Gerrit considers this the - # preferred E-mail address - preferred = email.get('preferred', False) - - # Store the preferred E-mail address - # under its own key since it has a - # special status, but only if this - # is not a duplicate account - if preferred and owner == new_owner: - owners[owner]['preferred'] = address - - # If this was already added to - # the extras list due to an - # additional pre-normalized - # copy, remove it there - if address in owners[owner]['extra']: - owners[owner]['extra'].remove(address) - - # Store a list of non-preferred - # addresses, deduplicating them in - # case they match post-normalization - # and treating duplicate preferred - # addresses as # non-preferred - else: - if ((address not in owners[owner]['extra']) - and (address != owners[owner].get( - 'preferred', ''))): - owners[owner]['extra'].append(address) - - # If we've seen this owner on another change - # in a repo under this project-team then - # just iterate their team change counter and - # update newest/oldest dates - if owner in projects[project]: - projects[project][owner]['count'] += 1 - if merged > projects[project][owner]['newest']: - projects[project][owner]['newest'] = merged - elif merged < projects[project][owner]['oldest']: - projects[project][owner]['oldest'] = merged - - # ...otherwise initialize this as their - # first - else: - # TODO(fungi): another potential struct - projects[project][owner] = { - 'count': 1, - 'newest': merged, - 'oldest': merged, - } - - # The negative counter will be used as a makeshift account Id - # for non-code contributors; those with owned changes use their - # Gerrit account Id instead - counter = 1 - - # Use the before time as the only contribution time for non-code - # contributors, falling back on the script start time if before - # was not specified - if before: - if len(before) == 10: - stamp = before + ' 00:00:00' - else: - stamp = before - else: - stamp = start.isoformat(sep=' ').split('.')[0] - - # Iterate over all extra-atcs entries - if not no_extra_atcs: - for project in gov_projects: - for extra_atc in gov_projects[project].get('extra-atcs', []): - name = extra_atc['name'] - email = extra_atc['email'] - address = normalize_email(email) - if address in all_emails: - owner = all_emails[address] - else: - owner = -counter - all_emails[address] = owner - owners[owner] = { - 'count': -1, - 'extra': [], - 'name': name, - 'newest': stamp, - 'oldest': stamp, - 'preferred': address, - 'username': '_non_code_contributor', - } - if owner not in projects[project]: - projects[project][owner] = { - 'count': -1, - 'newest': stamp, - 'oldest': stamp, - } - counter += 1 - - # This will hold an address list for TC electorate rolls - electorate = [] - - # A table of owners for summit invites - invites = [] - - # A fresh pass through the owners to build some other datasets - for owner in owners: - # Sort extra E-mail address lists for ease of comparison - owners[owner]['extra'].sort() - - # Build the data used for an invite - if 'name' not in owners[owner] or not owners[owner]['name']: - print( - 'SKIPPING MALFORMED OWNER: no fullname found for account %s' % - owner, file=sys.stderr) - continue - if 'preferred' not in owners[owner]: - if 'extra' in owners[owner] and owners[owner]['extra']: - owners[owner]['preferred'] = owners[owner]['extra'][0] - owners[owner]['extra'] = owners[owner]['extra'][1:] - print( - 'MISSING PREFERRED EMAIL: used first extra address as ' - 'account %s preferred' % owner, file=sys.stderr) - else: - print( - 'SKIPPING MALFORMED OWNER: no preferred or extra ' - 'addresses found for account %s' % owner, file=sys.stderr) - continue - for email in [owners[owner]['preferred']] + owners[owner]['extra']: - member = lookup_member(email) - if member['data']: - owners[owner]['member'] = member['data'][0]['id'] - continue - invite = [owners[owner].get('member','0')] - invite.append(owners[owner]['name'].encode('utf-8')) - invite.append(owners[owner]['preferred']) - invite += owners[owner]['extra'] - invites.append(invite) - - # Append preferred addresses to the TC electorate for members only - if 'member' in owners[owner]: - electorate.append(owners[owner]['preferred'] + '\n') - - # Write out a YAML file covering all change owners - fd = open(os.path.join(outdir, '_all_owners.yaml'), 'w') - dumper(owners, stream=fd) - fd.close() - - # Write out a YAML file covering tracked duplicate accounts - fd = open(os.path.join(outdir, '_duplicate_owners.yaml'), 'w') - dumper(duplicates, stream=fd) - fd.close() - - # Write out a team-specific electoral roll for CIVS - fd = open(os.path.join(outdir, '_electorate.txt'), 'w') - fd.writelines(electorate) - fd.close() - - # Write out a CSV file appropriate for the invite2summit tool - fd = open(os.path.join(outdir, '_invites.csv'), 'w') - csv.writer(fd).writerows(invites) - fd.close() - - # Make another pass through the projects so they can be dumped - # to our output files - for project in projects: - - # This will hold team-specific info for writing - output = {} - - # This will hold an address list for PTL electoral rolls - electorate = [] - - # Use a normalized project name for output file names - normalized_project = normalize_project(project) - - # Iterate over each change owner for the current team - for owner in projects[project]: - # Copy the global owner details into our output since - # we're going to modify some - output[owner] = dict(owners[owner]) - - # Replace the owner change count and newest/oldest - # merged dates with the team-specific value rather than - # using the count from the global set - for field in ('count', 'newest', 'oldest'): - output[owner][field] = projects[project][owner][field] - - # Append preferred member addresses to the PTL electoral rolls - if 'member' in owners[owner]: - electorate.append(owners[owner]['preferred'] + '\n') - - # Write out a team-specific YAML file - fd = open(os.path.join(outdir, '%s.yaml' % normalized_project), 'w') - dumper(output, stream=fd) - fd.close() - - # Write out a team-specific electoral roll for CIVS - fd = open(os.path.join(outdir, '%s.txt' % normalized_project), 'w') - fd.writelines(electorate) - fd.close() - -if __name__ == "__main__": - main()