Use a thread pool to update gitea repos faster

This keeps repo creation serialized (because of a bug in gitea),
but it parallelizes updating the settings.  This should reduce
our time by about half.

It also uses a requests session, though I'm not sure if that
really gets us anything.

It also eliminates a couple of extraneous GET requests that were made when
following the 302 redirect responses to the POSTs that update settings.

This will automatically parallelize to nproc * 5 threads.

Change-Id: I5549562d667c0939d0af1151d44b9190774196f9
This commit is contained in:
James E. Blair 2019-07-15 15:20:52 -07:00
parent 892596373f
commit 47bd535d60

View File

@ -14,6 +14,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import concurrent.futures
import datetime
import time
import requests
@ -37,6 +38,7 @@ class Gitea(object):
self.projects = projects
self.orgs = { f['project'].split('/')[0] for f in self.projects }
self._log = []
self.session = requests.Session()
def log(self, *args):
now = datetime.datetime.utcnow().isoformat()
@ -46,7 +48,7 @@ class Gitea(object):
return "\n".join(self._log)
def request(self, method, endpoint, *args, **kwargs):
resp = requests.request(
resp = self.session.request(
method,
urllib.parse.urljoin(self.url, endpoint),
auth=('root', self.password),
@ -91,20 +93,22 @@ class Gitea(object):
def get_csrf_token(self):
resp = self.get('/')
return urllib.parse.unquote(resp.cookies.get('_csrf'))
return urllib.parse.unquote(self.session.cookies.get('_csrf'))
def make_gitea_project(self, project, csrf_token, create_project=True):
def make_gitea_project(self, project, csrf_token):
org, repo = project['project'].split('/', 1)
if create_project:
resp = self.post(
'/api/v1/org/{org}/repos'.format(org=org),
json=dict(
auto_init=True,
description=project.get('description', '')[:255],
name=repo,
private=False,
readme='Default'))
resp = self.post(
'/api/v1/org/{org}/repos'.format(org=org),
json=dict(
auto_init=True,
description=project.get('description', '')[:255],
name=repo,
private=False,
readme='Default'))
self.log("Created repo:", project['project'])
def update_gitea_project_settings(self, project, csrf_token):
org, repo = project['project'].split('/', 1)
if project.get('use-storyboard'):
external_tracker_url = SB_REPO.format(org=org, repo=repo)
tracker_url_format = SB_FORMAT
@ -114,7 +118,6 @@ class Gitea(object):
else:
external_tracker_url = LP_REPO.format(repo=repo)
tracker_url_format = LP_FORMAT.format(repo=repo)
self.post(
'/{org}/{repo}/settings'.format(org=org, repo=repo),
data=dict(
@ -130,9 +133,15 @@ class Gitea(object):
external_tracker_url=external_tracker_url,
tracker_url_format=tracker_url_format,
tracker_issue_style='numeric',
))
),
allow_redirects=False)
# Set allow_redirects to false because gitea returns
# with a 302 on success, and we don't need to follow
# that.
self.log("Updated tracker url:", external_tracker_url)
def update_gitea_project_branches(self, project, csrf_token):
org, repo = project['project'].split('/', 1)
for count in range(0, 5):
try:
self.post(
@ -142,7 +151,11 @@ class Gitea(object):
_csrf=csrf_token,
action='default_branch',
branch='master',
))
),
allow_redirects=False)
# Set allow_redirects to false because gitea returns
# with a 302 on success, and we don't need to follow
# that.
self.log("Set master branch:", project['project'])
return
except requests.exceptions.HTTPError as e:
@ -150,6 +163,8 @@ class Gitea(object):
raise Exception("Could not update branch settings")
def run(self):
thread_pool = concurrent.futures.ThreadPoolExecutor()
futures = []
gitea_orgs = self.get_gitea_orgs()
gitea_repos = []
for org in self.orgs:
@ -164,8 +179,23 @@ class Gitea(object):
create = False
else:
create = True
if create:
# TODO: use threadpool when we're running with
# https://github.com/go-gitea/gitea/pull/7493
self.make_gitea_project(project, csrf_token)
if create or self.always_update:
self.make_gitea_project(project, csrf_token, create)
futures.append(thread_pool.submit(
self.update_gitea_project_settings,
project, csrf_token))
futures.append(thread_pool.submit(
self.update_gitea_project_branches,
project, csrf_token))
for f in futures:
try:
r = f.result()
except Exception as e:
self.log(str(e))
thread_pool.shutdown()
def ansible_main():