# Check which of the wheels in our AFS directory exist upstream
#
# This outputs two files
#
#  to-delete.txt : a list of files and directories that can be removed
#                  from the mirror as all contents are cached in pypi
#
#  log.txt       : the leading number is the number of files left
#                  in the given directory after checking upstream
#                  package contents, i.e. content that is unique to
#                  our mirror volume.
#
# Needs pypi-simple

import sys
import os
import json

from pypi_simple import PyPISimple, NoSuchProjectError

# Root of the wheel mirror; every platform directory that gets scanned
# is looked up directly underneath this path.
BASE = '/afs/openstack.org/mirror/wheel'

# Report files, created (or truncated) in the current working directory
# as soon as this module is loaded.  They stay open for the whole run
# because iterate_wheels() prints into them as it walks the tree.
FILE_DEL = open('to-delete.txt', mode='w')
FILE_LOG = open('log.txt', mode='w')

# Per-platform wheel directories (relative to BASE) that are scanned.
#
# NOTE: every entry needs its own trailing comma.  Adjacent string
# literals are silently concatenated by Python, so a missing comma
# merges two platforms into one non-existent directory name and both
# of them are skipped without any error.
PLATFORMS = (
    'centos-8-x86_64',
    'centos-9-x86_64',
    'debian-10-x86_64',
    'debian-11-x86_64',
    'ubuntu-18.04-aarch64',
    'ubuntu-20.04-aarch64',
    'ubuntu-22.04-aarch64',
    'centos-8-aarch64',
    'centos-9-aarch64',
    'debian-10-aarch64',
    'debian-11-aarch64',
    'ubuntu-16.04-x86_64',
    'ubuntu-18.04-x86_64',
    'ubuntu-20.04-x86_64',
    'ubuntu-22.04-x86_64',
)

def iterate_wheels(path, d):
    """Record the tree under *path* in *d* and report wheels that exist upstream.

    A non-directory *path* has its basename appended to ``d['files']``.
    A directory gets a node ``{'dirs': {}, 'files': []}`` stored under its
    basename in ``d['dirs']`` and every entry inside it is processed
    recursively into that node.

    Once a directory with a non-empty basename has been walked and holds
    at least one file directly, its basename is looked up as a project
    name via ``PyPISimple.get_project_page``:

    * If the lookup raises ``NoSuchProjectError``, a notice goes to stderr
      and every local file of the directory is written to ``FILE_DEL``.
    * Otherwise every local file whose name also appears upstream is
      written to ``FILE_DEL`` (followed by the directory itself when no
      local-only file remains), and a ``"<count> <path>"`` line with the
      number of local-only files is written to ``FILE_LOG``.

    :param path: file or directory to inspect.  A trailing slash yields an
        empty basename, which disables the upstream lookup for that
        directory itself.
    :param d: node dict with ``'dirs'`` and ``'files'`` keys; mutated in
        place.
    :returns: the same *d* object on every code path.
    """
    name = os.path.basename(path)

    if not os.path.isdir(path):
        d['files'].append(name)
        return d

    node = d['dirs'].setdefault(name, {'dirs': {}, 'files': []})
    for entry in os.listdir(path):
        iterate_wheels(os.path.join(path, entry), node)

    # The top level (empty basename, reached via a trailing slash) has
    # index.html; skip it.  Otherwise the directory name is the pypi
    # project name, and only directories that directly hold files are
    # worth a lookup.
    if not name or not node['files']:
        return d

    # Hold the client only for the duration of the fetch.
    with PyPISimple() as client:
        try:
            page = client.get_project_page(name)
        except NoSuchProjectError:
            print("Removing disappeared project : %s" % name, file=sys.stderr)
            for wheel in node['files']:
                print("%s/%s" % (path, wheel), file=FILE_DEL)
            return d

    upstream = {package.filename for package in page.packages}
    local = set(node['files'])

    not_upstream = local - upstream
    dups = local & upstream

    # Print files to delete, and if the directory ends up empty put it
    # in the list to delete too.  Sorted so the report is reproducible
    # from run to run.  The loop variable must not be called ``d``: that
    # would clobber the node dict this function returns.
    for wheel in sorted(dups):
        print("%s/%s" % (path, wheel), file=FILE_DEL)
    if not not_upstream:
        print("%s" % path, file=FILE_DEL)

    # Output the number of files left in the directory after pruning.
    print("%4d %s" % (len(not_upstream), path), file=FILE_LOG)
    return d

# Walk each platform's wheel directory in turn.  The report files are
# closed in a ``finally`` so that whatever was written is flushed to
# disk even when a run is interrupted part-way through.
try:
    for p in PLATFORMS:
        print("Processing %s" % p, file=sys.stderr)
        # The trailing slash gives the platform directory an empty
        # basename, so it is never itself looked up as a PyPI project.
        iterate_wheels('%s/%s/' % (BASE, p),
                       d={'dirs': {}, 'files': []})
finally:
    FILE_DEL.close()
    FILE_LOG.close()