commit b0080eb9b53c778c81f33cb5f6a978945ee939d1
parent cea8d90386986eecc2ae3cecbd2f87a8470dfa88
Author: parazyd <parazyd@dyne.org>
Date:   Mon, 29 May 2017 16:54:24 +0200
implement suite structure to orchestrate merging
Diffstat:
3 files changed, 88 insertions(+), 40 deletions(-)
diff --git a/amprolla-merge b/amprolla-merge
@@ -3,40 +3,46 @@
 Amprolla main module
 """
 
-import sys
 from os.path import join
 from time import time
 
 from lib.package import (write_packages, load_packages_file,
                          merge_packages_many)
-from lib.config import banpkgs
+from lib.config import (aliases, banpkgs, repo_order, repos,
+                        spooldir, suites)
 
-roots = {
-    'devuan': 'spool/devuan/dists/jessie',
-    'debian': 'spool/debian/dists/jessie',
-    'debian-sec': 'spool/dists/jessie/updates',
-}
 
-#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
-#debian_release_contents = open(join(roots['debian'], 'Release')).read()
-#devuan_release = parse_release(devuan_release_contents)
-#debian_release = parse_release(debian_release_contents)
-#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
-#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))
+def prepare_merge_dict():
+    """
+    Prepare a dict of lists describing the repos that need to be merged,
+    ordered according to the repo_order list found in lib.config. Each
+    entry holds the full path of the suite inside the repo's spool
+    directory, or None when the repo does not carry the suite.
+    Example output (suite names shown instead of full paths):
+        {'ascii': ['ascii', None, 'stretch']}
+    """
+    merge_dict = {}
 
-#packages_file = 'main/binary-armhf/Packages.gz'
-packages_file = sys.argv[1]
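+    # flatten the suites mapping from lib.config so that every suite
+    # name listed there becomes its own key in merge_dict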
+    for suite in suites:
+        for i in suites[suite]:
+            merge_dict[i] = []
 
-t1 = time()
-print('Loading packages: %s' % packages_file)
+    for suite in merge_dict:
+        for repo in repo_order:
+            tmpsuite = suite
+            if repos[repo]['aliases'] is True:
+                # translate the Devuan suite name into this repo's own
+                # suite name; skip the repo when the suite is missing
+                if tmpsuite in aliases[repos[repo]['name']]:
+                    tmpsuite = aliases[repos[repo]['name']][suite]
+                elif repos[repo]['skipmissing'] is True:
+                    tmpsuite = None
+                # security suites come from debian-sec, never from debian
+                skips = ['jessie-security', 'ascii-security']
+                if repo == 'debian' and suite in skips:
+                    tmpsuite = None
+            if tmpsuite:  # make it a proper path
+                tmpsuite = join(spooldir, repos[repo]['dists'], tmpsuite)
+            merge_dict[suite].append(tmpsuite)
 
-devuan = load_packages_file(join(roots['devuan'], packages_file))
-debian = load_packages_file(join(roots['debian'], packages_file))
-debian_sec = load_packages_file(join(roots['debian-sec'], packages_file))
+    return merge_dict
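+
+# Rough sketch of what prepare_merge_dict() might return for a 'jessie'
+# suite (hypothetical paths; the real values depend entirely on the
+# suites, aliases and spooldir settings in lib.config):
+#   {'jessie': ['spool/devuan/dists/jessie',
+#               'spool/dists/jessie/updates',
+#               'spool/debian/dists/jessie']}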
 
-all_repos = [{'name': 'devuan', 'packages': devuan},
-             {'name': 'debian-sec', 'packages': debian_sec},
-             {'name': 'debian', 'packages': debian}]
 
 def devuan_rewrite(pkg, repo_name):
     """
@@ -51,11 +57,47 @@ def devuan_rewrite(pkg, repo_name):
     return pkg
 
 
-print('Merging packages')
-new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs, rewriter=devuan_rewrite)
+def merge(packages_list):
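+    """
+    Load, merge, and write out the Packages files named in packages_list.
+    The list is expected to follow repo_order (devuan, debian-sec,
+    debian); entries may be None for repos that do not carry the suite.
+    """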
+    t1 = time()
+
+    all_repos = []
+    print('Loading packages: %s' % packages_list)
+
+    devuan = load_packages_file(packages_list[0])
+    if devuan:
+        all_repos.append({'name': 'devuan', 'packages': devuan})
+
+    debian_sec = load_packages_file(packages_list[1])
+    if debian_sec:
+        all_repos.append({'name': 'debian-sec', 'packages': debian_sec})
+
+    debian = load_packages_file(packages_list[2])
+    if debian:
+        all_repos.append({'name': 'debian', 'packages': debian})
+
+    print('Merging packages')
+    new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs, rewriter=devuan_rewrite)
+
+    print('Writing packages')
+    write_packages(new_pkgs, 'Packages.merged')
+
+    t2 = time()
+    print('time:', t2-t1)
+
+
+packages_file = 'main/binary-armhf/Packages.gz'
+to_merge = prepare_merge_dict()
+
+tt1 = time()
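+# merge the chosen Packages file of every repo, suite by suite; repos
+# that lack the suite are passed through as None so merge() skips them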
+for suite in to_merge:
+    pkg_list = []
+    for rep in to_merge[suite]:
+        if rep:
+            pkg_list.append(join(rep, packages_file))
+        else:
+            pkg_list.append(None)
 
-print('Writing packages')
-write_packages(new_pkgs, 'Packages.merged')
+    merge(pkg_list)
 
-t2 = time()
-print('time:', t2-t1)
+tt2 = time()
+print('total time:', tt2-tt1)
diff --git a/lib/config.py b/lib/config.py
@@ -7,7 +7,9 @@ sign_key = 'fa1b0274'
 mergedir = './merged'
 mergedsubdirs = ['dists', 'pool']
 banpkgs = {'systemd', 'systemd-sysv'}
-#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
+# checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
+
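+# merge priority: repos listed earlier take precedence over later ones
+# (this list replaces the per-repo 'priority' fields removed below)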
+repo_order = ['devuan', 'debian-sec', 'debian']
 
 repos = {
     'devuan': {
@@ -17,7 +19,6 @@ repos = {
         'pool': 'devuan/pool',
         'aliases': False,
         'skipmissing': False,
-        'priority': 0,
     },
     'debian-sec': {
         'name': 'DEBIAN-SECURITY',
@@ -26,17 +27,14 @@ repos = {
         'pool': 'pool',
         'aliases': True,
         'skipmissing': True,
-        'priority': 1,
     },
     'debian': {
         'name': 'DEBIAN',
-        #'host': 'httpredir.debian.org',
         'host': 'http://ftp.debian.org',
         'dists': 'debian/dists',
         'pool': 'debian/pool',
         'aliases': True,
         'skipmissing': False,
-        'priority': 2,
     }
 }
 
diff --git a/lib/package.py b/lib/package.py
@@ -3,6 +3,7 @@ from gzip import open as gzip_open
 from lib.parse import (parse_packages, parse_dependencies)
 from lib.config import packages_keys
 
+
 def write_packages(packages, filename, sort=True):
     """
     Writes `packages` to a file (per debian Packages format)
@@ -22,20 +23,24 @@ def write_packages(packages, filename, sort=True):
 
     f.close()
 
+
 def load_packages_file(filename):
     """ Load a gzip'd packages file.
     Returns a dictionary of package name and package key-values.
     """
-    packages_contents = gzip_open(filename).read()
-    packages_contents = packages_contents.decode('utf-8')
-    return parse_packages(packages_contents)
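+    # a None filename means the repo does not provide this suite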
+    if filename is not None:
+        packages_contents = gzip_open(filename).read()
+        packages_contents = packages_contents.decode('utf-8')
+        return parse_packages(packages_contents)
+
+    return None
 
 
 def package_banned(pkg, banned_pkgs):
     """
     Returns True if the package contains a banned dependency.
-    Currently checks and parses both the 'Depends:' and the 'Pre-Depends' fields
-    of the package.
+    Currently checks and parses both the 'Depends:' and the 'Pre-Depends'
+    fields of the package.
     """
     if pkg.get('Package') in banned_pkgs:
         return True
@@ -85,13 +90,16 @@ def merge_packages(pkg1, pkg2, name1, name2, banned_packages=set(),
 
     return new_pkgs
 
+
 def merge_packages_many(packages, banned_packages=set(), rewriter=None):
     """
     Merges two (or more) previously loaded/parsed (using load_packages_file)
     packages dictionaries, priority is defined by the order of the `packages`
     list, optionally discarding any banned packages.
     """
-    assert len(packages) > 1
+    assert len(packages) > 1  # TODO: what to do when there is only one?
+    # this can happen when the Packages file exists but contains only
+    # the gzip header and no actual content
 
     new_pkgs = {}