commit 742e8d0e7f1aede6dac09ed92cba828378dfab24
parent 1226778224578a31af771a5879d90135aa2fb1ac
Author: Merlijn Wajer <merlijn@wizzup.org>
Date:   Fri, 26 May 2017 00:03:25 +0200
Implement reading, merging and writing of Packages files
Diffstat:
| A | amprolla |  |  | 42 | ++++++++++++++++++++++++++++++++++++++++++ | 
| M | lib/config.py |  |  | 130 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- | 
| D | lib/delta.py |  |  | 108 | ------------------------------------------------------------------------------- | 
| A | lib/package.py |  |  | 98 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| A | lib/parse.py |  |  | 137 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
5 files changed, 341 insertions(+), 174 deletions(-)
diff --git a/amprolla b/amprolla
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+from os.path import join
+from time import time
+
+from lib.package import (write_packages, load_packages_file,
+        merge_packages, merge_packages_many)
+from lib.parse import parse_release
+from lib.config import banpkgs
+
+# Directory of each source repository's jessie tree, relative to the
+# current working directory.  `packages_file` below is joined onto each one.
+roots = {
+    'devuan': 'spool/devuan/dists/jessie',
+    'debian': 'spool/debian/dists/jessie',
+    'debian-sec': 'spool/dists/jessie/updates/',
+}
+
+# Disabled: discover the armhf Packages.gz files from the Release files
+# instead of using the fixed `packages_file` path below.
+#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
+#debian_release_contents = open(join(roots['debian'], 'Release')).read()
+#devuan_release = parse_release(devuan_release_contents)
+#debian_release = parse_release(debian_release_contents)
+#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
+#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))
+
+# The single Packages index that is merged, relative to each entry of `roots`.
+packages_file = 'main/binary-armhf/Packages.gz'
+
+# Start of the timed section; the elapsed time is printed at the end.
+t1 = time()
+print('Loading packages')
+
+devuan = load_packages_file(join(roots['devuan'], packages_file))
+debian = load_packages_file(join(roots['debian'], packages_file))
+debian_sec = load_packages_file(join(roots['debian-sec'], packages_file))
+
+# List order is merge priority: merge_packages_many prefers earlier entries.
+all_repos = [devuan, debian_sec, debian]
+
+print('Merging packages')
+new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs)
+
+print('Writing packages')
+# The merged index is written uncompressed into the current directory.
+write_packages(new_pkgs, 'Packages.merged')
+
+t2 = time()
+print('time:', t2-t1)
diff --git a/lib/config.py b/lib/config.py
@@ -2,14 +2,12 @@
 # copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
 # see LICENSE file for copyright and license details
 
-amprolla = {
-    "spooldir": "./spool",
-    "sign_key": "fa1b0274",
-    "mergedir": "./merged",
-    "mergedsubdirs": ["dists", "pool"],
-    "banpkgs": ['systemd', 'systemd-sysv']
-    #"checksums": [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
-}
+# Settings are plain module-level names so callers can import them directly
+# (e.g. `from lib.config import banpkgs`).
+# NOTE(review): spooldir, sign_key, mergedir and mergedsubdirs are not
+# referenced by the code in this commit; their meaning is presumably what
+# their names suggest -- confirm against the callers.
+spooldir = "./spool"
+sign_key = "fa1b0274"
+mergedir = "./merged"
+mergedsubdirs = ["dists", "pool"]
+# Package names used as the `banned_packages` set when merging.
+banpkgs = {'systemd', 'systemd-sysv'}
+#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
 
 repos = {
     # key name is priority, first is 0
@@ -136,63 +134,63 @@ mainrepofiles = [
     "Release.gpg"
 ]
 
-pkgfmt = [
-    'Package:',
-    'Version:',
-    'Essential:',
-    'Installed-Size:',
-    'Maintainer:',
-    'Architecture:',
-    'Replaces:',
-    'Provides:',
-    'Depends:',
-    'Conflicts:',
-    'Pre-Depends:',
-    'Breaks:',
-    'Homepage:',
-    'Apport:',
-    'Auto-Built-Package:',
+# Field names of a Packages entry, without the trailing colon.
+# lib/package.py's write_packages() walks this list in order for every
+# package, so the order here is the order of the fields in the output and
+# each name must appear exactly once (a repeated name is written twice).
+packages_keys = [
+    'Package',
+    'Version',
+    'Essential',
+    'Installed-Size',
+    'Maintainer',
+    'Architecture',
+    'Replaces',
+    'Provides',
+    'Depends',
+    'Conflicts',
+    'Pre-Depends',
+    'Breaks',
+    'Homepage',
+    'Apport',
+    'Auto-Built-Package',
     'Build-Ids',
-    'Origin:',
-    'Bugs:',
-    'Built-Using:',
-    'Enhances:',
-    'Recommends:',
-    'Description:',
-    'Description-md5:',
-    'Ghc-Package:',
-    'Gstreamer-Decoders:',
-    'Gstreamer-Elements:',
-    'Gstreamer-Encoders:',
-    'Gstreamer-Uri-Sinks:',
-    'Gstreamer-Uri-Sources:',
-    'Gstreamer-Version:',
-    'Lua-Versions:',
-    'Modaliases:',
-    'Npp-Applications:',
-    'Npp-Description:',
-    'Npp-File:',
-    'Npp-Mimetype:',
-    'Npp-Name:',
-    'Origin:',
-    'Original-Maintainer:',
-    'Original-Source-Maintainer:',
-    'Package-Type:',
-    'Postgresql-Version:',
-    'Python-Version:',
-    'Python-Versions:',
-    'Ruby-Versions:',
-    'Source:',
-    'Suggests:',
-    'Xul-Appid:',
-    'Multi-Arch:',
-    'Build-Essential:',
-    'Tag:',
-    'Section:',
-    'Priority:',
-    'Filename:',
-    'Size:',
-    'MD5sum:',
-    'SHA1:',
-    'SHA256:'
+    'Origin',
+    'Bugs',
+    'Built-Using',
+    'Enhances',
+    'Recommends',
+    'Description',
+    'Description-md5',
+    'Ghc-Package',
+    'Gstreamer-Decoders',
+    'Gstreamer-Elements',
+    'Gstreamer-Encoders',
+    'Gstreamer-Uri-Sinks',
+    'Gstreamer-Uri-Sources',
+    'Gstreamer-Version',
+    'Lua-Versions',
+    'Modaliases',
+    'Npp-Applications',
+    'Npp-Description',
+    'Npp-File',
+    'Npp-Mimetype',
+    'Npp-Name',
+    'Original-Maintainer',
+    'Original-Source-Maintainer',
+    'Package-Type',
+    'Postgresql-Version',
+    'Python-Version',
+    'Python-Versions',
+    'Ruby-Versions',
+    'Source',
+    'Suggests',
+    'Xul-Appid',
+    'Multi-Arch',
+    'Build-Essential',
+    'Tag',
+    'Section',
+    'Priority',
+    'Filename',
+    'Size',
+    'MD5sum',
+    'SHA1',
+    'SHA256'
 ]
diff --git a/lib/delta.py b/lib/delta.py
@@ -1,108 +0,0 @@
-#!/usr/bin/env python
-# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
-# see LICENSE file for copyright and license details
-
-import ast
-import gzip
-import re
-import requests
-import time
-
-import config
-from log import notice
-
-
-def get_time(date):
-    return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z"))
-
-
-def get_date(relfile):
-    match = re.search('Date: .+', relfile)
-    if match:
-        line = relfile[match.start():match.end()]
-        relfile = line.split(': ')[1]
-    return relfile
-
-
-def parse_release(reltext):
-    hash = {}
-    match = re.search('SHA256:+', reltext)
-    if match:
-        line = reltext[match.start():-1]
-        for i in line.split('\n'):
-            if i == 'SHA256:' or i == '\n':  # XXX: hack
-                continue
-            hash[(i.split()[2])] = i.split()[0]
-        return hash
-
-
-def parse_package(entry):
-    # for parsing a single package
-    values = re.split('\\n[A-Z].+?:', entry)[0:]
-    values[0] = values[0].split(':')[1]
-    keys = re.findall('\\n[A-Z].+?:', '\n' + entry)
-    both = zip(keys, values)
-    return {key.lstrip(): value for key, value in both}
-
-
-def parse_packages(pkgtext):
-    # this parses our package file into a hashmap
-    # key: package name, value: entire package paragraph as a hashmap
-    map = {}
-
-    # TODO: consider also this approach
-    # def parse_packages(pkgfilepath):
-    # with gzip.open(pkgfilepath, "rb") as f:
-    #    pkgs = f.read().split("\n\n")
-
-    pkgs = pkgtext.split("\n\n")
-    for pkg in pkgs:
-        m = re.match('Package: .+', pkg)
-        if m:
-            line = pkg[m.start():m.end()]
-            key = line.split(': ')[1]
-            map[key] = parse_package(pkg)
-    return map
-
-
-def print_package(map, pkgname):
-    try:
-        pkg = ast.literal_eval(map[pkgname])
-        sin = []
-        for i in config.pkgfmt:
-            if config.pkgfmt[i] in pkg.keys():
-                sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]])
-        return sin
-    except:
-        log.die("nonexistent package")
-
-
-def compare_dict(d1, d2):
-    d1_keys = set(d1.keys())
-    d2_keys = set(d2.keys())
-    intersect_keys = d1_keys.intersection(d2_keys)
-    modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]}
-    return modified
-
-
-def compare_release(oldrel, newrel):
-    r = requests.get(newrel)
-    new = r.text
-    with open(oldrel, "rb") as f:
-        old = f.read()
-
-    oldtime = get_time(get_date(old))
-    newtime = get_time(get_date(new))
-    if newtime > oldtime:
-        notice("Update available")
-        newhashes = parse_release(new)
-        oldhashes = parse_release(old)
-        changes = compare_dict(newhashes, oldhashes)
-        # k = pkg name, v = sha256
-        return changes
-
-
-# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
-# print relmap
-# for k,v in relmap.iteritems():
-#    print(k)
diff --git a/lib/package.py b/lib/package.py
@@ -0,0 +1,98 @@
+from gzip import open as gzip_open
+
+from lib.parse import (parse_packages, parse_dependencies)
+from lib.config import packages_keys
+
+def write_packages(packages, filename, sort=False):
+    """
+    Writes `packages` to a file (per debian Packages format).
+
+    `packages` maps a package name to a dict of field name -> value.  For
+    every package the fields listed in `packages_keys` are written as
+    ``Key: value`` lines, in that order, followed by one empty line.  Fields
+    that are not listed in `packages_keys` are not written.
+
+    If sort=True, the packages are sorted by name; otherwise they are written
+    in the iteration order of `packages`.
+    """
+    pkg_items = packages.items()
+    if sort:
+        pkg_items = sorted(pkg_items, key=lambda x: x[0])
+
+    # The input is decoded as UTF-8, so write UTF-8 back instead of relying
+    # on the locale's default encoding.  The `with` block closes the file
+    # even when a write fails half-way.
+    with open(filename, 'w', encoding='utf-8') as f:
+        for _pkg_name, pkg_contents in pkg_items:
+            for key in packages_keys:
+                if key in pkg_contents:
+                    f.write('%s: %s\n' % (key, pkg_contents[key]))
+            f.write('\n')
+
+def load_packages_file(filename):
+    """ Load a gzip'd packages file.
+
+    The whole file is decompressed into memory, decoded as UTF-8 and handed
+    to `parse_packages`.
+
+    Returns a dictionary of package name and package key-values.
+    """
+    # Close the gzip handle deterministically instead of leaking it.
+    with gzip_open(filename) as packages_file:
+        packages_contents = packages_file.read()
+
+    return parse_packages(packages_contents.decode('utf-8'))
+
+
+def package_banned(pkg, banned_pkgs):
+    """
+    Returns True if the package is banned or contains a banned dependency.
+
+    `pkg` is one parsed package (a dict of field name -> value) and
+    `banned_pkgs` a set of package names.  The package is banned when its own
+    'Package' name is in `banned_pkgs`, or when any name parsed from its
+    'Depends' or 'Pre-Depends' field is.
+    """
+    if pkg.get('Package') in banned_pkgs:
+        return True
+
+    depends = parse_dependencies(pkg.get('Depends', ''))
+    pre_depends = parse_dependencies(pkg.get('Pre-Depends', ''))
+
+    # parse_dependencies returns a dict keyed by package name, so the names
+    # are the keys themselves.  (Indexing each key with [0] would only yield
+    # its first character and never match a banned name.)
+    deps = set(depends).union(pre_depends)
+
+    return bool(deps.intersection(banned_pkgs))
+
+
+def merge_packages(pkg1, pkg2, banned_packages=frozenset()):
+    """
+    Merges two previously loaded/parsed (using load_packages_file) packages
+    dictionaries, preferring `pkg1` over `pkg2`, and optionally discarding any
+    banned packages.
+
+    Returns a new dictionary of package name -> package; neither input is
+    modified.  A package present in only one input is kept unless
+    `package_banned` rejects it.
+
+    NOTE(review): a package present in BOTH inputs is taken from `pkg1`
+    without a ban check -- presumably because the higher-priority repository
+    is trusted; confirm that this is intended.
+    """
+    new_pkgs = {}
+    package_names = set(pkg1.keys()).union(set(pkg2.keys()))
+
+    for pkg in package_names:
+        pkg1_pkg = pkg1.get(pkg)
+        pkg2_pkg = pkg2.get(pkg)
+
+        if pkg1_pkg and pkg2_pkg:
+            new_pkgs[pkg] = pkg1_pkg
+            continue
+
+        # Exactly one side carries the package.
+        candidate = pkg1_pkg or pkg2_pkg
+        assert candidate, 'Impossibru'
+
+        if not package_banned(candidate, banned_packages):
+            new_pkgs[pkg] = candidate
+
+    return new_pkgs
+
+def merge_packages_many(packages, banned_packages=frozenset()): # TODO: Make generic
+    """
+    Merges two (or more) previously loaded/parsed (using load_packages_file)
+    packages dictionaries, priority is defined by the order of the `packages`
+    list, optionally discarding any banned packages.
+
+    The list is folded left to right with `merge_packages`: the result so far
+    is always the preferred side of the next merge.  At least two
+    dictionaries are required.
+    """
+    assert len(packages) > 1
+
+    new_pkgs = packages[0]
+
+    for pkg in packages[1:]:
+        new_pkgs = merge_packages(new_pkgs, pkg, banned_packages=banned_packages)
+
+    return new_pkgs
diff --git a/lib/parse.py b/lib/parse.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
+# see LICENSE file for copyright and license details
+
+import ast
+import gzip
+import re
+#import requests
+import time
+
+from . import config
+from .log import notice
+
+
+def get_time(date):
+    """
+    Converts a date string such as 'Fri, 26 May 2017 00:03:25 UTC' into
+    seconds since the epoch, as computed by time.mktime.
+    """
+    parsed = time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z")
+    return time.mktime(parsed)
+
+
+def get_date(relfile):
+    """
+    Returns the value of the first 'Date: ' line found in the Release text
+    `relfile`.  When there is no such line, `relfile` itself is returned
+    unchanged.
+    """
+    found = re.search('Date: .+', relfile)
+    if found is None:
+        return relfile
+
+    return found.group(0).split(': ')[1]
+
+
+def parse_release(reltext):
+    """
+    Parses the SHA256 section of a Release file.
+
+    Every line after the 'SHA256:' header that has at least three
+    whitespace-separated columns is read as '<hash> <size> <filename>'.
+    Blank and shorter lines are skipped, and parsing stops at the next
+    unindented 'Field:' line so that a following section is not mixed in.
+
+    Returns a dictionary of filename -> hash, or None when `reltext` has no
+    'SHA256:' header.
+    """
+    _hash = {}
+    match = re.search('SHA256:+', reltext)
+    if match:
+        for line in reltext[match.end():].split('\n'):
+            fields = line.split()
+            if not fields:
+                continue
+            # An unindented 'Name:' token starts the next field of the file.
+            if not line[0].isspace() and fields[0].endswith(':'):
+                break
+            if len(fields) < 3:
+                continue
+            _hash[fields[2]] = fields[0]
+        return _hash
+
+# A field starts at the beginning of a line: 'Name: '.  Anchoring with ^
+# keeps a ': ' inside a value (e.g. 'Description: GNU C Library: Shared
+# libraries') from being mistaken for a new field; continuation lines start
+# with a space and therefore never match either.
+PACKAGES_REGEX = re.compile(r'^([A-Za-z0-9\-]+): ', re.MULTILINE)
+
+def parse_package(entry):
+    """
+    Parses a single Packages entry.
+
+    Returns a dictionary of field name -> value.  Values are stripped of
+    leading and trailing whitespace; continuation lines stay part of the
+    value they belong to.
+    """
+    contents = PACKAGES_REGEX.split(entry)[1:]  # Throw away the first ''
+
+    # After the split, names sit at even positions and values at odd ones.
+    keys = contents[::2]
+    vals = contents[1::2]
+
+    return {key: val.strip() for key, val in zip(keys, vals)}
+
+
+def parse_packages(pkgtext):
+    """
+    Parses the text of a Packages file into a dictionary.
+
+    key: package name, value: the whole package paragraph as parsed by
+    parse_package.  Paragraphs are separated by an empty line; a paragraph
+    that does not start with a 'Package: ' line is skipped.
+    """
+    packages = {}
+
+    for entry in pkgtext.split("\n\n"):
+        header = re.match('Package: .+', entry)
+        if header is None:
+            continue
+
+        name = header.group(0).split(': ')[1]
+        packages[name] = parse_package(entry)
+
+    return packages
+
+def parse_dependencies(dependencies):
+    """
+    Parses a dependency line from a debian Packages file.
+
+    Example line::
+
+        'lib6 (>= 2.4), libdbus-1-3 (>= 1.0.2), foo'
+
+    Output::
+
+        {'lib6': '(>= 2.4)', 'libdbus-1-3': '(>= 1.0.2)', 'foo': None}
+
+    Items are separated by commas; whitespace around an item is ignored and
+    empty items are skipped, so an empty string yields an empty dictionary.
+
+    NOTE: alternatives are not split.  For 'a | b' the name is 'a' and the
+    rest of the item ('| b') ends up in the version slot.
+    """
+    r = {}
+
+    for pkg_plus_version in dependencies.split(','):
+        pkg_plus_version = pkg_plus_version.strip()
+
+        # Skip empty items rather than throwing away what was parsed so far.
+        if not pkg_plus_version:
+            continue
+
+        v = pkg_plus_version.split(None, 1)
+        name = v[0]
+
+        r[name] = v[1] if len(v) == 2 else None
+
+    return r
+
+
+def print_package(map, pkgname):
+    """
+    Returns the fields of package `pkgname` as a list of 'Key: value'
+    strings, in the order given by config.packages_keys.
+
+    `map` is a dictionary of package name -> package, as returned by
+    parse_packages.  A package stored as the string repr of a dict is
+    accepted as well.  An unknown package name is reported through log.die.
+    """
+    try:
+        pkg = map[pkgname]
+    except KeyError:
+        # Imported here because the module only imports `notice` from .log.
+        # NOTE(review): log.die presumably terminates the program -- confirm.
+        from . import log
+        log.die("nonexistent package")
+        return None
+
+    if isinstance(pkg, str):
+        pkg = ast.literal_eval(pkg)
+
+    return ['%s: %s' % (key, pkg[key])
+            for key in config.packages_keys if key in pkg]
+
+
+def compare_dict(d1, d2):
+    """
+    Returns the keys present in both dictionaries whose values differ, as a
+    dictionary of key -> (value in d1, value in d2).
+    """
+    shared_keys = set(d1.keys()).intersection(set(d2.keys()))
+
+    modified = {}
+    for key in shared_keys:
+        first, second = d1[key], d2[key]
+        if first != second:
+            modified[key] = (first, second)
+
+    return modified
+
+
+def compare_release(oldrel, newrel):
+    """
+    Compares a local Release file with a remote one.
+
+    `oldrel` is the path of the local Release file, `newrel` the URL of the
+    remote one.  When the remote 'Date:' is newer than the local one, returns
+    a dictionary of filename -> (new sha256, old sha256) for the files listed
+    in both whose hash changed.  Returns None when the remote file is not
+    newer.
+    """
+    # The module does not import `requests`; the standard library is enough
+    # for a plain GET.
+    from urllib.request import urlopen
+
+    with urlopen(newrel) as response:
+        new = response.read().decode('utf-8')
+    # Text mode: the parsers below search with str patterns.
+    with open(oldrel, encoding='utf-8') as f:
+        old = f.read()
+
+    oldtime = get_time(get_date(old))
+    newtime = get_time(get_date(new))
+    if newtime > oldtime:
+        notice("Update available")
+        # parse_release returns None when a file has no SHA256 section.
+        newhashes = parse_release(new) or {}
+        oldhashes = parse_release(old) or {}
+        changes = compare_dict(newhashes, oldhashes)
+        # k = pkg name, v = sha256
+        return changes
+
+    return None
+
+
+# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
+# print relmap
+# for k,v in relmap.iteritems():
+#    print(k)