amprolla

devuan's apt repo merger
git clone git://parazyd.org/amprolla.git
Log | Files | Refs | README | LICENSE

commit 742e8d0e7f1aede6dac09ed92cba828378dfab24
parent 1226778224578a31af771a5879d90135aa2fb1ac
Author: Merlijn Wajer <merlijn@wizzup.org>
Date:   Fri, 26 May 2017 00:03:25 +0200

Implement reading, merging and writing of Packages files

Diffstat:
Aamprolla | 42++++++++++++++++++++++++++++++++++++++++++
Mlib/config.py | 130+++++++++++++++++++++++++++++++++++++++----------------------------------------
Dlib/delta.py | 108-------------------------------------------------------------------------------
Alib/package.py | 98+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/parse.py | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 341 insertions(+), 174 deletions(-)

diff --git a/amprolla b/amprolla @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 + +from os.path import join +from time import time + +from lib.package import (write_packages, load_packages_file, + merge_packages, merge_packages_many) +from lib.parse import parse_release +from lib.config import banpkgs + +roots = { + 'devuan': 'spool/devuan/dists/jessie', + 'debian': 'spool/debian/dists/jessie', + 'debian-sec': 'spool/dists/jessie/updates/', +} + +#devuan_release_contents = open(join(roots['devuan'], 'Release')).read() +#debian_release_contents = open(join(roots['debian'], 'Release')).read() +#devuan_release = parse_release(devuan_release_contents) +#debian_release = parse_release(debian_release_contents) +#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys())) +#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys())) + +packages_file = 'main/binary-armhf/Packages.gz' + +t1 = time() +print('Loading packages') + +devuan = load_packages_file(join(roots['devuan'], packages_file)) +debian = load_packages_file(join(roots['debian'], packages_file)) +debian_sec = load_packages_file(join(roots['debian-sec'], packages_file)) + +all_repos = [devuan, debian_sec, debian] + +print('Merging packages') +new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs) + +print('Writing packages') +write_packages(new_pkgs, 'Packages.merged') + +t2 = time() +print('time:', t2-t1) diff --git a/lib/config.py b/lib/config.py @@ -2,14 +2,12 @@ # copyright (c) 2017 - Ivan J. <parazyd@dyne.org> # see LICENSE file for copyright and license details -amprolla = { - "spooldir": "./spool", - "sign_key": "fa1b0274", - "mergedir": "./merged", - "mergedsubdirs": ["dists", "pool"], - "banpkgs": ['systemd', 'systemd-sysv'] - #"checksums": [ 'md5sum', 'sha1', 'sha256', 'sha512' ] -} +spooldir = "./spool" +sign_key = "fa1b0274" +mergedir = "./merged" +mergedsubdirs = ["dists", "pool"] +banpkgs = {'systemd', 'systemd-sysv'} +#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ] repos = { # key name is priority, first is 0 @@ -136,63 +134,63 @@ mainrepofiles = [ "Release.gpg" ] -pkgfmt = [ - 'Package:', - 'Version:', - 'Essential:', - 'Installed-Size:', - 'Maintainer:', - 'Architecture:', - 'Replaces:', - 'Provides:', - 'Depends:', - 'Conflicts:', - 'Pre-Depends:', - 'Breaks:', - 'Homepage:', - 'Apport:', - 'Auto-Built-Package:', +packages_keys = [ + 'Package', + 'Version', + 'Essential', + 'Installed-Size', + 'Maintainer', + 'Architecture', + 'Replaces', + 'Provides', + 'Depends', + 'Conflicts', + 'Pre-Depends', + 'Breaks', + 'Homepage', + 'Apport', + 'Auto-Built-Package', 'Build-Ids', - 'Origin:', - 'Bugs:', - 'Built-Using:', - 'Enhances:', - 'Recommends:', - 'Description:', - 'Description-md5:', - 'Ghc-Package:', - 'Gstreamer-Decoders:', - 'Gstreamer-Elements:', - 'Gstreamer-Encoders:', - 'Gstreamer-Uri-Sinks:', - 'Gstreamer-Uri-Sources:', - 'Gstreamer-Version:', - 'Lua-Versions:', - 'Modaliases:', - 'Npp-Applications:', - 'Npp-Description:', - 'Npp-File:', - 'Npp-Mimetype:', - 'Npp-Name:', - 'Origin:', - 'Original-Maintainer:', - 'Original-Source-Maintainer:', - 'Package-Type:', - 'Postgresql-Version:', - 'Python-Version:', - 'Python-Versions:', - 'Ruby-Versions:', - 'Source:', - 'Suggests:', - 'Xul-Appid:', - 'Multi-Arch:', - 'Build-Essential:', - 'Tag:', - 'Section:', - 'Priority:', - 'Filename:', - 'Size:', - 'MD5sum:', - 'SHA1:', - 'SHA256:' + 'Origin', + 'Bugs', + 'Built-Using', + 'Enhances', + 'Recommends', + 'Description', + 'Description-md5', + 'Ghc-Package', + 'Gstreamer-Decoders', + 'Gstreamer-Elements', + 'Gstreamer-Encoders', + 'Gstreamer-Uri-Sinks', + 'Gstreamer-Uri-Sources', + 'Gstreamer-Version', + 'Lua-Versions', + 'Modaliases', + 'Npp-Applications', + 'Npp-Description', + 'Npp-File', + 'Npp-Mimetype', + 'Npp-Name', + 'Origin', + 'Original-Maintainer', + 'Original-Source-Maintainer', + 'Package-Type', + 'Postgresql-Version', + 'Python-Version', + 'Python-Versions', + 'Ruby-Versions', + 'Source', + 'Suggests', + 'Xul-Appid', + 'Multi-Arch', + 'Build-Essential', + 'Tag', + 'Section', + 'Priority', + 'Filename', + 'Size', + 'MD5sum', + 'SHA1', + 'SHA256' ] diff --git a/lib/delta.py b/lib/delta.py @@ -1,108 +0,0 @@ -#!/usr/bin/env python -# copyright (c) 2017 - Ivan J. <parazyd@dyne.org> -# see LICENSE file for copyright and license details - -import ast -import gzip -import re -import requests -import time - -import config -from log import notice - - -def get_time(date): - return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z")) - - -def get_date(relfile): - match = re.search('Date: .+', relfile) - if match: - line = relfile[match.start():match.end()] - relfile = line.split(': ')[1] - return relfile - - -def parse_release(reltext): - hash = {} - match = re.search('SHA256:+', reltext) - if match: - line = reltext[match.start():-1] - for i in line.split('\n'): - if i == 'SHA256:' or i == '\n': # XXX: hack - continue - hash[(i.split()[2])] = i.split()[0] - return hash - - -def parse_package(entry): - # for parsing a single package - values = re.split('\\n[A-Z].+?:', entry)[0:] - values[0] = values[0].split(':')[1] - keys = re.findall('\\n[A-Z].+?:', '\n' + entry) - both = zip(keys, values) - return {key.lstrip(): value for key, value in both} - - -def parse_packages(pkgtext): - # this parses our package file into a hashmap - # key: package name, value: entire package paragraph as a hashmap - map = {} - - # TODO: consider also this approach - # def parse_packages(pkgfilepath): - # with gzip.open(pkgfilepath, "rb") as f: - # pkgs = f.read().split("\n\n") - - pkgs = pkgtext.split("\n\n") - for pkg in pkgs: - m = re.match('Package: .+', pkg) - if m: - line = pkg[m.start():m.end()] - key = line.split(': ')[1] - map[key] = parse_package(pkg) - return map - - -def print_package(map, pkgname): - try: - pkg = ast.literal_eval(map[pkgname]) - sin = [] - for i in config.pkgfmt: - if config.pkgfmt[i] in pkg.keys(): - sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]]) - return sin - except: - log.die("nonexistent package") - - -def compare_dict(d1, d2): - d1_keys = set(d1.keys()) - d2_keys = set(d2.keys()) - intersect_keys = d1_keys.intersection(d2_keys) - modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]} - return modified - - -def compare_release(oldrel, newrel): - r = requests.get(newrel) - new = r.text - with open(oldrel, "rb") as f: - old = f.read() - - oldtime = get_time(get_date(old)) - newtime = get_time(get_date(new)) - if newtime > oldtime: - notice("Update available") - newhashes = parse_release(new) - oldhashes = parse_release(old) - changes = compare_dict(newhashes, oldhashes) - # k = pkg name, v = sha256 - return changes - - -# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release") -# print relmap -# for k,v in relmap.iteritems(): -# print(k) diff --git a/lib/package.py b/lib/package.py @@ -0,0 +1,98 @@ +from gzip import open as gzip_open + +from lib.parse import (parse_packages, parse_dependencies) +from lib.config import packages_keys + +def write_packages(packages, filename, sort=False): + """ + Writes `packages` to a file (per debian Packages format) + If sort=True, the packages are sorted by name. + """ + f = open(filename, 'w+') + + pkg_items = packages.items() + if sort: + pkg_items = sorted(pkg_items, key=lambda x: x[0]) + + for pkg_name, pkg_contents in pkg_items: + for key in packages_keys: + if key in pkg_contents: + f.write('%s: %s\n' % (key, pkg_contents[key])) + f.write('\n') + + f.close() + +def load_packages_file(filename): + """ Load a gzip'd packages file. + Returns a dictionary of package name and package key-values. + """ + packages_contents = gzip_open(filename).read() + packages_contents = packages_contents.decode('utf-8') + return parse_packages(packages_contents) + + +def package_banned(pkg, banned_pkgs): + """ + Returns True is the package contains a banned dependency. + Currently checks and parses both the 'Depends:' and the 'Pre-Depends' fields + of the package. + """ + if pkg.get('Package') in banned_pkgs: + return True + + depends = parse_dependencies(pkg.get('Depends', '')) + pre_depends = parse_dependencies(pkg.get('Pre-Depends', '')) + + depends = [v[0] for v in depends] + pre_depends = [v[0] for v in pre_depends] + + deps = set(depends).union(set(pre_depends)) + + return bool(deps.intersection(banned_pkgs)) + + +def merge_packages(pkg1, pkg2, banned_packages=set()): + """ + Merges two previously loaded/parsed (using load_packages_file) packages + dictionaries, preferring `pkg1` over `pkg2`, and optionally discarding any + banned packages. + """ + new_pkgs = {} + package_names = set(pkg1.keys()).union(set(pkg2.keys())) + + for pkg in package_names: + pkg1_pkg = pkg1.get(pkg) + pkg2_pkg = pkg2.get(pkg) + + if pkg1_pkg and pkg2_pkg: + new_pkgs[pkg] = pkg1_pkg + elif pkg1_pkg: + if not package_banned(pkg1_pkg, banned_packages): + new_pkgs[pkg] = pkg1_pkg + elif pkg2_pkg: + if not package_banned(pkg2_pkg, banned_packages): + new_pkgs[pkg] = pkg2_pkg + else: + assert False, 'Impossibru' + + return new_pkgs + +def merge_packages_many(packages, banned_packages=set()): # TODO: Make generic + """ + Merges two (or more) previously loaded/parsed (using load_packages_file) + packages dictionaries, priority is defined by the order of the `packages` + list, optionally discarding any banned packages. + """ + assert len(packages) > 1 + + new_pkgs = {} + + pkg1 = packages[0] + pkg2 = packages[1] + + new_pkgs = merge_packages(pkg1, pkg2, banned_packages=banned_packages) + + for pkg in packages[2:]: + new_pkgs = merge_packages(new_pkgs, pkg, banned_packages=banned_packages) + + return new_pkgs diff --git a/lib/parse.py b/lib/parse.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# copyright (c) 2017 - Ivan J. <parazyd@dyne.org> +# see LICENSE file for copyright and license details + +import ast +import gzip +import re +#import requests +import time + +from . import config +from .log import notice + + +def get_time(date): + return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z")) + + +def get_date(relfile): + match = re.search('Date: .+', relfile) + if match: + line = relfile[match.start():match.end()] + relfile = line.split(': ')[1] + return relfile + + +def parse_release(reltext): + _hash = {} + match = re.search('SHA256:+', reltext) + if match: + line = reltext[match.start():-1] + for i in line.split('\n'): + if i == 'SHA256:' or i == '\n': # XXX: hack + continue + _hash[(i.split()[2])] = i.split()[0] + return _hash + +PACKAGES_REGEX = re.compile('([A-Za-z0-9\-]+): ') + +def parse_package(entry): + """ Parses a single Packages entry """ + contents = PACKAGES_REGEX.split(entry)[1:] # Throw away the first '' + + keys = contents[::2] + vals = map(lambda x: x.strip(), contents[1::2]) + + return dict(zip(keys, vals)) + + +def parse_packages(pkgtext): + # this parses our package file into a hashmap + # key: package name, value: entire package paragraph as a hashmap + map = {} + + pkgs = pkgtext.split("\n\n") + for pkg in pkgs: + m = re.match('Package: .+', pkg) + if m: + line = pkg[m.start():m.end()] + key = line.split(': ')[1] + map[key] = parse_package(pkg) + + return map + +def parse_dependencies(dependencies): + """ + Parses a dependency line from a debian Packages file. + + Example line:: + + 'lib6 (>= 2.4), libdbus-1-3 (>= 1.0.2), foo' + + Output:: + + {'lib6': '(>= 2.4)', 'libdbus-1-3': '(>= 1.0.2)', 'foo': None} + """ + r = {} + + for pkg_plus_version in dependencies.split(', '): + v = pkg_plus_version.split(' ', 1) + name = v[0] + + # If we get passed an empty string, the name is '', and we just outright + # stop + if not name: + return {} + + if len(v) == 2: + version = v[1] + r[name] = version + else: + r[name] = None + + return r + + +def print_package(map, pkgname): + try: + pkg = ast.literal_eval(map[pkgname]) + sin = [] + for i in config.pkgfmt: + if config.pkgfmt[i] in pkg.keys(): + sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]]) + return sin + except: + log.die("nonexistent package") + + +def compare_dict(d1, d2): + d1_keys = set(d1.keys()) + d2_keys = set(d2.keys()) + intersect_keys = d1_keys.intersection(d2_keys) + modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]} + return modified + + +def compare_release(oldrel, newrel): + r = requests.get(newrel) + new = r.text + with open(oldrel, "rb") as f: + old = f.read() + + oldtime = get_time(get_date(old)) + newtime = get_time(get_date(new)) + if newtime > oldtime: + notice("Update available") + newhashes = parse_release(new) + oldhashes = parse_release(old) + changes = compare_dict(newhashes, oldhashes) + # k = pkg name, v = sha256 + return changes + + +# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release") +# print relmap +# for k,v in relmap.iteritems(): +# print(k)