commit 742e8d0e7f1aede6dac09ed92cba828378dfab24
parent 1226778224578a31af771a5879d90135aa2fb1ac
Author: Merlijn Wajer <merlijn@wizzup.org>
Date: Fri, 26 May 2017 00:03:25 +0200
Implement reading, merging and writing of Packages files
Diffstat:
 A  amprolla        |  42 ++++++++++++++++++++++++++++++++++++++++++
 M  lib/config.py   | 130 +++++++++++++++++++++++++++++++++++++++---------------------------------------
 D  lib/delta.py    | 108 -------------------------------------------------------------------------------
 A  lib/package.py  |  98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 A  lib/parse.py    | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 341 insertions(+), 174 deletions(-)
diff --git a/amprolla b/amprolla
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+from os.path import join
+from time import time
+
+from lib.package import (write_packages, load_packages_file,
+ merge_packages, merge_packages_many)
+from lib.parse import parse_release
+from lib.config import banpkgs
+
+roots = {
+ 'devuan': 'spool/devuan/dists/jessie',
+ 'debian': 'spool/debian/dists/jessie',
+ 'debian-sec': 'spool/dists/jessie/updates/',
+}
+
+#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
+#debian_release_contents = open(join(roots['debian'], 'Release')).read()
+#devuan_release = parse_release(devuan_release_contents)
+#debian_release = parse_release(debian_release_contents)
+#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
+#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))
+
+packages_file = 'main/binary-armhf/Packages.gz'
+
+t1 = time()
+print('Loading packages')
+
+devuan = load_packages_file(join(roots['devuan'], packages_file))
+debian = load_packages_file(join(roots['debian'], packages_file))
+debian_sec = load_packages_file(join(roots['debian-sec'], packages_file))
+
+all_repos = [devuan, debian_sec, debian]
+
+print('Merging packages')
+new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs)
+
+print('Writing packages')
+write_packages(new_pkgs, 'Packages.merged')
+
+t2 = time()
+print('time:', t2-t1)
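
Note: the driver above loads, merges and writes a single, hard-coded architecture (armhf). A minimal sketch of how the same lib.package helpers could be run over several architectures; the architecture list and output file names below are illustrative and not part of this commit:

    #!/usr/bin/env python3
    # hypothetical multi-arch driver built on the helpers added in this commit
    from os.path import join
    from lib.package import (write_packages, load_packages_file,
                             merge_packages_many)
    from lib.config import banpkgs

    # same spool layout and priority order (devuan, debian-sec, debian) as above
    roots = ['spool/devuan/dists/jessie', 'spool/dists/jessie/updates/',
             'spool/debian/dists/jessie']
    arches = ['amd64', 'armhf', 'i386']  # assumed list, adjust to the mirror

    for arch in arches:
        pkg_file = 'main/binary-%s/Packages.gz' % arch
        repos = [load_packages_file(join(root, pkg_file)) for root in roots]
        merged = merge_packages_many(repos, banned_packages=banpkgs)
        write_packages(merged, 'Packages.merged.%s' % arch, sort=True)
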
diff --git a/lib/config.py b/lib/config.py
@@ -2,14 +2,12 @@
# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
# see LICENSE file for copyright and license details
-amprolla = {
- "spooldir": "./spool",
- "sign_key": "fa1b0274",
- "mergedir": "./merged",
- "mergedsubdirs": ["dists", "pool"],
- "banpkgs": ['systemd', 'systemd-sysv']
- #"checksums": [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
-}
+spooldir = "./spool"
+sign_key = "fa1b0274"
+mergedir = "./merged"
+mergedsubdirs = ["dists", "pool"]
+banpkgs = {'systemd', 'systemd-sysv'}
+#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
repos = {
# key name is priority, first is 0
@@ -136,63 +134,63 @@ mainrepofiles = [
"Release.gpg"
]
-pkgfmt = [
- 'Package:',
- 'Version:',
- 'Essential:',
- 'Installed-Size:',
- 'Maintainer:',
- 'Architecture:',
- 'Replaces:',
- 'Provides:',
- 'Depends:',
- 'Conflicts:',
- 'Pre-Depends:',
- 'Breaks:',
- 'Homepage:',
- 'Apport:',
- 'Auto-Built-Package:',
+packages_keys = [
+ 'Package',
+ 'Version',
+ 'Essential',
+ 'Installed-Size',
+ 'Maintainer',
+ 'Architecture',
+ 'Replaces',
+ 'Provides',
+ 'Depends',
+ 'Conflicts',
+ 'Pre-Depends',
+ 'Breaks',
+ 'Homepage',
+ 'Apport',
+ 'Auto-Built-Package',
'Build-Ids',
- 'Origin:',
- 'Bugs:',
- 'Built-Using:',
- 'Enhances:',
- 'Recommends:',
- 'Description:',
- 'Description-md5:',
- 'Ghc-Package:',
- 'Gstreamer-Decoders:',
- 'Gstreamer-Elements:',
- 'Gstreamer-Encoders:',
- 'Gstreamer-Uri-Sinks:',
- 'Gstreamer-Uri-Sources:',
- 'Gstreamer-Version:',
- 'Lua-Versions:',
- 'Modaliases:',
- 'Npp-Applications:',
- 'Npp-Description:',
- 'Npp-File:',
- 'Npp-Mimetype:',
- 'Npp-Name:',
- 'Origin:',
- 'Original-Maintainer:',
- 'Original-Source-Maintainer:',
- 'Package-Type:',
- 'Postgresql-Version:',
- 'Python-Version:',
- 'Python-Versions:',
- 'Ruby-Versions:',
- 'Source:',
- 'Suggests:',
- 'Xul-Appid:',
- 'Multi-Arch:',
- 'Build-Essential:',
- 'Tag:',
- 'Section:',
- 'Priority:',
- 'Filename:',
- 'Size:',
- 'MD5sum:',
- 'SHA1:',
- 'SHA256:'
+ 'Origin',
+ 'Bugs',
+ 'Built-Using',
+ 'Enhances',
+ 'Recommends',
+ 'Description',
+ 'Description-md5',
+ 'Ghc-Package',
+ 'Gstreamer-Decoders',
+ 'Gstreamer-Elements',
+ 'Gstreamer-Encoders',
+ 'Gstreamer-Uri-Sinks',
+ 'Gstreamer-Uri-Sources',
+ 'Gstreamer-Version',
+ 'Lua-Versions',
+ 'Modaliases',
+ 'Npp-Applications',
+ 'Npp-Description',
+ 'Npp-File',
+ 'Npp-Mimetype',
+ 'Npp-Name',
+ 'Origin',
+ 'Original-Maintainer',
+ 'Original-Source-Maintainer',
+ 'Package-Type',
+ 'Postgresql-Version',
+ 'Python-Version',
+ 'Python-Versions',
+ 'Ruby-Versions',
+ 'Source',
+ 'Suggests',
+ 'Xul-Appid',
+ 'Multi-Arch',
+ 'Build-Essential',
+ 'Tag',
+ 'Section',
+ 'Priority',
+ 'Filename',
+ 'Size',
+ 'MD5sum',
+ 'SHA1',
+ 'SHA256'
]
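
The renamed packages_keys list (colons dropped, matching the keys produced by lib.parse.parse_package) determines which fields write_packages emits and in what order. A small sketch of that effect; the package data and output path are made up for illustration:

    from lib.package import write_packages

    pkgs = {'dash': {'Package': 'dash', 'Version': '0.5.7-4',
                     'Architecture': 'armhf', 'Description': 'POSIX shell'}}
    write_packages(pkgs, '/tmp/Packages.example')
    # /tmp/Packages.example now contains, in packages_keys order:
    #   Package: dash
    #   Version: 0.5.7-4
    #   Architecture: armhf
    #   Description: POSIX shell
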
diff --git a/lib/delta.py b/lib/delta.py
@@ -1,108 +0,0 @@
-#!/usr/bin/env python
-# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
-# see LICENSE file for copyright and license details
-
-import ast
-import gzip
-import re
-import requests
-import time
-
-import config
-from log import notice
-
-
-def get_time(date):
- return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z"))
-
-
-def get_date(relfile):
- match = re.search('Date: .+', relfile)
- if match:
- line = relfile[match.start():match.end()]
- relfile = line.split(': ')[1]
- return relfile
-
-
-def parse_release(reltext):
- hash = {}
- match = re.search('SHA256:+', reltext)
- if match:
- line = reltext[match.start():-1]
- for i in line.split('\n'):
- if i == 'SHA256:' or i == '\n': # XXX: hack
- continue
- hash[(i.split()[2])] = i.split()[0]
- return hash
-
-
-def parse_package(entry):
- # for parsing a single package
- values = re.split('\\n[A-Z].+?:', entry)[0:]
- values[0] = values[0].split(':')[1]
- keys = re.findall('\\n[A-Z].+?:', '\n' + entry)
- both = zip(keys, values)
- return {key.lstrip(): value for key, value in both}
-
-
-def parse_packages(pkgtext):
- # this parses our package file into a hashmap
- # key: package name, value: entire package paragraph as a hashmap
- map = {}
-
- # TODO: consider also this approach
- # def parse_packages(pkgfilepath):
- # with gzip.open(pkgfilepath, "rb") as f:
- # pkgs = f.read().split("\n\n")
-
- pkgs = pkgtext.split("\n\n")
- for pkg in pkgs:
- m = re.match('Package: .+', pkg)
- if m:
- line = pkg[m.start():m.end()]
- key = line.split(': ')[1]
- map[key] = parse_package(pkg)
- return map
-
-
-def print_package(map, pkgname):
- try:
- pkg = ast.literal_eval(map[pkgname])
- sin = []
- for i in config.pkgfmt:
- if config.pkgfmt[i] in pkg.keys():
- sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]])
- return sin
- except:
- log.die("nonexistent package")
-
-
-def compare_dict(d1, d2):
- d1_keys = set(d1.keys())
- d2_keys = set(d2.keys())
- intersect_keys = d1_keys.intersection(d2_keys)
- modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]}
- return modified
-
-
-def compare_release(oldrel, newrel):
- r = requests.get(newrel)
- new = r.text
- with open(oldrel, "rb") as f:
- old = f.read()
-
- oldtime = get_time(get_date(old))
- newtime = get_time(get_date(new))
- if newtime > oldtime:
- notice("Update available")
- newhashes = parse_release(new)
- oldhashes = parse_release(old)
- changes = compare_dict(newhashes, oldhashes)
- # k = pkg name, v = sha256
- return changes
-
-
-# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
-# print relmap
-# for k,v in relmap.iteritems():
-# print(k)
diff --git a/lib/package.py b/lib/package.py
@@ -0,0 +1,98 @@
+from gzip import open as gzip_open
+
+from lib.parse import (parse_packages, parse_dependencies)
+from lib.config import packages_keys
+
+def write_packages(packages, filename, sort=False):
+ """
+    Writes `packages` to a file in the Debian Packages format.
+ If sort=True, the packages are sorted by name.
+ """
+    pkg_items = packages.items()
+    if sort:
+        pkg_items = sorted(pkg_items, key=lambda x: x[0])
+
+    with open(filename, 'w') as f:
+        for pkg_name, pkg_contents in pkg_items:
+            for key in packages_keys:
+                if key in pkg_contents:
+                    f.write('%s: %s\n' % (key, pkg_contents[key]))
+            f.write('\n')
+
+def load_packages_file(filename):
+ """ Load a gzip'd packages file.
+ Returns a dictionary of package name and package key-values.
+ """
+    with gzip_open(filename) as f:
+        packages_contents = f.read().decode('utf-8')
+ return parse_packages(packages_contents)
+
+
+def package_banned(pkg, banned_pkgs):
+ """
+ Returns True is the package contains a banned dependency.
+ Currently checks and parses both the 'Depends:' and the 'Pre-Depends' fields
+ of the package.
+ """
+ if pkg.get('Package') in banned_pkgs:
+ return True
+
+ depends = parse_dependencies(pkg.get('Depends', ''))
+ pre_depends = parse_dependencies(pkg.get('Pre-Depends', ''))
+
+ depends = [v[0] for v in depends]
+ pre_depends = [v[0] for v in pre_depends]
+
+ deps = set(depends).union(set(pre_depends))
+
+ return bool(deps.intersection(banned_pkgs))
+
+
+def merge_packages(pkg1, pkg2, banned_packages=set()):
+ """
+ Merges two previously loaded/parsed (using load_packages_file) packages
+ dictionaries, preferring `pkg1` over `pkg2`, and optionally discarding any
+ banned packages.
+ """
+ new_pkgs = {}
+ package_names = set(pkg1.keys()).union(set(pkg2.keys()))
+
+ for pkg in package_names:
+ pkg1_pkg = pkg1.get(pkg)
+ pkg2_pkg = pkg2.get(pkg)
+
+ if pkg1_pkg and pkg2_pkg:
+ new_pkgs[pkg] = pkg1_pkg
+ elif pkg1_pkg:
+ if not package_banned(pkg1_pkg, banned_packages):
+ new_pkgs[pkg] = pkg1_pkg
+ elif pkg2_pkg:
+ if not package_banned(pkg2_pkg, banned_packages):
+ new_pkgs[pkg] = pkg2_pkg
+ else:
+ assert False, 'Impossibru'
+
+ return new_pkgs
+
+def merge_packages_many(packages, banned_packages=set()): # TODO: Make generic
+ """
+    Merges two or more previously loaded/parsed (using load_packages_file)
+    packages dictionaries. Priority is defined by the order of the `packages`
+    list, and banned packages are optionally discarded.
+ """
+ assert len(packages) > 1
+
+ new_pkgs = {}
+
+ pkg1 = packages[0]
+ pkg2 = packages[1]
+
+ new_pkgs = merge_packages(pkg1, pkg2, banned_packages=banned_packages)
+
+ for pkg in packages[2:]:
+ new_pkgs = merge_packages(new_pkgs, pkg, banned_packages=banned_packages)
+
+ return new_pkgs
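
merge_packages keeps pkg1's entry whenever a package exists in both inputs, and the ban check only applies to packages found in a single input. A short illustrative sketch of that priority and filtering (package data made up):

    from lib.package import merge_packages

    devuan = {'eudev': {'Package': 'eudev', 'Version': '3.2'}}
    debian = {'eudev': {'Package': 'eudev', 'Version': '3.1'},
              'systemd': {'Package': 'systemd', 'Version': '215'}}

    merged = merge_packages(devuan, debian, banned_packages={'systemd'})
    assert merged['eudev']['Version'] == '3.2'   # pkg1 (devuan) wins
    assert 'systemd' not in merged               # banned package dropped
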
diff --git a/lib/parse.py b/lib/parse.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
+# see LICENSE file for copyright and license details
+
+import re
+import time
+
+from . import config
+from .log import notice
+
+
+def get_time(date):
+ return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z"))
+
+
+def get_date(relfile):
+ match = re.search('Date: .+', relfile)
+ if match:
+ line = relfile[match.start():match.end()]
+ relfile = line.split(': ')[1]
+ return relfile
+
+
+def parse_release(reltext):
+ _hash = {}
+ match = re.search('SHA256:+', reltext)
+ if match:
+ line = reltext[match.start():-1]
+ for i in line.split('\n'):
+ if i == 'SHA256:' or i == '\n': # XXX: hack
+ continue
+ _hash[(i.split()[2])] = i.split()[0]
+ return _hash
+
+PACKAGES_REGEX = re.compile(r'([A-Za-z0-9-]+): ')
+
+def parse_package(entry):
+ """ Parses a single Packages entry """
+ contents = PACKAGES_REGEX.split(entry)[1:] # Throw away the first ''
+
+ keys = contents[::2]
+ vals = map(lambda x: x.strip(), contents[1::2])
+
+ return dict(zip(keys, vals))
+
+
+def parse_packages(pkgtext):
+    # this parses our Packages file into a hashmap
+    # key: package name, value: entire package paragraph as a hashmap
+    pkgmap = {}
+
+    pkgs = pkgtext.split("\n\n")
+    for pkg in pkgs:
+        m = re.match('Package: .+', pkg)
+        if m:
+            line = pkg[m.start():m.end()]
+            key = line.split(': ')[1]
+            pkgmap[key] = parse_package(pkg)
+
+    return pkgmap
+
+def parse_dependencies(dependencies):
+ """
+    Parses a dependency line from a Debian Packages file.
+
+    Example line::
+
+        'libc6 (>= 2.4), libdbus-1-3 (>= 1.0.2), foo'
+
+    Output::
+
+        {'libc6': '(>= 2.4)', 'libdbus-1-3': '(>= 1.0.2)', 'foo': None}
+ """
+ r = {}
+
+ for pkg_plus_version in dependencies.split(', '):
+ v = pkg_plus_version.split(' ', 1)
+ name = v[0]
+
+        # An empty dependency string yields a single empty name; there is
+        # nothing to parse in that case, so stop here
+ if not name:
+ return {}
+
+ if len(v) == 2:
+ version = v[1]
+ r[name] = version
+ else:
+ r[name] = None
+
+ return r
+
+
+def print_package(pkgmap, pkgname):
+    """ Returns a parsed package as a list of 'Key: value' strings """
+    pkg = pkgmap.get(pkgname)
+    if pkg is None:
+        notice("nonexistent package: %s" % pkgname)
+        return []
+
+    sin = []
+    for key in config.packages_keys:
+        if key in pkg:
+            sin.append('%s: %s' % (key, pkg[key]))
+    return sin
+
+
+def compare_dict(d1, d2):
+ d1_keys = set(d1.keys())
+ d2_keys = set(d2.keys())
+ intersect_keys = d1_keys.intersection(d2_keys)
+ modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]}
+ return modified
+
+
+def compare_release(oldrel, newrel):
+    # requests is imported here so lib.parse does not depend on it unless
+    # compare_release is actually called
+    import requests
+
+    r = requests.get(newrel)
+    new = r.text
+    # read as text: get_date() and parse_release() expect str, not bytes
+    with open(oldrel, "r") as f:
+        old = f.read()
+
+ oldtime = get_time(get_date(old))
+ newtime = get_time(get_date(new))
+ if newtime > oldtime:
+ notice("Update available")
+ newhashes = parse_release(new)
+ oldhashes = parse_release(old)
+ changes = compare_dict(newhashes, oldhashes)
+ # k = pkg name, v = sha256
+ return changes
+
+
+# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
+# print(relmap)
+# for k, v in relmap.items():
+# print(k)
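
For reference, a short sketch of what the new regex-based parser produces for a single Packages stanza and its Depends line (the input text below is made up):

    from lib.parse import parse_package, parse_dependencies

    entry = ('Package: dash\n'
             'Version: 0.5.7-4\n'
             'Depends: libc6 (>= 2.11), dpkg (>= 1.15.4)\n')

    pkg = parse_package(entry)
    # {'Package': 'dash', 'Version': '0.5.7-4',
    #  'Depends': 'libc6 (>= 2.11), dpkg (>= 1.15.4)'}

    deps = parse_dependencies(pkg['Depends'])
    # {'libc6': '(>= 2.11)', 'dpkg': '(>= 1.15.4)'}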