amprolla

Devuan's apt repo merger
git clone https://git.parazyd.org/amprolla
Log | Files | Refs | README | LICENSE

amprolla_merge_contents.py (3287B)


      1 #!/usr/bin/env python3
      2 # See LICENSE file for copyright and license details.
      3 
      4 """
      5 Amprolla module for merging Contents files
      6 """
      7 
      8 from gzip import open as gzip_open
      9 from multiprocessing import Pool
     10 from os import makedirs
     11 from os.path import dirname, join, isfile
     12 from time import time
     13 
     14 import lib.globalvars as globalvars
     15 from lib.config import (arches, categories, cpunm, mergedir, mergesubdir,
     16                         repos, spooldir)
     17 from lib.lock import check_lock, free_lock
     18 from lib.log import die, info
     19 from amprolla_merge import prepare_merge_dict
     20 
     21 
     22 def merge_contents(filelist):
     23     """
     24     Merges a list of Contents files and returns a dict of the merged files
     25     """
     26     pkgs = {}
     27     for i in filelist:
     28         if i and isfile(i):
     29             cfile = gzip_open(i).read()
     30             cfile = cfile.decode('utf-8')
     31             contents = cfile.split('\n')
     32 
     33             header = False
     34             for line in contents:
     35                 if line.startswith('This file maps each file'):
     36                     header = True
     37                 if line.startswith('FILE'):
     38                     header = False
     39                     continue
     40 
     41                 if line != '' and not header:
     42                     sin = line.split()
     43                     if sin[-1] not in pkgs.keys():
     44                         pkgs[sin[-1]] = []
     45                     pkgs[sin[-1]].append(' '.join(sin[:-1]))
     46     return pkgs
     47 
     48 
     49 def write_contents(pkgs, filename):
     50     """
     51     Writes a merged Contents dict to the given filename in gzip format
     52     """
     53     makedirs(dirname(filename), exist_ok=True)
     54     gzf = gzip_open(filename, 'w')
     55 
     56     for pkg, files in sorted(pkgs.items()):
     57         for file in files:
     58             line = "%s %s\n" % (file, pkg)
     59             gzf.write(line.encode('utf-8'))
     60 
     61     gzf.write(b'\n')
     62     gzf.close()
     63 
     64 
     65 def main_merge(contents_file):
     66     """
     67     Main merge logic. First parses the files into dictionaries, and
     68     writes them to the mergedir afterwards
     69     """
     70     to_merge = prepare_merge_dict()
     71 
     72     for suite in to_merge:
     73         globalvars.suite = suite
     74         cont_list = []
     75         for rep in to_merge[suite]:
     76             if rep:
     77                 cont_list.append(join(rep, contents_file))
     78             else:
     79                 cont_list.append(None)
     80 
     81         print("Merging contents: %s" % cont_list)
     82         contents_dict = merge_contents(cont_list)
     83 
     84         outfile = cont_list[0].replace(join(spooldir,
     85                                             repos['devuan']['dists']),
     86                                        join(mergedir, mergesubdir))
     87         print("Writing contents: %s" % outfile)
     88         write_contents(contents_dict, outfile)
     89 
     90 
     91 def main():
     92     """
     93     Main function to allow multiprocessing.
     94     """
     95     cont = []
     96     for i in arches:
     97         for j in categories:
     98             if i != 'source':
     99                 cont.append(join(j, i.replace('binary', 'Contents')+'.gz'))
    100             else:
    101                 cont.append(join(j, 'Contents-'+i+'.gz'))
    102 
    103     mrgpool = Pool(cpunm)
    104     mrgpool.map(main_merge, cont)
    105     mrgpool.close()
    106 
    107 
    108 if __name__ == '__main__':
    109     try:
    110         t1 = time()
    111         check_lock()
    112         main()
    113         free_lock()
    114         t2 = time()
    115         info('Total contents merge time: %s' % (t2 - t1), tofile=True)
    116     except Exception as e:
    117         die(e)