amprolla_merge_contents.py (3287B)
#!/usr/bin/env python3
# See LICENSE file for copyright and license details.

"""
Amprolla module for merging Contents files
"""

from gzip import open as gzip_open
from multiprocessing import Pool
from os import makedirs
from os.path import dirname, join, isfile
from time import time

import lib.globalvars as globalvars
from lib.config import (arches, categories, cpunm, mergedir, mergesubdir,
                        repos, spooldir)
from lib.lock import check_lock, free_lock
from lib.log import die, info
from amprolla_merge import prepare_merge_dict


def merge_contents(filelist):
    """
    Merges a list of Contents files and returns a dict of the merged files

    filelist is an iterable of paths to gzip-compressed Contents files.
    Entries that are falsy (e.g. None) or do not point to an existing file
    are skipped.

    Returns a dict that maps the last whitespace-separated column of each
    line (the package column) to the list of file paths found in front of
    it, in the order the lines were read.
    """
    pkgs = {}
    for i in filelist:
        if not i or not isfile(i):
            continue

        # Stream the archive line by line instead of decompressing all of it
        # into memory; the context manager closes the handle even on errors.
        # newline='\n' disables newline translation, so only '\n' ends a line.
        with gzip_open(i, 'rt', encoding='utf-8', newline='\n') as cfile:
            header = False
            for line in cfile:
                line = line.rstrip('\n')

                # Everything from the descriptive preamble up to and
                # including the 'FILE ...' column header line is ignored.
                if line.startswith('This file maps each file'):
                    header = True
                if line.startswith('FILE'):
                    header = False
                    continue

                if line == '' or header:
                    continue

                # Split off only the last column, so that whitespace inside
                # the file path is kept exactly as it was written.
                fields = line.rsplit(None, 1)
                if not fields:
                    # Whitespace-only line, nothing to record.
                    continue

                pkg = fields[-1]
                # A line with a single column is recorded with an empty path.
                path = fields[0] if len(fields) == 2 else ''
                pkgs.setdefault(pkg, []).append(path)
    return pkgs


def write_contents(pkgs, filename):
    """
    Writes a merged Contents dict to the given filename in gzip format

    pkgs is a dict as returned by merge_contents(). One "<path> <package>"
    line is written per file, with the packages in sorted order, followed
    by a single empty line. Missing parent directories are created.
    """
    outdir = dirname(filename)
    if outdir:
        # makedirs('') raises, so only create a directory when there is one.
        makedirs(outdir, exist_ok=True)

    # The context manager guarantees the gzip trailer is flushed and the
    # handle is closed even if a write fails.
    with gzip_open(filename, 'w') as gzf:
        for pkg, files in sorted(pkgs.items()):
            for path in files:
                line = "%s %s\n" % (path, pkg)
                gzf.write(line.encode('utf-8'))

        gzf.write(b'\n')


def main_merge(contents_file):
    """
    Main merge logic. First parses the files into dictionaries, and
    writes them to the mergedir afterwards

    contents_file is the path of a Contents file relative to a suite
    directory; it is joined onto every repository path of every suite
    returned by prepare_merge_dict().
    """
    to_merge = prepare_merge_dict()

    for suite in to_merge:
        globalvars.suite = suite
        # Keep a None placeholder for falsy entries so positions are kept.
        cont_list = [join(rep, contents_file) if rep else None
                     for rep in to_merge[suite]]

        print("Merging contents: %s" % cont_list)
        contents_dict = merge_contents(cont_list)

        # The output path is derived from the first entry by swapping the
        # spool prefix for the merge prefix.
        # NOTE(review): assumes the first entry is never None and lives
        # under the devuan spool directory - confirm against
        # prepare_merge_dict().
        outfile = cont_list[0].replace(join(spooldir,
                                            repos['devuan']['dists']),
                                       join(mergedir, mergesubdir))
        print("Writing contents: %s" % outfile)
        write_contents(contents_dict, outfile)


def main():
    """
    Main function to allow multiprocessing.

    Builds the list of relative Contents file names for every combination
    of architecture and category and merges them using a pool of cpunm
    worker processes.
    """
    cont = []
    for i in arches:
        for j in categories:
            if i != 'source':
                cont.append(join(j, i.replace('binary', 'Contents')+'.gz'))
            else:
                cont.append(join(j, 'Contents-'+i+'.gz'))

    mrgpool = Pool(cpunm)
    try:
        mrgpool.map(main_merge, cont)
    finally:
        # Always shut the pool down and wait for the workers to exit, also
        # when one of the merges raised.
        mrgpool.close()
        mrgpool.join()


if __name__ == '__main__':
    try:
        t1 = time()
        check_lock()
        main()
        free_lock()
        t2 = time()
        info('Total contents merge time: %s' % (t2 - t1), tofile=True)
    except Exception as e:
        die(e)