[SCM] hydrogen-drumkits/master: First stab at a python-script to download drumkits and keep track of licenses

Mon Sep 21 22:34:09 UTC 2015

The following commit has been merged in the master branch:
commit 93b282be2a88ffb9d6a61a38cd45131e057c953a
Author: IOhannes m zmölnig <zmoelnig at umlautQ.umlaeute.mur.at>
Date:   Tue Sep 22 00:33:46 2015 +0200

    First stab at a python-script to download drumkits and keep track of licenses

diff --git a/debian/get-orig-source.py b/debian/get-orig-source.py
new file mode 100755
index 0000000..076f2a2
--- /dev/null
+++ b/debian/get-orig-source.py
@@ -0,0 +1,434 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# get-orig-source.py
+#
+# Copyright (C) 2011 Alessio Treglia <alessio at debian.org>
+# Copyright (C) 2015 IOhannes m zmölnig <umlaeute at debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+## purpose of this script
+## check for available drumkits in upstream's drumkit feed
+## for each available drumkit do:
+## - check if drumkit is already part of the package (probably comparing hashes of the downloaded file). if so, skip it
+## - check if the drumkit has a license attached. if not, skip it
+## - present the drumkit information to the maintainer (in a readable form)
+## - ask the maintainer to manually accept the drumkit (after they have examined the license)
+## - skip or add the new drumkit, based on maintainer's decision.
+
+## implementation details
+#
+## tarball layout
+# - drumkits/
+# - drumkits/MyKit
+#
+## persistent data
+# maybe we should store some data for each drumkit (even those not included in debian),
+# in order to:
+# - compare hashes (to avoid adding duplicate drumkits of different name)
+# - make inclusion/exclusion choices persistent
+# e.g. a JSON file accompanying each KIT, containing all the info from the XML + sha256 + distributable-flag
+# - drumkits.json
+#  [
+#    {
+#      "name": "MyKit",
+#      "author": "Foo Bar",
+#      "url": "http://example.com/drumkits/MyKit.h2drumkit",
+#      "file": "MyKit.h2drumkit",
+#      "license": "Public Domain",
+#      "sha256": "1b351166cfab4be4c4da6dba81d7a26d020f8ece44503ec76fe4d1975cbe4736",
+#      "distribute": true
+#    },
+#  ]
+
+## TODO
+
+## LATER: allow to specify multiple input paths (e.g. to maintain non-free packages as well)
+
+## LATER: remember the decision (if the package gets accepted) for a given license literal, and automatically apply that
+
+## LATER: (additionally) store a normalized <license> in the JSON
+##        e.g. for packages that lack a <license> but provide a grant in the <info>
+#         ..JSON:   '"licensed": "CC-SA"'
+
+
+import os
+import sys
+import json
+import shutil
+import urllib
+import tarfile
+import argparse
+import tempfile
+import xml.etree.ElementTree
+
+URL = 'http://www.hydrogen-music.org/feeds/drumkit_list.php'  # default for --uri (upstream's drumkit feed)
+WORKDIR='tmp/'  # not referenced anywhere else in this script
+PKGDIR=''  # only used to derive DRUMKITDIR
+
+DRUMKITDIR=os.path.join(PKGDIR, 'drumkits')  # 'drumkits' as long as PKGDIR is ''; not referenced below
+
+
+### some compat foo
+
+# use the html2text module if available; otherwise fall back to a pass-through
+try:
+    from html2text import html2text as h2t
+except ImportError:
+    def h2t(html, baseurl='', bodywidth=78):  # takes the arguments that html2text() below passes
+        return html  # no converter at hand: give the input back unchanged
+
+# define basestring for Python3 (which has no such builtin; str takes its place)
+try:
+    basestring
+except NameError:
+    basestring=str  # was 'str=basestring', which just raised the same NameError again
+
+### helper functions
+def toBool(s):
+    """Return True for non-zero int-likes and affirmative words (case-insensitive), else False."""
+    try:
+        return bool(int(s))
+    except (TypeError, ValueError):
+        # not int-like (TypeError covers None, lists, ...): fall through to the word list
+        pass
+    if hasattr(s, 'lower'):
+        s=s.lower()
+    return s in ['true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh']
+
+def html2text(html, baseurl='', bodywidth=78):
+    """Render <html> as plain text via h2t(); non-strings and empty results come back unchanged."""
+    # test against basestring (like hashfile() does), not str: on Python2 the values read by
+    # json.load() are unicode objects, which the original str test passed through unconverted
+    if not html or not isinstance(html, basestring):
+        return html
+    s=h2t(html, baseurl, bodywidth).strip()
+    # if the conversion leaves nothing behind, show the raw markup rather than nothing
+    return s or html
+
+def hashfile(afile, hasher=None, blocksize=65536):
+    """Return the hex digest (sha256 by default) of <afile>: a file name or an open binary file."""
+    if not hasher:
+        import hashlib
+        hasher=hashlib.sha256()
+    if isinstance(afile, basestring):
+        with open(afile, 'rb') as f:  # close what we opened; the original leaked this handle
+            return hashfile(f, hasher, blocksize)
+    for buf in iter(lambda: afile.read(blocksize), b''):  # read() yields b'' at end of file
+        hasher.update(buf)
+    return hasher.hexdigest()
+
+def stripSuffix(s, suffix, stripfix=None):
+    """If <s> ends with <suffix>, drop len(<stripfix>) trailing chars (default: the whole suffix)."""
+    if not s.endswith(suffix):
+        return s
+    cut=len(suffix if stripfix is None else stripfix)
+    return s[:max(0, len(s)-cut)]  # not s[:-cut]: that turns s into '' whenever cut is 0
+def stripPrefix(s, prefix, stripfix=None):
+    """If <s> starts with <prefix>, drop len(<stripfix>) leading chars (default: the whole prefix)."""
+    if not s.startswith(prefix):
+        return s
+    cut=len(prefix if stripfix is None else stripfix)
+    return s[cut:]
+
+
+def print_dict(obj):
+    """Print every entry of <obj> as 'key<TAB>: value', passing the value through html2text()."""
+    for key in obj:
+        text=html2text(obj[key])
+        print("%s\t: %s" % (key, text))
+
+def compareDict(d1, d2, keys=None):
+    """
+    compare two dictionaries.
+    if keys is None (default), compare all keys of both dictionaries;
+    if all keys match (either they are equal or non-existing in both dictionaries) return True
+    if some key that exists in both dictionaries has differing values, return False
+    otherwise, if some key is missing in just one of the dicts, return None
+    """
+    result=True
+    if keys is None:
+        keys=set(d1) | set(d2)  # d1.keys() + d2.keys() raises TypeError on Python3
+    else:
+        keys=set(keys)
+    for k in keys:
+        if k in d1 or k in d2:
+            try:
+                if d1[k] != d2[k]:
+                    return False
+            except KeyError:
+                ## key is missing in one of the two dictionaries
+                result = None
+    return result
+
+def copyUnsetNonempty(src, dst):
+    """Copy each non-'' value of <src> into <dst> unless <dst> already holds a non-'' value there."""
+    for key, value in src.items():
+        occupied=key in dst and dst[key] != ''
+        if not occupied and value != '':
+            dst[key]=value
+
+def download(url, outputfile=None):
+    """
+    download url into outputfile and return the path of the downloaded file;
+    if <outputfile> is None, the filename will be derived from the url;
+    if <outputfile> is an existing directory (or ends with a path separator),
+    it is the output directory
+    """
+    outdir=None
+    outfile=None
+    if outputfile and os.path.isdir(outputfile):
+        # without this, 'some/dir' was taken to mean the *file* 'dir' within 'some'
+        outdir=outputfile
+    elif outputfile:
+        outdir=os.path.dirname(outputfile)
+        outfile=os.path.basename(outputfile)
+
+    if not outfile:
+        outfile=url.split('/')[-1]
+    if outdir and not os.path.exists(outdir):
+        os.makedirs(outdir)
+    outputfile=os.path.join(outdir or '', outfile)
+    urllib.urlretrieve(url, outputfile)
+    return outputfile
+
+def tarup(tarname, path, strippath):
+    """Add <path> to the new gzipped tarball <tarname>, member names minus the <strippath> prefix."""
+    prefix=os.path.join(strippath, '').lstrip('/')
+    def strip_member(info):
+        info.name=stripPrefix(info.name, prefix)
+        return info
+    with tarfile.open(tarname, "w:gz") as tar:
+        tar.add(path, filter=strip_member)
+    return tarname
+
+## core functions
+
+
+def write_DrumkitInfo(path, dki, name=None):
+    """Dump <dki> as indented JSON to <path>/<name>.json; <name> defaults to dki['name']."""
+    jsonfile=os.path.join(path, (name or dki['name'])+".json")
+    with open(jsonfile, "w") as out:
+        json.dump(dki, out, indent=2, separators=(',', ': '))
+
+
+def get_availableDrumkitsInfo(url):
+    """
+    returns a list of dictionaries, each describing a drumkit (as obtained from 'url');
+    a dictionary maps the tag of each child of a <drumkit> element to that child's text
+    """
+    def to_dict(drumkit):
+        # children without text (or with nothing but whitespace) are left out;
+        # the text of the others is stored exactly as found, i.e. *not* stripped
+        return dict((o.tag, o.text) for o in drumkit if o.text and o.text.strip())
+
+    # called without a target filename, urlretrieve() downloads into a temporary file
+    filename = urllib.urlretrieve(url)[0]
+    try:
+        drumkits = xml.etree.ElementTree.parse(filename).findall('drumkit')
+    finally:
+        # delete that temporary file again; the original left it behind on every run
+        urllib.urlcleanup()
+    return [to_dict(drumkit) for drumkit in drumkits]
+
+def get_packagedDrumkitsInfo(paths, jname='drumkits.json'):
+    """returns a list of dictionaries, each describing a drumkit
+    (as read from the file <jname> in each of the directories in <paths>)"""
+    infos = []
+    try:
+        candidates=[os.path.join(p, jname) for p in paths]
+        jfiles=[c for c in candidates if os.path.isfile(c) and c.endswith('.json')]
+    except OSError:
+        return infos
+    for jfile in jfiles:
+        try:
+            with open(jfile) as jfd:
+                data=json.load(jfd)
+        except (IOError, ValueError):
+            # unreadable or not valid JSON: skip this file
+            continue
+        # a file may hold one drumkit (object) or many (array); anything else is ignored
+        if isinstance(data, dict):
+            infos.append(data)
+        elif isinstance(data, list):
+            infos+=data
+    return infos
+
+def compareDrumkitInfo(dk1, dk2):
+    """compares two drumkit info dicts by name, author, url and license (using compareDict)"""
+    return compareDict(dk1, dk2, keys=('name', 'author', 'url', 'license'))
+
+def findDrumkitInfo(dk, dklist):
+    """
+    looks up the drumkit info dict <dk> among the info dicts in <dklist>,
+    and returns the first entry of <dklist> that matches (or None)
+    """
+    ## we only really compare the URL
+    ## and we only accept drumkits with a license (so we compare that as well)
+    keys=('url', 'license')
+    ## compareDict() has to say True; its None (key missing on one side) is no match
+    matches=(dk_ for dk_ in dklist if compareDict(dk, dk_, keys))
+    return next(matches, None)
+
+def guessFilenameFromURL(url):
+    """basename of <url>, after dropping the '/download' that may trail a '.h2drumkit' name"""
+    return os.path.basename(stripSuffix(url, '.h2drumkit/download', '/download'))
+
+def downloadPkgs(pkgs, path):
+    """download pkg['url'] to <path>/pkg['filename'] for each of <pkgs>, creating <path> if needed"""
+    if not os.path.isdir(path):
+        os.makedirs(path)
+    total=len(pkgs)
+    for count, pkg in enumerate(pkgs, 1):
+        url=pkg['url']
+        fname=pkg['filename']
+        print("downloading %s into %s [%d/%d]" % (url, fname, count, total))
+        download(url, os.path.join(path, fname))
+
+## testing functions
+def avail2pkg(URL, path):
+    """write a <name>.json info file into <path> for every drumkit listed at <URL>"""
+    for info in get_availableDrumkitsInfo(URL):
+        write_DrumkitInfo(path, info)
+def print_avail(URL):
+    """print the info of every drumkit listed at <URL>"""
+    # no write_DrumkitInfo(path, obj) here: 'path' is undefined in this scope (NameError)
+    for obj in get_availableDrumkitsInfo(URL):
+        print_dict(obj)
+def print_pkg(path):
+    """print the info of every drumkit that get_packagedDrumkitsInfo() finds for <path>"""
+    for info in get_packagedDrumkitsInfo(path):
+        print_dict(info)
+
+def print_foo(objs):
+    """print 'name (url)' plus an empty line for each drumkit whose author is Artemiy Pavlov"""
+    for kit in (o for o in objs if o.get('author') == "Artemiy Pavlov"):
+        print("%s (%s)\n" % (kit['name'], kit['url']))
+
+def pkg_drumkits(conf):
+    """interactively pick drumkits from conf.uri, download them into conf.workdir/drumkits
+    and write all infos to conf.workdir/drumkits.json (prior decisions come from conf.pkgdir)"""
+    # list of already packaged drumkits
+    packd=get_packagedDrumkitsInfo(conf.pkgdir)
+    # list of available (online) drumkits
+    avail=get_availableDrumkitsInfo(conf.uri)
+
+    for pkg in avail:
+        # check if this has already been packaged
+        a=findDrumkitInfo(pkg, packd)
+        if a:
+            distribute=a.get('distribute')
+            if (distribute is not None) and (distribute != ''):
+                distribute=toBool(distribute)
+
+            copyUnsetNonempty(a, pkg)
+        else:
+            distribute=None
+        print("=================")
+        print_dict(pkg)
+        if distribute is None:
+            while distribute is None:
+                try:
+                    dis=raw_input("Add drumkit '%s' to package? [y/n/^D to skip] " % (pkg.get('name')))
+                    if dis:
+                        distribute=toBool(dis)
+                except EOFError:
+                    distribute=None
+                    break
+        else:
+            print("Using prior decision about distribution: %s" % (distribute))
+        pkg['distribute']=distribute
+        if distribute:
+            print("")
+            lic=pkg.get('license')
+            ## get a machine-readable license from the user
+            license=pkg.get('licensed')
+            while not license:
+                try:
+                    license=raw_input("What's the machine-readable license ('%s')? " % (pkg.get('license')))
+                except EOFError:
+                    # no more input: settle for the <license> text (possibly none); the original
+                    # kept looping here, which never ended on a closed stdin without a <license>
+                    license=lic
+                    break
+                if not license:
+                    license=lic
+            pkg['licensed']=license
+
+            ## make sure we have a sane output filename
+            filename=pkg.get('filename')
+            if not filename:
+                url=pkg.get('url')
+                filename=guessFilenameFromURL(url)
+                fname=raw_input("What should be the output filename ('%s')? " % (filename))
+                if fname:
+                    filename=fname
+            pkg['filename']=filename
+
+        print("")
+
+    pkgs = [x for x in avail if x.get('distribute')]
+    downloadPkgs(pkgs, os.path.join(conf.workdir, 'drumkits'))
+    write_DrumkitInfo(conf.workdir, avail, 'drumkits')
+
+def main(conf):
+    """run pkg_drumkits() in <conf.workdir>/hydrogen-drumkits (conf.workdir is updated), tar it up"""
+    topdir=conf.workdir
+    conf.workdir=os.path.join(topdir, 'hydrogen-drumkits')
+    pkg_drumkits(conf)
+    print("assembled package in %s" % tarup(conf.outfile, conf.workdir, topdir))
+
+
+
+def parseCmdlineArgs():
+    """parse sys.argv; returns the argparse namespace with uri, workdir, outfile and pkgdir"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-u', '--uri', type=str,
+                        default=URL,
+                        help="URI to read available packages from (DEFAULT: %(default)s)")
+    parser.add_argument('-w', '--workdir', type=str,
+                        help="temporary directory to assemble tarball (DEFAULT: $TMPDIR)")
+    parser.add_argument('-o', '--outfile', type=str,
+                        default="hydrogen-drumkits.tar.gz",
+                        help="output tarfile (DEFAULT: %(default)s)")
+    # no default=['.'] here: action='append' appends to the default list instead of replacing it
+    parser.add_argument('-p', '--pkgdir', type=str, action='append',
+                        help="directory to read available packages from (DEFAULT: .)")
+    args = parser.parse_args()
+    args.pkgdir = args.pkgdir or ['.']  # the default applies only when no -p was given at all
+    return args
+
+
+if __name__ == '__main__':
+    args=parseCmdlineArgs()
+    # options given as empty values fall back to their defaults
+    args.uri = args.uri or URL
+    args.pkgdir = args.pkgdir or ['.']
+    if args.workdir and not os.path.exists(args.workdir):
+        os.makedirs(args.workdir)
+    # everything is assembled in a fresh scratch directory
+    # (created below --workdir if that was given, else in tempfile's default location)
+    wd=tempfile.mkdtemp(dir=args.workdir)
+    args.workdir=wd
+    print("args: %s" % (args,))
+
+    try:
+        main(args)
+    finally:
+        # runs on success as well as on any exception (which then propagates as before):
+        # the scratch directory never outlives the script
+        shutil.rmtree(wd)

-- 
hydrogen-drumkits packaging