[SCM] hydrogen-drumkits/master: First stab at a python-script to download drumkits and keep track of licenses
umlaeute at users.alioth.debian.org
umlaeute at users.alioth.debian.org
Mon Sep 21 22:34:09 UTC 2015
The following commit has been merged in the master branch:
commit 93b282be2a88ffb9d6a61a38cd45131e057c953a
Author: IOhannes m zmölnig <zmoelnig at umlautQ.umlaeute.mur.at>
Date: Tue Sep 22 00:33:46 2015 +0200
First stab at a python-script to download drumkits and keep track of licenses
diff --git a/debian/get-orig-source.py b/debian/get-orig-source.py
new file mode 100755
index 0000000..076f2a2
--- /dev/null
+++ b/debian/get-orig-source.py
@@ -0,0 +1,434 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# get-orig-source.py
+#
+# Copyright (C) 2011 Alessio Treglia <alessio at debian.org>
+# Copyright (C) 2015 IOhannes m zmölnig <umlaeute at debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+## purpose of this script
+## check for available drumkits in upstream's drumkit feed
+## for each available drumkit do:
+## - check if drumkit is already part of the package (probably comparing hashes of the downloaded file). if so, skip it
+## - check if the drumkit has a license attached. if not, skip it
+## - present the drumkit information to the maintainer (in a readable form)
+## - ask the maintainer to manually accept the drumkit (after they have examined the license)
+## - skip or add the new drumkit, based on maintainer's decision.
+
+## implementation details
+#
+## tarball layout
+# - drumkits/
+# - drumkits/MyKit
+#
+## persistent data
+# maybe we should store some data for each drumkit (even those not included in debian),
+# in order to:
+# - compare hashes (to avoid adding duplicate drumkits of different name)
+# - make inclusion/exclusion choices persistent
+# e.g. a JSON file accompanying each KIT, containing all the info from the XML + sha256 + distributable-flag
+# - drumkits.json
+# [
+# {
+# "name": "MyKit",
+# "author": "Foo Bar",
+# "url": "http://example.com/drumkits/MyKit.h2drumkit",
+# "file": "MyKit.h2drumkit",
+# "license": "Public Domain",
+# "sha256": "1b351166cfab4be4c4da6dba81d7a26d020f8ece44503ec76fe4d1975cbe4736",
+# "distribute": true
+# },
+# ]
+
+## TODO
+
+## LATER: allow to specify multiple input paths (e.g. to maintain non-free packages as well)
+
+## LATER: remember the decision (if the package gets accepted) for a given license literal, and automatically apply that
+
+## LATER: (additionally) store a normalized <license> in the JSON
+## e.g. for packages that lack a <license> but provide a grant in the <info>
+# ..JSON: '"licensed": "CC-SA"'
+
+
+import os
+import sys
+import json
+import shutil
+import urllib
+import tarfile
+import argparse
+import tempfile
+import xml.etree.ElementTree
+
# Upstream feed that lists the drumkits available for download.
URL = 'http://www.hydrogen-music.org/feeds/drumkit_list.php'

# Path defaults; NOTE(review): not referenced by the code visible in this
# script apart from DRUMKITDIR being derived from PKGDIR.
WORKDIR = 'tmp/'
PKGDIR = ''
DRUMKITDIR = os.path.join(PKGDIR, 'drumkits')
+
+
+### some compat foo
+
# Prefer the third-party html2text converter; when it is not installed,
# fall back to a pass-through with the same call signature.
def _h2t_passthrough(html, baseurl='', bodywidth=78):
    """Return `html` unchanged (stand-in for html2text.html2text)."""
    return html


try:
    from html2text import html2text as h2t
except ImportError:
    h2t = _h2t_passthrough
+
# Python 3 has no `basestring`; alias it to `str` so that the
# isinstance(..., basestring) checks below work on both major versions.
try:
    basestring
except NameError:
    basestring = str
+
+### helper functions
def toBool(s):
    """
    interpret <s> as a boolean.
    anything int() can convert is True if non-zero;
    otherwise <s> is lower-cased (if it is a string) and is True
    if it is one of a fixed set of affirmative words.
    values that are neither numeric nor affirmative (e.g. None) yield False.
    """
    try:
        return bool(int(s))
    except (ValueError, TypeError):
        # TypeError: int() cannot handle e.g. None or a list; fall through
        # to the word check instead of crashing.
        pass
    try:
        s = s.lower()
    except AttributeError:
        pass
    return s in ('true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh')
+
def html2text(html, baseurl='', bodywidth=78):
    """
    convert <html> to plain text using h2t();
    empty values and non-str values are returned unchanged;
    if the converted (and stripped) text is empty, the original <html> is returned
    """
    if not html or not isinstance(html, str):
        return html
    text = h2t(html, baseurl, bodywidth).strip()
    return text if text else html
+
def hashfile(afile, hasher=None, blocksize=65536):
    """
    return the hex digest of <afile>, read in chunks of <blocksize>;
    <afile> is either a filename (opened in binary mode and closed again)
    or an object with a read() method (left open for the caller);
    <hasher> is a hashlib-style object (DEFAULT: a fresh sha256)
    """
    if hasher is None:
        import hashlib
        hasher = hashlib.sha256()
    # resolve the string type locally, so this works on Python 2 and 3
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(afile, string_types):
        # we opened it, so we are responsible for closing it
        with open(afile, 'rb') as fd:
            return hashfile(fd, hasher, blocksize)
    while True:
        buf = afile.read(blocksize)
        if not buf:
            break
        hasher.update(buf)
    return hasher.hexdigest()
+
def stripSuffix(s, suffix, stripfix=None):
    """
    if <s> ends with <suffix>, cut len(<stripfix>) characters off its end;
    <stripfix> defaults to <suffix>, so by default the suffix itself is removed.
    if <s> does not end with <suffix>, it is returned unchanged.
    """
    if stripfix is None:
        stripfix = suffix
    if s.endswith(suffix):
        # compute the cut position explicitly: s[:-0] would yield '' for
        # an empty stripfix instead of leaving <s> untouched
        return s[:max(0, len(s) - len(stripfix))]
    return s
def stripPrefix(s, prefix, stripfix=None):
    """
    if <s> starts with <prefix>, cut len(<stripfix>) characters off its start;
    <stripfix> defaults to <prefix>.
    otherwise <s> is returned unchanged.
    """
    cut = prefix if stripfix is None else stripfix
    return s[len(cut):] if s.startswith(prefix) else s
+
+
def print_dict(obj):
    """print each key of <obj> with its value (run through html2text()), one per line"""
    for key, value in obj.items():
        print("%s\t: %s" % (key, html2text(value)))
+
def compareDict(d1, d2, keys=None):
    """
    compare two dictionaries.
    if keys is None (default), compare all keys;
    if all keys match (either they are equal or non-existing in both dictionaries) return True
    if some keys differ, return False
    if keys are missing in one of the dicts (and no key differs), return None
    """
    if keys is None:
        # set union works on Python 2 and 3 (dict views cannot be added)
        keys = set(d1) | set(d2)
    else:
        keys = set(keys)
    result = True
    for k in keys:
        in1 = k in d1
        in2 = k in d2
        if in1 and in2:
            if d1[k] != d2[k]:
                return False
        elif in1 or in2:
            ## key is missing in one of the two dictionaries
            result = None
    return result
+
def copyUnsetNonempty(src, dst):
    """
    copy entries from <src> into <dst> (in place);
    only non-empty values are copied, and only for keys that are
    missing from <dst> or hold an empty string there
    """
    for key, value in src.items():
        already_set = key in dst and dst[key] != ''
        if value != '' and not already_set:
            dst[key] = value
+
def download(url, outputfile=None):
    """
    download url into outputfile and return the path written to;
    if <outputfile> is None, the filename will be derived from the url
    (its last '/'-separated component) and placed in the current directory;
    if <outputfile> is a directory (an existing one, or a path ending in
    a path separator), it is the output directory;
    missing output directories are created
    """
    # urlretrieve moved to urllib.request in Python 3
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    outdir = ''
    outfile = ''
    if outputfile:
        if os.path.isdir(outputfile):
            outdir = outputfile
        else:
            outdir, outfile = os.path.split(outputfile)

    if not outfile:
        outfile = url.split('/')[-1]
    if outdir and not os.path.exists(outdir):
        os.makedirs(outdir)

    outputfile = os.path.join(outdir, outfile)
    urlretrieve(url, outputfile)
    return outputfile
+
def tarup(tarname, path, strippath):
    """
    pack <path> into the gzip-compressed tarball <tarname>;
    each member name is passed through stripPrefix() to drop the leading
    <strippath> (normalised to end in a separator, without leading '/');
    returns <tarname>
    """
    leading = os.path.join(strippath, '').lstrip('/')

    def _relocate(member):
        member.name = stripPrefix(member.name, leading)
        return member

    with tarfile.open(tarname, "w:gz") as archive:
        archive.add(path, filter=_relocate)
    return tarname
+
+## core functions
+
+
def write_DrumkitInfo(path, dki, name=None):
    """
    serialise <dki> as JSON into '<path>/<name>.json';
    if <name> is empty, dki['name'] is used instead
    """
    basename = name or dki['name']
    target = os.path.join(path, basename + ".json")
    with open(target, "w") as out:
        out.write(json.dumps(dki, indent=2, separators=(',', ': ')))
+
+
def get_availableDrumkitsInfo(url):
    """
    returns a list of dictionaries, each describing a drumkit (as obtained from 'url');
    the document at <url> is parsed as XML, and each <drumkit> element directly
    below the root becomes one dict mapping child tag to child text;
    children whose text is missing or whitespace-only are left out
    (the stored text itself is NOT stripped)
    """
    # urlretrieve/urlcleanup moved to urllib.request in Python 3
    try:
        from urllib.request import urlretrieve, urlcleanup
    except ImportError:
        from urllib import urlretrieve, urlcleanup

    filename = urlretrieve(url)[0]
    try:
        tree = xml.etree.ElementTree.parse(filename)
    finally:
        # remove the temporary file urlretrieve() may have created
        urlcleanup()

    kits = []
    for drumkit in tree.findall('drumkit'):
        kits.append(dict(
            (child.tag, child.text)
            for child in drumkit
            if child.text and child.text.strip()
        ))
    return kits
+
def get_packagedDrumkitsInfo(paths, jname='drumkits.json'):
    """
    returns a list of dictionaries, each describing a drumkit (as info found at 'paths');
    <paths> is a list of directories (a single directory name is accepted as well);
    in each directory the JSON file <jname> is read, if it exists:
    a JSON object adds one entry, a JSON array adds all its elements;
    unreadable or invalid files are silently skipped
    """
    # resolve the string type locally, so this works on Python 2 and 3
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(paths, string_types):
        # a bare string would otherwise be iterated character by character
        paths = [paths]

    objs_list = []
    for p in paths:
        jfile = os.path.join(p, jname)
        if not (jfile.endswith('.json') and os.path.isfile(jfile)):
            continue
        try:
            with open(jfile) as jfd:
                j = json.load(jfd)
        except (IOError, ValueError):
            continue
        if isinstance(j, dict):
            objs_list.append(j)
        elif isinstance(j, list):
            objs_list.extend(j)
    return objs_list
+
def compareDrumkitInfo(dk1, dk2):
    """compares two drumkit dictionaries on their name, author, url and license (see compareDict)"""
    identifying_keys = ('name', 'author', 'url', 'license')
    return compareDict(dk1, dk2, identifying_keys)
+
def findDrumkitInfo(dk, dklist):
    """
    looks up the drumkit info dict <dk> in the list of infos <dklist>,
    and returns the first entry for which compareDict() is true
    on 'url' and 'license' (or None if there is no such entry)
    """
    ## only the URL really identifies a drumkit;
    ## the license is compared as well, since only licensed drumkits are accepted
    match_keys = ('url', 'license')
    return next(
        (candidate for candidate in dklist
         if compareDict(dk, candidate, match_keys)),
        None)
+
def guessFilenameFromURL(url):
    """
    derive a filename from <url>: its last path component, after dropping
    a trailing '/download' from URLs ending in '.h2drumkit/download'
    """
    trimmed = stripSuffix(url, '.h2drumkit/download', '/download')
    return os.path.split(trimmed)[1]
+
def downloadPkgs(pkgs, path):
    """
    download every drumkit in <pkgs> into the directory <path> (created if missing);
    each entry provides the source as pkg['url'] and the target name as pkg['filename']
    """
    if not os.path.isdir(path):
        os.makedirs(path)
    total = len(pkgs)
    for index, pkg in enumerate(pkgs, 1):
        url, fname = pkg['url'], pkg['filename']
        print("downloading %s into %s [%d/%d]" % (url, fname, index, total))
        download(url, os.path.join(path, fname))
+
+## testing functions
def avail2pkg(URL, path):
    """fetch the drumkit list from <URL> and write one JSON info file per drumkit into <path>"""
    for kit in get_availableDrumkitsInfo(URL):
        write_DrumkitInfo(path, kit)
def print_avail(URL):
    """fetch the drumkit list from <URL> and print the info of each drumkit"""
    # the original also called write_DrumkitInfo(path, obj) here, but no
    # 'path' exists in this function (NameError); writing is avail2pkg()'s job
    for obj in get_availableDrumkitsInfo(URL):
        print_dict(obj)
def print_pkg(path):
    """print the info of each packaged drumkit that get_packagedDrumkitsInfo() finds for <path>"""
    for kit in get_packagedDrumkitsInfo(path):
        print_dict(kit)
+
def print_foo(objs):
    """print name and url (followed by an empty line) of each drumkit authored by "Artemiy Pavlov\""""
    wanted_author = "Artemiy Pavlov"
    for kit in (o for o in objs if o.get('author') == wanted_author):
        print("%s (%s)\n" % (kit['name'], kit['url']))
+
def pkg_drumkits(conf):
    """
    interactively decide which of the available drumkits get packaged.
    reads the already packaged drumkits from conf.pkgdir and the available
    ones from conf.uri; for each available drumkit a prior decision
    ('distribute' of the matching packaged entry) is re-used, otherwise the
    user is asked. for accepted drumkits a machine-readable license
    ('licensed') and an output 'filename' are queried as well.
    finally, accepted drumkits are downloaded into '<conf.workdir>/drumkits'
    and the info about ALL available drumkits is written to
    '<conf.workdir>/drumkits.json'
    """
    # raw_input() was renamed to input() in Python 3
    try:
        ask = raw_input
    except NameError:
        ask = input

    # list of already packaged drumkits
    packd = get_packagedDrumkitsInfo(conf.pkgdir)
    # list of available (online) drumkits
    avail = list(get_availableDrumkitsInfo(conf.uri))

    for pkg in avail:
        # check if this has already been packaged
        prior = findDrumkitInfo(pkg, packd)
        distribute = None
        if prior:
            distribute = prior.get('distribute')
            if (distribute is not None) and (distribute != ''):
                distribute = toBool(distribute)
            copyUnsetNonempty(prior, pkg)

        print("=================")
        print_dict(pkg)
        if distribute is None:
            while distribute is None:
                try:
                    dis = ask("Add drumkit '%s' to package? [y/n/^D to skip] " % (pkg.get('name')))
                except EOFError:
                    # ^D: leave the decision open
                    break
                if dis:
                    distribute = toBool(dis)
        else:
            print("Using prior decision about distribution: %s" % (distribute))
        pkg['distribute'] = distribute

        if distribute:
            print("")
            lic = pkg.get('license')
            ## get a machine-readable license from the user
            licensed = pkg.get('licensed')
            while not licensed:
                try:
                    licensed = ask("What's the machine-readable license ('%s')? " % (lic))
                except EOFError:
                    # stdin is closed: settle for the default (even if empty)
                    # instead of re-prompting forever
                    licensed = lic
                    break
                if not licensed:
                    # empty answer accepts the default
                    licensed = lic
            pkg['licensed'] = licensed

            ## make sure we have a sane output filename
            filename = pkg.get('filename')
            if not filename:
                filename = guessFilenameFromURL(pkg.get('url'))
                try:
                    fname = ask("What should be the output filename ('%s')? " % (filename))
                except EOFError:
                    # keep the guessed filename
                    fname = ''
                if fname:
                    filename = fname
            pkg['filename'] = filename

        print("")

    pkgs = [x for x in avail if x.get('distribute')]
    downloadPkgs(pkgs, os.path.join(conf.workdir, 'drumkits'))
    write_DrumkitInfo(conf.workdir, avail, 'drumkits')
+
def main(conf):
    """
    assemble the drumkits below '<conf.workdir>/hydrogen-drumkits' (conf.workdir
    is updated to that directory) and pack them into the tarball conf.outfile,
    with the original conf.workdir stripped from the member names
    """
    basedir = conf.workdir
    conf.workdir = os.path.join(basedir, 'hydrogen-drumkits')
    pkg_drumkits(conf)
    archive = tarup(conf.outfile, conf.workdir, basedir)
    print("assembled package in %s" % archive)
+
+
+
def parseCmdlineArgs():
    """
    parse the command line (sys.argv) and return the argparse namespace
    with the attributes uri, workdir, outfile and pkgdir;
    workdir and pkgdir are None if not given (pkgdir is a list otherwise)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--uri', type=str,
                        default=URL,
                        help="URI to read available packages from (DEFAULT: %(default)s)")
    parser.add_argument('-w', '--workdir', type=str,
                        help="temporary directory to assemble tarball (DEFAULT: $TMPDIR)")
    parser.add_argument('-o', '--outfile', type=str,
                        default="hydrogen-drumkits.tar.gz",
                        help="output tarfile (DEFAULT: %(default)s)")
    # no list default here: with action='append' argparse appends to the
    # default list, so '.' could never be overridden; the caller falls
    # back to ['.'] when no directory was given
    parser.add_argument('-p', '--pkgdir', type=str,
                        default=None,
                        action='append',
                        help="directory to read available packages from; can be given multiple times (DEFAULT: .)")

    return parser.parse_args()
+
+
if __name__ == '__main__':
    args = parseCmdlineArgs()
    # fall back to the defaults for anything left unset
    args.uri = args.uri or URL
    args.pkgdir = args.pkgdir or ['.']
    if args.workdir and not os.path.exists(args.workdir):
        os.makedirs(args.workdir)
    # all work happens in a fresh scratch directory below the workdir
    # (mkdtemp picks the system default location if workdir is unset)
    wd = tempfile.mkdtemp(dir=args.workdir)
    args.workdir = wd
    print("args: %s" % (args,))

    # remove the scratch directory whether or not main() succeeds
    try:
        main(args)
    finally:
        shutil.rmtree(wd)
--
hydrogen-drumkits packaging
More information about the pkg-multimedia-commits
mailing list