[Pkg-mozext-commits] [mozilla-devscripts] 01/02: amo-changelog: parse into human-readable form and save to debian/upstream/changelog{,.html}

Ximin Luo infinity0 at pwned.gg
Wed Dec 17 09:30:14 UTC 2014


This is an automated email from the git hooks/post-receive script.

infinity0-guest pushed a commit to branch master
in repository mozilla-devscripts.

commit 003eca75448abf77413b93b3c8c3cf56f48cee34
Author: Ximin Luo <infinity0 at pwned.gg>
Date:   Tue Sep 30 01:59:25 2014 +0100

    amo-changelog: parse into human-readable form and save to debian/upstream/changelog{,.html}
---
 amo-changelog       | 142 ++++++++++++++++++++++++++++++++++++++++++++--------
 man/amo-changelog.1 |  16 ++++--
 2 files changed, 132 insertions(+), 26 deletions(-)

diff --git a/amo-changelog b/amo-changelog
index 4ce9e73..273d6ad 100755
--- a/amo-changelog
+++ b/amo-changelog
@@ -19,43 +19,141 @@ from __future__ import print_function
 
 import argparse
 import os
+import re
+import subprocess
 import sys
 import urllib2
 import xml.etree.cElementTree as etree
 
 URL_TEMPLATE = "https://addons.mozilla.org/en-US/addon/{ext}/versions/format:rss"
+OUTGOING_HREF = re.compile(r'href="https?://outgoing\.mozilla\.org/v\d+/\w+/(.+?)"')
+HTML_HEAD = "<html>\n<head><title>{title}</title></head>\n<body>\n"
+HTML_FOOT = "</body>\n</html>"
 
+def fix_outgoing_href(match):
+    return 'href="%s"' % urllib2.unquote(match.group(1))
+
+def convert_rss_to_html(source, target):
+    elements = etree.iterparse(source)
+    # title
+    element = next(elements)[1]
+    while element.tag != "title":
+        element = next(elements)[1]
+    print(HTML_HEAD.format(title=element.text), file=target)
+    # items
+    for _, element in elements:
+        if element.tag != "item":
+            continue
+        title = element.find("title").text.encode("utf-8")
+        print("<h2>%s</h2>" % title, file=target)
+        descel = element.find("description")
+        if descel is not None and descel.text:
+            desc = descel.text.rstrip("\n").encode("utf-8")
+            # process manual line breaks, e.g. adblock-plus
+            desc = desc.replace("\n", "\n<br/>").replace("<br/>\n", "<br/> \n")
+            # strip outgoing redirect
+            desc = OUTGOING_HREF.sub(fix_outgoing_href, desc)
+            print(desc, file=target)
+        else:
+            print("[no description]", file=target)
+        print("", file=target)
+    print(HTML_FOOT, file=target)
+
+def which(cmd):
+    path = os.environ.get("PATH", os.defpath).split(os.pathsep)
+    for dir in path:
+        name = os.path.join(dir, cmd)
+        if (os.path.exists(name) and os.access(name, os.F_OK | os.X_OK)
+            and not os.path.isdir(name)):
+            return name
+    return None
+
+def try_external_write(out, args, **kwargs):
+    prog = args[0]
+    if not which(prog):
+        print("failed to write %s: program not found: %s" % (out, prog), file=sys.stderr)
+        return False
+    try:
+        subprocess.check_call(args, **kwargs)
+        print("wrote %s" % out, file=sys.stderr)
+        return True
+    except Exception as e:
+        print("failed to write %s: %s" % (out, e), file=sys.stderr)
+        return False
 
 def main():
     parser = argparse.ArgumentParser(
-        description="fetch Version History of an addon from the Mozilla Extensions website.")
+        description="Fetch Version History of an addon from the Mozilla "
+        "Extensions website and convert it into a human-readable format.")
     parser.add_argument("extension",
-                        help="Extension short-name, as used on addons.mozilla.org.")
+        help="Extension short-name, as used on addons.mozilla.org.")
+    parser.add_argument("-f", "--html-file",
+        metavar="FILE", default="debian/upstream/changelog.html",
+        help="File to write to. Default: %(default)s.")
+    parser.add_argument("-p", "--plain-format", metavar="FORMAT",
+        choices=["text", "markdown", "rst"], default="none",
+        help="Generate a human-readable form of the changelog in the file "
+        "without the .html extension, using an external program. Possible "
+        "options are text (uses lynx(1)), markdown (pandoc(1)), or rst "
+        "(pandoc(1)). Default: %(default)s.")
     options = parser.parse_args()
 
-    url = URL_TEMPLATE.format(ext=options.extension)
-    try:
-        fp = urllib2.urlopen(url)
-    except urllib2.HTTPError as error:
-        print("%s: For extension '%s', error fetching '%s': %s" %
-              (os.path.basename(sys.argv[0]), options.extension, url, error),
-              file=sys.stderr)
+    progname = os.path.basename(sys.argv[0])
+
+    html_file = options.html_file
+    if not html_file.endswith(".html"):
+        print("%s: Output filename must end with .html: %s" %
+            (progname, html_file), file=sys.stderr)
         return 1
+    plain_file = html_file[:-5]
+
     try:
-        for _, element in etree.iterparse(fp):
-            if element.tag != "item":
-                continue
-            title = element.find("title").text.encode("utf-8")
-            print(title)
-            print("=" * len(title))
-            descel = element.find("description")
-            if descel is not None and descel.text:
-                print(descel.text.rstrip("\n").encode("utf-8"))
+        with open(html_file, "w") as target:
+            url = URL_TEMPLATE.format(ext=options.extension)
+            try:
+                source = urllib2.urlopen(url)
+            except urllib2.HTTPError as error:
+                print("%s: For extension '%s', error fetching '%s': %s" %
+                      (progname, options.extension, url, error), file=sys.stderr)
+                raise
+            try:
+                convert_rss_to_html(source, target)
+            finally:
+                source.close()
+        print("wrote %s" % html_file, file=sys.stderr)
+    except Exception as e:
+        print("failed to write %s: %s" % (html_file, e), file=sys.stderr)
+        #os.remove(html_file)
+        return 1
+
+    if options.plain_format == "text":
+        with open(plain_file, "w") as target:
+            if not try_external_write(plain_file,
+              ["lynx", "-dump", "-list_inline", "-width=84", html_file], stdout=target):
+                #os.remove(plain_file)
+                return 1
             else:
-                print("[no description]")
-            print("")
-    finally:
-        fp.close()
+                # 2 space indent is a bit more reasonable than lynx's 3 default
+                # width=84 above (3*2-2) effectively cancels the right margin
+                subprocess.call(["sed", "-i", "-e", "s/^   /  /g", plain_file])
+
+    elif options.plain_format == "markdown":
+        if not try_external_write(plain_file,
+          ["pandoc", "-i", html_file, "--columns=79", "-wmarkdown", "-o", plain_file]):
+            return 1
+
+    elif options.plain_format == "rst":
+        if not try_external_write(plain_file,
+          ["pandoc", "-i", html_file, "--columns=79", "-wrst", "-o", plain_file]):
+            return 1
+        else:
+            # work around https://github.com/jgm/pandoc/issues/1656
+            # by adding two spaces to all line-block continuation lines
+            subprocess.call(["sed", "-i", "-r",
+                "-e", r"/^\|/,/^ |^$/{s/^([^ |])/  \1/g}", plain_file])
+
+    return 0
+
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/man/amo-changelog.1 b/man/amo-changelog.1
index 197ff17..09ab68e 100644
--- a/man/amo-changelog.1
+++ b/man/amo-changelog.1
@@ -12,7 +12,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.TH XPI-REPACK "1" "April 2014" "amo-changelog" "mozilla-devscripts suite"
+.TH AMO-CHANGELOG "1" "December 2014" "amo-changelog" "mozilla-devscripts suite"
 .SH NAME
 amo-changelog \- fetch Version History of an addon
 .SH SYNOPSIS
@@ -37,20 +37,28 @@ Here is an example for debian/rules:
 .br
 override_dh_installchangelogs:
 .br
-	dh_installchangelogs debian/changelog.upstream
+	dh_installchangelogs debian/upstream/changelog.html debian/upstream/changelog
 
 \[char46]PHONY: get-orig-changelog
 .br
 get-orig-changelog:
 .br
-	amo-changelog adblock-plus > debian/changelog.upstream
+	amo-changelog -p rst adblock-plus
 
-Using this approach, one would save the output file (debian/changelog.upstream)
+Using this approach, one would save the output files debian/upstream/{changelog.html,changelog}
 as part of the Debian packaging. When updating the package with a new upstream
 release, one would run `debian/rules get-orig-changelog`
 .SH OPTIONS
 .TP
 \fB\-h\fR, \fB\-\-help\fR
 Display a brief help message.
+.TP
+\fB\-f\fR, \fB\-\-html\-file\fR
+File to write to. Default: debian/upstream/changelog.html
+.TP
+\fB\-p\fR, \fB\-\-plain\-format\fR
+Generate a human-readable form of the changelog in the file without the .html
+extension, using an external program. Possible options are text (uses lynx(1)),
+markdown (pandoc(1)), or rst (pandoc(1)). Default: none.
 .SH AUTHOR
 Jakub Wilk <jwilk at debian.org> and Ximin Luo <infinity0 at pwned.gg>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-mozext/mozilla-devscripts.git



More information about the Pkg-mozext-commits mailing list