[DRE-commits] r2132 - in packages: . libhtml-parser-ruby libhtml-parser-ruby/branches libhtml-parser-ruby/branches/upstream libhtml-parser-ruby/branches/upstream/current

lucas at alioth.debian.org lucas at alioth.debian.org
Sat Dec 8 11:58:49 UTC 2007


Author: lucas
Date: 2007-12-08 11:58:48 +0000 (Sat, 08 Dec 2007)
New Revision: 2132

Added:
   packages/libhtml-parser-ruby/
   packages/libhtml-parser-ruby/branches/
   packages/libhtml-parser-ruby/branches/upstream/
   packages/libhtml-parser-ruby/branches/upstream/current/
   packages/libhtml-parser-ruby/branches/upstream/current/htmltest.rb
   packages/libhtml-parser-ruby/branches/upstream/current/install.rb
   packages/libhtml-parser-ruby/branches/upstream/current/sgml-parser.rb
Log:
[svn-inject] Installing original source of libhtml-parser-ruby

Added: packages/libhtml-parser-ruby/branches/upstream/current/htmltest.rb
===================================================================
--- packages/libhtml-parser-ruby/branches/upstream/current/htmltest.rb	                        (rev 0)
+++ packages/libhtml-parser-ruby/branches/upstream/current/htmltest.rb	2007-12-08 11:58:48 UTC (rev 2132)
@@ -0,0 +1,23 @@
+#! /usr/local/bin/ruby
+
+require "html-parser"
+require "formatter"
+
+def htmltest(data)
+  w = DumbWriter.new
+  f = AbstractFormatter.new(w)
+  p = HTMLParser.new(f)
+  p.feed(data)
+  p.close
+end
+
+# Pick the input file: the first command-line argument when one is given,
+# otherwise 'test.html'.
+file = ARGV[0] || 'test.html'
+
+# Read with File.read rather than Kernel#open: it never treats a leading
+# "|" in the name as a command to run, and it closes the file handle even
+# when reading raises.
+data = File.read(file)
+
+htmltest(data)


Property changes on: packages/libhtml-parser-ruby/branches/upstream/current/htmltest.rb
___________________________________________________________________
Name: svn:executable
   + 

Added: packages/libhtml-parser-ruby/branches/upstream/current/install.rb
===================================================================
--- packages/libhtml-parser-ruby/branches/upstream/current/install.rb	                        (rev 0)
+++ packages/libhtml-parser-ruby/branches/upstream/current/install.rb	2007-12-08 11:58:48 UTC (rev 2132)
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+require 'rbconfig'
+require 'find'
+require 'ftools'
+require 'getoptlong'
+
+include Config
+
+$srcdir = CONFIG["srcdir"]
+$version = CONFIG["MAJOR"]+"."+CONFIG["MINOR"]
+$libdir = File.join(CONFIG["libdir"], "ruby", $version)
+$archdir = File.join($libdir, CONFIG["arch"])
+$site_libdir = CONFIG["sitedir"]
+if !$site_libdir
+  $site_libdir = $:.find {|x| x =~ /site_ruby$/}
+end
+if !$site_libdir
+  $site_libdir = File.join($libdir, "site_ruby")
+end
+
+def install_rb(libdir = "lib", files = nil)
+  path = []
+  dir = []
+  if files
+    path = files
+    dir |= [libdir]
+  else
+    Find.find(libdir) do |f|
+      next if (f = f[libdir.length+1..-1]) == nil
+      path.push f if File.ftype(File.join(libdir, f)) == 'file'
+      dir |= [File.dirname(f)]
+    end
+  end
+  for f in dir
+    if f == "."
+      File::makedirs($site_libdir)
+    else
+      File::makedirs(File.join($site_libdir, f))
+    end
+  end
+  for f in path
+    File::install(File.join(libdir, f), File.join($site_libdir, f), nil, true)
+  end
+end
+
+install_rb(".", %w[formatter.rb html-parser.rb sgml-parser.rb])


Property changes on: packages/libhtml-parser-ruby/branches/upstream/current/install.rb
___________________________________________________________________
Name: svn:executable
   + 

Added: packages/libhtml-parser-ruby/branches/upstream/current/sgml-parser.rb
===================================================================
--- packages/libhtml-parser-ruby/branches/upstream/current/sgml-parser.rb	                        (rev 0)
+++ packages/libhtml-parser-ruby/branches/upstream/current/sgml-parser.rb	2007-12-08 11:58:48 UTC (rev 2132)
@@ -0,0 +1,332 @@
+# A parser for SGML, using the derived class as static DTD.
+
+class SGMLParser
+
+  # Regular expressions used for parsing:
+  Interesting = /[&<]/
+  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
+                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
+                              '![^<>]*)?')
+
+  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
+  Charref = /&#([0-9]+)[^0-9]/
+
+  Starttagopen = /<[>a-zA-Z]/
+  Endtagopen = /<\/[<>a-zA-Z]/
+  Endbracket = /[<>]/
+  Special = /<![^<>]*>/
+  Commentopen = /<!--/
+  Commentclose = /--[ \t\n]*>/
+  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
+  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
+                            '(\s*=\s*' +
+                            "('[^']*'" +
+                            '|"[^"]*"' +
+                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')
+
+  Entitydefs =
+    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}
+
+  def initialize(verbose=false)
+    @verbose = verbose
+    reset
+  end
+
+  def reset
+    @rawdata = ''
+    @stack = []
+    @lasttag = '???'
+    @nomoretags = false
+    @literal = false
+  end
+
+  def has_context(gi)
+    @stack.include? gi
+  end
+
+  def setnomoretags
+    @nomoretags = true
+    @literal = true
+  end
+
+  def setliteral(*args)
+    @literal = true
+  end
+
+  def feed(data)
+    @rawdata << data
+    goahead(false)
+  end
+
+  def close
+    goahead(true)
+  end
+
+  def goahead(_end)
+    rawdata = @rawdata
+    i = 0
+    n = rawdata.length
+    while i < n
+      if @nomoretags
+        handle_data(rawdata[i..(n-1)])
+        i = n
+        break
+      end
+      j = rawdata.index(Interesting, i)
+      j = n unless j
+      if i < j
+        handle_data(rawdata[i..(j-1)])
+      end
+      i = j
+      break if (i == n)
+      if rawdata[i] == ?< #
+        if rawdata.index(Starttagopen, i) == i
+          if @literal
+            handle_data(rawdata[i, 1])
+            i += 1
+            next
+          end
+          k = parse_starttag(i)
+          break unless k
+          i = k
+          next
+        end
+        if rawdata.index(Endtagopen, i) == i
+          k = parse_endtag(i)
+          break unless k
+          i = k
+          @literal = false
+          next
+        end
+        if rawdata.index(Commentopen, i) == i
+          if @literal
+            handle_data(rawdata[i,1])
+            i += 1
+            next
+          end
+          k = parse_comment(i)
+          break unless k
+          i += k
+          next
+        end
+        if rawdata.index(Special, i) == i
+          if @literal
+            handle_data(rawdata[i, 1])
+            i += 1
+            next
+          end
+          k = parse_special(i)
+          break unless k
+          i += k
+          next
+        end
+      elsif rawdata[i] == ?& #
+        if rawdata.index(Charref, i) == i
+          i += $&.length
+          handle_charref($1)
+          i -= 1 unless rawdata[i-1] == ?;
+          next
+        end
+        if rawdata.index(Entityref, i) == i
+          i += $&.length
+          handle_entityref($1)
+          i -= 1 unless rawdata[i-1] == ?;
+          next
+        end
+      else
+        raise RuntimeError, 'neither < nor & ??'
+      end
+      # We get here only if incomplete matches but
+      # nothing else
+      match = rawdata.index(Incomplete, i)
+      unless match == i
+        handle_data(rawdata[i, 1])
+        i += 1
+        next
+      end
+      j = match + $&.length
+      break if j == n # Really incomplete
+      handle_data(rawdata[i..(j-1)])
+      i = j
+    end
+    # end while
+    if _end and i < n
+      handle_data(@rawdata[i..(n-1)])
+      i = n
+    end
+    @rawdata = rawdata[i..-1]
+  end
+
+  def parse_comment(i)
+    rawdata = @rawdata
+    if rawdata[i, 4] != '<!--'
+      raise RuntimeError, 'unexpected call to handle_comment'
+    end
+    match = rawdata.index(Commentclose, i)
+    return nil unless match
+    matched_length = $&.length
+    j = match
+    handle_comment(rawdata[i+4..(j-1)])
+    j = match + matched_length
+    return j-i
+  end
+
+  def parse_starttag(i)
+    rawdata = @rawdata
+    j = rawdata.index(Endbracket, i + 1)
+    return nil unless j
+    attrs = []
+    if rawdata[i+1] == ?> #
+      # SGML shorthand: <> == <last open tag seen>
+      k = j
+      tag = @lasttag
+    else
+      match = rawdata.index(Tagfind, i + 1)
+      unless match
+        raise RuntimeError, 'unexpected call to parse_starttag'
+      end
+      k = i + 1 + ($&.length)
+      tag = $&.downcase
+      @lasttag = tag
+    end
+    while k < j
+      break unless rawdata.index(Attrfind, k)
+      matched_length = $&.length
+      attrname, rest, attrvalue = $1, $2, $3
+      if not rest
+        attrvalue = '' # was: = attrname
+      elsif (attrvalue[0] == ?' && attrvalue[-1] == ?') or
+          (attrvalue[0] == ?" && attrvalue[-1,1] == ?")
+        attrvalue = attrvalue[1..-2]
+      end
+      attrs << [attrname.downcase, attrvalue]
+      k += matched_length
+    end
+    if rawdata[j] == ?> #
+      j += 1
+    end
+    finish_starttag(tag, attrs)
+    return j
+  end
+
+  def parse_endtag(i)
+    rawdata = @rawdata
+    j = rawdata.index(Endbracket, i + 1)
+    return nil unless j
+    tag = (rawdata[i+2..j-1].strip).downcase
+    if rawdata[j] == ?> #
+      j += 1
+    end
+    finish_endtag(tag)
+    return j
+  end
+
+  def finish_starttag(tag, attrs)
+    method = 'start_' + tag
+    if self.respond_to?(method)
+      @stack << tag
+      handle_starttag(tag, method, attrs)
+      return 1
+    else
+      method = 'do_' + tag
+      if self.respond_to?(method)
+        handle_starttag(tag, method, attrs)
+        return 0
+      else
+        unknown_starttag(tag, attrs)
+        return -1
+      end
+    end
+  end
+
+  def finish_endtag(tag)
+    if tag == ''
+      found = @stack.length - 1
+      if found < 0
+        unknown_endtag(tag)
+        return
+      end
+    else
+      unless @stack.include? tag
+        method = 'end_' + tag
+        unless self.respond_to?(method)
+          unknown_endtag(tag)
+        end
+        return
+      end
+      found = @stack.index(tag) #or @stack.length
+    end
+    while @stack.length > found
+      tag = @stack[-1]
+      method = 'end_' + tag
+      if respond_to?(method)
+        handle_endtag(tag, method)
+      else
+        unknown_endtag(tag)
+      end
+      @stack.pop
+    end
+  end
+
+  def parse_special(i)
+    rawdata = @rawdata
+    match = rawdata.index(Endbracket, i+1)
+    return nil unless match
+    matched_length = $&.length
+    handle_special(rawdata[i+1..(match-1)])
+    return match - i + matched_length
+  end
+
+  def handle_starttag(tag, method, attrs)
+    self.send(method, attrs)
+  end
+
+  def handle_endtag(tag, method)
+    self.send(method)
+  end
+
+  def report_unbalanced(tag)
+    if @verbose
+      print '*** Unbalanced </' + tag + '>', "\n"
+      print '*** Stack:', self.stack, "\n"
+    end
+  end
+
+  def handle_charref(name)
+    n = Integer(name)
+    if !(0 <= n && n <= 255)
+      unknown_charref(name)
+      return
+    end
+    handle_data(n.chr)
+  end
+
+  def handle_entityref(name)
+    table = Entitydefs
+    if table.include?(name)
+      handle_data(table[name])
+    else
+      unknown_entityref(name)
+      return
+    end
+  end
+
+  def handle_data(data)
+  end
+
+  def handle_comment(data)
+  end
+
+  def handle_special(data)
+  end
+
+  def unknown_starttag(tag, attrs)
+  end
+  def unknown_endtag(tag)
+  end
+  def unknown_charref(ref)
+  end
+  def unknown_entityref(ref)
+  end
+
+end




More information about the Pkg-ruby-extras-commits mailing list