[Debian-islamic-commits] [SCM] Packaging for Thawab branch, master, updated. upstream/3.0.10-58-g9dca20d

أحمد المحمودي (Ahmed El-Mahmoudy) aelmahmoudy at sabily.org
Thu Jul 28 12:56:46 UTC 2011


The following commit has been merged in the master branch:
commit 2ead257346de512bb7a5a8355ae379c1268c6094
Author: أحمد المحمودي (Ahmed El-Mahmoudy) <aelmahmoudy at sabily.org>
Date:   Mon Feb 21 23:06:51 2011 +0200

    Imported Upstream version 3.0.10+1git9e448ba

diff --git a/Thawab/shamelaUtils.py b/Thawab/shamelaUtils.py
index b721828..0a49327 100644
--- a/Thawab/shamelaUtils.py
+++ b/Thawab/shamelaUtils.py
@@ -66,7 +66,7 @@ schema_fix_text=re.compile('Memo/Hyperlink',re.I)
 schema_fix_int=re.compile('(Boolean|Byte|Byte|Numeric|Replication ID|(\w+ )?Integer)',re.I)
 sqlite_cols_re=re.compile("\((.*)\)",re.M | re.S)
 no_sql_comments=re.compile('^--.*$',re.M)
-shamela_footers_re=re.compile('^(_{4,})$',re.M)
+shamela_footers_re=re.compile(u'^(¬?_{4,})$',re.M)
 digits_re=re.compile(r'\d+')
 no_w_re=re.compile(ur'[^A-Za-zابتثجحخدذرزسشصضطظعغفقكلمنهوي\s]')
 # one to one transformations that does not change chars order
@@ -165,7 +165,6 @@ class ShamelaSqlite(object):
     """Internal function used by importTable"""
     if prefix and sql_cmd[0].startswith('INSERT INTO '): sql_cmd[0]='INSERT INTO '+prefix+sql_cmd[0][12:]
     sql=''.join(sql_cmd)
-    open("C:\\th.log","at+").write(sql+"\n")
     self.c.execute(sql)
 
   def __schemaGetCols(self, r):
@@ -217,7 +216,7 @@ class ShamelaSqlite(object):
     elif is_replace: prefix="OR REPLACE INTO "
     prefix+=tb_prefix
     for l in pipe.stdout:
-      l=l.replace('\r','')
+      l=l.replace('\r','\n')
       # output encoding in mdbtools in windows is cp1256, this is a bug in it
       if self.encoding_fix_needed==None:
         try: l.decode('UTF-8')
@@ -398,6 +397,17 @@ def set_get_xref(xref, h_tags, sh, bkid, pg_id, matn, matnid):
     xref=sh.get_xref(matn, matnid)
     if xref: h_tags['embed.original.section']=xref
 
+ss_re=re.compile(" +")
+re_ss_re=re.compile("( \*){2,}")
+
+def ss(txt):
+  """squeeze spaces"""
+  return ss_re.sub(" ", txt)
+
+def re_ss(txt):
+  """squeeze spaces in re"""
+  return re_ss_re.sub(" *", ss(txt))
+
 
 def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=ur'\((\d+)\)', ft_prefix_len=1, ft_suffix_len=1):
   """
@@ -428,6 +438,7 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
   # NOTE: we only need page_id,title and depth, sub is only used to sort them
   toc_ls=filter(lambda i: i[2] and i[1], [list(i) for i in r])
   if not toc_ls: raise TypeError # no text in the book
+  if toc_ls[0][0]!=1: toc_ls.insert(0, [1, sh.getBookMeta(bkid)['kitab'].replace('_',' '),toc_ls[0][2]])
   toc_hash=map(lambda i: (i[1][0],i[0]),enumerate(toc_ls))
   # toc_hash.sort(lambda a,b: cmp(a[0],b[0])) # FIXME: this is not needed!
   toc_hash=dict(map(lambda j: (j[0],map(lambda k:k[1], j[1])), groupby(toc_hash,lambda i: i[0])))
@@ -493,30 +504,30 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
       h_p=no_w_re.sub(' ', h.translate(sh_normalize_tb)).strip()
       if h_p: # if normalized h_p is not empty
         # NOTE: no need for map h_p on re.escape() because it does not contain special chars
-        h_re_entire_line=re.compile(ur"^\s*%s\s*$" % ur" *".join(list(h_p)), re.M)
+        h_re_entire_line=re.compile(re_ss(ur"^\s*%s\s*$" % ur" *".join(list(h_p))), re.M)
         if _shamelaFindHeadings(txt, page_id, d, h, h_re_entire_line, ix, j, 2): continue
 
       if not txt_no_d: txt_no_d=txt.translate(sh_digits_to_spaces_tb)
       h_p_no_d=h_p.translate(sh_digits_to_spaces_tb).strip()
       if h_p_no_d:
-        h_re_entire_line_no_d=re.compile(ur"^\s*%s\s*$" % ur" *".join(list(h_p_no_d)), re.M)
+        h_re_entire_line_no_d=re.compile(re_ss(ur"^\s*%s\s*$" % ur" *".join(list(h_p_no_d))), re.M)
         if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_entire_line_no_d, ix, j, 3): continue
 
       # at the beginning of the line
       if _shamelaFindExactHeadings(page_txt, page_id, "\n%s", d, h, ix,j, 4): continue
       if h_p:
-        h_re_line_start=re.compile(ur"^\s*%s\s*" % ur" *".join(list(h_p)), re.M)
+        h_re_line_start=re.compile(re_ss(ur"^\s*%s\s*" % ur" *".join(list(h_p))), re.M)
         if _shamelaFindHeadings(txt, page_id, d, h, h_re_line_start, ix, j, 5): continue
       if h_p_no_d:
-        h_re_line_start_no_d=re.compile(ur"^\s*%s\s*" % ur" *".join(list(h_p_no_d)), re.M)
+        h_re_line_start_no_d=re.compile(re_ss(ur"^\s*%s\s*" % ur" *".join(list(h_p_no_d))), re.M)
         if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_line_start_no_d, ix, j, 6): continue
       # any where in the line
       if _shamelaFindExactHeadings(page_txt, page_id, "%s", d, h, ix,j, 7): continue
       if h_p:
-        h_re_any_ware=re.compile(ur"\s*%s\s*" % ur" *".join(list(h_p)), re.M)
+        h_re_any_ware=re.compile(re_ss(ur"\s*%s\s*" % ur" *".join(list(h_p))), re.M)
         if _shamelaFindHeadings(txt, page_id, d, h, h_re_any_ware, ix, j, 8): continue
       if h_p_no_d:
-        h_re_any_ware_no_d=re.compile(ur"\s*%s\s*" % ur" *".join(list(h_p_no_d)), re.M)
+        h_re_any_ware_no_d=re.compile(re_ss(ur"\s*%s\s*" % ur" *".join(list(h_p_no_d))), re.M)
         if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_any_ware, ix, j, 9): continue
       # if we reached here then head is not found
       # place it just after last one
@@ -615,7 +626,6 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
     #  for j,k in footnotes_cnd:
     #    print "j=[%s] k=[%s]" % (j,k)
     #  # raise KeyError
-
     if toc_hash.has_key(pg_id):
       hno_pop_needed=False
     elif hno!=None and hno!=last_hno:
@@ -702,7 +712,6 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
     depths.append(f.depth)
     #last=pg_body[f.end:]+'\n'
     last=shamela_shift_footers_re.sub(footer_shift_cb, pg_body[f.end:]+'\n')
-    if footnotes_cnd and pg_id!=154: print " *** stall footnotes at pg_id=",pg_id; # raise
     if footnotes_cnd:
       last+="\n==========[\n"+pop_footers(footnotes_cnd)+"\n]==========\n"
     
diff --git a/Thawab/webApp.py b/Thawab/webApp.py
index 6cb9c48..00ff0d4 100644
--- a/Thawab/webApp.py
+++ b/Thawab/webApp.py
@@ -275,7 +275,7 @@ Allow: /
       # print R
       if not R: return {'t':0,'c':0,'h':''}
       self.searchCache.append(h,R)
-      r={'t':R.runtime,'c':R.scored,'h':h}
+      r={'t':R.runtime,'c':len(R),'h':h}
     elif args[0]=='searchResults':
       h=rq.q.getfirst('h','')
       try: i=int(rq.q.getfirst('i','0'))
@@ -284,11 +284,11 @@ Allow: /
       except TypeError: c=0
       R=self.searchCache.get(h)
       if R==None: return {'c':0}
-      C=R.scored
+      C=len(R)
       if i>=C: return {'c':0}
       c=min(c,C-i)
       r={'c':c,'a':[]}
-      n=100.0/R.scores[0]
+      n=100.0/R[0].score
       j=0
       for j in range(i,i+c):
         name=R[j]['kitab']
@@ -298,7 +298,7 @@ Allow: /
         r['a'].append({
         'i':j,'n':'_i'+R[j]['nodeIdNum'],
         'k':m['kitab'], 'a':prettyId(m['author']), 'y':tryInt(m['year']),
-        't':R[j]['title'], 'r':'%4.1f' % (n*R.scores[j])})
+        't':R[j]['title'], 'r':'%4.1f' % (n*R[j].score)})
         j+=1
       r[c]=j;
     else: r={}
diff --git a/Thawab/whooshSearchEngine.py b/Thawab/whooshSearchEngine.py
index 1c37d9e..5cf4741 100644
--- a/Thawab/whooshSearchEngine.py
+++ b/Thawab/whooshSearchEngine.py
@@ -17,12 +17,14 @@ Copyright © 2008, Muayyad Alsadi <alsadi at ojuba.org>
 
 """
 import sys, os, os.path, re
+import shutil
 from tags import *
 from meta import prettyId,makeId
 
 from whoosh import query
-from whoosh.index import EmptyIndexError, create_in, open_dir
+from whoosh.index import EmptyIndexError, create_in, open_dir, IndexVersionError
 from whoosh.highlight import highlight, SentenceFragmenter, BasicFragmentScorer, FIRST, HtmlFormatter
+from whoosh.filedb.fileindex import _INDEX_VERSION as whoosh_ix_ver
 from whoosh.filedb.filestore import FileStorage
 from whoosh.fields import Schema, ID, IDLIST, TEXT
 from whoosh.formats import Frequency
@@ -36,227 +38,8 @@ def stemfn(word): return stemArabic(stem(word))
 # word_re=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]"
 analyzer=StandardAnalyzer(expression=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+(?:\.?[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+)*") | StemFilter(stemfn)
 
-#from whoosh.fields import FieldType, KeywordAnalyzer
-#try: from whoosh.fields import Existence
-#except ImportError: from whoosh.fields import Existance as Existence
-
-#class TAGSLIST(FieldType):
-#    """
-#    Configured field type for fields containing space-separated or comma-separated
-#    keyword-like data (such as tags). The default is to not store positional information
-#    (so phrase searching is not allowed in this field) and to not make the field scorable.
-#    
-#    unlike KEYWORD field type, TAGS list does not count frequency just existence.
-#    """
-#    
-#    def __init__(self, stored = False, lowercase = False, commas = False,
-#                 scorable = False, unique = False, field_boost = 1.0):
-#        """
-#        :stored: Whether to store the value of the field with the document.
-#        :comma: Whether this is a comma-separated field. If this is False
-#            (the default), it is treated as a space-separated field.
-#        :scorable: Whether this field is scorable.
-#        """
-#        
-#        ana = KeywordAnalyzer(lowercase = lowercase, commas = commas)
-#        self.format = Existence(analyzer = ana, field_boost = field_boost)
-#        self.scorable = scorable
-#        self.stored = stored
-#        self.unique = unique
-
-from whoosh.qparser import MultifieldParser, FieldAliasPlugin, QueryParserError, BoostPlugin, GroupPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin, Group, AndGroup, OrGroup, AndNotGroup, AndMaybeGroup, Singleton, BasicSyntax, Plugin, White, Token
-
-from whoosh.qparser import CompoundsPlugin, NotPlugin, WildcardPlugin
-
-class ThCompoundsPlugin(Plugin):
-    """Adds the ability to use &, |, &~, and &! to specify
-    query constraints.
-    
-    This plugin is included in the default parser configuration.
-    """
-    
-    def tokens(self):
-        return ((ThCompoundsPlugin.AndNot, -10), (ThCompoundsPlugin.AndMaybe, -5), (ThCompoundsPlugin.And, 0),
-                (ThCompoundsPlugin.Or, 0))
-    
-    def filters(self):
-        return ((ThCompoundsPlugin.do_compounds, 600), )
-
-    @staticmethod
-    def do_compounds(parser, stream):
-        newstream = stream.empty()
-        i = 0
-        while i < len(stream):
-            t = stream[i]
-            ismiddle = newstream and i < len(stream) - 1
-            if isinstance(t, Group):
-                newstream.append(ThCompoundsPlugin.do_compounds(parser, t))
-            elif isinstance(t, (ThCompoundsPlugin.And, ThCompoundsPlugin.Or)):
-                if isinstance(t, ThCompoundsPlugin.And):
-                    cls = AndGroup
-                else:
-                    cls = OrGroup
-                
-                if cls != type(newstream) and ismiddle:
-                    last = newstream.pop()
-                    rest = ThCompoundsPlugin.do_compounds(parser, cls(stream[i+1:]))
-                    newstream.append(cls([last, rest]))
-                    break
-            
-            elif isinstance(t, ThCompoundsPlugin.AndNot):
-                if ismiddle:
-                    last = newstream.pop()
-                    i += 1
-                    next = stream[i]
-                    if isinstance(next, Group):
-                        next = ThCompoundsPlugin.do_compounds(parser, next)
-                    newstream.append(AndNotGroup([last, next]))
-            
-            elif isinstance(t, ThCompoundsPlugin.AndMaybe):
-                if ismiddle:
-                    last = newstream.pop()
-                    i += 1
-                    next = stream[i]
-                    if isinstance(next, Group):
-                        next = ThCompoundsPlugin.do_compounds(parser, next)
-                    newstream.append(AndMaybeGroup([last, next]))
-            else:
-                newstream.append(t)
-            i += 1
-        
-        return newstream
-    
-    class And(Singleton):
-        expr = re.compile(u"&")
-        
-    class Or(Singleton):
-        expr = re.compile(u"\|")
-        
-    class AndNot(Singleton):
-        expr = re.compile(u"&!")
-        
-    class AndMaybe(Singleton):
-        expr = re.compile(u"&~") # when using Arabic keyboard ~ is shift+Z
-
-class ThFieldsPlugin(Plugin):
-    """Adds the ability to specify the field of a clause using a colon.
-    
-    This plugin is included in the default parser configuration.
-    """
-    
-    def tokens(self):
-        return ((ThFieldsPlugin.Field, 0), )
-    
-    def filters(self):
-        return ((ThFieldsPlugin.do_fieldnames, 100), )
-
-    @staticmethod
-    def do_fieldnames(parser, stream):
-        newstream = stream.empty()
-        newname = None
-        for i, t in enumerate(stream):
-            if isinstance(t, ThFieldsPlugin.Field):
-                valid = False
-                if i < len(stream) - 1:
-                    next = stream[i+1]
-                    if not isinstance(next, (White, ThFieldsPlugin.Field)):
-                        newname = t.fieldname
-                        valid = True
-                if not valid:
-                    newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
-                continue
-            
-            if isinstance(t, Group):
-                t = ThFieldsPlugin.do_fieldnames(parser, t)
-            newstream.append(t.set_fieldname(newname))
-            newname = None
-        
-        return newstream
-    
-    class Field(Token):
-        expr = re.compile(u"(\w[\w\d]*):", re.U)
-        
-        def __init__(self, fieldname):
-            self.fieldname = fieldname
-        
-        def __repr__(self):
-            return "<%s:>" % self.fieldname
-        
-        def set_fieldname(self, fieldname):
-            return self.__class__(fieldname)
-        
-        @classmethod
-        def create(cls, parser, match):
-            return cls(match.group(1))
-
-class ThNotPlugin(Plugin):
-    """Adds the ability to negate a clause by preceding it with !.
-    
-    This plugin is included in the default parser configuration.
-    """
-    
-    def tokens(self):
-        return ((ThNotPlugin.Not, 0), )
-    
-    def filters(self):
-        return ((ThNotPlugin.do_not, 800), )
-    
-    @staticmethod
-    def do_not(parser, stream):
-        newstream = stream.empty()
-        notnext = False
-        for t in stream:
-            if isinstance(t, ThNotPlugin.Not):
-                notnext = True
-                continue
-            
-            if notnext:
-                t = NotGroup([t])
-            newstream.append(t)
-            notnext = False
-            
-        return newstream
-    
-    class Not(Singleton):
-        expr = re.compile(u"!")
-
-class ThWildcardPlugin(Plugin):
-    """Adds the ability to specify wildcard queries by using asterisk and
-    question mark characters in terms. Note that these types can be very
-    performance and memory intensive. You may consider not including this
-    type of query.
-    
-    This plugin is included in the default parser configuration.
-    """
-    
-    def tokens(self):
-        return ((ThWildcardPlugin.Wild, 0), )
-    
-    class Wild(BasicSyntax):
-        expr = re.compile(u"[^ \t\r\n*?]*(\\*|\\?|؟)\\S*")
-        qclass = query.Wildcard
-        
-        def __repr__(self):
-            r = "%s:wild(%r)" % (self.fieldname, self.text)
-            if self.boost != 1.0:
-                r += "^%s" % self.boost
-            return r
-        
-        @classmethod
-        def create(cls, parser, match):
-            return cls(match.group(0).replace(u'؟',u'?'))
-
-def ThMultifieldParser(schema=None):
-  plugins = (BoostPlugin, ThCompoundsPlugin, ThFieldsPlugin, GroupPlugin,
-      ThNotPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin,
-      ThWildcardPlugin, FieldAliasPlugin({
-        u"kitab":(u"كتاب",),
-        u"title":(u"عنوان",),
-        u"tags":(u"وسوم",)})
-      )
-  p = MultifieldParser(("title","content",), schema=schema, plugins=plugins)
-  # to add a plugin use: p.add_plugin(XYZ)
-  return p
+from whoosh.qparser import FieldAliasPlugin
+from whooshSymbolicQParser import MultifieldSQParser
 
 class ExcerptFormatter(object):
     def __init__(self, between = "..."):
@@ -288,11 +71,14 @@ class SearchEngine(BaseSearchEngine):
   def __init__(self, th):
     BaseSearchEngine.__init__(self, th, False)
     self.__ix_writer = None
-    ix_dir=os.path.join(th.prefixes[0],'index')
+    ix_dir=os.path.join(th.prefixes[0],'index', "ix_"+str(whoosh_ix_ver))
+    if not os.path.isdir(ix_dir): os.makedirs(ix_dir)
     # try to load a pre-existing index
     try: self.indexer=open_dir(ix_dir)
-    except EmptyIndexError:
+    except (EmptyIndexError, IndexVersionError):
       # create a new one
+      try: shutil.rmtree(ix_dir, True); os.makedirs(ix_dir)
+      except OSError: pass
       schema = Schema(
         kitab=ID(stored=True),
         vrr=ID(stored=True,unique=False), # version release
@@ -304,7 +90,12 @@ class SearchEngine(BaseSearchEngine):
       )
       self.indexer=create_in(ix_dir,schema)
     #self.__ix_qparser = ThMultifieldParser(self.th, ("title","content",), schema=self.indexer.schema)
-    self.__ix_qparser = ThMultifieldParser(self.indexer.schema)
+    self.__ix_qparser = MultifieldSQParser(("title","content",), self.indexer.schema)
+    self.__ix_qparser.add_plugin(FieldAliasPlugin({
+        u"kitab":(u"كتاب",),
+        u"title":(u"عنوان",),
+        u"tags":(u"وسوم",)})
+    )
     #self.__ix_pre=whoosh.query.Prefix
     self.__ix_searcher= self.indexer.searcher()
 
@@ -315,7 +106,9 @@ class SearchEngine(BaseSearchEngine):
     """
     return a Version-Release string if in index, otherwise return None
     """
-    d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
+    try: d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
+    except TypeError: return None
+    except KeyError: return None
     if d: return d['vrr']
     return None
 
@@ -342,7 +135,7 @@ class SearchEngine(BaseSearchEngine):
     txt=node.toText(ub)
     s=set()
     #results.query.all_terms(s) # return (field,term) pairs 
-    results.query.existing_terms(self.indexer.reader(), s, phrases=True) # return (field,term) pairs  # self.self.__ix_searcher.reader()
+    results.q.existing_terms(self.indexer.reader(), s, phrases=True) # return (field,term) pairs  # self.self.__ix_searcher.reader()
     terms=dict(
       map(lambda i: (i[1],i[0]),
       filter(lambda j: j[0]=='content' or j[0]=='title', s))).keys()
diff --git a/Thawab/whooshSymbolicQParser.py b/Thawab/whooshSymbolicQParser.py
new file mode 100644
index 0000000..2adfba9
--- /dev/null
+++ b/Thawab/whooshSymbolicQParser.py
@@ -0,0 +1,69 @@
+# -*- coding: UTF-8 -*-
+"""
+
+Copyright © 2010, Muayyad Alsadi <alsadi at ojuba.org>
+
+"""
+
+import sys, os, os.path, re
+
+from whoosh import query
+from whoosh.qparser import *
+
+class SFieldsPlugin(Plugin):
+    """This plugin does not require an English field name, so that my field aliases work"""
+    
+    def tokens(self, parser):
+        return ((SFieldsPlugin.Field, 0), )
+    
+    def filters(self, parser):
+        return ((SFieldsPlugin.do_fieldnames, 100), )
+
+    @staticmethod
+    def do_fieldnames(parser, stream):
+        newstream = stream.empty()
+        newname = None
+        for i, t in enumerate(stream):
+            if isinstance(t, SFieldsPlugin.Field):
+                valid = False
+                if i < len(stream) - 1:
+                    next = stream[i+1]
+                    if not isinstance(next, (White, SFieldsPlugin.Field)):
+                        newname = t.fieldname
+                        valid = True
+                if not valid:
+                    newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
+                continue
+            
+            if isinstance(t, Group):
+                t = SFieldsPlugin.do_fieldnames(parser, t)
+            newstream.append(t.set_fieldname(newname))
+            newname = None
+        
+        return newstream
+    
+    class Field(Token):
+        expr = re.compile(u"(\w[\w\d]*):", re.U)
+        
+        def __init__(self, fieldname):
+            self.fieldname = fieldname
+        
+        def __repr__(self):
+            return "<%s:>" % self.fieldname
+        
+        def set_fieldname(self, fieldname):
+            return self.__class__(fieldname)
+        
+        @classmethod
+        def create(cls, parser, match):
+            return cls(match.group(1))
+
+def MultifieldSQParser(fieldnames, schema=None, fieldboosts=None, **kwargs):
+  p = MultifieldParser(fieldnames, schema, fieldboosts, **kwargs)
+  cp = OperatorsPlugin(And=r"&", Or=r"\|", AndNot=r"&!", AndMaybe=r"&~", Not=r'!')
+  p.replace_plugin(cp)
+  # FIXME: try to upsteam SFieldsPlugin
+  p.remove_plugin_class(FieldsPlugin)
+  p.add_plugin(SFieldsPlugin)
+  return p
+
diff --git a/bok2ki.py b/bok2ki.py
new file mode 100755
index 0000000..875b35e
--- /dev/null
+++ b/bok2ki.py
@@ -0,0 +1,105 @@
+#! /usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+Script to import .bok files
+Copyright © 2008-2010, Muayyad Alsadi <alsadi at ojuba.org>
+
+    Released under terms of Waqf Public License.
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the latest version Waqf Public License as
+    published by Ojuba.org.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+    The Latest version of the license can be found on
+    "http://waqf.ojuba.org/license"
+
+"""
+
+import sys, os, os.path, glob, shutil, re
+import sqlite3
+from getopt import getopt, GetoptError
+
+def usage():
+    print '''\
+Usage: %s [-i] [-m DIR] FILES ...
+Where:
+\t-i\t\t- in-memory
+\t-m DIR\t\t- move successfully imported BOK files into DIR
+\t--ft-prefix=FOOTER_PREFIX	default is "(¬"
+\t--ft-suffix=FOOTER_SUFFIX	default is ")"
+\t--ft-leading=[0|1]	should footnote be match at line start only, default is 0
+\t--ft-sp=[0|1|2]	no, single or many whitespaces, default is 0 
+\t--bft-prefix=FOOTER_PREFIX	footnote anchor in body prefix, default is "(¬"
+\t--bft-suffix=FOOTER_SUFFIX	footnote anchor in body suffix, default is ")"
+\t--bft-sp=[0|1|2]	no, single or many whitespaces, default is 0 
+
+the generated files will be moved into db in thawab prefix (usually ~/.thawab/db/)
+''' % os.path.basename(sys.argv[0])
+
+try:
+  opts, args = getopt(sys.argv[1:], "im:", ["help", 'ft-prefix=', 'ft-suffix=', 'bft-prefix=', 'bft-suffix=', 'ft-leading=', 'ft-sp=', 'bft-sp='])
+except GetoptError, err:
+  print str(err) # will print something like "option -a not recognized"
+  usage()
+  sys.exit(1)
+
+if not args:
+  print "please provide at least one .bok files"
+  usage()
+  sys.exit(1)
+
+opts=dict(opts)
+
+def progress(msg, p, *a, **kw): print " ** [%g%% completed] %s" % (p,msg)
+
+from Thawab.core import ThawabMan
+from Thawab.shamelaUtils import ShamelaSqlite,shamelaImport
+th=ThawabMan()
+thprefix=th.prefixes[0]
+
+if not opts.has_key('-i'): db_fn=os.path.expanduser('~/bok2sql.db')
+else: db_fn=None
+
+#    ¬ U+00AC NOT SIGN
+ft_prefix=opts.get('--ft-prefix','(¬').decode('utf-8'); ft_prefix_len=len(ft_prefix)
+ft_suffix=opts.get('--ft-suffix',')').decode('utf-8'); ft_suffix_len=len(ft_suffix)
+ft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--ft-sp','0'))]
+ft_at_line_start=int(opts.get('--ft-leading','0'))
+footnote_re=(ft_at_line_start and u'^\s*' or u'') + re.escape(ft_prefix)+ft_sp+ur'(\d+)'+ft_sp+re.escape(ft_suffix)
+
+bft_prefix=opts.get('--bft-prefix','(¬').decode('utf-8');
+bft_suffix=opts.get('--bft-suffix',')').decode('utf-8');
+bft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--bft-sp','0'))]
+body_footnote_re=re.escape(bft_prefix)+bft_sp+ur'(\d+)'+bft_sp+re.escape(bft_suffix)
+
+
+
+for fn in args:
+  if db_fn:
+    if os.path.exists(db_fn): os.unlink(db_fn)
+    cn=sqlite3.connect(db_fn, isolation_level=None)
+  else: cn=None
+  sh=ShamelaSqlite(fn, cn, 0 , 0, progress)
+  sh.toSqlite()
+  for bkid in sh.getBookIds():
+    ki=th.mktemp()
+    c=ki.seek(-1,-1)
+    
+    m=shamelaImport(c, sh, bkid, footnote_re, body_footnote_re, ft_prefix_len, ft_suffix_len)
+    c.flush()
+    print "moving %s to %s" % (ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
+    shutil.move(ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
+  if opts.has_key('-m'):
+    dd=opts['-m']
+    if not os.path.isdir(dd):
+      try: os.makedirs(dd)
+      except OSError: pass
+    if os.path.isdir(dd):
+      dst=os.path.join(dd,os.path.basename(fn))
+      print "moving %s to %s" % (fn,dst)
+      shutil.move(fn, dst)
+    else: print "could not move .bok files, target directory does not exists"
+
diff --git a/po/ar.po b/po/ar.po
index a61f329..079d92a 100644
--- a/po/ar.po
+++ b/po/ar.po
@@ -1,7 +1,7 @@
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
-# This file is distributed under the same license as the PACKAGE package.
-# FIRST AUTHOR <EMAIL at ADDRESS>, YEAR.
+# Translation of thawab templates to Arabic
+# Copyright (C) 2008-2010, ojuba.org <core at ojuba.org>
+# This file is distributed under the same license as the thawab package.
+# Muayyad Saleh Alsadi <alsadi at ojuba.org>, 2010
 #
 #, fuzzy
 msgid ""
diff --git a/po/de.po b/po/de.po
index 570f621..38fae0e 100644
--- a/po/de.po
+++ b/po/de.po
@@ -1,7 +1,7 @@
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
-# This file is distributed under the same license as the PACKAGE package.
-# FIRST AUTHOR <EMAIL at ADDRESS>, YEAR.
+# Translation of thawab templates to Arabic
+# Copyright (C) 2008-2010, ojuba.org <core at ojuba.org>
+# This file is distributed under the same license as the thawab package.
+# cegerxwin <cegerxwin at web.de>, 2010
 #
 msgid ""
 msgstr ""

-- 
Packaging for Thawab


