[Debian-islamic-commits] [SCM] Packaging for Thawab branch, master, updated. upstream/3.0.10-58-g9dca20d
أحمد المحمودي (Ahmed El-Mahmoudy)
aelmahmoudy at sabily.org
Thu Jul 28 12:56:46 UTC 2011
The following commit has been merged in the master branch:
commit 2ead257346de512bb7a5a8355ae379c1268c6094
Author: أحمد المحمودي (Ahmed El-Mahmoudy) <aelmahmoudy at sabily.org>
Date: Mon Feb 21 23:06:51 2011 +0200
Imported Upstream version 3.0.10+1git9e448ba
diff --git a/Thawab/shamelaUtils.py b/Thawab/shamelaUtils.py
index b721828..0a49327 100644
--- a/Thawab/shamelaUtils.py
+++ b/Thawab/shamelaUtils.py
@@ -66,7 +66,7 @@ schema_fix_text=re.compile('Memo/Hyperlink',re.I)
schema_fix_int=re.compile('(Boolean|Byte|Byte|Numeric|Replication ID|(\w+ )?Integer)',re.I)
sqlite_cols_re=re.compile("\((.*)\)",re.M | re.S)
no_sql_comments=re.compile('^--.*$',re.M)
-shamela_footers_re=re.compile('^(_{4,})$',re.M)
+shamela_footers_re=re.compile(u'^(¬?_{4,})$',re.M)
digits_re=re.compile(r'\d+')
no_w_re=re.compile(ur'[^A-Za-zابتثجحخدذرزسشصضطظعغفقكلمنهوي\s]')
# one-to-one transformations that do not change character order
@@ -165,7 +165,6 @@ class ShamelaSqlite(object):
"""Internal function used by importTable"""
if prefix and sql_cmd[0].startswith('INSERT INTO '): sql_cmd[0]='INSERT INTO '+prefix+sql_cmd[0][12:]
sql=''.join(sql_cmd)
- open("C:\\th.log","at+").write(sql+"\n")
self.c.execute(sql)
def __schemaGetCols(self, r):
@@ -217,7 +216,7 @@ class ShamelaSqlite(object):
elif is_replace: prefix="OR REPLACE INTO "
prefix+=tb_prefix
for l in pipe.stdout:
- l=l.replace('\r','')
+ l=l.replace('\r','\n')
# output encoding of mdbtools on Windows is cp1256; this is a bug in mdbtools
if self.encoding_fix_needed==None:
try: l.decode('UTF-8')
@@ -398,6 +397,17 @@ def set_get_xref(xref, h_tags, sh, bkid, pg_id, matn, matnid):
xref=sh.get_xref(matn, matnid)
if xref: h_tags['embed.original.section']=xref
+ss_re=re.compile(" +")
+re_ss_re=re.compile("( \*){2,}")
+
+def ss(txt):
+ """squeeze spaces"""
+ return ss_re.sub(" ", txt)
+
+def re_ss(txt):
+ """squeeze spaces in re"""
+ return re_ss_re.sub(" *", ss(txt))
+
def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=ur'\((\d+)\)', ft_prefix_len=1, ft_suffix_len=1):
"""
@@ -428,6 +438,7 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
# NOTE: we only need page_id,title and depth, sub is only used to sort them
toc_ls=filter(lambda i: i[2] and i[1], [list(i) for i in r])
if not toc_ls: raise TypeError # no text in the book
+ if toc_ls[0][0]!=1: toc_ls.insert(0, [1, sh.getBookMeta(bkid)['kitab'].replace('_',' '),toc_ls[0][2]])
toc_hash=map(lambda i: (i[1][0],i[0]),enumerate(toc_ls))
# toc_hash.sort(lambda a,b: cmp(a[0],b[0])) # FIXME: this is not needed!
toc_hash=dict(map(lambda j: (j[0],map(lambda k:k[1], j[1])), groupby(toc_hash,lambda i: i[0])))
@@ -493,30 +504,30 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
h_p=no_w_re.sub(' ', h.translate(sh_normalize_tb)).strip()
if h_p: # if normalized h_p is not empty
# NOTE: no need to map re.escape() over h_p because it contains no special chars
- h_re_entire_line=re.compile(ur"^\s*%s\s*$" % ur" *".join(list(h_p)), re.M)
+ h_re_entire_line=re.compile(re_ss(ur"^\s*%s\s*$" % ur" *".join(list(h_p))), re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_entire_line, ix, j, 2): continue
if not txt_no_d: txt_no_d=txt.translate(sh_digits_to_spaces_tb)
h_p_no_d=h_p.translate(sh_digits_to_spaces_tb).strip()
if h_p_no_d:
- h_re_entire_line_no_d=re.compile(ur"^\s*%s\s*$" % ur" *".join(list(h_p_no_d)), re.M)
+ h_re_entire_line_no_d=re.compile(re_ss(ur"^\s*%s\s*$" % ur" *".join(list(h_p_no_d))), re.M)
if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_entire_line_no_d, ix, j, 3): continue
# at the beginning of the line
if _shamelaFindExactHeadings(page_txt, page_id, "\n%s", d, h, ix,j, 4): continue
if h_p:
- h_re_line_start=re.compile(ur"^\s*%s\s*" % ur" *".join(list(h_p)), re.M)
+ h_re_line_start=re.compile(re_ss(ur"^\s*%s\s*" % ur" *".join(list(h_p))), re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_line_start, ix, j, 5): continue
if h_p_no_d:
- h_re_line_start_no_d=re.compile(ur"^\s*%s\s*" % ur" *".join(list(h_p_no_d)), re.M)
+ h_re_line_start_no_d=re.compile(re_ss(ur"^\s*%s\s*" % ur" *".join(list(h_p_no_d))), re.M)
if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_line_start_no_d, ix, j, 6): continue
# anywhere in the line
if _shamelaFindExactHeadings(page_txt, page_id, "%s", d, h, ix,j, 7): continue
if h_p:
- h_re_any_ware=re.compile(ur"\s*%s\s*" % ur" *".join(list(h_p)), re.M)
+ h_re_any_ware=re.compile(re_ss(ur"\s*%s\s*" % ur" *".join(list(h_p))), re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_any_ware, ix, j, 8): continue
if h_p_no_d:
- h_re_any_ware_no_d=re.compile(ur"\s*%s\s*" % ur" *".join(list(h_p_no_d)), re.M)
+ h_re_any_ware_no_d=re.compile(re_ss(ur"\s*%s\s*" % ur" *".join(list(h_p_no_d))), re.M)
if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_any_ware_no_d, ix, j, 9): continue
# if we reached here then head is not found
# place it just after last one
@@ -615,7 +626,6 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
# for j,k in footnotes_cnd:
# print "j=[%s] k=[%s]" % (j,k)
# # raise KeyError
-
if toc_hash.has_key(pg_id):
hno_pop_needed=False
elif hno!=None and hno!=last_hno:
@@ -702,7 +712,6 @@ def shamelaImport(cursor, sh, bkid, footnote_re=ur'\((\d+)\)', body_footnote_re=
depths.append(f.depth)
#last=pg_body[f.end:]+'\n'
last=shamela_shift_footers_re.sub(footer_shift_cb, pg_body[f.end:]+'\n')
- if footnotes_cnd and pg_id!=154: print " *** stall footnotes at pg_id=",pg_id; # raise
if footnotes_cnd:
last+="\n==========[\n"+pop_footers(footnotes_cnd)+"\n]==========\n"
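The hunks above change heading detection in two small ways: the footer-separator regex now tolerates a leading ¬ (U+00AC NOT SIGN, the marker Shamela uses for footnotes), and every per-character heading pattern is passed through the new re_ss() helper before re.compile(). Headings are matched by joining their characters with " *" so stray spaces are tolerated; when the heading itself contains spaces this produces redundant " * *" runs, which ss()/re_ss() collapse. A minimal standalone sketch of the two helpers (the example strings are illustrative):

    import re

    ss_re = re.compile(" +")            # runs of literal spaces
    re_ss_re = re.compile("( \*){2,}")  # runs of repeated " *" atoms in a pattern source

    def ss(txt):
        """squeeze spaces"""
        return ss_re.sub(" ", txt)

    def re_ss(txt):
        """squeeze repeated ' *' atoms in a regex source"""
        return re_ss_re.sub(" *", ss(txt))

    h_p = u"ab cd"                          # a normalized heading with an internal space
    pattern = u" *".join(list(h_p))         # u'a *b * *c *d' -- note the doubled atom
    print re_ss(u"^\s*%s\s*$" % pattern)    # ^\s*a *b *c *d\s*$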
diff --git a/Thawab/webApp.py b/Thawab/webApp.py
index 6cb9c48..00ff0d4 100644
--- a/Thawab/webApp.py
+++ b/Thawab/webApp.py
@@ -275,7 +275,7 @@ Allow: /
# print R
if not R: return {'t':0,'c':0,'h':''}
self.searchCache.append(h,R)
- r={'t':R.runtime,'c':R.scored,'h':h}
+ r={'t':R.runtime,'c':len(R),'h':h}
elif args[0]=='searchResults':
h=rq.q.getfirst('h','')
try: i=int(rq.q.getfirst('i','0'))
@@ -284,11 +284,11 @@ Allow: /
except TypeError: c=0
R=self.searchCache.get(h)
if R==None: return {'c':0}
- C=R.scored
+ C=len(R)
if i>=C: return {'c':0}
c=min(c,C-i)
r={'c':c,'a':[]}
- n=100.0/R.scores[0]
+ n=100.0/R[0].score
j=0
for j in range(i,i+c):
name=R[j]['kitab']
@@ -298,7 +298,7 @@ Allow: /
r['a'].append({
'i':j,'n':'_i'+R[j]['nodeIdNum'],
'k':m['kitab'], 'a':prettyId(m['author']), 'y':tryInt(m['year']),
- 't':R[j]['title'], 'r':'%4.1f' % (n*R.scores[j])})
+ 't':R[j]['title'], 'r':'%4.1f' % (n*R[j].score)})
j+=1
r[c]=j;
else: r={}
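The webApp.py hunks track a change in Whoosh's Results API: the old R.scored attribute and the parallel R.scores list are gone, so the hit count is now taken from len(R) and each hit's score from its own .score attribute. A self-contained sketch of that access pattern against a throwaway index (directory name and documents are illustrative):

    import os
    from whoosh.fields import Schema, TEXT
    from whoosh.index import create_in
    from whoosh.qparser import QueryParser

    if not os.path.isdir("ix_demo"): os.makedirs("ix_demo")
    ix = create_in("ix_demo", Schema(title=TEXT(stored=True)))
    w = ix.writer()
    w.add_document(title=u"first title")
    w.add_document(title=u"second title")
    w.commit()

    R = ix.searcher().search(QueryParser("title", ix.schema).parse(u"title"))
    C = len(R)                # hit count; was R.scored
    n = 100.0 / R[0].score    # normalize to the top hit; was 100.0 / R.scores[0]
    for j in range(C):
        print R[j]["title"], "%4.1f" % (n * R[j].score)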
diff --git a/Thawab/whooshSearchEngine.py b/Thawab/whooshSearchEngine.py
index 1c37d9e..5cf4741 100644
--- a/Thawab/whooshSearchEngine.py
+++ b/Thawab/whooshSearchEngine.py
@@ -17,12 +17,14 @@ Copyright © 2008, Muayyad Alsadi <alsadi at ojuba.org>
"""
import sys, os, os.path, re
+import shutil
from tags import *
from meta import prettyId,makeId
from whoosh import query
-from whoosh.index import EmptyIndexError, create_in, open_dir
+from whoosh.index import EmptyIndexError, create_in, open_dir, IndexVersionError
from whoosh.highlight import highlight, SentenceFragmenter, BasicFragmentScorer, FIRST, HtmlFormatter
+from whoosh.filedb.fileindex import _INDEX_VERSION as whoosh_ix_ver
from whoosh.filedb.filestore import FileStorage
from whoosh.fields import Schema, ID, IDLIST, TEXT
from whoosh.formats import Frequency
@@ -36,227 +38,8 @@ def stemfn(word): return stemArabic(stem(word))
# word_re=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]"
analyzer=StandardAnalyzer(expression=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+(?:\.?[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+)*") | StemFilter(stemfn)
-#from whoosh.fields import FieldType, KeywordAnalyzer
-#try: from whoosh.fields import Existence
-#except ImportError: from whoosh.fields import Existance as Existence
-
-#class TAGSLIST(FieldType):
-# """
-# Configured field type for fields containing space-separated or comma-separated
-# keyword-like data (such as tags). The default is to not store positional information
-# (so phrase searching is not allowed in this field) and to not make the field scorable.
-#
-# unlike KEYWORD field type, TAGS list does not count frequency just existence.
-# """
-#
-# def __init__(self, stored = False, lowercase = False, commas = False,
-# scorable = False, unique = False, field_boost = 1.0):
-# """
-# :stored: Whether to store the value of the field with the document.
-# :comma: Whether this is a comma-separated field. If this is False
-# (the default), it is treated as a space-separated field.
-# :scorable: Whether this field is scorable.
-# """
-#
-# ana = KeywordAnalyzer(lowercase = lowercase, commas = commas)
-# self.format = Existence(analyzer = ana, field_boost = field_boost)
-# self.scorable = scorable
-# self.stored = stored
-# self.unique = unique
-
-from whoosh.qparser import MultifieldParser, FieldAliasPlugin, QueryParserError, BoostPlugin, GroupPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin, Group, AndGroup, OrGroup, AndNotGroup, AndMaybeGroup, Singleton, BasicSyntax, Plugin, White, Token
-
-from whoosh.qparser import CompoundsPlugin, NotPlugin, WildcardPlugin
-
-class ThCompoundsPlugin(Plugin):
- """Adds the ability to use &, |, &~, and &! to specify
- query constraints.
-
- This plugin is included in the default parser configuration.
- """
-
- def tokens(self):
- return ((ThCompoundsPlugin.AndNot, -10), (ThCompoundsPlugin.AndMaybe, -5), (ThCompoundsPlugin.And, 0),
- (ThCompoundsPlugin.Or, 0))
-
- def filters(self):
- return ((ThCompoundsPlugin.do_compounds, 600), )
-
- @staticmethod
- def do_compounds(parser, stream):
- newstream = stream.empty()
- i = 0
- while i < len(stream):
- t = stream[i]
- ismiddle = newstream and i < len(stream) - 1
- if isinstance(t, Group):
- newstream.append(ThCompoundsPlugin.do_compounds(parser, t))
- elif isinstance(t, (ThCompoundsPlugin.And, ThCompoundsPlugin.Or)):
- if isinstance(t, ThCompoundsPlugin.And):
- cls = AndGroup
- else:
- cls = OrGroup
-
- if cls != type(newstream) and ismiddle:
- last = newstream.pop()
- rest = ThCompoundsPlugin.do_compounds(parser, cls(stream[i+1:]))
- newstream.append(cls([last, rest]))
- break
-
- elif isinstance(t, ThCompoundsPlugin.AndNot):
- if ismiddle:
- last = newstream.pop()
- i += 1
- next = stream[i]
- if isinstance(next, Group):
- next = ThCompoundsPlugin.do_compounds(parser, next)
- newstream.append(AndNotGroup([last, next]))
-
- elif isinstance(t, ThCompoundsPlugin.AndMaybe):
- if ismiddle:
- last = newstream.pop()
- i += 1
- next = stream[i]
- if isinstance(next, Group):
- next = ThCompoundsPlugin.do_compounds(parser, next)
- newstream.append(AndMaybeGroup([last, next]))
- else:
- newstream.append(t)
- i += 1
-
- return newstream
-
- class And(Singleton):
- expr = re.compile(u"&")
-
- class Or(Singleton):
- expr = re.compile(u"\|")
-
- class AndNot(Singleton):
- expr = re.compile(u"&!")
-
- class AndMaybe(Singleton):
- expr = re.compile(u"&~") # when using Arabic keyboard ~ is shift+Z
-
-class ThFieldsPlugin(Plugin):
- """Adds the ability to specify the field of a clause using a colon.
-
- This plugin is included in the default parser configuration.
- """
-
- def tokens(self):
- return ((ThFieldsPlugin.Field, 0), )
-
- def filters(self):
- return ((ThFieldsPlugin.do_fieldnames, 100), )
-
- @staticmethod
- def do_fieldnames(parser, stream):
- newstream = stream.empty()
- newname = None
- for i, t in enumerate(stream):
- if isinstance(t, ThFieldsPlugin.Field):
- valid = False
- if i < len(stream) - 1:
- next = stream[i+1]
- if not isinstance(next, (White, ThFieldsPlugin.Field)):
- newname = t.fieldname
- valid = True
- if not valid:
- newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
- continue
-
- if isinstance(t, Group):
- t = ThFieldsPlugin.do_fieldnames(parser, t)
- newstream.append(t.set_fieldname(newname))
- newname = None
-
- return newstream
-
- class Field(Token):
- expr = re.compile(u"(\w[\w\d]*):", re.U)
-
- def __init__(self, fieldname):
- self.fieldname = fieldname
-
- def __repr__(self):
- return "<%s:>" % self.fieldname
-
- def set_fieldname(self, fieldname):
- return self.__class__(fieldname)
-
- @classmethod
- def create(cls, parser, match):
- return cls(match.group(1))
-
-class ThNotPlugin(Plugin):
- """Adds the ability to negate a clause by preceding it with !.
-
- This plugin is included in the default parser configuration.
- """
-
- def tokens(self):
- return ((ThNotPlugin.Not, 0), )
-
- def filters(self):
- return ((ThNotPlugin.do_not, 800), )
-
- @staticmethod
- def do_not(parser, stream):
- newstream = stream.empty()
- notnext = False
- for t in stream:
- if isinstance(t, ThNotPlugin.Not):
- notnext = True
- continue
-
- if notnext:
- t = NotGroup([t])
- newstream.append(t)
- notnext = False
-
- return newstream
-
- class Not(Singleton):
- expr = re.compile(u"!")
-
-class ThWildcardPlugin(Plugin):
- """Adds the ability to specify wildcard queries by using asterisk and
- question mark characters in terms. Note that these types can be very
- performance and memory intensive. You may consider not including this
- type of query.
-
- This plugin is included in the default parser configuration.
- """
-
- def tokens(self):
- return ((ThWildcardPlugin.Wild, 0), )
-
- class Wild(BasicSyntax):
- expr = re.compile(u"[^ \t\r\n*?]*(\\*|\\?|؟)\\S*")
- qclass = query.Wildcard
-
- def __repr__(self):
- r = "%s:wild(%r)" % (self.fieldname, self.text)
- if self.boost != 1.0:
- r += "^%s" % self.boost
- return r
-
- @classmethod
- def create(cls, parser, match):
- return cls(match.group(0).replace(u'؟',u'?'))
-
-def ThMultifieldParser(schema=None):
- plugins = (BoostPlugin, ThCompoundsPlugin, ThFieldsPlugin, GroupPlugin,
- ThNotPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin,
- ThWildcardPlugin, FieldAliasPlugin({
- u"kitab":(u"كتاب",),
- u"title":(u"عنوان",),
- u"tags":(u"وسوم",)})
- )
- p = MultifieldParser(("title","content",), schema=schema, plugins=plugins)
- # to add a plugin use: p.add_plugin(XYZ)
- return p
+from whoosh.qparser import FieldAliasPlugin
+from whooshSymbolicQParser import MultifieldSQParser
class ExcerptFormatter(object):
def __init__(self, between = "..."):
@@ -288,11 +71,14 @@ class SearchEngine(BaseSearchEngine):
def __init__(self, th):
BaseSearchEngine.__init__(self, th, False)
self.__ix_writer = None
- ix_dir=os.path.join(th.prefixes[0],'index')
+ ix_dir=os.path.join(th.prefixes[0],'index', "ix_"+str(whoosh_ix_ver))
+ if not os.path.isdir(ix_dir): os.makedirs(ix_dir)
# try to load a pre-existing index
try: self.indexer=open_dir(ix_dir)
- except EmptyIndexError:
+ except (EmptyIndexError, IndexVersionError):
# create a new one
+ try: shutil.rmtree(ix_dir, True); os.makedirs(ix_dir)
+ except OSError: pass
schema = Schema(
kitab=ID(stored=True),
vrr=ID(stored=True,unique=False), # version release
@@ -304,7 +90,12 @@ class SearchEngine(BaseSearchEngine):
)
self.indexer=create_in(ix_dir,schema)
#self.__ix_qparser = ThMultifieldParser(self.th, ("title","content",), schema=self.indexer.schema)
- self.__ix_qparser = ThMultifieldParser(self.indexer.schema)
+ self.__ix_qparser = MultifieldSQParser(("title","content",), self.indexer.schema)
+ self.__ix_qparser.add_plugin(FieldAliasPlugin({
+ u"kitab":(u"كتاب",),
+ u"title":(u"عنوان",),
+ u"tags":(u"وسوم",)})
+ )
#self.__ix_pre=whoosh.query.Prefix
self.__ix_searcher= self.indexer.searcher()
@@ -315,7 +106,9 @@ class SearchEngine(BaseSearchEngine):
"""
return a Version-Release string if in index, otherwise return None
"""
- d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
+ try: d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
+ except TypeError: return None
+ except KeyError: return None
if d: return d['vrr']
return None
@@ -342,7 +135,7 @@ class SearchEngine(BaseSearchEngine):
txt=node.toText(ub)
s=set()
#results.query.all_terms(s) # return (field,term) pairs
- results.query.existing_terms(self.indexer.reader(), s, phrases=True) # return (field,term) pairs # self.self.__ix_searcher.reader()
+ results.q.existing_terms(self.indexer.reader(), s, phrases=True) # return (field,term) pairs # self.self.__ix_searcher.reader()
terms=dict(
map(lambda i: (i[1],i[0]),
filter(lambda j: j[0]=='content' or j[0]=='title', s))).keys()
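Besides swapping the hand-rolled parser for whooshSymbolicQParser (next file), the whooshSearchEngine.py hunks make the on-disk index survive Whoosh upgrades: the index now lives in a subdirectory named after Whoosh's index format version (ix_<N>, taken from _INDEX_VERSION), and IndexVersionError is handled like EmptyIndexError by wiping the directory and recreating the index. A condensed restatement of that open-or-rebuild logic (schema abbreviated, prefix path illustrative):

    import os, shutil
    from whoosh.fields import Schema, ID, TEXT
    from whoosh.index import EmptyIndexError, IndexVersionError, create_in, open_dir
    from whoosh.filedb.fileindex import _INDEX_VERSION as whoosh_ix_ver

    def open_or_rebuild(prefix, schema):
        ix_dir = os.path.join(prefix, 'index', 'ix_' + str(whoosh_ix_ver))
        if not os.path.isdir(ix_dir): os.makedirs(ix_dir)
        try:
            return open_dir(ix_dir)                  # reuse a compatible index
        except (EmptyIndexError, IndexVersionError):
            shutil.rmtree(ix_dir, True)              # stale or incompatible: start over
            try: os.makedirs(ix_dir)
            except OSError: pass
            return create_in(ix_dir, schema)

    ix = open_or_rebuild(os.path.expanduser('~/.thawab'),
                         Schema(kitab=ID(stored=True), content=TEXT))

Because the directory name encodes the format version, an index written by an older Whoosh release is simply abandoned in its own ix_<M> directory instead of being misread.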
diff --git a/Thawab/whooshSymbolicQParser.py b/Thawab/whooshSymbolicQParser.py
new file mode 100644
index 0000000..2adfba9
--- /dev/null
+++ b/Thawab/whooshSymbolicQParser.py
@@ -0,0 +1,69 @@
+# -*- coding: UTF-8 -*-
+"""
+
+Copyright © 2010, Muayyad Alsadi <alsadi at ojuba.org>
+
+"""
+
+import sys, os, os.path, re
+
+from whoosh import query
+from whoosh.qparser import *
+
+class SFieldsPlugin(Plugin):
+ """This plugin does not require an English field name, so that my field aliases work"""
+
+ def tokens(self, parser):
+ return ((SFieldsPlugin.Field, 0), )
+
+ def filters(self, parser):
+ return ((SFieldsPlugin.do_fieldnames, 100), )
+
+ @staticmethod
+ def do_fieldnames(parser, stream):
+ newstream = stream.empty()
+ newname = None
+ for i, t in enumerate(stream):
+ if isinstance(t, SFieldsPlugin.Field):
+ valid = False
+ if i < len(stream) - 1:
+ next = stream[i+1]
+ if not isinstance(next, (White, SFieldsPlugin.Field)):
+ newname = t.fieldname
+ valid = True
+ if not valid:
+ newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
+ continue
+
+ if isinstance(t, Group):
+ t = SFieldsPlugin.do_fieldnames(parser, t)
+ newstream.append(t.set_fieldname(newname))
+ newname = None
+
+ return newstream
+
+ class Field(Token):
+ expr = re.compile(u"(\w[\w\d]*):", re.U)
+
+ def __init__(self, fieldname):
+ self.fieldname = fieldname
+
+ def __repr__(self):
+ return "<%s:>" % self.fieldname
+
+ def set_fieldname(self, fieldname):
+ return self.__class__(fieldname)
+
+ @classmethod
+ def create(cls, parser, match):
+ return cls(match.group(1))
+
+def MultifieldSQParser(fieldnames, schema=None, fieldboosts=None, **kwargs):
+ p = MultifieldParser(fieldnames, schema, fieldboosts, **kwargs)
+ cp = OperatorsPlugin(And=r"&", Or=r"\|", AndNot=r"&!", AndMaybe=r"&~", Not=r'!')
+ p.replace_plugin(cp)
+ # FIXME: try to upstream SFieldsPlugin
+ p.remove_plugin_class(FieldsPlugin)
+ p.add_plugin(SFieldsPlugin)
+ return p
+
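This new module replaces roughly two hundred lines of custom plugins deleted from whooshSearchEngine.py: newer Whoosh lets OperatorsPlugin override the operator tokens directly, so & | &! &~ and ! stand in for AND, OR, ANDNOT, ANDMAYBE and NOT (the removed code noted that ~ is Shift+Z on an Arabic keyboard, hence the symbolic spellings), and only the field-prefix syntax still needs the custom SFieldsPlugin so that non-ASCII field aliases keep working. A usage sketch (schema and query are illustrative; assumes the Thawab package is importable):

    # -*- coding: UTF-8 -*-
    from whoosh.fields import Schema, ID, TEXT
    from Thawab.whooshSymbolicQParser import MultifieldSQParser

    schema = Schema(kitab=ID(stored=True), title=TEXT, content=TEXT)
    p = MultifieldSQParser(("title", "content"), schema)
    print p.parse(u'نور & هدى | سراج &! ظلمة')  # an And/Or/AndNot tree over title and content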
diff --git a/bok2ki.py b/bok2ki.py
new file mode 100755
index 0000000..875b35e
--- /dev/null
+++ b/bok2ki.py
@@ -0,0 +1,105 @@
+#! /usr/bin/python
+# -*- coding: UTF-8 -*-
+"""
+Script to import .bok files
+Copyright © 2008-2010, Muayyad Alsadi <alsadi at ojuba.org>
+
+ Released under terms of Waqf Public License.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the latest version Waqf Public License as
+ published by Ojuba.org.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ The Latest version of the license can be found on
+ "http://waqf.ojuba.org/license"
+
+"""
+
+import sys, os, os.path, glob, shutil, re
+import sqlite3
+from getopt import getopt, GetoptError
+
+def usage():
+ print '''\
+Usage: %s [-i] [-m DIR] FILES ...
+Where:
+\t-i\t\t- in-memory
+\t-m DIR\t\t- move successfully imported BOK files into DIR
+\t--ft-prefix=FOOTER_PREFIX default is "(¬"
+\t--ft-suffix=FOOTER_SUFFIX default is ")"
+\t--ft-leading=[0|1] should footnotes be matched at line start only, default is 0
+\t--ft-sp=[0|1|2] no, single or many whitespaces, default is 0
+\t--bft-prefix=FOOTER_PREFIX footnote anchor in body prefix, default is "(¬"
+\t--bft-suffix=FOOTER_SUFFIX footnote anchor in body suffix, default is ")"
+\t--bft-sp=[0|1|2] no, single or many whitespaces, default is 0
+
+the generated files will be moved into the db directory under the Thawab prefix (usually ~/.thawab/db/)
+''' % os.path.basename(sys.argv[0])
+
+try:
+ opts, args = getopt(sys.argv[1:], "im:", ["help", 'ft-prefix=', 'ft-suffix=', 'bft-prefix=', 'bft-suffix=', 'ft-leading=', 'ft-sp=', 'bft-sp='])
+except GetoptError, err:
+ print str(err) # will print something like "option -a not recognized"
+ usage()
+ sys.exit(1)
+
+if not args:
+ print "please provide at least one .bok files"
+ usage()
+ sys.exit(1)
+
+opts=dict(opts)
+
+def progress(msg, p, *a, **kw): print " ** [%g%% completed] %s" % (p,msg)
+
+from Thawab.core import ThawabMan
+from Thawab.shamelaUtils import ShamelaSqlite,shamelaImport
+th=ThawabMan()
+thprefix=th.prefixes[0]
+
+if not opts.has_key('-i'): db_fn=os.path.expanduser('~/bok2sql.db')
+else: db_fn=None
+
+# ¬ U+00AC NOT SIGN
+ft_prefix=opts.get('--ft-prefix','(¬').decode('utf-8'); ft_prefix_len=len(ft_prefix)
+ft_suffix=opts.get('--ft-suffix',')').decode('utf-8'); ft_suffix_len=len(ft_suffix)
+ft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--ft-sp','0'))]
+ft_at_line_start=int(opts.get('--ft-leading','0'))
+footnote_re=(ft_at_line_start and u'^\s*' or u'') + re.escape(ft_prefix)+ft_sp+ur'(\d+)'+ft_sp+re.escape(ft_suffix)
+
+bft_prefix=opts.get('--bft-prefix','(¬').decode('utf-8');
+bft_suffix=opts.get('--bft-suffix',')').decode('utf-8');
+bft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--bft-sp','0'))]
+body_footnote_re=re.escape(bft_prefix)+bft_sp+ur'(\d+)'+bft_sp+re.escape(bft_suffix)
+
+
+
+for fn in args:
+ if db_fn:
+ if os.path.exists(db_fn): os.unlink(db_fn)
+ cn=sqlite3.connect(db_fn, isolation_level=None)
+ else: cn=None
+ sh=ShamelaSqlite(fn, cn, 0 , 0, progress)
+ sh.toSqlite()
+ for bkid in sh.getBookIds():
+ ki=th.mktemp()
+ c=ki.seek(-1,-1)
+
+ m=shamelaImport(c, sh, bkid, footnote_re, body_footnote_re, ft_prefix_len, ft_suffix_len)
+ c.flush()
+ print "moving %s to %s" % (ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
+ shutil.move(ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
+ if opts.has_key('-m'):
+ dd=opts['-m']
+ if not os.path.isdir(dd):
+ try: os.makedirs(dd)
+ except OSError: pass
+ if os.path.isdir(dd):
+ dst=os.path.join(dd,os.path.basename(fn))
+ print "moving %s to %s" % (fn,dst)
+ shutil.move(fn, dst)
+ else: print "could not move .bok files, target directory does not exists"
+
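The new bok2ki.py script drives ShamelaSqlite and shamelaImport from the command line, e.g. python bok2ki.py -m imported/ some.bok (paths hypothetical); each book found in the .bok file becomes a .ki file under ~/.thawab/db/. With the defaults shown above the footnote pattern reduces to \(\¬(\d+)\), i.e. footnotes written as (¬N). A worked example of the construction, mirroring the script's defaults:

    # -*- coding: UTF-8 -*-
    import re
    ft_prefix, ft_suffix, ft_sp = u'(¬', u')', u''    # defaults: --ft-sp=0, --ft-leading=0
    footnote_re = re.escape(ft_prefix) + ft_sp + ur'(\d+)' + ft_sp + re.escape(ft_suffix)
    print footnote_re                                     # \(\¬(\d+)\)
    print re.findall(footnote_re, u'قال (¬1) وروى (¬23)')  # [u'1', u'23']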
diff --git a/po/ar.po b/po/ar.po
index a61f329..079d92a 100644
--- a/po/ar.po
+++ b/po/ar.po
@@ -1,7 +1,7 @@
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
-# This file is distributed under the same license as the PACKAGE package.
-# FIRST AUTHOR <EMAIL at ADDRESS>, YEAR.
+# Translation of thawab templates to Arabic
+# Copyright (C) 2008-2010, ojuba.org <core at ojuba.org>
+# This file is distributed under the same license as the thawab package.
+# Muayyad Saleh Alsadi <alsadi at ojuba.org>, 2010
#
#, fuzzy
msgid ""
diff --git a/po/de.po b/po/de.po
index 570f621..38fae0e 100644
--- a/po/de.po
+++ b/po/de.po
@@ -1,7 +1,7 @@
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
-# This file is distributed under the same license as the PACKAGE package.
-# FIRST AUTHOR <EMAIL at ADDRESS>, YEAR.
+# Translation of thawab templates to German
+# Copyright (C) 2008-2010, ojuba.org <core at ojuba.org>
+# This file is distributed under the same license as the thawab package.
+# cegerxwin <cegerxwin at web.de>, 2010
#
msgid ""
msgstr ""
--
Packaging for Thawab