[Debian-islamic-commits] [SCM] Packaging for Thawab branch, master, updated. upstream/3.0.10-43-gf30d0f7
أحمد المحمودي (Ahmed El-Mahmoudy)
aelmahmoudy at sabily.org
Sat Dec 11 10:41:17 UTC 2010
The following commit has been merged in the master branch:
commit 69e4859a19d74f85547ef204607d7a31bf60aa17
Author: أحمد المحمودي (Ahmed El-Mahmoudy) <aelmahmoudy at sabily.org>
Date: Sat Dec 11 10:08:20 2010 +0200
* Added whoosh1.3.3.diff upstream patch to support whoosh >= 1.3.3
* Bumped dependency on python-whoosh to 1.3.3
diff --git a/debian/control b/debian/control
index cc8dafe..63de998 100644
--- a/debian/control
+++ b/debian/control
@@ -20,7 +20,7 @@ Package: thawab
Architecture: all
Depends: ${misc:Depends},
${python:Depends},
- python-whoosh (>= 1.0.0),
+ python-whoosh (>= 1.3.3),
python-okasha,
python-othman,
python-gtk2,
diff --git a/debian/patches/series b/debian/patches/series
index ad8e6a0..1ccd71d 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,3 +1,4 @@
private-pkg.diff
separate_setup.py.diff
desktop-categories.diff
+whoosh1.3.3.diff
diff --git a/debian/patches/whoosh1.3.3.diff b/debian/patches/whoosh1.3.3.diff
new file mode 100644
index 0000000..df30fe0
--- /dev/null
+++ b/debian/patches/whoosh1.3.3.diff
@@ -0,0 +1,458 @@
+Description: separate our custom parser and update for whoosh 1.3.3
+Origin: http://git.ojuba.org/cgit/thawab/commit/?id=cc5f60064e626c649a882de8363e4b08a53957ac
+Author: Muayyad Alsadi مؤيد السعدي <alsadi at ojuba.org>
+
+
+diff --git a/Thawab/whooshSearchEngine.py b/Thawab/whooshSearchEngine.py
+index 1c37d9e..76bbd10 100644
+--- a/Thawab/whooshSearchEngine.py
++++ b/Thawab/whooshSearchEngine.py
+@@ -36,227 +36,8 @@ def stemfn(word): return stemArabic(stem(word))
+ # word_re=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]"
+ analyzer=StandardAnalyzer(expression=ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+(?:\.?[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+)*") | StemFilter(stemfn)
+
+-#from whoosh.fields import FieldType, KeywordAnalyzer
+-#try: from whoosh.fields import Existence
+-#except ImportError: from whoosh.fields import Existance as Existence
+-
+-#class TAGSLIST(FieldType):
+-# """
+-# Configured field type for fields containing space-separated or comma-separated
+-# keyword-like data (such as tags). The default is to not store positional information
+-# (so phrase searching is not allowed in this field) and to not make the field scorable.
+-#
+-# unlike KEYWORD field type, TAGS list does not count frequency just existence.
+-# """
+-#
+-# def __init__(self, stored = False, lowercase = False, commas = False,
+-# scorable = False, unique = False, field_boost = 1.0):
+-# """
+-# :stored: Whether to store the value of the field with the document.
+-# :comma: Whether this is a comma-separated field. If this is False
+-# (the default), it is treated as a space-separated field.
+-# :scorable: Whether this field is scorable.
+-# """
+-#
+-# ana = KeywordAnalyzer(lowercase = lowercase, commas = commas)
+-# self.format = Existence(analyzer = ana, field_boost = field_boost)
+-# self.scorable = scorable
+-# self.stored = stored
+-# self.unique = unique
+-
+-from whoosh.qparser import MultifieldParser, FieldAliasPlugin, QueryParserError, BoostPlugin, GroupPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin, Group, AndGroup, OrGroup, AndNotGroup, AndMaybeGroup, Singleton, BasicSyntax, Plugin, White, Token
+-
+-from whoosh.qparser import CompoundsPlugin, NotPlugin, WildcardPlugin
+-
+-class ThCompoundsPlugin(Plugin):
+- """Adds the ability to use &, |, &~, and &! to specify
+- query constraints.
+-
+- This plugin is included in the default parser configuration.
+- """
+-
+- def tokens(self):
+- return ((ThCompoundsPlugin.AndNot, -10), (ThCompoundsPlugin.AndMaybe, -5), (ThCompoundsPlugin.And, 0),
+- (ThCompoundsPlugin.Or, 0))
+-
+- def filters(self):
+- return ((ThCompoundsPlugin.do_compounds, 600), )
+-
+- @staticmethod
+- def do_compounds(parser, stream):
+- newstream = stream.empty()
+- i = 0
+- while i < len(stream):
+- t = stream[i]
+- ismiddle = newstream and i < len(stream) - 1
+- if isinstance(t, Group):
+- newstream.append(ThCompoundsPlugin.do_compounds(parser, t))
+- elif isinstance(t, (ThCompoundsPlugin.And, ThCompoundsPlugin.Or)):
+- if isinstance(t, ThCompoundsPlugin.And):
+- cls = AndGroup
+- else:
+- cls = OrGroup
+-
+- if cls != type(newstream) and ismiddle:
+- last = newstream.pop()
+- rest = ThCompoundsPlugin.do_compounds(parser, cls(stream[i+1:]))
+- newstream.append(cls([last, rest]))
+- break
+-
+- elif isinstance(t, ThCompoundsPlugin.AndNot):
+- if ismiddle:
+- last = newstream.pop()
+- i += 1
+- next = stream[i]
+- if isinstance(next, Group):
+- next = ThCompoundsPlugin.do_compounds(parser, next)
+- newstream.append(AndNotGroup([last, next]))
+-
+- elif isinstance(t, ThCompoundsPlugin.AndMaybe):
+- if ismiddle:
+- last = newstream.pop()
+- i += 1
+- next = stream[i]
+- if isinstance(next, Group):
+- next = ThCompoundsPlugin.do_compounds(parser, next)
+- newstream.append(AndMaybeGroup([last, next]))
+- else:
+- newstream.append(t)
+- i += 1
+-
+- return newstream
+-
+- class And(Singleton):
+- expr = re.compile(u"&")
+-
+- class Or(Singleton):
+- expr = re.compile(u"\|")
+-
+- class AndNot(Singleton):
+- expr = re.compile(u"&!")
+-
+- class AndMaybe(Singleton):
+- expr = re.compile(u"&~") # when using Arabic keyboard ~ is shift+Z
+-
+-class ThFieldsPlugin(Plugin):
+- """Adds the ability to specify the field of a clause using a colon.
+-
+- This plugin is included in the default parser configuration.
+- """
+-
+- def tokens(self):
+- return ((ThFieldsPlugin.Field, 0), )
+-
+- def filters(self):
+- return ((ThFieldsPlugin.do_fieldnames, 100), )
+-
+- @staticmethod
+- def do_fieldnames(parser, stream):
+- newstream = stream.empty()
+- newname = None
+- for i, t in enumerate(stream):
+- if isinstance(t, ThFieldsPlugin.Field):
+- valid = False
+- if i < len(stream) - 1:
+- next = stream[i+1]
+- if not isinstance(next, (White, ThFieldsPlugin.Field)):
+- newname = t.fieldname
+- valid = True
+- if not valid:
+- newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
+- continue
+-
+- if isinstance(t, Group):
+- t = ThFieldsPlugin.do_fieldnames(parser, t)
+- newstream.append(t.set_fieldname(newname))
+- newname = None
+-
+- return newstream
+-
+- class Field(Token):
+- expr = re.compile(u"(\w[\w\d]*):", re.U)
+-
+- def __init__(self, fieldname):
+- self.fieldname = fieldname
+-
+- def __repr__(self):
+- return "<%s:>" % self.fieldname
+-
+- def set_fieldname(self, fieldname):
+- return self.__class__(fieldname)
+-
+- @classmethod
+- def create(cls, parser, match):
+- return cls(match.group(1))
+-
+-class ThNotPlugin(Plugin):
+- """Adds the ability to negate a clause by preceding it with !.
+-
+- This plugin is included in the default parser configuration.
+- """
+-
+- def tokens(self):
+- return ((ThNotPlugin.Not, 0), )
+-
+- def filters(self):
+- return ((ThNotPlugin.do_not, 800), )
+-
+- @staticmethod
+- def do_not(parser, stream):
+- newstream = stream.empty()
+- notnext = False
+- for t in stream:
+- if isinstance(t, ThNotPlugin.Not):
+- notnext = True
+- continue
+-
+- if notnext:
+- t = NotGroup([t])
+- newstream.append(t)
+- notnext = False
+-
+- return newstream
+-
+- class Not(Singleton):
+- expr = re.compile(u"!")
+-
+-class ThWildcardPlugin(Plugin):
+- """Adds the ability to specify wildcard queries by using asterisk and
+- question mark characters in terms. Note that these types can be very
+- performance and memory intensive. You may consider not including this
+- type of query.
+-
+- This plugin is included in the default parser configuration.
+- """
+-
+- def tokens(self):
+- return ((ThWildcardPlugin.Wild, 0), )
+-
+- class Wild(BasicSyntax):
+- expr = re.compile(u"[^ \t\r\n*?]*(\\*|\\?|؟)\\S*")
+- qclass = query.Wildcard
+-
+- def __repr__(self):
+- r = "%s:wild(%r)" % (self.fieldname, self.text)
+- if self.boost != 1.0:
+- r += "^%s" % self.boost
+- return r
+-
+- @classmethod
+- def create(cls, parser, match):
+- return cls(match.group(0).replace(u'؟',u'?'))
+-
+-def ThMultifieldParser(schema=None):
+- plugins = (BoostPlugin, ThCompoundsPlugin, ThFieldsPlugin, GroupPlugin,
+- ThNotPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin,
+- ThWildcardPlugin, FieldAliasPlugin({
+- u"kitab":(u"كتاب",),
+- u"title":(u"عنوان",),
+- u"tags":(u"وسوم",)})
+- )
+- p = MultifieldParser(("title","content",), schema=schema, plugins=plugins)
+- # to add a plugin use: p.add_plugin(XYZ)
+- return p
++from whoosh.qparser import FieldAliasPlugin
++from whooshSymbolicQParser import MultifieldSQParser
+
+ class ExcerptFormatter(object):
+ def __init__(self, between = "..."):
+@@ -304,7 +85,12 @@ class SearchEngine(BaseSearchEngine):
+ )
+ self.indexer=create_in(ix_dir,schema)
+ #self.__ix_qparser = ThMultifieldParser(self.th, ("title","content",), schema=self.indexer.schema)
+- self.__ix_qparser = ThMultifieldParser(self.indexer.schema)
++ self.__ix_qparser = MultifieldSQParser(("title","content",), self.indexer.schema)
++ self.__ix_qparser.add_plugin(FieldAliasPlugin({
++ u"kitab":(u"كتاب",),
++ u"title":(u"عنوان",),
++ u"tags":(u"وسوم",)})
++ )
+ #self.__ix_pre=whoosh.query.Prefix
+ self.__ix_searcher= self.indexer.searcher()
+
+@@ -315,7 +101,9 @@ class SearchEngine(BaseSearchEngine):
+ """
+ return a Version-Release string if in index, otherwise return None
+ """
+- d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
++ try: d=self.__ix_searcher.document(kitab=unicode(makeId(name)))
++ except TypeError: return None
++ except KeyError: return None
+ if d: return d['vrr']
+ return None
+
+diff --git a/Thawab/whooshSymbolicQParser.py b/Thawab/whooshSymbolicQParser.py
+new file mode 100644
+index 0000000..14ed6f9
+--- /dev/null
++++ b/Thawab/whooshSymbolicQParser.py
+@@ -0,0 +1,188 @@
++# -*- coding: UTF-8 -*-
++"""
++
++Copyright © 2010, Muayyad Alsadi <alsadi at ojuba.org>
++
++"""
++
++import sys, os, os.path, re
++
++from whoosh import query
++from whoosh.qparser import *
++
++class SCompoundsPlugin(Plugin):
++ """Adds the ability to use &, |, &~, and &! to specify
++ query constraints.
++ """
++
++ def tokens(self, parser):
++ return ((SCompoundsPlugin.AndNot, -10), (SCompoundsPlugin.AndMaybe, -5), (SCompoundsPlugin.And, 0),
++ (SCompoundsPlugin.Or, 0))
++
++ def filters(self, parser):
++ return ((SCompoundsPlugin.do_compounds, 600), )
++
++ @staticmethod
++ def do_compounds(parser, stream):
++ newstream = stream.empty()
++ i = 0
++ while i < len(stream):
++ t = stream[i]
++ ismiddle = newstream and i < len(stream) - 1
++ if isinstance(t, Group):
++ newstream.append(SCompoundsPlugin.do_compounds(parser, t))
++ elif isinstance(t, (SCompoundsPlugin.And, SCompoundsPlugin.Or)):
++ if isinstance(t, SCompoundsPlugin.And):
++ cls = AndGroup
++ else:
++ cls = OrGroup
++
++ if cls != type(newstream) and ismiddle:
++ last = newstream.pop()
++ rest = SCompoundsPlugin.do_compounds(parser, cls(stream[i+1:]))
++ newstream.append(cls([last, rest]))
++ break
++
++ elif isinstance(t, SCompoundsPlugin.AndNot):
++ if ismiddle:
++ last = newstream.pop()
++ i += 1
++ next = stream[i]
++ if isinstance(next, Group):
++ next = SCompoundsPlugin.do_compounds(parser, next)
++ newstream.append(AndNotGroup([last, next]))
++
++ elif isinstance(t, SCompoundsPlugin.AndMaybe):
++ if ismiddle:
++ last = newstream.pop()
++ i += 1
++ next = stream[i]
++ if isinstance(next, Group):
++ next = SCompoundsPlugin.do_compounds(parser, next)
++ newstream.append(AndMaybeGroup([last, next]))
++ else:
++ newstream.append(t)
++ i += 1
++
++ return newstream
++
++ class And(Singleton):
++ expr = re.compile(u"&")
++
++ class Or(Singleton):
++ expr = re.compile(u"\|")
++
++ class AndNot(Singleton):
++ expr = re.compile(u"&!")
++
++ class AndMaybe(Singleton):
++ expr = re.compile(u"&~") # when using Arabic keyboard ~ is shift+Z
++
++class SFieldsPlugin(Plugin):
++ """This plugin does not require an English field name, so that my field aliases work"""
++
++ def tokens(self, parser):
++ return ((SFieldsPlugin.Field, 0), )
++
++ def filters(self, parser):
++ return ((SFieldsPlugin.do_fieldnames, 100), )
++
++ @staticmethod
++ def do_fieldnames(parser, stream):
++ newstream = stream.empty()
++ newname = None
++ for i, t in enumerate(stream):
++ if isinstance(t, SFieldsPlugin.Field):
++ valid = False
++ if i < len(stream) - 1:
++ next = stream[i+1]
++ if not isinstance(next, (White, SFieldsPlugin.Field)):
++ newname = t.fieldname
++ valid = True
++ if not valid:
++ newstream.append(Word(t.fieldname, fieldname=parser.fieldname))
++ continue
++
++ if isinstance(t, Group):
++ t = SFieldsPlugin.do_fieldnames(parser, t)
++ newstream.append(t.set_fieldname(newname))
++ newname = None
++
++ return newstream
++
++ class Field(Token):
++ expr = re.compile(u"(\w[\w\d]*):", re.U)
++
++ def __init__(self, fieldname):
++ self.fieldname = fieldname
++
++ def __repr__(self):
++ return "<%s:>" % self.fieldname
++
++ def set_fieldname(self, fieldname):
++ return self.__class__(fieldname)
++
++ @classmethod
++ def create(cls, parser, match):
++ return cls(match.group(1))
++
++class SNotPlugin(Plugin):
++ """Adds the ability to negate a clause by preceding it with !.
++ """
++
++ def tokens(self, parser):
++ return ((SNotPlugin.Not, 0), )
++
++ def filters(self, parser):
++ return ((SNotPlugin.do_not, 800), )
++
++ @staticmethod
++ def do_not(parser, stream):
++ newstream = stream.empty()
++ notnext = False
++ for t in stream:
++ if isinstance(t, SNotPlugin.Not):
++ notnext = True
++ continue
++
++ if notnext:
++ t = NotGroup([t])
++ newstream.append(t)
++ notnext = False
++
++ return newstream
++
++ class Not(Singleton):
++ expr = re.compile(u"!")
++
++class SWildcardPlugin(Plugin):
++ """Adds the ability to specify wildcard queries by using asterisk and
++ question mark characters in terms. Note that these types can be very
++ performance and memory intensive. You may consider not including this
++ type of query.
++ """
++
++ def tokens(self, parser):
++ return ((SWildcardPlugin.Wild, 0), )
++
++ class Wild(BasicSyntax):
++ expr = re.compile(u"[^ \t\r\n*?]*(\\*|\\?|؟)\\S*")
++ qclass = query.Wildcard
++
++ def __repr__(self):
++ r = "%s:wild(%r)" % (self.fieldname, self.text)
++ if self.boost != 1.0:
++ r += "^%s" % self.boost
++ return r
++
++ @classmethod
++ def create(cls, parser, match):
++ return cls(match.group(0).replace(u'؟',u'?'))
++
++def MultifieldSQParser(fieldnames, schema=None, fieldboosts=None, **kwargs):
++ plugins = (BoostPlugin, SCompoundsPlugin, SFieldsPlugin, GroupPlugin,
++ SNotPlugin, PhrasePlugin, RangePlugin, SingleQuotesPlugin,
++ SWildcardPlugin)
++ p = MultifieldParser(fieldnames, schema, fieldboosts, plugins=plugins, **kwargs)
++ return p
++
--
Packaging for Thawab
More information about the Debian-islamic-commits
mailing list