[liblucene-queryparser-perl] 01/04: [svn-inject] Installing original source of liblucene-queryparser-perl

dom at earth.li dom at earth.li
Sat Apr 23 23:13:08 UTC 2016


This is an automated email from the git hooks/post-receive script.

dom pushed a commit to branch master
in repository liblucene-queryparser-perl.

commit 3639fd9f8bb8e397f3ed6c4d3ff1b9beee2dcc08
Author: Dominic Hargreaves <dom at earth.li>
Date:   Sat Apr 19 15:47:30 2008 +0000

    [svn-inject] Installing original source of liblucene-queryparser-perl
---
 Changes        |  15 ++++
 MANIFEST       |   8 ++
 META.yml       |  10 +++
 Makefile.PL    |  14 ++++
 QueryParser.pm | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 README         |  38 +++++++++
 t/1.t          |  84 +++++++++++++++++++
 t/2.t          |  46 +++++++++++
 8 files changed, 463 insertions(+)

diff --git a/Changes b/Changes
new file mode 100644
index 0000000..812bfa2
--- /dev/null
+++ b/Changes
@@ -0,0 +1,15 @@
+Revision history for Perl extension Lucene::QueryParser.
+
+1.3   Tue Dec  9 11:26:08 GMT 2003
+    - Add to_plucene
+
+1.2   Mon Jun  2 15:27:09 BST 2003
+    - Better handling of AND/OR conjunctions.
+
+1.1   Tue Jan 14 13:57:15 GMT 2003
+    - Add deparse() method
+
+0.01  Tue Jan  7 10:13:44 2003
+	- original version; created by h2xs 1.22 with options
+		-b 5.5.3 -AX -n Lucene::QueryParser
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..7c3fa58
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,8 @@
+Changes
+Makefile.PL
+MANIFEST
+QueryParser.pm
+README
+t/1.t
+t/2.t
+META.yml                                Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..643905c
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,10 @@
+#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
+name:         Lucene-QueryParser
+version:      1.04
+version_from: QueryParser.pm
+installdirs:  site
+requires:
+    Text::Balanced:                0
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.12
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..53f5ae8
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,14 @@
+use 5.00503;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+    'NAME'		=> 'Lucene::QueryParser',
+    'VERSION_FROM'	=> 'QueryParser.pm', # finds $VERSION
+    'PREREQ_PM'		=> {
+                Text::Balanced => 0
+            }, # e.g., Module::Name => 1.1
+    ($] >= 5.005 ?    ## Add these new keywords supported since 5.005
+      (
+       AUTHOR     => 'Simon Cozens <simon at kasei.com>') : ()),
+);
diff --git a/QueryParser.pm b/QueryParser.pm
new file mode 100644
index 0000000..c1e95f8
--- /dev/null
+++ b/QueryParser.pm
@@ -0,0 +1,248 @@
+package Lucene::QueryParser;
+
+use 5.00503;
+use strict;
+use Carp;
+
+require Exporter;
+use Text::Balanced qw(extract_bracketed extract_delimited);
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
+ at ISA = qw(Exporter);
+ at EXPORT_OK = qw( parse_query deparse_query );
+ at EXPORT = qw( parse_query deparse_query );
+$VERSION = '1.04';
+
+sub parse_query {
+    local $_ = shift;
+    my @rv;
+    while ($_) {
+        s/^\s+// and next;
+        my $item;
+        s/^(AND|OR|\|\|)\s+//;
+        if ($1)                   { $item->{conj} = $1; }
+        if (s/^\+//)              { $item->{type} = "REQUIRED";   }
+        elsif (s/^(-|!|NOT)\s*//i){ $item->{type} = "PROHIBITED"; }
+        else                      { $item->{type} = "NORMAL";     }
+
+        if (s/^([^\s(":]+)://)      { $item->{field} = $1 }
+
+        # Subquery
+        if (/^\(/) {
+            my ($extracted, $remainer) = extract_bracketed($_,"(");
+            if (!$extracted) { croak "Unbalanced subquery" }
+            $_ = $remainer;
+            $extracted =~ s/^\(//;
+            $extracted =~ s/\)$//;
+            $item->{query} = "SUBQUERY";
+            $item->{subquery} = parse_query($extracted);
+        } elsif (/^"/) {
+            my ($extracted, $remainer) = extract_delimited($_, '"');
+            if (!$extracted) { croak "Unbalanced phrase" }
+            $_ = $remainer;
+            $extracted =~ s/^"//;
+            $extracted =~ s/"$//;
+            $item->{query} = "PHRASE";
+            $item->{term} = $extracted;
+        } elsif (s/^(\S+)\*//) {
+            $item->{query} = "PREFIX";
+            $item->{term} = $1;
+        } else {
+            s/([^\s\^]+)// or croak "Malformed query";
+            $item->{query} = "TERM";
+            $item->{term} = $1;
+        }
+
+        if (s/^\^(\d+(?:.\d+)?)//)  { $item->{boost} = $1 }
+
+        push @rv, bless $item, "Lucene::QueryParser::".ucfirst lc $item->{query};
+    }
+    return bless \@rv, "Lucene::QueryParser::TopLevel";
+}
+
+sub deparse_query {
+    my $ds = shift;
+    my @out; 
+    for my $elem (@$ds) {
+        my $thing = "";
+        if ($elem->{conj}) { $thing .= "$elem->{conj} "; }
+        if ($elem->{type} eq "REQUIRED") {
+            $thing .= "+";
+        } elsif ($elem->{type} eq "PROHIBITED") {
+            $thing .= "-";
+        }
+        if (exists $elem->{field}) { 
+            $thing .= $elem->{field}.":"
+        }
+        if ($elem->{query} eq "TERM") {
+            $thing .= $elem->{term};
+        } elsif ($elem->{query} eq "SUBQUERY") {
+            $thing .= "(".deparse_query($elem->{subquery}).")";
+        } elsif ($elem->{query} eq "PHRASE") {
+            $thing .= '"'.$elem->{term}.'"';
+        }
+        if (exists $elem->{boost}) { $thing .= "^".$elem->{boost} }
+        push @out, $thing;
+    }
+    return join " ", @out;
+}
+
+package Lucene::QueryParser::TopLevel;
+
+sub to_plucene {
+    my ($self, $field) = @_;
+    Carp::croak("You need to specify a default field for your query")
+        unless $field;
+    return $self->[0]->to_plucene($field) 
+        if @$self ==1 and $self->[0]->{type} eq "NORMAL";
+
+    my @clauses;
+    $self->add_clause(\@clauses, $_, $field) for @$self;
+    require Plucene::Search::BooleanQuery;
+    my $query = new Plucene::Search::BooleanQuery;
+    $query->add_clause($_) for @clauses;
+    
+    $query;
+}
+
+sub add_clause {
+    my ($self, $clauses, $term, $field) = @_;
+    my $q = $term->to_plucene($field);
+    if (exists $term->{conj} and $term->{conj} eq "AND" and @$clauses) { 
+        # The previous term needs to become required
+        $clauses->[-1]->required(1) unless $clauses->[-1]->prohibited;
+    }
+
+    return unless $q; # Shouldn't happen yet
+    my $prohibited = $term->{type} eq "PROHIBITED";
+    my $required   = $term->{type} eq "REQUIRED";
+    $required = 1 if exists $term->{conj} and $term->{conj} eq "AND" 
+                     and !$prohibited;
+    require Plucene::Search::BooleanClause;
+    push @$clauses, Plucene::Search::BooleanClause->new({
+        prohibited => $prohibited,
+        required   => $required,
+        query      => $q
+    });
+}
+
+# Oh, I really like abstraction
+
+package Lucene::QueryParser::Term;
+
+sub to_plucene {
+    require Plucene::Search::TermQuery;
+    require Plucene::Index::Term;
+    my ($self, $field) = @_;
+    $self->{pl_term} = Plucene::Index::Term->new({
+        field => (exists $self->{field} ? $self->{field} : $field),
+        text => $self->{term}
+    });
+    my $q = Plucene::Search::TermQuery->new({ term => $self->{pl_term} });
+    $self->set_boost($q);
+    return $q;
+}
+
+sub set_boost {
+    my ($self, $q) = @_;
+    $q->boost($self->{boost}) if exists $self->{boost};
+}
+
+package Lucene::QueryParser::Phrase;
+our @ISA = qw(Lucene::QueryParser::Term);
+# This corresponds to the rules for "PHRASE" in the Plucene grammar
+
+sub to_plucene {
+    require Plucene::Search::PhraseQuery;
+    require Plucene::Index::Term;
+    my ($self, $field) = @_;
+    my @words = split /\s+/, $self->{term};
+    return $self->SUPER::to_plucene($field) if @words == 1;
+
+    my $phrase = Plucene::Search::PhraseQuery->new;
+    for my $word (@words) {
+        my $term = Plucene::Index::Term->new({
+            field => (exists $self->{field} ? $self->{field} : $field),
+            text => $word
+        });
+        $phrase->add($term);
+    }
+    if (exists $self->{slop}) { # Future extension
+        $phrase->slop($self->{slop});
+    }
+    $self->set_boost($phrase);
+    return $phrase;
+}
+
+package Lucene::QueryParser::Subquery;
+
+sub to_plucene {
+    my ($self, $field) = @_;
+    $self->{subquery}->to_plucene(
+        exists $self->{field} ? $self->{field} : $field
+    )
+}
+
+1;
+__END__
+# Below is stub documentation for your module. You'd better edit it!
+
+=head1 NAME
+
+Lucene::QueryParser - Turn a Lucene query into a Perl data structure
+
+=head1 SYNOPSIS
+
+  use Lucene::QueryParser;
+  my $structure = parse_query("red and yellow and -(coat:pink and green)");
+
+C<$structure> will be:
+
+ [ { query => 'TERM', type => 'NORMAL', term => 'red' },
+   { query => 'TERM', type => 'NORMAL', term => 'yellow' },
+   { subquery => [
+        { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' },
+        { query => 'TERM', type => 'NORMAL', term => 'green' }
+     ], query => 'SUBQUERY', type => 'PROHIBITED' 
+   }
+ ]
+
+=head1 DESCRIPTION
+
+This module parses a Lucene query, as defined by 
+http://lucene.sourceforge.net/cgi-bin/faq/faqmanager.cgi?file=chapter.search&toc=faq#q5
+
+It deals with fields, types, phrases, subqueries, and so on; everything
+handled by the C<SimpleQuery> class in Lucene. The data structure is similar
+to the one given above, and is pretty self-explanatory.
+
+The other function, C<deparse_query> turns such a data structure back into
+a Lucene query string. This is useful if you've just been mucking about
+with the data.
+
+=head2 PLUCENE
+
+Note for people using Plucene: the big arrayref and the hashes in the
+output of C<parse_query> are actually objects. They're not
+C<Plucene::Query> objects, because then everyone who wanted to do search
+queries would have to pull in Plucene, which is a bit unfair. However,
+they can be turned into C<Plucene::Query>s by calling C<to_plucene> on
+them. The argument to C<to_plucene> should be the default field to
+search if none is supplied.
+
+=head2 EXPORT
+
+Exports the C<parse_query> and C<deparse_query> functions.
+
+=head1 AUTHOR
+
+Simon Cozens, E<lt>simon at kasei.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2003 by Kasei
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. 
+
+=cut
diff --git a/README b/README
new file mode 100644
index 0000000..89041a1
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+Lucene/QueryParser version 0.01
+===============================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it get an idea of the modules uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+  blah blah blah
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2003 Simon Cozens
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself. 
+
diff --git a/t/1.t b/t/1.t
new file mode 100644
index 0000000..4f8c4c5
--- /dev/null
+++ b/t/1.t
@@ -0,0 +1,84 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 20;
+
+use_ok("Lucene::QueryParser");
+Lucene::QueryParser->import; # Devel::Cover seems to screw this over.
+
+use Data::Dumper;
+
+sub test_query {
+    my ($query, $expected, $message) = @_;
+    my $parsed = parse_query($query);
+    is_deeply($parsed, $expected, $message);
+    $query =~ s/ (and|or)//g;
+    $query =~ s/not /-/g;
+    is(deparse_query($parsed), $query, " ... and back again");
+}
+
+test_query("foo",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' } ],
+    "Simple one-word query parses fine");
+
+test_query("foo bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar' },
+ ],
+    "Simple two-word query parses fine");
+test_query("foo +bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar', type => "REQUIRED" },
+ ],
+    "+ operator works");
+test_query("foo -bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" },
+ ],
+    "- operator works");
+
+test_query("foo not bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" },
+ ],
+    "not operator works");
+
+is_deeply(parse_query('"foo bar" baz'),
+[ { query => 'PHRASE', type => 'NORMAL', term => 'foo bar' },
+  { query => 'TERM', type => 'NORMAL', term => 'baz',},
+ ],
+    "Quoted phrase matches work");
+
+test_query("foo AND bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar' , conj => "AND" },
+ ],
+    "conjunctions work");
+
+test_query("foo AND baz:bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+  { query => 'TERM', type => 'NORMAL', term => 'bar', field => 'baz' , conj => "AND" },
+ ],
+    "fields work");
+
+test_query("foo AND baz^2.0",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo'},
+  { query => 'TERM', type => 'NORMAL', term => 'baz', boost => "2.0", conj => "AND" },
+ ],
+    "boosting works");
+
+# Grand finale!
+
+test_query("red AND yellow AND -(coat:pink AND green)",
+[ { query => 'TERM', type => 'NORMAL', term => 'red' },
+  { query => 'TERM', type => 'NORMAL', term => 'yellow', conj => "AND" },
+  { subquery => [
+        { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' },
+        { query => 'TERM', type => 'NORMAL', term => 'green', conj => "AND" } 
+    ], query => 'SUBQUERY', type => 'PROHIBITED', conj => "AND" }
+], "A very complex query (with subquery)");
+
diff --git a/t/2.t b/t/2.t
new file mode 100644
index 0000000..a376269
--- /dev/null
+++ b/t/2.t
@@ -0,0 +1,46 @@
+use Test::More;
+if (!eval { require Plucene::Search::Query }) { 
+    plan skip_all => "Plucene not installed";
+} else {
+    plan tests => 22;
+}
+
+use_ok("Lucene::QueryParser");
+Lucene::QueryParser->import; # Devel::Cover seems to screw this over.
+sub pq { parse_query(shift)->to_plucene("text") };
+
+my $query = pq( "hello" );
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "text", "Field is correct");
+is($query->term->text,  "hello", "Text is correct");
+
+$query = pq( "foo:hello^3" );
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "foo", "Field is correct");
+is($query->term->text,  "hello", "Text is correct");
+is($query->boost, 3, "Boost is set");
+
+$query = pq( "-foo:hello" );
+isa_ok($query, "Plucene::Search::BooleanQuery");
+my @clauses = $query->clauses;
+is(@clauses, 1, "Boolean query with one clause");
+ok($clauses[0]->prohibited, "Clause is prohibited");
+ok(!$clauses[0]->required, "Clause is not required");
+
+$query = pq('"hello"');
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "text", "Field is correct");
+is($query->term->text,  "hello", "Text is correct");
+
+$query = pq('nonsense:"hello world"');
+isa_ok($query, "Plucene::Search::PhraseQuery");
+my @terms = @{$query->terms};
+is(@terms, 2, "With two terms");
+is($terms[1]->field, "nonsense", "Correctly distributed");
+is($terms[1]->text, "world", "Correctly partitioned");
+
+$query = pq("foo AND bar");
+isa_ok($query, "Plucene::Search::BooleanQuery");
+ at clauses = $query->clauses;
+is(@clauses, 2, "Boolean query with two clause");
+ok($clauses[0]->required, "Forced to be required");

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/liblucene-queryparser-perl.git



More information about the Pkg-perl-cvs-commits mailing list