[liblucene-queryparser-perl] 01/04: [svn-inject] Installing original source of liblucene-queryparser-perl
dom at earth.li
dom at earth.li
Sat Apr 23 23:13:08 UTC 2016
This is an automated email from the git hooks/post-receive script.
dom pushed a commit to branch master
in repository liblucene-queryparser-perl.
commit 3639fd9f8bb8e397f3ed6c4d3ff1b9beee2dcc08
Author: Dominic Hargreaves <dom at earth.li>
Date: Sat Apr 19 15:47:30 2008 +0000
[svn-inject] Installing original source of liblucene-queryparser-perl
---
Changes | 15 ++++
MANIFEST | 8 ++
META.yml | 10 +++
Makefile.PL | 14 ++++
QueryParser.pm | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
README | 38 +++++++++
t/1.t | 84 +++++++++++++++++++
t/2.t | 46 +++++++++++
8 files changed, 463 insertions(+)
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..812bfa2
--- /dev/null
+++ b/Changes
@@ -0,0 +1,15 @@
+Revision history for Perl extension Lucene::QueryParser.
+
+1.3 Tue Dec 9 11:26:08 GMT 2003
+ - Add to_plucene
+
+1.2 Mon Jun 2 15:27:09 BST 2003
+ - Better handling of AND/OR conjunctions.
+
+1.1 Tue Jan 14 13:57:15 GMT 2003
+ - Add deparse() method
+
+0.01 Tue Jan 7 10:13:44 2003
+ - original version; created by h2xs 1.22 with options
+ -b 5.5.3 -AX -n Lucene::QueryParser
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..7c3fa58
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,8 @@
+Changes
+Makefile.PL
+MANIFEST
+QueryParser.pm
+README
+t/1.t
+t/2.t
+META.yml Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..643905c
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,10 @@
+#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
+name: Lucene-QueryParser
+version: 1.04
+version_from: QueryParser.pm
+installdirs: site
+requires:
+ Text::Balanced: 0
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.12
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..53f5ae8
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,14 @@
+use 5.00503;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ 'NAME' => 'Lucene::QueryParser',
+ 'VERSION_FROM' => 'QueryParser.pm', # finds $VERSION
+ 'PREREQ_PM' => {
+ Text::Balanced => 0
+ }, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (
+ AUTHOR => 'Simon Cozens <simon at kasei.com>') : ()),
+);
diff --git a/QueryParser.pm b/QueryParser.pm
new file mode 100644
index 0000000..c1e95f8
--- /dev/null
+++ b/QueryParser.pm
@@ -0,0 +1,248 @@
+package Lucene::QueryParser;
+
+use 5.00503;
+use strict;
+use Carp;
+
+require Exporter;
+use Text::Balanced qw(extract_bracketed extract_delimited);
+
+use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
+ at ISA = qw(Exporter);
+ at EXPORT_OK = qw( parse_query deparse_query );
+ at EXPORT = qw( parse_query deparse_query );
+$VERSION = '1.04';
+
+sub parse_query {
+ local $_ = shift;
+ my @rv;
+ while ($_) {
+ s/^\s+// and next;
+ my $item;
+ s/^(AND|OR|\|\|)\s+//;
+ if ($1) { $item->{conj} = $1; }
+ if (s/^\+//) { $item->{type} = "REQUIRED"; }
+ elsif (s/^(-|!|NOT)\s*//i){ $item->{type} = "PROHIBITED"; }
+ else { $item->{type} = "NORMAL"; }
+
+ if (s/^([^\s(":]+)://) { $item->{field} = $1 }
+
+ # Subquery
+ if (/^\(/) {
+ my ($extracted, $remainer) = extract_bracketed($_,"(");
+ if (!$extracted) { croak "Unbalanced subquery" }
+ $_ = $remainer;
+ $extracted =~ s/^\(//;
+ $extracted =~ s/\)$//;
+ $item->{query} = "SUBQUERY";
+ $item->{subquery} = parse_query($extracted);
+ } elsif (/^"/) {
+ my ($extracted, $remainer) = extract_delimited($_, '"');
+ if (!$extracted) { croak "Unbalanced phrase" }
+ $_ = $remainer;
+ $extracted =~ s/^"//;
+ $extracted =~ s/"$//;
+ $item->{query} = "PHRASE";
+ $item->{term} = $extracted;
+ } elsif (s/^(\S+)\*//) {
+ $item->{query} = "PREFIX";
+ $item->{term} = $1;
+ } else {
+ s/([^\s\^]+)// or croak "Malformed query";
+ $item->{query} = "TERM";
+ $item->{term} = $1;
+ }
+
+ if (s/^\^(\d+(?:.\d+)?)//) { $item->{boost} = $1 }
+
+ push @rv, bless $item, "Lucene::QueryParser::".ucfirst lc $item->{query};
+ }
+ return bless \@rv, "Lucene::QueryParser::TopLevel";
+}
+
+sub deparse_query {
+ my $ds = shift;
+ my @out;
+ for my $elem (@$ds) {
+ my $thing = "";
+ if ($elem->{conj}) { $thing .= "$elem->{conj} "; }
+ if ($elem->{type} eq "REQUIRED") {
+ $thing .= "+";
+ } elsif ($elem->{type} eq "PROHIBITED") {
+ $thing .= "-";
+ }
+ if (exists $elem->{field}) {
+ $thing .= $elem->{field}.":"
+ }
+ if ($elem->{query} eq "TERM") {
+ $thing .= $elem->{term};
+ } elsif ($elem->{query} eq "SUBQUERY") {
+ $thing .= "(".deparse_query($elem->{subquery}).")";
+ } elsif ($elem->{query} eq "PHRASE") {
+ $thing .= '"'.$elem->{term}.'"';
+ }
+ if (exists $elem->{boost}) { $thing .= "^".$elem->{boost} }
+ push @out, $thing;
+ }
+ return join " ", @out;
+}
+
+package Lucene::QueryParser::TopLevel;
+
+sub to_plucene {
+ my ($self, $field) = @_;
+ Carp::croak("You need to specify a default field for your query")
+ unless $field;
+ return $self->[0]->to_plucene($field)
+ if @$self ==1 and $self->[0]->{type} eq "NORMAL";
+
+ my @clauses;
+ $self->add_clause(\@clauses, $_, $field) for @$self;
+ require Plucene::Search::BooleanQuery;
+ my $query = new Plucene::Search::BooleanQuery;
+ $query->add_clause($_) for @clauses;
+
+ $query;
+}
+
+sub add_clause {
+ my ($self, $clauses, $term, $field) = @_;
+ my $q = $term->to_plucene($field);
+ if (exists $term->{conj} and $term->{conj} eq "AND" and @$clauses) {
+ # The previous term needs to become required
+ $clauses->[-1]->required(1) unless $clauses->[-1]->prohibited;
+ }
+
+ return unless $q; # Shouldn't happen yet
+ my $prohibited = $term->{type} eq "PROHIBITED";
+ my $required = $term->{type} eq "REQUIRED";
+ $required = 1 if exists $term->{conj} and $term->{conj} eq "AND"
+ and !$prohibited;
+ require Plucene::Search::BooleanClause;
+ push @$clauses, Plucene::Search::BooleanClause->new({
+ prohibited => $prohibited,
+ required => $required,
+ query => $q
+ });
+}
+
+# Oh, I really like abstraction
+
+package Lucene::QueryParser::Term;
+
+sub to_plucene {
+ require Plucene::Search::TermQuery;
+ require Plucene::Index::Term;
+ my ($self, $field) = @_;
+ $self->{pl_term} = Plucene::Index::Term->new({
+ field => (exists $self->{field} ? $self->{field} : $field),
+ text => $self->{term}
+ });
+ my $q = Plucene::Search::TermQuery->new({ term => $self->{pl_term} });
+ $self->set_boost($q);
+ return $q;
+}
+
+sub set_boost {
+ my ($self, $q) = @_;
+ $q->boost($self->{boost}) if exists $self->{boost};
+}
+
+package Lucene::QueryParser::Phrase;
+our @ISA = qw(Lucene::QueryParser::Term);
+# This corresponds to the rules for "PHRASE" in the Plucene grammar
+
+sub to_plucene {
+ require Plucene::Search::PhraseQuery;
+ require Plucene::Index::Term;
+ my ($self, $field) = @_;
+ my @words = split /\s+/, $self->{term};
+ return $self->SUPER::to_plucene($field) if @words == 1;
+
+ my $phrase = Plucene::Search::PhraseQuery->new;
+ for my $word (@words) {
+ my $term = Plucene::Index::Term->new({
+ field => (exists $self->{field} ? $self->{field} : $field),
+ text => $word
+ });
+ $phrase->add($term);
+ }
+ if (exists $self->{slop}) { # Future extension
+ $phrase->slop($self->{slop});
+ }
+ $self->set_boost($phrase);
+ return $phrase;
+}
+
+package Lucene::QueryParser::Subquery;
+
+sub to_plucene {
+ my ($self, $field) = @_;
+ $self->{subquery}->to_plucene(
+ exists $self->{field} ? $self->{field} : $field
+ )
+}
+
+1;
+__END__
+# Below is stub documentation for your module. You'd better edit it!
+
+=head1 NAME
+
+Lucene::QueryParser - Turn a Lucene query into a Perl data structure
+
+=head1 SYNOPSIS
+
+ use Lucene::QueryParser;
+ my $structure = parse_query("red and yellow and -(coat:pink and green)");
+
+C<$structure> will be:
+
+ [ { query => 'TERM', type => 'NORMAL', term => 'red' },
+ { query => 'TERM', type => 'NORMAL', term => 'yellow' },
+ { subquery => [
+ { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' },
+ { query => 'TERM', type => 'NORMAL', term => 'green' }
+ ], query => 'SUBQUERY', type => 'PROHIBITED'
+ }
+ ]
+
+=head1 DESCRIPTION
+
+This module parses a Lucene query, as defined by
+http://lucene.sourceforge.net/cgi-bin/faq/faqmanager.cgi?file=chapter.search&toc=faq#q5
+
+It deals with fields, types, phrases, subqueries, and so on; everything
+handled by the C<SimpleQuery> class in Lucene. The data structure is similar
+to the one given above, and is pretty self-explanatory.
+
+The other function, C<deparse_query> turns such a data structure back into
+a Lucene query string. This is useful if you've just been mucking about
+with the data.
+
+=head2 PLUCENE
+
+Note for people using Plucene: the big arrayref and the hashes in the
+output of C<parse_query> are actually objects. They're not
+C<Plucene::Query> objects, because then everyone who wanted to do search
+queries would have to pull in Plucene, which is a bit unfair. However,
+they can be turned into C<Plucene::Query>s by calling C<to_plucene> on
+them. The argument to C<to_plucene> should be the default field to
+search if none is supplied.
+
+=head2 EXPORT
+
+Exports the C<parse_query> and C<deparse_query> functions.
+
+=head1 AUTHOR
+
+Simon Cozens, E<lt>simon at kasei.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2003 by Kasei
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
diff --git a/README b/README
new file mode 100644
index 0000000..89041a1
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+Lucene/QueryParser version 0.01
+===============================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it get an idea of the modules uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+ blah blah blah
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2003 Simon Cozens
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
diff --git a/t/1.t b/t/1.t
new file mode 100644
index 0000000..4f8c4c5
--- /dev/null
+++ b/t/1.t
@@ -0,0 +1,84 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl 1.t'
+
+#########################
+
+# change 'tests => 1' to 'tests => last_test_to_print';
+
+use Test::More tests => 20;
+
+use_ok("Lucene::QueryParser");
+Lucene::QueryParser->import; # Devel::Cover seems to screw this over.
+
+use Data::Dumper;
+
+sub test_query {
+ my ($query, $expected, $message) = @_;
+ my $parsed = parse_query($query);
+ is_deeply($parsed, $expected, $message);
+ $query =~ s/ (and|or)//g;
+ $query =~ s/not /-/g;
+ is(deparse_query($parsed), $query, " ... and back again");
+}
+
+test_query("foo",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' } ],
+ "Simple one-word query parses fine");
+
+test_query("foo bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar' },
+ ],
+ "Simple two-word query parses fine");
+test_query("foo +bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar', type => "REQUIRED" },
+ ],
+ "+ operator works");
+test_query("foo -bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" },
+ ],
+ "- operator works");
+
+test_query("foo not bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" },
+ ],
+ "not operator works");
+
+is_deeply(parse_query('"foo bar" baz'),
+[ { query => 'PHRASE', type => 'NORMAL', term => 'foo bar' },
+ { query => 'TERM', type => 'NORMAL', term => 'baz',},
+ ],
+ "Quoted phrase matches work");
+
+test_query("foo AND bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar' , conj => "AND" },
+ ],
+ "conjunctions work");
+
+test_query("foo AND baz:bar",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo' },
+ { query => 'TERM', type => 'NORMAL', term => 'bar', field => 'baz' , conj => "AND" },
+ ],
+ "fields work");
+
+test_query("foo AND baz^2.0",
+[ { query => 'TERM', type => 'NORMAL', term => 'foo'},
+ { query => 'TERM', type => 'NORMAL', term => 'baz', boost => "2.0", conj => "AND" },
+ ],
+ "boosting works");
+
+# Grand finale!
+
+test_query("red AND yellow AND -(coat:pink AND green)",
+[ { query => 'TERM', type => 'NORMAL', term => 'red' },
+ { query => 'TERM', type => 'NORMAL', term => 'yellow', conj => "AND" },
+ { subquery => [
+ { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' },
+ { query => 'TERM', type => 'NORMAL', term => 'green', conj => "AND" }
+ ], query => 'SUBQUERY', type => 'PROHIBITED', conj => "AND" }
+], "A very complex query (with subquery)");
+
diff --git a/t/2.t b/t/2.t
new file mode 100644
index 0000000..a376269
--- /dev/null
+++ b/t/2.t
@@ -0,0 +1,46 @@
+use Test::More;
+if (!eval { require Plucene::Search::Query }) {
+ plan skip_all => "Plucene not installed";
+} else {
+ plan tests => 22;
+}
+
+use_ok("Lucene::QueryParser");
+Lucene::QueryParser->import; # Devel::Cover seems to screw this over.
+sub pq { parse_query(shift)->to_plucene("text") };
+
+my $query = pq( "hello" );
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "text", "Field is correct");
+is($query->term->text, "hello", "Text is correct");
+
+$query = pq( "foo:hello^3" );
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "foo", "Field is correct");
+is($query->term->text, "hello", "Text is correct");
+is($query->boost, 3, "Boost is set");
+
+$query = pq( "-foo:hello" );
+isa_ok($query, "Plucene::Search::BooleanQuery");
+my @clauses = $query->clauses;
+is(@clauses, 1, "Boolean query with one clause");
+ok($clauses[0]->prohibited, "Clause is prohibited");
+ok(!$clauses[0]->required, "Clause is not required");
+
+$query = pq('"hello"');
+isa_ok($query, "Plucene::Search::TermQuery");
+is($query->term->field, "text", "Field is correct");
+is($query->term->text, "hello", "Text is correct");
+
+$query = pq('nonsense:"hello world"');
+isa_ok($query, "Plucene::Search::PhraseQuery");
+my @terms = @{$query->terms};
+is(@terms, 2, "With two terms");
+is($terms[1]->field, "nonsense", "Correctly distributed");
+is($terms[1]->text, "world", "Correctly partitioned");
+
+$query = pq("foo AND bar");
+isa_ok($query, "Plucene::Search::BooleanQuery");
+ at clauses = $query->clauses;
+is(@clauses, 2, "Boolean query with two clause");
+ok($clauses[0]->required, "Forced to be required");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/liblucene-queryparser-perl.git
More information about the Pkg-perl-cvs-commits
mailing list