[libcatmandu-perl] 61/101: Adding more POD
Jonas Smedegaard
dr at jones.dk
Tue Feb 23 13:43:56 UTC 2016
This is an automated email from the git hooks/post-receive script.
js pushed a commit to branch master
in repository libcatmandu-perl.
commit 8fc50027635a35bb6f9229906a5831297024640a
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date: Wed Feb 3 16:50:00 2016 +0100
Adding more POD
---
Build.PL | 4 +-
README.md | 10 +--
lib/Catmandu.pm | 136 +++++++++++++++++++++++++---------
lib/Catmandu/Exporter.pm | 79 ++++++++++++++------
lib/Catmandu/Fix.pm | 71 +++++++++++++-----
lib/Catmandu/Importer.pm | 166 +++++++++++++++++++++++++++++-------------
lib/Catmandu/Importer/Text.pm | 2 +-
lib/Catmandu/Introduction.pod | 10 +--
t/Catmandu-Importer.t | 18 ++---
9 files changed, 348 insertions(+), 148 deletions(-)
diff --git a/Build.PL b/Build.PL
index aae0a0a..946832e 100644
--- a/Build.PL
+++ b/Build.PL
@@ -1,5 +1,5 @@
-# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.041.
+# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.039.
use strict;
use warnings;
@@ -15,7 +15,7 @@ my %module_build_args = (
},
"dist_abstract" => "a data toolkit",
"dist_author" => [
- "Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>"
+ "Nicolas Steenlant <nicolas.steenlant\@ugent.be>"
],
"dist_name" => "Catmandu",
"dist_version" => "0.9505",
diff --git a/README.md b/README.md
index c318963..1d1edba 100644
--- a/README.md
+++ b/README.md
@@ -17,15 +17,15 @@ command:
$ catmandu convert JSON to CSV < data.json
-Or, to store a YAML file into an ElasticSearch database type:
+Or, to store a YAML file into an ElasticSearch database type (requires Catmandu::ElasticSearch):
$ catmandu import YAML to ElasticSearch --index_name demo < test.yml
-To export all the data from an Solr search engine into JSON type:
+To export all the data from a Solr search engine into JSON type (requires Catmandu::Solr):
$ catmandu export Solr --url http://localhost:8983/solr to JSON
-With Catmandu one can import OAI-PMH records in your application:
+With Catmandu one can import OAI-PMH records in your application (requires Catmandu::OAI):
$ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt
@@ -48,7 +48,7 @@ which contains a (sub)field 'nested'.
Catmandu was created by librarians for librarians. We process a lot of metadata especially
library metadata in formats such as MARC, MAB2 and MODS. With the following command we can extract
-data from a marc record and to store it into the title field:
+data from a marc record and store it in the title field (requires Catmandu::MARC):
$ catmandu convert MARC --fix 'marc_map(245,title)' < data.mrc
@@ -119,7 +119,7 @@ For a quick and demo installation visit our [blog](https://librecatproject.wordp
where a VirtualBox image is available containing all the Catmandu modules, including
ElasticSearch and MongoDB.
-On our [website](http://librecat.org/Catmandu/) we provide installation instructions for:
+On our [website](http://librecat.org/Catmandu/#installation) we provide installation instructions for:
* Debian
* Ubuntu Server
diff --git a/lib/Catmandu.pm b/lib/Catmandu.pm
index d5adc09..f2c1bd5 100644
--- a/lib/Catmandu.pm
+++ b/lib/Catmandu.pm
@@ -148,15 +148,34 @@ __END__
Catmandu - a data toolkit
=head1 SYNOPSIS
+
+ # From the command line
- use Catmandu -all;
- use Catmandu qw(config store);
- use Catmandu -load; # loads default configuration file
- use Catmandu -all -load => [qw(/config/path' '/another/config/path)];
+ # Convert data from one format to another
+ $ catmandu convert JSON to CSV < data.json
+ $ catmandu convert CSV to YAML < data.csv
+ $ catmandu convert MARC to YAML < data.mrc
+
+ # Fix data, add, delete, change fields
+ $ catmandu convert JSON --fix 'move_field(title,my_title)' < data.json
+ $ catmandu convert JSON --fix all_my_fixes.txt < data.json
+
+ # Import data into a database
+ # Requires: Catmandu::MongoDB and Catmandu::ElasticSearch
+ $ catmandu import YAML to MongoDB --database_name bibliography < data.yml
+ $ catmandu import CSV to ElasticSearch --index_name mystuff < data.csv
+
+ # Export data from a database
+ # Requires: Catmandu::MongoDB and Catmandu::ElasticSearch
+ $ catmandu export MongoDB --database_name bibliography to YAML > data.yml
+ $ catmandu export ElasticSearch --index_name mystuff to CSV > data.csv
+
+ # From Perl
+ use Catmandu;
# If you have Catmandu::OAI and Catmandu::MongoDB installed
my $importer = Catmandu->importer('OAI',url => 'https://biblio.ugent.be/oai')
- my $store = Catmandu->exporter('MongoDB',database_name => 'test');
+ my $store = Catmandu->store('MongoDB',database_name => 'test');
# Import all the OAI records into MongoDB
$store->add_many($importer);
@@ -170,51 +189,100 @@ Catmandu - a data toolkit
my $fixer = Catmandu->fixer('myfixes.txt');
my $exporter = Catmandu->exporter('YAML');
- $exporter->add_many(
- $fixer->fix($store)
- );
+ $exporter->add_many( $fixer->fix($store) );
$exporter->commit;
- # Or be very lazy and do this via the command line
- $ catmandu import OAI --url https://biblio.ugent.be/oai to MongoDB --database_name test
- $ catmandu export MongoDB --database_name test --fix myfixes.txt to YAML
-
=head1 DESCRIPTION
-Importing, transforming, storing and indexing data should be easy.
+Catmandu provides a command line client and a Perl API to ease the extraction (E),
+transformation (T) and loading (L) of data into databases or data files, ETL in short.
+
+Most of the daily work processing structured data can be done on the command line
+executing the C<catmandu> command. With our catmandu command ETL processing is available
+in a Perl context. Catmandu is different from other
+ETL tools by its focus on command line processing with much support for dataformats
+available in (academic) libraries: MARC, MODS, OAI and SRU. But, also generic formats such
+as JSON, YAML, CSV, Excel, XML, RDF, Atom are supported.
-Catmandu provides a suite of Perl modules to ease the import, storage,
-retrieval, export and transformation of metadata records. Combine Catmandu
-modules with web application frameworks such as PSGI/Plack, document stores
-such as MongoDB and full text indexes such as Solr to create a rapid
-development environment for digital library services such as institutional
-repositories and search engines.
+Read :
-In the L<http://librecat.org/> project it is our goal to provide an
-open source set of programming components to build up digital libraries
-services suited to your local needs.
+=over
+
+=item * L<Catmandu::Introduction> for a primer on the command line capabilities of Catmandu.
+
+=item * L<Catmandu::Importer> for the basics of importing
+
+=item * L<Catmandu::Fix> for the basics of transformations
+
+=item * L<Catmandu::Exporter> for the basics of exporting
-Read an in depth introduction into Catmandu programming at
-L<https://github.com/LibreCat/Catmandu/wiki/Introduction>.
+=item * L<Catmandu::Store> for the basics of storing information
-=head1 INSTALLATION
+=item * Or, visit our website at L<http://librecat.org/> and our blog L<https://librecatproject.wordpress.com/>
+ for many tutorials
-To install Catmandu just run:
+=back
+
+The documentation below describes the methods available when including Catmandu as
+part of a Perl script. For an overview of the command line tool itself read the
+documentation on L<catmandu>.
- cpanm Catmandu
+=head1 USE
-Read our documentation for more installation hints and OS specific requirements:
+To include Catmandu in a Perl script it should be loaded with a C<use> command:
-http://librecat.org/Catmandu/#installation
+ use Catmandu;
-=head1 METHODS
+By default no methods are imported into the Perl context. To import all or some Catmandu methods,
+provide them as a list to the C<use> command:
+
+ use Catmandu -all;
+ use Catmandu qw(config store exporter);
+
+Catmandu can load configuration options for exporters, importers, fixers via configuration
+files (see the CONFIG section below). When adding the -load option (optionally with a path) to the
+C<use> command, these configuration files will be loaded at the start of your script.
+
+ use Catmandu -load;
+ use Catmandu -load => ['/my/config/directory'];
+
+ # or use all the options
+ use Catmandu -all, -load => [qw(/config/path /another/config/path)];
+
+=head1 CLASS METHODS
=head2 log
-Return the current logger (the L<Log::Any::Adapter> for category
-L<Catmandu::Env>). See L<Log::Any#Logging> for how to send messages to the
-logger. Read our L<https://github.com/LibreCat/Catmandu/wiki/Cookbook>
-"See some debug messages" for some hints on logging.
+Return the current L<Log::Any::Adapter> logger.
+
+ use Catmandu;
+ use Log::Any::Adapter;
+ use Log::Log4perl;
+
+ Log::Any::Adapter->set('Log4perl');
+ Log::Log4perl::init('./log4perl.conf');
+
+ my $logger = Catmandu->log;
+ $logger->info("Starting main program");
+
+with log4perl.conf like:
+
+ # Send a copy of all logging messages to STDERR
+ log4perl.rootLogger=DEBUG,STDERR
+
+ # Logging specific for your main program
+ log4perl.category.myprog=INFO,STDERR
+
+ # Logging specific for one part of Catmandu
+ log4perl.category.Catmandu::Fix=DEBUG,STDERR
+
+ # Where to send the STDERR output
+ log4perl.appender.STDERR=Log::Log4perl::Appender::Screen
+ log4perl.appender.STDERR.stderr=1
+ log4perl.appender.STDERR.utf8=1
+
+ log4perl.appender.STDERR.layout=PatternLayout
+ log4perl.appender.STDERR.layout.ConversionPattern=%d [%P] - %p %l time=%r : %m%n
=head2 default_load_path('/default/path')
diff --git a/lib/Catmandu/Exporter.pm b/lib/Catmandu/Exporter.pm
index 8cc9b30..f5aefa4 100644
--- a/lib/Catmandu/Exporter.pm
+++ b/lib/Catmandu/Exporter.pm
@@ -42,37 +42,38 @@ Catmandu::Exporter - Namespace for packages that can export
=head1 SYNOPSIS
- package Catmandu::Exporter::Foo;
-
- use Catmandu::Sane;
- use Moo;
-
- with 'Catmandu::Exporter'
+ # From the command line
- sub add {
- my ($self, $data) = @_;
- my $fh = $self->fh;
- $fh->print( ... );
- }
+ # JSON is an importer and YAML an exporter
+ $ catmandu convert JSON to YAML < data.json
- package main;
+ # OAI is an importer and JSON an exporter
+ $ catmandu convert OAI --url http://biblio.ugent.be/oai to JSON
+ # From Perl
use Catmandu;
- my $exporter = Catmandu->exporter('Foo', file => "/tmp/output.txt");
-
- # Or on the command line
- $ catmandu convert JSON to Foo < /tmp/something.txt >/tmp/output.txt
+ my $importer = Catmandu->importer('JSON', file => 'data.json');
+ my $exporter = Catmandu->exporter('YAML');
+
+ $exporter->add({ record => "one"});
+ $exporter->add_many([ { record => "one" } , { record => "two" } ]);
+ $exporter->add_many($importer);
=head1 DESCRIPTION
-A Catmandu::Exporter is a Perl package that can export data. By default, data
-items are written to STDOUT. Optionally provide a C<file> or C<fh> parameter to
-write to a file, string, or handle. New exporter modules are expected to use
-the C<print> method of C<fh>.
+A Catmandu::Exporter is a Perl package that can export data into JSON, YAML, XML
+or many other formats. By default, data is written to STDOUT. Optionally provide a C<file>
+or C<fh> parameter to write to a file, string, or handle.
Every Catmandu::Exporter is a L<Catmandu::Fixable> thus provides a C<fix>
-parameter and method to apply fixes to exported items.
+parameter and method to apply fixes to exported items:
+
+ my $exporter = Catmandu->exporter('JSON', fix => ['upcase(title)']);
+
+ # This will be printed to STDOUT like: {"title":"MY TITLE"}
+ $exporter->add({ title => "my title"});
+
Every Catmandu::Exporter is a L<Catmandu::Addable> thus inherits the methods
C<add> and C<add_many>.
@@ -123,13 +124,47 @@ Returns the number of items exported by this Catmandu::Exporter.
Returns the current logger.
+=head1 CODING
+
+Create your own exporter by creating a Perl package in the Catmandu::Exporter namespace
+that implements C<Catmandu::Exporter>. Basically, you need to create a method add which
+writes a Perl hash to a file handle:
+
+
+ package Catmandu::Exporter::Foo;
+
+ use Catmandu::Sane;
+ use Moo;
+
+ with 'Catmandu::Exporter';
+
+ sub add {
+ my ($self, $data) = @_;
+ my $fh = $self->fh;
+ $fh->print( "Hello, World!");
+ }
+
+ 1;
+
+This exporter can be called from the command line as:
+
+ $ catmandu convert JSON to Foo < data.json
+
+Or, via Perl
+
+ use Catmandu;
+
+ my $exporter = Catmandu->exporter('Foo', file => "/tmp/output.txt");
+
+ $exporter->add({test => 123});
+
=head1 SEE ALSO
See function L<export_to_string|Catmandu/export_to_string> in module
L<Catmandu>.
The exporters L<Catmandu::Exporter::JSON>, L<Catmandu::Exporter::YAML>,
-L<Catmandu::Exporter::CSV>, and L<Catmandu::Exporter::RIS> are included in
+L<Catmandu::Exporter::CSV>, and L<Catmandu::Exporter::Text> are included in
Catmandu core.
See L<Catmandu::Importer> for the opposite action.
diff --git a/lib/Catmandu/Fix.pm b/lib/Catmandu/Fix.pm
index 46744ba..1f8b7d5 100644
--- a/lib/Catmandu/Fix.pm
+++ b/lib/Catmandu/Fix.pm
@@ -664,29 +664,33 @@ __END__
=head1 NAME
-Catmandu::Fix - a Catmandu class used for data crunching
+Catmandu::Fix - a Catmandu class used for data transformations
=head1 SYNOPSIS
- use Catmandu::Fix;
+ # From the command line
- my $fixer = Catmandu::Fix->new(fixes => ['upcase("job")','remove_field("test")']);
+ $ catmandu convert JSON --fix 'add_field(foo,bar)' < data.json
+ $ catmandu convert YAML --fix 'upcase(job); remove_field(test)' < data.yml
+ $ catmandu convert CSV --fix 'sort_field(tags)' < data.csv
+ $ catmandu run /tmp/myfixes.txt
+ $ catmandu convert OAI --url http://biblio.ugent.be/oai --fix /tmp/myfixes.txt
- or
+ # From Perl
- my $fixer = Catmandu::Fix->new(fixes => ['fix_file.txt']);
+ use Catmandu;
- my $arr = $fixer->fix([ ... ]);
- my $hash = $fixer->fix({ ... });
+ my $fixer = Catmandu->fixer('upcase(job)','remove_field(test)');
+ my $fixer = Catmandu->fixer('/tmp/myfixes.txt');
- my $it = Catmandu::Importer::YAML(file => '...');
- $fixer->fix($it)->each(sub {
- ...
- });
+ # Convert data
+ my $arr = $fixer->fix([ ... ]);
+ my $hash = $fixer->fix({ ... });
+ my $importer = Catmandu->importer('YAML', file => 'data.yml');
+ my $fixed_importer = $fixer->fix($importer);
- or
-
- use Catmandu::Fix::upcase as => 'my_upcase';
+ # Inline fixes
+ use Catmandu::Fix::upcase as => 'my_upcase';
use Catmandu::Fix::remove_field as => 'my_remove';
my $hash = { 'job' => 'librarian' , deep => { nested => '1'} };
@@ -696,10 +700,37 @@ Catmandu::Fix - a Catmandu class used for data crunching
=head1 DESCRIPTION
-Catmandu::Fixes can be used for easy data manipulation by non programmers. Using a
-small Perl DSL language end-users can use Fix routines to manipulate data objects.
-A plain text file of fixes can be created to specify all the routines needed to
-tranform the data into the desired format.
+A Catmandu::Fix is a Perl package that can transform data. These packages are used
+for easy data manipulation by non programmers. The main intention is to use fixes
+on the command line or in Fix scripts. A small DSL language is available to execute
+many Fix commands on a stream of data.
+
+When a C<fix> argument is given to a L<Catmandu::Importer>, L<Catmandu::Exporter> or
+L<Catmandu::Store> then the transformations are executed on every item in the stream.
+
+Many fixes can use Fix paths (see below) to point to fields in a data record. A
+Fix script is a collection of one or more Fix commands:
+
+ upcase(title)
+ add_field(deep.nested.field,1)
+
+Conditionals can be used to provide the logic when to execute some fixes:
+
+ if exists(deep.nested.field)
+ add_field(nested,"ok!")
+ end
+
+ unless all_match(title,"PERL")
+ add_field(is_perl,"noooo")
+ end
+
+Binds are used to manipulate the context in which Fixes are executed. E.g.
+execute a fix on every item in a list
+
+ # 'demo' is an array of hashes
+ do list(path:demo)
+ add_field(foo,bar)
+ end
=head1 PATHS
@@ -732,7 +763,7 @@ E.g.
# Create { mods => { titleInfo => [ { 'title' => 'foo' } , { 'title' => 'bar' }] } };
add_field('mods.titleInfo.$last.title', 'bar');
-Read more about the Fix language at our Wiki: L<https://github.com/LibreCat/Catmandu/wiki/Fixes>
+Read more about the Fix language in our Wiki: L<https://github.com/LibreCat/Catmandu/wiki/Fixes>
=head1 PUBLIC METHODS
@@ -763,7 +794,7 @@ Executes all the fixes on a generator function. Returns a new generator with fix
Return the current logger. See L<Catmandu> for activating the logger in your main code.
-=head1 EXTEND
+=head1 CODING
One can extend the Fix language by creating own custom-made fixes. Two methods are
available to create an own Fix function:
diff --git a/lib/Catmandu/Importer.pm b/lib/Catmandu/Importer.pm
index 6d57034..2f7b956 100644
--- a/lib/Catmandu/Importer.pm
+++ b/lib/Catmandu/Importer.pm
@@ -161,10 +161,12 @@ sub _build_http_client {
}
sub readline {
+ warnings::warnif("deprecated","readline is deprecated, fh->getline instead");
$_[0]->fh->getline;
}
sub readall {
+ warnings::warnif("deprecated","readall is deprecated, join('',fh->getlines) instead");
join '', $_[0]->fh->getlines;
}
@@ -180,60 +182,72 @@ Catmandu::Importer - Namespace for packages that can import
=head1 SYNOPSIS
- package Catmandu::Importer::Hello;
+ # From the command line
- use Catmandu::Sane;
- use Moo;
+ # JSON is an importer and YAML an exporter
+ $ catmandu convert JSON to YAML < data.json
- with 'Catmandu::Importer';
-
- sub generator {
- my ($self) = @_;
- state $fh = $self->fh;
- my $n = 0;
- return sub {
- $self->log->debug("generating record " . ++$n);
- my $name = $self->readline;
- return defined $name ? { "hello" => $name } : undef;
- };
- }
-
- package main;
+ # OAI is an importer and JSON an exporter
+ $ catmandu convert OAI --url http://biblio.ugent.be/oai to JSON
+ # Fetch remote content
+ $ catmandu convert JSON --file http://example.com/data.json to YAML
+
+ # From Perl
+
use Catmandu;
+ use Data::Dumper;
+
+ my $importer = Catmandu->importer('JSON', file => 'data.json');
- my $importer = Catmandu->importer('Hello', file => '/tmp/names.txt');
$importer->each(sub {
- my $items = shift;
- .
- .
- .
+ my $item = shift;
+ print Dumper($item);
});
- # Or on the command line
- $ catmandu convert Hello to YAML < /tmp/names.txt
- # Fetch remote content
- $ catmandu convert JSON --file http://example.com/data.json to YAML
+ my $num = $importer->count;
+
+ my $first_item = $importer->first;
+
+ # Convert OAI to JSON in Perl
+ my $importer = Catmandu->importer('OAI', url => 'http://biblio.ugent.be/oai');
+ my $exporter = Catmandu->exporter('JSON');
+
+ $exporter->add_many($importer);
=head1 DESCRIPTION
-A Catmandu::Importer is a Perl package that can import data from an external
-source (a file, the network, ...). Most importers read from an input stream,
-such as STDIN, a given file, or an URL to fetch data from, so this base class
-provides helper method for consuming the input stream once.
+A Catmandu::Importer is a Perl package that can generate structured data from
+sources such as JSON, YAML, XML, RDF or network protocols such as Atom, OAI-PMH,
+SRU and even DBI databases. Given a Catmandu::Importer, a programmer can read
+data from it using one of the many L<Catmandu::Iterable> methods:
-Every Catmandu::Importer is a L<Catmandu::Fixable> and thus inherits a 'fix'
-parameter that can be set in the constructor. When given then each item returned
-by the generator will be automatically Fixed using one or more L<Catmandu::Fix>es.
+
+ $importer->to_array;
+ $importer->count;
+ $importer->each(\&callback);
+ $importer->first;
+ $importer->rest;
+ ...etc...
+
+Every Catmandu::Importer is also L<Catmandu::Fixable> and thus inherits a 'fix'
+parameter that can be set in the constructor. When given a 'fix' parameter, then each
+item returned by the generator will be automatically Fixed using one or
+more L<Catmandu::Fix>es.
E.g.
- my $importer = Catmandu->importer('Hello',fix => ['upcase(hello)']);
+ my $importer = Catmandu->importer('JSON',fix => ['upcase(title)']);
$importer->each( sub {
- my $item = shift ; # Every item will be upcased...
- } );
+ my $item = shift ; # Every $item->{title} is now upcased...
-Every Catmandu::Importer is a L<Catmandu::Iterable> and inherits the methods (C<first>,
-C<each>, C<to_array>...) etc.
+ });
+
+ # or via a Fix file
+ my $importer = Catmandu->importer('JSON',fix => ['/my/fixes.txt']);
+ $importer->each( sub {
+ my $item = shift ; # Every $item->{title} is now upcased...
+
+ });
=head1 CONFIGURATION
@@ -256,7 +270,7 @@ Binmode of the input stream C<fh>. Set to C<:utf8> by default.
=item fix
-An ARRAY of one or more fixes or file scripts to be applied to imported items.
+An ARRAY of one or more Fix-es or Fix scripts to be applied to imported items.
=item data_path
@@ -277,14 +291,25 @@ Variables given here will interpolate the C<file> and C<http_body> options. The
syntax is the same as L<URI::Template>.
# named arguments
- my $importer = Catmandu->importer('Hello',
- file => 'http://example.com/{id}',
- variables => {id => 1234},
+ my $importer = Catmandu->importer('JSON',
+ file => 'http://{server}/{path}',
+ variables => {server => 'biblio.ugent.be', path => 'file.json'},
);
+
# positional arguments
- {variables => "1234,768"}
+ my $importer = Catmandu->importer('JSON',
+ file => 'http://{server}/{path}',
+ variables => 'biblio.ugent.be,file.json',
+ );
+
# or
- {variables => [1234,768]}
+ my $importer = Catmandu->importer('JSON',
+ file => 'http://{server}/{path}',
+ variables => ['biblio.ugent.be','file.json'],
+ );
+
+ # or via the command line
+ $ catmandu convert JSON --file 'http://{server}/{path}' --variables 'biblio.ugent.be,file.json'
=back
@@ -342,17 +367,58 @@ Verify the SSL certificate.
=head1 METHODS
-=head2 readline
+=head2 first, each, rest , ...
+
+See L<Catmandu::Iterable> for all inherited methods.
-Read a line from the input stream. Equivalent to C<< $importer->fh->getline >>.
+=head1 CODING
-=head2 readall
+Create your own importer by creating a Perl package in the Catmandu::Importer namespace that
+implements C<Catmandu::Importer>. Basically, you need to create a method 'generator' which
+returns a callback that creates one Perl hash for each call:
-Read the whole input stream as string.
+ my $importer = Catmandu::Importer::Hello->new;
-=head2 first, each, rest , ...
+ $importer->generator->(); # record
+ $importer->generator->(); # next record
+ $importer->generator->(); # undef = end of stream
-See L<Catmandu::Iterable> for all inherited methods.
+Here is an example of a simple C<Hello> importer:
+
+ package Catmandu::Importer::Hello;
+
+ use Catmandu::Sane;
+ use Moo;
+
+ with 'Catmandu::Importer';
+
+ sub generator {
+ my ($self) = @_;
+ state $fh = $self->fh;
+ my $n = 0;
+ return sub {
+ $self->log->debug("generating record " . ++$n);
+ my $name = $self->fh->getline;
+ return defined $name ? { "hello" => $name } : undef;
+ };
+ }
+
+ 1;
+
+This importer can be called via the command line as:
+
+ $ catmandu convert Hello to JSON < /tmp/names.txt
+ $ catmandu convert Hello to YAML < /tmp/names.txt
+ $ catmandu import Hello to MongoDB --database_name test < /tmp/names.txt
+
+Or, via Perl
+
+ use Catmandu;
+
+ my $importer = Catmandu->importer('Hello', file => '/tmp/names.txt');
+ $importer->each(sub {
+ my $items = shift;
+ });
=head1 SEE ALSO
diff --git a/lib/Catmandu/Importer/Text.pm b/lib/Catmandu/Importer/Text.pm
index b398926..d115dc5 100644
--- a/lib/Catmandu/Importer/Text.pm
+++ b/lib/Catmandu/Importer/Text.pm
@@ -31,7 +31,7 @@ sub generator {
state $count = 0;
state $line;
- while ( defined( $line = $self->readline ) ) {
+ while ( defined( $line = $self->fh->getline ) ) {
chomp $line;
next if $pattern and $line !~ $pattern;
diff --git a/lib/Catmandu/Introduction.pod b/lib/Catmandu/Introduction.pod
index 4c14f5d..a247722 100644
--- a/lib/Catmandu/Introduction.pod
+++ b/lib/Catmandu/Introduction.pod
@@ -21,15 +21,15 @@ command:
$ catmandu convert JSON to CSV < data.json
-Or, to store a YAML file into an ElasticSearch database type:
+Or, to store a YAML file into an ElasticSearch database type (requires Catmandu::ElasticSearch):
$ catmandu import YAML to ElasticSearch --index_name demo < test.yml
-To export all the data from an Solr search engine into JSON type:
+To export all the data from a Solr search engine into JSON type (requires Catmandu::Solr):
$ catmandu export Solr --url http://localhost:8983/solr to JSON
-With Catmandu one can import OAI-PMH records in your application:
+With Catmandu one can import OAI-PMH records in your application (requires Catmandu::OAI):
$ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt
@@ -52,7 +52,7 @@ which contains a (sub)field 'nested'.
Catmandu was created by librarians for librarians. We process a lot of metadata especially
library metadata in formats such as MARC, MAB2 and MODS. With the following command we can extract
-data from a marc record and to store it into the title field:
+data from a marc record and store it in the title field (requires Catmandu::MARC):
$ catmandu convert MARC --fix 'marc_map(245,title)' < data.mrc
@@ -123,7 +123,7 @@ For a quick and demo installation visit our L<blog|https://librecatproject.wordp
where a VirtualBox image is available containing all the Catmandu modules, including
ElasticSearch and MongoDB.
-On our L<website|http://librecat.org/Catmandu/> we provide installation instructions for:
+On our L<website|http://librecat.org/Catmandu/#installation> we provide installation instructions for:
* Debian
* Ubuntu Server
diff --git a/t/Catmandu-Importer.t b/t/Catmandu-Importer.t
index 1fb5663..3ab2bff 100644
--- a/t/Catmandu-Importer.t
+++ b/t/Catmandu-Importer.t
@@ -24,7 +24,7 @@ require_ok $pkg;
my ($self) = @_;
sub {
state $fh = $self->fh;
- my $name = $self->readline;
+ my $name = $self->fh->getline;
return defined $name ? { "hello" => $name } : undef;
};
}
@@ -50,7 +50,7 @@ $i = T::Importer->new( file => \"World" );
is_deeply $i->to_array, [{ hello => "World"}], 'import from string reference';
$i = T::Importer->new( file => \"Hello\nWorld" );
-is $i->readall, "Hello\nWorld", "import all";
+is join('',$i->fh->getlines), "Hello\nWorld", "import all";
$i = T::DataPathImporter->new;
is_deeply $i->to_array, [{abc => [{a=>1},{b=>2},{c=>3}]},{abc => [{d=>4},{e=>5},{f=>6}]}];
@@ -60,31 +60,31 @@ $i = T::DataPathImporter->new(data_path => 'abc.*');
is_deeply $i->to_array, [{a=>1},{b=>2},{c=>3},{d=>4},{e=>5},{f=>6}];
$i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/' );
-is $i->readall , "test123" , "read from http (file)";
+is join('',$i->fh->getlines) , "test123" , "read from http (file)";
$i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{id}' , variables => { id => 1234} );
is $i->file , "http://demo.org/1234";
-is $i->readall , "test1234" , "read from http (file + variables)";
+is join('',$i->fh->getlines) , "test1234" , "read from http (file + variables)";
$i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{1},{2},{3}' , variables => [qw(red green blue)]);
is $i->file , "http://demo.org/red,green,blue";
-is $i->readall , "RED-GREEN-BLUE" , "read from http (file + variables list)";
+is join('',$i->fh->getlines) , "RED-GREEN-BLUE" , "read from http (file + variables list)";
$i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{1},{2},{3}' , variables => "red,green,blue" );
is $i->file , "http://demo.org/red,green,blue";
-is $i->readall , "RED-GREEN-BLUE" , "read from http (file + variables list)";
+is join('',$i->fh->getlines) , "RED-GREEN-BLUE" , "read from http (file + variables list)";
$i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/post' , http_method => 'POST' , http_body => '=={id}==' , variables => { id => 1234} );
is $i->file , "http://demo.org/post";
-is $i->readall , "POST" , "read from http (file + variables list + post request)";
+is join('',$i->fh->getlines) , "POST" , "read from http (file + variables list + post request)";
$i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/post' , http_method => 'POST', http_body => '=={id}==' , variables => "red,green,blue" );
is $i->file , "http://demo.org/post";
-is $i->readall , "POST" , "read from http (file + variables list + post request)";
+is join('',$i->fh->getlines) , "POST" , "read from http (file + variables list + post request)";
$i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/not-exsists' , http_method => 'POST', http_body => '=={id}==' , variables => "red,green,blue" );
-throws_ok { $i->readall } 'Catmandu::HTTPError' , "throws an error on non-existing pages";
+throws_ok { $i->fh->getlines } 'Catmandu::HTTPError' , "throws an error on non-existing pages";
$i = T::Importer->new(file => 'http://demo.org');
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git
More information about the Pkg-perl-cvs-commits
mailing list