[libcatmandu-perl] 61/101: Adding more POD

Jonas Smedegaard dr at jones.dk
Tue Feb 23 13:43:56 UTC 2016


This is an automated email from the git hooks/post-receive script.

js pushed a commit to branch master
in repository libcatmandu-perl.

commit 8fc50027635a35bb6f9229906a5831297024640a
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date:   Wed Feb 3 16:50:00 2016 +0100

    Adding more POD
---
 Build.PL                      |   4 +-
 README.md                     |  10 +--
 lib/Catmandu.pm               | 136 +++++++++++++++++++++++++---------
 lib/Catmandu/Exporter.pm      |  79 ++++++++++++++------
 lib/Catmandu/Fix.pm           |  71 +++++++++++++-----
 lib/Catmandu/Importer.pm      | 166 +++++++++++++++++++++++++++++-------------
 lib/Catmandu/Importer/Text.pm |   2 +-
 lib/Catmandu/Introduction.pod |  10 +--
 t/Catmandu-Importer.t         |  18 ++---
 9 files changed, 348 insertions(+), 148 deletions(-)

diff --git a/Build.PL b/Build.PL
index aae0a0a..946832e 100644
--- a/Build.PL
+++ b/Build.PL
@@ -1,5 +1,5 @@
 
-# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.041.
+# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.039.
 use strict;
 use warnings;
 
@@ -15,7 +15,7 @@ my %module_build_args = (
   },
   "dist_abstract" => "a data toolkit",
   "dist_author" => [
-    "Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>"
+    "Nicolas Steenlant <nicolas.steenlant\@ugent.be>"
   ],
   "dist_name" => "Catmandu",
   "dist_version" => "0.9505",
diff --git a/README.md b/README.md
index c318963..1d1edba 100644
--- a/README.md
+++ b/README.md
@@ -17,15 +17,15 @@ command:
 
     $ catmandu convert JSON to CSV < data.json
 
-Or, to store a YAML file into an ElasticSearch database type:
+Or, to store a YAML file into an ElasticSearch database type (requires Catmandu::ElasticSearch):
 
     $ catmandu import YAML to ElasticSearch --index_name demo < test.yml
 
-To export all the data from an Solr search engine into JSON type:
+To export all the data from a Solr search engine into JSON type (requires Catmandu::Solr):
 
     $ catmandu export Solr --url http://localhost:8983/solr to JSON
 
-With Catmandu one can import OAI-PMH records in your application:
+With Catmandu one can import OAI-PMH records in your application (requires Catmandu::OAI):
 
     $ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt
 
@@ -48,7 +48,7 @@ which contains a (sub)field 'nested'.
 
 Catmandu was created by librarians for librarians. We process a lot of metadata especially
 library metadata in formats such as MARC, MAB2 and MODS. With the following command we can extract
-data from a marc record and to store it into the title field:
+data from a marc record and store it into the title field (requires Catmandu::MARC):
 
     $ catmandu convert MARC --fix 'marc_map(245,title)' < data.mrc
 
@@ -119,7 +119,7 @@ For a quick and demo installation visit our [blog](https://librecatproject.wordp
 where a VirtualBox image is available containing all the Catmandu modules, including
 ElasticSearch and MongoDB.
 
-On our [website](http://librecat.org/Catmandu/) we provide installation instructions for:
+On our [website](http://librecat.org/Catmandu/#installation) we provide installation instructions for:
 
     * Debian
     * Ubuntu Server
diff --git a/lib/Catmandu.pm b/lib/Catmandu.pm
index d5adc09..f2c1bd5 100644
--- a/lib/Catmandu.pm
+++ b/lib/Catmandu.pm
@@ -148,15 +148,34 @@ __END__
 Catmandu - a data toolkit
 
 =head1 SYNOPSIS
+    
+    # From the command line
 
-    use Catmandu -all;
-    use Catmandu qw(config store);
-    use Catmandu -load; # loads default configuration file
-    use Catmandu -all -load => [qw(/config/path' '/another/config/path)];
+    # Convert data from one format to another
+    $ catmandu convert JSON to CSV  < data.json
+    $ catmandu convert CSV  to YAML < data.csv
+    $ catmandu convert MARC to YAML < data.mrc
+
+    # Fix data, add, delete, change fields
+    $ catmandu convert JSON --fix 'move_field(title,my_title)' < data.json
+    $ catmandu convert JSON --fix all_my_fixes.txt < data.json
+
+    # Import data into a database
+    # Requires: Catmandu::MongoDB and Catmandu::ElasticSearch
+    $ catmandu import YAML to MongoDB --database_name bibliography < data.yml
+    $ catmandu import CSV to ElasticSearch --index_name mystuff < data.csv
+
+    # Export data from a database
+    # Requires: Catmandu::MongoDB and Catmandu::ElasticSearch
+    $ catmandu export MongoDB --database_name bibliography to YAML > data.yml
+    $ catmandu export ElasticSearch --index_name mystuff to CSV > data.csv
+
+    # From Perl
+    use Catmandu;
 
     # If you have Catmandu::OAI and Catmandu::MongoDB installed
     my $importer = Catmandu->importer('OAI',url => 'https://biblio.ugent.be/oai')
-    my $store    = Catmandu->exporter('MongoDB',database_name => 'test');
+    my $store    = Catmandu->store('MongoDB',database_name => 'test');
 
     # Import all the OAI records into MongoDB
     $store->add_many($importer);
@@ -170,51 +189,100 @@ Catmandu - a data toolkit
     my $fixer    = Catmandu->fixer('myfixes.txt');
     my $exporter = Catmandu->exporter('YAML');
 
-    $exporter->add_many(
-        $fixer->fix($store)
-    );
+    $exporter->add_many( $fixer->fix($store) );
     $exporter->commit;
 
-    # Or be very lazy and do this via the command line
-    $ catmandu import OAI --url https://biblio.ugent.be/oai to MongoDB --database_name test
-    $ catmandu export MongoDB --database_name test --fix myfixes.txt to YAML
-
 =head1 DESCRIPTION
 
-Importing, transforming, storing and indexing data should be easy.
+Catmandu provides a command line client and a Perl API to ease the extraction (E),
+transformation (T) and loading (L) of data into databases or data files, ETL in short.
+
+Most of the daily work processing structured data can be done on the command line
+executing the C<catmandu> command. With our catmandu command ETL processing is available 
+in a Perl context. Catmandu is different from other
+ETL tools by its focus on command line processing with much support for data formats
+available in (academic) libraries: MARC, MODS, OAI and SRU. But, also generic formats such
+as JSON, YAML, CSV, Excel, XML, RDF, Atom are supported.
 
-Catmandu provides a suite of Perl modules to ease the import, storage,
-retrieval, export and transformation of metadata records. Combine Catmandu
-modules with web application frameworks such as PSGI/Plack, document stores
-such as MongoDB and full text indexes such as Solr to create a rapid
-development environment for digital library services such as institutional
-repositories and search engines.
+Read :
 
-In the L<http://librecat.org/> project it is our goal to provide an
-open source set of programming components to build up digital libraries
-services suited to your local needs.
+=over
+  
+=item  * L<Catmandu::Introduction> for a primer on the command line capabilities of Catmandu. 
+
+=item  * L<Catmandu::Importer> for the basics of importing
+
+=item  * L<Catmandu::Fix> for the basics of transformations
+
+=item  * L<Catmandu::Exporter> for the basics of exporting
 
-Read an in depth introduction into Catmandu programming at
-L<https://github.com/LibreCat/Catmandu/wiki/Introduction>.
+=item  * L<Catmandu::Store> for the basics of storing information
 
-=head1 INSTALLATION
+=item  * Or, visit our website at L<http://librecat.org/> and our blog L<https://librecatproject.wordpress.com/> 
+    for many tutorials
 
-To install Catmandu just run:
+=back
+
+The documentation below describes the methods available when including Catmandu as
+part of a Perl script. For an overview of the command line tool itself read the 
+documentation on L<catmandu>.
 
-  cpanm Catmandu
+=head1 USE
 
-Read our documentation for more installation hints and OS specific requirements:
+To include Catmandu in a Perl script it should be loaded with a C<use> command:
 
-http://librecat.org/Catmandu/#installation
+    use Catmandu;
 
-=head1 METHODS
+By default no methods are imported into the Perl context. To import all or some Catmandu methods,
+provide them as a list to the C<use> command:
+
+    use Catmandu -all;
+    use Catmandu qw(config store exporter);
+
+Catmandu can load configuration options for exporters, importers, fixers via configuration
+files (see the CONFIG section below). When adding the -load option (optionally with a path) to the
+C<use> command, these configuration files will be loaded at the start of your script.
+
+    use Catmandu -load;
+    use Catmandu -load => ['/my/config/directory'];
+
+    # or use all the options
+    use Catmandu -all -load => [qw(/config/path /another/config/path)];
+
+=head1 CLASS METHODS
 
 =head2 log
 
-Return the current logger (the L<Log::Any::Adapter> for category
-L<Catmandu::Env>). See L<Log::Any#Logging> for how to send messages to the
-logger. Read our L<https://github.com/LibreCat/Catmandu/wiki/Cookbook>
-"See some debug messages" for some hints on logging.
+Return the current L<Log::Any::Adapter> logger.
+
+    use Catmandu;
+    use Log::Any::Adapter;
+    use Log::Log4perl;
+
+    Log::Any::Adapter->set('Log4perl');
+    Log::Log4perl::init('./log4perl.conf');
+
+    my $logger = Catmandu->log;
+    $logger->info("Starting main program");
+
+with log4perl.conf like:
+
+    # Send a copy of all logging messages to STDERR
+    log4perl.rootLogger=DEBUG,STDERR
+
+    # Logging specific for your main program
+    log4perl.category.myprog=INFO,STDERR
+
+    # Logging specific for one part of Catmandu
+    log4perl.category.Catmandu::Fix=DEBUG,STDERR
+
+    # Where to send the STDERR output
+    log4perl.appender.STDERR=Log::Log4perl::Appender::Screen
+    log4perl.appender.STDERR.stderr=1
+    log4perl.appender.STDERR.utf8=1
+
+    log4perl.appender.STDERR.layout=PatternLayout
+    log4perl.appender.STDERR.layout.ConversionPattern=%d [%P] - %p %l time=%r : %m%n
 
 =head2 default_load_path('/default/path')
 
diff --git a/lib/Catmandu/Exporter.pm b/lib/Catmandu/Exporter.pm
index 8cc9b30..f5aefa4 100644
--- a/lib/Catmandu/Exporter.pm
+++ b/lib/Catmandu/Exporter.pm
@@ -42,37 +42,38 @@ Catmandu::Exporter - Namespace for packages that can export
 
 =head1 SYNOPSIS
 
-    package Catmandu::Exporter::Foo;
-
-    use Catmandu::Sane;
-    use Moo;
-
-    with 'Catmandu::Exporter'
+    # From the command line
 
-    sub add {
-        my ($self, $data) = @_;
-        my $fh = $self->fh;
-        $fh->print( ... );
-    }
+    # JSON is an importer and YAML an exporter
+    $ catmandu convert JSON to YAML < data.json
 
-    package main;
+    # OAI is an importer and JSON an exporter
+    $ catmandu convert OAI --url http://biblio.ugent.be/oai to JSON 
 
+    # From Perl
     use Catmandu;
 
-    my $exporter = Catmandu->exporter('Foo', file => "/tmp/output.txt");
-    
-    # Or on the command line
-    $ catmandu convert JSON to Foo < /tmp/something.txt >/tmp/output.txt
+    my $importer = Catmandu->importer('JSON', file => 'data.json');
+    my $exporter = Catmandu->exporter('YAML');
+
+    $exporter->add({ record => "one"});
+    $exporter->add_many([ { record => "one" } , { record => "two" } ]);
+    $exporter->add_many($importer);
 
 =head1 DESCRIPTION
 
-A Catmandu::Exporter is a Perl package that can export data. By default, data
-items are written to STDOUT. Optionally provide a C<file> or C<fh> parameter to
-write to a file, string, or handle. New exporter modules are expected to use
-the C<print> method of C<fh>.
+A Catmandu::Exporter is a Perl package that can export data into JSON, YAML, XML
+or many other formats. By default, data is written to STDOUT. Optionally provide a C<file>
+or C<fh> parameter to write to a file, string, or handle. 
 
 Every Catmandu::Exporter is a L<Catmandu::Fixable> thus provides a C<fix>
-parameter and method to apply fixes to exported items.
+parameter and method to apply fixes to exported items:
+    
+    my $exporter = Catmandu->exporter('JSON', fix => ['upcase(title)']);
+
+    # This will be printed to STDOUT like: {"title":"MY TITLE"}
+    $exporter->add({ title => "my title"});
+
 
 Every Catmandu::Exporter is a L<Catmandu::Addable> thus inherits the methods
 C<add> and C<add_many>.
@@ -123,13 +124,47 @@ Returns the number of items exported by this Catmandu::Exporter.
 
 Returns the current logger.
 
+=head1 CODING
+
+Create your own exporter by creating a Perl package in the Catmandu::Exporter namespace
+that implements C<Catmandu::Exporter>. Basically, you need to create a method add which
+writes a Perl hash to a file handle:
+
+
+    package Catmandu::Exporter::Foo;
+
+    use Catmandu::Sane;
+    use Moo;
+
+    with 'Catmandu::Exporter';
+
+    sub add {
+        my ($self, $data) = @_;
+        my $fh = $self->fh;
+        $fh->print( "Hello, World!");
+    }
+
+    1;
+
+This exporter can be called from the command line as:
+
+    $ catmandu convert JSON to Foo < data.json
+
+Or, via Perl
+
+    use Catmandu;
+
+    my $exporter = Catmandu->exporter('Foo', file => "/tmp/output.txt");
+
+    $exporter->add({test => 123});
+
 =head1 SEE ALSO
 
 See function L<export_to_string|Catmandu/export_to_string> in module
 L<Catmandu>.
 
 The exporters L<Catmandu::Exporter::JSON>, L<Catmandu::Exporter::YAML>,
-L<Catmandu::Exporter::CSV>, and L<Catmandu::Exporter::RIS> are included in
+L<Catmandu::Exporter::CSV>, and L<Catmandu::Exporter::Text> are included in
 Catmandu core.
 
 See L<Catmandu::Importer> for the opposite action.
diff --git a/lib/Catmandu/Fix.pm b/lib/Catmandu/Fix.pm
index 46744ba..1f8b7d5 100644
--- a/lib/Catmandu/Fix.pm
+++ b/lib/Catmandu/Fix.pm
@@ -664,29 +664,33 @@ __END__
 
 =head1 NAME
 
-Catmandu::Fix - a Catmandu class used for data crunching
+Catmandu::Fix - a Catmandu class used for data transformations
 
 =head1 SYNOPSIS
 
-    use Catmandu::Fix;
+    # From the command line
 
-    my $fixer = Catmandu::Fix->new(fixes => ['upcase("job")','remove_field("test")']);
+    $ catmandu convert JSON --fix 'add_field(foo,bar)' < data.json
+    $ catmandu convert YAML --fix 'upcase(job); remove_field(test)' < data.yml
+    $ catmandu convert CSV  --fix 'sort_field(tags)' < data.csv
+    $ catmandu run /tmp/myfixes.txt
+    $ catmandu convert OAI --url http://biblio.ugent.be/oai --fix /tmp/myfixes.txt
 
-    or
+    # From Perl
 
-    my $fixer = Catmandu::Fix->new(fixes => ['fix_file.txt']);
+    use Catmandu;
 
-    my $arr  = $fixer->fix([ ... ]);
-    my $hash = $fixer->fix({ ... });
+    my $fixer = Catmandu->fixer('upcase(job)','remove_field(test)');
+    my $fixer = Catmandu->fixer('/tmp/myfixes.txt');
 
-    my $it = Catmandu::Importer::YAML(file => '...');
-    $fixer->fix($it)->each(sub {
-        ...
-    });
+    # Convert data
+    my $arr      = $fixer->fix([ ... ]);
+    my $hash     = $fixer->fix({ ... });
+    my $importer = Catmandu->importer('YAML', file => 'data.yml');
+    my $fixed_importer = $fixer->fix($importer);
 
-    or
-
-    use Catmandu::Fix::upcase as => 'my_upcase';
+    # Inline fixes
+    use Catmandu::Fix::upcase       as => 'my_upcase';
     use Catmandu::Fix::remove_field as => 'my_remove';
 
     my $hash = { 'job' => 'librarian' , deep => { nested => '1'} };
@@ -696,10 +700,37 @@ Catmandu::Fix - a Catmandu class used for data crunching
 
 =head1 DESCRIPTION
 
-Catmandu::Fixes can be used for easy data manipulation by non programmers. Using a
-small Perl DSL language end-users can use Fix routines to manipulate data objects.
-A plain text file of fixes can be created to specify all the routines needed to
-tranform the data into the desired format.
+A Catmandu::Fix is a Perl package that can transform data. These packages are used
+for easy data manipulation by non programmers. The main intention is to use fixes
+on the command line or in Fix scripts. A small DSL language is available to execute
+many Fix commands on a stream of data.
+
+When a C<fix> argument is given to a L<Catmandu::Importer>, L<Catmandu::Exporter> or
+L<Catmandu::Store> then the transformations are executed on every item in the stream.
+
+Many fixes can use Fix paths (see below) to point to fields in a data record. A
+Fix script is a collection of one or more Fix commands:
+
+    upcase(title)
+    add_field(deep.nested.field,1)
+
+Conditionals can be used to provide the logic when to execute some fixes:
+
+    if exists(deep.nested.field)
+        add_field(nested,"ok!")
+    end
+
+    unless all_match(title,"PERL")
+        add_field(is_perl,"noooo")
+    end
+
+Binds are used to manipulate the context in which Fixes are executed. E.g.
+execute a fix on every item in a list
+
+     # 'demo' is an array of hashes
+     do list(path:demo)
+        add_field(foo,bar)
+     end   
 
 =head1 PATHS
 
@@ -732,7 +763,7 @@ E.g.
  # Create { mods => { titleInfo => [ { 'title' => 'foo' } , { 'title' => 'bar' }] } };
  add_field('mods.titleInfo.$last.title', 'bar');
 
-Read more about the Fix language at our Wiki: L<https://github.com/LibreCat/Catmandu/wiki/Fixes>
+Read more about the Fix language in our Wiki: L<https://github.com/LibreCat/Catmandu/wiki/Fixes>
 
 =head1 PUBLIC METHODS
 
@@ -763,7 +794,7 @@ Executes all the fixes on a generator function. Returns a new generator with fix
 
 Return the current logger. See L<Catmandu> for activating the logger in your main code.
 
-=head1 EXTEND
+=head1 CODING
 
 One can extend the Fix language by creating own custom-made fixes. Two methods are
 available to create an own Fix function:
diff --git a/lib/Catmandu/Importer.pm b/lib/Catmandu/Importer.pm
index 6d57034..2f7b956 100644
--- a/lib/Catmandu/Importer.pm
+++ b/lib/Catmandu/Importer.pm
@@ -161,10 +161,12 @@ sub _build_http_client {
 }
 
 sub readline {
+    warnings::warnif("deprecated","readline is deprecated, fh->getline instead");
     $_[0]->fh->getline;
 }
 
 sub readall {
+    warnings::warnif("deprecated","readall is deprecated, join('',fh->getlines) instead");
     join '', $_[0]->fh->getlines;
 }
 
@@ -180,60 +182,72 @@ Catmandu::Importer - Namespace for packages that can import
 
 =head1 SYNOPSIS
 
-    package Catmandu::Importer::Hello;
+    # From the command line
 
-    use Catmandu::Sane;
-    use Moo;
+    # JSON is an importer and YAML an exporter
+    $ catmandu convert JSON to YAML < data.json
 
-    with 'Catmandu::Importer';
-
-    sub generator {
-        my ($self) = @_;
-        state $fh = $self->fh;
-        my $n = 0;
-        return sub {
-            $self->log->debug("generating record " . ++$n);
-            my $name = $self->readline;
-            return defined $name ? { "hello" => $name } : undef;
-        };
-    }
-
-    package main;
+    # OAI is an importer and JSON an exporter
+    $ catmandu convert OAI --url http://biblio.ugent.be/oai to JSON 
 
+    # Fetch remote content
+    $ catmandu convert JSON --file http://example.com/data.json to YAML
+    
+    # From Perl
+    
     use Catmandu;
+    use Data::Dumper;
+
+    my $importer = Catmandu->importer('JSON', file => 'data.json');
 
-    my $importer = Catmandu->importer('Hello', file => '/tmp/names.txt');
     $importer->each(sub {
-        my $items = shift;
-        .
-        .
-        .
+        my $item = shift;
+        print Dumper($item);
     });
 
-    # Or on the command line
-    $ catmandu convert Hello to YAML < /tmp/names.txt
-    # Fetch remote content
-    $ catmandu convert JSON --file http://example.com/data.json to YAML
+    my $num = $importer->count;
+
+    my $first_item = $importer->first;
+
+    # Convert OAI to JSON in Perl
+    my $importer = Catmandu->importer('OAI', url => 'http://biblio.ugent.be/oai');
+    my $exporter = Catmandu->exporter('JSON');
+
+    $exporter->add_many($importer);
 
 =head1 DESCRIPTION
 
-A Catmandu::Importer is a Perl package that can import data from an external
-source (a file, the network, ...). Most importers read from an input stream, 
-such as STDIN, a given file, or an URL to fetch data from, so this base class
-provides helper method for consuming the input stream once.
+A Catmandu::Importer is a Perl package that can generate structured data from
+sources such as JSON, YAML, XML, RDF or network protocols such as Atom, OAI-PMH,
+SRU and even DBI databases. Given a Catmandu::Importer, a programmer can read
+data from it using one of the many L<Catmandu::Iterable> methods:
 
-Every Catmandu::Importer is a L<Catmandu::Fixable> and thus inherits a 'fix'
-parameter that can be set in the constructor. When given then each item returned
-by the generator will be automatically Fixed using one or more L<Catmandu::Fix>es.
+
+    $importer->to_array;
+    $importer->count;
+    $importer->each(\&callback);
+    $importer->first;
+    $importer->rest;
+    ...etc...
+
+Every Catmandu::Importer is also a L<Catmandu::Fixable> and thus inherits a 'fix'
+parameter that can be set in the constructor. When given a 'fix' parameter, then each 
+item returned by the generator will be automatically Fixed using one or 
+more L<Catmandu::Fix>es.
 E.g.
     
-    my $importer = Catmandu->importer('Hello',fix => ['upcase(hello)']);
+    my $importer = Catmandu->importer('JSON',fix => ['upcase(title)']);
     $importer->each( sub {
-        my $item = shift ; # Every item will be upcased... 
-    } );
+        my $item = shift ; # Every $item->{title} is now upcased... 
 
-Every Catmandu::Importer is a L<Catmandu::Iterable> and inherits the methods (C<first>,
-C<each>, C<to_array>...) etc.
+    });
+
+    # or via a Fix file
+    my $importer = Catmandu->importer('JSON',fix => ['/my/fixes.txt']);
+    $importer->each( sub {
+        my $item = shift ; # Every $item->{title} is now upcased... 
+
+    });
 
 =head1 CONFIGURATION
 
@@ -256,7 +270,7 @@ Binmode of the input stream C<fh>. Set to C<:utf8> by default.
 
 =item fix
 
-An ARRAY of one or more fixes or file scripts to be applied to imported items.
+An ARRAY of one or more Fix-es or Fix scripts to be applied to imported items.
 
 =item data_path
 
@@ -277,14 +291,25 @@ Variables given here will interpolate the C<file> and C<http_body> options. The
 syntax is the same as L<URI::Template>.
 
     # named arguments
-    my $importer = Catmandu->importer('Hello',
-        file => 'http://example.com/{id}',
-        variables => {id => 1234},
+    my $importer = Catmandu->importer('JSON',
+        file => 'http://{server}/{path}',
+        variables => {server => 'biblio.ugent.be', path => 'file.json'},
     );
+
     # positional arguments
-    {variables => "1234,768"}
+    my $importer = Catmandu->importer('JSON',
+        file => 'http://{server}/{path}',
+        variables => 'biblio.ugent.be,file.json',
+    );
+
     # or
-    {variables => [1234,768]}
+    my $importer = Catmandu->importer('JSON',
+        file => 'http://{server}/{path}',
+        variables => ['biblio.ugent.be','file.json'],
+    );
+
+    # or via the command line
+    $ catmandu convert JSON --file 'http://{server}/{path}' --variables 'biblio.ugent.be,file.json'
 
 =back
 
@@ -342,17 +367,58 @@ Verify the SSL certificate.
 
 =head1 METHODS
 
-=head2 readline
+=head2 first, each, rest , ...
+
+See L<Catmandu::Iterable> for all inherited methods.
 
-Read a line from the input stream. Equivalent to C<< $importer->fh->getline >>.
+=head1 CODING
 
-=head2 readall
+Create your own importer by creating a Perl package in the Catmandu::Importer namespace that
+implements C<Catmandu::Importer>. Basically, you need to create a method 'generator' which
+returns a callback that creates one Perl hash for each call:
 
-Read the whole input stream as string.
+    my $next = Catmandu::Importer::Hello->new->generator;
 
-=head2 first, each, rest , ...
+    $next->(); # record
+    $next->(); # next record
+    $next->(); # undef = end of stream
 
-See L<Catmandu::Iterable> for all inherited methods.
+Here is an example of a simple C<Hello> importer:
+
+    package Catmandu::Importer::Hello;
+
+    use Catmandu::Sane;
+    use Moo;
+
+    with 'Catmandu::Importer';
+
+    sub generator {
+        my ($self) = @_;
+        state $fh = $self->fh;
+        my $n = 0;
+        return sub {
+            $self->log->debug("generating record " . ++$n);
+            my $name = $self->fh->getline;
+            return defined $name ? { "hello" => $name } : undef;
+        };
+    }
+
+    1;
+
+This importer can be called via the command line as:
+
+    $ catmandu convert Hello to JSON < /tmp/names.txt
+    $ catmandu convert Hello to YAML < /tmp/names.txt
+    $ catmandu import Hello to MongoDB --database_name test < /tmp/names.txt
+
+Or, via Perl
+
+    use Catmandu;
+
+    my $importer = Catmandu->importer('Hello', file => '/tmp/names.txt');
+    $importer->each(sub {
+        my $items = shift;
+    });
 
 =head1 SEE ALSO
 
diff --git a/lib/Catmandu/Importer/Text.pm b/lib/Catmandu/Importer/Text.pm
index b398926..d115dc5 100644
--- a/lib/Catmandu/Importer/Text.pm
+++ b/lib/Catmandu/Importer/Text.pm
@@ -31,7 +31,7 @@ sub generator {
         state $count   = 0;
         state $line;
 
-        while ( defined( $line = $self->readline ) ) {
+        while ( defined( $line = $self->fh->getline ) ) {
             chomp $line;
             next if $pattern and $line !~ $pattern;
 
diff --git a/lib/Catmandu/Introduction.pod b/lib/Catmandu/Introduction.pod
index 4c14f5d..a247722 100644
--- a/lib/Catmandu/Introduction.pod
+++ b/lib/Catmandu/Introduction.pod
@@ -21,15 +21,15 @@ command:
 
     $ catmandu convert JSON to CSV < data.json
 
-Or, to store a YAML file into an ElasticSearch database type:
+Or, to store a YAML file into an ElasticSearch database type (requires Catmandu::ElasticSearch):
 
     $ catmandu import YAML to ElasticSearch --index_name demo < test.yml
 
-To export all the data from an Solr search engine into JSON type:
+To export all the data from a Solr search engine into JSON type (requires Catmandu::Solr):
 
     $ catmandu export Solr --url http://localhost:8983/solr to JSON
 
-With Catmandu one can import OAI-PMH records in your application:
+With Catmandu one can import OAI-PMH records in your application (requires Catmandu::OAI):
 
     $ catmandu convert OAI --url http://biblio.ugent.be/oai --set allFtxt
 
@@ -52,7 +52,7 @@ which contains a (sub)field 'nested'.
 
 Catmandu was created by librarians for librarians. We process a lot of metadata especially
 library metadata in formats such as MARC, MAB2 and MODS. With the following command we can extract
-data from a marc record and to store it into the title field:
+data from a marc record and store it into the title field (requires Catmandu::MARC):
 
     $ catmandu convert MARC --fix 'marc_map(245,title)' < data.mrc
 
@@ -123,7 +123,7 @@ For a quick and demo installation visit our L<blog|https://librecatproject.wordp
 where a VirtualBox image is available containing all the Catmandu modules, including
 ElasticSearch and MongoDB.
 
-On our L<website|http://librecat.org/Catmandu/> we provide installation instructions for:
+On our L<website|http://librecat.org/Catmandu/#installation> we provide installation instructions for:
 
  * Debian
  * Ubuntu Server
diff --git a/t/Catmandu-Importer.t b/t/Catmandu-Importer.t
index 1fb5663..3ab2bff 100644
--- a/t/Catmandu-Importer.t
+++ b/t/Catmandu-Importer.t
@@ -24,7 +24,7 @@ require_ok $pkg;
         my ($self) = @_;
         sub {
             state $fh = $self->fh;
-            my $name = $self->readline;
+            my $name = $self->fh->getline;
             return defined $name ? { "hello" => $name } : undef;
         };
     }
@@ -50,7 +50,7 @@ $i = T::Importer->new( file => \"World" );
 is_deeply $i->to_array, [{ hello => "World"}], 'import from string reference';
 
 $i = T::Importer->new( file => \"Hello\nWorld" );
-is $i->readall, "Hello\nWorld", "import all";
+is join('',$i->fh->getlines), "Hello\nWorld", "import all";
 
 $i = T::DataPathImporter->new;
 is_deeply $i->to_array, [{abc => [{a=>1},{b=>2},{c=>3}]},{abc => [{d=>4},{e=>5},{f=>6}]}];
@@ -60,31 +60,31 @@ $i = T::DataPathImporter->new(data_path => 'abc.*');
 is_deeply $i->to_array, [{a=>1},{b=>2},{c=>3},{d=>4},{e=>5},{f=>6}];
 
 $i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/' );
-is $i->readall , "test123" , "read from http (file)";
+is join('',$i->fh->getlines) , "test123" , "read from http (file)";
 
 $i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{id}' , variables => { id => 1234} );
 is $i->file , "http://demo.org/1234";
-is $i->readall , "test1234" , "read from http (file + variables)";
+is join('',$i->fh->getlines) , "test1234" , "read from http (file + variables)";
 
 $i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{1},{2},{3}' , variables => [qw(red green blue)]);
 is $i->file , "http://demo.org/red,green,blue";
-is $i->readall , "RED-GREEN-BLUE" , "read from http (file + variables list)";
+is join('',$i->fh->getlines) , "RED-GREEN-BLUE" , "read from http (file + variables list)";
 
 $i = T::Importer->new( user_agent => user_agent() , file => 'http://demo.org/{1},{2},{3}' , variables => "red,green,blue" );
 is $i->file , "http://demo.org/red,green,blue";
-is $i->readall , "RED-GREEN-BLUE" , "read from http (file + variables list)";
+is join('',$i->fh->getlines) , "RED-GREEN-BLUE" , "read from http (file + variables list)";
 
 $i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/post' , http_method => 'POST' , http_body => '=={id}==' , variables => { id => 1234} );
 is $i->file , "http://demo.org/post";
-is $i->readall , "POST" , "read from http (file + variables list + post request)";
+is join('',$i->fh->getlines) , "POST" , "read from http (file + variables list + post request)";
 
 $i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/post' , http_method => 'POST',  http_body => '=={id}==' , variables => "red,green,blue" );
 is $i->file , "http://demo.org/post";
-is $i->readall , "POST" , "read from http (file + variables list + post request)";
+is join('',$i->fh->getlines) , "POST" , "read from http (file + variables list + post request)";
 
 $i = T::Importer->new(user_agent => user_agent() , file => 'http://demo.org/not-exsists' , http_method => 'POST',  http_body => '=={id}==' , variables => "red,green,blue" );
 
-throws_ok { $i->readall } 'Catmandu::HTTPError' , "throws an error on non-existing pages";
+throws_ok { $i->fh->getlines } 'Catmandu::HTTPError' , "throws an error on non-existing pages";
 
 $i = T::Importer->new(file => 'http://demo.org');
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git



More information about the Pkg-perl-cvs-commits mailing list