[libcatmandu-rdf-perl] 14/20: Adding POD

Sat Oct 28 03:10:22 UTC 2017

This is an automated email from the git hooks/post-receive script.

js pushed a commit to annotated tag upstream/0.32
in repository libcatmandu-rdf-perl.

commit 5665a01476c7a804974ad280034f2bd45390b50a
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date:   Sat Jul 29 11:39:58 2017 +0200

    Adding POD
---
 README.md                    |  9 +++++++--
 lib/Catmandu/Importer/RDF.pm | 11 +++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b7dfbb6..90e3a7d 100644
--- a/README.md
+++ b/README.md
@@ -12,10 +12,15 @@ Catmandu::RDF - Modules for handling RDF data within the Catmandu framework
 
 Command line client `catmandu`:
 
-    catmandu convert RDF --url http://dx.doi.org/10.2474/trol.7.147 
+    catmandu convert RDF --url http://dx.doi.org/10.2474/trol.7.147
                          --fix 'aref_query(dct_title,title)' to YAML
 
-    catmandu convert RDF --file rdfdump.nt to RDF --type turtle
+    catmandu convert RDF --file rdfdump.ttl to RDF --type turtle
+
+    # For big files, the only efficient way to convert RDF is to
+    # transform the input stream into triples and write NTriples
+    # to the output
+    catmandu convert RDF --triples 1 --type ttl to RDF --type NTriples < rdfdump.ttl
 
 See documentation of modules for more examples.
 
diff --git a/lib/Catmandu/Importer/RDF.pm b/lib/Catmandu/Importer/RDF.pm
index 6a0810c..faad9d6 100644
--- a/lib/Catmandu/Importer/RDF.pm
+++ b/lib/Catmandu/Importer/RDF.pm
@@ -155,8 +155,6 @@ sub rdf_generator {
 
         if ($self->triples) {
             if (my $hashref = $stream->()) {
-              use Data::Dumper;
-              warn Dumper($hashref);
                 $self->encoder->add_hashref($hashref, $aref);
             }
             else {
@@ -252,6 +250,7 @@ sub _sparql_stream {
 sub _hashref_stream {
   my ($self) = @_;
 
+  # Create a pipe stream to convert a callback handler into an iterator
   my $pipe = IO::Pipe->new();
 
   if (my $pid = fork()) {
@@ -294,9 +293,11 @@ sub _hashref_stream {
                               '_:' . $triple->object->blank_identifier :
                               $triple->object->uri_value;
         my $type      = lc $triple->object->type;
+        $type         = 'bnode' if $type eq 'blank';
         my $lang      = $triple->object->is_literal ? $triple->object->literal_value_language : undef;
         my $datatype  = $triple->object->is_literal ? $triple->object->literal_datatype : undef;
 
+        # Create an RDF::Trine-style RDF/JSON hashref that RDF::aREF can parse
         my $hashref = {};
 
         $hashref->{$subject}->{$predicate}->[0]->{type}     = $type;
@@ -350,7 +351,8 @@ Command line client C<catmandu>:
   # bit to return each triple fragment
   catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to JSON
 
-  # Transform back into NTriples
+  # Transform back into NTriples (converting to and from triples is the
+  # most efficient way to process RDF)
   catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to RDF --type NTriples
 
   # Query a SPARQL endpoint
@@ -402,7 +404,8 @@ Set to a specific date to get stable namespace prefix mappings.
 =item triples
 
 Import each RDF triple as one aREF subject map (default) or predicate map
-(option C<predicate_map>), if enabled.
+(option C<predicate_map>), if enabled. This is the most efficient way to
+process large input files. All the processing can be streamed.
 
 =item predicate_map
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-rdf-perl.git