[libcatmandu-rdf-perl] 14/20: Adding POD
Jonas Smedegaard
dr at jones.dk
Sat Oct 28 03:10:22 UTC 2017
This is an automated email from the git hooks/post-receive script.
js pushed a commit to annotated tag upstream/0.32
in repository libcatmandu-rdf-perl.
commit 5665a01476c7a804974ad280034f2bd45390b50a
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date: Sat Jul 29 11:39:58 2017 +0200
Adding POD
---
README.md | 9 +++++++--
lib/Catmandu/Importer/RDF.pm | 11 +++++++----
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index b7dfbb6..90e3a7d 100644
--- a/README.md
+++ b/README.md
@@ -12,10 +12,15 @@ Catmandu::RDF - Modules for handling RDF data within the Catmandu framework
Command line client `catmandu`:
- catmandu convert RDF --url http://dx.doi.org/10.2474/trol.7.147
+ catmandu convert RDF --url http://dx.doi.org/10.2474/trol.7.147
--fix 'aref_query(dct_title,title)' to YAML
- catmandu convert RDF --file rdfdump.nt to RDF --type turtle
+ catmandu convert RDF --file rdfdump.ttl to RDF --type turtle
+
+ # For big files, the only efficient way to convert RDF is to
+ # transform the input stream into triples and write them out
+ # as NTriples
+ catmandu convert RDF --triples 1 --type ttl to RDF --type NTriples < rdfdump.ttl
See documentation of modules for more examples.
diff --git a/lib/Catmandu/Importer/RDF.pm b/lib/Catmandu/Importer/RDF.pm
index 6a0810c..faad9d6 100644
--- a/lib/Catmandu/Importer/RDF.pm
+++ b/lib/Catmandu/Importer/RDF.pm
@@ -155,8 +155,6 @@ sub rdf_generator {
if ($self->triples) {
if (my $hashref = $stream->()) {
- use Data::Dumper;
- warn Dumper($hashref);
$self->encoder->add_hashref($hashref, $aref);
}
else {
@@ -252,6 +250,7 @@ sub _sparql_stream {
sub _hashref_stream {
my ($self) = @_;
+ # Create a pipe stream to convert a callback handler into an iterator
my $pipe = IO::Pipe->new();
if (my $pid = fork()) {
@@ -294,9 +293,11 @@ sub _hashref_stream {
'_:' . $triple->object->blank_identifier :
$triple->object->uri_value;
my $type = lc $triple->object->type;
+ $type = 'bnode' if $type eq 'blank';
my $lang = $triple->object->is_literal ? $triple->object->literal_value_language : undef;
my $datatype = $triple->object->is_literal ? $triple->object->literal_datatype : undef;
+ # Create the RDF::Trine-style RDF/JSON structure that RDF::aREF can parse
my $hashref = {};
$hashref->{$subject}->{$predicate}->[0]->{type} = $type;
@@ -350,7 +351,8 @@ Command line client C<catmandu>:
# bit to return each triple fragment
catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to JSON
- # Transform back into NTriples
+ # Transform back into NTriples (converting to and from triples is the
+ # most efficient way to process RDF)
catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to RDF --type NTriples
# Query a SPARQL endpoint
@@ -402,7 +404,8 @@ Set to a specific date to get stable namespace prefix mappings.
=item triples
Import each RDF triple as one aREF subject map (default) or predicate map
-(option C<predicate_map>), if enabled.
+(option C<predicate_map>), if enabled. This is the most efficient way to
+process large input files. All the processing can be streamed.
=item predicate_map
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-rdf-perl.git
More information about the Pkg-perl-cvs-commits
mailing list