[libcatmandu-rdf-perl] 15/20: Adding a --speed option
Jonas Smedegaard
dr at jones.dk
Sat Oct 28 03:10:22 UTC 2017
This is an automated email from the git hooks/post-receive script.
js pushed a commit to annotated tag upstream/0.32
in repository libcatmandu-rdf-perl.
commit 4b545ed422b000d4af359eb9ea8b25ff0a70a462
Author: Patrick Hochstenbach <patrick.hochstenbach at ugent.be>
Date: Sat Jul 29 15:58:21 2017 +0200
Adding a --speed option
---
Changes | 2 ++
lib/Catmandu/Importer/RDF.pm | 29 +++++++++++++++++++++++------
2 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/Changes b/Changes
index 5e0b03f..a05937c 100644
--- a/Changes
+++ b/Changes
@@ -2,6 +2,8 @@ Changelog for Catmandu-RDF
{{$NEXT}}
- Fixing SPARQL examples with latest RDF::LDF
+ - Better support for streaming RDF input
+ - Adding a --speed option to the Catmandu::Importer::RDF
0.31 2016-04-13 10:24:55 CEST
- Fix test failure caused by RDF::NS (#29)
diff --git a/lib/Catmandu/Importer/RDF.pm b/lib/Catmandu/Importer/RDF.pm
index faad9d6..f2140e4 100644
--- a/lib/Catmandu/Importer/RDF.pm
+++ b/lib/Catmandu/Importer/RDF.pm
@@ -88,6 +88,10 @@ has cache_options => (
} }
);
+has speed => (
+ is => 'ro',
+);
+
sub BUILD {
my ($self) = @_;
@@ -279,7 +283,9 @@ sub _hashref_stream {
? RDF::Trine::Parser->new( $self->type ) : 'RDF::Trine::Parser';
my $handler = sub {
- my $triple = shift;
+ my $triple = shift;
+ state $start = time;
+ state $count = 0;
my $subject = $triple->subject->is_blank ?
'_:' . $triple->subject->blank_identifier :
@@ -306,6 +312,12 @@ sub _hashref_stream {
$hashref->{$subject}->{$predicate}->[0]->{value} = $value;
print $pipe encode_json($hashref) , "\n";
+
+ $count++;
+
+ if ($self->speed && ($count % 100 == 0) && (my $elapsed = time - $start) ) {
+ printf STDERR "triples %9d (%d/sec)\n" , $count , $count/$elapsed;
+ }
};
if ($self->url) {
@@ -345,15 +357,15 @@ Command line client C<catmandu>:
catmandu convert RDF --url http://d-nb.info/gnd/4151473-7 to YAML
- catmandu convert RDF --type ttl --file rdfdump.ttl to JSON
+ catmandu convert RDF --file rdfdump.ttl to JSON
- # For big input files it will be faster not to build a big hash in memory
- # bit to return each triple fragment
- catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to JSON
+ # Parse the input into one JSON document per triple. This is the
+ # most memory-efficient (and fastest) way to parse RDF input.
+ catmandu convert RDF --triples 1 --file rdfdump.ttl to JSON
# Transform back into NTriples (conversions to and from triples is the
# most efficient way to process RDF)
- catmandu convert RDF --type ttl --triples 1 --file rdfdump.ttl to RDF --type NTriples
+ catmandu convert RDF --triples 1 --file rdfdump.ttl to RDF --type NTriples
# Query a SPARQL endpoint
catmandu convert RDF --url http://dbpedia.org/sparql
@@ -452,6 +464,11 @@ Provide the L<CHI> based options for caching result sets. By default a memory st
max_size => 1024*1024
});
+=item speed
+
+If set to a true value, write the RDF processing speed to STDERR as the
+number of triples parsed per second.
+
=back
=head1 METHODS
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-rdf-perl.git
More information about the Pkg-perl-cvs-commits
mailing list