[libfolia] 02/04: New upstream version 1.11
Maarten van Gompel
proycon-guest at moszumanska.debian.org
Mon Dec 4 20:33:58 UTC 2017
This is an automated email from the git hooks/post-receive script.
proycon-guest pushed a commit to branch master
in repository libfolia.
commit b53808e2d2421490abc9f747d1e1cfadbfd93eef
Author: proycon <proycon at anaproy.nl>
Date: Mon Dec 4 21:31:20 2017 +0100
New upstream version 1.11
---
ChangeLog | 67 +++++++++++++++++-
NEWS | 5 ++
configure | 20 +++---
configure.ac | 2 +-
include/libfolia/folia_document.h | 4 +-
include/libfolia/folia_impl.h | 1 +
src/folia_document.cxx | 23 +++---
src/folia_impl.cxx | 143 ++++++++++++++++++++++----------------
src/folia_properties.cxx | 1 +
9 files changed, 180 insertions(+), 86 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f4b67e6..a44a2ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,70 @@
+2017-11-23 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/libfolia/folia_impl.h, src/folia_impl.cxx: added some
+ sanity checking to WordReference: does the 't' match, and do we refer
+ to something referable?
+
+2017-11-23 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_document.cxx, src/folia_impl.cxx: reverted
+ implementation of forward Wordrefs
+
+2017-11-22 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_document.cxx, src/folia_impl.cxx: allow for forward
+ wref's (VERY UNWISE!)
+
+2017-11-21 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx, src/folia_properties.cxx: fixed text() problem
+ with <comment> tags
+
+2017-11-20 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: in some cases restrict text checking to
+ printable elements.
+
+2017-11-13 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * docs/folialint.1: updated 'man' page of folialint
+
2017-11-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
- * NEWS, configure.ac, src/Makefile.am: release 1.10.1: Bumping .so
- version to 7.0
+ * : commit 78c9e0331d7ed47d8ebde7db0b6f4ea5738ac7dc Author: Ko van
+ der Sloot <K.vanderSloot at let.ru.nl> Date: Mon Nov 6 16:55:18 2017
+ +0100
+
+2017-11-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: small refactoring, plus a typo fixed
+
+2017-10-26 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: a failed append() should leave the document
+ untouched. (this is NOT enforced everywhere)
+
+2017-10-26 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: fix ref counting. (needs more testing!)
+
+2017-10-26 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: enforce that all children in an
+ annotationlayer are in the same set
+
+2017-10-18 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/libfolia/folia_document.h, src/folia_document.cxx: work on
+ Document::setmode()/getmode(). A bit clumsy it is.
+
+2017-10-18 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/folia_impl.cxx: when serializing Wrefs, be sure to get text in
+ the parents textclass
+
+2017-10-18 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * configure.ac: bumped version after release
2017-10-17 Ko van der Sloot <K.vanderSloot at let.ru.nl>
diff --git a/NEWS b/NEWS
index 8bc970c..ec63a6e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+1.11 2017-12-04
+Bug fix release:
+* handling of <comment> tags within <t> nodes
+* better handling of <wref> tags. Forbid forward references
+
1.10.1 2017-11-06
Minor fix
* bumped the .so version to 7.0
diff --git a/configure b/configure
index db86d45..defd898 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for libfolia 1.10.1.
+# Generated by GNU Autoconf 2.69 for libfolia 1.11.
#
# Report bugs to <lamasoftware at science.ru.nl>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='libfolia'
PACKAGE_TARNAME='libfolia'
-PACKAGE_VERSION='1.10.1'
-PACKAGE_STRING='libfolia 1.10.1'
+PACKAGE_VERSION='1.11'
+PACKAGE_STRING='libfolia 1.11'
PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
PACKAGE_URL=''
@@ -1358,7 +1358,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures libfolia 1.10.1 to adapt to many kinds of systems.
+\`configure' configures libfolia 1.11 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1429,7 +1429,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of libfolia 1.10.1:";;
+ short | recursive ) echo "Configuration of libfolia 1.11:";;
esac
cat <<\_ACEOF
@@ -1556,7 +1556,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-libfolia configure 1.10.1
+libfolia configure 1.11
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2076,7 +2076,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by libfolia $as_me 1.10.1, which was
+It was created by libfolia $as_me 1.11, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2939,7 +2939,7 @@ fi
# Define the identity of the package.
PACKAGE='libfolia'
- VERSION='1.10.1'
+ VERSION='1.11'
cat >>confdefs.h <<_ACEOF
@@ -17788,7 +17788,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by libfolia $as_me 1.10.1, which was
+This file was extended by libfolia $as_me 1.11, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -17854,7 +17854,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-libfolia config.status 1.10.1
+libfolia config.status 1.11
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 64fe2a1..3b885bd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT([libfolia], [1.10.1], [lamasoftware at science.ru.nl])
+AC_INIT([libfolia], [1.11], [lamasoftware at science.ru.nl])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_SRCDIR([configure.ac])
AC_CONFIG_MACRO_DIR([m4])
diff --git a/include/libfolia/folia_document.h b/include/libfolia/folia_document.h
index 40b538c..a685569 100644
--- a/include/libfolia/folia_document.h
+++ b/include/libfolia/folia_document.h
@@ -198,7 +198,7 @@ namespace folia {
};
void incrRef( AnnotationType::AnnotationType, const std::string& );
void decrRef( AnnotationType::AnnotationType, const std::string& );
- void setmode( const std::string& );
+ void setmode( const std::string& ) const;
std::string getmode() const;
std::multimap<AnnotationType::AnnotationType,std::string> unused_declarations( ) const;
const MetaData *get_submetadata( const std::string& m ){
@@ -255,7 +255,7 @@ namespace folia {
MetaData *_metadata;
std::map<std::string,MetaData *> submetadata;
std::multimap<std::string,std::string> styles;
- Mode mode;
+ mutable Mode mode;
std::string filename;
std::string _version;
bool external;
diff --git a/include/libfolia/folia_impl.h b/include/libfolia/folia_impl.h
index b400d1b..fea0bd9 100644
--- a/include/libfolia/folia_impl.h
+++ b/include/libfolia/folia_impl.h
@@ -453,6 +453,7 @@ namespace folia {
virtual void setAuth( bool b ) = 0;
virtual bool auth( ) const = 0;
virtual const std::string generateId( const std::string& ) NOT_IMPLEMENTED;
+ virtual const std::string textclass() const NOT_IMPLEMENTED;
};
class FoliaImpl: public virtual FoliaElement {
diff --git a/src/folia_document.cxx b/src/folia_document.cxx
index 4773064..01ce276 100644
--- a/src/folia_document.cxx
+++ b/src/folia_document.cxx
@@ -168,7 +168,8 @@ namespace folia {
return true;
}
- void Document::setmode( const string& ms ){
+ void Document::setmode( const string& ms ) const {
+ // mode is mutable, so this even sets mode on CONST documents!
vector<string> modev;
TiCC::split_at( ms, modev, "," );
for ( const auto& mod : modev ){
@@ -192,16 +193,16 @@ namespace folia {
string Document::getmode() const{
string result = "mode=";
- if ( mode == PERMISSIVE ){
+ if ( mode & PERMISSIVE ){
result += "permissive,";
}
- if ( mode == STRIP ){
+ if ( mode & STRIP ){
result += "strip,";
}
- if ( mode == CHECKTEXT ){
+ if ( mode & CHECKTEXT ){
result += "checktext,";
}
- if ( mode == FIXTEXT ){
+ if ( mode & FIXTEXT ){
result += "fixtext,";
}
return result;
@@ -285,21 +286,19 @@ namespace folia {
if ( s.empty() ) {
return;
}
- // cerr << _id << "-add docindex " << el << " (" << s << ")" << endl;
- // using TiCC::operator <<;
- // cerr << "VOOR: " << sindex << endl;
- if ( sindex.find( s ) == sindex.end() ){
+ auto it = sindex.find( s );
+ if ( it == sindex.end() ){
sindex[s] = el;
iindex.push_back( el );
}
- else
+ else {
throw DuplicateIDError( s );
- // cerr << "NA : " << sindex << endl;
+ }
}
void Document::delDocIndex( const FoliaElement* el, const string& s ){
if ( sindex.empty() ){
- // only when ~~Document is in progress
+ // only when ~Document is in progress
return;
}
if ( s.empty() ) {
diff --git a/src/folia_impl.cxx b/src/folia_impl.cxx
index d4a8dcd..54375a9 100644
--- a/src/folia_impl.cxx
+++ b/src/folia_impl.cxx
@@ -790,7 +790,7 @@ namespace folia {
}
void FoliaImpl::check_text_consistency( ) const {
- if ( !mydoc || !mydoc->checktext() ){
+ if ( !mydoc || !mydoc->checktext() || ! printable() ){
return;
}
// check if the text associated with all children is compatible with the
@@ -1035,11 +1035,13 @@ namespace folia {
else if ( is_textcontainer() ){
UnicodeString result;
for ( const auto& d : data ){
- if ( !result.isEmpty() ){
- const string& delim = d->getTextDelimiter( retaintok );
- result += UTF8ToUnicode(delim);
+ if ( d->printable() ){
+ if ( !result.isEmpty() ){
+ const string& delim = d->getTextDelimiter( retaintok );
+ result += UTF8ToUnicode(delim);
+ }
+ result += d->text( cls );
}
- result += d->text( cls );
}
#ifdef DEBUG_TEXT
cerr << "TEXT op a textcontainer :" << xmltag() << " returned '" << result << "'" << endl;
@@ -1076,7 +1078,7 @@ namespace folia {
result += d->text( cls, retaintok, strict );
}
#ifdef DEBUG_TEXT
- cerr << "FoLiA::TEXT returnes '" << result << "'" << endl;
+ cerr << "FoLiA::TEXT returns '" << result << "'" << endl;
#endif
return result;
}
@@ -1602,11 +1604,13 @@ namespace folia {
}
}
if ( c->parent() &&
- !( c->element_id() == Word_t
+ !( c->element_id() == WordReference_t
+ || c->element_id() == Word_t
|| c->element_id() == Morpheme_t
|| c->element_id() == Phoneme_t ) ) {
- throw XmlError( "attempt to reconnect node " + c->classname()
- + " to a " + classname() + " node, id=" + _id
+ throw XmlError( "attempt to reconnect node " + c->classname() + "("
+ + c->id()
+ + ") to a " + classname() + " node, id=" + _id
+ ", it was already connected to a "
+ c->parent()->classname() + " id=" + c->parent()->id() );
}
@@ -1948,7 +1952,7 @@ namespace folia {
p = p->next;
}
if ( doc() && ( doc()->checktext() || doc()->fixtext() )
- && is_structure( this )
+ && this->printable()
&& !isSubClass( Morpheme_t ) && !isSubClass( Phoneme_t) ){
vector<TextContent*> tv = select<TextContent>( false );
// first see which text classes ar present
@@ -2390,24 +2394,20 @@ namespace folia {
KWargs kwargs = args; // need to copy
auto it = kwargs.find( "value" );
if ( it != kwargs.end() ) {
- XmlText *t = new XmlText();
string value = it->second;
+ kwargs.erase(it);
if ( value.empty() ) {
// can this ever happen?
throw ValueError( "TextContent: 'value' attribute may not be empty." );
}
+ XmlText *t = new XmlText();
t->setvalue( value );
append( t );
- kwargs.erase(it);
}
it = kwargs.find( "offset" );
if ( it != kwargs.end() ) {
_offset = stringTo<int>(it->second);
kwargs.erase(it);
- // if ( doc() && doc()->checktext() ){
- // cerr << "ANOTHER cache " << this << endl;
- // doc()->cache_textcontent(this);
- // }
}
else
_offset = -1;
@@ -2692,7 +2692,7 @@ namespace folia {
i = stringTo<int>( val );
}
catch ( exception ) {
- // no number, so assume so user defined id
+ // no number, so assume some user defined id
return;
}
const auto& it = id_map.find( child->xmltag() );
@@ -3547,27 +3547,39 @@ namespace folia {
}
FoliaElement* WordReference::parseXml( const xmlNode *node ) {
- KWargs att = getAttributes( node );
- string id = att["id"];
+ KWargs atts = getAttributes( node );
+ string id = atts["id"];
if ( id.empty() ) {
throw XmlError( "empty id in WordReference" );
}
if ( mydoc->debug ) {
cerr << "Found word reference" << id << endl;
}
- FoliaElement *res = (*mydoc)[id];
- if ( res ) {
- // To DO: check type. Word_t, Phoneme_t or Morpheme_t??
- res->increfcount();
+ FoliaElement *ref = (*mydoc)[id];
+ if ( ref ) {
+ if ( ref->element_id() != Word_t
+ && ref->element_id() != Phoneme_t
+ && ref->element_id() != Morpheme_t ) {
+ throw XmlError( "WordRefence id=" + id + " refers a non-word: "
+ + ref->xmltag() );
+ }
+ string tval = atts["t"];
+ if ( !tval.empty() ){
+ string tc = ref->textclass();
+ string rtval = ref->str(tc);
+ if ( tval != rtval ){
+ throw XmlError( "WordRefence id=" + id + " has another value for "
+ + " the t attribute them it's reference. ("
+ + tval + " versus " + rtval + ")" );
+ }
+ }
+ ref->increfcount();
}
else {
- if ( mydoc->debug ) {
- cerr << "...Unresolvable id: " << id << endl;
- }
- throw XmlError( "Unresolvable id " + id + "in WordReference" );
+ throw XmlError( "Unresolvable id " + id + " in WordReference" );
}
delete this;
- return res;
+ return ref;
}
FoliaElement* AlignReference::parseXml( const xmlNode *node ) {
@@ -3706,31 +3718,31 @@ namespace folia {
// If there is no set (yet), try to get the set from the child
// but not if it is the default set.
// for a Correction child, we look deeper.
- if ( _set.empty() ) {
- if ( child->isSubClass( AbstractSpanAnnotation_t ) ) {
- string st = child->sett();
- if ( !st.empty()
- && mydoc->defaultset( child->annotation_type() ) != st ) {
- _set = st;
- mydoc->incrRef( child->annotation_type(), _set );
- }
- }
- else if ( child->isinstance(Correction_t) ) {
- Original *org = child->getOriginal();
- if ( org ) {
- for ( size_t i=0; i < org->size(); ++i ) {
- FoliaElement *el = org->index(i);
- if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
- string st = el->sett();
- if ( !st.empty()
- && mydoc->defaultset( el->annotation_type() ) != st ) {
- _set = st;
- mydoc->incrRef( el->annotation_type(), _set );
- return;
- }
+ // BARF when the sets are incompatible.
+ string c_set;
+ if ( child->isSubClass( AbstractSpanAnnotation_t ) ) {
+ string st = child->sett();
+ if ( !st.empty()
+ && mydoc->defaultset( child->annotation_type() ) != st ) {
+ c_set = st;
+ }
+ }
+ else if ( child->isinstance(Correction_t) ) {
+ Original *org = child->getOriginal();
+ if ( org ) {
+ for ( size_t i=0; i < org->size(); ++i ) {
+ FoliaElement *el = org->index(i);
+ if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
+ string st = el->sett();
+ if ( !st.empty()
+ && mydoc->defaultset( el->annotation_type() ) != st ) {
+ c_set = st;
+ break;
}
}
}
+ }
+ if ( c_set.empty() ){
New *nw = child->getNew();
if ( nw ) {
for ( size_t i=0; i < nw->size(); ++i ) {
@@ -3739,33 +3751,46 @@ namespace folia {
string st = el->sett();
if ( !st.empty()
&& mydoc->defaultset( el->annotation_type() ) != st ) {
- _set = st;
- mydoc->incrRef( el->annotation_type(), _set );
- return;
+ c_set = st;
+ break;
}
}
}
}
+ }
+ if ( c_set.empty() ){
auto v = child->suggestions();
for ( const auto& el : v ) {
if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
string st = el->sett();
if ( !st.empty()
&& mydoc->defaultset( el->annotation_type() ) != st ) {
- _set = st;
- mydoc->incrRef( el->annotation_type(), _set );
- return;
+ c_set = st;
+ break;
}
}
}
}
}
+ if ( c_set.empty() ){
+ return;
+ }
+ if ( _set.empty() ) {
+ _set = c_set;
+ }
+ else if ( _set != c_set ){
+ throw DuplicateAnnotationError( "appending child: " + child->xmltag()
+ + " with set='"
+ + c_set + "' to " + xmltag()
+ + " failed while it already has set='"
+ + _set + "'" );
+ }
+ mydoc->incrRef( child->annotation_type(), _set );
}
FoliaElement *AbstractAnnotationLayer::append( FoliaElement *child ) {
- FoliaImpl::append( child );
assignset( child );
- return child;
+ return FoliaImpl::append( child );
}
KWargs AbstractAnnotationLayer::collectAttributes() const {
@@ -3787,7 +3812,7 @@ namespace folia {
xmlNode *t = XmlNewNode( foliaNs(), "wref" );
KWargs attribs;
attribs["id"] = el->id();
- string txt = el->str();
+ string txt = el->str( textclass() );
if ( !txt.empty() ) {
attribs["t"] = txt;
}
diff --git a/src/folia_properties.cxx b/src/folia_properties.cxx
index 04b7f21..ad9a17a 100644
--- a/src/folia_properties.cxx
+++ b/src/folia_properties.cxx
@@ -670,6 +670,7 @@ namespace folia {
Comment::PROPS.LABEL = "Comment";
Comment::PROPS.OPTIONAL_ATTRIBS = ID|ANNOTATOR|CONFIDENCE|DATETIME|N|METADATA;
Comment::PROPS.XMLTAG = "comment";
+ Comment::PROPS.PRINTABLE = false;
//------ ComplexAlignment -------
ComplexAlignment::PROPS.ELEMENT_ID = ComplexAlignment_t;
ComplexAlignment::PROPS.ACCEPTED_DATA += {Alignment_t, Comment_t, Description_t, Feature_t, ForeignData_t, Metric_t};
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/libfolia.git
More information about the debian-science-commits
mailing list