[libfolia] 02/04: New upstream version 1.11

Maarten van Gompel proycon-guest at moszumanska.debian.org
Mon Dec 4 20:33:58 UTC 2017


This is an automated email from the git hooks/post-receive script.

proycon-guest pushed a commit to branch master
in repository libfolia.

commit b53808e2d2421490abc9f747d1e1cfadbfd93eef
Author: proycon <proycon at anaproy.nl>
Date:   Mon Dec 4 21:31:20 2017 +0100

    New upstream version 1.11
---
 ChangeLog                         |  67 +++++++++++++++++-
 NEWS                              |   5 ++
 configure                         |  20 +++---
 configure.ac                      |   2 +-
 include/libfolia/folia_document.h |   4 +-
 include/libfolia/folia_impl.h     |   1 +
 src/folia_document.cxx            |  23 +++---
 src/folia_impl.cxx                | 143 ++++++++++++++++++++++----------------
 src/folia_properties.cxx          |   1 +
 9 files changed, 180 insertions(+), 86 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f4b67e6..a44a2ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,70 @@
+2017-11-23  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* include/libfolia/folia_impl.h, src/folia_impl.cxx: added some
+	sanity checking to WordReference: does the 't' attribute match, and do
+	we refer to something referable?
+
+2017-11-23  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_document.cxx, src/folia_impl.cxx: reverted
+	implementation of forward Wordrefs
+
+2017-11-22  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_document.cxx, src/folia_impl.cxx: allow for forward
+	wrefs (VERY UNWISE!)
+
+2017-11-21  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx, src/folia_properties.cxx: fixed text() problem
+	with <comment> tags
+
+2017-11-20  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: in some cases restrict text checking to
+	printable elements.
+
+2017-11-13  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* docs/folialint.1: updated 'man' page of folialint
+
 2017-11-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
 
-	* NEWS, configure.ac, src/Makefile.am: release 1.10.1: Bumping .so
-	version to 7.0
+	* : commit 78c9e0331d7ed47d8ebde7db0b6f4ea5738ac7dc Author: Ko van
+	der Sloot <K.vanderSloot at let.ru.nl> Date:   Mon Nov 6 16:55:18 2017
+	+0100
+
+2017-11-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: small refactoring, plus a typo fixed
+
+2017-10-26  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: a failed append() should leave the document
+	untouched. (this is NOT enforced everywhere)
+
+2017-10-26  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: fix ref counting. (needs more testing!)
+
+2017-10-26  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: enforce that all children in an
+	annotationlayer are in the same set
+
+2017-10-18  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* include/libfolia/folia_document.h, src/folia_document.cxx: work on
+	Document::setmode()/getmode(). A bit clumsy it is.
+
+2017-10-18  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* src/folia_impl.cxx: when serializing Wrefs, be sure to get text in
+	the parents textclass
+
+2017-10-18  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* configure.ac: bumped version after release
 
 2017-10-17  Ko van der Sloot <K.vanderSloot at let.ru.nl>
 
diff --git a/NEWS b/NEWS
index 8bc970c..ec63a6e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+1.11 2017-12-04
+Bug fix release:
+* handling of <comment> tags within <t> nodes
+* better handling of <wref> tags. Forbid forward references
+
 1.10.1 2017-11-06
 Minor fix
 * bumped the .so version to 7.0
diff --git a/configure b/configure
index db86d45..defd898 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for libfolia 1.10.1.
+# Generated by GNU Autoconf 2.69 for libfolia 1.11.
 #
 # Report bugs to <lamasoftware at science.ru.nl>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='libfolia'
 PACKAGE_TARNAME='libfolia'
-PACKAGE_VERSION='1.10.1'
-PACKAGE_STRING='libfolia 1.10.1'
+PACKAGE_VERSION='1.11'
+PACKAGE_STRING='libfolia 1.11'
 PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
 PACKAGE_URL=''
 
@@ -1358,7 +1358,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures libfolia 1.10.1 to adapt to many kinds of systems.
+\`configure' configures libfolia 1.11 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1429,7 +1429,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of libfolia 1.10.1:";;
+     short | recursive ) echo "Configuration of libfolia 1.11:";;
    esac
   cat <<\_ACEOF
 
@@ -1556,7 +1556,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-libfolia configure 1.10.1
+libfolia configure 1.11
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2076,7 +2076,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by libfolia $as_me 1.10.1, which was
+It was created by libfolia $as_me 1.11, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2939,7 +2939,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='libfolia'
- VERSION='1.10.1'
+ VERSION='1.11'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -17788,7 +17788,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by libfolia $as_me 1.10.1, which was
+This file was extended by libfolia $as_me 1.11, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -17854,7 +17854,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-libfolia config.status 1.10.1
+libfolia config.status 1.11
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 64fe2a1..3b885bd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.59)
-AC_INIT([libfolia], [1.10.1], [lamasoftware at science.ru.nl])
+AC_INIT([libfolia], [1.11], [lamasoftware at science.ru.nl])
 AM_INIT_AUTOMAKE([foreign])
 AC_CONFIG_SRCDIR([configure.ac])
 AC_CONFIG_MACRO_DIR([m4])
diff --git a/include/libfolia/folia_document.h b/include/libfolia/folia_document.h
index 40b538c..a685569 100644
--- a/include/libfolia/folia_document.h
+++ b/include/libfolia/folia_document.h
@@ -198,7 +198,7 @@ namespace folia {
     };
     void incrRef( AnnotationType::AnnotationType, const std::string& );
     void decrRef( AnnotationType::AnnotationType, const std::string& );
-    void setmode( const std::string& );
+    void setmode( const std::string& ) const;
     std::string getmode() const;
     std::multimap<AnnotationType::AnnotationType,std::string> unused_declarations( ) const;
       const MetaData *get_submetadata( const std::string& m ){
@@ -255,7 +255,7 @@ namespace folia {
     MetaData *_metadata;
     std::map<std::string,MetaData *> submetadata;
     std::multimap<std::string,std::string> styles;
-    Mode mode;
+    mutable Mode mode;
     std::string filename;
     std::string _version;
     bool external;
diff --git a/include/libfolia/folia_impl.h b/include/libfolia/folia_impl.h
index b400d1b..fea0bd9 100644
--- a/include/libfolia/folia_impl.h
+++ b/include/libfolia/folia_impl.h
@@ -453,6 +453,7 @@ namespace folia {
     virtual void setAuth( bool b ) = 0;
     virtual bool auth( ) const = 0;
     virtual const std::string generateId( const std::string& ) NOT_IMPLEMENTED;
+    virtual const std::string textclass() const NOT_IMPLEMENTED;
   };
 
   class FoliaImpl: public virtual FoliaElement {
diff --git a/src/folia_document.cxx b/src/folia_document.cxx
index 4773064..01ce276 100644
--- a/src/folia_document.cxx
+++ b/src/folia_document.cxx
@@ -168,7 +168,8 @@ namespace folia {
     return true;
   }
 
-  void Document::setmode( const string& ms ){
+  void Document::setmode( const string& ms ) const {
+    // mode is mutable, so this even sets mode on CONST documents!
     vector<string> modev;
     TiCC::split_at( ms, modev, "," );
     for ( const auto& mod : modev ){
@@ -192,16 +193,16 @@ namespace folia {
 
   string Document::getmode() const{
     string result = "mode=";
-    if ( mode == PERMISSIVE ){
+    if ( mode & PERMISSIVE ){
       result += "permissive,";
     }
-    if ( mode == STRIP ){
+    if ( mode & STRIP ){
       result += "strip,";
     }
-    if ( mode == CHECKTEXT ){
+    if ( mode & CHECKTEXT ){
       result += "checktext,";
     }
-    if ( mode == FIXTEXT ){
+    if ( mode & FIXTEXT ){
       result += "fixtext,";
     }
     return result;
@@ -285,21 +286,19 @@ namespace folia {
     if ( s.empty() ) {
       return;
     }
-    // cerr << _id << "-add docindex " << el << " (" << s << ")" << endl;
-    // using TiCC::operator <<;
-    // cerr << "VOOR: " << sindex << endl;
-    if ( sindex.find( s ) == sindex.end() ){
+    auto it = sindex.find( s );
+    if ( it == sindex.end() ){
       sindex[s] = el;
       iindex.push_back( el );
     }
-    else
+    else {
       throw DuplicateIDError( s );
-    //    cerr << "NA  : " << sindex << endl;
+    }
   }
 
   void Document::delDocIndex( const FoliaElement* el, const string& s ){
     if ( sindex.empty() ){
-      // only when ~~Document is in progress
+      // only when ~Document is in progress
       return;
     }
     if ( s.empty() ) {
diff --git a/src/folia_impl.cxx b/src/folia_impl.cxx
index d4a8dcd..54375a9 100644
--- a/src/folia_impl.cxx
+++ b/src/folia_impl.cxx
@@ -790,7 +790,7 @@ namespace folia {
   }
 
   void FoliaImpl::check_text_consistency( ) const {
-    if ( !mydoc || !mydoc->checktext() ){
+    if ( !mydoc || !mydoc->checktext() || ! printable() ){
       return;
     }
     // check if the text associated with all children is compatible with the
@@ -1035,11 +1035,13 @@ namespace folia {
     else if ( is_textcontainer() ){
       UnicodeString result;
       for ( const auto& d : data ){
-	if ( !result.isEmpty() ){
-	  const string& delim = d->getTextDelimiter( retaintok );
-	  result += UTF8ToUnicode(delim);
+	if ( d->printable() ){
+	  if ( !result.isEmpty() ){
+	    const string& delim = d->getTextDelimiter( retaintok );
+	    result += UTF8ToUnicode(delim);
+	  }
+	  result += d->text( cls );
 	}
-	result += d->text( cls );
       }
 #ifdef DEBUG_TEXT
       cerr << "TEXT op a textcontainer :" << xmltag() << " returned '" << result << "'" << endl;
@@ -1076,7 +1078,7 @@ namespace folia {
       result += d->text( cls, retaintok, strict );
     }
 #ifdef DEBUG_TEXT
-    cerr << "FoLiA::TEXT returnes '" << result << "'" << endl;
+    cerr << "FoLiA::TEXT returns '" << result << "'" << endl;
 #endif
     return result;
   }
@@ -1602,11 +1604,13 @@ namespace folia {
       }
     }
     if ( c->parent() &&
-	 !( c->element_id() == Word_t
+	 !( c->element_id() == WordReference_t
+	    || c->element_id() == Word_t
 	    || c->element_id() == Morpheme_t
 	    || c->element_id() == Phoneme_t ) ) {
-      throw XmlError( "attempt to reconnect node " + c->classname()
-		      + " to a " + classname() + " node, id=" + _id
+      throw XmlError( "attempt to reconnect node " + c->classname() + "("
+		      + c->id()
+		      + ") to a " + classname() + " node, id=" + _id
 		      + ", it was already connected to a "
 		      +  c->parent()->classname() + " id=" + c->parent()->id() );
     }
@@ -1948,7 +1952,7 @@ namespace folia {
       p = p->next;
     }
     if ( doc() && ( doc()->checktext() || doc()->fixtext() )
-	 && is_structure( this )
+	 && this->printable()
 	 && !isSubClass( Morpheme_t ) && !isSubClass( Phoneme_t) ){
       vector<TextContent*> tv = select<TextContent>( false );
       // first see which text classes ar present
@@ -2390,24 +2394,20 @@ namespace folia {
     KWargs kwargs = args; // need to copy
     auto it = kwargs.find( "value" );
     if ( it != kwargs.end() ) {
-      XmlText *t = new XmlText();
       string value = it->second;
+      kwargs.erase(it);
       if ( value.empty() ) {
 	// can this ever happen?
 	throw ValueError( "TextContent: 'value' attribute may not be empty." );
       }
+      XmlText *t = new XmlText();
       t->setvalue( value );
       append( t );
-      kwargs.erase(it);
     }
     it = kwargs.find( "offset" );
     if ( it != kwargs.end() ) {
       _offset = stringTo<int>(it->second);
       kwargs.erase(it);
-      // if ( doc() && doc()->checktext() ){
-      // 	cerr << "ANOTHER cache " << this << endl;
-      // 	doc()->cache_textcontent(this);
-      // }
     }
     else
       _offset = -1;
@@ -2692,7 +2692,7 @@ namespace folia {
 	  i = stringTo<int>( val );
 	}
 	catch ( exception ) {
-	  // no number, so assume so user defined id
+	  // no number, so assume some user defined id
 	  return;
 	}
 	const auto& it = id_map.find( child->xmltag() );
@@ -3547,27 +3547,39 @@ namespace folia {
   }
 
   FoliaElement* WordReference::parseXml( const xmlNode *node ) {
-    KWargs att = getAttributes( node );
-    string id = att["id"];
+    KWargs atts = getAttributes( node );
+    string id = atts["id"];
     if ( id.empty() ) {
       throw XmlError( "empty id in WordReference" );
     }
     if ( mydoc->debug ) {
       cerr << "Found word reference" << id << endl;
     }
-    FoliaElement *res = (*mydoc)[id];
-    if ( res ) {
-      // To DO: check type. Word_t, Phoneme_t or Morpheme_t??
-      res->increfcount();
+    FoliaElement *ref = (*mydoc)[id];
+    if ( ref ) {
+      if ( ref->element_id() != Word_t
+	   && ref->element_id() != Phoneme_t
+	   && ref->element_id() != Morpheme_t ) {
+	throw XmlError( "WordRefence id=" + id + " refers a non-word: "
+			+ ref->xmltag() );
+      }
+      string tval = atts["t"];
+      if ( !tval.empty() ){
+	string tc = ref->textclass();
+	string rtval = ref->str(tc);
+	if ( tval != rtval ){
+	  throw XmlError( "WordRefence id=" + id + " has another value for "
+			  + " the t attribute them it's reference. ("
+			  + tval + " versus " + rtval + ")" );
+	}
+      }
+      ref->increfcount();
     }
     else {
-      if ( mydoc->debug ) {
-	cerr << "...Unresolvable id: " << id << endl;
-      }
-      throw XmlError( "Unresolvable id " + id + "in WordReference" );
+      throw XmlError( "Unresolvable id " + id + " in WordReference" );
     }
     delete this;
-    return res;
+    return ref;
   }
 
   FoliaElement* AlignReference::parseXml( const xmlNode *node ) {
@@ -3706,31 +3718,31 @@ namespace folia {
     // If there is no set (yet), try to get the set from the child
     // but not if it is the default set.
     // for a Correction child, we look deeper.
-    if ( _set.empty() ) {
-      if ( child->isSubClass( AbstractSpanAnnotation_t ) ) {
-	string st = child->sett();
-	if ( !st.empty()
-	     && mydoc->defaultset( child->annotation_type() ) != st ) {
-	  _set = st;
-	  mydoc->incrRef( child->annotation_type(), _set );
-	}
-      }
-      else if ( child->isinstance(Correction_t) ) {
-	Original *org = child->getOriginal();
-	if ( org ) {
-	  for ( size_t i=0; i < org->size(); ++i ) {
-	    FoliaElement *el = org->index(i);
-	    if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
-	      string st = el->sett();
-	      if ( !st.empty()
-		   && mydoc->defaultset( el->annotation_type() ) != st ) {
-		_set = st;
-		mydoc->incrRef( el->annotation_type(), _set );
-		return;
-	      }
+    // BARF when the sets are incompatible.
+    string c_set;
+    if ( child->isSubClass( AbstractSpanAnnotation_t ) ) {
+      string st = child->sett();
+      if ( !st.empty()
+	   && mydoc->defaultset( child->annotation_type() ) != st ) {
+	c_set = st;
+      }
+    }
+    else if ( child->isinstance(Correction_t) ) {
+      Original *org = child->getOriginal();
+      if ( org ) {
+	for ( size_t i=0; i < org->size(); ++i ) {
+	  FoliaElement *el = org->index(i);
+	  if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
+	    string st = el->sett();
+	    if ( !st.empty()
+		 && mydoc->defaultset( el->annotation_type() ) != st ) {
+	      c_set = st;
+	      break;
 	    }
 	  }
 	}
+      }
+      if ( c_set.empty() ){
 	New *nw = child->getNew();
 	if ( nw ) {
 	  for ( size_t i=0; i < nw->size(); ++i ) {
@@ -3739,33 +3751,46 @@ namespace folia {
 	      string st = el->sett();
 	      if ( !st.empty()
 		   && mydoc->defaultset( el->annotation_type() ) != st ) {
-		_set = st;
-		mydoc->incrRef( el->annotation_type(), _set );
-		return;
+		c_set = st;
+		break;
 	      }
 	    }
 	  }
 	}
+      }
+      if ( c_set.empty() ){
 	auto v = child->suggestions();
 	for ( const auto& el : v ) {
 	  if ( el->isSubClass( AbstractSpanAnnotation_t ) ) {
 	    string st = el->sett();
 	    if ( !st.empty()
 		 && mydoc->defaultset( el->annotation_type() ) != st ) {
-	      _set = st;
-	      mydoc->incrRef( el->annotation_type(), _set );
-	      return;
+	      c_set = st;
+	      break;
 	    }
 	  }
 	}
       }
     }
+    if ( c_set.empty() ){
+      return;
+    }
+    if ( _set.empty() ) {
+      _set = c_set;
+    }
+    else if ( _set != c_set ){
+      throw DuplicateAnnotationError( "appending child: " + child->xmltag()
+				      + " with set='"
+				      +  c_set + "' to " + xmltag()
+				      + " failed while it already has set='"
+				      + _set + "'" );
+    }
+    mydoc->incrRef( child->annotation_type(), _set );
   }
 
   FoliaElement *AbstractAnnotationLayer::append( FoliaElement *child ) {
-    FoliaImpl::append( child );
     assignset( child );
-    return child;
+    return FoliaImpl::append( child );
   }
 
   KWargs AbstractAnnotationLayer::collectAttributes() const {
@@ -3787,7 +3812,7 @@ namespace folia {
 	xmlNode *t = XmlNewNode( foliaNs(), "wref" );
 	KWargs attribs;
 	attribs["id"] = el->id();
-	string txt = el->str();
+	string txt = el->str( textclass() );
 	if ( !txt.empty() ) {
 	  attribs["t"] = txt;
 	}
diff --git a/src/folia_properties.cxx b/src/folia_properties.cxx
index 04b7f21..ad9a17a 100644
--- a/src/folia_properties.cxx
+++ b/src/folia_properties.cxx
@@ -670,6 +670,7 @@ namespace folia {
     Comment::PROPS.LABEL = "Comment";
     Comment::PROPS.OPTIONAL_ATTRIBS = ID|ANNOTATOR|CONFIDENCE|DATETIME|N|METADATA;
     Comment::PROPS.XMLTAG = "comment";
+    Comment::PROPS.PRINTABLE = false;
 //------ ComplexAlignment -------
     ComplexAlignment::PROPS.ELEMENT_ID = ComplexAlignment_t;
     ComplexAlignment::PROPS.ACCEPTED_DATA += {Alignment_t, Comment_t, Description_t, Feature_t, ForeignData_t, Metric_t};

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/libfolia.git



More information about the debian-science-commits mailing list