[lttoolbox] 01/02: Imported Upstream version 3.3.3~r68466
Tino Didriksen
tinodidriksen-guest at moszumanska.debian.org
Sat Jun 4 19:02:20 UTC 2016
This is an automated email from the git hooks/post-receive script.
tinodidriksen-guest pushed a commit to branch master
in repository lttoolbox.
commit 2c6bb32b83b80fedbd3d83da52e0c16550c7455c
Author: Tino Didriksen <tino at didriksen.cc>
Date: Sat Jun 4 19:01:12 2016 +0000
Imported Upstream version 3.3.3~r68466
---
NEWS | 62 ++++++++++++++++++++++-
configure.ac | 4 +-
lttoolbox/Makefile.am | 4 +-
lttoolbox/buffer.h | 8 +++
lttoolbox/dix.rnc | 129 +++++++++++++++++++++++++++++++++++++++++++++++
lttoolbox/dix.rng | 19 ++++++-
lttoolbox/lt-print.1 | 4 +-
lttoolbox/lt-trim.1 | 4 +-
lttoolbox/lt_proc.cc | 2 +-
lttoolbox/match_state.cc | 9 +++-
lttoolbox/match_state.h | 2 +
11 files changed, 234 insertions(+), 13 deletions(-)
diff --git a/NEWS b/NEWS
index 4c25bdf..387b9a7 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,61 @@
-Mon Mar 31 08:43:07 BST 2008
- * Version 3.0.2 released
+===================
+ NEWS for lttoolbox
+===================
+
+SVN
+---
+
+
+Version 3.3.3, 2016-05-15 (-r68450)
+-----------------------------------
+
+* add .rnc files for validation (just converted from DTDs; DTDs are
+ still the source)
+
+* new match-state functions needed for apertium-transfer-tools
+
+* various distribution-related fixes
+
+Version 3.3.2, 2016-01-02 (-r64329)
+-----------------------------------
+
+* lt-proc -b no longer swallows tags on partial matches
+ https://sourceforge.net/p/apertium/tickets/82/
+
+* various distribution-related fixes, static analysis fixes; "make
+ test" should always pass now
+
+* lt-trim now supports multiple <j/> elements
+ https://sourceforge.net/p/apertium/tickets/83/
+
+
+Version 3.3.1, 2015-03-17 (-r61000)
+-----------------------------------
+
+* dix.xsd used for extra warnings on validation
+
+* minor bug fixes in lt-trim, lt-comp
+
+* new functions needed for pcre_version storing in apertium
+
+* tests run on "make test"
+
+* various distribution-related fixes, static analysis fixes,
+ documentation
+
+Version 3.3.0, 2014-08-20 (-r?)
+-----------------------------------
+
+# NEWS TODO
+
+Version 3.0.2, 2008-03-31 08:43:07 BST
+--------------------------------------
+
+# older NEWS TODO
+
+
+
+# Local Variables:
+# mode: markdown
+# End:
diff --git a/configure.ac b/configure.ac
index f017264..f48e089 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5,7 +5,7 @@ AC_PREREQ(2.52)
m4_define([required_libxml_version], [2.6.17])
-AC_INIT([lttoolbox/lttoolbox.h], [3.3.2], [sortiz at users.sourceforge.net])
+AC_INIT([lttoolbox/lttoolbox.h], [3.3.3], [sortiz at users.sourceforge.net])
AC_CONFIG_HEADER([lttoolbox/lttoolbox_config.h])
AC_CANONICAL_SYSTEM
@@ -15,7 +15,7 @@ GENERIC_LIBRARY_NAME=lttoolbox
# Release versioning
GENERIC_MAJOR_VERSION=3
GENERIC_MINOR_VERSION=3
-GENERIC_MICRO_VERSION=2
+GENERIC_MICRO_VERSION=3
# API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
GENERIC_API_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am
index 1c43693..f8729d9 100644
--- a/lttoolbox/Makefile.am
+++ b/lttoolbox/Makefile.am
@@ -24,7 +24,7 @@ lttoolboxdir = $(prefix)/share/lttoolbox
lttoolboxinclude = $(prefix)/include
lttoolboxlib = $(prefix)/lib
-lttoolbox_DATA = dix.dtd dix.rng acx.rng xsd/dix.xsd xsd/acx.xsd
+lttoolbox_DATA = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd
lt_print_SOURCES = lt_print.cc
lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
@@ -72,4 +72,4 @@ if WINDOWS
endif
CLEANFILES = *~
-EXTRA_DIST = dix.dtd dix.rng acx.rng xsd/dix.xsd xsd/acx.xsd CMakeLists.txt $(man_MANS)
+EXTRA_DIST = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd CMakeLists.txt $(man_MANS)
diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h
index 59ff1ed..563681b 100644
--- a/lttoolbox/buffer.h
+++ b/lttoolbox/buffer.h
@@ -100,6 +100,14 @@ public:
destroy();
}
+ /**
+ * Get size of buffer
+ */
+ unsigned int getSize() const
+ {
+ return size;
+ }
+
/**
* Assign operator.
*/
diff --git a/lttoolbox/dix.rnc b/lttoolbox/dix.rnc
new file mode 100644
index 0000000..dd0e010
--- /dev/null
+++ b/lttoolbox/dix.rnc
@@ -0,0 +1,129 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+# DTD for the format of dictionaries
+
+dictionary =
+ element dictionary {
+ attlist.dictionary, alphabet?, sdefs?, pardefs?, section+
+ }
+attlist.dictionary &= empty
+# root element
+alphabet = element alphabet { attlist.alphabet, text }
+attlist.alphabet &= empty
+# alphabetic character list
+sdefs = element sdefs { attlist.sdefs, sdef+ }
+attlist.sdefs &= empty
+# symbol definition section
+sdef = element sdef { attlist.sdef, empty }
+# symbol definition
+attlist.sdef &= attribute n { xsd:ID }
+# n: symbol (tag) name
+attlist.sdef &= attribute c { text }?
+# c: symbol (tag) comment
+pardefs = element pardefs { attlist.pardefs, pardef+ }
+attlist.pardefs &= empty
+# paradigm definition section
+pardef = element pardef { attlist.pardef, e+ }
+# paradigm definition
+attlist.pardef &= attribute n { text }
+# n: paradigm name
+attlist.pardef &= attribute c { text }?
+# c: comment about paradigm
+section = element section { attlist.section, e+ }
+# dictionary section
+attlist.section &=
+ attribute id { xsd:ID },
+ attribute type {
+ "standard" | "inconditional" | "postblank" | "preblank"
+ }
+# id: dictionary section identifier
+
+# type: dictionary section type
+e = element e { attlist.e, (i | p | par | re)+ }
+# entry
+attlist.e &=
+ attribute r { "LR" | "RL" }?,
+ attribute lm { text }?,
+ attribute a { text }?,
+ attribute c { text }?,
+ attribute i { text }?,
+ attribute slr { text }?,
+ attribute srl { text }?,
+ attribute alt { text }?,
+ attribute v { text }?,
+ attribute vl { text }?,
+ attribute vr { text }?
+# r: restriction LR: left-to-right,
+# RL: right-to-left
+
+# lm: lemma
+
+# a: author
+
+# c: comment
+
+# i: ignore ('yes' means ignore; otherwise it is not ignored)
+
+# slr: translation sense when translating from left to right
+
+# srl: translation sense when translating from right to left
+
+# alt: alternative entries are omitted if not selected
+
+# v: variant sets (monodix) direction restrictions based on language variant
+
+# vl: variant left sets direction restrictions based on language variant for language on left of bidix
+
+# vr: variant right sets direction restrictions based on language variant for language on right of bidix
+par = element par { attlist.par, empty }
+# reference to paradigm
+attlist.par &= attribute n { text }
+# n: paradigm name
+i = element i { attlist.i, (text | b | s | g | j | a)* }
+attlist.i &= empty
+# identity
+re = element re { attlist.re, text }
+attlist.re &= empty
+# regular expression identification
+p = element p { attlist.p, l, r }
+attlist.p &= empty
+# pair of strings
+l = element l { attlist.l, (text | a | b | g | j | s)* }
+attlist.l &= empty
+# left part of p
+r = element r { attlist.r, (text | a | b | g | j | s)* }
+attlist.r &= empty
+# right part of p
+a = element a { attlist.a, empty }
+attlist.a &= empty
+# post-generator wake-up mark
+b = element b { attlist.b, empty }
+attlist.b &= empty
+# blank chars block mark
+g = element g { attlist.g, (text | a | b | j | s)* }
+# mark special groups in lemmas
+attlist.g &= attribute i { text }?
+# i is used to co-index groups in the left with those
+
+# on the right of a pair
+j = element j { attlist.j, empty }
+attlist.j &= empty
+# join lexical forms
+s = element s { attlist.s, empty }
+# reference to symbol (tag)
+attlist.s &= attribute n { xsd:IDREF }
+start = dictionary
+# n: symbol (tag) name
diff --git a/lttoolbox/dix.rng b/lttoolbox/dix.rng
index ad3fd69..42d6ecd 100644
--- a/lttoolbox/dix.rng
+++ b/lttoolbox/dix.rng
@@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!-- DTD for the format of dictionaries -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ DTD for the format of dictionaries
+-->
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
<define name="dictionary">
<element name="dictionary">
diff --git a/lttoolbox/lt-print.1 b/lttoolbox/lt-print.1
index a165aee..938a7eb 100644
--- a/lttoolbox/lt-print.1
+++ b/lttoolbox/lt-print.1
@@ -29,6 +29,6 @@ The transducer in ATT format .
.I apertium-tagger\fR(1),
.I apertium\fR(1).
.SH BUGS
-Lots of...lurking in the dark and waiting for you!
+Currently requires a UTF-8 locale (and doesn't crash if it doesn't have one).
.SH AUTHOR
-(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante.
+(c) 2005--2016 Universitat d'Alacant / Universidad de Alicante.
diff --git a/lttoolbox/lt-trim.1 b/lttoolbox/lt-trim.1
index 94090db..dc84bf8 100644
--- a/lttoolbox/lt-trim.1
+++ b/lttoolbox/lt-trim.1
@@ -17,7 +17,7 @@ is the application responsible for trimming compiled dictionaries. The
analyses (right-side when compiling lr) of analyser_binary are trimmed
to the input side of bidix_binary (left-side when compiling lr,
right-side when compiling rl), such that only analyses which would
-pass through `lt-proc -b bidix_binary' are kept.
+pass through `lt-proc \-b bidix_binary' are kept.
\fBWarning: this program is experimental!\fR It has been tested, but
not deployed extensively yet.
@@ -29,7 +29,7 @@ combinations of + followed by # in monodix are handled.
Some minor caveats: If you have the capitalised lemma "Foo" in the
monodix, but "foo" in the bidix, an analysis "^Foo<tag>$" would pass
-through bidix when doing lt-proc -b, but will not make it through
+through bidix when doing lt-proc \-b, but will not make it through
trimming. Make sure your lemmas have the same capitalisation in the
different dictionaries. Also, you should not have literal `+' or `#'
in your lemmas. Since lt-comp doesn't escape these, lt-trim cannot
diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc
index e6b9101..24c5db9 100644
--- a/lttoolbox/lt_proc.cc
+++ b/lttoolbox/lt_proc.cc
@@ -327,7 +327,7 @@ int main(int argc, char *argv[])
{
cerr << e.what();
if (fstp.getNullFlush()) {
- fputws_unlocked('\0', output);
+ fputwc_unlocked(L'\0', output);
}
exit(1);
diff --git a/lttoolbox/match_state.cc b/lttoolbox/match_state.cc
index 7f0fc64..2f98c85 100644
--- a/lttoolbox/match_state.cc
+++ b/lttoolbox/match_state.cc
@@ -124,13 +124,20 @@ MatchState::step(int const input, int const alt)
int
MatchState::classifyFinals(map<MatchNode *, int> const &final_class) const
{
+ set<int> empty_set;
+ return classifyFinals(final_class, empty_set);
+}
+
+int
+MatchState::classifyFinals(map<MatchNode *, int> const &final_class, set<int> const &banned_rules) const
+{
int result = INT_MAX;
for (int i = first; i != last; i = (i+1)%BUF_LIMIT)
{
map<MatchNode*, int>::const_iterator it2 = final_class.find(state[i]);
if(it2 != final_class.end())
{
- if(it2->second < result)
+ if(it2->second < result && banned_rules.find(it2->second) == banned_rules.end())
{
result = it2->second;
}
diff --git a/lttoolbox/match_state.h b/lttoolbox/match_state.h
index 79456d7..fb88e81 100644
--- a/lttoolbox/match_state.h
+++ b/lttoolbox/match_state.h
@@ -104,6 +104,8 @@ public:
*/
void init(MatchNode *initial);
+ int classifyFinals(map<MatchNode *, int> const &final_class, set<int> const &banned_rules) const;
+
int classifyFinals(map<MatchNode *, int> const &final_class) const;
void debug();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lttoolbox.git
More information about the debian-science-commits
mailing list