[uctodata] 01/03: New upstream version 0.3.1

Maarten van Gompel proycon-guest at moszumanska.debian.org
Fri Jan 6 14:45:21 UTC 2017


This is an automated email from the git hooks/post-receive script.

proycon-guest pushed a commit to branch master
in repository uctodata.

commit 3ba404052cb349ec718cd7d072bad7b895fe85c2
Author: proycon <proycon at anaproy.nl>
Date:   Fri Jan 6 15:44:47 2017 +0100

    New upstream version 0.3.1
---
 ChangeLog                            |  75 ++++++++++++++++++++++
 NEWS                                 |   7 ++-
 config/Makefile.am                   |   4 +-
 config/Makefile.in                   |   4 +-
 config/tokconfig-deu                 |  14 ++---
 config/tokconfig-eng                 |   5 +-
 config/tokconfig-fra                 |  10 +--
 config/tokconfig-fry                 |  14 +++--
 config/tokconfig-generic             | 116 -----------------------------------
 config/tokconfig-ita                 |   8 +--
 config/tokconfig-nld                 |   5 +-
 config/tokconfig-nld-sonarchat       |   6 +-
 config/tokconfig-nld-twitter         |   4 +-
 config/tokconfig-nld-withplaceholder |   4 +-
 config/tokconfig-por                 |   6 +-
 config/tokconfig-rus                 |  11 ++--
 config/tokconfig-spa                 |   9 +--
 config/tokconfig-swe                 |  15 ++---
 config/tokconfig-tur                 |  13 ++--
 configure                            |  20 +++---
 configure.ac                         |   2 +-
 21 files changed, 144 insertions(+), 208 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5d11f7e..5a9db33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,78 @@
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* NEWS: NEWS for the release
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-tur: updated Turkish
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-swe: updated swedish
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-rus: updated russian
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-nld-withplaceholder: updated nld-withplaceholder
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-nld-twitter: updated nld-twitter
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-ita, config/tokconfig-nld-sonarchat: updates
+	italian and sonarchat
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-eng: Cleaned up English
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-spa: cleaned Spanish
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-por: cleaned up Portuguese
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-deu: cleaned up German
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-fra: cleaned up French
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-fry: cleaned up Frisian
+
+2017-01-06  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-nld: cleaned up dutch rules
+
+2017-01-06  Maarten van Gompel <proycon at anaproy.nl>
+
+	* config/Makefile.am, config/tokconfig-generic: removed
+	tokconfig-generic, ucto already provides it
+
+2017-01-06  Maarten van Gompel <proycon at anaproy.nl>
+
+	* configure.ac: bumped version to 0.3.1 for DESTDIR patch
+
+2017-01-06  Maarten van Gompel <proycon at anaproy.nl>
+
+	* config/Makefile.am: Add missing DESTDIR in install-data-hook, to
+	allow for packaging
+
+2017-01-05  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* NEWS: updated NEWS for upcoming release
+
 2016-11-11  Ko van der Sloot <K.vanderSloot at let.ru.nl>
 
 	* config/tokconfig-deu, config/tokconfig-fry,
diff --git a/NEWS b/NEWS
index 106c939..b90a70e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,10 @@
+0.3.1 [Ko van der Sloot] 06-01-2017
+Bug fix release:
+ * fixed install problem in debian packaging using DESTDIR
+ * cleaned all rules from 'empty' entries (which led to warnings)
+
 0.3 [Ko vd Sloot] 05-01-2017
-  * new direcory structure based on ISO 693-3 language codes.
+  * new directory structure and filenames based on ISO 639-3 language codes.
 
 0.2 [Ko vd Sloot] 28-09-2016
   * New implementation of rules. Needs a recent ucto that supports recursive
diff --git a/config/Makefile.am b/config/Makefile.am
index c5c7570..ce2d3b0 100644
--- a/config/Makefile.am
+++ b/config/Makefile.am
@@ -2,7 +2,7 @@ config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
 	tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
 	tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
 	tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
-	tokconfig-generic ligatures.filter \
+	ligatures.filter \
 	exotic-quotes.quote exotic-eos.eos \
 	nld_afk.abr spa.abr por.abr
 
@@ -12,7 +12,7 @@ EXTRA_DIST = $(config_DATA)
 
 install-data-hook:
 # for backward compatability add symlinks with 'historical' names
-	cd $(configdir) && \
+	cd $(DESTDIR)$(configdir) && \
 	$(LN_S) -f tokconfig-eng tokconfig-en && \
 	$(LN_S) -f tokconfig-deu tokconfig-de && \
 	$(LN_S) -f tokconfig-nld tokconfig-nl && \
diff --git a/config/Makefile.in b/config/Makefile.in
index 7378e13..f06dea9 100644
--- a/config/Makefile.in
+++ b/config/Makefile.in
@@ -225,7 +225,7 @@ config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
 	tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
 	tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
 	tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
-	tokconfig-generic ligatures.filter \
+	ligatures.filter \
 	exotic-quotes.quote exotic-eos.eos \
 	nld_afk.abr spa.abr por.abr
 
@@ -444,7 +444,7 @@ uninstall-am: uninstall-configDATA
 
 install-data-hook:
 # for backward compatability add symlinks with 'historical' names
-	cd $(configdir) && \
+	cd $(DESTDIR)$(configdir) && \
 	$(LN_S) -f tokconfig-eng tokconfig-en && \
 	$(LN_S) -f tokconfig-deu tokconfig-de && \
 	$(LN_S) -f tokconfig-nld tokconfig-nl && \
diff --git a/config/tokconfig-deu b/config/tokconfig-deu
index 55ee863..1b9eee3 100644
--- a/config/tokconfig-deu
+++ b/config/tokconfig-deu
@@ -1,19 +1,17 @@
 version=0.2
 [RULE-ORDER]
-TOKEN SUFFIX WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+SUFFIX WORD-TOKEN ABBREVIATION-KNOWN
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
 [META-RULES]
 SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
 WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
 SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
diff --git a/config/tokconfig-eng b/config/tokconfig-eng
index acba5ed..cad264d 100644
--- a/config/tokconfig-eng
+++ b/config/tokconfig-eng
@@ -11,10 +11,7 @@ NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 #ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
 WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(?:\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(?:\p{L}+)
 SUFFIX = (?:\A|\p{L})+( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
diff --git a/config/tokconfig-fra b/config/tokconfig-fra
index ff21999..f9eb3f2 100644
--- a/config/tokconfig-fra
+++ b/config/tokconfig-fra
@@ -1,19 +1,15 @@
 version=0.2
 [RULE-ORDER]
-PREFIX SUFFIX WORD-TOKEN ABBREVIATION-KNOWN URL URL-WWW URL-DOMAIN
+PREFIX SUFFIX WORD-TOKEN URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
 ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
 NUMBER-YEAR FRACNUMBER TIME NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
 [META-RULES]
 SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
-#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
 WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
 PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )\p{L}+
 SUFFIX = (?:\A|\p{L})+( %SUFFIXES% )(?:\Z|\P{L})
 
diff --git a/config/tokconfig-fry b/config/tokconfig-fry
index 4826e36..cc7ceba 100644
--- a/config/tokconfig-fry
+++ b/config/tokconfig-fry
@@ -1,8 +1,11 @@
 version=0.2
 [RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIALS INITIAL SMILEY PUNCTUATION-MULTI DATE DATE-REVERSE
+WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN WORD-INFIX-COMPOUND NUMBER-ORDINAL
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIALS INITIAL
+SMILEY REVERSE-SMILEY
+PUNCTUATION-MULTI DATE DATE-REVERSE
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 # to do PREFIXES (is leeg nu) UNITS (uitgecommentarieerd in de c++ code)
 
@@ -11,11 +14,10 @@ SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
 WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
 WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
 WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/config/tokconfig-generic b/config/tokconfig-generic
deleted file mode 100644
index cb56ab6..0000000
--- a/config/tokconfig-generic
+++ /dev/null
@@ -1,116 +0,0 @@
-version=0.2
-[RULE-ORDER]
-URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
-NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
-
-
-[META-RULES]
-SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
-ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
-
-[RULES]
-%include url
-%include e-mail
-%include smiley
-
-#Ex: (dis)information
-WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
-
-#Ex: understand(s)
-WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Keep dash/underscore connected parts (even if they are in parenthesis)
-WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
-
-#Abbreviations with multiple periods
-ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
-
-#retain initials
-INITIAL=^(?:\p{Lt}|\p{Lu})\.$
-
-#Homogeneous punctuation (ellipsis etc)
-PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
-
-#Date
-DATE=\p{N}{1,2}[/\-]\p{N}{1,2}[/-]\p{N}{2,4}
-DATE-REVERSE=\p{N}{4}[/\-]\p{N}{1,2}[/\-]\p{N}{1,2}
-
-FRACNUMBER=\p{N}+(?:/\p{N}+)+
-
-NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
-
-#Times
-TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
-
-#retain digits, including those starting with initial period (.22), and negative numbers
-NUMBER=-?(?:[\.,]?\p{N}+)+
-
-CURRENCY=\p{Sc}
-
-WORD=[\p{L}\p{Mn}]+
-
-PUNCTUATION=\p{P}
-
-UNKNOWN=.
-
-[PREFIXES]
-
-[SUFFIXES]
-
-[ORDINALS]
-
-[TOKENS]
-
-[UNITS]
-km
-m
-cm
-mm
-g
-kg
-C
-l
-s
-sec
-min
-gb
-mb
-kb
-
-
-[CURRENCY]
-USD
-GBP
-CAD
-NZD
-AUD
-SGD
-HKD
-EUR
-
-[ABBREVIATIONS]
-
-
-[FILTER]
-fl fl
-ff ff
-ffi ffi
-ffl ffl
-# also filter soft hyphen
-\u00AD
-
-
-[EOSMARKERS]
-%include standard-eos
-
-[QUOTES]
-%include standard-quotes
diff --git a/config/tokconfig-ita b/config/tokconfig-ita
index 04ebaec..ff25b13 100644
--- a/config/tokconfig-ita
+++ b/config/tokconfig-ita
@@ -1,19 +1,17 @@
 version=0.2
 [RULE-ORDER]
-ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
+ABBREVIATION-KNOWN PREFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
 ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
-NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
+NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD-TOKEN WORD PUNCTUATION UNKNOWN
 
 [META-RULES]
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
 WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
 PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/config/tokconfig-nld b/config/tokconfig-nld
index a818fbf..d405426 100644
--- a/config/tokconfig-nld
+++ b/config/tokconfig-nld
@@ -28,14 +28,13 @@ NUMBER-ORDINAL = \p{N}+-?(?i)(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 # the ^\p{S} prevents splitting <tag> like strings
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\p{S}\.])((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
 WORD-TOKEN =(?i)(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
 WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?i)(?: %ATTACHEDSUFFIXES% ))(?:\Z)
 WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?i)(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
 
 CURRENCY=^(\p{Sc}|%CURRENCY%)(?:\p{N}|\Z)
 
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 #UNIT-COMPOUND = \p{N}+((?i: %UNITS% )(?:[./*=]{1})(?i: %UNITS% )(?:\p{P}{0,1}))$
 #UNIT = (?i)(?:\a|\P{L})( %UNITS% )(?:\z|\P{L})
diff --git a/config/tokconfig-nld-sonarchat b/config/tokconfig-nld-sonarchat
index 5785a39..9a4b7e2 100644
--- a/config/tokconfig-nld-sonarchat
+++ b/config/tokconfig-nld-sonarchat
@@ -1,8 +1,8 @@
 version=0.2
 [RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
+URL URL-WWW
 URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
-WORD-COMPOUND NICKNAME ABBREVIATION INITIALS INITIAL SMILEY REVERSE_SMILEY
+WORD-COMPOUND NICKNAME ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY
 PUNCTUATION-MULTI DATE DATE-REVERSE
 NUMBER-YEAR TIME NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 # to do PREFIXES (is leeg nu) UNITS (uitgecommentarieerd in de c++ code)
@@ -35,8 +35,6 @@ NICKNAME=chatter\p{N}+
 #retain initials
 INITIAL=^(?:\p{Lt}|\p{Lu})\.$
 
-#SMILEY=^(?:>?[:;]['`^]?[-~]*[)}\](\\/\[{Ss\$PpDd]+)$
-
 #Homogeneous punctuation (ellipsis etc)
 PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
 
diff --git a/config/tokconfig-nld-twitter b/config/tokconfig-nld-twitter
index 6954776..0d01971 100644
--- a/config/tokconfig-nld-twitter
+++ b/config/tokconfig-nld-twitter
@@ -1,7 +1,7 @@
 version=0.2
 [RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
-URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
 WORD-COMPOUND ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY HASHTAG
 ADDRESSEE PUNCTUATION-MULTI DATE-REVERSE DATE FRACTIONORDATE
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
diff --git a/config/tokconfig-nld-withplaceholder b/config/tokconfig-nld-withplaceholder
index 62491b7..534377b 100644
--- a/config/tokconfig-nld-withplaceholder
+++ b/config/tokconfig-nld-withplaceholder
@@ -1,7 +1,7 @@
 version=0.2
 [RULE-ORDER]
-PLACEHOLDER WORD-WITHSUFFIX QUOTE-SUFFIX
-WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL QUOTE-COMPOUND
+PLACEHOLDER QUOTE-SUFFIX
+QUOTE-COMPOUND
 NUMBER-STRING STRING-NUMBER URL URL-WWW URL-DOMAIN E-MAIL
 WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
 ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY
diff --git a/config/tokconfig-por b/config/tokconfig-por
index 3b4514a..1737c9b 100644
--- a/config/tokconfig-por
+++ b/config/tokconfig-por
@@ -9,11 +9,7 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
 SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
diff --git a/config/tokconfig-rus b/config/tokconfig-rus
index e2eb736..ec4476f 100644
--- a/config/tokconfig-rus
+++ b/config/tokconfig-rus
@@ -8,14 +8,11 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
 [META-RULES]
 SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/config/tokconfig-spa b/config/tokconfig-spa
index c60a49e..fc2f2c4 100644
--- a/config/tokconfig-spa
+++ b/config/tokconfig-spa
@@ -9,12 +9,9 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/config/tokconfig-swe b/config/tokconfig-swe
index d0eb5f9..0af8482 100644
--- a/config/tokconfig-swe
+++ b/config/tokconfig-swe
@@ -1,21 +1,18 @@
 version=0.2
 
 [RULE-ORDER]
-SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+ABBREVIATION-KNOWN NUMBER-ORDINAL
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE DATE-SHORT
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
 [META-RULES]
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/config/tokconfig-tur b/config/tokconfig-tur
index 23f2132..3b09ea3 100644
--- a/config/tokconfig-tur
+++ b/config/tokconfig-tur
@@ -2,8 +2,9 @@ version=0.2
 #by Turkish National Corpus Team
 
 [RULE-ORDER]
-DATE NP-COMP URL URL-WWW URL-DOMAIN
-E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI
+NUMBER-ORDINAL ROMAN-NUMERALS DATE NP-COMP URL URL-WWW URL-DOMAIN
+E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY ABBREVIATION-KNOWN
+PUNCTUATION-MULTI
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
 
@@ -11,12 +12,8 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
 ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 
 [RULES]
 %include url
diff --git a/configure b/configure
index 6fa7531..83e0b81 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for uctodata 0.3.
+# Generated by GNU Autoconf 2.69 for uctodata 0.3.1.
 #
 # Report bugs to <lamasoftware at science.ru.nl>.
 #
@@ -579,8 +579,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='uctodata'
 PACKAGE_TARNAME='uctodata'
-PACKAGE_VERSION='0.3'
-PACKAGE_STRING='uctodata 0.3'
+PACKAGE_VERSION='0.3.1'
+PACKAGE_STRING='uctodata 0.3.1'
 PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
 PACKAGE_URL=''
 
@@ -1212,7 +1212,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures uctodata 0.3 to adapt to many kinds of systems.
+\`configure' configures uctodata 0.3.1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1279,7 +1279,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of uctodata 0.3:";;
+     short | recursive ) echo "Configuration of uctodata 0.3.1:";;
    esac
   cat <<\_ACEOF
 
@@ -1353,7 +1353,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-uctodata configure 0.3
+uctodata configure 0.3.1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1370,7 +1370,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by uctodata $as_me 0.3, which was
+It was created by uctodata $as_me 0.3.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2233,7 +2233,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='uctodata'
- VERSION='0.3'
+ VERSION='0.3.1'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2892,7 +2892,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by uctodata $as_me 0.3, which was
+This file was extended by uctodata $as_me 0.3.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -2945,7 +2945,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-uctodata config.status 0.3
+uctodata config.status 0.3.1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 4226f97..72ec1ee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.59)
-AC_INIT([uctodata], [0.3], [lamasoftware at science.ru.nl])
+AC_INIT([uctodata], [0.3.1], [lamasoftware at science.ru.nl])
 AM_INIT_AUTOMAKE([foreign])
 AC_CONFIG_SRCDIR([configure.ac])
 AC_PROG_LN_S

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/uctodata.git



More information about the debian-science-commits mailing list