[uctodata] 01/03: New upstream version 0.3.1
Maarten van Gompel
proycon-guest at moszumanska.debian.org
Fri Jan 6 14:45:21 UTC 2017
This is an automated email from the git hooks/post-receive script.
proycon-guest pushed a commit to branch master
in repository uctodata.
commit 3ba404052cb349ec718cd7d072bad7b895fe85c2
Author: proycon <proycon at anaproy.nl>
Date: Fri Jan 6 15:44:47 2017 +0100
New upstream version 0.3.1
---
ChangeLog | 75 ++++++++++++++++++++++
NEWS | 7 ++-
config/Makefile.am | 4 +-
config/Makefile.in | 4 +-
config/tokconfig-deu | 14 ++---
config/tokconfig-eng | 5 +-
config/tokconfig-fra | 10 +--
config/tokconfig-fry | 14 +++--
config/tokconfig-generic | 116 -----------------------------------
config/tokconfig-ita | 8 +--
config/tokconfig-nld | 5 +-
config/tokconfig-nld-sonarchat | 6 +-
config/tokconfig-nld-twitter | 4 +-
config/tokconfig-nld-withplaceholder | 4 +-
config/tokconfig-por | 6 +-
config/tokconfig-rus | 11 ++--
config/tokconfig-spa | 9 +--
config/tokconfig-swe | 15 ++---
config/tokconfig-tur | 13 ++--
configure | 20 +++---
configure.ac | 2 +-
21 files changed, 144 insertions(+), 208 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5d11f7e..5a9db33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,78 @@
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * NEWS: NEWS for the release
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-tur: updated Turkish
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-swe: updated swedish
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-rus: updated russian
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-nld-withplaceholder: updated nld-withplaceholder
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-nld-twitter: updated nld-twitter
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-ita, config/tokconfig-nld-sonarchat: updates
+ Italian and sonarchat
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-eng: Cleaned up English
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-spa: cleaned Spanish
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-por: cleaned up Portuguese
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-deu: cleaned up German
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-fra: cleaned up French
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-fry: cleaned up Frisian
+
+2017-01-06 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-nld: cleaned up dutch rules
+
+2017-01-06 Maarten van Gompel <proycon at anaproy.nl>
+
+ * config/Makefile.am, config/tokconfig-generic: removed
+ tokconfig-generic, ucto already provides it
+
+2017-01-06 Maarten van Gompel <proycon at anaproy.nl>
+
+ * configure.ac: bumped version to 0.3.1 for DESTDIR patch
+
+2017-01-06 Maarten van Gompel <proycon at anaproy.nl>
+
+ * config/Makefile.am: Add missing DESTDIR in install-data-hook, to
+ allow for packaging
+
+2017-01-05 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * NEWS: updated NEWS for upcoming release
+
2016-11-11 Ko van der Sloot <K.vanderSloot at let.ru.nl>
* config/tokconfig-deu, config/tokconfig-fry,
diff --git a/NEWS b/NEWS
index 106c939..b90a70e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,10 @@
+0.3.1 [Ko van der Sloot] 06-01-2017
+Bug fix release:
+ * fixed install problem in debian packaging using DESTDIR
+ * cleaned all rules from 'empty' entries (which led to warnings)
+
0.3 [Ko vd Sloot] 05-01-2017
- * new direcory structure based on ISO 693-3 language codes.
+ * new directory structure and filenames based on ISO 639-3 language codes.
0.2 [Ko vd Sloot] 28-09-2016
* New implementation of rules. Needs a recent ucto that supports recursive
diff --git a/config/Makefile.am b/config/Makefile.am
index c5c7570..ce2d3b0 100644
--- a/config/Makefile.am
+++ b/config/Makefile.am
@@ -2,7 +2,7 @@ config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
- tokconfig-generic ligatures.filter \
+ ligatures.filter \
exotic-quotes.quote exotic-eos.eos \
nld_afk.abr spa.abr por.abr
@@ -12,7 +12,7 @@ EXTRA_DIST = $(config_DATA)
install-data-hook:
# for backward compatibility add symlinks with 'historical' names
- cd $(configdir) && \
+ cd $(DESTDIR)$(configdir) && \
$(LN_S) -f tokconfig-eng tokconfig-en && \
$(LN_S) -f tokconfig-deu tokconfig-de && \
$(LN_S) -f tokconfig-nld tokconfig-nl && \
diff --git a/config/Makefile.in b/config/Makefile.in
index 7378e13..f06dea9 100644
--- a/config/Makefile.in
+++ b/config/Makefile.in
@@ -225,7 +225,7 @@ config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
- tokconfig-generic ligatures.filter \
+ ligatures.filter \
exotic-quotes.quote exotic-eos.eos \
nld_afk.abr spa.abr por.abr
@@ -444,7 +444,7 @@ uninstall-am: uninstall-configDATA
install-data-hook:
# for backward compatibility add symlinks with 'historical' names
- cd $(configdir) && \
+ cd $(DESTDIR)$(configdir) && \
$(LN_S) -f tokconfig-eng tokconfig-en && \
$(LN_S) -f tokconfig-deu tokconfig-de && \
$(LN_S) -f tokconfig-nld tokconfig-nl && \
diff --git a/config/tokconfig-deu b/config/tokconfig-deu
index 55ee863..1b9eee3 100644
--- a/config/tokconfig-deu
+++ b/config/tokconfig-deu
@@ -1,19 +1,17 @@
version=0.2
[RULE-ORDER]
-TOKEN SUFFIX WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+SUFFIX WORD-TOKEN ABBREVIATION-KNOWN
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
[META-RULES]
SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
diff --git a/config/tokconfig-eng b/config/tokconfig-eng
index acba5ed..cad264d 100644
--- a/config/tokconfig-eng
+++ b/config/tokconfig-eng
@@ -11,10 +11,7 @@ NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(?:\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(?:\p{L}+)
SUFFIX = (?:\A|\p{L})+( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
diff --git a/config/tokconfig-fra b/config/tokconfig-fra
index ff21999..f9eb3f2 100644
--- a/config/tokconfig-fra
+++ b/config/tokconfig-fra
@@ -1,19 +1,15 @@
version=0.2
[RULE-ORDER]
-PREFIX SUFFIX WORD-TOKEN ABBREVIATION-KNOWN URL URL-WWW URL-DOMAIN
+PREFIX SUFFIX WORD-TOKEN URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
NUMBER-YEAR FRACNUMBER TIME NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
[META-RULES]
SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
-#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])(?:\A)((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )\p{L}+
SUFFIX = (?:\A|\p{L})+( %SUFFIXES% )(?:\Z|\P{L})
diff --git a/config/tokconfig-fry b/config/tokconfig-fry
index 4826e36..cc7ceba 100644
--- a/config/tokconfig-fry
+++ b/config/tokconfig-fry
@@ -1,8 +1,11 @@
version=0.2
[RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIALS INITIAL SMILEY PUNCTUATION-MULTI DATE DATE-REVERSE
+WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN WORD-INFIX-COMPOUND NUMBER-ORDINAL
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIALS INITIAL
+SMILEY REVERSE-SMILEY
+PUNCTUATION-MULTI DATE DATE-REVERSE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
# to do PREFIXES (is leeg nu) UNITS (uitgecommentarieerd in de c++ code)
@@ -11,11 +14,10 @@ SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/config/tokconfig-generic b/config/tokconfig-generic
deleted file mode 100644
index cb56ab6..0000000
--- a/config/tokconfig-generic
+++ /dev/null
@@ -1,116 +0,0 @@
-version=0.2
-[RULE-ORDER]
-URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
-NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
-
-
-[META-RULES]
-SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
-ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
-
-[RULES]
-%include url
-%include e-mail
-%include smiley
-
-#Ex: (dis)information
-WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
-
-#Ex: understand(s)
-WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Keep dash/underscore connected parts (even if they are in parenthesis)
-WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
-
-#Abbreviations with multiple periods
-ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
-
-#retain initials
-INITIAL=^(?:\p{Lt}|\p{Lu})\.$
-
-#Homogeneous punctuation (ellipsis etc)
-PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
-
-#Date
-DATE=\p{N}{1,2}[/\-]\p{N}{1,2}[/-]\p{N}{2,4}
-DATE-REVERSE=\p{N}{4}[/\-]\p{N}{1,2}[/\-]\p{N}{1,2}
-
-FRACNUMBER=\p{N}+(?:/\p{N}+)+
-
-NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
-
-#Times
-TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
-
-#retain digits, including those starting with initial period (.22), and negative numbers
-NUMBER=-?(?:[\.,]?\p{N}+)+
-
-CURRENCY=\p{Sc}
-
-WORD=[\p{L}\p{Mn}]+
-
-PUNCTUATION=\p{P}
-
-UNKNOWN=.
-
-[PREFIXES]
-
-[SUFFIXES]
-
-[ORDINALS]
-
-[TOKENS]
-
-[UNITS]
-km
-m
-cm
-mm
-g
-kg
-C
-l
-s
-sec
-min
-gb
-mb
-kb
-
-
-[CURRENCY]
-USD
-GBP
-CAD
-NZD
-AUD
-SGD
-HKD
-EUR
-
-[ABBREVIATIONS]
-
-
-[FILTER]
-fl fl
-ff ff
-ffi ffi
-ffl ffl
-# also filter soft hyphen
-\u00AD
-
-
-[EOSMARKERS]
-%include standard-eos
-
-[QUOTES]
-%include standard-quotes
diff --git a/config/tokconfig-ita b/config/tokconfig-ita
index 04ebaec..ff25b13 100644
--- a/config/tokconfig-ita
+++ b/config/tokconfig-ita
@@ -1,19 +1,17 @@
version=0.2
[RULE-ORDER]
-ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
+ABBREVIATION-KNOWN PREFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
-NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
+NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD-TOKEN WORD PUNCTUATION UNKNOWN
[META-RULES]
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/config/tokconfig-nld b/config/tokconfig-nld
index a818fbf..d405426 100644
--- a/config/tokconfig-nld
+++ b/config/tokconfig-nld
@@ -28,14 +28,13 @@ NUMBER-ORDINAL = \p{N}+-?(?i)(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
# the ^\p{S} prevents splitting <tag> like strings
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\p{S}\.])((?:%ABBREVIATIONS%)(?:\.{0,1}))(?:\Z|\P{L})
WORD-TOKEN =(?i)(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?i)(?: %ATTACHEDSUFFIXES% ))(?:\Z)
WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?i)(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
CURRENCY=^(\p{Sc}|%CURRENCY%)(?:\p{N}|\Z)
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
#UNIT-COMPOUND = \p{N}+((?i: %UNITS% )(?:[./*=]{1})(?i: %UNITS% )(?:\p{P}{0,1}))$
#UNIT = (?i)(?:\a|\P{L})( %UNITS% )(?:\z|\P{L})
diff --git a/config/tokconfig-nld-sonarchat b/config/tokconfig-nld-sonarchat
index 5785a39..9a4b7e2 100644
--- a/config/tokconfig-nld-sonarchat
+++ b/config/tokconfig-nld-sonarchat
@@ -1,8 +1,8 @@
version=0.2
[RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
+URL URL-WWW
URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
-WORD-COMPOUND NICKNAME ABBREVIATION INITIALS INITIAL SMILEY REVERSE_SMILEY
+WORD-COMPOUND NICKNAME ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY
PUNCTUATION-MULTI DATE DATE-REVERSE
NUMBER-YEAR TIME NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
# to do PREFIXES (is leeg nu) UNITS (uitgecommentarieerd in de c++ code)
@@ -35,8 +35,6 @@ NICKNAME=chatter\p{N}+
#retain initials
INITIAL=^(?:\p{Lt}|\p{Lu})\.$
-#SMILEY=^(?:>?[:;]['`^]?[-~]*[)}\](\\/\[{Ss\$PpDd]+)$
-
#Homogeneous punctuation (ellipsis etc)
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
diff --git a/config/tokconfig-nld-twitter b/config/tokconfig-nld-twitter
index 6954776..0d01971 100644
--- a/config/tokconfig-nld-twitter
+++ b/config/tokconfig-nld-twitter
@@ -1,7 +1,7 @@
version=0.2
[RULE-ORDER]
-WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
-URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
WORD-COMPOUND ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY HASHTAG
ADDRESSEE PUNCTUATION-MULTI DATE-REVERSE DATE FRACTIONORDATE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
diff --git a/config/tokconfig-nld-withplaceholder b/config/tokconfig-nld-withplaceholder
index 62491b7..534377b 100644
--- a/config/tokconfig-nld-withplaceholder
+++ b/config/tokconfig-nld-withplaceholder
@@ -1,7 +1,7 @@
version=0.2
[RULE-ORDER]
-PLACEHOLDER WORD-WITHSUFFIX QUOTE-SUFFIX
-WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL QUOTE-COMPOUND
+PLACEHOLDER QUOTE-SUFFIX
+QUOTE-COMPOUND
NUMBER-STRING STRING-NUMBER URL URL-WWW URL-DOMAIN E-MAIL
WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIALS INITIAL SMILEY REVERSE-SMILEY
diff --git a/config/tokconfig-por b/config/tokconfig-por
index 3b4514a..1737c9b 100644
--- a/config/tokconfig-por
+++ b/config/tokconfig-por
@@ -9,11 +9,7 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
diff --git a/config/tokconfig-rus b/config/tokconfig-rus
index e2eb736..ec4476f 100644
--- a/config/tokconfig-rus
+++ b/config/tokconfig-rus
@@ -8,14 +8,11 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
[META-RULES]
SPLITTER=%
-NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
+#NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/config/tokconfig-spa b/config/tokconfig-spa
index c60a49e..fc2f2c4 100644
--- a/config/tokconfig-spa
+++ b/config/tokconfig-spa
@@ -9,12 +9,9 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/config/tokconfig-swe b/config/tokconfig-swe
index d0eb5f9..0af8482 100644
--- a/config/tokconfig-swe
+++ b/config/tokconfig-swe
@@ -1,21 +1,18 @@
version=0.2
[RULE-ORDER]
-SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+ABBREVIATION-KNOWN NUMBER-ORDINAL
+URL URL-WWW URL-DOMAIN E-MAIL
+WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
+ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE DATE-SHORT
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
[META-RULES]
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/config/tokconfig-tur b/config/tokconfig-tur
index 23f2132..3b09ea3 100644
--- a/config/tokconfig-tur
+++ b/config/tokconfig-tur
@@ -2,8 +2,9 @@ version=0.2
#by Turkish National Corpus Team
[RULE-ORDER]
-DATE NP-COMP URL URL-WWW URL-DOMAIN
-E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI
+NUMBER-ORDINAL ROMAN-NUMERALS DATE NP-COMP URL URL-WWW URL-DOMAIN
+E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY ABBREVIATION-KNOWN
+PUNCTUATION-MULTI
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
@@ -11,12 +12,8 @@ NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
ABBREVIATION-KNOWN = (?:\p{P}*)?(?:\A|[^\p{L}\.])((?:%ABBREVIATIONS%)\.)(?:\Z|\P{L})
-WORD-TOKEN =(%TOKENS%)(?:\p{P}*)?$
-#WORD-WITHPREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(?: %ATTACHEDPREFIXES% )\p{L}+
-#WORD-WITHSUFFIX = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% ))(?:\Z)
-#WORD-INFIX-COMPOUND = ((?:\p{L}|\p{N}|-)+(?: %ATTACHEDSUFFIXES% )-(?:\p{L}+))$
-PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
-SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
+#PREFIX = (?:\A|[^\p{Lu}\.]|[^\p{Ll}\.])(%PREFIXES% )(\p{L}+)
+#SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
[RULES]
%include url
diff --git a/configure b/configure
index 6fa7531..83e0b81 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for uctodata 0.3.
+# Generated by GNU Autoconf 2.69 for uctodata 0.3.1.
#
# Report bugs to <lamasoftware at science.ru.nl>.
#
@@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='uctodata'
PACKAGE_TARNAME='uctodata'
-PACKAGE_VERSION='0.3'
-PACKAGE_STRING='uctodata 0.3'
+PACKAGE_VERSION='0.3.1'
+PACKAGE_STRING='uctodata 0.3.1'
PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
PACKAGE_URL=''
@@ -1212,7 +1212,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures uctodata 0.3 to adapt to many kinds of systems.
+\`configure' configures uctodata 0.3.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1279,7 +1279,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of uctodata 0.3:";;
+ short | recursive ) echo "Configuration of uctodata 0.3.1:";;
esac
cat <<\_ACEOF
@@ -1353,7 +1353,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-uctodata configure 0.3
+uctodata configure 0.3.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1370,7 +1370,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by uctodata $as_me 0.3, which was
+It was created by uctodata $as_me 0.3.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2233,7 +2233,7 @@ fi
# Define the identity of the package.
PACKAGE='uctodata'
- VERSION='0.3'
+ VERSION='0.3.1'
cat >>confdefs.h <<_ACEOF
@@ -2892,7 +2892,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by uctodata $as_me 0.3, which was
+This file was extended by uctodata $as_me 0.3.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -2945,7 +2945,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-uctodata config.status 0.3
+uctodata config.status 0.3.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 4226f97..72ec1ee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT([uctodata], [0.3], [lamasoftware at science.ru.nl])
+AC_INIT([uctodata], [0.3.1], [lamasoftware at science.ru.nl])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_SRCDIR([configure.ac])
AC_PROG_LN_S
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/uctodata.git
More information about the debian-science-commits
mailing list