[uctodata] 01/03: New upstream version 0.3

Maarten van Gompel proycon-guest at moszumanska.debian.org
Fri Jan 6 10:37:26 UTC 2017


This is an automated email from the git hooks/post-receive script.

proycon-guest pushed a commit to branch master
in repository uctodata.

commit 545bba90f42ed696ef2243c8e6c6236f5e82e954
Author: proycon <proycon at anaproy.nl>
Date:   Fri Jan 6 11:36:20 2017 +0100

    New upstream version 0.3
---
 ChangeLog                                          |  42 ++
 Makefile.in                                        |   1 +
 NEWS                                               |   3 +
 config/Makefile.am                                 |  33 +-
 config/Makefile.in                                 |  55 +-
 config/{nl_afk.abr => nld_afk.abr}                 |   0
 config/{pt.abr => por.abr}                         |   0
 config/{es.abr => spa.abr}                         |   0
 config/{tokconfig-de => tokconfig-deu}             |   1 +
 config/{tokconfig-en => tokconfig-eng}             |   0
 config/{tokconfig-fr => tokconfig-fra}             |   0
 config/{tokconfig-fy => tokconfig-fry}             |   1 +
 config/{tokconfig-es => tokconfig-generic}         |  27 +-
 config/{tokconfig-it => tokconfig-ita}             |   1 +
 config/{tokconfig-nl => tokconfig-nld}             |   2 +-
 ...config-nl-sonarchat => tokconfig-nld-sonarchat} |   3 +-
 ...{tokconfig-nl-twitter => tokconfig-nld-twitter} |   3 +-
 ...thplaceholder => tokconfig-nld-withplaceholder} |   1 +
 config/{tokconfig-pt => tokconfig-por}             |   3 +-
 config/{tokconfig-ru => tokconfig-rus}             |   1 +
 config/{tokconfig-es => tokconfig-spa}             |   3 +-
 config/{tokconfig-sv => tokconfig-swe}             |   2 +
 config/{tokconfig-sv => tokconfig-tur}             | 563 +++++++++++++++++----
 configure                                          |  32 +-
 configure.ac                                       |   3 +-
 25 files changed, 644 insertions(+), 136 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f6883f1..5d11f7e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,45 @@
+2016-11-11  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-deu, config/tokconfig-fry,
+	config/tokconfig-generic, config/tokconfig-ita,
+	config/tokconfig-nld-sonarchat, config/tokconfig-nld-twitter,
+	config/tokconfig-nld-withplaceholder, config/tokconfig-por,
+	config/tokconfig-rus, config/tokconfig-spa, config/tokconfig-swe,
+	config/tokconfig-tur: added version info to each datafile
+
+2016-10-17  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/tokconfig-nld, config/tokconfig-nld-sonarchat,
+	config/tokconfig-nld-twitter, config/tokconfig-por,
+	config/tokconfig-spa: although old names still work, be cautious and
+	use the new names
+
+2016-10-12  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* config/Makefile.am, config/es.abr, config/nl_afk.abr,
+	config/nld_afk.abr, config/por.abr, config/pt.abr, config/spa.abr,
+	config/tokconfig-de, config/tokconfig-deu, config/tokconfig-en,
+	config/tokconfig-eng, config/tokconfig-es, config/tokconfig-fr,
+	config/tokconfig-fra, config/tokconfig-fry, config/tokconfig-fy,
+	config/tokconfig-generic, config/tokconfig-it,
+	config/tokconfig-ita, config/tokconfig-nl,
+	config/tokconfig-nl-sonarchat, config/tokconfig-nl-twitter,
+	config/tokconfig-nl-withplaceholder, config/tokconfig-nld,
+	config/tokconfig-nld-sonarchat, config/tokconfig-nld-twitter,
+	config/tokconfig-nld-withplaceholder, config/tokconfig-por,
+	config/tokconfig-pt, config/tokconfig-ru, config/tokconfig-rus,
+	config/tokconfig-spa, config/tokconfig-sv, config/tokconfig-swe,
+	config/tokconfig-tr, config/tokconfig-tur, configure.ac: use ISO
+	639-3 language codes. But preserve old versions as symlinks
+
+2016-09-28  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* configure.ac: bump version after release
+
+2016-09-28  Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+	* NEWS: news! upcoming release.
+
 2016-09-28  Ko van der Sloot <K.vanderSloot at let.ru.nl>
 
 	* config/tokconfig-nl: fixed PUNCTUATION-MULTI-DOT and
diff --git a/Makefile.in b/Makefile.in
index 8ccbf14..f007a73 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -247,6 +247,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
+LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
diff --git a/NEWS b/NEWS
index 89f7e2a..106c939 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+0.3 [Ko vd Sloot] 05-01-2017
+  * new directory structure based on ISO 639-3 language codes.
+
 0.2 [Ko vd Sloot] 28-09-2016
   * New implementation of rules. Needs a recent ucto that supports recursive
     application of rules
diff --git a/config/Makefile.am b/config/Makefile.am
index 0056f2c..c5c7570 100644
--- a/config/Makefile.am
+++ b/config/Makefile.am
@@ -1,11 +1,32 @@
-config_DATA = tokconfig-en tokconfig-nl tokconfig-fr tokconfig-it \
-	tokconfig-es tokconfig-pt tokconfig-de tokconfig-sv \
-	tokconfig-nl-twitter tokconfig-nl-sonarchat \
-	tokconfig-nl-withplaceholder tokconfig-fy tokconfig-ru \
-	ligatures.filter \
+config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
+	tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
+	tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
+	tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
+	tokconfig-generic ligatures.filter \
 	exotic-quotes.quote exotic-eos.eos \
-	nl_afk.abr es.abr pt.abr
+	nld_afk.abr spa.abr por.abr
 
 configdir = $(sysconfdir)/ucto
 
 EXTRA_DIST = $(config_DATA)
+
+install-data-hook:
+# For backward compatibility, add symlinks with 'historical' names; DESTDIR is honoured so staged installs work.
+	cd "$(DESTDIR)$(configdir)" && \
+	$(LN_S) -f tokconfig-eng tokconfig-en && \
+	$(LN_S) -f tokconfig-deu tokconfig-de && \
+	$(LN_S) -f tokconfig-nld tokconfig-nl && \
+	$(LN_S) -f tokconfig-fra tokconfig-fr && \
+	$(LN_S) -f tokconfig-spa tokconfig-es && \
+	$(LN_S) -f tokconfig-ita tokconfig-it && \
+	$(LN_S) -f tokconfig-por tokconfig-pt && \
+	$(LN_S) -f tokconfig-swe tokconfig-sv && \
+	$(LN_S) -f tokconfig-rus tokconfig-ru && \
+	$(LN_S) -f tokconfig-tur tokconfig-tr && \
+	$(LN_S) -f tokconfig-fry tokconfig-fy && \
+	$(LN_S) -f tokconfig-nld-twitter tokconfig-nl-twitter && \
+	$(LN_S) -f tokconfig-nld-sonarchat tokconfig-nl-sonarchat && \
+	$(LN_S) -f tokconfig-nld-withplaceholder tokconfig-nl-withplaceholder && \
+	$(LN_S) -f nld_afk.abr nl_afk.abr && \
+	$(LN_S) -f spa.abr es.abr && \
+	$(LN_S) -f por.abr pt.abr
diff --git a/config/Makefile.in b/config/Makefile.in
index fd50266..7378e13 100644
--- a/config/Makefile.in
+++ b/config/Makefile.in
@@ -165,6 +165,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
+LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAKEINFO = @MAKEINFO@
 MKDIR_P = @MKDIR_P@
@@ -220,13 +221,13 @@ target_alias = @target_alias@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
-config_DATA = tokconfig-en tokconfig-nl tokconfig-fr tokconfig-it \
-	tokconfig-es tokconfig-pt tokconfig-de tokconfig-sv \
-	tokconfig-nl-twitter tokconfig-nl-sonarchat \
-	tokconfig-nl-withplaceholder tokconfig-fy tokconfig-ru \
-	ligatures.filter \
+config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
+	tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
+	tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
+	tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
+	tokconfig-generic ligatures.filter \
 	exotic-quotes.quote exotic-eos.eos \
-	nl_afk.abr es.abr pt.abr
+	nld_afk.abr spa.abr por.abr
 
 configdir = $(sysconfdir)/ucto
 EXTRA_DIST = $(config_DATA)
@@ -378,7 +379,8 @@ info: info-am
 info-am:
 
 install-data-am: install-configDATA
-
+	@$(NORMAL_INSTALL)
+	$(MAKE) $(AM_MAKEFLAGS) install-data-hook
 install-dvi: install-dvi-am
 
 install-dvi-am:
@@ -423,23 +425,44 @@ ps-am:
 
 uninstall-am: uninstall-configDATA
 
-.MAKE: install-am install-strip
+.MAKE: install-am install-data-am install-strip
 
 .PHONY: all all-am check check-am clean clean-generic cscopelist-am \
 	ctags-am distclean distclean-generic distdir dvi dvi-am html \
 	html-am info info-am install install-am install-configDATA \
-	install-data install-data-am install-dvi install-dvi-am \
-	install-exec install-exec-am install-html install-html-am \
-	install-info install-info-am install-man install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
-	maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
-	pdf-am ps ps-am tags-am uninstall uninstall-am \
-	uninstall-configDATA
+	install-data install-data-am install-data-hook install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic pdf pdf-am ps ps-am tags-am uninstall \
+	uninstall-am uninstall-configDATA
 
 .PRECIOUS: Makefile
 
 
+install-data-hook:
+# For backward compatibility, add symlinks with 'historical' names; DESTDIR is honoured so staged installs work.
+	cd "$(DESTDIR)$(configdir)" && \
+	$(LN_S) -f tokconfig-eng tokconfig-en && \
+	$(LN_S) -f tokconfig-deu tokconfig-de && \
+	$(LN_S) -f tokconfig-nld tokconfig-nl && \
+	$(LN_S) -f tokconfig-fra tokconfig-fr && \
+	$(LN_S) -f tokconfig-spa tokconfig-es && \
+	$(LN_S) -f tokconfig-ita tokconfig-it && \
+	$(LN_S) -f tokconfig-por tokconfig-pt && \
+	$(LN_S) -f tokconfig-swe tokconfig-sv && \
+	$(LN_S) -f tokconfig-rus tokconfig-ru && \
+	$(LN_S) -f tokconfig-tur tokconfig-tr && \
+	$(LN_S) -f tokconfig-fry tokconfig-fy && \
+	$(LN_S) -f tokconfig-nld-twitter tokconfig-nl-twitter && \
+	$(LN_S) -f tokconfig-nld-sonarchat tokconfig-nl-sonarchat && \
+	$(LN_S) -f tokconfig-nld-withplaceholder tokconfig-nl-withplaceholder && \
+	$(LN_S) -f nld_afk.abr nl_afk.abr && \
+	$(LN_S) -f spa.abr es.abr && \
+	$(LN_S) -f por.abr pt.abr
+
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/config/nl_afk.abr b/config/nld_afk.abr
similarity index 100%
rename from config/nl_afk.abr
rename to config/nld_afk.abr
diff --git a/config/pt.abr b/config/por.abr
similarity index 100%
rename from config/pt.abr
rename to config/por.abr
diff --git a/config/es.abr b/config/spa.abr
similarity index 100%
rename from config/es.abr
rename to config/spa.abr
diff --git a/config/tokconfig-de b/config/tokconfig-deu
similarity index 99%
rename from config/tokconfig-de
rename to config/tokconfig-deu
index bd39ed6..55ee863 100644
--- a/config/tokconfig-de
+++ b/config/tokconfig-deu
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 TOKEN SUFFIX WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-en b/config/tokconfig-eng
similarity index 100%
rename from config/tokconfig-en
rename to config/tokconfig-eng
diff --git a/config/tokconfig-fr b/config/tokconfig-fra
similarity index 100%
rename from config/tokconfig-fr
rename to config/tokconfig-fra
diff --git a/config/tokconfig-fy b/config/tokconfig-fry
similarity index 99%
rename from config/tokconfig-fy
rename to config/tokconfig-fry
index 3ddad3b..4826e36 100644
--- a/config/tokconfig-fy
+++ b/config/tokconfig-fry
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-es b/config/tokconfig-generic
similarity index 90%
copy from config/tokconfig-es
copy to config/tokconfig-generic
index c46791b..cb56ab6 100644
--- a/config/tokconfig-es
+++ b/config/tokconfig-generic
@@ -1,9 +1,11 @@
+version=0.2
 [RULE-ORDER]
-ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
+URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
 ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
+
 [META-RULES]
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
@@ -47,7 +49,7 @@ FRACNUMBER=\p{N}+(?:/\p{N}+)+
 NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
 
 #Times
-TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:am|pm)?
+TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
 
 #retain digits, including those starting with initial period (.22), and negative numbers
 NUMBER=-?(?:[\.,]?\p{N}+)+
@@ -65,8 +67,6 @@ UNKNOWN=.
 [SUFFIXES]
 
 [ORDINALS]
-o|O
-a|A
 
 [TOKENS]
 
@@ -88,14 +88,29 @@ kb
 
 
 [CURRENCY]
+USD
+GBP
+CAD
+NZD
+AUD
+SGD
+HKD
 EUR
 
 [ABBREVIATIONS]
-%include es
+
+
+[FILTER]
+fl fl
+ff ff
+ffi ffi
+ffl ffl
+# also filter soft hyphen
+\u00AD
+
 
 [EOSMARKERS]
 %include standard-eos
 
 [QUOTES]
 %include standard-quotes
-%include exotic-quotes
diff --git a/config/tokconfig-it b/config/tokconfig-ita
similarity index 99%
rename from config/tokconfig-it
rename to config/tokconfig-ita
index a425419..04ebaec 100644
--- a/config/tokconfig-it
+++ b/config/tokconfig-ita
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-nl b/config/tokconfig-nld
similarity index 99%
rename from config/tokconfig-nl
rename to config/tokconfig-nld
index 587768a..a818fbf 100644
--- a/config/tokconfig-nl
+++ b/config/tokconfig-nld
@@ -169,7 +169,7 @@ f
 
 
 [ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
 
 [FILTER]
 %include ligatures
diff --git a/config/tokconfig-nl-sonarchat b/config/tokconfig-nld-sonarchat
similarity index 98%
rename from config/tokconfig-nl-sonarchat
rename to config/tokconfig-nld-sonarchat
index 3dab7e3..5785a39 100644
--- a/config/tokconfig-nl-sonarchat
+++ b/config/tokconfig-nld-sonarchat
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
 URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
@@ -105,7 +106,7 @@ f
 
 
 [ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
 
 [FILTER]
 %include ligatures
diff --git a/config/tokconfig-nl-twitter b/config/tokconfig-nld-twitter
similarity index 98%
rename from config/tokconfig-nl-twitter
rename to config/tokconfig-nld-twitter
index 18e0a34..6954776 100644
--- a/config/tokconfig-nl-twitter
+++ b/config/tokconfig-nld-twitter
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
 URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
@@ -111,7 +112,7 @@ fl
 f
 
 [ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
 
 [FILTER]
 %include ligatures
diff --git a/config/tokconfig-nl-withplaceholder b/config/tokconfig-nld-withplaceholder
similarity index 99%
rename from config/tokconfig-nl-withplaceholder
rename to config/tokconfig-nld-withplaceholder
index b6e10c1..62491b7 100644
--- a/config/tokconfig-nl-withplaceholder
+++ b/config/tokconfig-nld-withplaceholder
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 PLACEHOLDER WORD-WITHSUFFIX QUOTE-SUFFIX
 WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL QUOTE-COMPOUND
diff --git a/config/tokconfig-pt b/config/tokconfig-por
similarity index 98%
rename from config/tokconfig-pt
rename to config/tokconfig-por
index 62a3aac..3b4514a 100644
--- a/config/tokconfig-pt
+++ b/config/tokconfig-por
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
@@ -116,7 +117,7 @@ kb
 EUR
 
 [ABBREVIATIONS]
-%include pt
+%include por
 
 [EOSMARKERS]
 %include standard-eos
diff --git a/config/tokconfig-ru b/config/tokconfig-rus
similarity index 99%
rename from config/tokconfig-ru
rename to config/tokconfig-rus
index 7d5d69e..e2eb736 100644
--- a/config/tokconfig-ru
+++ b/config/tokconfig-rus
@@ -1,3 +1,4 @@
+version=0.2
 # coding: utf-8
 [RULE-ORDER]
 URL URL-WWW URL-DOMAIN
diff --git a/config/tokconfig-es b/config/tokconfig-spa
similarity index 98%
rename from config/tokconfig-es
rename to config/tokconfig-spa
index c46791b..c60a49e 100644
--- a/config/tokconfig-es
+++ b/config/tokconfig-spa
@@ -1,3 +1,4 @@
+version=0.2
 [RULE-ORDER]
 ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
@@ -91,7 +92,7 @@ kb
 EUR
 
 [ABBREVIATIONS]
-%include es
+%include spa
 
 [EOSMARKERS]
 %include standard-eos
diff --git a/config/tokconfig-sv b/config/tokconfig-swe
similarity index 99%
copy from config/tokconfig-sv
copy to config/tokconfig-swe
index e97d9f3..d0eb5f9 100644
--- a/config/tokconfig-sv
+++ b/config/tokconfig-swe
@@ -1,3 +1,5 @@
+version=0.2
+
 [RULE-ORDER]
 SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
 E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-sv b/config/tokconfig-tur
similarity index 53%
rename from config/tokconfig-sv
rename to config/tokconfig-tur
index e97d9f3..23f2132 100644
--- a/config/tokconfig-sv
+++ b/config/tokconfig-tur
@@ -1,9 +1,12 @@
+version=0.2
+#by Turkish National Corpus Team
+
 [RULE-ORDER]
-SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+DATE NP-COMP URL URL-WWW URL-DOMAIN
+E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI
 NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
 
+
 [META-RULES]
 SPLITTER=%
 NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
@@ -20,19 +23,11 @@ SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
 %include e-mail
 %include smiley
 
-#Ex (oud)-studente(s)
-WORD-PARPREFIX-PARSUFFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Ex: (oud)-studente, (on)zin,
-WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
+#Affixed proper nouns and compounds with dash/underscore.
+#Ex= <Mahir'in> <siyah-beyaz>
+NP-COMP=\p{L}+(?:['`’‘´-]\p{L}+)+
 
-#Ex: könig(in)
-WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Keep dash/underscore connected parts (even if they are in parenthesis)
-WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
-
-#Abbreviations with multiple periods
+#Abbreviations with multiple periods as in <A.B.C>
 ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
 
 #retain initials
@@ -41,20 +36,24 @@ INITIAL=^(?:\p{Lt}|\p{Lu})\.$
 #Homogeneous punctuation (ellipsis etc)
 PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
 
-#Date
-DATE=\p{N}{4}-\p{N}{1,2}[\.-]\p{N}{1,2}\.?
-DATE-SHORT=\p{N}{1,2}[-]\p{Ps}?\p{N}{1,2}[-]\p{Ps}?\p{N}{2,4}
+#Abbreviations with multiple periods
+ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)\Z
 
-FRACNUMBER=\p{N}+(?:/\p{N}+)+
+#Date
+DATE=\p{N}{1,2}\.\p{N}{1,2}\.\p{N}{2,4}(?:['`’‘´-]\p{L}+)+
 
 NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
 
+FRACNUMBER=\p{N}+(?:/\p{N}+)+
+
 #Times
-TIME=\p{N}{1,2}\.\p{N}{1,2}(?:\.\p{N})?(?i:am|pm)?
+TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
 
 #retain digits, including those starting with initial period (.22), and negative numbers
 NUMBER=-?(?:[\.,]?\p{N}+)+
 
+ROMAN-NUMERALS=\b[IVXivx]{1,2}\.
+
 CURRENCY=\p{Sc}
 
 WORD=[\p{L}\p{Mn}]+
@@ -66,21 +65,14 @@ UNKNOWN=.
 [PREFIXES]
 
 [SUFFIXES]
-['`’‘´][sS]
-:[nN]
-:[aA]
-:ar
-:AR
-:arna
-:ARNA
-:[sS]
 
+#Ordinals and Numbering
 [ORDINALS]
-:[aA]
-:[eE]
+\.
+\)
+\-
 
 [TOKENS]
-['`’‘´][sS]
 
 [UNITS]
 km
@@ -89,8 +81,6 @@ cm
 mm
 g
 kg
-hg
-cg
 C
 l
 s
@@ -99,75 +89,464 @@ min
 gb
 mb
 kb
-St
-
 
 [CURRENCY]
-SEK
+YTL
+TL
 EUR
-DM
 USD
+GBP
+CAD
+NZD
+AUD
+SGD
+HKD
 
 [ABBREVIATIONS]
-adr
-aug
-bl.a
-ca
-cg
-dec
-d.v.s
-dvs
-dr
-dyl
-el
-enl
+A
+acc
+AD
+Adj
+Adm
+Adr
+Adv
+Age
+agm
+agy
+al
+Ala
+Alb
+Alm
+Amp
+anat
+Ank
+anon
+ant
+Apr
+Apt
+Ar
+Ariz
+Ark
+arr
+Arş
+Art
+As
+Asb
+assoc
+Asst
+astr
+astrol
+Astsb
+Atğm
+Atm
+Aug
+av
+Ave
+B
+BS
+BSc
+bağ
+Bancorp
+Bart
+Baş:
+BC
+Bçvş
+Bel
+Bhd
+bitb
+biy
+Bk
+Bkz
+Bl
+Bld
+Bldg
+Bn
+Bnb
+Böl
+bot
+Brig
+Bros
+Bşk
+Bştbp
+Bul
+Bulg
+C
+Ç
+Cad
+cal
+Calif
+cap
+Capt
+Çav
+Çev
+cf
+Cie
+cm
+Cmdr
+Cmh
+Cnv
+Co
+coğ
+col
+Colo
+comb
+combform
+Comdr
+compar
+Con
+Conn
+cont
+contd
+contr
+Corp
+Cos
+Cpl
+cu
+Cum
+Çvş
+D-Mass
+D
+dal
+dam
+db
+dbl
+Dec
+Del
+Den
+dept
+Der
+Derg
+Dev
+dg
+Dipl
+dist
+div
+dk
+dl
+dm
+doc
+doğ
+doz
+Dr
+drl
+Dz
+dzl
+E
+eg
+Ecz
+Ed
+Ef
+ekon
+Em
+Ens
+Erm
+esp
+Esq
+est
 etc
-&c
-ex
-febr
-f.d
-fg.å
-f.k
-f.n
-f.v
-forts
-fr.o.m
+Ex
+F
+Fak
+Far
+Feb
+fel
+fem
+ff
+fig
+fil
+fiz
+fizy
+Fla
+for
+Fr
+Fran
+Fri
+ft
+G
+Ga
+Gal
+Gen
+geom
 gm
-h
-ha
-hg
-hr
-i st.f
-jfr
-kg
-kl
-km
-lr
-m.fl
-m
+Gmr
+Gn
+Gnkur
+Gön
+Gör
+Gov
+gr
+Grt
+Güv
+H
+Hav
+Haz
+hek
+hl
+hlk
+hm
+Hon
+Hosp
+Hrk
+Hrp
+Hs
+Hst
+huk
+Hv
+Hz
+hzl
+I
+İ
+Ill
+Inc
+Ind
+Insp
+inst
+Int
+is
+işl
+J
+Jan
+Jansz
+Jap
+jeol
+Jos
+Jr
+Jul
+Jun
+K
+Kan
+Kar
+Kb
+Kd
+Kg
+Khz
+kim
+Km
+Ko
+Koll
+Kom
+Koop
+koor
+Kor
+Kora
+Korg
+krş
+Kur
+Ky
+L
+La
+Lat
+Log
+Lt
+Ltd
+Lv
+M
+M\A
+M\Sc
+MA
+Mac
+mad
+Mah
+Maj
+man
+masc
+Mass
+mat
+Mb
+Mc
+Md
+mec
+met
+Mfg
+mg
+Mich
+mim
 min
-min
-m.m
+Minn
+Miss
+Mlle
 mm
-mån
-ngn
-ngt
-nr
-o.dyl
-o.likn
-o.s.v
-p.g.a
-s.a.s
-s.k
-skn
-st
-t.ex
-t.o.m
-tfn
-trpt
-u.a
-vard
-v.g.v
+Mme
+Mo
+Mon
+Mr
+Mrs
+Mrşl
+Ms
+MSc
+Msgr
+Mu
+Müd
+Muh
+Mük
+Mür
+Mv
+N
+Neb
+neg
+Nev
+no
+nom
+Nos
+Nov
+Nr
+Nu
+O
+Ö
+Oct
+Öğ
+Öğr
+Okla
+Okt
+öl
+ölm
+On
+Onb
+Op
+Opr
+Or
+Ora
+Ord
+Ore
+Org
+Örn
+Ort
+Osm
+Oto
+öz
+P
+Pa
+pass
+Paz
+Pb
+pers
+Pfc
+Ph
+phr
+Pl
+pla
+Pol
+Port
+poss
+pres
+Pro
+Prof
+Prop
+Pş
+psikol
+Pty
+Pvt
+Q
+R
+Rd
+ref
+refl
+Rep
+Reps
+Res
+Rev
+Rt
+Rum
+Rus
+S
+Ş
+sa
+San
+Sb
+sc
+Sen
+Sens
+Sept
+sf
+Sfc
+Sgt
+Sin
+sing
+Sl
+sm
+Sn
+snt
+Soc
+Sok
+sos
+sp
+Sr
+Şrt
+St
+Şti
+Stj
+Su
+superl
+Supt
+Surg
+Süt
+T
+tar
+Tb
+Tbp
+tek
+Tel
+Telg
+Tenn
+Tex
+Tğm
+Th
+Tic
+tiy
+tlks
+tls
+Top
+Ts
+Tues
+Tug
+Tuğa
+Tuğg
+Tüm
+Tüma
+Tümg
+U
+Ü
+Üçvş
+Uk
+Un
+Üni
+Univ
+Ünive
+ünl
+usu
+Ütğm
+Uz
+Uzm
+V
+v
+Va
+var
+Vb
+vd
+Vet
+viz
+Vl
+Vol
+vs
+Vt
+W
+X
+Y
+Yar
+Yard
+Yay
+Yb
+Yd
+YKr
+Yrd
+Yük
+Yun
+yy
+Yzb
+Z
+zf
+zm
+zool
 
 [EOSMARKERS]
 # Character: !
diff --git a/configure b/configure
index b04c6f2..6fa7531 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for uctodata 0.2.
+# Generated by GNU Autoconf 2.69 for uctodata 0.3.
 #
 # Report bugs to <lamasoftware at science.ru.nl>.
 #
@@ -579,14 +579,15 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='uctodata'
 PACKAGE_TARNAME='uctodata'
-PACKAGE_VERSION='0.2'
-PACKAGE_STRING='uctodata 0.2'
+PACKAGE_VERSION='0.3'
+PACKAGE_STRING='uctodata 0.3'
 PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
 PACKAGE_URL=''
 
 ac_unique_file="configure.ac"
 ac_subst_vars='LTLIBOBJS
 LIBOBJS
+LN_S
 AM_BACKSLASH
 AM_DEFAULT_VERBOSITY
 AM_DEFAULT_V
@@ -1211,7 +1212,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures uctodata 0.2 to adapt to many kinds of systems.
+\`configure' configures uctodata 0.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1278,7 +1279,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of uctodata 0.2:";;
+     short | recursive ) echo "Configuration of uctodata 0.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1352,7 +1353,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-uctodata configure 0.2
+uctodata configure 0.3
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1369,7 +1370,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by uctodata $as_me 0.2, which was
+It was created by uctodata $as_me 0.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2232,7 +2233,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='uctodata'
- VERSION='0.2'
+ VERSION='0.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2326,6 +2327,17 @@ END
 fi
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+$as_echo_n "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+if test "$LN_S" = "ln -s"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+$as_echo "no, using $LN_S" >&6; }
+fi
+
 
 ac_config_files="$ac_config_files Makefile uctodata.pc config/Makefile"
 
@@ -2880,7 +2892,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by uctodata $as_me 0.2, which was
+This file was extended by uctodata $as_me 0.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -2933,7 +2945,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-uctodata config.status 0.2
+uctodata config.status 0.3
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index a05af49..4226f97 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,9 +2,10 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.59)
-AC_INIT([uctodata], [0.2], [lamasoftware at science.ru.nl])
+AC_INIT([uctodata], [0.3], [lamasoftware at science.ru.nl])
 AM_INIT_AUTOMAKE([foreign])
 AC_CONFIG_SRCDIR([configure.ac])
+AC_PROG_LN_S
 
 AC_OUTPUT([
   Makefile

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/uctodata.git



More information about the debian-science-commits mailing list