[uctodata] 01/03: New upstream version 0.3
Maarten van Gompel
proycon-guest at moszumanska.debian.org
Fri Jan 6 10:37:26 UTC 2017
This is an automated email from the git hooks/post-receive script.
proycon-guest pushed a commit to branch master
in repository uctodata.
commit 545bba90f42ed696ef2243c8e6c6236f5e82e954
Author: proycon <proycon at anaproy.nl>
Date: Fri Jan 6 11:36:20 2017 +0100
New upstream version 0.3
---
ChangeLog | 42 ++
Makefile.in | 1 +
NEWS | 3 +
config/Makefile.am | 33 +-
config/Makefile.in | 55 +-
config/{nl_afk.abr => nld_afk.abr} | 0
config/{pt.abr => por.abr} | 0
config/{es.abr => spa.abr} | 0
config/{tokconfig-de => tokconfig-deu} | 1 +
config/{tokconfig-en => tokconfig-eng} | 0
config/{tokconfig-fr => tokconfig-fra} | 0
config/{tokconfig-fy => tokconfig-fry} | 1 +
config/{tokconfig-es => tokconfig-generic} | 27 +-
config/{tokconfig-it => tokconfig-ita} | 1 +
config/{tokconfig-nl => tokconfig-nld} | 2 +-
...config-nl-sonarchat => tokconfig-nld-sonarchat} | 3 +-
...{tokconfig-nl-twitter => tokconfig-nld-twitter} | 3 +-
...thplaceholder => tokconfig-nld-withplaceholder} | 1 +
config/{tokconfig-pt => tokconfig-por} | 3 +-
config/{tokconfig-ru => tokconfig-rus} | 1 +
config/{tokconfig-es => tokconfig-spa} | 3 +-
config/{tokconfig-sv => tokconfig-swe} | 2 +
config/{tokconfig-sv => tokconfig-tur} | 563 +++++++++++++++++----
configure | 32 +-
configure.ac | 3 +-
25 files changed, 644 insertions(+), 136 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f6883f1..5d11f7e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,45 @@
+2016-11-11 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-deu, config/tokconfig-fry,
+ config/tokconfig-generic, config/tokconfig-ita,
+ config/tokconfig-nld-sonarchat, config/tokconfig-nld-twitter,
+ config/tokconfig-nld-withplaceholder, config/tokconfig-por,
+ config/tokconfig-rus, config/tokconfig-spa, config/tokconfig-swe,
+ config/tokconfig-tur: added version info to each datafile
+
+2016-10-17 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/tokconfig-nld, config/tokconfig-nld-sonarchat,
+ config/tokconfig-nld-twitter, config/tokconfig-por,
+ config/tokconfig-spa: although old names still work, be cautious and
+ use the new names
+
+2016-10-12 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * config/Makefile.am, config/es.abr, config/nl_afk.abr,
+ config/nld_afk.abr, config/por.abr, config/pt.abr, config/spa.abr,
+ config/tokconfig-de, config/tokconfig-deu, config/tokconfig-en,
+ config/tokconfig-eng, config/tokconfig-es, config/tokconfig-fr,
+ config/tokconfig-fra, config/tokconfig-fry, config/tokconfig-fy,
+ config/tokconfig-generic, config/tokconfig-it,
+ config/tokconfig-ita, config/tokconfig-nl,
+ config/tokconfig-nl-sonarchat, config/tokconfig-nl-twitter,
+ config/tokconfig-nl-withplaceholder, config/tokconfig-nld,
+ config/tokconfig-nld-sonarchat, config/tokconfig-nld-twitter,
+ config/tokconfig-nld-withplaceholder, config/tokconfig-por,
+ config/tokconfig-pt, config/tokconfig-ru, config/tokconfig-rus,
+ config/tokconfig-spa, config/tokconfig-sv, config/tokconfig-swe,
+ config/tokconfig-tr, config/tokconfig-tur, configure.ac: use ISO
+ 639-3 language codes. But preserve old versions as symlinks
+
+2016-09-28 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * configure.ac: bump version after release
+
+2016-09-28 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * NEWS: news! upcoming release.
+
2016-09-28 Ko van der Sloot <K.vanderSloot at let.ru.nl>
* config/tokconfig-nl: fixed PUNCTUATION-MULTI-DOT and
diff --git a/Makefile.in b/Makefile.in
index 8ccbf14..f007a73 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -247,6 +247,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
diff --git a/NEWS b/NEWS
index 89f7e2a..106c939 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+0.3 [Ko vd Sloot] 05-01-2017
+ * new directory structure based on ISO 639-3 language codes.
+
0.2 [Ko vd Sloot] 28-09-2016
* New implementation of rules. Needs a recent ucto that supports recursive
application of rules
diff --git a/config/Makefile.am b/config/Makefile.am
index 0056f2c..c5c7570 100644
--- a/config/Makefile.am
+++ b/config/Makefile.am
@@ -1,11 +1,32 @@
-config_DATA = tokconfig-en tokconfig-nl tokconfig-fr tokconfig-it \
- tokconfig-es tokconfig-pt tokconfig-de tokconfig-sv \
- tokconfig-nl-twitter tokconfig-nl-sonarchat \
- tokconfig-nl-withplaceholder tokconfig-fy tokconfig-ru \
- ligatures.filter \
+config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
+ tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
+ tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
+ tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
+ tokconfig-generic ligatures.filter \
exotic-quotes.quote exotic-eos.eos \
- nl_afk.abr es.abr pt.abr
+ nld_afk.abr spa.abr por.abr
configdir = $(sysconfdir)/ucto
EXTRA_DIST = $(config_DATA)
+
+install-data-hook:
+# for backward compatibility add symlinks with 'historical' names
+ cd $(configdir) && \
+ $(LN_S) -f tokconfig-eng tokconfig-en && \
+ $(LN_S) -f tokconfig-deu tokconfig-de && \
+ $(LN_S) -f tokconfig-nld tokconfig-nl && \
+ $(LN_S) -f tokconfig-fra tokconfig-fr && \
+ $(LN_S) -f tokconfig-spa tokconfig-es && \
+ $(LN_S) -f tokconfig-ita tokconfig-it && \
+ $(LN_S) -f tokconfig-por tokconfig-pt && \
+ $(LN_S) -f tokconfig-swe tokconfig-sv && \
+ $(LN_S) -f tokconfig-rus tokconfig-ru && \
+ $(LN_S) -f tokconfig-tur tokconfig-tr && \
+ $(LN_S) -f tokconfig-fry tokconfig-fy && \
+ $(LN_S) -f tokconfig-nld-twitter tokconfig-nl-twitter && \
+ $(LN_S) -f tokconfig-nld-sonarchat tokconfig-nl-sonarchat && \
+ $(LN_S) -f tokconfig-nld-withplaceholder tokconfig-nl-withplaceholder && \
+ $(LN_S) -f nld_afk.abr nl_afk.abr && \
+ $(LN_S) -f spa.abr es.abr && \
+ $(LN_S) -f por.abr pt.abr
diff --git a/config/Makefile.in b/config/Makefile.in
index fd50266..7378e13 100644
--- a/config/Makefile.in
+++ b/config/Makefile.in
@@ -165,6 +165,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MKDIR_P = @MKDIR_P@
@@ -220,13 +221,13 @@ target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-config_DATA = tokconfig-en tokconfig-nl tokconfig-fr tokconfig-it \
- tokconfig-es tokconfig-pt tokconfig-de tokconfig-sv \
- tokconfig-nl-twitter tokconfig-nl-sonarchat \
- tokconfig-nl-withplaceholder tokconfig-fy tokconfig-ru \
- ligatures.filter \
+config_DATA = tokconfig-eng tokconfig-nld tokconfig-fra tokconfig-ita \
+ tokconfig-spa tokconfig-por tokconfig-deu tokconfig-swe \
+ tokconfig-nld-twitter tokconfig-nld-sonarchat tokconfig-tur \
+ tokconfig-nld-withplaceholder tokconfig-fry tokconfig-rus \
+ tokconfig-generic ligatures.filter \
exotic-quotes.quote exotic-eos.eos \
- nl_afk.abr es.abr pt.abr
+ nld_afk.abr spa.abr por.abr
configdir = $(sysconfdir)/ucto
EXTRA_DIST = $(config_DATA)
@@ -378,7 +379,8 @@ info: info-am
info-am:
install-data-am: install-configDATA
-
+ @$(NORMAL_INSTALL)
+ $(MAKE) $(AM_MAKEFLAGS) install-data-hook
install-dvi: install-dvi-am
install-dvi-am:
@@ -423,23 +425,44 @@ ps-am:
uninstall-am: uninstall-configDATA
-.MAKE: install-am install-strip
+.MAKE: install-am install-data-am install-strip
.PHONY: all all-am check check-am clean clean-generic cscopelist-am \
ctags-am distclean distclean-generic distdir dvi dvi-am html \
html-am info info-am install install-am install-configDATA \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
- pdf-am ps ps-am tags-am uninstall uninstall-am \
- uninstall-configDATA
+ install-data install-data-am install-data-hook install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic pdf pdf-am ps ps-am tags-am uninstall \
+ uninstall-am uninstall-configDATA
.PRECIOUS: Makefile
+install-data-hook:
+# for backward compatibility add symlinks with 'historical' names
+ cd $(configdir) && \
+ $(LN_S) -f tokconfig-eng tokconfig-en && \
+ $(LN_S) -f tokconfig-deu tokconfig-de && \
+ $(LN_S) -f tokconfig-nld tokconfig-nl && \
+ $(LN_S) -f tokconfig-fra tokconfig-fr && \
+ $(LN_S) -f tokconfig-spa tokconfig-es && \
+ $(LN_S) -f tokconfig-ita tokconfig-it && \
+ $(LN_S) -f tokconfig-por tokconfig-pt && \
+ $(LN_S) -f tokconfig-swe tokconfig-sv && \
+ $(LN_S) -f tokconfig-rus tokconfig-ru && \
+ $(LN_S) -f tokconfig-tur tokconfig-tr && \
+ $(LN_S) -f tokconfig-fry tokconfig-fy && \
+ $(LN_S) -f tokconfig-nld-twitter tokconfig-nl-twitter && \
+ $(LN_S) -f tokconfig-nld-sonarchat tokconfig-nl-sonarchat && \
+ $(LN_S) -f tokconfig-nld-withplaceholder tokconfig-nl-withplaceholder && \
+ $(LN_S) -f nld_afk.abr nl_afk.abr && \
+ $(LN_S) -f spa.abr es.abr && \
+ $(LN_S) -f por.abr pt.abr
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/config/nl_afk.abr b/config/nld_afk.abr
similarity index 100%
rename from config/nl_afk.abr
rename to config/nld_afk.abr
diff --git a/config/pt.abr b/config/por.abr
similarity index 100%
rename from config/pt.abr
rename to config/por.abr
diff --git a/config/es.abr b/config/spa.abr
similarity index 100%
rename from config/es.abr
rename to config/spa.abr
diff --git a/config/tokconfig-de b/config/tokconfig-deu
similarity index 99%
rename from config/tokconfig-de
rename to config/tokconfig-deu
index bd39ed6..55ee863 100644
--- a/config/tokconfig-de
+++ b/config/tokconfig-deu
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
TOKEN SUFFIX WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-en b/config/tokconfig-eng
similarity index 100%
rename from config/tokconfig-en
rename to config/tokconfig-eng
diff --git a/config/tokconfig-fr b/config/tokconfig-fra
similarity index 100%
rename from config/tokconfig-fr
rename to config/tokconfig-fra
diff --git a/config/tokconfig-fy b/config/tokconfig-fry
similarity index 99%
rename from config/tokconfig-fy
rename to config/tokconfig-fry
index 3ddad3b..4826e36 100644
--- a/config/tokconfig-fy
+++ b/config/tokconfig-fry
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-es b/config/tokconfig-generic
similarity index 90%
copy from config/tokconfig-es
copy to config/tokconfig-generic
index c46791b..cb56ab6 100644
--- a/config/tokconfig-es
+++ b/config/tokconfig-generic
@@ -1,9 +1,11 @@
+version=0.2
[RULE-ORDER]
-ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
+URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
+
[META-RULES]
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
@@ -47,7 +49,7 @@ FRACNUMBER=\p{N}+(?:/\p{N}+)+
NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
#Times
-TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:am|pm)?
+TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
#retain digits, including those starting with initial period (.22), and negative numbers
NUMBER=-?(?:[\.,]?\p{N}+)+
@@ -65,8 +67,6 @@ UNKNOWN=.
[SUFFIXES]
[ORDINALS]
-o|O
-a|A
[TOKENS]
@@ -88,14 +88,29 @@ kb
[CURRENCY]
+USD
+GBP
+CAD
+NZD
+AUD
+SGD
+HKD
EUR
[ABBREVIATIONS]
-%include es
+
+
+[FILTER]
+fl fl
+ff ff
+ffi ffi
+ffl ffl
+# also filter soft hyphen
+\u00AD
+
[EOSMARKERS]
%include standard-eos
[QUOTES]
%include standard-quotes
-%include exotic-quotes
diff --git a/config/tokconfig-it b/config/tokconfig-ita
similarity index 99%
rename from config/tokconfig-it
rename to config/tokconfig-ita
index a425419..04ebaec 100644
--- a/config/tokconfig-it
+++ b/config/tokconfig-ita
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-nl b/config/tokconfig-nld
similarity index 99%
rename from config/tokconfig-nl
rename to config/tokconfig-nld
index 587768a..a818fbf 100644
--- a/config/tokconfig-nl
+++ b/config/tokconfig-nld
@@ -169,7 +169,7 @@ f
[ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
[FILTER]
%include ligatures
diff --git a/config/tokconfig-nl-sonarchat b/config/tokconfig-nld-sonarchat
similarity index 98%
rename from config/tokconfig-nl-sonarchat
rename to config/tokconfig-nld-sonarchat
index 3dab7e3..5785a39 100644
--- a/config/tokconfig-nl-sonarchat
+++ b/config/tokconfig-nld-sonarchat
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
@@ -105,7 +106,7 @@ f
[ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
[FILTER]
%include ligatures
diff --git a/config/tokconfig-nl-twitter b/config/tokconfig-nld-twitter
similarity index 98%
rename from config/tokconfig-nl-twitter
rename to config/tokconfig-nld-twitter
index 18e0a34..6954776 100644
--- a/config/tokconfig-nl-twitter
+++ b/config/tokconfig-nld-twitter
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
WORD-WITHSUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW
URL-DOMAIN E-MAIL WORD-PARPREFIX-PARSUFFIX WORD-PARPREFIX WORD-PARSUFFIX
@@ -111,7 +112,7 @@ fl
f
[ABBREVIATIONS]
-%include nl_afk
+%include nld_afk
[FILTER]
%include ligatures
diff --git a/config/tokconfig-nl-withplaceholder b/config/tokconfig-nld-withplaceholder
similarity index 99%
rename from config/tokconfig-nl-withplaceholder
rename to config/tokconfig-nld-withplaceholder
index b6e10c1..62491b7 100644
--- a/config/tokconfig-nl-withplaceholder
+++ b/config/tokconfig-nld-withplaceholder
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
PLACEHOLDER WORD-WITHSUFFIX QUOTE-SUFFIX
WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL QUOTE-COMPOUND
diff --git a/config/tokconfig-pt b/config/tokconfig-por
similarity index 98%
rename from config/tokconfig-pt
rename to config/tokconfig-por
index 62a3aac..3b4514a 100644
--- a/config/tokconfig-pt
+++ b/config/tokconfig-por
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
ABBREVIATION-KNOWN SUFFIX NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
@@ -116,7 +117,7 @@ kb
EUR
[ABBREVIATIONS]
-%include pt
+%include por
[EOSMARKERS]
%include standard-eos
diff --git a/config/tokconfig-ru b/config/tokconfig-rus
similarity index 99%
rename from config/tokconfig-ru
rename to config/tokconfig-rus
index 7d5d69e..e2eb736 100644
--- a/config/tokconfig-ru
+++ b/config/tokconfig-rus
@@ -1,3 +1,4 @@
+version=0.2
# coding: utf-8
[RULE-ORDER]
URL URL-WWW URL-DOMAIN
diff --git a/config/tokconfig-es b/config/tokconfig-spa
similarity index 98%
rename from config/tokconfig-es
rename to config/tokconfig-spa
index c46791b..c60a49e 100644
--- a/config/tokconfig-es
+++ b/config/tokconfig-spa
@@ -1,3 +1,4 @@
+version=0.2
[RULE-ORDER]
ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
@@ -91,7 +92,7 @@ kb
EUR
[ABBREVIATIONS]
-%include es
+%include spa
[EOSMARKERS]
%include standard-eos
diff --git a/config/tokconfig-sv b/config/tokconfig-swe
similarity index 99%
copy from config/tokconfig-sv
copy to config/tokconfig-swe
index e97d9f3..d0eb5f9 100644
--- a/config/tokconfig-sv
+++ b/config/tokconfig-swe
@@ -1,3 +1,5 @@
+version=0.2
+
[RULE-ORDER]
SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
diff --git a/config/tokconfig-sv b/config/tokconfig-tur
similarity index 53%
rename from config/tokconfig-sv
rename to config/tokconfig-tur
index e97d9f3..23f2132 100644
--- a/config/tokconfig-sv
+++ b/config/tokconfig-tur
@@ -1,9 +1,12 @@
+version=0.2
+#by Turkish National Corpus Team
+
[RULE-ORDER]
-SUFFIX WORD-TOKEN ABBREVIATION-KNOWN NUMBER-ORDINAL URL URL-WWW URL-DOMAIN
-E-MAIL WORD-PARPREFIX WORD-PARSUFFIX WORD-COMPOUND
-ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI DATE-REVERSE DATE
+DATE NP-COMP URL URL-WWW URL-DOMAIN
+E-MAIL ABBREVIATION INITIAL SMILEY REVERSE-SMILEY PUNCTUATION-MULTI
NUMBER-YEAR TIME FRACNUMBER NUMBER CURRENCY WORD PUNCTUATION UNKNOWN
+
[META-RULES]
SPLITTER=%
NUMBER-ORDINAL = \p{N}+-?(?: %ORDINALS% )(?:\Z|\P{Lu}|\P{Ll})$
@@ -20,19 +23,11 @@ SUFFIX = ((?:\p{L})+)( %SUFFIXES% )(?:\Z|\P{L})
%include e-mail
%include smiley
-#Ex (oud)-studente(s)
-WORD-PARPREFIX-PARSUFFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Ex: (oud)-studente, (on)zin,
-WORD-PARPREFIX=(?:\p{Ps}\p{L}+[\p{Pc}\p{Pd}]?\p{Pe}[\p{Pc}\p{Pd}]?)\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*
+#Affixed proper nouns and compounds with dash/underscore.
+#Ex= <Mahir'in> <siyah-beyaz>
+NP-COMP=\p{L}+(?:['`’‘´-]\p{L}+)+
-#Ex: könig(in)
-WORD-PARSUFFIX=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)*(?:[\p{Pc}\p{Pd}]?\p{Ps}[\p{Pc}\p{Pd}]?\p{L}+\p{Pe})
-
-#Keep dash/underscore connected parts (even if they are in parenthesis)
-WORD-COMPOUND=\p{L}+(?:[\p{Pc}\p{Pd}]\p{L}+)+
-
-#Abbreviations with multiple periods
+#Abbreviations with multiple periods as in <A.B.C>
ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)(?:\Z|[,:;])
#retain initials
@@ -41,20 +36,24 @@ INITIAL=^(?:\p{Lt}|\p{Lu})\.$
#Homogeneous punctuation (ellipsis etc)
PUNCTUATION-MULTI=(?:\.|\-|[!\?]){2,}
-#Date
-DATE=\p{N}{4}-\p{N}{1,2}[\.-]\p{N}{1,2}\.?
-DATE-SHORT=\p{N}{1,2}[-]\p{Ps}?\p{N}{1,2}[-]\p{Ps}?\p{N}{2,4}
+#Abbreviations with multiple periods
+ABBREVIATION=^(\p{L}{1,3}(?:\.\p{L}{1,3})+\.?)\Z
-FRACNUMBER=\p{N}+(?:/\p{N}+)+
+#Date
+DATE=\p{N}{1,2}\.\p{N}{1,2}\.\p{N}{2,4}(?:['`’‘´-]\p{L}+)+
NUMBER-YEAR=(['`’‘´]\p{N}{2})(?:\P{N}|\z)
+FRACNUMBER=\p{N}+(?:/\p{N}+)+
+
#Times
-TIME=\p{N}{1,2}\.\p{N}{1,2}(?:\.\p{N})?(?i:am|pm)?
+TIME=\p{N}{1,2}:\p{N}{1,2}(?::\p{N})?(?i:a\.?m\.?|p\.?m\.?)?
#retain digits, including those starting with initial period (.22), and negative numbers
NUMBER=-?(?:[\.,]?\p{N}+)+
+ROMAN-NUMERALS=\b[IVXivx]{1,2}\.
+
CURRENCY=\p{Sc}
WORD=[\p{L}\p{Mn}]+
@@ -66,21 +65,14 @@ UNKNOWN=.
[PREFIXES]
[SUFFIXES]
-['`’‘´][sS]
-:[nN]
-:[aA]
-:ar
-:AR
-:arna
-:ARNA
-:[sS]
+#Ordinals and Numbering
[ORDINALS]
-:[aA]
-:[eE]
+\.
+\)
+\-
[TOKENS]
-['`’‘´][sS]
[UNITS]
km
@@ -89,8 +81,6 @@ cm
mm
g
kg
-hg
-cg
C
l
s
@@ -99,75 +89,464 @@ min
gb
mb
kb
-St
-
[CURRENCY]
-SEK
+YTL
+TL
EUR
-DM
USD
+GBP
+CAD
+NZD
+AUD
+SGD
+HKD
[ABBREVIATIONS]
-adr
-aug
-bl.a
-ca
-cg
-dec
-d.v.s
-dvs
-dr
-dyl
-el
-enl
+A
+acc
+AD
+Adj
+Adm
+Adr
+Adv
+Age
+agm
+agy
+al
+Ala
+Alb
+Alm
+Amp
+anat
+Ank
+anon
+ant
+Apr
+Apt
+Ar
+Ariz
+Ark
+arr
+Arş
+Art
+As
+Asb
+assoc
+Asst
+astr
+astrol
+Astsb
+Atğm
+Atm
+Aug
+av
+Ave
+B
+BS
+BSc
+bağ
+Bancorp
+Bart
+Baş:
+BC
+Bçvş
+Bel
+Bhd
+bitb
+biy
+Bk
+Bkz
+Bl
+Bld
+Bldg
+Bn
+Bnb
+Böl
+bot
+Brig
+Bros
+Bşk
+Bştbp
+Bul
+Bulg
+C
+Ç
+Cad
+cal
+Calif
+cap
+Capt
+Çav
+Çev
+cf
+Cie
+cm
+Cmdr
+Cmh
+Cnv
+Co
+coğ
+col
+Colo
+comb
+combform
+Comdr
+compar
+Con
+Conn
+cont
+contd
+contr
+Corp
+Cos
+Cpl
+cu
+Cum
+Çvş
+D-Mass
+D
+dal
+dam
+db
+dbl
+Dec
+Del
+Den
+dept
+Der
+Derg
+Dev
+dg
+Dipl
+dist
+div
+dk
+dl
+dm
+doc
+doğ
+doz
+Dr
+drl
+Dz
+dzl
+E
+eg
+Ecz
+Ed
+Ef
+ekon
+Em
+Ens
+Erm
+esp
+Esq
+est
etc
-&c
-ex
-febr
-f.d
-fg.å
-f.k
-f.n
-f.v
-forts
-fr.o.m
+Ex
+F
+Fak
+Far
+Feb
+fel
+fem
+ff
+fig
+fil
+fiz
+fizy
+Fla
+for
+Fr
+Fran
+Fri
+ft
+G
+Ga
+Gal
+Gen
+geom
gm
-h
-ha
-hg
-hr
-i st.f
-jfr
-kg
-kl
-km
-lr
-m.fl
-m
+Gmr
+Gn
+Gnkur
+Gön
+Gör
+Gov
+gr
+Grt
+Güv
+H
+Hav
+Haz
+hek
+hl
+hlk
+hm
+Hon
+Hosp
+Hrk
+Hrp
+Hs
+Hst
+huk
+Hv
+Hz
+hzl
+I
+İ
+Ill
+Inc
+Ind
+Insp
+inst
+Int
+is
+işl
+J
+Jan
+Jansz
+Jap
+jeol
+Jos
+Jr
+Jul
+Jun
+K
+Kan
+Kar
+Kb
+Kd
+Kg
+Khz
+kim
+Km
+Ko
+Koll
+Kom
+Koop
+koor
+Kor
+Kora
+Korg
+krş
+Kur
+Ky
+L
+La
+Lat
+Log
+Lt
+Ltd
+Lv
+M
+M\A
+M\Sc
+MA
+Mac
+mad
+Mah
+Maj
+man
+masc
+Mass
+mat
+Mb
+Mc
+Md
+mec
+met
+Mfg
+mg
+Mich
+mim
min
-min
-m.m
+Minn
+Miss
+Mlle
mm
-mån
-ngn
-ngt
-nr
-o.dyl
-o.likn
-o.s.v
-p.g.a
-s.a.s
-s.k
-skn
-st
-t.ex
-t.o.m
-tfn
-trpt
-u.a
-vard
-v.g.v
+Mme
+Mo
+Mon
+Mr
+Mrs
+Mrşl
+Ms
+MSc
+Msgr
+Mu
+Müd
+Muh
+Mük
+Mür
+Mv
+N
+Neb
+neg
+Nev
+no
+nom
+Nos
+Nov
+Nr
+Nu
+O
+Ö
+Oct
+Öğ
+Öğr
+Okla
+Okt
+öl
+ölm
+On
+Onb
+Op
+Opr
+Or
+Ora
+Ord
+Ore
+Org
+Örn
+Ort
+Osm
+Oto
+öz
+P
+Pa
+pass
+Paz
+Pb
+pers
+Pfc
+Ph
+phr
+Pl
+pla
+Pol
+Port
+poss
+pres
+Pro
+Prof
+Prop
+Pş
+psikol
+Pty
+Pvt
+Q
+R
+Rd
+ref
+refl
+Rep
+Reps
+Res
+Rev
+Rt
+Rum
+Rus
+S
+Ş
+sa
+San
+Sb
+sc
+Sen
+Sens
+Sept
+sf
+Sfc
+Sgt
+Sin
+sing
+Sl
+sm
+Sn
+snt
+Soc
+Sok
+sos
+sp
+Sr
+Şrt
+St
+Şti
+Stj
+Su
+superl
+Supt
+Surg
+Süt
+T
+tar
+Tb
+Tbp
+tek
+Tel
+Telg
+Tenn
+Tex
+Tğm
+Th
+Tic
+tiy
+tlks
+tls
+Top
+Ts
+Tues
+Tug
+Tuğa
+Tuğg
+Tüm
+Tüma
+Tümg
+U
+Ü
+Üçvş
+Uk
+Un
+Üni
+Univ
+Ünive
+ünl
+usu
+Ütğm
+Uz
+Uzm
+V
+v
+Va
+var
+Vb
+vd
+Vet
+viz
+Vl
+Vol
+vs
+Vt
+W
+X
+Y
+Yar
+Yard
+Yay
+Yb
+Yd
+YKr
+Yrd
+Yük
+Yun
+yy
+Yzb
+Z
+zf
+zm
+zool
[EOSMARKERS]
# Character: !
diff --git a/configure b/configure
index b04c6f2..6fa7531 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for uctodata 0.2.
+# Generated by GNU Autoconf 2.69 for uctodata 0.3.
#
# Report bugs to <lamasoftware at science.ru.nl>.
#
@@ -579,14 +579,15 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='uctodata'
PACKAGE_TARNAME='uctodata'
-PACKAGE_VERSION='0.2'
-PACKAGE_STRING='uctodata 0.2'
+PACKAGE_VERSION='0.3'
+PACKAGE_STRING='uctodata 0.3'
PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
PACKAGE_URL=''
ac_unique_file="configure.ac"
ac_subst_vars='LTLIBOBJS
LIBOBJS
+LN_S
AM_BACKSLASH
AM_DEFAULT_VERBOSITY
AM_DEFAULT_V
@@ -1211,7 +1212,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures uctodata 0.2 to adapt to many kinds of systems.
+\`configure' configures uctodata 0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1278,7 +1279,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of uctodata 0.2:";;
+ short | recursive ) echo "Configuration of uctodata 0.3:";;
esac
cat <<\_ACEOF
@@ -1352,7 +1353,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-uctodata configure 0.2
+uctodata configure 0.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1369,7 +1370,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by uctodata $as_me 0.2, which was
+It was created by uctodata $as_me 0.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2232,7 +2233,7 @@ fi
# Define the identity of the package.
PACKAGE='uctodata'
- VERSION='0.2'
+ VERSION='0.3'
cat >>confdefs.h <<_ACEOF
@@ -2326,6 +2327,17 @@ END
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+$as_echo_n "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+if test "$LN_S" = "ln -s"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+$as_echo "no, using $LN_S" >&6; }
+fi
+
ac_config_files="$ac_config_files Makefile uctodata.pc config/Makefile"
@@ -2880,7 +2892,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by uctodata $as_me 0.2, which was
+This file was extended by uctodata $as_me 0.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -2933,7 +2945,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-uctodata config.status 0.2
+uctodata config.status 0.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index a05af49..4226f97 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,9 +2,10 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT([uctodata], [0.2], [lamasoftware at science.ru.nl])
+AC_INIT([uctodata], [0.3], [lamasoftware at science.ru.nl])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_SRCDIR([configure.ac])
+AC_PROG_LN_S
AC_OUTPUT([
Makefile
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/uctodata.git
More information about the debian-science-commits
mailing list