[liblingua-stem-snowball-perl] 17/19: add actual stemming algorithms source code

dom at earth.li dom at earth.li
Sun Apr 24 10:25:25 UTC 2016


This is an automated email from the git hooks/post-receive script.

dom pushed a commit to branch master
in repository liblingua-stem-snowball-perl.

commit 8f98ff48bc940bc66966bcd793c0f2dbdb683e27
Author: Dominic Hargreaves <dom at earth.li>
Date:   Sat Mar 6 13:17:53 2010 +0000

    add actual stemming algorithms source code
---
 debian/README.source                               |    8 +
 debian/changelog                                   |    7 +
 snowball_code/GNUmakefile                          |  291 ++++
 snowball_code/README                               |    5 +
 .../algorithms/danish/stem_ISO_8859_1.sbl          |   91 ++
 .../algorithms/danish/stem_MS_DOS_Latin_I.sbl      |   91 ++
 snowball_code/algorithms/dutch/stem_ISO_8859_1.sbl |  164 +++
 .../algorithms/dutch/stem_MS_DOS_Latin_I.sbl       |  164 +++
 .../algorithms/english/stem_ISO_8859_1.sbl         |  229 +++
 .../algorithms/finnish/stem_ISO_8859_1.sbl         |  196 +++
 .../algorithms/french/stem_ISO_8859_1.sbl          |  248 ++++
 .../algorithms/french/stem_MS_DOS_Latin_I.sbl      |  239 ++++
 .../algorithms/german/stem_ISO_8859_1.sbl          |  139 ++
 .../algorithms/german/stem_MS_DOS_Latin_I.sbl      |  139 ++
 .../algorithms/german2/stem_ISO_8859_1.sbl         |  145 ++
 .../algorithms/hungarian/stem_ISO_8859_1.sbl       |  241 ++++
 .../algorithms/italian/stem_ISO_8859_1.sbl         |  195 +++
 .../algorithms/italian/stem_MS_DOS_Latin_I.sbl     |  195 +++
 .../algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl |  245 ++++
 .../algorithms/lovins/stem_ISO_8859_1.sbl          |  208 +++
 .../algorithms/norwegian/stem_ISO_8859_1.sbl       |   80 ++
 .../algorithms/norwegian/stem_MS_DOS_Latin_I.sbl   |   80 ++
 .../algorithms/porter/stem_ISO_8859_1.sbl          |  139 ++
 .../algorithms/portuguese/stem_ISO_8859_1.sbl      |  218 +++
 .../algorithms/portuguese/stem_MS_DOS_Latin_I.sbl  |  218 +++
 .../algorithms/romanian/stem_ISO_8859_2.sbl        |  236 ++++
 snowball_code/algorithms/romanian/stem_Unicode.sbl |  236 ++++
 snowball_code/algorithms/russian/stem_KOI8_R.sbl   |  217 +++
 snowball_code/algorithms/russian/stem_Unicode.sbl  |  215 +++
 .../algorithms/spanish/stem_ISO_8859_1.sbl         |  230 ++++
 .../algorithms/spanish/stem_MS_DOS_Latin_I.sbl     |  230 ++++
 .../algorithms/swedish/stem_ISO_8859_1.sbl         |   72 +
 .../algorithms/swedish/stem_MS_DOS_Latin_I.sbl     |   72 +
 snowball_code/algorithms/turkish/stem_Unicode.sbl  |  477 +++++++
 snowball_code/compiler/analyser.c                  |  961 +++++++++++++
 snowball_code/compiler/driver.c                    |  256 ++++
 snowball_code/compiler/generator.c                 | 1443 +++++++++++++++++++
 snowball_code/compiler/generator_java.c            | 1452 ++++++++++++++++++++
 snowball_code/compiler/header.h                    |  315 +++++
 snowball_code/compiler/space.c                     |  257 ++++
 snowball_code/compiler/syswords.h                  |   84 ++
 snowball_code/compiler/syswords2.h                 |   14 +
 snowball_code/compiler/tokeniser.c                 |  469 +++++++
 snowball_code/doc/TODO                             |   15 +
 snowball_code/examples/stemwords.c                 |  209 +++
 snowball_code/include/libstemmer.h                 |   79 ++
 snowball_code/libstemmer/libstemmer.c              |   95 ++
 snowball_code/libstemmer/libstemmer_c.in           |   95 ++
 snowball_code/libstemmer/libstemmer_utf8.c         |   95 ++
 snowball_code/libstemmer/mkmodules.pl              |  256 ++++
 snowball_code/libstemmer/modules.h                 |  190 +++
 snowball_code/libstemmer/modules.txt               |   50 +
 snowball_code/libstemmer/modules_utf8.h            |  121 ++
 snowball_code/libstemmer/modules_utf8.txt          |   49 +
 snowball_code/runtime/api.c                        |   66 +
 snowball_code/runtime/api.h                        |   26 +
 snowball_code/runtime/header.h                     |   58 +
 snowball_code/runtime/utilities.c                  |  478 +++++++
 58 files changed, 13093 insertions(+)

diff --git a/debian/README.source b/debian/README.source
new file mode 100644
index 0000000..92e84dd
--- /dev/null
+++ b/debian/README.source
@@ -0,0 +1,8 @@
+In order to satisfy DFSG#2, I have included the snowball source files and
+compiler into this package in the snowball_code directory.
+
+If you modify the algorithms, you will need to manually recompile them
+and place the results in libstemmer_c/src_c before building this Debian
+package.
+
+ -- Dominic Hargreaves <dom at earth.li>  Sat, 06 Mar 2010 13:13:19 +0000
diff --git a/debian/changelog b/debian/changelog
index d07a312..547748f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+liblingua-stem-snowball-perl (0.952-2) unstable; urgency=low
+
+  * Include the snowball-to-C compiler and the algorithms in the
+    source distribution (Closes: #572129)
+
+ -- Dominic Hargreaves <dom at earth.li>  Sat, 06 Mar 2010 13:13:19 +0000
+
 liblingua-stem-snowball-perl (0.952-1) unstable; urgency=low
 
   * New upstream release
diff --git a/snowball_code/GNUmakefile b/snowball_code/GNUmakefile
new file mode 100644
index 0000000..1f07dc4
--- /dev/null
+++ b/snowball_code/GNUmakefile
@@ -0,0 +1,291 @@
+# -*- makefile -*-
+
+c_src_dir = src_c
+java_src_main_dir = java/org/tartarus/snowball
+java_src_dir = $(java_src_main_dir)/ext
+
+libstemmer_algorithms = danish dutch english finnish french german hungarian \
+			italian \
+			norwegian porter portuguese romanian \
+			russian spanish swedish turkish
+
+KOI8_R_algorithms = russian
+ISO_8859_1_algorithms = danish dutch english finnish french german hungarian \
+			italian \
+			norwegian porter portuguese spanish swedish
+ISO_8859_2_algorithms = romanian
+
+other_algorithms = german2 kraaij_pohlmann lovins
+
+all_algorithms = $(libstemmer_algorithms) $(other_algorithms)
+
+COMPILER_SOURCES = compiler/space.c \
+		   compiler/tokeniser.c \
+		   compiler/analyser.c \
+		   compiler/generator.c \
+		   compiler/driver.c \
+		   compiler/generator_java.c
+COMPILER_HEADERS = compiler/header.h \
+		   compiler/syswords.h \
+		   compiler/syswords2.h
+
+RUNTIME_SOURCES  = runtime/api.c \
+		   runtime/utilities.c
+RUNTIME_HEADERS  = runtime/api.h \
+		   runtime/header.h
+
+JAVARUNTIME_SOURCES = java/org/tartarus/snowball/Among.java \
+		      java/org/tartarus/snowball/SnowballProgram.java \
+		      java/org/tartarus/snowball/SnowballStemmer.java \
+		      java/org/tartarus/snowball/TestApp.java
+
+LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
+LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
+LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
+LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in
+
+STEMWORDS_SOURCES = examples/stemwords.c
+
+ALL_ALGORITHM_FILES = $(all_algorithms:%=algorithms/%/stem*.sbl)
+C_LIB_SOURCES = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c) \
+		$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.c) \
+		$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.c) \
+		$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.c)
+C_LIB_HEADERS = $(libstemmer_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h) \
+		$(KOI8_R_algorithms:%=$(c_src_dir)/stem_KOI8_R_%.h) \
+		$(ISO_8859_1_algorithms:%=$(c_src_dir)/stem_ISO_8859_1_%.h) \
+		$(ISO_8859_2_algorithms:%=$(c_src_dir)/stem_ISO_8859_2_%.h)
+C_OTHER_SOURCES = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.c)
+C_OTHER_HEADERS = $(other_algorithms:%=$(c_src_dir)/stem_UTF_8_%.h)
+JAVA_SOURCES = $(libstemmer_algorithms:%=$(java_src_dir)/%Stemmer.java)
+
+COMPILER_OBJECTS=$(COMPILER_SOURCES:.c=.o)
+RUNTIME_OBJECTS=$(RUNTIME_SOURCES:.c=.o)
+LIBSTEMMER_OBJECTS=$(LIBSTEMMER_SOURCES:.c=.o)
+LIBSTEMMER_UTF8_OBJECTS=$(LIBSTEMMER_UTF8_SOURCES:.c=.o)
+STEMWORDS_OBJECTS=$(STEMWORDS_SOURCES:.c=.o)
+C_LIB_OBJECTS = $(C_LIB_SOURCES:.c=.o)
+C_OTHER_OBJECTS = $(C_OTHER_SOURCES:.c=.o)
+JAVA_CLASSES = $(JAVA_SOURCES:.java=.class)
+JAVA_RUNTIME_CLASSES=$(JAVARUNTIME_SOURCES:.java=.class)
+
+CFLAGS=-Iinclude -O2
+CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations
+
+all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS)
+
+clean:
+	rm -f $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \
+	      $(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) snowball \
+	      libstemmer.o stemwords \
+              libstemmer/modules.h \
+              libstemmer/modules_utf8.h \
+              snowball.splint \
+	      $(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
+	      $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
+	      $(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
+              libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
+              libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c
+	rm -rf dist
+	rmdir $(c_src_dir) || true
+
+snowball: $(COMPILER_OBJECTS)
+	$(CC) -o $@ $^
+
+$(COMPILER_OBJECTS): $(COMPILER_HEADERS)
+
+libstemmer/libstemmer.c: libstemmer/libstemmer_c.in
+	sed 's/@MODULES_H@/modules.h/' $^ >$@
+
+libstemmer/libstemmer_utf8.c: libstemmer/libstemmer_c.in
+	sed 's/@MODULES_H@/modules_utf8.h/' $^ >$@
+
+libstemmer/modules.h libstemmer/mkinc.mak: libstemmer/mkmodules.pl libstemmer/modules.txt
+	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc.mak
+
+libstemmer/modules_utf8.h libstemmer/mkinc_utf8.mak: libstemmer/mkmodules.pl libstemmer/modules_utf8.txt
+	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules_utf8.txt libstemmer/mkinc_utf8.mak utf8
+
+libstemmer/libstemmer.o: libstemmer/modules.h $(C_LIB_HEADERS)
+
+libstemmer.o: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS)
+	$(AR) -cru $@ $^
+
+stemwords: $(STEMWORDS_OBJECTS) libstemmer.o
+	$(CC) -o $@ $^
+
+algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl
+	cp $^ $@
+
+$(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%/stem_Unicode.sbl snowball
+	@mkdir -p $(c_src_dir)
+	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
+	o="$(c_src_dir)/stem_UTF_8_$${l}"; \
+	echo "./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u"; \
+	./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u
+
+$(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%/stem_KOI8_R.sbl snowball
+	@mkdir -p $(c_src_dir)
+	@l=`echo "$<" | sed 's!\(.*\)/stem_KOI8_R.sbl$$!\1!;s!^.*/!!'`; \
+	o="$(c_src_dir)/stem_KOI8_R_$${l}"; \
+	echo "./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime"; \
+	./snowball $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime
+
+$(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%/stem_ISO_8859_1.sbl snowball
+	@mkdir -p $(c_src_dir)
+	@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_1.sbl$$!\1!;s!^.*/!!'`; \
+	o="$(c_src_dir)/stem_ISO_8859_1_$${l}"; \
+	echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime"; \
+	./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime
+
+$(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%/stem_ISO_8859_2.sbl snowball
+	@mkdir -p $(c_src_dir)
+	@l=`echo "$<" | sed 's!\(.*\)/stem_ISO_8859_2.sbl$$!\1!;s!^.*/!!'`; \
+	o="$(c_src_dir)/stem_ISO_8859_2_$${l}"; \
+	echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime"; \
+	./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime
+
+$(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h
+	$(CC) $(CFLAGS) -O2 -c -o $@ $< -Wall
+
+$(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball
+	@mkdir -p $(java_src_dir)
+	@l=`echo "$<" | sed 's!\(.*\)/stem_Unicode.sbl$$!\1!;s!^.*/!!'`; \
+	o="$(java_src_dir)/$${l}Stemmer"; \
+	echo "./snowball $< -j -o $${o} -p \"org.tartarus.snowball.SnowballStemmer\" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer"; \
+	./snowball $< -j -o $${o} -p "org.tartarus.snowball.SnowballStemmer" -eprefix $${l}_ -r ../runtime -n $${l}Stemmer
+
+splint: snowball.splint
+snowball.splint: $(COMPILER_SOURCES)
+	splint $^ >$@ -weak
+
+# Make a full source distribution
+dist: dist_snowball dist_libstemmer_c dist_libstemmer_java
+
+# Make a distribution of all the sources involved in snowball
+dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
+	    $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
+	    $(LIBSTEMMER_SOURCES) \
+	    $(LIBSTEMMER_UTF8_SOURCES) \
+            $(LIBSTEMMER_HEADERS) \
+	    $(LIBSTEMMER_EXTRA) \
+	    $(ALL_ALGORITHM_FILES) $(STEMWORDS_SOURCES) \
+	    GNUmakefile README doc/TODO libstemmer/mkmodules.pl
+	destname=snowball_code; \
+	dest=dist/$${destname}; \
+	rm -rf $${dest} && \
+	rm -f $${dest}.tgz && \
+	for file in $^; do \
+	  dir=`dirname $$file` && \
+	  mkdir -p $${dest}/$${dir} && \
+	  cp -a $${file} $${dest}/$${dir} || exit 1 ; \
+	done && \
+	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
+	rm -rf $${dest}
+
+# Make a distribution of all the sources required to compile the C library.
+dist_libstemmer_c: \
+            $(RUNTIME_SOURCES) \
+            $(RUNTIME_HEADERS) \
+            $(LIBSTEMMER_SOURCES) \
+            $(LIBSTEMMER_UTF8_SOURCES) \
+            $(LIBSTEMMER_HEADERS) \
+            $(LIBSTEMMER_EXTRA) \
+	    $(C_LIB_SOURCES) \
+            $(C_LIB_HEADERS) \
+            libstemmer/mkinc.mak \
+            libstemmer/mkinc_utf8.mak
+	destname=libstemmer_c; \
+	dest=dist/$${destname}; \
+	rm -rf $${dest} && \
+	rm -f $${dest}.tgz && \
+	mkdir -p $${dest} && \
+	cp -a doc/libstemmer_c_README $${dest}/README && \
+	mkdir -p $${dest}/examples && \
+	cp -a examples/stemwords.c $${dest}/examples && \
+	mkdir -p $${dest}/$(c_src_dir) && \
+	cp -a $(C_LIB_SOURCES) $(C_LIB_HEADERS) $${dest}/$(c_src_dir) && \
+	mkdir -p $${dest}/runtime && \
+	cp -a $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) $${dest}/runtime && \
+	mkdir -p $${dest}/libstemmer && \
+	cp -a $(LIBSTEMMER_SOURCES) $(LIBSTEMMER_UTF8_SOURCES) $(LIBSTEMMER_HEADERS) $(LIBSTEMMER_EXTRA) $${dest}/libstemmer && \
+	mkdir -p $${dest}/include && \
+	mv $${dest}/libstemmer/libstemmer.h $${dest}/include && \
+	(cd $${dest} && \
+	 echo "README" >> MANIFEST && \
+	 ls $(c_src_dir)/*.c $(c_src_dir)/*.h >> MANIFEST && \
+	 ls runtime/*.c runtime/*.h >> MANIFEST && \
+	 ls libstemmer/*.c libstemmer/*.h >> MANIFEST && \
+	 ls include/*.h >> MANIFEST) && \
+        cp -a libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak $${dest}/ && \
+	echo 'include mkinc.mak' >> $${dest}/Makefile && \
+	echo 'CFLAGS=-Iinclude' >> $${dest}/Makefile && \
+	echo 'all: libstemmer.o stemwords' >> $${dest}/Makefile && \
+	echo 'libstemmer.o: $$(snowball_sources:.c=.o)' >> $${dest}/Makefile && \
+	echo '	$$(AR) -cru $$@ $$^' >> $${dest}/Makefile && \
+	echo 'stemwords: examples/stemwords.o libstemmer.o' >> $${dest}/Makefile && \
+	echo '	$$(CC) -o $$@ $$^' >> $${dest}/Makefile && \
+	echo 'clean:' >> $${dest}/Makefile && \
+	echo '	rm -f stemwords *.o $(c_src_dir)/*.o runtime/*.o libstemmer/*.o' >> $${dest}/Makefile && \
+	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
+	rm -rf $${dest}
+
+# Make a distribution of all the sources required to compile the Java library.
+dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
+            $(LIBSTEMMER_EXTRA) \
+	    $(JAVA_SOURCES)
+	destname=libstemmer_java; \
+	dest=dist/$${destname}; \
+	rm -rf $${dest} && \
+	rm -f $${dest}.tgz && \
+	mkdir -p $${dest} && \
+	cp -a doc/libstemmer_java_README $${dest}/README && \
+	mkdir -p $${dest}/$(java_src_dir) && \
+	cp -a $(JAVA_SOURCES) $${dest}/$(java_src_dir) && \
+	mkdir -p $${dest}/$(java_src_main_dir) && \
+	cp -a $(JAVARUNTIME_SOURCES) $${dest}/$(java_src_main_dir) && \
+	(cd $${dest} && \
+	 echo "README" >> MANIFEST && \
+	 ls $(java_src_dir)/*.java >> MANIFEST && \
+	 ls $(java_src_main_dir)/*.java >> MANIFEST) && \
+	(cd dist && tar zcf $${destname}.tgz $${destname}) && \
+	rm -rf $${dest}
+
+check: check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r
+
+check_utf8: $(libstemmer_algorithms:%=check_utf8_%)
+
+check_iso_8859_1: $(ISO_8859_1_algorithms:%=check_iso_8859_1_%)
+
+check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)
+
+check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)
+
+check_utf8_%: ../data/% stemwords
+	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8"
+	@./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt
+	@diff -u $</output.txt tmp.txt
+	@if [ -e $</diffs.txt ] ; \
+	then \
+	  ./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt -p2 && \
+	  diff -u $</diffs.txt tmp.txt; \
+	fi
+	@rm tmp.txt
+
+check_iso_8859_1_%: ../data/% stemwords
+	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_1"
+	@iconv -fUTF8 -tISO8859-1 $</voc.txt|./stemwords -c ISO_8859_1 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
+	@iconv -fUTF8 -tISO8859-1 $</output.txt|diff -u - tmp.txt
+	@rm tmp.txt
+
+check_iso_8859_2_%: ../data/% stemwords
+	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_2"
+	@iconv -fUTF8 -tISO8859-2 $</voc.txt|./stemwords -c ISO_8859_2 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
+	@iconv -fUTF8 -tISO8859-2 $</output.txt|diff -u - tmp.txt
+	@rm tmp.txt
+
+check_koi8r_%: ../data/% stemwords
+	@echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with KOI8R"
+	@iconv -fUTF8 -tKOI8R $</voc.txt|./stemwords -c KOI8_R -l `echo $<|sed 's!.*/!!'` -o tmp.txt
+	@iconv -fUTF8 -tKOI8R $</output.txt|diff -u - tmp.txt
+	@rm tmp.txt
diff --git a/snowball_code/README b/snowball_code/README
new file mode 100644
index 0000000..afb51b3
--- /dev/null
+++ b/snowball_code/README
@@ -0,0 +1,5 @@
+This contains the source code for the snowball compiler and the stemming
+algorithms on the website.
+
+See http://snowball.tartarus.org/ for more details.
+
diff --git a/snowball_code/algorithms/danish/stem_ISO_8859_1.sbl b/snowball_code/algorithms/danish/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..0a8190a
--- /dev/null
+++ b/snowball_code/algorithms/danish/stem_ISO_8859_1.sbl
@@ -0,0 +1,91 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+           undouble
+)
+
+externals ( stem )
+
+strings ( ch )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef ae   hex 'E6'
+stringdef ao   hex 'E5'
+stringdef o/   hex 'F8'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'abcdfghjklmnoprtvyz{ao}'
+
+define mark_regions as (
+
+    $p1 = limit
+
+    test ( hop 3 setmark x )
+    goto v gopast non-v  setmark p1
+    try ( $p1 < x  $p1 = x )
+)
+
+backwardmode (
+
+    define main_suffix as (
+        setlimit tomark p1 for ([substring])
+        among(
+
+            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
+            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
+            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
+            'erets' 'et' 'eret'
+                (delete)
+            's'
+                (s_ending delete)
+        )
+    )
+
+    define consonant_pair as (
+        test (
+            setlimit tomark p1 for ([substring])
+            among(
+                'gd' // significant in the call from other_suffix
+                'dt' 'gt' 'kt'
+            )
+        )
+        next] delete
+    )
+
+    define other_suffix as (
+        do ( ['st'] 'ig' delete )
+        setlimit tomark p1 for ([substring])
+        among(
+            'ig' 'lig' 'elig' 'els'
+                (delete do consonant_pair)
+            'l{o/}st'
+                (<-'l{o/}s')
+        )
+    )
+    define undouble as (
+        setlimit tomark p1 for ([non-v] ->ch)
+        ch
+        delete
+    )
+)
+
+define stem as (
+
+    do mark_regions
+    backwards (
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+        do undouble
+    )
+)
diff --git a/snowball_code/algorithms/danish/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/danish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..1131a1c
--- /dev/null
+++ b/snowball_code/algorithms/danish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,91 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+           undouble
+)
+
+externals ( stem )
+
+strings ( ch )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef ae   hex '91'
+stringdef ao   hex '86'
+stringdef o/   hex '9B'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'abcdfghjklmnoprtvyz{ao}'
+
+define mark_regions as (
+
+    $p1 = limit
+
+    test ( hop 3 setmark x )
+    goto v gopast non-v  setmark p1
+    try ( $p1 < x  $p1 = x )
+)
+
+backwardmode (
+
+    define main_suffix as (
+        setlimit tomark p1 for ([substring])
+        among(
+
+            'hed' 'ethed' 'ered' 'e' 'erede' 'ende' 'erende' 'ene' 'erne' 'ere'
+            'en' 'heden' 'eren' 'er' 'heder' 'erer' 'heds' 'es' 'endes'
+            'erendes' 'enes' 'ernes' 'eres' 'ens' 'hedens' 'erens' 'ers' 'ets'
+            'erets' 'et' 'eret'
+                (delete)
+            's'
+                (s_ending delete)
+        )
+    )
+
+    define consonant_pair as (
+        test (
+            setlimit tomark p1 for ([substring])
+            among(
+                'gd' // significant in the call from other_suffix
+                'dt' 'gt' 'kt'
+            )
+        )
+        next] delete
+    )
+
+    define other_suffix as (
+        do ( ['st'] 'ig' delete )
+        setlimit tomark p1 for ([substring])
+        among(
+            'ig' 'lig' 'elig' 'els'
+                (delete do consonant_pair)
+            'l{o/}st'
+                (<-'l{o/}s')
+        )
+    )
+    define undouble as (
+        setlimit tomark p1 for ([non-v] ->ch)
+        ch
+        delete
+    )
+)
+
+define stem as (
+
+    do mark_regions
+    backwards (
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+        do undouble
+    )
+)
diff --git a/snowball_code/algorithms/dutch/stem_ISO_8859_1.sbl b/snowball_code/algorithms/dutch/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..f7609f7
--- /dev/null
+++ b/snowball_code/algorithms/dutch/stem_ISO_8859_1.sbl
@@ -0,0 +1,164 @@
+routines (
+           prelude postlude
+           e_ending
+           en_ending
+           mark_regions
+           R1 R2
+           undouble
+           standard_suffix
+)
+
+externals ( stem )
+
+booleans ( e_found )
+
+integers ( p1 p2 )
+
+groupings ( v v_I v_j )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef e"   hex 'EB'
+stringdef i"   hex 'EF'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+
+stringdef a'   hex 'E1'
+stringdef e'   hex 'E9'
+stringdef i'   hex 'ED'
+stringdef o'   hex 'F3'
+stringdef u'   hex 'FA'
+
+stringdef e`   hex 'E8'
+
+define v       'aeiouy{e`}'
+define v_I     v + 'I'
+define v_j     v + 'j'
+
+define prelude as (
+    test repeat (
+        [substring] among(
+            '{a"}' '{a'}'
+                (<- 'a')
+            '{e"}' '{e'}'
+                (<- 'e')
+            '{i"}' '{i'}'
+                (<- 'i')
+            '{o"}' '{o'}'
+                (<- 'o')
+            '{u"}' '{u'}'
+                (<- 'u')
+            ''  (next)
+        ) //or next
+    )
+    try(['y'] <- 'Y')
+    repeat goto (
+        v [('i'] v <- 'I') or
+           ('y']   <- 'Y')
+    )
+)
+
+define mark_regions as (
+
+    $p1 = limit
+    $p2 = limit
+
+    gopast v  gopast non-v  setmark p1
+    try($p1 < 3  $p1 = 3)  // at least 3
+    gopast v  gopast non-v  setmark p2
+
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'Y'  (<- 'y')
+        'I'  (<- 'i')
+        ''   (next)
+    ) //or next
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define undouble as (
+        test among('kk' 'dd' 'tt') [next] delete
+    )
+
+    define e_ending as (
+        unset e_found
+        ['e'] R1 test non-v delete
+        set e_found
+        undouble
+    )
+
+    define en_ending as (
+        R1 non-v and not 'gem' delete
+        undouble
+    )
+
+    define standard_suffix as (
+        do (
+            [substring] among(
+                'heden'
+                (   R1 <- 'heid'
+                )
+                'en' 'ene'
+                (   en_ending
+                )
+                's' 'se'
+                (   R1 non-v_j delete
+                )
+            )
+        )
+        do e_ending
+
+        do ( ['heid'] R2 not 'c' delete
+             ['en'] en_ending
+           )
+
+        do (
+            [substring] among(
+                'end' 'ing'
+                (   R2 delete
+                    (['ig'] R2 not 'e' delete) or undouble
+                )
+                'ig'
+                (   R2 not 'e' delete
+                )
+                'lijk'
+                (   R2 delete e_ending
+                )
+                'baar'
+                (   R2 delete
+                )
+                'bar'
+                (   R2 e_found delete
+                )
+            )
+        )
+        do (
+            non-v_I
+            test (
+                among ('aa' 'ee' 'oo' 'uu')
+                non-v
+            )
+            [next] delete
+        )
+    )
+)
+
+define stem as (
+
+        do prelude
+        do mark_regions
+        backwards
+            do standard_suffix
+        do postlude
+)
diff --git a/snowball_code/algorithms/dutch/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..15b8718
--- /dev/null
+++ b/snowball_code/algorithms/dutch/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,164 @@
+routines (
+           prelude postlude
+           e_ending
+           en_ending
+           mark_regions
+           R1 R2
+           undouble
+           standard_suffix
+)
+
+externals ( stem )
+
+booleans ( e_found )
+
+integers ( p1 p2 )
+
+groupings ( v v_I v_j )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'
+stringdef e"   hex '89'
+stringdef i"   hex '8B'
+stringdef o"   hex '94'
+stringdef u"   hex '81'
+
+stringdef a'   hex 'A0'
+stringdef e'   hex '82'
+stringdef i'   hex 'A1'
+stringdef o'   hex 'A2'
+stringdef u'   hex 'A3'
+
+stringdef e`   hex '8A'
+
+define v       'aeiouy{e`}'
+define v_I     v + 'I'
+define v_j     v + 'j'
+
+define prelude as (
+    test repeat (
+        [substring] among(
+            '{a"}' '{a'}'
+                (<- 'a')
+            '{e"}' '{e'}'
+                (<- 'e')
+            '{i"}' '{i'}'
+                (<- 'i')
+            '{o"}' '{o'}'
+                (<- 'o')
+            '{u"}' '{u'}'
+                (<- 'u')
+            ''  (next)
+        ) //or next
+    )
+    try(['y'] <- 'Y')
+    repeat goto (
+        v [('i'] v <- 'I') or
+           ('y']   <- 'Y')
+    )
+)
+
+define mark_regions as (
+
+    $p1 = limit
+    $p2 = limit
+
+    gopast v  gopast non-v  setmark p1
+    try($p1 < 3  $p1 = 3)  // at least 3
+    gopast v  gopast non-v  setmark p2
+
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'Y'  (<- 'y')
+        'I'  (<- 'i')
+        ''   (next)
+    ) //or next
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define undouble as (
+        test among('kk' 'dd' 'tt') [next] delete
+    )
+
+    define e_ending as (
+        unset e_found
+        ['e'] R1 test non-v delete
+        set e_found
+        undouble
+    )
+
+    define en_ending as (
+        R1 non-v and not 'gem' delete
+        undouble
+    )
+
+    define standard_suffix as (
+        do (
+            [substring] among(
+                'heden'
+                (   R1 <- 'heid'
+                )
+                'en' 'ene'
+                (   en_ending
+                )
+                's' 'se'
+                (   R1 non-v_j delete
+                )
+            )
+        )
+        do e_ending
+
+        do ( ['heid'] R2 not 'c' delete
+             ['en'] en_ending
+           )
+
+        do (
+            [substring] among(
+                'end' 'ing'
+                (   R2 delete
+                    (['ig'] R2 not 'e' delete) or undouble
+                )
+                'ig'
+                (   R2 not 'e' delete
+                )
+                'lijk'
+                (   R2 delete e_ending
+                )
+                'baar'
+                (   R2 delete
+                )
+                'bar'
+                (   R2 e_found delete
+                )
+            )
+        )
+        do (
+            non-v_I
+            test (
+                among ('aa' 'ee' 'oo' 'uu')
+                non-v
+            )
+            [next] delete
+        )
+    )
+)
+
+define stem as (
+
+        do prelude
+        do mark_regions
+        backwards
+            do standard_suffix
+        do postlude
+)
diff --git a/snowball_code/algorithms/english/stem_ISO_8859_1.sbl b/snowball_code/algorithms/english/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..fe18d7a
--- /dev/null
+++ b/snowball_code/algorithms/english/stem_ISO_8859_1.sbl
@@ -0,0 +1,229 @@
+integers ( p1 p2 )
+booleans ( Y_found )
+
+routines (
+    prelude postlude
+    mark_regions
+    shortv
+    R1 R2
+    Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5
+    exception1
+    exception2
+)
+
+externals ( stem )
+
+groupings ( v v_WXY valid_LI )
+
+stringescapes {}
+
+define v        'aeiouy'
+define v_WXY    v + 'wxY'
+
+define valid_LI 'cdeghkmnrt'
+
+define prelude as (  // drop a leading apostrophe; rewrite some 'y' as 'Y' and record that in Y_found
+    unset Y_found
+    do ( ['{'}'] delete)  // {'} is the apostrophe under stringescapes {}
+    do ( ['y'] <-'Y' set Y_found)  // 'y' at the very start of the word
+    do repeat(goto (v ['y']) <-'Y' set Y_found)  // every 'y' that directly follows a character of grouping v
+)
+
+define mark_regions as (  // set p1 and p2; each stays at limit when its scan below fails
+    $p1 = limit
+    $p2 = limit
+    do(
+        among (  // word-initial prefixes after which p1 is placed directly
+            'gener'
+            'commun'  //  added May 2005
+            'arsen'   //  added Nov 2006 (arsenic/arsenal)
+            // ... extensions possible here ...
+        ) or (gopast v  gopast non-v)  // otherwise: just past the first non-v character that follows a v character
+        setmark p1
+        gopast v  gopast non-v  setmark p2  // the same scan again, starting from p1
+    )
+)
+
+backwardmode (
+
+    define shortv as (  // backward mode: each alternative is read from the cursor towards the word start
+        ( non-v_WXY v non-v )  // ... non-v, v, then a final character that is neither in v nor w, x, Y
+        or
+        ( non-v v atlimit )  // or: a v character at the very start of the word followed by one non-v character
+    )
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define Step_1a as (  // strip apostrophe-s forms, then rewrite -sses / -ied / -ies / -s endings
+        try (
+            [substring] among (
+                '{'}' '{'}s' '{'}s{'}'
+                       (delete)
+            )
+        )
+        [substring] among (
+            'sses' (<-'ss')
+            'ied' 'ies'
+                   ((hop 2 <-'i') or <-'ie')  // 'i' when at least two characters precede the suffix, otherwise 'ie'
+            's'    (next gopast v delete)  // drop 's' only if a v character occurs before the character preceding it
+            'us' 'ss'  // matched (as the longest suffix) but left unchanged
+        )
+    )
+
+    define Step_1b as (  // handle -eed(ly) and -ed(ly) / -ing(ly) endings
+        [substring] among (
+            'eed' 'eedly'
+                (R1 <-'ee')
+            'ed' 'edly' 'ing' 'ingly'
+                (
+                test gopast v  delete  // delete only when a v character occurs before the suffix
+                test substring among(  // then adjust what is now the end of the word
+                    'at' 'bl' 'iz'
+                         (<+ 'e')
+                    'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
+                    // ignoring double c, h, j, k, q, v, w, and x
+                         ([next]  delete)  // remove the last letter of the doubled pair
+                    ''   (atmark p1  test shortv  <+ 'e')  // nothing else matched: add 'e' if the cursor is at p1 and shortv holds
+                )
+            )
+        )
+    )
+
+    define Step_1c as (  // replace a final 'y' or 'Y' by 'i'
+        ['y' or 'Y']
+        non-v not atlimit  // only after a non-v character that is not the first character of the word
+        <-'i'
+    )
+
+    define Step_2 as (
+        [substring] R1 among (
+            'tional'  (<-'tion')
+            'enci'    (<-'ence')
+            'anci'    (<-'ance')
+            'abli'    (<-'able')
+            'entli'   (<-'ent')
+            'izer' 'ization'
+                      (<-'ize')
+            'ational' 'ation' 'ator'
+                      (<-'ate')
+            'alism' 'aliti' 'alli'
+                      (<-'al')
+            'fulness' (<-'ful')
+            'ousli' 'ousness'
+                      (<-'ous')
+            'iveness' 'iviti'
+                      (<-'ive')
+            'biliti' 'bli'
+                      (<-'ble')
+            'ogi'     ('l' <-'og')
+            'fulli'   (<-'ful')
+            'lessli'  (<-'less')
+            'li'      (valid_LI delete)
+        )
+    )
+
+    define Step_3 as (
+        [substring] R1 among (
+            'tional'  (<- 'tion')
+            'ational' (<- 'ate')
+            'alize'   (<-'al')
+            'icate' 'iciti' 'ical'
+                      (<-'ic')
+            'ful' 'ness'
+                      (delete)
+            'ative'
+                      (R2 delete)  // 'R2' added Dec 2001
+        )
+    )
+
+    define Step_4 as (
+        [substring] R2 among (
+            'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
+            'ment' 'ent' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
+                      (delete)
+            'ion'     ('s' or 't' delete)
+        )
+    )
+
+    define Step_5 as (  // final clean-up of a trailing 'e' or 'l'
+        [substring] among (
+            'e' (R2 or (R1 not shortv) delete)  // delete 'e' in R2, or in R1 when shortv does not hold before it
+            'l' (R2 'l' delete)  // delete 'l' in R2 when another 'l' precedes it
+        )
+    )
+
+    define exception2 as (
+        // Succeeds, changing nothing, when the whole word is one of the listed forms (atlimit = match reaches the word start).
+        [substring] atlimit among(
+            'inning' 'outing' 'canning' 'herring' 'earring'
+            'proceed' 'exceed' 'succeed'
+
+            // ... extensions possible here ...
+
+        )
+    )
+)
+
+define exception1 as (
+
+    [substring] atlimit among(
+
+        /* special changes: */
+
+        'skis'      (<-'ski')
+        'skies'     (<-'sky')
+        'dying'     (<-'die')
+        'lying'     (<-'lie')
+        'tying'     (<-'tie')
+
+        /* special -LY cases */
+
+        'idly'      (<-'idl')
+        'gently'    (<-'gentl')
+        'ugly'      (<-'ugli')
+        'early'     (<-'earli')
+        'only'      (<-'onli')
+        'singly'    (<-'singl')
+
+        // ... extensions possible here ...
+
+        /* invariant forms: */
+
+        'sky'
+        'news'
+        'howe'
+
+        'atlas' 'cosmos' 'bias' 'andes' // not plural forms
+
+        // ... extensions possible here ...
+    )
+)
+
+define postlude as (Y_found  repeat(goto (['Y']) <-'y'))  // if prelude set Y_found, turn every 'Y' back into 'y'
+
+define stem as (
+    // Entry point: the first alternative of the 'or' chain that succeeds ends the routine.
+    exception1 or  // whole-word special cases
+    not hop 3 or (  // succeeds, changing nothing, when the word has fewer than 3 characters
+        do prelude
+        do mark_regions
+        backwards (
+
+            do Step_1a
+
+            exception2 or (  // if exception2 matches after Step_1a, the remaining steps are skipped
+
+                do Step_1b
+                do Step_1c
+
+                do Step_2
+                do Step_3
+                do Step_4
+
+                do Step_5
+            )
+        )
+        do postlude
+    )
+)
diff --git a/snowball_code/algorithms/finnish/stem_ISO_8859_1.sbl b/snowball_code/algorithms/finnish/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..9ac74f2
--- /dev/null
+++ b/snowball_code/algorithms/finnish/stem_ISO_8859_1.sbl
@@ -0,0 +1,196 @@
+
+/* Finnish stemmer.
+
+   Numbers in square brackets refer to the sections in
+   Fred Karlsson, Finnish: An Essential Grammar. Routledge, 1999
+   ISBN 0-415-20705-3
+
+*/
+
+routines (
+           mark_regions
+           R2
+           particle_etc possessive
+           LONG VI
+           case_ending
+           i_plural
+           t_plural
+           other_endings
+           tidy
+)
+
+externals ( stem )
+
+integers ( p1 p2 )
+strings ( x )
+booleans ( ending_removed )
+groupings ( AEI V1 V2 particle_end )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+
+define AEI 'a{a"}ei'
+define V1 'aeiouy{a"}{o"}'
+define V2 'aeiou{a"}{o"}'
+define particle_end V1 + 'nt'
+
+define mark_regions as (
+    // p1: just past the first non-V1 character at or after the first V1 character; p2: the same scan from p1.
+    $p1 = limit
+    $p2 = limit
+
+    goto V1  gopast non-V1  setmark p1  // if a scan fails the routine fails and the default above is kept
+    goto V1  gopast non-V1  setmark p2
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor
+
+    define particle_etc as (  // delete one of the listed endings when its condition holds
+        setlimit tomark p1 for ([substring])  // the suffix match may not reach back beyond p1
+        among(
+            'kin'
+            'kaan' 'k{a"}{a"}n'
+            'ko'   'k{o"}'
+            'han'  'h{a"}n'
+            'pa'   'p{a"}'    // Particles [91]
+                (particle_end)  // must be preceded by a character of grouping particle_end
+            'sti'             // Adverb [87]
+                (R2)  // must lie in R2
+        )
+        delete
+    )
+    define possessive as (    // [36]
+        setlimit tomark p1 for ([substring])
+        among(
+            'si'
+                (not 'k' delete)  // take 'ksi' as the Comitative case
+            'ni'
+                (delete ['kse'] <- 'ksi') // kseni = ksi + ni
+            'nsa' 'ns{a"}'
+            'mme'
+            'nne'
+                (delete)
+            /* Now for Vn possessives after case endings: [36] */
+            'an'
+                (among('ta' 'ssa' 'sta' 'lla' 'lta' 'na') delete)
+            '{a"}n'
+                (among('t{a"}' 'ss{a"}' 'st{a"}'
+                       'll{a"}' 'lt{a"}' 'n{a"}') delete)
+            'en'
+                (among('lle' 'ine') delete)
+        )
+    )
+
+    define LONG as
+        among('aa' 'ee' 'ii' 'oo' 'uu' '{a"}{a"}' '{o"}{o"}')
+
+    define VI as ('i' V2)
+
+    define case_ending as (
+        setlimit tomark p1 for ([substring])
+        among(
+            'han'    ('a')          //-.
+            'hen'    ('e')          // |
+            'hin'    ('i')          // |
+            'hon'    ('o')          // |
+            'h{a"}n' ('{a"}')       // Illative   [43]
+            'h{o"}n' ('{o"}')       // |
+            'siin'   VI             // |
+            'seen'   LONG           //-'
+
+            'den'    VI
+            'tten'   VI             // Genitive plurals [34]
+                     ()
+            'n'                     // Genitive or Illative
+                ( try ( LONG // Illative
+                        or 'ie' // Genitive
+                          and next ]
+                      )
+                  /* otherwise Genitive */
+                )
+
+            'a' '{a"}'              //-.
+                     (V1 non-V1)    // |
+            'tta' 'tt{a"}'          // Partitive  [32]
+                     ('e')          // |
+            'ta' 't{a"}'            //-'
+
+            'ssa' 'ss{a"}'          // Inessive   [41]
+            'sta' 'st{a"}'          // Elative    [42]
+
+            'lla' 'll{a"}'          // Adessive   [44]
+            'lta' 'lt{a"}'          // Ablative   [51]
+            'lle'                   // Allative   [46]
+            'na' 'n{a"}'            // Essive     [49]
+            'ksi'                   // Translative[50]
+            'ine'                   // Comitative [51]
+
+            /* Abessive and Instructive are too rare for
+               inclusion [51] */
+
+        )
+        delete
+        set ending_removed
+    )
+    define other_endings as (
+        setlimit tomark p2 for ([substring])
+        among(
+            'mpi' 'mpa' 'mp{a"}'
+            'mmi' 'mma' 'mm{a"}'    // Comparative forms [85]
+                (not 'po')          //-improves things
+            'impi' 'impa' 'imp{a"}'
+            'immi' 'imma' 'imm{a"}' // Superlative forms [86]
+            'eja' 'ej{a"}'          // indicates agent [93.1B]
+        )
+        delete
+    )
+    define i_plural as (            // [26]
+        setlimit tomark p1 for ([substring])  // the match may not reach back beyond p1
+        among(
+            'i'  'j'  // a final i or j is the slice that gets deleted
+        )
+        delete
+    )
+    define t_plural as (            // [26]
+        setlimit tomark p1 for (
+            ['t'] test V1  // a final 't' at or after p1 that is preceded by a V1 character
+            delete
+        )
+        setlimit tomark p2 for ([substring])  // second suffix match is confined to the region from p2
+        among(
+            'mma' (not 'po') //-mmat endings
+            'imma'           //-immat endings
+        )
+        delete
+    )
+    define tidy as (  // clean-up after the endings have been removed
+        setlimit tomark p1 for (  // the four 'do' steps below may not reach back beyond p1
+            do ( LONG and ([next] delete ) ) // undouble vowel
+            do ( [AEI] non-V1 delete ) // remove trailing a, a", e, i
+            do ( ['j'] 'o' or 'u' delete )  // remove trailing j when o or u precedes it
+            do ( ['o'] 'j' delete )  // remove trailing o when j precedes it
+        )
+        goto non-V1 [next] -> x  x delete // undouble consonant: copy the last non-V1 character to x, delete it if x also precedes it
+    )
+)
+
+define stem as (
+    // Entry point: mark the regions, then remove endings in backward mode.
+    do mark_regions
+    unset ending_removed
+    backwards (
+        do particle_etc
+        do possessive
+        do case_ending  // sets ending_removed when it deletes an ending
+        do other_endings
+        (ending_removed do i_plural) or do t_plural  // i_plural only if ending_removed is set, otherwise t_plural
+        do tidy
+    )
+)
+
diff --git a/snowball_code/algorithms/french/stem_ISO_8859_1.sbl b/snowball_code/algorithms/french/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..e972f22
--- /dev/null
+++ b/snowball_code/algorithms/french/stem_ISO_8859_1.sbl
@@ -0,0 +1,248 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           i_verb_suffix
+           verb_suffix
+           residual_suffix
+           un_double
+           un_accent
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v keep_with_s )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a^   hex 'E2'  // a-circumflex
+stringdef a`   hex 'E0'  // a-grave
+stringdef c,   hex 'E7'  // c-cedilla
+
+stringdef e"   hex 'EB'  // e-diaeresis (rare)
+stringdef e'   hex 'E9'  // e-acute
+stringdef e^   hex 'EA'  // e-circumflex
+stringdef e`   hex 'E8'  // e-grave
+stringdef i"   hex 'EF'  // i-diaeresis
+stringdef i^   hex 'EE'  // i-circumflex
+stringdef o^   hex 'F4'  // o-circumflex
+stringdef u^   hex 'FB'  // u-circumflex
+stringdef u`   hex 'F9'  // u-grave
+
+define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
+
+define prelude as repeat goto (
+    // Rewrite selected u / i / y as U / I / Y, which are not members of grouping v.
+    (  v [ ('u' ] v <- 'U') or  // u between two v characters
+           ('i' ] v <- 'I') or  // i between two v characters
+           ('y' ] <- 'Y')  // y after a v character
+    )
+    or
+    (  ['y'] v <- 'Y' )  // y before a v character
+    or
+    (  'q' ['u'] <- 'U' )  // u after q
+)
+
+define mark_regions as (
+    // Set pV, p1 and p2, the marks tested by RV, R1 and R2.
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v v next )  // word starts with two v characters: pV is after the third character
+        or
+        among ( // this exception list begun Nov 2006
+            'par'  // paris, parie, pari
+            'col'  // colis
+            'tap'  // tapis
+            // extensions possible here
+        )
+        or
+        ( next gopast v )  // otherwise: skip the first character, then go past the next v character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-v character that follows a v character
+        gopast v gopast non-v setmark p2  // p2: the same scan repeated from p1
+    )
+)
+
+define postlude as repeat (
+    // Walk the word left to right, turning I, U and Y back into lower case.
+    [substring] among(
+        'I' (<- 'i')
+        'U' (<- 'u')
+        'Y' (<- 'y')
+        ''  (next)  // any other character: step over it; 'next' fails at the end, which stops the repeat
+    )
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        [substring] among(
+
+            'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
+            'ances' 'iqUes' 'ismes' 'ables' 'istes'
+               ( R2 delete )
+            'atrice' 'ateur' 'ation'
+            'atrices' 'ateurs' 'ations'
+               ( R2 delete
+                 try ( ['ic'] (R2 delete) or <-'iqU' )
+               )
+            'logie'
+            'logies'
+               ( R2 <- 'log' )
+            'usion' 'ution'
+            'usions' 'utions'
+               ( R2 <- 'u' )
+            'ence'
+            'ences'
+               ( R2 <- 'ent' )
+            'ement'
+            'ements'
+            (
+                RV delete
+                try (
+                    [substring] among(
+                        'iv'   (R2 delete ['at'] R2 delete)
+                        'eus'  ((R2 delete) or (R1<-'eux'))
+                        'abl' 'iqU'
+                               (R2 delete)
+                        'i{e`}r' 'I{e`}r'      //)
+                               (RV <-'i')      //)--new 2 Sept 02
+                    )
+                )
+            )
+            'it{e'}'
+            'it{e'}s'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' ((R2 delete) or <-'abl')
+                        'ic'   ((R2 delete) or <-'iqU')
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'if' 'ive'
+            'ifs' 'ives'
+            (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
+            )
+            'eaux' (<- 'eau')
+            'aux'  (R1 <- 'al')
+            'euse'
+            'euses'((R2 delete) or (R1<-'eux'))
+
+            'issement'
+            'issements'(R1 non-v delete) // verbal
+
+            // fail(...) below forces entry to verb_suffix. -ment typically
+            // follows the p.p., e.g 'confus{e'}ment'.
+
+            'amment'   (RV fail(<- 'ant'))
+            'emment'   (RV fail(<- 'ent'))
+            'ment'
+            'ments'    (test(v RV) fail(delete))
+                       // v is e,i,u,{e'},I or U
+        )
+    )
+
+    define i_verb_suffix as setlimit tomark pV for (
+        [substring] among (
+            '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
+            'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
+            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
+            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
+            'issez' 'issiez' 'issions' 'issons' 'it'
+                (non-v delete)
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (
+        [substring] among (
+            'ions'
+                (R2 delete)
+
+            '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
+            'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
+            'erons' 'eront' 'ez' 'iez'
+
+            // 'ons' //-best omitted
+
+                (delete)
+
+            '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
+            'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
+            'assions'
+                (delete
+                 try(['e'] delete)
+                )
+        )
+    )
+
+    define keep_with_s 'aiou{e`}s'
+
+    define residual_suffix as (  // fallback used by stem when none of the other suffix routines succeeded
+        try(['s'] test non-keep_with_s delete)  // drop a final 's' unless a keep_with_s character precedes it
+        setlimit tomark pV for (  // the suffix match may not reach back beyond pV
+            [substring] among(
+                'ion'           (R2 's' or 't' delete)  // delete 'ion' in R2 when s or t precedes it
+                'ier' 'i{e`}re'
+                'Ier' 'I{e`}re' (<-'i')
+                'e'             (delete)
+                '{e"}'          ('gu' delete)  // delete e-diaeresis only after 'gu'
+            )
+        )
+    )
+
+    define un_double as (  // if the word ends in one of the listed letter groups, remove its last letter
+        test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete  // 'test' restores the cursor, so [next] slices one character
+    )
+
+    define un_accent as (  // backward mode: the word must end in one or more non-v characters
+        atleast 1 non-v
+        [ '{e'}' or '{e`}' ] <-'e'  // an e-acute or e-grave directly before them loses its accent
+    )
+)
+
+define stem as (
+    // Entry point: prelude and mark_regions run forwards, the suffix work runs in backward mode.
+    do prelude
+    do mark_regions
+    backwards (
+
+        do (
+            (
+                 ( standard_suffix or
+                   i_verb_suffix or
+                   verb_suffix
+                 )
+                 and
+                 try( [ ('Y'   ] <- 'i' ) or  // after a successful suffix routine: final Y becomes i,
+                        ('{c,}'] <- 'c' )  // or final c-cedilla becomes c
+                 )
+            ) or
+            residual_suffix  // tried only when none of the three routines above succeeded
+        )
+
+        // try(['ent'] RV delete) // is best omitted
+
+        do un_double
+        do un_accent
+    )
+    do postlude
+)
+
diff --git a/snowball_code/algorithms/french/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/french/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..996eba1
--- /dev/null
+++ b/snowball_code/algorithms/french/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,239 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           i_verb_suffix
+           verb_suffix
+           residual_suffix
+           un_double
+           un_accent
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v keep_with_s )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a^   hex '83'  // a-circumflex
+stringdef a`   hex '85'  // a-grave
+stringdef c,   hex '87'  // c-cedilla
+
+stringdef e"   hex '89'  // e-diaeresis (rare)
+stringdef e'   hex '82'  // e-acute
+stringdef e^   hex '88'  // e-circumflex
+stringdef e`   hex '8A'  // e-grave
+stringdef i"   hex '8B'  // i-diaeresis
+stringdef i^   hex '8C'  // i-circumflex
+stringdef o^   hex '93'  // o-circumflex
+stringdef u^   hex '96'  // u-circumflex
+stringdef u`   hex '97'  // u-grave
+
+define v 'aeiouy{a^}{a`}{e"}{e'}{e^}{e`}{i"}{i^}{o^}{u^}{u`}'
+
+define prelude as repeat goto (
+
+    (  v [ ('u' ] v <- 'U') or
+           ('i' ] v <- 'I') or
+           ('y' ] <- 'Y')
+    )
+    or
+    (  ['y'] v <- 'Y' )
+    or
+    (  'q' ['u'] <- 'U' )
+)
+
+define mark_regions as (
+    // Set pV, p1 and p2, the marks tested by RV, R1 and R2; each keeps its default when its scan fails.
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+    // pV: after two leading v characters plus one more, or after a listed prefix, else past the first v after character 1.
+    do (
+        ( v v next ) or among ( 'par' 'col' 'tap' ) or ( next gopast v )  // among(): same exception list as stem_ISO_8859_1.sbl
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-v character that follows a v character
+        gopast v gopast non-v setmark p2  // p2: the same scan repeated from p1
+    )
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'I' (<- 'i')
+        'U' (<- 'u')
+        'Y' (<- 'y')
+        ''  (next)
+    )
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        [substring] among(
+
+            'ance' 'iqUe' 'isme' 'able' 'iste' 'eux'
+            'ances' 'iqUes' 'ismes' 'ables' 'istes'
+               ( R2 delete )
+            'atrice' 'ateur' 'ation'
+            'atrices' 'ateurs' 'ations'
+               ( R2 delete
+                 try ( ['ic'] (R2 delete) or <-'iqU' )
+               )
+            'logie'
+            'logies'
+               ( R2 <- 'log' )
+            'usion' 'ution'
+            'usions' 'utions'
+               ( R2 <- 'u' )
+            'ence'
+            'ences'
+               ( R2 <- 'ent' )
+            'ement'
+            'ements'
+            (
+                RV delete
+                try (
+                    [substring] among(
+                        'iv'   (R2 delete ['at'] R2 delete)
+                        'eus'  ((R2 delete) or (R1<-'eux'))
+                        'abl' 'iqU'
+                               (R2 delete)
+                        'i{e`}r' 'I{e`}r'      //)
+                               (RV <-'i')      //)--new 2 Sept 02
+                    )
+                )
+            )
+            'it{e'}'
+            'it{e'}s'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' ((R2 delete) or <-'abl')
+                        'ic'   ((R2 delete) or <-'iqU')
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'if' 'ive'
+            'ifs' 'ives'
+            (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] (R2 delete) or <-'iqU' )
+            )
+            'eaux' (<- 'eau')
+            'aux'  (R1 <- 'al')
+            'euse'
+            'euses'((R2 delete) or (R1<-'eux'))
+
+            'issement'
+            'issements'(R1 non-v delete) // verbal
+
+            // fail(...) below forces entry to verb_suffix. -ment typically
+            // follows the p.p., e.g 'confus{e'}ment'.
+
+            'amment'   (RV fail(<- 'ant'))
+            'emment'   (RV fail(<- 'ent'))
+            'ment'
+            'ments'    (test(v RV) fail(delete))
+                       // v is e,i,u,{e'},I or U
+        )
+    )
+
+    define i_verb_suffix as setlimit tomark pV for (
+        [substring] among (
+            '{i^}mes' '{i^}t' '{i^}tes' 'i' 'ie' 'ies' 'ir' 'ira' 'irai'
+            'iraIent' 'irais' 'irait' 'iras' 'irent' 'irez' 'iriez'
+            'irions' 'irons' 'iront' 'is' 'issaIent' 'issais' 'issait'
+            'issant' 'issante' 'issantes' 'issants' 'isse' 'issent' 'isses'
+            'issez' 'issiez' 'issions' 'issons' 'it'
+                (non-v delete)
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (
+        [substring] among (
+            'ions'
+                (R2 delete)
+
+            '{e'}' '{e'}e' '{e'}es' '{e'}s' '{e`}rent' 'er' 'era' 'erai'
+            'eraIent' 'erais' 'erait' 'eras' 'erez' 'eriez' 'erions'
+            'erons' 'eront' 'ez' 'iez'
+
+            // 'ons' //-best omitted
+
+                (delete)
+
+            '{a^}mes' '{a^}t' '{a^}tes' 'a' 'ai' 'aIent' 'ais' 'ait' 'ant'
+            'ante' 'antes' 'ants' 'as' 'asse' 'assent' 'asses' 'assiez'
+            'assions'
+                (delete
+                 try(['e'] delete)
+                )
+        )
+    )
+
+    define keep_with_s 'aiou{e`}s'
+
+    define residual_suffix as (
+        try(['s'] test non-keep_with_s delete)
+        setlimit tomark pV for (
+            [substring] among(
+                'ion'           (R2 's' or 't' delete)
+                'ier' 'i{e`}re'
+                'Ier' 'I{e`}re' (<-'i')
+                'e'             (delete)
+                '{e"}'          ('gu' delete)
+            )
+        )
+    )
+
+    define un_double as (
+        test among('enn' 'onn' 'ett' 'ell' 'eill') [next] delete
+    )
+
+    define un_accent as (
+        atleast 1 non-v
+        [ '{e'}' or '{e`}' ] <-'e'
+    )
+)
+
+define stem as (
+
+    do prelude
+    do mark_regions
+    backwards (
+
+        do (
+            (
+                 ( standard_suffix or
+                   i_verb_suffix or
+                   verb_suffix
+                 )
+                 and
+                 try( [ ('Y'   ] <- 'i' ) or
+                        ('{c,}'] <- 'c' )
+                 )
+            ) or
+            residual_suffix
+        )
+
+        // try(['ent'] RV delete) // is best omitted
+
+        do un_double
+        do un_accent
+    )
+    do postlude
+)
+
diff --git a/snowball_code/algorithms/german/stem_ISO_8859_1.sbl b/snowball_code/algorithms/german/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..7069daf
--- /dev/null
+++ b/snowball_code/algorithms/german/stem_ISO_8859_1.sbl
@@ -0,0 +1,139 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+stringdef ss   hex 'DF'
+
+define v 'aeiouy{a"}{o"}{u"}'
+
+define s_ending  'bdfghklmnrt'
+define st_ending s_ending - 'r'
+
+define prelude as (
+    // Pass 1: replace every {ss} (hex DF) by 'ss'; 'test' puts the cursor back at the start afterwards.
+    test repeat (
+        (
+            ['{ss}'] <- 'ss'
+        ) or next
+    )
+    // Pass 2: 'u' or 'y' between two v characters becomes 'U' / 'Y' (undone by postlude).
+    repeat goto (
+        v [('u'] v <- 'U') or
+           ('y'] v <- 'Y')
+    )
+)
+
+define mark_regions as (
+    // Set p1 and p2; p1 is never placed before the position three characters into the word.
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)  // x = position after 3 characters; fails the routine (defaults kept) on shorter words
+
+    gopast v  gopast non-v  setmark p1  // just past the first non-v character that follows a v character
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2  // the same scan repeated from where the p1 scan stopped
+
+)
+
+define postlude as repeat (
+    // Walk the word left to right: lower-case Y and U again and strip the umlauts.
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)  // any other character: step over it; 'next' fails at the end, which stops the repeat
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        do (
+            [substring] R1 among(
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)
+                )
+                's'
+                (   s_ending delete
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete
+                )
+            )
+        )
+        do (
+            [substring] R2 among(
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (  // entry point; each step is wrapped in 'do', so a failing step does not abort the rest
+    do prelude
+    do mark_regions
+    backwards
+        do standard_suffix  // the only step run in backward mode
+    do postlude
+)
diff --git a/snowball_code/algorithms/german/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/german/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..3effb32
--- /dev/null
+++ b/snowball_code/algorithms/german/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,139 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'
+stringdef o"   hex '94'
+stringdef u"   hex '81'
+stringdef ss   hex 'E1'
+
+define v 'aeiouy{a"}{o"}{u"}'
+
+define s_ending  'bdfghklmnrt'
+define st_ending s_ending - 'r'
+
+define prelude as (
+
+    test repeat (
+        (
+            ['{ss}'] <- 'ss'
+        ) or next
+    )
+
+    repeat goto (
+        v [('u'] v <- 'U') or
+           ('y'] v <- 'Y')
+    )
+)
+
+define mark_regions as (
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)
+
+    gopast v  gopast non-v  setmark p1
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2
+
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        do (
+            [substring] R1 among(
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)
+                )
+                's'
+                (   s_ending delete
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete
+                )
+            )
+        )
+        do (
+            [substring] R2 among(
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (
+    do prelude
+    do mark_regions
+    backwards
+        do standard_suffix
+    do postlude
+)
diff --git a/snowball_code/algorithms/german2/stem_ISO_8859_1.sbl b/snowball_code/algorithms/german2/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..ce6026a
--- /dev/null
+++ b/snowball_code/algorithms/german2/stem_ISO_8859_1.sbl
@@ -0,0 +1,145 @@
+
+/*
+    Extra rule for -nisse ending added 11 Dec 2009
+*/
+
+routines (
+           prelude postlude
+           mark_regions
+           R1 R2
+           standard_suffix
+)
+
+externals ( stem )
+
+integers ( p1 p2 x )
+
+groupings ( v s_ending st_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef o"   hex 'F6'
+stringdef u"   hex 'FC'
+stringdef ss   hex 'DF'
+
+define v 'aeiouy{a"}{o"}{u"}'
+
+define s_ending  'bdfghklmnrt'
+define st_ending s_ending - 'r'
+
+define prelude as (
+    // Pass 1 (cursor restored by 'test'): 'u' or 'y' between two v characters becomes 'U' / 'Y'.
+    test repeat goto (
+        v [('u'] v <- 'U') or
+           ('y'] v <- 'Y')
+    )
+    // Pass 2: rewrite {ss} as 'ss' and ae / oe / ue as umlauts; 'qu' is consumed so its 'u' cannot start a 'ue'.
+    repeat (
+        [substring] among(
+            '{ss}' (<- 'ss')
+            'ae'   (<- '{a"}')
+            'oe'   (<- '{o"}')
+            'ue'   (<- '{u"}')
+            'qu'   ()  // was (hop 2): substring has already moved past 'qu', so the hop skipped two further characters
+            ''     (next)
+        )
+    )
+
+)
+
+define mark_regions as (
+
+    $p1 = limit
+    $p2 = limit
+
+    test(hop 3 setmark x)
+
+    gopast v  gopast non-v  setmark p1
+    try($p1 < x  $p1 = x)  // at least 3
+    gopast v  gopast non-v  setmark p2
+
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'Y'    (<- 'y')
+        'U'    (<- 'u')
+        '{a"}' (<- 'a')
+        '{o"}' (<- 'o')
+        '{u"}' (<- 'u')
+        ''     (next)
+    )
+
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define standard_suffix as (
+        do (
+            [substring] R1 among(
+                'em' 'ern' 'er'
+                (   delete
+                )
+                'e' 'en' 'es'
+                (   delete
+                    try (['s'] 'nis' delete)
+                )
+                's'
+                (   s_ending delete
+                )
+            )
+        )
+        do (
+            [substring] R1 among(
+                'en' 'er' 'est'
+                (   delete
+                )
+                'st'
+                (   st_ending hop 3 delete
+                )
+            )
+        )
+        do (
+            [substring] R2 among(
+                'end' 'ung'
+                (   delete
+                    try (['ig'] not 'e' R2 delete)
+                )
+                'ig' 'ik' 'isch'
+                (   not 'e' delete
+                )
+                'lich' 'heit'
+                (   delete
+                    try (
+                        ['er' or 'en'] R1 delete
+                    )
+                )
+                'keit'
+                (   delete
+                    try (
+                        [substring] R2 among(
+                            'lich' 'ig'
+                            (   delete
+                            )
+                        )
+                    )
+                )
+            )
+        )
+    )
+)
+
+define stem as (
+    do prelude
+    do mark_regions
+    backwards
+        do standard_suffix
+    do postlude
+)
diff --git a/snowball_code/algorithms/hungarian/stem_ISO_8859_1.sbl b/snowball_code/algorithms/hungarian/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..9c2da00
--- /dev/null
+++ b/snowball_code/algorithms/hungarian/stem_ISO_8859_1.sbl
@@ -0,0 +1,241 @@
+/*
+Hungarian Stemmer
+Removes noun inflections
+*/
+
+routines (
+    mark_regions
+    R1
+    v_ending
+    case
+    case_special
+    case_other
+    plural
+    owned
+    sing_owner
+    plur_owner
+    instrum
+    factive
+    undouble
+    double
+)
+
+externals ( stem )
+
+integers ( p1 )
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+/* NOTE(review): 0xF5 and 0xFB are o-tilde and u-circumflex in ISO 8859-1; they are the double-acute letters in ISO 8859-2 - confirm the intended input encoding. */
+stringdef a'  hex 'E1'  //a-acute
+stringdef e'  hex 'E9'  //e-acute
+stringdef i'  hex 'ED'  //i-acute
+stringdef o'  hex 'F3'  //o-acute
+stringdef o"  hex 'F6'  //o-umlaut
+stringdef oq  hex 'F5'  //o-double acute
+stringdef u'  hex 'FA'  //u-acute
+stringdef u"  hex 'FC'  //u-umlaut
+stringdef uq  hex 'FB'  //u-double acute
+/* v: the vowel grouping - the five plain vowels plus the nine accented forms defined above */
+define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
+
+define mark_regions as (
+    // Sets p1, the mark tested by R1; it stays at the end of the word if neither case below applies.
+    $p1 = limit
+    // Case 1 - word starts with a vowel: p1 goes after the first non-vowel (or listed multi-letter consonant) that follows.
+    (v goto non-v
+     among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
+     setmark p1)
+    or
+    // Case 2 - word starts with a non-vowel: p1 goes just after the first vowel.
+    (non-v gopast v setmark p1)
+)
+
+backwardmode (
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1 (set by mark_regions)
+
+    define v_ending as (  // replaces a final a-acute / e-acute lying in R1 by plain 'a' / 'e'
+        [substring] R1 among(
+            '{a'}' (<- 'a')
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define double as (  // look-ahead only (`test` restores the cursor): the text before the cursor ends in a listed doubled consonant
+        test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
+        'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
+    )
+
+    define undouble as (  // deletes one character: the one immediately to the left of the character before the cursor
+        next [hop 1] delete  // step over one character, slice the next one to the left, delete the slice
+    )
+
+    define instrum as(  // removes 'al' / 'el' in R1 when a doubled consonant precedes it, then reduces the doubling
+        [substring] R1 among(
+            'al' (double)  // the ending only counts if `double` succeeds on what precedes it
+            'el' (double)
+        )
+        delete  // remove the matched ending
+        undouble  // then delete one letter of the doubled consonant
+    )
+
+
+    define case as (  // removes the longest listed ending that lies in R1, then applies v_ending to what is left
+        [substring] R1 among(
+            'ban' 'ben'
+            'ba' 'be'
+            'ra' 're'
+            'nak' 'nek'
+            'val' 'vel'
+            't{o'}l' 't{oq}l'
+            'r{o'}l' 'r{oq}l'
+            'b{o'}l' 'b{oq}l'
+            'hoz' 'hez' 'h{o"}z'
+            'n{a'}l' 'n{e'}l'
+            'ig'
+            'at' 'et' 'ot' '{o"}t'
+            '{e'}rt'
+            'k{e'}pp' 'k{e'}ppen'
+            'kor'
+            'ul' '{u"}l'
+            'v{a'}' 'v{e'}'
+            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
+            'k{e'}nt'
+            'en' 'on' 'an' '{o"}n'
+            'n'
+            't'
+        )
+        delete  // no entry has its own command, so every match is simply deleted
+        v_ending  // a-acute / e-acute now at the end becomes 'a' / 'e'
+    )
+
+    define case_special as(  // endings in R1 that start with an accented vowel: the whole ending is replaced by the plain vowel
+        [substring] R1 among(
+            '{e'}n' (<- 'e')
+            '{a'}n' (<- 'a')
+            '{a'}nk{e'}nt' (<- 'a')
+        )
+    )
+
+    define case_other as(  // handles the '-stul' family of endings in R1
+        [substring] R1 among(
+            'astul' 'est{u"}l' (delete)
+            'stul' 'st{u"}l' (delete)
+            '{a'}stul' (<- 'a')  // accented forms keep the vowel, without its accent
+            '{e'}st{u"}l' (<- 'e')
+        )
+    )
+
+    define factive as(  // removes a final a-acute / e-acute in R1 when a doubled consonant precedes it, then reduces the doubling
+        [substring] R1 among(
+            '{a'}' (double)  // the ending only counts if `double` succeeds on what precedes it
+            '{e'}' (double)
+        )
+        delete  // remove the matched vowel
+        undouble  // then delete one letter of the doubled consonant
+    )
+
+    define plural as (  // removes a '-k' ending lying in R1; accented-vowel forms are replaced by the plain vowel instead
+        [substring] R1 among(
+            '{a'}k' (<- 'a')
+            '{e'}k' (<- 'e')
+            '{o"}k' (delete)
+            'ak' (delete)
+            'ok' (delete)
+            'ek' (delete)
+            'k' (delete)
+        )
+    )
+
+    define owned as (  // handles endings in R1 built on e-acute (optionally followed by 'i'); longest match wins
+        [substring] R1 among (
+            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
+            '{e'}k{e'}' (<- 'e')  // forms starting with an accented vowel keep that vowel, unaccented
+            '{a'}k{e'}' (<- 'a')
+            'k{e'}' (delete)
+            '{e'}{e'}i' (<- 'e')
+            '{a'}{e'}i' (<- 'a')
+            '{e'}i'  (delete)
+            '{e'}{e'}' (<- 'e')
+            '{e'}' (delete)
+        )
+    )
+
+    define sing_owner as (  // longest listed ending in R1 is deleted, or replaced by 'a' / 'e' when it starts with a-acute / e-acute
+        [substring] R1 among(
+            '{u"}nk' 'unk' (delete)
+            '{a'}nk' (<- 'a')
+            '{e'}nk' (<- 'e')
+            'nk' (delete)
+            '{a'}juk' (<- 'a')
+            '{e'}j{u"}k' (<- 'e')
+            'juk' 'j{u"}k' (delete)
+            'uk' '{u"}k' (delete)
+            'em' 'om' 'am' (delete)
+            '{a'}m' (<- 'a')
+            '{e'}m' (<- 'e')
+            'm' (delete)
+            'od' 'ed' 'ad' '{o"}d' (delete)
+            '{a'}d' (<- 'a')
+            '{e'}d' (<- 'e')
+            'd' (delete)
+            'ja' 'je' (delete)
+            'a' 'e' 'o' (delete)
+            '{a'}' (<- 'a')  // a bare accented final vowel just loses its accent
+            '{e'}' (<- 'e')
+        )
+    )
+
+    define plur_owner as (  // longest listed ending in R1 is deleted, or replaced by 'a' / 'e' when it starts with a-acute / e-acute
+        [substring] R1 among(
+            'jaim' 'jeim' (delete)
+            '{a'}im' (<- 'a')
+            '{e'}im' (<- 'e')
+            'aim' 'eim' (delete)
+            'im' (delete)
+            'jaid' 'jeid' (delete)
+            '{a'}id' (<- 'a')
+            '{e'}id' (<- 'e')
+            'aid' 'eid' (delete)
+            'id' (delete)
+            'jai' 'jei' (delete)
+            '{a'}i' (<- 'a')
+            '{e'}i' (<- 'e')
+            'ai' 'ei' (delete)
+            'i' (delete)
+            'jaink' 'jeink' (delete)
+            'eink' 'aink' (delete)
+            '{a'}ink' (<- 'a')
+            '{e'}ink' (<- 'e')
+            'ink'  // no command of its own: it shares the (delete) that follows the next strings
+            'jaitok' 'jeitek' (delete)
+            'aitok' 'eitek' (delete)
+            '{a'}itok' (<- 'a')
+            '{e'}itek' (<- 'e')
+            'itek' (delete)
+            'jeik' 'jaik' (delete)
+            'aik' 'eik' (delete)
+            '{a'}ik' (<- 'a')
+            '{e'}ik' (<- 'e')
+            'ik' (delete)
+        )
+    )
+)
+
+define stem as (  // external entry point
+    do mark_regions  // compute p1 first; `do` means a failure here does not abort stemming
+    backwards (  // all suffix routines run right-to-left from the end of the word
+      do instrum
+        do case  // each step is in `do`, so it runs whether or not the earlier steps matched
+        do case_special
+        do case_other
+        do factive
+        do owned
+        do sing_owner
+        do plur_owner
+        do plural
+    )
+)
diff --git a/snowball_code/algorithms/italian/stem_ISO_8859_1.sbl b/snowball_code/algorithms/italian/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..8d25cf6
--- /dev/null
+++ b/snowball_code/algorithms/italian/stem_ISO_8859_1.sbl
@@ -0,0 +1,195 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v AEIO CG )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'
+stringdef a`   hex 'E0'
+stringdef e'   hex 'E9'
+stringdef e`   hex 'E8'
+stringdef i'   hex 'ED'
+stringdef i`   hex 'EC'
+stringdef o'   hex 'F3'
+stringdef o`   hex 'F2'
+stringdef u'   hex 'FA'
+stringdef u`   hex 'F9'
+
+define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
+
+define prelude as (  // normalises the word before regions are marked
+    test repeat (  // pass 1 over the whole word; `test` puts the cursor back at the start afterwards
+        [substring] among(
+            '{a'}' (<- '{a`}')  // acute-accented vowels are rewritten as the grave-accented ones
+            '{e'}' (<- '{e`}')
+            '{i'}' (<- '{i`}')
+            '{o'}' (<- '{o`}')
+            '{u'}' (<- '{u`}')
+            'qu'   (<- 'qU')  // the 'u' after 'q' is upper-cased, so it is no longer in grouping v
+            ''     (next)  // nothing matched here: move on one character
+        )
+    )
+    repeat goto (  // pass 2: 'u' or 'i' standing between two vowels is upper-cased
+        v [ ('u' ] v <- 'U') or
+            ('i' ] v <- 'I')
+    )
+)
+
+define mark_regions as (
+    // Computes the marks pV, p1 and p2 that the RV, R1 and R2 tests compare the cursor against.
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+    // pV depends on whether each of the first two letters is a vowel (four cases).
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel+non-vowel: after the next vowel; vowel+vowel: after the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel+non-vowel: after the next vowel; non-vowel+vowel: after the third letter
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting from p1
+    )
+)
+
+define postlude as repeat (
+    // Scans the word and turns the upper-case markers 'I' and 'U' back into lower case.
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character: step over it
+    )
+    // `repeat` stops when `next` fails at the end of the word.
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define attached_pronoun as (  // handles a final pronoun form when one of the verb endings below precedes it
+        [substring] among(  // the slice becomes the longest matching pronoun form; no region test is applied to it
+            'ci' 'gli' 'la' 'le' 'li' 'lo'
+            'mi' 'ne' 'si'  'ti' 'vi'
+            // the compound forms are:
+            'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
+            'mela' 'mele' 'meli' 'melo' 'mene'
+            'tela' 'tele' 'teli' 'telo' 'tene'
+            'cela' 'cele' 'celi' 'celo' 'cene'
+            'vela' 'vele' 'veli' 'velo' 'vene'
+        )
+        among( (RV)  // the leading (RV) applies to every entry: the preceding ending must lie in RV
+            'ando' 'endo'   (delete)  // the slice is still the pronoun, so only the pronoun is deleted
+            'ar' 'er' 'ir'  (<- 'e')  // the pronoun is replaced by 'e'
+        )
+    )
+
+    define standard_suffix as (  // longest listed suffix is found first; its command then checks the region and edits the word
+        [substring] among(
+            // Strings with no command of their own share the next command in the list.
+            'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
+            'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
+            'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
+            'atrice' 'atrici'
+            'ante' 'anti' // Note 1
+               ( R2 delete )
+            'azione' 'azioni' 'atore' 'atori'
+               ( R2 delete
+                 try ( ['ic'] R2 delete )  // also remove a preceding 'ic' that lies in R2
+               )
+            'logia' 'logie'
+               ( R2 <- 'log' )
+            'uzione' 'uzioni' 'usione' 'usioni'
+               ( R2 <- 'u' )
+            'enza' 'enze'
+               ( R2 <- 'ente' )
+            'amento' 'amenti' 'imento' 'imenti'
+               ( RV delete )  // this group is tested against RV rather than R2
+            'amente' (
+                R1 delete  // tested against R1
+                try (
+                    [substring] R2 delete among(  // a preceding 'iv', 'os', 'ic' or 'abil' in R2 is deleted before the dispatch
+                        'iv' ( ['at'] R2 delete )  // after 'iv': also remove a preceding 'at' in R2
+                        'os' 'ic' 'abil'  // no further action for these
+                    )
+                )
+            )
+            'it{a`}' (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' 'ic' 'iv' (R2 delete)  // also remove one of these when it lies in R2
+                    )
+                )
+            )
+            'ivo' 'ivi' 'iva' 'ive' (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] R2 delete )  // remove a preceding 'at' in R2, and then a preceding 'ic' in R2
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // matching is confined to the part of the word from pV to the end
+        [substring] among(
+            'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
+            'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
+            'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
+            'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
+            'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
+            'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
+            'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
+            'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
+            'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
+            'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
+            'ono' 'uta' 'ute' 'uti' 'uto'
+            // every string in this list shares the single (delete) below
+            'ar' 'ir' // but 'er' is problematical
+                (delete)
+        )
+    )
+
+    define AEIO 'aeio{a`}{e`}{i`}{o`}'  // final vowels that vowel_suffix may strip (no 'u')
+    define CG 'cg'  // the letters that must precede a removable 'h'
+    // vowel_suffix: final clean-up; both parts are optional (`try`), so the routine always succeeds.
+    define vowel_suffix as (
+        try (
+            [AEIO] RV delete  // drop a final vowel from AEIO if it lies in RV
+            ['i'] RV delete  // then drop a preceding 'i' if it lies in RV
+        )
+        try (
+            ['h'] CG RV delete  // drop a final 'h' that follows 'c' or 'g', with that letter lying in RV
+        )
+    )
+)
+
+define stem as (  // external entry point; each stage is wrapped in `do`, so all stages run in order
+    do prelude  // normalise accents and mark certain 'u' / 'i' as 'U' / 'I'
+    do mark_regions  // set pV, p1 and p2
+    backwards (  // suffix routines run right-to-left from the end of the word
+        do attached_pronoun
+        do (standard_suffix or verb_suffix)  // verb_suffix is tried only when standard_suffix fails
+        do vowel_suffix
+    )
+    do postlude  // turn 'I' / 'U' back into lower case
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
+
diff --git a/snowball_code/algorithms/italian/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/italian/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..b43295c
--- /dev/null
+++ b/snowball_code/algorithms/italian/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,195 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v AEIO CG )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'
+stringdef a`   hex '85'
+stringdef e'   hex '82'
+stringdef e`   hex '8A'
+stringdef i'   hex 'A1'
+stringdef i`   hex '8D'
+stringdef o'   hex 'A2'
+stringdef o`   hex '95'
+stringdef u'   hex 'A3'
+stringdef u`   hex '97'
+
+define v 'aeiou{a`}{e`}{i`}{o`}{u`}'
+
+define prelude as (  // normalises the word before regions are marked
+    test repeat (  // pass 1 over the whole word; `test` puts the cursor back at the start afterwards
+        [substring] among(
+            '{a'}' (<- '{a`}')  // acute-accented vowels are rewritten as the grave-accented ones
+            '{e'}' (<- '{e`}')
+            '{i'}' (<- '{i`}')
+            '{o'}' (<- '{o`}')
+            '{u'}' (<- '{u`}')
+            'qu'   (<- 'qU')  // the 'u' after 'q' is upper-cased, so it is no longer in grouping v
+            ''     (next)  // nothing matched here: move on one character
+        )
+    )
+    repeat goto (  // pass 2: 'u' or 'i' standing between two vowels is upper-cased
+        v [ ('u' ] v <- 'U') or
+            ('i' ] v <- 'I')
+    )
+)
+
+define mark_regions as (
+    // Computes the marks pV, p1 and p2 that the RV, R1 and R2 tests compare the cursor against.
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+    // pV depends on whether each of the first two letters is a vowel (four cases).
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel+non-vowel: after the next vowel; vowel+vowel: after the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel+non-vowel: after the next vowel; non-vowel+vowel: after the third letter
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same rule applied again, starting from p1
+    )
+)
+
+define postlude as repeat (
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor
+    define R1 as $p1 <= cursor
+    define R2 as $p2 <= cursor
+
+    define attached_pronoun as (  // handles a final pronoun form when one of the verb endings below precedes it
+        [substring] among(  // the slice becomes the longest matching pronoun form; no region test is applied to it
+            'ci' 'gli' 'la' 'le' 'li' 'lo'
+            'mi' 'ne' 'si'  'ti' 'vi'
+            // the compound forms are:
+            'sene' 'gliela' 'gliele' 'glieli' 'glielo' 'gliene'
+            'mela' 'mele' 'meli' 'melo' 'mene'
+            'tela' 'tele' 'teli' 'telo' 'tene'
+            'cela' 'cele' 'celi' 'celo' 'cene'
+            'vela' 'vele' 'veli' 'velo' 'vene'
+        )
+        among( (RV)  // the leading (RV) applies to every entry: the preceding ending must lie in RV
+            'ando' 'endo'   (delete)  // the slice is still the pronoun, so only the pronoun is deleted
+            'ar' 'er' 'ir'  (<- 'e')  // the pronoun is replaced by 'e'
+        )
+    )
+
+    define standard_suffix as (
+        [substring] among(
+
+            'anza' 'anze' 'ico' 'ici' 'ica' 'ice' 'iche' 'ichi' 'ismo'
+            'ismi' 'abile' 'abili' 'ibile' 'ibili' 'ista' 'iste' 'isti'
+            'ist{a`}' 'ist{e`}' 'ist{i`}' 'oso' 'osi' 'osa' 'ose' 'mente'
+            'atrice' 'atrici'
+            'ante' 'anti' // Note 1
+               ( R2 delete )
+            'azione' 'azioni' 'atore' 'atori'
+               ( R2 delete
+                 try ( ['ic'] R2 delete )
+               )
+            'logia' 'logie'
+               ( R2 <- 'log' )
+            'uzione' 'uzioni' 'usione' 'usioni'
+               ( R2 <- 'u' )
+            'enza' 'enze'
+               ( R2 <- 'ente' )
+            'amento' 'amenti' 'imento' 'imenti'
+               ( RV delete )
+            'amente' (
+                R1 delete
+                try (
+                    [substring] R2 delete among(
+                        'iv' ( ['at'] R2 delete )
+                        'os' 'ic' 'abil'
+                    )
+                )
+            )
+            'it{a`}' (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil' 'ic' 'iv' (R2 delete)
+                    )
+                )
+            )
+            'ivo' 'ivi' 'iva' 'ive' (
+                R2 delete
+                try ( ['at'] R2 delete ['ic'] R2 delete )
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (
+        [substring] among(
+            'ammo' 'ando' 'ano' 'are' 'arono' 'asse' 'assero' 'assi'
+            'assimo' 'ata' 'ate' 'ati' 'ato' 'ava' 'avamo' 'avano' 'avate'
+            'avi' 'avo' 'emmo' 'enda' 'ende' 'endi' 'endo' 'er{a`}' 'erai'
+            'eranno' 'ere' 'erebbe' 'erebbero' 'erei' 'eremmo' 'eremo'
+            'ereste' 'eresti' 'erete' 'er{o`}' 'erono' 'essero' 'ete'
+            'eva' 'evamo' 'evano' 'evate' 'evi' 'evo' 'Yamo' 'iamo' 'immo'
+            'ir{a`}' 'irai' 'iranno' 'ire' 'irebbe' 'irebbero' 'irei'
+            'iremmo' 'iremo' 'ireste' 'iresti' 'irete' 'ir{o`}' 'irono'
+            'isca' 'iscano' 'isce' 'isci' 'isco' 'iscono' 'issero' 'ita'
+            'ite' 'iti' 'ito' 'iva' 'ivamo' 'ivano' 'ivate' 'ivi' 'ivo'
+            'ono' 'uta' 'ute' 'uti' 'uto'
+
+            'ar' 'ir' // but 'er' is problematical
+                (delete)
+        )
+    )
+
+    define AEIO 'aeio{a`}{e`}{i`}{o`}'  // final vowels that vowel_suffix may strip (no 'u')
+    define CG 'cg'  // the letters that must precede a removable 'h'
+    // vowel_suffix: final clean-up; both parts are optional (`try`), so the routine always succeeds.
+    define vowel_suffix as (
+        try (
+            [AEIO] RV delete  // drop a final vowel from AEIO if it lies in RV
+            ['i'] RV delete  // then drop a preceding 'i' if it lies in RV
+        )
+        try (
+            ['h'] CG RV delete  // drop a final 'h' that follows 'c' or 'g', with that letter lying in RV
+        )
+    )
+)
+
+define stem as (  // external entry point; each stage is wrapped in `do`, so all stages run in order
+    do prelude  // normalise accents and mark certain 'u' / 'i' as 'U' / 'I'
+    do mark_regions  // set pV, p1 and p2
+    backwards (  // suffix routines run right-to-left from the end of the word
+        do attached_pronoun
+        do (standard_suffix or verb_suffix)  // verb_suffix is tried only when standard_suffix fails
+        do vowel_suffix
+    )
+    do postlude  // turn 'I' / 'U' back into lower case
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
+
diff --git a/snowball_code/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl b/snowball_code/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..cd79d12
--- /dev/null
+++ b/snowball_code/algorithms/kraaij_pohlmann/stem_ISO_8859_1.sbl
@@ -0,0 +1,245 @@
+strings ( ch )
+integers ( x p1 p2 )
+booleans ( Y_found stemmed GE_removed )
+
+routines (
+
+   R1 R2
+   C V VX
+   lengthen_V
+   Step_1 Step_2 Step_3 Step_4 Step_7
+   Step_6 Step_1c
+   Lose_prefix
+   Lose_infix
+   measure
+)
+
+externals ( stem )
+
+groupings ( v v_WX AOU AIOU )
+
+stringescapes {}
+
+stringdef '   hex '27'  // yuk
+
+define v        'aeiouy'
+define v_WX     v + 'wx'
+define AOU      'aou'
+define AIOU     'aiou'
+
+backwardmode (
+
+    define R1 as (setmark x $x >= p1)  // records the cursor in x, then succeeds when x is at or after p1
+    define R2 as (setmark x $x >= p2)  // same test against p2
+    // V, VX and C are look-aheads: `test` leaves the cursor where it was.
+    define V  as test (v or 'ij')  // a vowel or 'ij' comes next
+    define VX as test (next v or 'ij')  // skipping one character, a vowel or 'ij' comes next
+    define C  as test (not 'ij' non-v)  // a non-vowel comes next, and the text is not 'ij'
+
+    define lengthen_V as do (  // doubles a qualifying vowel that stands before a final letter outside v_WX; `do` makes it always succeed
+        non-v_WX [ (AOU] test (non-v or atlimit)) or  // 'a', 'o' or 'u' with a non-vowel, or nothing, before it
+                   ('e'] test (non-v or atlimit  // or 'e', which must also pass the two `not` checks below
+                               not AIOU
+                               not (next AIOU non-v)))
+        ->ch insert ch  // copy the sliced vowel into ch and insert the copy at the cursor
+    )
+
+    define Step_1 as
+    (
+        [among ( (])  // `[` opens the slice at the cursor; the leading (]) closes it after whichever ending matched
+            // '{'}' is the apostrophe character defined by the stringdef near the top of this file
+            '{'}s' (delete)
+            's'    (R1 not ('t' R1) C delete)  // in R1, not after a 't' that is itself in R1, and after a non-vowel
+            'ies'  (R1 <-'ie')
+            'es'
+                   (('ar' R1 C ] delete lengthen_V) or  // a later `]` moves the slice boundary so the preceding letters are included
+                    ('er' R1 C ] delete) or
+                    (R1 C <-'e'))
+
+            'aus'  (R1 V <-'au')
+            'en'   (('hed' R1 ] <-'heid') or
+                    ('nd' delete) or
+                    ('d' R1 C ] delete) or
+                    ('i' or 'j' V delete) or
+                    (R1 C delete lengthen_V))  // default case: remove 'en' and let lengthen_V adjust the stem
+            'nde'  (<-'nd')
+        )
+    )
+
+    define Step_2 as
+    (
+        [among ( (])
+            'je'   (('{'}t' ] delete) or
+                    ('et'   ] R1 C delete) or
+                    ('rnt'  ] <-'rn') or
+                    ('t'    ] R1 VX delete) or
+                    ('ink'  ] <-'ing') or
+                    ('mp'   ] <-'m') or
+                    ('{'}'  ] R1 delete) or
+                    (] R1 C delete))
+            'ge'   (R1 <-'g')
+            'lijke'(R1 <-'lijk')
+            'ische'(R1 <-'isch')
+            'de'   (R1 C delete)
+            'te'   (R1 <-'t')
+            'se'   (R1 <-'s')
+            're'   (R1 <-'r')
+            'le'   (R1 delete attach 'l' lengthen_V)
+            'ene'  (R1 C delete attach 'en' lengthen_V)
+            'ieve' (R1 C <-'ief')
+        )
+    )
+
+    define Step_3 as
+    (
+        [among ( (])
+            'atie'  (R1 <-'eer')
+            'iteit' (R1 delete lengthen_V)
+            'heid'
+            'sel'
+            'ster'  (R1 delete)
+            'rder'  (<-'r')
+            'ing'
+            'isme'
+            'erij'  (R1 delete lengthen_V)
+            'arij'  (R1 C <-'aar')
+            'fie'   (R2 delete attach 'f' lengthen_V)
+            'gie'   (R2 delete attach 'g' lengthen_V)
+            'tst'   (R1 C <-'t')
+            'dst'   (R1 C <-'d')
+        )
+    )
+
+    define Step_4 as
+    (
+        (   [among ( (])
+                'ioneel'  (R1 <-'ie')
+                'atief'   (R1 <-'eer')
+                'baar'    (R1 delete)
+                'naar'    (R1 V <-'n')
+                'laar'    (R1 V <-'l')
+                'raar'    (R1 V <-'r')
+                'tant'    (R1 <-'teer')
+                'lijker'
+                'lijkst'  (R1 <-'lijk')
+                'achtig'
+                'achtiger'
+                'achtigst'(R1 delete)
+                'eriger'
+                'erigst'
+                'erig'
+                'end'     (R1 C delete lengthen_V)
+            )
+        )
+        or
+        (   [among ( (])
+                'iger'
+                'igst'
+                'ig'      (R1 C delete lengthen_V)
+            )
+        )
+    )
+
+    define Step_7 as
+    (
+        [among ( (])
+            'kt'   (<-'k')
+            'ft'   (<-'f')
+            'pt'   (<-'p')
+        )
+    )
+
+    define Step_6 as
+    (
+        [among ( (])
+            'bb'   (<-'b')
+            'cc'   (<-'c')
+            'dd'   (<-'d')
+            'ff'   (<-'f')
+            'gg'   (<-'g')
+            'hh'   (<-'h')
+            'jj'   (<-'j')
+            'kk'   (<-'k')
+            'll'   (<-'l')
+            'mm'   (<-'m')
+            'nn'   (<-'n')
+            'pp'   (<-'p')
+            'qq'   (<-'q')
+            'rr'   (<-'r')
+            'ss'   (<-'s')
+            'tt'   (<-'t')
+            'vv'   (<-'v')
+            'ww'   (<-'w')
+            'xx'   (<-'x')
+            'zz'   (<-'z')
+            'v'    (<-'f')
+            'z'    (<-'s')
+        )
+    )
+
+    define Step_1c as  // removes a final 'd' or 't' that lies in R1 and follows a non-vowel, with the exceptions below
+    (
+        [among ( (] R1 C)  // the leading command closes the slice and applies R1 and C to every entry
+            'd' (not ('n' R1) delete)  // kept when it follows an 'n' that lies in R1
+            't' (not ('h' R1) delete)  // kept when it follows an 'h' that lies in R1
+        )
+    )
+)
+
+define Lose_prefix as (  // forward-mode routine: removes 'ge' found at the cursor when enough of the word follows
+    ['ge'] test hop 3 (goto v goto non-v)  // needs 3 more characters, and a vowel followed later by a non-vowel
+    set GE_removed  // flag read by stem
+    delete  // the slice is still the 'ge'
+)
+
+define Lose_infix as (  // forward-mode routine: removes the first 'ge' found after skipping one character
+    next  // skip one character, so a 'ge' at the cursor itself is not considered
+    gopast (['ge']) test hop 3 (goto v goto non-v)  // same follow-up conditions as Lose_prefix
+    set GE_removed  // flag read by stem
+    delete  // the slice is the 'ge' that was found
+)
+
+define measure as (  // (re)computes marks p1 and p2; both parts are in `do`, so the routine always succeeds
+    do (
+        tolimit  // defaults: both marks at the end of the word
+        setmark p1
+        setmark p2
+    )
+    do(
+        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p1  // p1: after leading non-vowels, one or more vowels / 'ij', and one non-vowel
+        repeat non-v  atleast 1 ('ij' or v)  non-v  setmark p2  // p2: the same pattern matched again, starting from p1
+    )
+
+)
+define stem as (
+    // External entry point. 'y' in certain positions is first changed to 'Y' and changed back at the end.
+    unset Y_found
+    unset stemmed
+    do ( ['y'] <-'Y' set Y_found )  // a 'y' at the very start of the word
+    do repeat(goto (v  ['y'])<-'Y' set Y_found )  // every 'y' that directly follows a vowel
+
+    measure  // set p1 and p2
+
+    backwards (
+            do (Step_1 set stemmed )  // `stemmed` records that at least one step succeeded
+            do (Step_2 set stemmed )
+            do (Step_3 set stemmed )
+            do (Step_4 set stemmed )
+    )
+    unset GE_removed
+    do (Lose_prefix and measure)  // `and` runs measure from the same starting cursor once Lose_prefix has succeeded
+    backwards (
+            do (GE_removed Step_1c)  // Step_1c runs only if a 'ge' was just removed
+        )
+    unset GE_removed
+    do (Lose_infix and measure)
+    backwards (
+            do (GE_removed Step_1c)
+        )
+    backwards (
+            do (Step_7 set stemmed )
+            do (stemmed or GE_removed Step_6)  // Step_6 runs only if something was changed above
+        )
+    do(Y_found  repeat(goto (['Y']) <-'y'))  // restore every 'Y' to 'y'
+)
+
diff --git a/snowball_code/algorithms/lovins/stem_ISO_8859_1.sbl b/snowball_code/algorithms/lovins/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..3f69f15
--- /dev/null
+++ b/snowball_code/algorithms/lovins/stem_ISO_8859_1.sbl
@@ -0,0 +1,208 @@
+
+stringescapes {}
+
+routines (
+   A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA BB CC
+
+   endings
+
+   undouble respell
+)
+
+externals ( stem )
+
+backwardmode (
+
+  /* Lovins' conditions A, B ... CC, as given in her Appendix B, where
+     a test for a two letter prefix ('test hop 2') is implicitly
+     assumed. Note that 'e' next 'u' corresponds to her u*e because
+     Snowball is scanning backwards. */
+
+  define A  as ( hop 2 )  // at least 2 letters must precede the ending
+  define B  as ( hop 3 )  // at least 3 letters
+  define C  as ( hop 4 )  // at least 4 letters
+  define D  as ( hop 5 )  // at least 5 letters
+  define E  as ( test hop 2 not 'e' )  // not after 'e'
+  define F  as ( test hop 3 not 'e' )  // at least 3 letters, and not after 'e'
+  define G  as ( test hop 3 'f' )  // at least 3 letters, and only after 'f'
+  define H  as ( test hop 2 't' or 'll' )  // only after 't' or 'll'
+  define I  as ( test hop 2 not 'o' not 'e' )  // not after 'o' or 'e'
+  define J  as ( test hop 2 not 'a' not 'e' )  // not after 'a' or 'e'
+  define K  as ( test hop 3 'l' or 'i' or ('e' next 'u') )  // at least 3 letters; only after 'l', 'i', or 'u' + any letter + 'e'
+  define L  as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )  // not after 'u' or 'x', nor after 's' unless that 's' follows 'o'
+  define M  as ( test hop 2 not 'a' not 'c' not 'e' not 'm' )  // not after 'a', 'c', 'e' or 'm'
+  define N  as ( test hop 3 ( hop 2 not 's' or hop 2 ) )  // at least 3 letters; at least 4 when the stem ends in 's' + two letters
+  define O  as ( test hop 2 'l' or 'i' )  // only after 'l' or 'i'
+  define P  as ( test hop 2 not 'c' )  // not after 'c'
+  define Q  as ( test hop 2 test hop 3 not 'l' not 'n' )  // at least 3 letters; not after 'l' or 'n'
+  define R  as ( test hop 2 'n' or 'r' )  // only after 'n' or 'r'
+  define S  as ( test hop 2 'dr' or ('t' not 't') )  // only after 'dr', or after a 't' that does not follow another 't'
+  define T  as ( test hop 2 's' or ('t' not 'o') )  // only after 's', or after a 't' that does not follow 'o'
+  define U  as ( test hop 2 'l' or 'm' or 'n' or 'r' )  // only after 'l', 'm', 'n' or 'r'
+  define V  as ( test hop 2 'c' )  // only after 'c'
+  define W  as ( test hop 2 not 's' not 'u' )  // not after 's' or 'u'
+  define X  as ( test hop 2 'l' or 'i' or ('e' next 'u') )  // only after 'l', 'i', or 'u' + any letter + 'e'
+  define Y  as ( test hop 2 'in' )  // only after 'in'
+  define Z  as ( test hop 2 not 'f' )  // not after 'f'
+  define AA as ( test hop 2 among ( 'd' 'f' 'ph' 'th' 'l' 'er' 'or'
+                                    'es' 't' ) )  // only after one of the listed letter groups
+  define BB as ( test hop 3 not 'met' not 'ryst' )  // at least 3 letters; not after 'met' or 'ryst'
+  define CC as ( test hop 2 'l' )  // only after 'l'
+
+
+  /* The system of endings, as given in Appendix A. */
+
+  define endings as (
+    [substring] among(
+    'alistically' B 'arizability' A 'izationally' B
+
+     'antialness' A  'arisations' A  'arizations' A  'entialness' A
+
+      'allically' C   'antaneous' A   'antiality' A   'arisation' A
+      'arization' A   'ationally' B   'ativeness' A   'eableness' E
+      'entations' A   'entiality' A   'entialize' A   'entiation' A
+      'ionalness' A   'istically' A   'itousness' A   'izability' A
+      'izational' A
+
+       'ableness' A    'arizable' A    'entation' A    'entially' A
+       'eousness' A    'ibleness' A    'icalness' A    'ionalism' A
+       'ionality' A    'ionalize' A    'iousness' A    'izations' A
+       'lessness' A
+
+        'ability' A     'aically' A     'alistic' B     'alities' A
+        'ariness' E     'aristic' A     'arizing' A     'ateness' A
+        'atingly' A     'ational' B     'atively' A     'ativism' A
+        'elihood' E     'encible' A     'entally' A     'entials' A
+        'entiate' A     'entness' A     'fulness' A     'ibility' A
+        'icalism' A     'icalist' A     'icality' A     'icalize' A
+        'ication' G     'icianry' A     'ination' A     'ingness' A
+        'ionally' A     'isation' A     'ishness' A     'istical' A
+        'iteness' A     'iveness' A     'ivistic' A     'ivities' A
+        'ization' F     'izement' A     'oidally' A     'ousness' A
+
+         'aceous' A      'acious' B      'action' G      'alness' A
+         'ancial' A      'ancies' A      'ancing' B      'ariser' A
+         'arized' A      'arizer' A      'atable' A      'ations' B
+         'atives' A      'eature' Z      'efully' A      'encies' A
+         'encing' A      'ential' A      'enting' C      'entist' A
+         'eously' A      'ialist' A      'iality' A      'ialize' A
+         'ically' A      'icance' A      'icians' A      'icists' A
+         'ifully' A      'ionals' A      'ionate' D      'ioning' A
+         'ionist' A      'iously' A      'istics' A      'izable' E
+         'lessly' A      'nesses' A      'oidism' A
+
+          'acies' A       'acity' A       'aging' B       'aical' A
+          'alist' A       'alism' B       'ality' A       'alize' A
+          'allic'BB       'anced' B       'ances' B       'antic' C
+          'arial' A       'aries' A       'arily' A       'arity' B
+          'arize' A       'aroid' A       'ately' A       'ating' I
+          'ation' B       'ative' A       'ators' A       'atory' A
+          'ature' E       'early' Y       'ehood' A       'eless' A
+          'elity' A       'ement' A       'enced' A       'ences' A
+          'eness' E       'ening' E       'ental' A       'ented' C
+          'ently' A       'fully' A       'ially' A       'icant' A
+          'ician' A       'icide' A       'icism' A       'icist' A
+          'icity' A       'idine' I       'iedly' A       'ihood' A
+          'inate' A       'iness' A       'ingly' B       'inism' J
+          'inity'CC       'ional' A       'ioned' A       'ished' A
+          'istic' A       'ities' A       'itous' A       'ively' A
+          'ivity' A       'izers' F       'izing' F       'oidal' A
+          'oides' A       'otide' A       'ously' A
+
+           'able' A        'ably' A        'ages' B        'ally' B
+           'ance' B        'ancy' B        'ants' B        'aric' A
+           'arly' K        'ated' I        'ates' A        'atic' B
+           'ator' A        'ealy' Y        'edly' E        'eful' A
+           'eity' A        'ence' A        'ency' A        'ened' E
+           'enly' E        'eous' A        'hood' A        'ials' A
+           'ians' A        'ible' A        'ibly' A        'ical' A
+           'ides' L        'iers' A        'iful' A        'ines' M
+           'ings' N        'ions' B        'ious' A        'isms' B
+           'ists' A        'itic' H        'ized' F        'izer' F
+           'less' A        'lily' A        'ness' A        'ogen' A
+           'ward' A        'wise' A        'ying' B        'yish' A
+
+            'acy' A         'age' B         'aic' A         'als'BB
+            'ant' B         'ars' O         'ary' F         'ata' A
+            'ate' A         'eal' Y         'ear' Y         'ely' E
+            'ene' E         'ent' C         'ery' E         'ese' A
+            'ful' A         'ial' A         'ian' A         'ics' A
+            'ide' L         'ied' A         'ier' A         'ies' P
+            'ily' A         'ine' M         'ing' N         'ion' Q
+            'ish' C         'ism' B         'ist' A         'ite'AA
+            'ity' A         'ium' A         'ive' A         'ize' F
+            'oid' A         'one' R         'ous' A
+
+             'ae' A          'al'BB          'ar' X          'as' B
+             'ed' E          'en' F          'es' E          'ia' A
+             'ic' A          'is' A          'ly' B          'on' S
+             'or' T          'um' U          'us' V          'yl' R
+           '{'}s' A        's{'}' A
+
+              'a' A           'e' A           'i' A           'o' A
+              's' W           'y' B
+
+        (delete)
+    )
+  )
+
+  /* Undoubling is rule 1 of appendix C. */
+
+  define undouble as (  // if the word ends in one of the listed double letters, its last letter is deleted
+    test substring among ('bb' 'dd' 'gg' 'll' 'mm' 'nn' 'pp' 'rr' 'ss'
+                          'tt')
+    [next] delete  // the slice is the single final character
+  )
+
+  /* The other appendix C rules can be done together. */
+
+  define respell as (
+    [substring] among (
+      'iev'  (<-'ief')
+      'uct'  (<-'uc')
+      'umpt' (<-'um')
+      'rpt'  (<-'rb')
+      'urs'  (<-'ur')
+      'istr' (<-'ister')
+      'metr' (<-'meter')
+      'olv'  (<-'olut')
+      'ul'   (not 'a' not 'i' not 'o' <-'l')
+      'bex'  (<-'bic')
+      'dex'  (<-'dic')
+      'pex'  (<-'pic')
+      'tex'  (<-'tic')
+      'ax'   (<-'ac')
+      'ex'   (<-'ec')
+      'ix'   (<-'ic')
+      'lux'  (<-'luc')
+      'uad'  (<-'uas')
+      'vad'  (<-'vas')
+      'cid'  (<-'cis')
+      'lid'  (<-'lis')
+      'erid' (<-'eris')
+      'pand' (<-'pans')
+      'end'  (not 's' <-'ens')
+      'ond'  (<-'ons')
+      'lud'  (<-'lus')
+      'rud'  (<-'rus')
+      'her'  (not 'p' not 't' <-'hes')
+      'mit'  (<-'mis')
+      'ent'  (not 'm' <-'ens')
+        /* 'ent' was 'end' in the 1968 paper - a typo. */
+      'ert'  (<-'ers')
+      'et'   (not 'n' <-'es')
+      'yt'   (<-'ys')
+      'yz'   (<-'ys')
+    )
+  )
+)
+
+define stem as (
+  // External entry point: the three stages run right-to-left, each in `do`, so all of them run in order.
+  backwards (
+    do endings  // remove the longest ending whose condition holds
+    do undouble  // reduce a final double letter
+    do respell  // rewrite the end of the stem
+  )
+)
+
diff --git a/snowball_code/algorithms/norwegian/stem_ISO_8859_1.sbl b/snowball_code/algorithms/norwegian/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..94a0716
--- /dev/null
+++ b/snowball_code/algorithms/norwegian/stem_ISO_8859_1.sbl
@@ -0,0 +1,80 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef ae   hex 'E6'
+stringdef ao   hex 'E5'
+stringdef o/   hex 'F8'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'bcdfghjlmnoprtvyz'
+
+define mark_regions as (  /* computes p1, the mark that bounds suffix matching in the routines below */
+
+    $p1 = limit  /* default: p1 at the end of the word */
+
+    test ( hop 3 setmark x )  /* x = position three characters in; the routine fails here if hop 3 is not possible */
+    goto v  gopast non-v  setmark p1  /* p1 = just past the first non-vowel that follows a vowel */
+    try ( $p1 < x  $p1 = x )  /* but never earlier than x */
+)
+
+backwardmode (
+
+    define main_suffix as (
+        setlimit tomark p1 for ([substring])  /* the ending is searched for only between the cursor and p1 */
+        among(
+
+            'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
+            'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
+            'hetens' 'ers' 'ets' 'et' 'het' 'ast'
+                (delete)
+            's'
+                (s_ending or ('k' non-v) delete)  /* drop 's' only after an s_ending letter, or after 'k' that itself follows a non-vowel */
+            'erte' 'ert'
+                (<-'er')
+        )
+    )
+
+    define consonant_pair as (  /* a word ending in 'dt' or 'vt' (searched up to p1) loses its final letter */
+        test (
+            setlimit tomark p1 for ([substring])
+            among(
+                'dt' 'vt'
+            )
+        )
+        next] delete  /* the test restored the cursor; step back one character, close the slice opened by '[' above, delete it */
+    )
+
+    define other_suffix as (  /* removes one of the listed endings when it is found between the cursor and p1 */
+        setlimit tomark p1 for ([substring])
+        among(
+            'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
+            'hetslov'
+                (delete)
+        )
+    )
+)
+
+define stem as (  /* entry point: mark the region, then run the three suffix passes from the end of the word */
+
+    do mark_regions
+    backwards (
+        do main_suffix  /* each pass is wrapped in `do`, so a failing pass does not stop the later ones */
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/snowball_code/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..f574833
--- /dev/null
+++ b/snowball_code/algorithms/norwegian/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,80 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef ae   hex '91'
+stringdef ao   hex '86'
+stringdef o/   hex '9B'
+
+define v 'aeiouy{ae}{ao}{o/}'
+
+define s_ending  'bcdfghjlmnoprtvyz'
+
+define mark_regions as (  /* computes p1, the mark that bounds suffix matching in the routines below */
+
+    $p1 = limit  /* default: p1 at the end of the word */
+
+    test ( hop 3 setmark x )  /* x = position three characters in; the routine fails here if hop 3 is not possible */
+    goto v  gopast non-v  setmark p1  /* p1 = just past the first non-vowel that follows a vowel */
+    try ( $p1 < x  $p1 = x )  /* but never earlier than x */
+)
+
+backwardmode (
+
+    define main_suffix as (
+        setlimit tomark p1 for ([substring])  /* the ending is searched for only between the cursor and p1 */
+        among(
+
+            'a' 'e' 'ede' 'ande' 'ende' 'ane' 'ene' 'hetene' 'en' 'heten' 'ar'
+            'er' 'heter' 'as' 'es' 'edes' 'endes' 'enes' 'hetenes' 'ens'
+            'hetens' 'ers' 'ets' 'et' 'het' 'ast'
+                (delete)
+            's'
+                (s_ending or ('k' non-v) delete)  /* drop 's' only after an s_ending letter, or after 'k' that itself follows a non-vowel */
+            'erte' 'ert'
+                (<-'er')
+        )
+    )
+
+    define consonant_pair as (  /* a word ending in 'dt' or 'vt' (searched up to p1) loses its final letter */
+        test (
+            setlimit tomark p1 for ([substring])
+            among(
+                'dt' 'vt'
+            )
+        )
+        next] delete  /* the test restored the cursor; step back one character, close the slice opened by '[' above, delete it */
+    )
+
+    define other_suffix as (  /* removes one of the listed endings when it is found between the cursor and p1 */
+        setlimit tomark p1 for ([substring])
+        among(
+            'leg' 'eleg' 'ig' 'eig' 'lig' 'elig' 'els' 'lov' 'elov' 'slov'
+            'hetslov'
+                (delete)
+        )
+    )
+)
+
+define stem as (  /* entry point: mark the region, then run the three suffix passes from the end of the word */
+
+    do mark_regions
+    backwards (
+        do main_suffix  /* each pass is wrapped in `do`, so a failing pass does not stop the later ones */
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/snowball_code/algorithms/porter/stem_ISO_8859_1.sbl b/snowball_code/algorithms/porter/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..9533b79
--- /dev/null
+++ b/snowball_code/algorithms/porter/stem_ISO_8859_1.sbl
@@ -0,0 +1,139 @@
+integers ( p1 p2 )
+booleans ( Y_found )
+
+routines (
+   shortv
+   R1 R2
+   Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
+)
+
+externals ( stem )
+
+groupings ( v v_WXY )
+
+define v        'aeiouy'  // vowel grouping; note that lower-case y is included
+define v_WXY    v + 'wxY'  // the vowels plus w, x and Y (Y is the marker that `stem` writes in place of some y's)
+
+backwardmode (
+
+    define shortv as ( non-v_WXY v non-v )  // reading backwards from the cursor: a letter outside v_WXY, then a vowel, then a non-vowel
+
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define Step_1a as (  // plural endings
+        [substring] among (
+            'sses' (<-'ss')
+            'ies'  (<-'i')
+            'ss'   ()  // listed so that the longer 'ss' match pre-empts the 's' rule; nothing is changed
+            's'    (delete)
+        )
+    )
+
+    define Step_1b as (  // 'eed', 'ed' and 'ing' endings
+        [substring] among (
+            'eed'  (R1 <-'ee')
+            'ed'
+            'ing' (
+                test gopast v  delete  // remove 'ed'/'ing' only if a vowel occurs somewhere before it
+                test substring among(
+                    'at' 'bl' 'iz'
+                         (<+ 'e')  // insert an 'e' at the cursor
+                    'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
+                    // ignoring double c, h, j, k, q, v, w, and x
+                         ([next]  delete)  // remove one letter of the doubled pair
+                    ''   (atmark p1  test shortv  <+ 'e')  // otherwise: insert 'e' when the cursor is exactly at p1 and shortv matches
+                )
+            )
+        )
+    )
+
+    define Step_1c as (  // final y or Y becomes i ...
+        ['y' or 'Y']
+        gopast v  // ... provided a vowel occurs somewhere before it
+        <-'i'
+    )
+
+    define Step_2 as (  // maps the listed endings to shorter forms
+        [substring] R1 among (  // the R1 test runs after the ending is matched and before its action
+            'tional'  (<-'tion')
+            'enci'    (<-'ence')
+            'anci'    (<-'ance')
+            'abli'    (<-'able')
+            'entli'   (<-'ent')
+            'eli'     (<-'e')
+            'izer' 'ization'
+                      (<-'ize')
+            'ational' 'ation' 'ator'
+                      (<-'ate')
+            'alli'    (<-'al')
+            'alism' 'aliti'
+                      (<-'al')
+            'fulness' (<-'ful')
+            'ousli' 'ousness'
+                      (<-'ous')
+            'iveness' 'iviti'
+                      (<-'ive')
+            'biliti'  (<-'ble')
+        )
+    )
+
+    define Step_3 as (  // further ending reductions, again guarded by R1
+        [substring] R1 among (
+            'alize'   (<-'al')
+            'icate' 'iciti' 'ical'
+                      (<-'ic')
+            'ative' 'ful' 'ness'
+                      (delete)
+        )
+    )
+
+    define Step_4 as (  // deletes the listed endings when the R2 test succeeds
+        [substring] R2 among (
+            'al' 'ance' 'ence' 'er' 'ic' 'able' 'ible' 'ant' 'ement'
+            'ment' 'ent' 'ou' 'ism' 'ate' 'iti' 'ous' 'ive' 'ize'
+                      (delete)
+            'ion'     ('s' or 't' delete)  // 'ion' goes only when preceded by 's' or 't'
+        )
+    )
+
+    define Step_5a as (  // final 'e' removal
+        ['e']
+        R2 or (R1 not shortv)  // allowed if the R2 test succeeds, or the R1 test succeeds and shortv does not match
+        delete
+    )
+
+    define Step_5b as (  // final 'l' of a double 'll' is removed
+        ['l']
+        R2 'l'  // the R2 test must succeed and the preceding letter must also be 'l'
+        delete
+    )
+)
+
+define stem as (  // entry point
+
+    unset Y_found
+    do ( ['y'] <-'Y' set Y_found)  // a y at the very start of the word becomes Y
+    do repeat(goto (v ['y']) <-'Y' set Y_found)  // so does every y that directly follows a vowel
+
+    $p1 = limit
+    $p2 = limit
+    do(
+        gopast v  gopast non-v  setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v  gopast non-v  setmark p2  // p2: the same step repeated starting from p1
+    )
+
+    backwards (
+        do Step_1a  // every step is wrapped in `do`, so a failing step does not stop the later ones
+        do Step_1b
+        do Step_1c
+        do Step_2
+        do Step_3
+        do Step_4
+        do Step_5a
+        do Step_5b
+    )
+
+    do(Y_found  repeat(goto (['Y']) <-'y'))  // turn every Y back into y, but only if one was introduced
+
+)
diff --git a/snowball_code/algorithms/portuguese/stem_ISO_8859_1.sbl b/snowball_code/algorithms/portuguese/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..3e7da08
--- /dev/null
+++ b/snowball_code/algorithms/portuguese/stem_ISO_8859_1.sbl
@@ -0,0 +1,218 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           verb_suffix
+           residual_suffix
+           residual_form
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'  // a-acute
+stringdef a^   hex 'E2'  // a-circumflex e.g. 'bota^nico'
+stringdef e'   hex 'E9'  // e-acute
+stringdef e^   hex 'EA'  // e-circumflex
+stringdef i'   hex 'ED'  // i-acute
+stringdef o^   hex 'F4'  // o-circumflex
+stringdef o'   hex 'F3'  // o-acute
+stringdef u'   hex 'FA'  // u-acute
+stringdef c,   hex 'E7'  // c-cedilla
+
+stringdef a~   hex 'E3'  // a-tilde
+stringdef o~   hex 'F5'  // o-tilde
+
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
+
+define prelude as repeat (  // rewrites a-tilde / o-tilde as the two-character sequences 'a~' / 'o~'
+    [substring] among(
+        '{a~}' (<- 'a~')
+        '{o~}' (<- 'o~')
+        ''     (next)  // any other character: just step over it
+    ) //or next
+)
+
+define mark_regions as (  // sets the marks pV, p1 and p2 tested by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: vowel+non-vowel -> past the next vowel; vowel+vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: two non-vowels -> past the next vowel; non-vowel+vowel -> one further character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same step repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // inverse of prelude: 'a~' / 'o~' go back to the single tilde characters
+    [substring] among(
+        'a~' (<- '{a~}')
+        'o~' (<- '{o~}')
+        ''   (next)  // any other character: just step over it
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define standard_suffix as (  // every action below is guarded by a region test (R1, R2 or RV)
+        [substring] among(
+
+            'eza' 'ezas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            '{a'}vel'
+            '{i'}vel'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amento' 'amentos'
+            'imento' 'imentos'
+
+           'adora' 'ador' 'a{c,}a~o'
+           'adoras' 'adores' 'a{c,}o~es'  // no -ic test
+           'ante' 'antes' '{a^}ncia' // Note 1
+            (
+                R2 delete
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (
+                R2 <- 'u'
+            )
+            '{e^}ncia' '{e^}ncias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete
+                try (  // optionally also strip a preceding iv/os/ic/ad in R2
+                    [substring] R2 delete among(
+                        'iv' (['at'] R2 delete)  // after 'iv', a preceding 'at' in R2 goes too
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'ante' // Note 1
+                        'avel'
+                        '{i'}vel' (R2 delete)
+                    )
+                )
+            )
+            'idade'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+            'ira' 'iras'
+            (
+                RV 'e'  // -eira -eiras usually non-verbal
+                <- 'ir'
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // endings are searched only between the cursor and pV
+        [substring] among(
+            'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
+            'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
+            'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
+            'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
+            'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
+            'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
+            'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
+            'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
+            'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
+            'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
+            '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
+            '{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
+            '{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
+            'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
+            'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
+            '{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
+
+            'ira' 'iras'
+                (delete)  // one shared action: the matched ending is removed
+        )
+    )
+
+    define residual_suffix as (  // strips one of the listed short endings
+        [substring] among(
+            'os'
+            'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // only when the RV test succeeds
+        )
+    )
+
+    define residual_form as (
+        [substring] among(
+            'e' '{e'}' '{e^}'
+                ( RV delete [('u'] test 'g') or  // after removing the e: also drop a 'u' that follows 'g' ...
+                             ('i'] test 'c') RV delete )  // ... or an 'i' that follows 'c', if the RV test succeeds
+            '{c,}' (<-'c')  // final c-cedilla becomes plain c
+        )
+    )
+)
+
+define stem as (  // entry point
+    do prelude
+    do mark_regions
+    backwards (
+        do (
+            ( ( standard_suffix or verb_suffix )  // verb_suffix is tried only if standard_suffix fails
+              and do ( ['i'] test 'c' RV delete )  // then a final 'i' preceded by 'c' is removed if the RV test succeeds
+            )
+            or residual_suffix  // reached only when neither of the two routines above succeeded
+        )
+        do residual_form
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/snowball_code/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..4d6c852
--- /dev/null
+++ b/snowball_code/algorithms/portuguese/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,218 @@
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           standard_suffix
+           verb_suffix
+           residual_suffix
+           residual_form
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'  // a-acute
+stringdef a^   hex '83'  // a-circumflex e.g. 'bota^nico'
+stringdef e'   hex '82'  // e-acute
+stringdef e^   hex '88'  // e-circumflex
+stringdef i'   hex 'A1'  // i-acute
+stringdef o^   hex '93'  // o-circumflex
+stringdef o'   hex 'A2'  // o-acute
+stringdef u'   hex 'A3'  // u-acute
+stringdef c,   hex '87'  // c-cedilla
+
+stringdef a~   hex 'C6'  // a-tilde
+stringdef o~   hex 'E4'  // o-tilde
+
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{a^}{e^}{o^}'
+
+define prelude as repeat (  // rewrites a-tilde / o-tilde as the two-character sequences 'a~' / 'o~'
+    [substring] among(
+        '{a~}' (<- 'a~')
+        '{o~}' (<- 'o~')
+        ''     (next)  // any other character: just step over it
+    ) //or next
+)
+
+define mark_regions as (  // sets the marks pV, p1 and p2 tested by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: vowel+non-vowel -> past the next vowel; vowel+vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: two non-vowels -> past the next vowel; non-vowel+vowel -> one further character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same step repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // inverse of prelude: 'a~' / 'o~' go back to the single tilde characters
+    [substring] among(
+        'a~' (<- '{a~}')
+        'o~' (<- '{o~}')
+        ''   (next)  // any other character: just step over it
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define standard_suffix as (  // every action below is guarded by a region test (R1, R2 or RV)
+        [substring] among(
+
+            'eza' 'ezas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            '{a'}vel'
+            '{i'}vel'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amento' 'amentos'
+            'imento' 'imentos'
+
+           'adora' 'ador' 'a{c,}a~o'
+           'adoras' 'adores' 'a{c,}o~es'  // no -ic test
+           'ante' 'antes' '{a^}ncia' // Note 1
+            (
+                R2 delete
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (
+                R2 <- 'u'
+            )
+            '{e^}ncia' '{e^}ncias'
+            (
+                R2 <- 'ente'
+            )
+            'amente'
+            (
+                R1 delete
+                try (  // optionally also strip a preceding iv/os/ic/ad in R2
+                    [substring] R2 delete among(
+                        'iv' (['at'] R2 delete)  // after 'iv', a preceding 'at' in R2 goes too
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'ante' // Note 1
+                        'avel'
+                        '{i'}vel' (R2 delete)
+                    )
+                )
+            )
+            'idade'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+            'ira' 'iras'
+            (
+                RV 'e'  // -eira -eiras usually non-verbal
+                <- 'ir'
+            )
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // endings are searched only between the cursor and pV
+        [substring] among(
+            'ada' 'ida' 'ia' 'aria' 'eria' 'iria' 'ar{a'}' 'ara' 'er{a'}'
+            'era' 'ir{a'}' 'ava' 'asse' 'esse' 'isse' 'aste' 'este' 'iste'
+            'ei' 'arei' 'erei' 'irei' 'am' 'iam' 'ariam' 'eriam' 'iriam'
+            'aram' 'eram' 'iram' 'avam' 'em' 'arem' 'erem' 'irem' 'assem'
+            'essem' 'issem' 'ado' 'ido' 'ando' 'endo' 'indo' 'ara~o'
+            'era~o' 'ira~o' 'ar' 'er' 'ir' 'as' 'adas' 'idas' 'ias'
+            'arias' 'erias' 'irias' 'ar{a'}s' 'aras' 'er{a'}s' 'eras'
+            'ir{a'}s' 'avas' 'es' 'ardes' 'erdes' 'irdes' 'ares' 'eres'
+            'ires' 'asses' 'esses' 'isses' 'astes' 'estes' 'istes' 'is'
+            'ais' 'eis' '{i'}eis' 'ar{i'}eis' 'er{i'}eis' 'ir{i'}eis'
+            '{a'}reis' 'areis' '{e'}reis' 'ereis' '{i'}reis' 'ireis'
+            '{a'}sseis' '{e'}sseis' '{i'}sseis' '{a'}veis' 'ados' 'idos'
+            '{a'}mos' 'amos' '{i'}amos' 'ar{i'}amos' 'er{i'}amos'
+            'ir{i'}amos' '{a'}ramos' '{e'}ramos' '{i'}ramos' '{a'}vamos'
+            'emos' 'aremos' 'eremos' 'iremos' '{a'}ssemos' '{e^}ssemos'
+            '{i'}ssemos' 'imos' 'armos' 'ermos' 'irmos' 'eu' 'iu' 'ou'
+
+            'ira' 'iras'
+                (delete)  // one shared action: the matched ending is removed
+        )
+    )
+
+    define residual_suffix as (  // strips one of the listed short endings
+        [substring] among(
+            'os'
+            'a' 'i' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // only when the RV test succeeds
+        )
+    )
+
+    define residual_form as (
+        [substring] among(
+            'e' '{e'}' '{e^}'
+                ( RV delete [('u'] test 'g') or  // after removing the e: also drop a 'u' that follows 'g' ...
+                             ('i'] test 'c') RV delete )  // ... or an 'i' that follows 'c', if the RV test succeeds
+            '{c,}' (<-'c')  // final c-cedilla becomes plain c
+        )
+    )
+)
+
+define stem as (  // entry point
+    do prelude
+    do mark_regions
+    backwards (
+        do (
+            ( ( standard_suffix or verb_suffix )  // verb_suffix is tried only if standard_suffix fails
+              and do ( ['i'] test 'c' RV delete )  // then a final 'i' preceded by 'c' is removed if the RV test succeeds
+            )
+            or residual_suffix  // reached only when neither of the two routines above succeeded
+        )
+        do residual_form
+    )
+    do postlude
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/snowball_code/algorithms/romanian/stem_ISO_8859_2.sbl b/snowball_code/algorithms/romanian/stem_ISO_8859_2.sbl
new file mode 100644
index 0000000..48a1483
--- /dev/null
+++ b/snowball_code/algorithms/romanian/stem_ISO_8859_2.sbl
@@ -0,0 +1,236 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           step_0
+           standard_suffix combo_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+booleans  ( standard_suffix_removed )
+
+stringescapes {}
+
+/* special characters */
+
+stringdef a^   hex 'E2'  // a circumflex
+stringdef i^   hex 'EE'  // i circumflex
+stringdef a+   hex 'E3'  // a breve
+stringdef s,   hex 'BA'  // s cedilla
+stringdef t,   hex 'FE'  // t cedilla
+
+define v 'aeiou{a^}{i^}{a+}'
+
+define prelude as (  // upper-cases every 'u' or 'i' that stands between two vowels
+    repeat goto (
+        v [ ('u' ] v <- 'U') or
+            ('i' ] v <- 'I')
+    )
+)
+
+define mark_regions as (  // sets the marks pV, p1 and p2 tested by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: vowel+non-vowel -> past the next vowel; vowel+vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: two non-vowels -> past the next vowel; non-vowel+vowel -> one further character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same step repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // turns the 'I' / 'U' markers written by prelude back into lower case
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character: just step over it
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define step_0 as (  // first pass: the listed endings are removed or shortened
+        [substring] R1 among(  // the R1 test runs after the ending is matched and before its action
+            'ul' 'ului'
+                ( delete )
+            'aua'
+                ( <-'a' )
+            'ea' 'ele' 'elor'
+                ( <-'e' )
+            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
+                ( <-'i')
+            'ile'
+                ( not 'ab' <- 'i' )  // skipped when 'ab' precedes 'ile'
+            'atei'
+                ( <- 'at' )
+            'a{t,}ie' 'a{t,}ia'
+                ( <- 'a{t,}i' )
+        )
+    )
+
+    define combo_suffix as test (  // wrapped in `test`, so the cursor is restored after a successful rewrite
+        [substring] R1 (
+            among(
+            /* 'IST'. alternative: include the following
+                'alism' 'alisme'
+                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
+                    <- 'al'
+                )
+            */
+                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
+                    <- 'abil'
+                )
+                'ibilitate' (
+                    <- 'ibil'
+                )
+                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
+                    <- 'iv'
+                )
+                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
+                'icator' 'icatori'
+                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
+                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
+                    <- 'ic'
+                )
+                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
+                'atoare' 'ator' 'atori'
+                '{a+}toare' '{a+}tor' '{a+}tori' (
+                    <- 'at'
+                )
+                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
+                'itoare' 'itor' 'itori' (
+                    <- 'it'
+                )
+            )
+            set standard_suffix_removed  // recorded for the check in `stem`
+        )
+    )
+
+    define standard_suffix as (
+        unset standard_suffix_removed
+        repeat combo_suffix  // reduce compound endings for as long as combo_suffix keeps succeeding
+        [substring] R2 (
+            among(
+
+                // past participle is treated here, rather than
+                // as a verb ending:
+                'at' 'ata' 'at{a+}' 'ati' 'ate'
+                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
+                'it' 'ita' 'it{a+}' 'iti' 'ite'
+
+                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
+                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
+                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
+                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
+                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
+                'ator' 'atori'
+                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
+                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
+                    delete
+                )
+                'iune' 'iuni' (
+                    '{t,}'] <- 't'  // requires a preceding t-cedilla; it and the ending are replaced by plain 't'
+                )
+                'ism' 'isme'
+                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
+                    <- 'ist'
+                    /* 'IST'. alternative: remove with <- '' */
+                )
+            )
+            set standard_suffix_removed  // recorded for the check in `stem`
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // endings are searched only between the cursor and pV
+        [substring] among(
+            // 'long' infinitive:
+            'are' 'ere' 'ire' '{a^}re'
+
+            // gerund:
+            'ind' '{a^}nd'
+            'indu' '{a^}ndu'
+
+            'eze'
+            'easc{a+}'
+            // present:
+            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
+            'e{s,}te'
+            '{a+}sc' '{a+}{s,}ti'
+            '{a+}{s,}te'
+
+            // imperfect:
+            'am' 'ai' 'au'
+            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
+            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
+
+            // past: // (not 'ii')
+            'ui'
+            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
+            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
+            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
+            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
+
+            // pluperfect:
+            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
+            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
+            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
+            '{a^}ser{a+}'
+            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
+
+                ( non-v or 'u'  delete )  // endings above: removed only after a non-vowel or after 'u'
+
+            // present:
+            '{a+}m' 'a{t,}i'
+            'em' 'e{t,}i'
+            'im' 'i{t,}i'
+            '{a^}m' '{a^}{t,}i'
+
+            // past:
+            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
+            'sei' 'se'
+
+            // pluperfect:
+            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
+                (delete)  // endings in this second group: removed unconditionally
+        )
+    )
+
+    define vowel_suffix as (  // removes a final a, e, i, ie or a-breve when the RV test succeeds
+        [substring] RV among (
+            'a' 'e' 'i' 'ie' '{a+}' ( delete )
+        )
+    )
+)
+
+define stem as (  // entry point
+    do prelude
+    do mark_regions
+    backwards (
+        do step_0
+        do standard_suffix
+        do ( standard_suffix_removed or verb_suffix )  // verb_suffix runs only if no standard suffix was removed
+        do vowel_suffix
+    )
+    do postlude
+)
+
diff --git a/snowball_code/algorithms/romanian/stem_Unicode.sbl b/snowball_code/algorithms/romanian/stem_Unicode.sbl
new file mode 100644
index 0000000..09aec64
--- /dev/null
+++ b/snowball_code/algorithms/romanian/stem_Unicode.sbl
@@ -0,0 +1,236 @@
+
+routines (
+           prelude postlude mark_regions
+           RV R1 R2
+           step_0
+           standard_suffix combo_suffix
+           verb_suffix
+           vowel_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+booleans  ( standard_suffix_removed )
+
+stringescapes {}
+
+/* special characters */
+
+stringdef a^   hex '0E2'  // a circumflex
+stringdef i^   hex '0EE'  // i circumflex
+stringdef a+   hex '103'  // a breve
+stringdef s,   hex '15F'  // s cedilla
+stringdef t,   hex '163'  // t cedilla
+
+define v 'aeiou{a^}{i^}{a+}'
+
+define prelude as (  // upper-cases every 'u' or 'i' that stands between two vowels
+    repeat goto (
+        v [ ('u' ] v <- 'U') or
+            ('i' ] v <- 'I')
+    )
+)
+
+define mark_regions as (  // sets the marks pV, p1 and p2 tested by RV, R1 and R2
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit // defaults
+
+    do (
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel first: vowel+non-vowel -> past the next vowel; vowel+vowel -> past the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // non-vowel first: two non-vowels -> past the next vowel; non-vowel+vowel -> one further character
+        setmark pV
+    )
+    do (
+        gopast v gopast non-v setmark p1  // p1: just past the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p2  // p2: the same step repeated starting from p1
+    )
+)
+
+define postlude as repeat (  // turns the 'I' / 'U' markers written by prelude back into lower case
+
+    [substring] among(
+        'I'  (<- 'i')
+        'U'  (<- 'u')
+        ''   (next)  // any other character: just step over it
+    )
+
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or after mark pV
+    define R1 as $p1 <= cursor  // succeeds when the cursor is at or after mark p1
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or after mark p2
+
+    define step_0 as (  // first pass: the listed endings are removed or shortened
+        [substring] R1 among(  // the R1 test runs after the ending is matched and before its action
+            'ul' 'ului'
+                ( delete )
+            'aua'
+                ( <-'a' )
+            'ea' 'ele' 'elor'
+                ( <-'e' )
+            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
+                ( <-'i')
+            'ile'
+                ( not 'ab' <- 'i' )  // skipped when 'ab' precedes 'ile'
+            'atei'
+                ( <- 'at' )
+            'a{t,}ie' 'a{t,}ia'
+                ( <- 'a{t,}i' )
+        )
+    )
+
+    define combo_suffix as test (  // wrapped in `test`, so the cursor is restored after a successful rewrite
+        [substring] R1 (
+            among(
+            /* 'IST'. alternative: include the following
+                'alism' 'alisme'
+                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
+                    <- 'al'
+                )
+            */
+                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
+                    <- 'abil'
+                )
+                'ibilitate' (
+                    <- 'ibil'
+                )
+                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
+                    <- 'iv'
+                )
+                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
+                'icator' 'icatori'
+                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
+                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
+                    <- 'ic'
+                )
+                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
+                'atoare' 'ator' 'atori'
+                '{a+}toare' '{a+}tor' '{a+}tori' (
+                    <- 'at'
+                )
+                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
+                'itoare' 'itor' 'itori' (
+                    <- 'it'
+                )
+            )
+            set standard_suffix_removed  // recorded for the check in `stem`
+        )
+    )
+
+    define standard_suffix as (
+        unset standard_suffix_removed
+        repeat combo_suffix  // reduce compound endings for as long as combo_suffix keeps succeeding
+        [substring] R2 (
+            among(
+
+                // past participle is treated here, rather than
+                // as a verb ending:
+                'at' 'ata' 'at{a+}' 'ati' 'ate'
+                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
+                'it' 'ita' 'it{a+}' 'iti' 'ite'
+
+                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
+                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
+                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
+                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
+                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
+                'ator' 'atori'
+                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
+                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
+                    delete
+                )
+                'iune' 'iuni' (
+                    '{t,}'] <- 't'  // requires a preceding t-cedilla; it and the ending are replaced by plain 't'
+                )
+                'ism' 'isme'
+                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
+                    <- 'ist'
+                    /* 'IST'. alternative: remove with <- '' */
+                )
+            )
+            set standard_suffix_removed  // recorded for the check in `stem`
+        )
+    )
+
+    define verb_suffix as setlimit tomark pV for (  // endings are searched only between the cursor and pV
+        [substring] among(
+            // 'long' infinitive:
+            'are' 'ere' 'ire' '{a^}re'
+
+            // gerund:
+            'ind' '{a^}nd'
+            'indu' '{a^}ndu'
+
+            'eze'
+            'easc{a+}'
+            // present:
+            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
+            'e{s,}te'
+            '{a+}sc' '{a+}{s,}ti'
+            '{a+}{s,}te'
+
+            // imperfect:
+            'am' 'ai' 'au'
+            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
+            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
+
+            // past: // (not 'ii')
+            'ui'
+            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
+            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
+            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
+            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
+
+            // pluperfect:
+            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
+            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
+            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
+            '{a^}ser{a+}'
+            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
+
+                ( non-v or 'u'  delete )  // endings above: removed only after a non-vowel or after 'u'
+
+            // present:
+            '{a+}m' 'a{t,}i'
+            'em' 'e{t,}i'
+            'im' 'i{t,}i'
+            '{a^}m' '{a^}{t,}i'
+
+            // past:
+            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
+            'sei' 'se'
+
+            // pluperfect:
+            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
+                (delete)  // endings in this second group: removed unconditionally
+        )
+    )
+
+    define vowel_suffix as (  // removes a final a, e, i, ie or a-breve when the RV test succeeds
+        [substring] RV among (
+            'a' 'e' 'i' 'ie' '{a+}' ( delete )
+        )
+    )
+)
+
+define stem as (  // entry point
+    do prelude
+    do mark_regions
+    backwards (
+        do step_0
+        do standard_suffix
+        do ( standard_suffix_removed or verb_suffix )  // verb_suffix runs only if no standard suffix was removed
+        do vowel_suffix
+    )
+    do postlude
+)
+
diff --git a/snowball_code/algorithms/russian/stem_KOI8_R.sbl b/snowball_code/algorithms/russian/stem_KOI8_R.sbl
new file mode 100644
index 0000000..cdacb19
--- /dev/null
+++ b/snowball_code/algorithms/russian/stem_KOI8_R.sbl
@@ -0,0 +1,217 @@
+stringescapes {}
+
+/* the 32 Cyrillic letters in the KOI8-R coding scheme, and represented
+   in Latin characters following the conventions of the standard Library
+   of Congress transliteration: */
+
+stringdef a    hex 'C1'
+stringdef b    hex 'C2'
+stringdef v    hex 'D7'
+stringdef g    hex 'C7'
+stringdef d    hex 'C4'
+stringdef e    hex 'C5'
+stringdef zh   hex 'D6'
+stringdef z    hex 'DA'
+stringdef i    hex 'C9'
+stringdef i`   hex 'CA'
+stringdef k    hex 'CB'
+stringdef l    hex 'CC'
+stringdef m    hex 'CD'
+stringdef n    hex 'CE'
+stringdef o    hex 'CF'
+stringdef p    hex 'D0'
+stringdef r    hex 'D2'
+stringdef s    hex 'D3'
+stringdef t    hex 'D4'
+stringdef u    hex 'D5'
+stringdef f    hex 'C6'
+stringdef kh   hex 'C8'
+stringdef ts   hex 'C3'
+stringdef ch   hex 'DE'
+stringdef sh   hex 'DB'
+stringdef shch hex 'DD'
+stringdef "    hex 'DF'
+stringdef y    hex 'D9'
+stringdef '    hex 'D8'
+stringdef e`   hex 'DC'
+stringdef iu   hex 'C0'
+stringdef ia   hex 'D1'
+
+routines ( mark_regions R2
+           perfective_gerund
+           adjective
+           adjectival
+           reflexive
+           verb
+           noun
+           derivational
+           tidy_up
+)
+
+externals ( stem )
+
+integers ( pV p2 )
+
+groupings ( v )
+
+define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'  // the nine letters treated as vowels by mark_regions
+
+define mark_regions as (  // computes pV and p2; both stay at the end of the word if the scan fails
+
+    $pV = limit
+    $p2 = limit
+    do (
+        gopast v  setmark pV  gopast non-v  // pV = just after the first vowel; then skip to after the next non-vowel
+        gopast v  gopast non-v  setmark p2  // p2 = just after the first non-vowel following the next vowel
+       )
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or to the right of p2
+
+    define perfective_gerund as (  // strip a perfective gerund ending (longest match); fails if none applies
+        [substring] among (
+            '{v}'
+            '{v}{sh}{i}'
+            '{v}{sh}{i}{s}{'}'
+                ('{a}' or '{ia}' delete)  // group 1: removed only after {a} or {ia}; that letter is kept
+            '{i}{v}'
+            '{i}{v}{sh}{i}'
+            '{i}{v}{sh}{i}{s}{'}'
+            '{y}{v}'
+            '{y}{v}{sh}{i}'
+            '{y}{v}{sh}{i}{s}{'}'
+                (delete)  // group 2: removed unconditionally
+        )
+    )
+
+    define adjective as (  // strip an adjective ending (longest match); fails if none is present
+        [substring] among (
+            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
+            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
+            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
+            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
+            '{ia}{ia}'
+                        // and -
+            '{o}{iu}'   // - which is somewhat archaic
+            '{e}{iu}'   // - soft form of {o}{iu}
+                (delete)  // single action shared by every ending listed above
+        )
+    )
+
+    define adjectival as (  // an adjective ending, optionally followed by a participle ending
+        adjective  // must succeed, otherwise adjectival as a whole fails
+
+        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
+           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
+           errors. Removing im, uem, enn creates too many errors.
+        */
+
+        try (  // optional step: finding no participle ending does not fail adjectival
+            [substring] among (
+                '{e}{m}'                  // present passive participle
+                '{n}{n}'                  // adjective from past passive participle
+                '{v}{sh}'                 // past active participle
+                '{iu}{shch}' '{shch}'     // present active participle
+                    ('{a}' or '{ia}' delete)  // removed only after {a} or {ia}; that letter is kept
+
+     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
+     //or       '{e}{n}{n}'               // adjective from past passive participle
+
+                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
+                '{u}{iu}{shch}'          // present active participle
+                    (delete)  // removed unconditionally
+            )
+        )
+
+    )
+
+    define reflexive as (  // strip a reflexive ending; fails if neither is present
+        [substring] among (
+            '{s}{ia}'
+            '{s}{'}'
+                (delete)  // shared by both endings
+        )
+    )
+
+    define verb as (  // strip a verb ending (longest match); fails if none applies
+        [substring] among (
+            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
+            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
+            '{n}{y}' '{t}{'}' '{e}{sh}{'}'
+
+            '{n}{n}{o}'
+                ('{a}' or '{ia}' delete)  // group 1: removed only after {a} or {ia}; that letter is kept
+
+            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
+            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
+            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
+            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
+            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
+            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
+                (delete)  // group 2: removed unconditionally
+            /* note the short passive participle tests:
+               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
+            */
+        )
+    )
+
+    define noun as (  // strip a noun ending (longest match); fails if none is present
+        [substring] among (
+            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
+            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
+            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
+            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
+            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
+            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
+                (delete)  // single action shared by every ending listed above
+            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
+               omitted - they only occur on 12 words.
+            */
+        )
+    )
+
+    define derivational as (  // strip -ost / -ost' when the ending starts at or after p2
+        [substring] R2 among (  // the R2 test runs after the match and before the action
+            '{o}{s}{t}'
+            '{o}{s}{t}{'}'
+                (delete)  // shared by both endings
+        )
+    )
+
+    define tidy_up as (  // final clean-up: superlative ending, doubled {n}, trailing soft sign
+        [substring] among (
+
+            '{e}{i`}{sh}'
+            '{e}{i`}{sh}{e}'  // superlative forms
+               (delete  // drop the superlative ending, then ...
+                ['{n}'] '{n}' delete  // ... if what remains ends in {n}{n}, drop the last {n}
+               )
+            '{n}'
+               ('{n}' delete) // e.g. -nno endings: the final {n} goes only when another {n} precedes it
+            '{'}'
+               (delete)  // with some slight false conflations
+        )
+    )
+)
+
+define stem as (  // external entry point; edits the current word in place
+
+    do mark_regions
+    backwards setlimit tomark pV for (  // scan from the end of the word, with pV as the left limit
+        do (
+             perfective_gerund or  // if a gerund ending is removed, the other classes are not tried
+             ( try reflexive  // otherwise an optional reflexive ending goes first
+               adjectival or verb or noun  // then the first of these that succeeds
+             )
+        )
+        try([ '{i}' ] delete)
+        // because noun ending -i{iu} is being treated as verb ending -{iu}
+
+        do derivational
+        do tidy_up
+    )
+)
diff --git a/snowball_code/algorithms/russian/stem_Unicode.sbl b/snowball_code/algorithms/russian/stem_Unicode.sbl
new file mode 100644
index 0000000..9e1a93f
--- /dev/null
+++ b/snowball_code/algorithms/russian/stem_Unicode.sbl
@@ -0,0 +1,215 @@
+stringescapes {}
+
+/* the 32 Cyrillic letters in Unicode */
+
+stringdef a    hex '430'
+stringdef b    hex '431'
+stringdef v    hex '432'
+stringdef g    hex '433'
+stringdef d    hex '434'
+stringdef e    hex '435'
+stringdef zh   hex '436'
+stringdef z    hex '437'
+stringdef i    hex '438'
+stringdef i`   hex '439'
+stringdef k    hex '43A'
+stringdef l    hex '43B'
+stringdef m    hex '43C'
+stringdef n    hex '43D'
+stringdef o    hex '43E'
+stringdef p    hex '43F'
+stringdef r    hex '440'
+stringdef s    hex '441'
+stringdef t    hex '442'
+stringdef u    hex '443'
+stringdef f    hex '444'
+stringdef kh   hex '445'
+stringdef ts   hex '446'
+stringdef ch   hex '447'
+stringdef sh   hex '448'
+stringdef shch hex '449'
+stringdef "    hex '44A'
+stringdef y    hex '44B'
+stringdef '    hex '44C'
+stringdef e`   hex '44D'
+stringdef iu   hex '44E'
+stringdef ia   hex '44F'
+
+routines ( mark_regions R2
+           perfective_gerund
+           adjective
+           adjectival
+           reflexive
+           verb
+           noun
+           derivational
+           tidy_up
+)
+
+externals ( stem )
+
+integers ( pV p2 )
+
+groupings ( v )
+
+define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'  // the nine letters treated as vowels by mark_regions
+
+define mark_regions as (  // computes pV and p2; both stay at the end of the word if the scan fails
+
+    $pV = limit
+    $p2 = limit
+    do (
+        gopast v  setmark pV  gopast non-v  // pV = just after the first vowel; then skip to after the next non-vowel
+        gopast v  gopast non-v  setmark p2  // p2 = just after the first non-vowel following the next vowel
+       )
+)
+
+backwardmode (
+
+    define R2 as $p2 <= cursor  // succeeds when the cursor is at or to the right of p2
+
+    define perfective_gerund as (  // strip a perfective gerund ending (longest match); fails if none applies
+        [substring] among (
+            '{v}'
+            '{v}{sh}{i}'
+            '{v}{sh}{i}{s}{'}'
+                ('{a}' or '{ia}' delete)  // group 1: removed only after {a} or {ia}; that letter is kept
+            '{i}{v}'
+            '{i}{v}{sh}{i}'
+            '{i}{v}{sh}{i}{s}{'}'
+            '{y}{v}'
+            '{y}{v}{sh}{i}'
+            '{y}{v}{sh}{i}{s}{'}'
+                (delete)  // group 2: removed unconditionally
+        )
+    )
+
+    define adjective as (  // strip an adjective ending (longest match); fails if none is present
+        [substring] among (
+            '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
+            '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
+            '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
+            '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
+            '{ia}{ia}'
+                        // and -
+            '{o}{iu}'   // - which is somewhat archaic
+            '{e}{iu}'   // - soft form of {o}{iu}
+                (delete)  // single action shared by every ending listed above
+        )
+    )
+
+    define adjectival as (  // an adjective ending, optionally followed by a participle ending
+        adjective  // must succeed, otherwise adjectival as a whole fails
+
+        /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
+           nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
+           errors. Removing im, uem, enn creates too many errors.
+        */
+
+        try (  // optional step: finding no participle ending does not fail adjectival
+            [substring] among (
+                '{e}{m}'                  // present passive participle
+                '{n}{n}'                  // adjective from past passive participle
+                '{v}{sh}'                 // past active participle
+                '{iu}{shch}' '{shch}'     // present active participle
+                    ('{a}' or '{ia}' delete)  // removed only after {a} or {ia}; that letter is kept
+
+     //but not  '{i}{m}' '{u}{e}{m}'      // present passive participle
+     //or       '{e}{n}{n}'               // adjective from past passive participle
+
+                '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
+                '{u}{iu}{shch}'          // present active participle
+                    (delete)  // removed unconditionally
+            )
+        )
+
+    )
+
+    define reflexive as (  // strip a reflexive ending; fails if neither is present
+        [substring] among (
+            '{s}{ia}'
+            '{s}{'}'
+                (delete)  // shared by both endings
+        )
+    )
+
+    define verb as (  // strip a verb ending (longest match); fails if none applies
+        [substring] among (
+            '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
+            '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
+            '{n}{y}' '{t}{'}' '{e}{sh}{'}'
+
+            '{n}{n}{o}'
+                ('{a}' or '{ia}' delete)  // group 1: removed only after {a} or {ia}; that letter is kept
+
+            '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
+            '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
+            '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
+            '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
+            '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
+            '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
+                (delete)  // group 2: removed unconditionally
+            /* note the short passive participle tests:
+               '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
+            */
+        )
+    )
+
+    define noun as (  // strip a noun ending (longest match); fails if none is present
+        [substring] among (
+            '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
+            '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
+            '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
+            '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
+            '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
+            '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
+                (delete)  // single action shared by every ending listed above
+            /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
+               '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
+               omitted - they only occur on 12 words.
+            */
+        )
+    )
+
+    define derivational as (  // strip -ost / -ost' when the ending starts at or after p2
+        [substring] R2 among (  // the R2 test runs after the match and before the action
+            '{o}{s}{t}'
+            '{o}{s}{t}{'}'
+                (delete)  // shared by both endings
+        )
+    )
+
+    define tidy_up as (  // final clean-up: superlative ending, doubled {n}, trailing soft sign
+        [substring] among (
+
+            '{e}{i`}{sh}'
+            '{e}{i`}{sh}{e}'  // superlative forms
+               (delete  // drop the superlative ending, then ...
+                ['{n}'] '{n}' delete  // ... if what remains ends in {n}{n}, drop the last {n}
+               )
+            '{n}'
+               ('{n}' delete) // e.g. -nno endings: the final {n} goes only when another {n} precedes it
+            '{'}'
+               (delete)  // with some slight false conflations
+        )
+    )
+)
+
+define stem as (  // external entry point; edits the current word in place
+
+    do mark_regions
+    backwards setlimit tomark pV for (  // scan from the end of the word, with pV as the left limit
+        do (
+             perfective_gerund or  // if a gerund ending is removed, the other classes are not tried
+             ( try reflexive  // otherwise an optional reflexive ending goes first
+               adjectival or verb or noun  // then the first of these that succeeds
+             )
+        )
+        try([ '{i}' ] delete)
+        // because noun ending -i{iu} is being treated as verb ending -{iu}
+
+        do derivational
+        do tidy_up
+    )
+)
diff --git a/snowball_code/algorithms/spanish/stem_ISO_8859_1.sbl b/snowball_code/algorithms/spanish/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..9dee289
--- /dev/null
+++ b/snowball_code/algorithms/spanish/stem_ISO_8859_1.sbl
@@ -0,0 +1,230 @@
+routines (
+           postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           y_verb_suffix
+           verb_suffix
+           residual_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a'   hex 'E1'  // a-acute
+stringdef e'   hex 'E9'  // e-acute
+stringdef i'   hex 'ED'  // i-acute
+stringdef o'   hex 'F3'  // o-acute
+stringdef u'   hex 'FA'  // u-acute
+stringdef u"   hex 'FC'  // u-diaeresis
+stringdef n~   hex 'F1'  // n-tilde
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'  // vowels: a e i o u, their acute forms, and u-diaeresis
+
+define mark_regions as (  // computes pV, p1 and p2; each stays at the end of the word if its scan fails
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (  // pV depends on the shape of the first two letters
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel + non-vowel: after the next vowel; vowel + vowel: after the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // two non-vowels: after the next vowel; non-vowel + vowel: after the third letter
+        setmark pV
+    )
+    do (  // p1 and p2 are each set just after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p1
+        gopast v gopast non-v setmark p2  // the same scan, continued from p1
+    )
+)
+
+define postlude as repeat (  // walk the word left to right, replacing acute-accented vowels by plain ones
+    [substring] among(
+        '{a'}' (<- 'a')
+        '{e'}' (<- 'e')
+        '{i'}' (<- 'i')
+        '{o'}' (<- 'o')
+        '{u'}' (<- 'u')
+        // and possibly {u"}->u here, or in prelude
+        ''     (next)  // any other character is stepped over; the repeat stops once 'next' fails
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or to the right of pV
+    define R1 as $p1 <= cursor  // likewise for p1
+    define R2 as $p2 <= cursor  // likewise for p2
+
+    define attached_pronoun as (  // remove a pronoun attached to a gerund or infinitive ending
+        [substring] among(  // the slice is the pronoun itself; nothing is changed yet
+            'me' 'se'  'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
+            'las' 'les' 'los' 'nos'
+        )
+        substring RV among(  // the verb ending just before the pronoun must start in RV
+            'i{e'}ndo' (] <- 'iendo')  // accented forms: slice widened to ending + pronoun, replaced by the unaccented ending
+            '{a'}ndo'  (] <- 'ando')
+            '{a'}r'    (] <- 'ar')
+            '{e'}r'    (] <- 'er')
+            '{i'}r'    (] <- 'ir')
+            'ando'
+            'iendo'
+            'ar' 'er' 'ir'
+                       (delete)  // unaccented forms: only the pronoun is deleted
+            'yendo'    ('u' delete)  // pronoun deleted only when 'u' precedes 'yendo'
+        )
+    )
+
+    define standard_suffix as (  // handle one derivational suffix (longest match); fails if none applies
+        [substring] among(
+
+            'anza' 'anzas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            'able' 'ables'
+            'ible' 'ibles'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amiento' 'amientos'
+            'imiento' 'imientos'
+            (  // plain removal when the suffix is in R2
+                R2 delete
+            )
+            'adora' 'ador' 'aci{o'}n'
+            'adoras' 'adores' 'aciones'
+            'ante' 'antes' 'ancia' 'ancias'// Note 1
+            (  // removal, then also a preceding 'ic' if that is in R2 too
+                R2 delete
+                try ( ['ic'] R2 delete )
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (  // replaced by 'log'
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (  // replaced by 'u'
+                R2 <- 'u'
+            )
+            'encia' 'encias'
+            (  // replaced by 'ente'
+                R2 <- 'ente'
+            )
+            'amente'
+            (  // note the region test here is R1, not R2
+                R1 delete
+                try (
+                    [substring] R2 delete among(  // a further suffix in R2 is deleted before its own action runs
+                        'iv' (['at'] R2 delete)  // after 'iv', a preceding 'at' in R2 is deleted as well
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'ante' // Note 1
+                        'able'
+                        'ible' (R2 delete)  // action shared by 'ante', 'able' and 'ible'
+                    )
+                )
+            )
+            'idad'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)  // action shared by 'abil', 'ic' and 'iv'
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+        )
+    )
+
+    define y_verb_suffix as (  // strip a verb ending that begins with 'y'
+        setlimit tomark pV for ([substring]) among(  // only the suffix search is limited to the part after pV
+            'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
+            'yas' 'yes' 'yais' 'yamos'
+                ('u' delete)  // removed only when preceded by 'u'; the 'u' is kept
+        )
+    )
+
+    define verb_suffix as (  // strip a regular verb ending (longest match)
+        setlimit tomark pV for ([substring]) among(  // only the suffix search is limited to the part after pV
+
+            'en' 'es' '{e'}is' 'emos'
+                (try ('u' test 'g') ] delete)  // after 'gu', the slice is widened so the 'u' is deleted too
+
+            'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
+            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
+            'ar{e'}'
+            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
+            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
+            'er{e'}'
+            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
+            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
+            'ir{e'}'
+
+            'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
+            'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
+            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
+            'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
+            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
+            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
+            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
+            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
+            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
+                (delete)  // every ending from 'ar{i'}an' onwards is removed unconditionally
+        )
+    )
+
+    define residual_suffix as (  // strip a leftover final vowel (or 'os') that lies in RV
+        [substring] among(
+            'os'
+            'a' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // shared by 'os' and the five vowels above
+            'e' '{e'}'
+                ( RV delete try( ['u'] test 'g' RV delete ) )  // then the 'u' of a preceding 'gu' goes too, if that 'u' is in RV
+        )
+    )
+)
+
+define stem as (  // external entry point; every step is wrapped in 'do', so none can fail stem
+    do mark_regions
+    backwards (  // suffix steps scan from the end of the word
+        do attached_pronoun
+        do ( standard_suffix or  // the three alternatives are tried in order until one succeeds
+             y_verb_suffix or
+             verb_suffix
+           )
+        do residual_suffix
+    )
+    do postlude  // forwards again: acute accents are removed last
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/snowball_code/algorithms/spanish/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..db7a462
--- /dev/null
+++ b/snowball_code/algorithms/spanish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,230 @@
+routines (
+           postlude mark_regions
+           RV R1 R2
+           attached_pronoun
+           standard_suffix
+           y_verb_suffix
+           verb_suffix
+           residual_suffix
+)
+
+externals ( stem )
+
+integers ( pV p1 p2 )
+
+groupings ( v )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a'   hex 'A0'  // a-acute
+stringdef e'   hex '82'  // e-acute
+stringdef i'   hex 'A1'  // i-acute
+stringdef o'   hex 'A2'  // o-acute
+stringdef u'   hex 'A3'  // u-acute
+stringdef u"   hex '81'  // u-diaeresis
+stringdef n~   hex 'A4'  // n-tilde
+
+define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u"}'  // vowels: a e i o u, their acute forms, and u-diaeresis
+
+define mark_regions as (  // computes pV, p1 and p2; each stays at the end of the word if its scan fails
+
+    $pV = limit
+    $p1 = limit
+    $p2 = limit  // defaults
+
+    do (  // pV depends on the shape of the first two letters
+        ( v (non-v gopast v) or (v gopast non-v) )  // vowel + non-vowel: after the next vowel; vowel + vowel: after the next non-vowel
+        or
+        ( non-v (non-v gopast v) or (v next) )  // two non-vowels: after the next vowel; non-vowel + vowel: after the third letter
+        setmark pV
+    )
+    do (  // p1 and p2 are each set just after the first non-vowel that follows a vowel
+        gopast v gopast non-v setmark p1
+        gopast v gopast non-v setmark p2  // the same scan, continued from p1
+    )
+)
+
+define postlude as repeat (  // walk the word left to right, replacing acute-accented vowels by plain ones
+    [substring] among(
+        '{a'}' (<- 'a')
+        '{e'}' (<- 'e')
+        '{i'}' (<- 'i')
+        '{o'}' (<- 'o')
+        '{u'}' (<- 'u')
+        // and possibly {u"}->u here, or in prelude
+        ''     (next)  // any other character is stepped over; the repeat stops once 'next' fails
+    ) //or next
+)
+
+backwardmode (
+
+    define RV as $pV <= cursor  // succeeds when the cursor is at or to the right of pV
+    define R1 as $p1 <= cursor  // likewise for p1
+    define R2 as $p2 <= cursor  // likewise for p2
+
+    define attached_pronoun as (  // remove a pronoun attached to a gerund or infinitive ending
+        [substring] among(  // the slice is the pronoun itself; nothing is changed yet
+            'me' 'se'  'sela' 'selo' 'selas' 'selos' 'la' 'le' 'lo'
+            'las' 'les' 'los' 'nos'
+        )
+        substring RV among(  // the verb ending just before the pronoun must start in RV
+            'i{e'}ndo' (] <- 'iendo')  // accented forms: slice widened to ending + pronoun, replaced by the unaccented ending
+            '{a'}ndo'  (] <- 'ando')
+            '{a'}r'    (] <- 'ar')
+            '{e'}r'    (] <- 'er')
+            '{i'}r'    (] <- 'ir')
+            'ando'
+            'iendo'
+            'ar' 'er' 'ir'
+                       (delete)  // unaccented forms: only the pronoun is deleted
+            'yendo'    ('u' delete)  // pronoun deleted only when 'u' precedes 'yendo'
+        )
+    )
+
+    define standard_suffix as (  // handle one derivational suffix (longest match); fails if none applies
+        [substring] among(
+
+            'anza' 'anzas'
+            'ico' 'ica' 'icos' 'icas'
+            'ismo' 'ismos'
+            'able' 'ables'
+            'ible' 'ibles'
+            'ista' 'istas'
+            'oso' 'osa' 'osos' 'osas'
+            'amiento' 'amientos'
+            'imiento' 'imientos'
+            (  // plain removal when the suffix is in R2
+                R2 delete
+            )
+            'adora' 'ador' 'aci{o'}n'
+            'adoras' 'adores' 'aciones'
+            'ante' 'antes' 'ancia' 'ancias'// Note 1
+            (  // removal, then also a preceding 'ic' if that is in R2 too
+                R2 delete
+                try ( ['ic'] R2 delete )
+            )
+            'log{i'}a'
+            'log{i'}as'
+            (  // replaced by 'log'
+                R2 <- 'log'
+            )
+            'uci{o'}n' 'uciones'
+            (  // replaced by 'u'
+                R2 <- 'u'
+            )
+            'encia' 'encias'
+            (  // replaced by 'ente'
+                R2 <- 'ente'
+            )
+            'amente'
+            (  // note the region test here is R1, not R2
+                R1 delete
+                try (
+                    [substring] R2 delete among(  // a further suffix in R2 is deleted before its own action runs
+                        'iv' (['at'] R2 delete)  // after 'iv', a preceding 'at' in R2 is deleted as well
+                        'os'
+                        'ic'
+                        'ad'
+                    )
+                )
+            )
+            'mente'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'ante' // Note 1
+                        'able'
+                        'ible' (R2 delete)  // action shared by 'ante', 'able' and 'ible'
+                    )
+                )
+            )
+            'idad'
+            'idades'
+            (
+                R2 delete
+                try (
+                    [substring] among(
+                        'abil'
+                        'ic'
+                        'iv'   (R2 delete)  // action shared by 'abil', 'ic' and 'iv'
+                    )
+                )
+            )
+            'iva' 'ivo'
+            'ivas' 'ivos'
+            (
+                R2 delete
+                try (
+                    ['at'] R2 delete // but not a further   ['ic'] R2 delete
+                )
+            )
+        )
+    )
+
+    define y_verb_suffix as (  // strip a verb ending that begins with 'y'
+        setlimit tomark pV for ([substring]) among(  // only the suffix search is limited to the part after pV
+            'ya' 'ye' 'yan' 'yen' 'yeron' 'yendo' 'yo' 'y{o'}'
+            'yas' 'yes' 'yais' 'yamos'
+                ('u' delete)  // removed only when preceded by 'u'; the 'u' is kept
+        )
+    )
+
+    define verb_suffix as (  // strip a regular verb ending (longest match)
+        setlimit tomark pV for ([substring]) among(  // only the suffix search is limited to the part after pV
+
+            'en' 'es' '{e'}is' 'emos'
+                (try ('u' test 'g') ] delete)  // after 'gu', the slice is widened so the 'u' is deleted too
+
+            'ar{i'}an' 'ar{i'}as' 'ar{a'}n' 'ar{a'}s' 'ar{i'}ais'
+            'ar{i'}a' 'ar{e'}is' 'ar{i'}amos' 'aremos' 'ar{a'}'
+            'ar{e'}'
+            'er{i'}an' 'er{i'}as' 'er{a'}n' 'er{a'}s' 'er{i'}ais'
+            'er{i'}a' 'er{e'}is' 'er{i'}amos' 'eremos' 'er{a'}'
+            'er{e'}'
+            'ir{i'}an' 'ir{i'}as' 'ir{a'}n' 'ir{a'}s' 'ir{i'}ais'
+            'ir{i'}a' 'ir{e'}is' 'ir{i'}amos' 'iremos' 'ir{a'}'
+            'ir{e'}'
+
+            'aba' 'ada' 'ida' '{i'}a' 'ara' 'iera' 'ad' 'ed'
+            'id' 'ase' 'iese' 'aste' 'iste' 'an' 'aban' '{i'}an'
+            'aran' 'ieran' 'asen' 'iesen' 'aron' 'ieron' 'ado'
+            'ido' 'ando' 'iendo' 'i{o'}' 'ar' 'er' 'ir' 'as'
+            'abas' 'adas' 'idas' '{i'}as' 'aras' 'ieras' 'ases'
+            'ieses' '{i'}s' '{a'}is' 'abais' '{i'}ais' 'arais'
+            'ierais'  'aseis' 'ieseis' 'asteis' 'isteis' 'ados'
+            'idos' 'amos' '{a'}bamos' '{i'}amos' 'imos'
+            '{a'}ramos' 'i{e'}ramos' 'i{e'}semos' '{a'}semos'
+                (delete)  // every ending from 'ar{i'}an' onwards is removed unconditionally
+        )
+    )
+
+    define residual_suffix as (  // strip a leftover final vowel (or 'os') that lies in RV
+        [substring] among(
+            'os'
+            'a' 'o' '{a'}' '{i'}' '{o'}'
+                ( RV delete )  // shared by 'os' and the five vowels above
+            'e' '{e'}'
+                ( RV delete try( ['u'] test 'g' RV delete ) )  // then the 'u' of a preceding 'gu' goes too, if that 'u' is in RV
+        )
+    )
+)
+
+define stem as (  // external entry point; every step is wrapped in 'do', so none can fail stem
+    do mark_regions
+    backwards (  // suffix steps scan from the end of the word
+        do attached_pronoun
+        do ( standard_suffix or  // the three alternatives are tried in order until one succeeds
+             y_verb_suffix or
+             verb_suffix
+           )
+        do residual_suffix
+    )
+    do postlude  // forwards again: acute accents are removed last
+)
+
+/*
+    Note 1: additions of 15 Jun 2005
+*/
diff --git a/snowball_code/algorithms/swedish/stem_ISO_8859_1.sbl b/snowball_code/algorithms/swedish/stem_ISO_8859_1.sbl
new file mode 100644
index 0000000..03ce1e2
--- /dev/null
+++ b/snowball_code/algorithms/swedish/stem_ISO_8859_1.sbl
@@ -0,0 +1,72 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in ISO Latin I) */
+
+stringdef a"   hex 'E4'
+stringdef ao   hex 'E5'
+stringdef o"   hex 'F6'
+
+define v 'aeiouy{a"}{ao}{o"}'  // the nine letters treated as vowels by mark_regions
+
+define s_ending  'bcdfghjklmnoprtvy'  // letters after which main_suffix may delete a final 's'
+
+define mark_regions as (  // computes p1, the left limit used by every suffix routine
+
+    $p1 = limit
+    test ( hop 3 setmark x )  // x = position after the third character; fails (leaving p1 at the end) on shorter words
+    goto v gopast non-v  setmark p1  // p1 = just after the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // but p1 is never left of x
+)
+
+backwardmode (
+
+    define main_suffix as (  // strip an inflectional ending (longest match)
+        setlimit tomark p1 for ([substring])  // only the suffix search is limited to the part after p1
+        among(
+
+            'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
+            'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
+            'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
+            'hetens' 'erns' 'at' 'andet' 'het' 'ast'
+                (delete)  // removed unconditionally
+            's'
+                (s_ending delete)  // removed only after a letter in s_ending; that letter is kept
+        )
+    )
+
+    define consonant_pair as setlimit tomark p1 for (  // if the part after p1 ends in a listed pair, drop its last letter
+        among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
+        and ([next] delete)  // 'and' restores the cursor first, so the slice is just the final character
+    )
+
+    define other_suffix as setlimit tomark p1 for (  // remaining suffixes, searched only in the part after p1
+        [substring] among(
+            'lig' 'ig' 'els' (delete)
+            'l{o"}st'        (<-'l{o"}s')  // only the final 't' is lost
+            'fullt'          (<-'full')  // only the final 't' is lost
+        )
+    )
+)
+
+define stem as (  // external entry point; every step is wrapped in 'do', so none can fail stem
+
+    do mark_regions
+    backwards (  // suffix steps scan from the end of the word
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/snowball_code/algorithms/swedish/stem_MS_DOS_Latin_I.sbl b/snowball_code/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
new file mode 100644
index 0000000..1631f40
--- /dev/null
+++ b/snowball_code/algorithms/swedish/stem_MS_DOS_Latin_I.sbl
@@ -0,0 +1,72 @@
+routines (
+           mark_regions
+           main_suffix
+           consonant_pair
+           other_suffix
+)
+
+externals ( stem )
+
+integers ( p1 x )
+
+groupings ( v s_ending )
+
+stringescapes {}
+
+/* special characters (in MS-DOS Latin I) */
+
+stringdef a"   hex '84'
+stringdef ao   hex '86'
+stringdef o"   hex '94'
+
+define v 'aeiouy{a"}{ao}{o"}'  // the nine letters treated as vowels by mark_regions
+
+define s_ending  'bcdfghjklmnoprtvy'  // letters after which main_suffix may delete a final 's'
+
+define mark_regions as (  // computes p1, the left limit used by every suffix routine
+
+    $p1 = limit
+    test ( hop 3 setmark x )  // x = position after the third character; fails (leaving p1 at the end) on shorter words
+    goto v gopast non-v  setmark p1  // p1 = just after the first non-vowel that follows a vowel
+    try ( $p1 < x  $p1 = x )  // but p1 is never left of x
+)
+
+backwardmode (
+
+    define main_suffix as (  // strip an inflectional ending (longest match)
+        setlimit tomark p1 for ([substring])  // only the suffix search is limited to the part after p1
+        among(
+
+            'a' 'arna' 'erna' 'heterna' 'orna' 'ad' 'e' 'ade' 'ande' 'arne'
+            'are' 'aste' 'en' 'anden' 'aren' 'heten' 'ern' 'ar' 'er' 'heter'
+            'or' 'as' 'arnas' 'ernas' 'ornas' 'es' 'ades' 'andes' 'ens' 'arens'
+            'hetens' 'erns' 'at' 'andet' 'het' 'ast'
+                (delete)  // removed unconditionally
+            's'
+                (s_ending delete)  // removed only after a letter in s_ending; that letter is kept
+        )
+    )
+
+    define consonant_pair as setlimit tomark p1 for (  // if the part after p1 ends in a listed pair, drop its last letter
+        among('dd' 'gd' 'nn' 'dt' 'gt' 'kt' 'tt')
+        and ([next] delete)  // 'and' restores the cursor first, so the slice is just the final character
+    )
+
+    define other_suffix as setlimit tomark p1 for (  // remaining suffixes, searched only in the part after p1
+        [substring] among(
+            'lig' 'ig' 'els' (delete)
+            'l{o"}st'        (<-'l{o"}s')  // only the final 't' is lost
+            'fullt'          (<-'full')  // only the final 't' is lost
+        )
+    )
+)
+
+define stem as (  // external entry point; every step is wrapped in 'do', so none can fail stem
+
+    do mark_regions
+    backwards (  // suffix steps scan from the end of the word
+        do main_suffix
+        do consonant_pair
+        do other_suffix
+    )
+)
diff --git a/snowball_code/algorithms/turkish/stem_Unicode.sbl b/snowball_code/algorithms/turkish/stem_Unicode.sbl
new file mode 100644
index 0000000..e47eea7
--- /dev/null
+++ b/snowball_code/algorithms/turkish/stem_Unicode.sbl
@@ -0,0 +1,477 @@
+/* Stemmer for Turkish
+	* author: Evren (Kapusuz) Çilden
+	* email: evren.kapusuz at gmail.com
+	* version: 1.0 (15.01.2007)
+	
+
+	* stems nominal verb suffixes
+	* stems nominal inflections
+	* more than one syllable word check
+	* (y,n,s,U) context check
+	* vowel harmony check
+	* last consonant check and conversion (b, c, d, ğ to p, ç, t, k)
+	
+	* The stemming algorithm is based on the paper "An Affix Stripping
+	* Morphological Analyzer for Turkish" by Gülşen Eryiğit and
+	* Eşref Adalı (Proceedings of the IASTED International Conference
+	* ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18, 2004,
+	* Innsbruck, Austria).
+	
+	* Turkish is an agglutinative language and has a very rich morphological
+	* structure. In Turkish, you can form many different words from a single stem
+	* by appending a sequence of suffixes. Eg. The word "doktoruymuşsunuz" means
+	* "You had been the doctor of him". The stem of the word is "doktor" and it
+	* takes three different suffixes -sU, -ymUs, and -sUnUz. The rules about
+	* the append order of suffixes can be clearly described as FSMs.
+	* The paper referenced above defines some FSMs for right to left
+	* morphological analysis. I generated a method for constructing snowball
+	* expressions from right to left FSMs for stemming suffixes.
+*/
+
+routines (
+	append_U_to_stems_ending_with_d_or_g // for preventing some overstemmings
+	check_vowel_harmony	// tests vowel harmony for suffixes
+	is_reserved_word	// tests whether current string is a reserved word ('ad','soyad')
+	mark_cAsInA		// nominal verb suffix
+	mark_DA			// noun suffix
+	mark_DAn		// noun suffix
+	mark_DUr		// nominal verb suffix
+	mark_ki			// noun suffix
+	mark_lAr		// noun suffix, nominal verb suffix
+	mark_lArI		// noun suffix
+	mark_nA			// noun suffix
+	mark_ncA		// noun suffix
+	mark_ndA		// noun suffix
+	mark_ndAn		// noun suffix
+	mark_nU			// noun suffix
+	mark_nUn		// noun suffix
+	mark_nUz		// nominal verb suffix
+	mark_sU			// noun suffix
+	mark_sUn		// nominal verb suffix
+	mark_sUnUz		// nominal verb suffix
+	mark_possessives	// -(U)m,-(U)n,-(U)mUz,-(U)nUz,
+	mark_yA			// noun suffix
+	mark_ylA		// noun suffix
+	mark_yU			// noun suffix
+	mark_yUm		// nominal verb suffix
+	mark_yUz		// nominal verb suffix
+	mark_yDU		// nominal verb suffix
+	mark_yken		// nominal verb suffix
+	mark_ymUs_		// nominal verb suffix
+	mark_ysA		// nominal verb suffix
+	
+	mark_suffix_with_optional_y_consonant
+	mark_suffix_with_optional_U_vowel
+	mark_suffix_with_optional_n_consonant
+	mark_suffix_with_optional_s_consonant
+	
+	more_than_one_syllable_word
+	
+	post_process_last_consonants
+	postlude
+
+	stem_nominal_verb_suffixes
+	stem_noun_suffixes
+	stem_suffix_chain_before_ki
+)
+
+/* Special characters in Unicode Latin-1 and Latin Extended-A */
+stringdef c.   	hex 'E7'	// LATIN SMALL LETTER C WITH CEDILLA
+stringdef g~   	hex '011F'	// LATIN SMALL LETTER G WITH BREVE
+stringdef i'   	hex '0131'	// LATIN SMALL LETTER I WITHOUT DOT
+stringdef o"  	hex 'F6'	// LATIN SMALL LETTER O WITH DIAERESIS
+stringdef s.	hex '015F'	// LATIN SMALL LETTER S WITH CEDILLA
+stringdef u"  	hex 'FC'	// LATIN SMALL LETTER U WITH DIAERESIS
+
+stringescapes 	{ }
+
+integers 	( strlen )	// length of a string
+
+booleans	( continue_stemming_noun_suffixes )
+
+groupings 	( vowel U vowel1 vowel2 vowel3 vowel4 vowel5 vowel6)
+
+define vowel 	'ae{i'}io{o"}u{u"}'
+define U	'{i'}iu{u"}'
+
+// the vowel grouping definitions below are used for checking vowel harmony
+define vowel1  	'a{i'}ou' 		// vowels that can end with suffixes containing 'a'
+define vowel2  	'ei{o"}{u"}' 		// vowels that can end with suffixes containing 'e'
+define vowel3  	'a{i'}' 		// vowels that can end with suffixes containing 'i''
+define vowel4  	'ei'	 		// vowels that can end with suffixes containing 'i'
+define vowel5  	'ou'	 		// vowels that can end with suffixes containing 'o' or 'u'
+define vowel6  	'{o"}{u"}' 		// vowels that can end with suffixes containing 'o"' or 'u"'
+
+externals 	( stem )
+
+backwardmode (
+	// checks vowel harmony for possible suffixes;
+	// helps to detect whether the candidate suffix obeys vowel harmony
+	// this rule was added to prevent overstemming
+	define check_vowel_harmony as (
+		test
+		(
+			(goto vowel)   // if there is a vowel
+			(
+				('a' goto vowel1) or
+				('e' goto vowel2) or
+				('{i'}' goto vowel3) or
+				('i' goto vowel4) or
+				('o' goto vowel5) or
+				('{o"}' goto vowel6) or
+				('u' goto vowel5) or
+				('{u"}' goto vowel6)
+			)
+		)
+	)
+	
+	// if the last consonant before suffix is vowel and n then advance and delete
+	// if the last consonant before suffix is non vowel and n do nothing
+	// if the last consonant before suffix is not n then only delete the suffix
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_n_consonant as (
+		((test 'n') next (test vowel))
+		or
+		((not(test 'n')) test(next (test vowel)))
+
+	)
+	
+	// if the last consonant before suffix is vowel and s then advance and delete
+	// if the last consonant before suffix is non vowel and s do nothing
+	// if the last consonant before suffix is not s then only delete the suffix
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_s_consonant as (
+		((test 's') next (test vowel))
+		or
+		((not(test 's')) test(next (test vowel)))
+	)
+	
+	// if the last consonant before suffix is vowel and y then advance and delete
+	// if the last consonant before suffix is non vowel and y do nothing
+	// if the last consonant before suffix is not y then only delete the suffix
+	// assumption: slice beginning is set correctly
+	define mark_suffix_with_optional_y_consonant as (
+		((test 'y') next (test vowel))
+		or
+		((not(test 'y')) test(next (test vowel)))
+	)
+	
+	define mark_suffix_with_optional_U_vowel as (
+		((test U) next (test non-vowel))
+		or
+		((not(test U)) test(next (test non-vowel)))
+
+	)
+	
+	define mark_possessives as (
+		among ('m{i'}z' 'miz' 'muz' 'm{u"}z'
+		       'n{i'}z' 'niz' 'nuz' 'n{u"}z' 'm' 'n')
+		(mark_suffix_with_optional_U_vowel)
+	)
+	
+	define mark_sU as (
+		check_vowel_harmony
+		U
+		(mark_suffix_with_optional_s_consonant)
+	)
+	
+	define mark_lArI as (
+		among ('leri' 'lar{i'}')
+	)
+	
+	define mark_yU as (
+		check_vowel_harmony
+		U
+		(mark_suffix_with_optional_y_consonant)	
+	)
+	
+	define mark_nU as (
+		check_vowel_harmony
+		among ('n{i'}' 'ni' 'nu' 'n{u"}')	
+	)
+	
+	define mark_nUn as (
+		check_vowel_harmony
+		among ('{i'}n' 'in' 'un' '{u"}n')	
+		(mark_suffix_with_optional_n_consonant)
+	)
+	
+	define mark_yA as (
+		check_vowel_harmony
+		among('a' 'e')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_nA as (
+		check_vowel_harmony
+		among('na' 'ne')
+	)
+	
+	define mark_DA as (
+		check_vowel_harmony
+		among('da' 'de' 'ta' 'te')
+	)
+	
+	define mark_ndA as (
+		check_vowel_harmony
+		among('nda' 'nde')
+	)
+	
+	define mark_DAn as (
+		check_vowel_harmony
+		among('dan' 'den' 'tan' 'ten')
+	)
+	
+	define mark_ndAn as (
+		check_vowel_harmony
+		among('ndan' 'nden')
+	)
+	
+	define mark_ylA as (
+		check_vowel_harmony
+		among('la' 'le')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_ki as (
+		'ki'
+	)
+	
+	define mark_ncA as (
+		check_vowel_harmony
+		among('ca' 'ce')	
+		(mark_suffix_with_optional_n_consonant)
+	)
+	
+	define mark_yUm as (
+		check_vowel_harmony
+		among ('{i'}m' 'im' 'um' '{u"}m')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_sUn as (
+		check_vowel_harmony
+		among ('s{i'}n' 'sin' 'sun' 's{u"}n' )
+	)
+	
+	define mark_yUz as (
+		check_vowel_harmony
+		among ('{i'}z' 'iz' 'uz' '{u"}z')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_sUnUz as (
+		among ('s{i'}n{i'}z' 'siniz' 'sunuz' 's{u"}n{u"}z')
+	)
+	
+	define mark_lAr as (
+		check_vowel_harmony
+		among ('ler' 'lar')
+	)
+	
+	define mark_nUz as (
+		check_vowel_harmony
+		among ('n{i'}z' 'niz' 'nuz' 'n{u"}z')
+	)
+	
+	define mark_DUr as (
+		check_vowel_harmony
+		among ('t{i'}r' 'tir' 'tur' 't{u"}r' 'd{i'}r' 'dir' 'dur' 'd{u"}r')
+	)
+	
+	define mark_cAsInA as (
+		among ('cas{i'}na' 'cesine')
+	)
+	
+	define mark_yDU as (
+		check_vowel_harmony
+		among ('t{i'}m' 'tim' 'tum' 't{u"}m' 'd{i'}m' 'dim' 'dum' 'd{u"}m'
+			't{i'}n' 'tin' 'tun' 't{u"}n' 'd{i'}n' 'din' 'dun' 'd{u"}n'
+			't{i'}k' 'tik' 'tuk' 't{u"}k' 'd{i'}k' 'dik' 'duk' 'd{u"}k'
+			't{i'}' 'ti' 'tu' 't{u"}' 'd{i'}' 'di' 'du' 'd{u"}')
+		(mark_suffix_with_optional_y_consonant)
+	)
+
+	// does not fully obey vowel harmony	
+	define mark_ysA as (
+		among ('sam' 'san' 'sak' 'sem' 'sen' 'sek' 'sa' 'se')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_ymUs_ as (
+		check_vowel_harmony
+		among ('m{i'}{s.}' 'mi{s.}' 'mu{s.}' 'm{u"}{s.}')
+		(mark_suffix_with_optional_y_consonant)
+	)
+	
+	define mark_yken as (
+		'ken' (mark_suffix_with_optional_y_consonant)
+	)
+	
+	define stem_nominal_verb_suffixes as (
+		[	
+			set continue_stemming_noun_suffixes
+			(mark_ymUs_ or mark_yDU or mark_ysA or mark_yken)
+			or
+			(mark_cAsInA (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_)
+			or
+			(
+				mark_lAr ] delete try([(mark_DUr or mark_yDU or mark_ysA or mark_ymUs_))
+				unset continue_stemming_noun_suffixes
+			)
+			or
+			(mark_nUz (mark_yDU or mark_ysA))
+			or
+			((mark_sUnUz or mark_yUz or mark_sUn or mark_yUm) ] delete try([ mark_ymUs_))
+			or
+			(mark_DUr ] delete try([ (mark_sUnUz or mark_lAr or mark_yUm or mark_sUn or mark_yUz or true) mark_ymUs_))
+		]delete
+	)
+	
+	// stems noun suffix chains ending with -ki
+	define stem_suffix_chain_before_ki as (
+		[
+			mark_ki
+			(
+				(mark_DA] delete try([
+					(mark_lAr] delete try(stem_suffix_chain_before_ki))
+					or
+					(mark_possessives] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+					
+				))
+				or
+				(mark_nUn] delete try([
+					(mark_lArI] delete)
+					or
+					([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+					or
+					(stem_suffix_chain_before_ki)
+				))
+				or
+				(mark_ndA (	
+					(mark_lArI] delete)
+					or
+					((mark_sU] delete try([mark_lAr]delete stem_suffix_chain_before_ki)))
+					or
+					(stem_suffix_chain_before_ki)
+				))
+			)
+	)
+	
+	define stem_noun_suffixes as (
+		([mark_lAr] delete try(stem_suffix_chain_before_ki))
+		or
+		([mark_ncA] delete
+			try(
+				([mark_lArI] delete)
+				or
+				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+				or
+				([mark_lAr] delete stem_suffix_chain_before_ki)
+			)
+		)
+		or
+		([(mark_ndA or mark_nA)
+			(
+		  		(mark_lArI] delete)
+		  		or
+		  		(mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+		  		or
+		  		(stem_suffix_chain_before_ki)
+		  	)
+		)
+		or
+		([(mark_ndAn or mark_nU) ((mark_sU ] delete try([mark_lAr] delete stem_suffix_chain_before_ki)) or (mark_lArI)))
+		or
+		( [mark_DAn] delete try ([
+			(
+		 		(mark_possessives ] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+		 		or
+		 		(mark_lAr] delete try(stem_suffix_chain_before_ki))
+		 		or
+		 		(stem_suffix_chain_before_ki)
+		 	))
+		)
+		or
+		([mark_nUn or mark_ylA] delete
+			try(
+				([mark_lAr] delete stem_suffix_chain_before_ki)
+				or
+				([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+				or
+				stem_suffix_chain_before_ki
+			)
+		)
+		or
+		([mark_lArI] delete)
+		or	
+		(stem_suffix_chain_before_ki)
+		or
+		([mark_DA or mark_yU or mark_yA] delete try([((mark_possessives] delete try([mark_lAr)) or mark_lAr) ] delete [ stem_suffix_chain_before_ki))
+		or
+		([mark_possessives or mark_sU] delete try([mark_lAr] delete stem_suffix_chain_before_ki))
+	)
+	
+	define post_process_last_consonants as (	
+		[substring] among (
+			'b' (<- 'p')
+			'c' (<- '{c.}')
+			'd' (<- 't')
+			'{g~}' (<- 'k')
+		)
+	)
+
+	// after stemming, if the word ends with 'd' or 'g', most probably the last U was overstemmed
+	// (as in 'kedim' -> 'ked')
+	// Turkish words don't usually end with 'd' or 'g'
+	// some very well-known words are ignored (like 'ad' and 'soyad')
+	// appends U to stems ending with d or g, and decides which vowel to add
+	// based on the last vowel in the stem
+	define append_U_to_stems_ending_with_d_or_g as (
+		test('d' or 'g')
+		(test((goto vowel) 'a' or '{i'}') <+ '{i'}')
+		or
+		(test((goto vowel) 'e' or 'i') <+ 'i')
+		or
+		(test((goto vowel) 'o' or 'u') <+ 'u')
+		or
+		(test((goto vowel) '{o"}' or '{u"}') <+ '{u"}')
+	)
+	
+)
+
+// Tests whether the word has more than one syllable
+// In Turkish each vowel indicates a distinct syllable
+define more_than_one_syllable_word as (
+	test (atleast 2 (gopast vowel))	// succeeds when at least two vowels can be skipped past; test restores the cursor
+)
+
+define is_reserved_word as (
+	test(gopast 'ad' ($strlen = 2) ($strlen == limit))
+	or
+	test(gopast 'soyad' ($strlen = 5) ($strlen == limit))
+)
+
+define postlude as (
+	not(is_reserved_word)
+	backwards (
+		do append_U_to_stems_ending_with_d_or_g
+		do post_process_last_consonants
+		
+	)
+)
+
+define stem as (
+	(more_than_one_syllable_word)	// words with fewer than two vowels are not stemmed at all
+	(
+		backwards (
+			do stem_nominal_verb_suffixes	// 'do' never fails, whatever the routine signals
+			continue_stemming_noun_suffixes	// boolean test: signals failure when unset, ending this sequence
+			do stem_noun_suffixes
+		)
+		
+	postlude	// only reached when the backwards block above succeeded
+	)
+)
+
+
diff --git a/snowball_code/compiler/analyser.c b/snowball_code/compiler/analyser.c
new file mode 100644
index 0000000..9234348
--- /dev/null
+++ b/snowball_code/compiler/analyser.c
@@ -0,0 +1,961 @@
+
+#include <stdio.h>   /* main etc */
+#include <stdlib.h>  /* exit */
+#include <string.h>  /* memmove */
+#include "header.h"
+
+/* recursive usage: */
+
+static void read_program_(struct analyser * a, int terminator);
+static struct node * read_C(struct analyser * a);
+static struct node * C_style(struct analyser * a, char * s, int token);
+
+
+static void fault(int n) { fprintf(stderr, "fault %d\n", n); exit(1); }
+
+static void print_node_(struct node * p, int n, char * s) {
+    /* Dump node p at depth n to stdout; s is the innermost indent unit. */
+    int i;
+    for (i = 0; i < n; i++) fputs(i == n - 1 ? s : "  ", stdout);  /* s is data, never a format string */
+    printf("%s ", name_of_token(p->type));
+    unless (p->name == 0) report_b(stdout, p->name->b);
+    unless (p->literalstring == 0) {
+        printf("'");
+        report_b(stdout, p->literalstring);
+        printf("'");
+    }
+    printf("\n");
+    unless (p->AE == 0) print_node_(p->AE, n+1, "# ");        /* arithmetic expression, one level deeper */
+    unless (p->left == 0) print_node_(p->left, n+1, "  ");    /* left child, one level deeper */
+    unless (p->right == 0) print_node_(p->right, n, "  ");    /* right sibling, same depth */
+    if (p->aux != 0) print_node_(p->aux, n+1, "@ ");          /* auxiliary child, one level deeper */
+}
+
+extern void print_program(struct analyser * a) {
+    print_node_(a->program, 0, "  ");
+}
+
+static struct node * new_node(struct analyser * a, int type) {
+    NEW(node, p);
+    p->next = a->nodes; a->nodes = p;
+    p->left = 0;
+    p->right = 0;
+    p->aux = 0;
+    p->AE = 0;
+    p->name = 0;
+    p->literalstring = 0;
+    p->mode = a->mode;
+    p->line_number = a->tokeniser->line_number;
+    p->type = type;
+    return p;
+}
+
+static char * name_of_mode(int n) {  /* printable name of a string mode, used in error messages */
+    switch (n) {
+         default: fault(0);  /* fault() exits, so an unknown mode never reaches the return below */
+         case m_backward: return "string backward";
+         case m_forward:  return "string forward";
+    /*   case m_integer:  return "integer";  */
+    }
+}
+
+static char * name_of_type(int n) {  /* printable name of a type code as passed to check_name_type */
+    switch (n) {  /* 'b' is included because check_name_type reports boolean mismatches via error 33 */
+         default: fault(1);  /* fault() exits, so an unknown code never reaches the return below */
+         case 's': return "string";
+         case 'i': return "integer";  case 'b': return "boolean";
+         case 'r': return "routine";
+         case 'R': return "routine or grouping";
+         case 'g': return "grouping";
+    }
+}
+
+static void count_error(struct analyser * a) {
+    struct tokeniser * tok = a->tokeniser;
+    if (tok->error_count < 20) { tok->error_count++; return; }
+    /* 20 errors have already been counted: stop compiling */
+    fprintf(stderr, "... etc\n"); exit(1);
+}
+
+static void error2(struct analyser * a, int n, int x) {  /* report error number n on stderr; x is an extra value some messages use */
+    struct tokeniser * t = a->tokeniser;
+    count_error(a);
+    fprintf(stderr, "Line %d", t->line_number);
+    if (t->get_depth > 0) fprintf(stderr, " (of included file)");
+    fprintf(stderr, ": ");
+    if (n >= 30) report_b(stderr, t->b);  /* messages 30 and up start with the current token's text */
+    switch (n) {
+        case 0:
+            fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
+        case 3:
+            fprintf(stderr, "in among(...), ");  /* fall through: continue with the "unexpected" message */
+        case 1:
+            fprintf(stderr, "unexpected %s", name_of_token(t->token));
+            if (t->token == c_number) fprintf(stderr, " %d", t->number);
+            if (t->token == c_name) {
+                fprintf(stderr, " ");
+                report_b(stderr, t->b);
+            } break;
+        case 2:
+            fprintf(stderr, "string omitted"); break;
+
+        case 14:
+            fprintf(stderr, "unresolved substring on line %d", x); break;
+        case 15:
+            fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
+        case 16:
+            fprintf(stderr, "empty grouping"); break;
+        case 17:
+            fprintf(stderr, "backwards used when already in this mode"); break;
+        case 18:
+            fprintf(stderr, "empty among(...)"); break;
+        case 19:
+            fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
+        case 20:
+            fprintf(stderr, "substring preceded by another substring on line %d", x); break;
+
+        case 30:
+            fprintf(stderr, " re-declared"); break;
+        case 31:
+            fprintf(stderr, " undeclared"); break;
+        case 32:
+            fprintf(stderr, " declared as %s mode; used as %s mode",
+                            name_of_mode(a->mode), name_of_mode(x)); break;
+        case 33:
+            fprintf(stderr, " not of type %s", name_of_type(x)); break;  /* x is a type code here */
+        case 34:
+            fprintf(stderr, " not of type string or integer"); break;
+        case 35:
+            fprintf(stderr, " misplaced"); break;
+        case 36:
+            fprintf(stderr, " redefined"); break;
+        case 37:
+            fprintf(stderr, " mis-used as %s mode",
+                            name_of_mode(x)); break;  /* x is a mode here */
+        default:
+            fprintf(stderr, " error %d", n); break;
+
+    }
+    if (n <= 13 && t->previous_token > 0)  /* errors numbered 13 or below also name the preceding token */
+        fprintf(stderr, " after %s", name_of_token(t->previous_token));
+    fprintf(stderr, "\n");
+}
+
+static void error(struct analyser * a, int n) { error2(a, n, 0); }
+
+static void error3(struct analyser * a, struct node * p, symbol * b) {
+    count_error(a);
+    fprintf(stderr, "among(...) on line %d has repeated string '", p->line_number);
+    report_b(stderr, b);
+    fprintf(stderr, "'\n");
+}
+
+static void error4(struct analyser * a, struct name * q) {
+    count_error(a);
+    report_b(stderr, q->b);
+    fprintf(stderr, " undefined\n");
+}
+
+static void omission_error(struct analyser * a, int n) {
+    a->tokeniser->omission = n;
+    error(a, 0);
+}
+
+static int check_token(struct analyser * a, int code) {
+    /* true when the current token is `code`; otherwise report it as omitted */
+    if (a->tokeniser->token == code) return true;
+    omission_error(a, code);
+    return false;
+}
+
+static int get_token(struct analyser * a, int code) {
+    struct tokeniser * t = a->tokeniser;
+    int matched;
+    read_token(t);
+    matched = check_token(a, code);
+    /* on a mismatch, hold the token so that the next read sees it again */
+    if (!matched) t->token_held = true;
+    return matched;
+}
+
+static struct name * look_for_name(struct analyser * a) {
+    /* Search the declared names for one spelled like the current token.
+       A match is marked as referenced; 0 is returned when none exists. */
+    symbol * wanted = a->tokeniser->b;
+    struct name * p;
+    for (p = a->names; p != 0; p = p->next) {
+        symbol * b = p->b;
+        int n = SIZE(b);
+        if (n != SIZE(wanted)) continue;
+        if (memcmp(wanted, b, n * sizeof(symbol)) != 0) continue;
+        p->referenced = true;
+        return p;
+    }
+    return 0;
+}
+
+static struct name * find_name(struct analyser * a) {
+    struct name * p = look_for_name(a);
+    if (p == 0) error(a, 31);
+    return p;
+}
+
+static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
+    if (p->mode < 0) { p->mode = mode; return; }  /* negative: not fixed yet, so the first use decides */
+    if (p->mode != mode) error2(a, 37, mode);     /* later uses must agree with it */
+}
+
+static void check_name_type(struct analyser * a, struct name * p, int type) {  /* error 33 unless p's declared type fits the type code */
+    switch (type) {
+        case 's': if (p->type == t_string) return; break;
+        case 'i': if (p->type == t_integer) return; break;
+        case 'b': if (p->type == t_boolean) return; break;
+        case 'R': if (p->type == t_grouping) return;  /* fall through: 'R' accepts a grouping or a routine */
+        case 'r': if (p->type == t_routine ||
+                      p->type == t_external) return; break;
+        case 'g': if (p->type == t_grouping) return; break;
+    }
+    error2(a, 33, type);
+}
+
+static void read_names(struct analyser * a, int type) {
+    struct tokeniser * t = a->tokeniser;
+    unless (get_token(a, c_bra)) return;
+    repeat {
+        if (read_token(t) != c_name) break;
+        if (look_for_name(a) != 0) error(a, 30); else {
+            NEW(name, p);
+            p->b = copy_b(t->b);
+            p->type = type;
+            p->mode = -1; /* routines, externals */
+            p->count = a->name_count[type];
+            p->referenced = false;
+            p->used = false;
+            p->grouping = 0;
+            p->definition = 0;
+            a->name_count[type] ++;
+            p->next = a->names;
+            a->names = p;
+        }
+    }
+    unless (check_token(a, c_ket)) t->token_held = true;
+}
+
+static symbol * new_literalstring(struct analyser * a) {
+    NEW(literalstring, p);
+    p->b = copy_b(a->tokeniser->b);
+    p->next = a->literalstrings;
+    a->literalstrings = p;
+    return p->b;
+}
+
+static int read_AE_test(struct analyser * a) {
+
+    struct tokeniser * t = a->tokeniser;
+    switch (read_token(t)) {
+        case c_assign: return c_mathassign;
+        case c_plusassign:
+        case c_minusassign:
+        case c_multiplyassign:
+        case c_divideassign:
+        case c_eq:
+        case c_ne:
+        case c_gr:
+        case c_ge:
+        case c_ls:
+        case c_le: return t->token;
+        default: error(a, 1); t->token_held = true; return c_eq;
+    }
+}
+
+static int binding(int t) {
+    /* binding strength of a binary arithmetic operator token;
+       -2 for any token that is not one */
+    if (t == c_plus || t == c_minus) return 1;
+    if (t == c_multiply || t == c_divide) return 2;
+    return -2;
+}
+
+static void name_to_node(struct analyser * a, struct node * p, int type) {
+    struct name * q = find_name(a);
+    unless (q == 0) {
+        check_name_type(a, q, type);
+        q->used = true;
+    }
+    p->name = q;
+}
+
+static struct node * read_AE(struct analyser * a, int B) {  /* parse an arithmetic expression; only operators binding tighter than B are consumed; returns 0 on a bad operand */
+    struct tokeniser * t = a->tokeniser;
+    struct node * p;
+    struct node * q;
+    switch (read_token(t)) {  /* first parse a single operand into p */
+        case c_minus: /* monadic */
+            p = new_node(a, c_neg);
+            p->right = read_AE(a, 100);  /* 100 exceeds every binding(), so only an operand is read */
+            break;
+        case c_bra:
+            p = read_AE(a, 0);
+            get_token(a, c_ket);
+            break;
+        case c_name:
+            p = new_node(a, c_name);
+            name_to_node(a, p, 'i');
+            break;
+        case c_maxint:
+        case c_minint:
+        case c_cursor:
+        case c_limit:
+        case c_size:
+            p = new_node(a, t->token);
+            break;
+        case c_number:
+            p = new_node(a, c_number);
+            p->number = t->number;
+            break;
+        case c_sizeof:
+            p = C_style(a, "s", c_sizeof);
+            break;
+        default:
+            error(a, 1);
+            t->token_held = true;
+            return 0;
+    }
+    repeat {  /* then fold in following binary operators while they bind tighter than B */
+        int token = read_token(t);
+        int b = binding(token);
+        unless (b > B) {  /* reuse b instead of calling binding() a second time */
+            t->token_held = true;
+            return p;
+        }
+        q = new_node(a, token);
+        q->left = p;
+        q->right = read_AE(a, b);
+        p = q;
+    }
+}
+
+static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
+    struct tokeniser * t = a->tokeniser;
+    struct node * p = new_node(a, op);
+    struct node * p_end = q;
+    p->left = q;
+    repeat {
+        q = read_C(a);
+        p_end->right = q; p_end = q;
+        if (read_token(t) != op) {
+            t->token_held = true;
+            break;
+        }
+    }
+    return p;
+}
+
+static struct node * read_C_list(struct analyser * a) {
+    struct tokeniser * t = a->tokeniser;
+    struct node * p = new_node(a, c_bra);
+    struct node * p_end = 0;
+    repeat {
+        int token = read_token(t);
+        if (token == c_ket) return p;
+        if (token < 0) { omission_error(a, c_ket); return p; }
+        t->token_held = true;
+        {
+            struct node * q = read_C(a);
+            repeat {
+                token = read_token(t);
+                if (token != c_and && token != c_or) {
+                    t->token_held = true;
+                    break;
+                }
+                q = read_C_connection(a, q, token);
+            }
+            if (p_end == 0) p->left = q; else p_end->right = q;
+            p_end = q;
+        }
+    }
+}
+
+static struct node * C_style(struct analyser * a, char * s, int token) {  /* build a node of type token, reading one operand per letter of s */
+    int i;
+    struct node * p = new_node(a, token);
+    for (i = 0; s[i] != 0; i++) switch(s[i]) {
+        case 'C':  /* a command, stored in left */
+            p->left = read_C(a); continue;
+        case 'D':  /* a second command, stored in aux */
+            p->aux = read_C(a); continue;
+        case 'A':  /* an arithmetic expression, stored in AE */
+            p->AE = read_AE(a, 0); continue;
+        case 'f':  /* the token c_for must come next */
+            get_token(a, c_for); continue;
+        case 'S':  /* a name checked as type 's', or a literal string; anything else is error 2 */
+            {
+                int str_token = read_token(a->tokeniser);
+                if (str_token == c_name) name_to_node(a, p, 's'); else
+                if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
+                else error(a, 2);
+            }
+            continue;
+        case 'b':
+        case 's':
+        case 'i':  /* a name whose type is checked against this same letter */
+            if (get_token(a, c_name)) name_to_node(a, p, s[i]);
+            continue;
+    }
+    return p;
+}
+
+static struct node * read_literalstring(struct analyser * a) {
+    struct node * p = new_node(a, c_literalstring);
+    p->literalstring = new_literalstring(a);
+    return p;
+}
+
+static void reverse_b(symbol * b) {
+    int lo = 0, hi = SIZE(b) - 1;
+    /* reverse in place: swap the outermost pair and work inwards */
+    for (; lo < hi; lo++, hi--) {
+        int tmp = b[lo]; b[lo] = b[hi]; b[hi] = tmp;
+    }
+}
+
+static int compare_amongvec(const void *pv, const void *qv) {
+    const struct amongvec * lhs = (const struct amongvec*)pv;
+    const struct amongvec * rhs = (const struct amongvec*)qv;
+    int common = lhs->size < rhs->size ? lhs->size : rhs->size;
+    int k = 0;
+    /* qsort comparator: order by the first differing symbol; when one
+       string is a prefix of the other, the shorter one sorts first */
+    while (k < common && lhs->b[k] == rhs->b[k]) k++;
+    if (k < common) return lhs->b[k] - rhs->b[k];
+    return lhs->size - rhs->size;
+}
+
+static void make_among(struct analyser * a, struct node * p, struct node * substring) {
+
+    NEW(among, x);
+    NEWVEC(amongvec, v, p->number);
+    struct node * q = p->left;
+    struct amongvec * w0 = v;
+    struct amongvec * w1 = v;
+    int result = 1;
+
+    int direction = substring != 0 ? substring->mode : p->mode;
+    int backward = direction == m_backward;
+
+    if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
+    a->amongs_end = x;
+    x->next = 0;
+    x->b = v;
+    x->number = a->among_count++;
+    x->starter = 0;
+
+    if (q->type == c_bra) { x->starter = q; q = q->right; }
+
+    until (q == 0) {
+        if (q->type == c_literalstring) {
+            symbol * b = q->literalstring;
+            w1->b = b;           /* pointer to case string */
+            w1->p = 0;           /* pointer to corresponding case expression */
+            w1->size = SIZE(b);  /* number of characters in string */
+            w1->i = -1;          /* index of longest substring */
+            w1->result = -1;     /* number of corresponding case expression */
+            w1->function = q->left == 0 ? 0 : q->left->name;
+            unless (w1->function == 0)
+                check_routine_mode(a, w1->function, direction);
+            w1++;
+        }
+        else
+        if (q->left == 0)  /* empty command: () */
+            w0 = w1;
+        else {
+            until (w0 == w1) {
+                w0->p = q;
+                w0->result = result;
+                w0++;
+            }
+            result++;
+        }
+        q = q->right;
+    }
+    unless (w1-v == p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
+    if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
+    qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
+
+    /* the following loop is O(n squared) */
+    for (w0 = w1 - 1; w0 >= v; w0--) {
+        symbol * b = w0->b;
+        int size = w0->size;
+        struct amongvec * w;
+
+        for (w = w0 - 1; w >= v; w--) {
+            if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
+                w0->i = w - v;  /* fill in index of longest substring */
+                break;
+            }
+        }
+    }
+    if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
+
+    for (w0 = v; w0 < w1 - 1; w0++)
+        if (w0->size == (w0 + 1)->size &&
+            memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) error3(a, p, w0->b);
+
+    x->literalstring_count = p->number;
+    x->command_count = result - 1;
+    p->among = x;
+
+    x->substring = substring;
+    if (substring != 0) substring->among = x;
+    unless (x->command_count == 0 && x->starter == 0) a->amongvar_needed = true;
+}
+
+static struct node * read_among(struct analyser * a) {  /* parse among( ... ) into a c_among node */
+    struct tokeniser * t = a->tokeniser;
+    struct node * p = new_node(a, c_among);
+    struct node * p_end = 0;
+    int previous_token = -1;
+    struct node * substring = a->substring;  /* take the pending substring node, if any */
+
+    a->substring = 0;  /* ...and clear it on the analyser */
+    p->number = 0; /* counts the number of literals */
+    unless (get_token(a, c_bra)) return p;
+    repeat {
+        struct node * q;
+        int token = read_token(t);
+        switch (token) {
+            case c_literalstring:
+                q = read_literalstring(a);
+                if (read_token(t) == c_name) {  /* a name directly after a literal is checked as type 'r' */
+                    struct node * r = new_node(a, c_name);
+                    name_to_node(a, r, 'r');
+                    q->left = r;
+                }
+                else t->token_held = true;
+                p->number++; break;
+            case c_bra:
+                if (previous_token == c_bra) error(a, 19);
+                q = read_C_list(a); break;
+            default:
+                error(a, 3);  /* fall through: after reporting, finish exactly as for c_ket */
+            case c_ket:
+                if (p->number == 0) error(a, 18);
+                if (t->error_count == 0) make_among(a, p, substring);  /* make_among is skipped once any error has been counted */
+                return p;
+        }
+        previous_token = token;
+        if (p_end == 0) p->left = q; else p_end->right = q;  /* append q to the chain hanging off p->left */
+        p_end = q;
+    }
+}
+
+static struct node * read_substring(struct analyser * a) {
+
+    struct node * p = new_node(a, c_substring);
+    if (a->substring != 0) error2(a, 20, a->substring->line_number);
+    a->substring = p;
+    return p;
+}
+
+static void check_modifyable(struct analyser * a) {
+    unless (a->modifyable) error(a, 15);
+}
+
+static struct node * read_C(struct analyser * a) { /* Read one command: fetch a token and
+       return the node built for it. An unrecognised token reports error 1 and
+       returns 0. */
+    struct tokeniser * t = a->tokeniser;
+    int token = read_token(t);
+    switch (token) {
+        case c_bra:
+            return read_C_list(a);
+        case c_backwards:
+            {
+                int mode = a->mode; /* put back once the operand has been read */
+                if (a->mode == m_backward) error(a, 17); else a->mode = m_backward;
+                {   struct node * p = C_style(a, "C", token); /* NOTE(review): the string presumably lists the operand kinds C_style() reads - confirm there */
+                    a->mode = mode;
+                    return p;
+                }
+            }
+        case c_reverse:
+            {
+                int mode = a->mode;
+                int modifyable = a->modifyable;
+                a->modifyable = false; /* the operand is read with modification disallowed ... */
+                a->mode = mode == m_forward ? m_backward : m_forward; /* ... and with the direction flipped */
+                {
+                    struct node * p = C_style(a, "C", token);
+                    a->mode = mode; /* both settings are restored before returning */
+                    a->modifyable = modifyable;
+                    return p;
+                }
+            }
+        case c_not:
+        case c_try:
+        case c_fail:
+        case c_test:
+        case c_do:
+        case c_goto:
+        case c_gopast:
+        case c_repeat:
+            return C_style(a, "C", token);
+        case c_loop:
+        case c_atleast:
+            return C_style(a, "AC", token);
+        case c_setmark:
+            return C_style(a, "i", token);
+        case c_tomark:
+        case c_atmark:
+        case c_hop:
+            return C_style(a, "A", token);
+        case c_delete:
+            check_modifyable(a); /* then falls through to the empty-style group below */
+        case c_next:
+        case c_tolimit:
+        case c_atlimit:
+        case c_leftslice:
+        case c_rightslice:
+        case c_true:
+        case c_false:
+        case c_debug:
+            return C_style(a, "", token);
+        case c_assignto:
+        case c_sliceto:
+            check_modifyable(a);
+            return C_style(a, "s", token);
+        case c_assign:
+        case c_insert:
+        case c_attach:
+        case c_slicefrom:
+            check_modifyable(a);
+            return C_style(a, "S", token);
+        case c_setlimit:
+            return C_style(a, "CfD", token);
+        case c_set:
+        case c_unset:
+            return C_style(a, "b", token);
+        case c_dollar:
+            get_token(a, c_name);
+            {
+                struct node * p;
+                struct name * q = find_name(a);
+                int mode = a->mode; /* saved here, restored after the switch whichever branch ran */
+                int modifyable = a->modifyable;
+                switch (q ? q->type : t_string)
+                    /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
+                {
+                    default: error(a, 34); /* reported, then handled like a string (falls through) */
+                    case t_string:
+                        a->mode = m_forward; /* the command applied to the string is read forward and modifiable */
+                        a->modifyable = true;
+                        p = new_node(a, c_dollar);
+                        p->left = read_C(a); break;
+                    case t_integer:
+                    /*  a->mode = m_integer;  */
+                        p = new_node(a, read_AE_test(a)); /* node type is whatever read_AE_test() returns */
+                        p->AE = read_AE(a, 0); break;
+                }
+                p->name = q; /* q may be 0 when the name was not found */
+                a->mode = mode;
+                a->modifyable = modifyable;
+                return p;
+            }
+        case c_name:
+            {
+                struct name * q = find_name(a);
+                struct node * p = new_node(a, c_name); /* type is refined below according to what q names */
+                unless (q == 0) {
+                    q->used = true;
+                    switch (q->type) {
+                        case t_boolean:
+                            p->type = c_booltest; break;
+                        case t_integer:
+                            error(a, 35); /* integer name misplaced */
+                        case t_string:
+                            break;
+                        case t_routine:
+                        case t_external:
+                            p->type = c_call;
+                            check_routine_mode(a, q, a->mode);
+                            break;
+                        case t_grouping:
+                            p->type = c_grouping; break;
+                    }
+                }
+                p->name = q;
+                return p;
+            }
+        case c_non:
+            {
+                struct node * p = new_node(a, token);
+                read_token(t);
+                if (t->token == c_minus) read_token(t); /* a '-' between 'non' and the name is skipped */
+                unless (check_token(a, c_name)) { omission_error(a, c_name); return p; }
+                name_to_node(a, p, 'g');
+                return p;
+            }
+        case c_literalstring:
+            return read_literalstring(a);
+        case c_among: return read_among(a);
+        case c_substring: return read_substring(a);
+        default: error(a, 1); return 0;
+    }
+}
+
+/* Fetch the character that starts at p into W[0] and return how many symbols
+   of p it occupied: the value returned by get_utf8() when utf8 is set,
+   otherwise always 1. */
+static int next_symbol(symbol * p, symbol * W, int utf8) {
+    int width = 1;
+    if (!utf8) {
+        W[0] = p[0];
+    } else {
+        int decoded;
+        width = get_utf8(p, & decoded);
+        W[0] = decoded;
+    }
+    return width;
+}
+
+/* Combine the characters of q into grouping p and return p (add_to_b() may
+   hand back a different block, so callers must use the returned pointer).
+
+   q is walked one character at a time with next_symbol(), decoding UTF-8
+   when utf8 is set. With style == c_plus each character is appended to p;
+   with any other style every occurrence of the character is removed from p.
+
+   When an element is removed the following elements are shifted down, so the
+   same index has to be examined again; advancing unconditionally would skip
+   the element that has just moved into place and leave one of two adjacent
+   duplicates behind. */
+static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
+    int j = 0;
+    symbol W[1];
+    int width;
+    if (style == c_plus) {
+        while (j < SIZE(q)) {
+            width = next_symbol(q + j, W, utf8);
+            p = add_to_b(p, 1, W);
+            j += width;
+        }
+    } else {
+        while (j < SIZE(q)) {
+            int i = 0;
+            width = next_symbol(q + j, W, utf8);
+            while (i < SIZE(p)) {
+                if (p[i] == W[0]) {
+                    memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
+                    SIZE(p)--;
+                    /* stay on index i: it now holds the next element */
+                } else {
+                    i++;
+                }
+            }
+            j += width;
+        }
+    }
+    return p;
+}
+
+/* Read the body of a grouping definition for name q: operands (grouping
+   names or literal strings) separated by '+' or '-'.
+
+   A new grouping record is appended to a->groupings and attached to q. The
+   first operand, and every operand after '+', is added to the record's
+   symbols; every operand after '-' is removed from them. Reading stops at the
+   first token after an operand that is neither '+' nor '-'; that token is
+   marked as held in the tokeniser. The smallest and largest symbols are then
+   recorded, and error 16 is reported if the grouping ended up empty.
+
+   An operand that is neither a name nor a literal string reports error 1 and
+   returns immediately. */
+static void read_define_grouping(struct analyser * a, struct name * q) {
+    struct tokeniser * t = a->tokeniser;
+    int style = c_plus;
+    {
+        NEW(grouping, p);
+        if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
+        a->groupings_end = p;
+        q->grouping = p;
+        p->next = 0;
+        p->name = q;
+        p->number = q->count;
+        p->b = create_b(0);
+        repeat {
+            switch (read_token(t)) {
+                case c_name:
+                    {
+                        struct name * r = find_name(a);
+                        unless (r == 0) {
+                            check_name_type(a, r, 'g');
+                            if (r->grouping != 0) {
+                                /* a grouping's symbols are stored one per
+                                   character, so no UTF-8 decoding here */
+                                p->b = alter_grouping(p->b, r->grouping->b, style, false);
+                            } else if (r->type == t_grouping) {
+                                /* r is a grouping with no definition yet, so
+                                   there is nothing to merge and dereferencing
+                                   r->grouping would crash. Reported with the
+                                   code this function already uses for an
+                                   unexpected token.
+                                   NOTE(review): confirm error 1 is the best
+                                   message for this case. */
+                                error(a, 1);
+                            }
+                            /* otherwise r has the wrong type, which
+                               check_name_type() has been given the chance
+                               to report */
+                        }
+                    }
+                    break;
+                case c_literalstring:
+                    p->b = alter_grouping(p->b, t->b, style, a->utf8);
+                    break;
+                default: error(a, 1); return;
+            }
+            switch (read_token(t)) {
+                case c_plus:
+                case c_minus: style = t->token; break;
+                default: goto label0;
+            }
+        }
+    label0:
+        {
+            int i;
+            int max = 0;
+            int min = 1<<16;   /* sentinel: unchanged afterwards means no symbols */
+            for (i = 0; i < SIZE(p->b); i++) {
+                if (p->b[i] > max) max = p->b[i];
+                if (p->b[i] < min) min = p->b[i];
+            }
+            p->largest_ch = max;
+            p->smallest_ch = min;
+            if (min == 1<<16) error(a, 16);
+        }
+        t->token_held = true; return;
+    }
+}
+
+static void read_define_routine(struct analyser * a, struct name * q) { /* read 'as C' for a routine definition; q is the name found by the caller, or 0 */
+    struct node * p = new_node(a, c_define);
+    a->amongvar_needed = false; /* cleared here; its value after the body is read is copied into p below */
+    unless (q == 0) {
+        check_name_type(a, q, 'R');
+        if (q->definition != 0) error(a, 36); /* q already has a definition */
+        if (q->mode < 0) q->mode = a->mode; else /* NOTE(review): a negative mode presumably means "not fixed yet" - confirm where names are created */
+        if (q->mode != a->mode) error2(a, 32, q->mode);
+    }
+    p->name = q;
+    if (a->program == 0) a->program = p; else a->program_end->right = p; /* append to the list of definitions */
+    a->program_end = p;
+    get_token(a, c_as);
+    p->left = read_C(a);
+    unless (q == 0) q->definition = p->left;
+
+    if (a->substring != 0) { /* a substring node is still recorded after the body: error 14 with its line, then forget it */
+         error2(a, 14, a->substring->line_number);
+         a->substring = 0;
+    }
+    p->amongvar_needed = a->amongvar_needed;
+}
+
+/* Handle 'define': expect a name, then read a grouping definition when the
+   name is a known grouping, or a routine definition in every other case
+   (including when the name is unknown, where 0 is passed on). */
+static void read_define(struct analyser * a) {
+    struct name * target;
+    unless (get_token(a, c_name)) return;
+    target = find_name(a);
+    if (target == 0 || target->type != t_grouping) {
+        read_define_routine(a, target);
+    } else {
+        read_define_grouping(a, target);
+    }
+}
+
+static void read_backwardmode(struct analyser * a) { /* 'backwardmode ( ... )': read the bracketed section with a->mode set to m_backward */
+    int mode = a->mode; /* restored on exit */
+    a->mode = m_backward;
+    if (get_token(a, c_bra)) {
+        read_program_(a, c_ket); /* returns at the matching ket (or when token -1 is read) */
+        check_token(a, c_ket);
+    }
+    a->mode = mode;
+}
+
+static void read_program_(struct analyser * a, int terminator) { /* read declarations and definitions until token -1 or, when terminator is c_ket, a ket */
+    struct tokeniser * t = a->tokeniser;
+    repeat {
+        switch (read_token(t)) {
+            case c_strings:     read_names(a, t_string); break;
+            case c_booleans:    read_names(a, t_boolean); break;
+            case c_integers:    read_names(a, t_integer); break;
+            case c_routines:    read_names(a, t_routine); break;
+            case c_externals:   read_names(a, t_external); break;
+            case c_groupings:   read_names(a, t_grouping); break;
+            case c_define:      read_define(a); break;
+            case c_backwardmode:read_backwardmode(a); break;
+            case c_ket:
+                if (terminator == c_ket) return; /* otherwise falls through: an unexpected ket is error 1 */
+            default:
+                error(a, 1); break;
+            case -1: /* NOTE(review): -1 is presumably the tokeniser's end-of-input token - confirm in read_token() */
+                unless (terminator < 0) omission_error(a, c_ket); /* a ket was still expected */
+                return;
+        }
+    }
+}
+
+extern void read_program(struct analyser * a) { /* read the whole program, then check the declared names */
+    read_program_(a, -1); /* negative terminator: no closing ket is expected */
+    {
+        struct name * q = a->names;
+        until (q == 0) { /* error4 for every used routine/external without a definition and every used grouping without a grouping record */
+            switch(q->type) {
+                case t_external: case t_routine:
+                    if (q->used && q->definition == 0) error4(a, q); break;
+                case t_grouping:
+                    if (q->used && q->grouping == 0) error4(a, q); break;
+            }
+            q = q->next;
+        }
+    }
+
+    if (a->tokeniser->error_count == 0) { /* the two warning lists are only printed when no errors were counted */
+        struct name * q = a->names;
+        int warned = false; /* becomes true once the list heading has been printed */
+        until (q == 0) {
+            unless (q->referenced) {
+                unless (warned) {
+                    fprintf(stderr, "Declared but not used:");
+                    warned = true;
+                }
+                fprintf(stderr, " "); report_b(stderr, q->b);
+            }
+            q = q->next;
+        }
+        if (warned) fprintf(stderr, "\n");
+
+        q = a->names;
+        warned = false;
+        until (q == 0) { /* second list: routines and groupings whose 'used' flag is clear */
+            if (! q->used && (q->type == t_routine ||
+                              q->type == t_grouping)) {
+                unless (warned) {
+                    fprintf(stderr, "Declared and defined but not used:");
+                    warned = true;
+                }
+                fprintf(stderr, " "); report_b(stderr, q->b);
+            }
+            q = q->next;
+        }
+        if (warned) fprintf(stderr, "\n");
+    }
+}
+
+extern struct analyser * create_analyser(struct tokeniser * t) { /* allocate an analyser reading from t, with all lists empty; release with close_analyser() */
+    NEW(analyser, a);
+    a->tokeniser = t;
+    a->nodes = 0;
+    a->names = 0;
+    a->literalstrings = 0;
+    a->program = 0;
+    a->amongs = 0;
+    a->among_count = 0;
+    a->groupings = 0;
+    a->mode = m_forward;
+    a->modifyable = true;
+    { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
+    a->substring = 0; /* a->utf8 is not set here; main() in driver.c assigns it before read_program() */
+    return a;
+}
+
+extern void close_analyser(struct analyser * a) { /* free every list hanging off a, then a itself; the tokeniser is not touched here */
+    {
+        struct node * q = a->nodes;
+        until (q == 0) {
+            struct node * q_next = q->next; /* fetched before q is freed */
+            FREE(q);
+            q = q_next;
+        }
+    }
+    {
+        struct name * q = a->names;
+        until (q == 0) {
+            struct name * q_next = q->next;
+            lose_b(q->b); FREE(q);
+            q = q_next;
+        }
+    }
+    {
+        struct literalstring * q = a->literalstrings;
+        until (q == 0) {
+            struct literalstring * q_next = q->next;
+            lose_b(q->b); FREE(q);
+            q = q_next;
+        }
+    }
+    {
+        struct among * q = a->amongs;
+        until (q == 0) {
+            struct among * q_next = q->next;
+            FREE(q->b); FREE(q); /* unlike the other lists, an among's b is released with FREE rather than lose_b */
+            q = q_next;
+        }
+    }
+    {
+        struct grouping * q = a->groupings;
+        until (q == 0) {
+            struct grouping * q_next = q->next;
+            lose_b(q->b); FREE(q);
+            q = q_next;
+        }
+    }
+    FREE(a);
+}
+
diff --git a/snowball_code/compiler/driver.c b/snowball_code/compiler/driver.c
new file mode 100644
index 0000000..38ad637
--- /dev/null
+++ b/snowball_code/compiler/driver.c
@@ -0,0 +1,256 @@
+#include <stdio.h>   /* for main etc */
+#include <stdlib.h>  /* for free etc */
+#include <string.h>  /* for strlen */
+#include "header.h"
+
+#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
+#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
+#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
+#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"
+
+/* Return 1 when the strings s1 and s2 are identical, 0 otherwise. */
+static int eq(char * s1, char * s2) {
+    return strcmp(s1, s2) == 0;
+}
+
+static void print_arglist(void) { /* print the usage summary on stderr and exit with status 1; never returns */
+    fprintf(stderr, "Usage: snowball <file> [options]\n\n"
+                    "options are: [-o[utput] file]\n"
+                    "             [-s[yntax]]\n"
+#ifndef DISABLE_JAVA
+                    "             [-j[ava]]\n"
+#endif
+                    "             [-c++]\n"
+                    "             [-w[idechars]]\n"
+                    "             [-u[tf8]]\n"
+                    "             [-n[ame] class name]\n"
+                    "             [-ep[refix] string]\n"
+                    "             [-vp[refix] string]\n"
+                    "             [-i[nclude] directory]\n"
+                    "             [-r[untime] path to runtime headers]\n"
+#ifndef DISABLE_JAVA
+                    "             [-p[arentclassname] fully qualified parent class name]\n"
+                    "             [-P[ackage] package name for stemmers]\n"
+                    "             [-S[tringclass] StringBuffer-compatible class]\n"
+                    "             [-a[mongclass] fully qualified name of the Among class]\n"
+#endif
+           );
+    exit(1);
+}
+
+static void check_lim(int i, int argc) { /* make sure argv[i] exists, i.e. the option just read has its value */
+    if (i >= argc) {
+        fprintf(stderr, "argument list is one short\n");
+        print_arglist(); /* exits, so callers only continue when i < argc */
+    }
+}
+
+static FILE * get_output(symbol * b) { /* open the file named by b for writing; exits with status 1 if it cannot be opened */
+    char * s = b_to_s(b); /* string form of b, released with free() below */
+    FILE * output = fopen(s, "w");
+    if (output == 0) {
+        fprintf(stderr, "Can't open output %s\n", s);
+        exit(1);
+    }
+    free(s);
+    return output; /* b itself is left for the caller to release */
+}
+
+/* Fill in *o from the command line.
+
+   Every field is first given its default, then argv[2] onwards is scanned
+   (argv[1] is the input file, which main() reads itself). Options that take
+   a value consume the following argument; check_lim() prints the usage and
+   exits if that argument is missing. An unrecognised argument is reported as
+   misplaced and the usage is printed, which also exits.
+
+   -w clears utf8 and -u clears widechars, so whichever of the two comes
+   last wins. The string pointers stored in *o point into argv or at string
+   literals; only the include list is allocated here.
+
+   The usage text spells the string-class option as -S[tringclass], so
+   "-Stringclass" is accepted as well as the historical "-stringclass". */
+static void read_options(struct options * o, int argc, char * argv[]) {
+    char * s;
+    int i = 2;
+
+    /* set defaults: */
+
+    o->output_file = 0;
+    o->syntax_tree = false;
+    o->externals_prefix = "";
+    o->variables_prefix = 0;
+    o->runtime_path = 0;
+    o->parent_class_name = DEFAULT_BASE_CLASS;
+    o->string_class = DEFAULT_STRING_CLASS;
+    o->among_class = DEFAULT_AMONG_CLASS;
+    o->package = DEFAULT_PACKAGE;
+    o->name = "";
+    o->make_lang = LANG_C;
+    o->widechars = false;
+    o->includes = 0;
+    o->includes_end = 0;
+    o->utf8 = false;
+
+    /* read options: */
+
+    repeat {
+        if (i >= argc) break;
+        s = argv[i++];
+        {   if (eq(s, "-o") || eq(s, "-output")) {
+                check_lim(i, argc);
+                o->output_file = argv[i++];
+                continue;
+            }
+            if (eq(s, "-n") || eq(s, "-name")) {
+                check_lim(i, argc);
+                o->name = argv[i++];
+                continue;
+            }
+#ifndef DISABLE_JAVA
+            if (eq(s, "-j") || eq(s, "-java")) {
+                o->make_lang = LANG_JAVA;
+                o->widechars = true;
+                continue;
+            }
+#endif
+            if (eq(s, "-c++")) {
+                o->make_lang = LANG_CPLUSPLUS;
+                continue;
+            }
+            if (eq(s, "-w") || eq(s, "-widechars")) {
+                o->widechars = true;
+                o->utf8 = false;
+                continue;
+            }
+            if (eq(s, "-s") || eq(s, "-syntax")) {
+                o->syntax_tree = true;
+                continue;
+            }
+            if (eq(s, "-ep") || eq(s, "-eprefix")) {
+                check_lim(i, argc);
+                o->externals_prefix = argv[i++];
+                continue;
+            }
+            if (eq(s, "-vp") || eq(s, "-vprefix")) {
+                check_lim(i, argc);
+                o->variables_prefix = argv[i++];
+                continue;
+            }
+            if (eq(s, "-i") || eq(s, "-include")) {
+                check_lim(i, argc);
+
+                {
+                    /* append "<directory>/" to the list of include paths */
+                    NEW(include, p);
+                    symbol * b = add_s_to_b(0, argv[i++]);
+                    b = add_s_to_b(b, "/");
+                    p->next = 0; p->b = b;
+
+                    if (o->includes == 0) o->includes = p; else
+                                          o->includes_end->next = p;
+                    o->includes_end = p;
+                }
+                continue;
+            }
+            if (eq(s, "-r") || eq(s, "-runtime")) {
+                check_lim(i, argc);
+                o->runtime_path = argv[i++];
+                continue;
+            }
+            if (eq(s, "-u") || eq(s, "-utf8")) {
+                o->utf8 = true;
+                o->widechars = false;
+                continue;
+            }
+#ifndef DISABLE_JAVA
+            if (eq(s, "-p") || eq(s, "-parentclassname")) {
+                check_lim(i, argc);
+                o->parent_class_name = argv[i++];
+                continue;
+            }
+            if (eq(s, "-P") || eq(s, "-Package")) {
+                check_lim(i, argc);
+                o->package = argv[i++];
+                continue;
+            }
+            if (eq(s, "-S") || eq(s, "-Stringclass") || eq(s, "-stringclass")) {
+                check_lim(i, argc);
+                o->string_class = argv[i++];
+                continue;
+            }
+            if (eq(s, "-a") || eq(s, "-amongclass")) {
+                check_lim(i, argc);
+                o->among_class = argv[i++];
+                continue;
+            }
+#endif
+            fprintf(stderr, "'%s' misplaced\n", s);
+            print_arglist();
+        }
+    }
+}
+
+extern int main(int argc, char * argv[]) { /* snowball <file> [options]: analyse the program in argv[1], then print its syntax tree or generate code */
+
+    NEW(options, o);
+    if (argc == 1) print_arglist(); /* no input file given; print_arglist() exits */
+    read_options(o, argc, argv);
+    {
+        symbol * filename = add_s_to_b(0, argv[1]);
+        symbol * u = get_input(filename);
+        if (u == 0) {
+            fprintf(stderr, "Can't open input %s\n", argv[1]);
+            exit(1);
+        }
+        {
+            struct tokeniser * t = create_tokeniser(u);
+            struct analyser * a = create_analyser(t);
+            t->widechars = o->widechars;
+            t->includes = o->includes;
+            a->utf8 = t->utf8 = o->utf8;
+            read_program(a);
+            if (t->error_count > 0) exit(1); /* nothing is generated once any error has been counted */
+            if (o->syntax_tree) print_program(a);
+            close_tokeniser(t); /* t is not used after this point */
+            unless (o->syntax_tree) {
+                struct generator * g;
+
+                char * s = o->output_file;
+                unless (s) {
+                    fprintf(stderr, "Please include the -o option\n");
+                    print_arglist();
+                    exit(1); /* not reached: print_arglist() has already exited */
+                }
+                if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
+                    symbol * b = add_s_to_b(0, s);
+                    b = add_s_to_b(b, ".h");
+                    o->output_h = get_output(b);
+                    b[SIZE(b) - 1] = 'c'; /* reuse the same name with the final 'h' replaced: "<s>.c" */
+                    if (o->make_lang == LANG_CPLUSPLUS) {
+                        b = add_s_to_b(b, "c"); /* "<s>.cc" for C++ */
+                    }
+                    o->output_c = get_output(b);
+                    lose_b(b);
+
+                    g = create_generator_c(a, o);
+                    generate_program_c(g);
+                    close_generator_c(g);
+                    fclose(o->output_c);
+                    fclose(o->output_h);
+                }
+#ifndef DISABLE_JAVA
+                if (o->make_lang == LANG_JAVA) {
+                    symbol * b = add_s_to_b(0, s);
+                    b = add_s_to_b(b, ".java");
+                    o->output_java = get_output(b);
+                    lose_b(b);
+                    g = create_generator_java(a, o);
+                    generate_program_java(g);
+                    close_generator_java(g);
+                    fclose(o->output_java);
+                }
+#endif
+            }
+            close_analyser(a);
+        }
+        lose_b(u);
+        lose_b(filename);
+    }
+    {   struct include * p = o->includes; /* release the include list built by read_options() */
+        until (p == 0)
+        {   struct include * q = p->next;
+            lose_b(p->b); FREE(p); p = q;
+        }
+    }
+    FREE(o);
+    unless (space_count == 0) fprintf(stderr, "%d blocks unfreed\n", space_count); /* NOTE(review): space_count is presumably the allocator's count of live blocks - confirm where it is defined */
+    return 0;
+}
+
diff --git a/snowball_code/compiler/generator.c b/snowball_code/compiler/generator.c
new file mode 100644
index 0000000..7eeba9a
--- /dev/null
+++ b/snowball_code/compiler/generator.c
@@ -0,0 +1,1443 @@
+
+#include <limits.h>  /* for INT_MAX */
+#include <stdio.h>   /* for fprintf etc */
+#include <stdlib.h>  /* for free etc */
+#include <string.h>  /* for strlen */
+#include "header.h"
+
+/* Define this to get warning messages when optimisations can't be used. */
+/* #define OPTIMISATION_WARNINGS */
+
+/* recursive use: */
+
+static void generate(struct generator * g, struct node * p);
+
+enum special_labels { /* failure_label values that do not name a numbered "labN" label */
+
+    x_return = -1 /* on failure wf() emits "return 0;" instead of a goto */
+
+};
+
+static int new_label(struct generator * g) { /* hand out the next unused label number */
+    return g->next_label++;
+}
+
+/* Output routines */
+/* Write the contents of str to outfile as text. */
+static void output_str(FILE * outfile, struct str * str) {
+    char * text = b_to_s(str_data(str));   /* released with free() below */
+    fputs(text, outfile);
+    free(text);
+}
+
+static void wch(struct generator * g, int ch) { /* append one character to g->outbuf */
+    str_append_ch(g->outbuf, ch); /* character */
+}
+
+static void wnl(struct generator * g) { /* end the current output line */
+    str_append_ch(g->outbuf, '\n'); /* newline */
+    g->line_count++; /* wsetl() and wbe() compare this count to see whether a label was the last thing written */
+}
+
+static void ws(struct generator * g, const char * s) { /* append the C string s to g->outbuf */
+    str_append_string(g->outbuf, s); /* string */
+}
+
+static void wi(struct generator * g, int i) { /* append the integer i to g->outbuf via str_append_int() */
+    str_append_int(g->outbuf, i); /* integer */
+}
+
+static void wh_ch(struct generator * g, int i) { /* append the upper-case hex digit for the low four bits of i */
+    str_append_ch(g->outbuf, "0123456789ABCDEF"[i & 0xF]); /* hexchar */
+}
+
+static void wh(struct generator * g, int i) { /* append i in hex, most significant digit first, without leading zeros */
+    if (i >> 4) wh(g, i >> 4); /* NOTE(review): assumes i >= 0; for a negative i the shifted value need never reach 0 - confirm callers only pass symbols */
+    wh_ch(g, i); /* hex integer */
+}
+
+/* Write i preceded by one space when i < 100 and by a second space when
+   i < 10, so that values from 0 to 999 occupy three columns. */
+static void wi3(struct generator * g, int i) {
+    int padding = (i < 100) + (i < 10);
+    while (padding-- > 0) wch(g, ' ');
+    wi(g, i);
+}
+
+static void wvn(struct generator * g, struct name * p) {  /* variable name */
+
+    int ch = "SBIrxg"[p->type]; /* one letter per name type; NOTE(review): relies on the t_* values indexing this string in order - confirm against the enum */
+    switch (p->type) {
+        case t_string:
+        case t_boolean:
+        case t_integer:
+            wch(g, ch); wch(g, '['); wi(g, p->count); wch(g, ']'); return; /* variables are written as an array slot, e.g. S[0] */
+        case t_external:
+            ws(g, g->options->externals_prefix); break; /* externals: the configured prefix, then the name */
+        default:
+            wch(g, ch); wch(g, '_'); /* everything else: type letter, '_', then the name */
+    }
+    str_append_b(g->outbuf, p->b);
+}
+
+static void wv(struct generator * g, struct name * p) {  /* reference to variable */
+    if (p->type < t_routine) ws(g, "z->"); /* types ordered before t_routine are reached through z */
+    wvn(g, p);
+}
+
+static void wlitarray(struct generator * g, symbol * p) {  /* write literal array */
+
+    ws(g, "{ ");
+    {
+        int i;
+        for (i = 0; i < SIZE(p); i++) {
+            int ch = p[i];
+            if (32 <= ch && ch < 127) { /* codes 32..126 are written as character constants */
+                wch(g, '\'');
+                switch (ch) {
+                    case '\'':
+                    case '\\': wch(g, '\\'); /* quote and backslash get a backslash first, then fall through */
+                    default:   wch(g, ch);
+                }
+                wch(g, '\'');
+            }  else { /* anything else is written as a 0x hex constant */
+                wch(g, '0'); wch(g, 'x'); wh(g, ch);
+            }
+            if (i < SIZE(p) - 1) ws(g, ", "); /* separator between elements, none after the last */
+        }
+    }
+    ws(g, " }");
+}
+
+static void wlitref(struct generator * g, symbol * p) {  /* write ref to literal array */
+
+    if (SIZE(p) == 0) ws(g, "0"); else { /* an empty literal is referenced as 0 and declares nothing */
+        struct str * s = g->outbuf;
+        g->outbuf = g->declarations; /* temporarily redirect output so the array definition lands in the declarations buffer */
+        ws(g, "static const symbol s_"); wi(g, g->literalstring_count); ws(g, "[] = ");
+        wlitarray(g, p);
+        ws(g, ";\n");
+        g->outbuf = s;
+        ws(g, "s_"); wi(g, g->literalstring_count); /* the reference itself: the name just declared */
+        g->literalstring_count++;
+    }
+}
+
+
+static void wm(struct generator * g) {       /* margin */
+    int i;
+    for (i = 0; i < g->margin; i++) ws(g, "    "); /* four spaces per level of g->margin */
+}
+
+static void wc(struct generator * g, struct node * p) { /* comment */
+
+    ws(g, " /* ");
+    ws(g, (char *) name_of_token(p->type)); /* the token name for p's type ... */
+    unless (p->name == 0) {
+        ws(g, " ");
+        str_append_b(g->outbuf, p->name->b); /* ... followed by the node's name when it has one ... */
+    }
+    ws(g, ", line "); wi(g, p->line_number); ws(g, " */"); /* ... and its source line number */
+    wnl(g); /* the comment always ends the output line */
+}
+
+static void wms(struct generator * g, const char * s) { /* write the current margin, then s */
+    wm(g); ws(g, s);   } /* margin + string */
+
+static void wbs(struct generator * g) { /* block start */
+    wms(g, "{   "); /* the brace plus three spaces fills one four-column margin step, so text can follow on the same line */
+    g->margin++;
+}
+
+static void wbe(struct generator * g) {    /* block end */
+
+    if (g->line_labelled == g->line_count) { wms(g, ";"); wnl(g); } /* nothing was written since the last label (see wsetl()): emit an empty statement before the brace */
+    g->margin--;
+    wms(g, "}"); wnl(g);
+}
+
+/* Emit a declaration that saves the cursor in a freshly numbered local.
+   g->keep_count is incremented first and supplies the number N: in forward
+   mode the text is "int cN = z->c;", otherwise
+   "int mN = z->l - z->c; (void)mN;". */
+static void wk(struct generator * g, struct node * p) {
+    int forward = (p->mode == m_forward);
+    g->keep_count++;
+    ws(g, forward ? "int c" : "int m");
+    wi(g, g->keep_count);
+    ws(g, forward ? " = z->c;" : " = z->l - z->c; (void)m");
+    if (!forward) {
+        wi(g, g->keep_count);
+        ws(g, ";");
+    }
+}
+
+/* Emit the statement that puts the cursor back from the saved local numbered
+   keep_token: "z->c = cN;" in forward mode, "z->c = z->l - mN;" otherwise. */
+static void wrestore(struct generator * g, struct node * p, int keep_token) {
+    const char * assignment = "z->c = z->l - m";
+    if (p->mode == m_forward) assignment = "z->c = c";
+    ws(g, assignment);
+    wi(g, keep_token);
+    ws(g, ";");
+}
+
+/* Emit the statement that moves the cursor one place in p's direction:
+   "z->c++;" in forward mode, "z->c--;" otherwise. */
+static void winc(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        ws(g, "z->c++;");
+    } else {
+        ws(g, "z->c--;");
+    }
+}
+
+static void wsetl(struct generator * g, int n) { /* write the label "labN:" on a line of its own, one margin level out */
+
+    g->margin--;
+    wms(g, "lab"); wi(g, n); wch(g, ':'); wnl(g);
+    g->line_labelled = g->line_count; /* remembered so wbe() can tell when a label is directly followed by a block end */
+    g->margin++;
+}
+
+static void wgotol(struct generator * g, int n) { /* write "goto labN;" as a complete line at the current margin */
+    wms(g, "goto lab"); wi(g, n); wch(g, ';'); wnl(g);
+}
+
+static void wf(struct generator * g) {          /* fail */
+    if (g->failure_string != 0) { ws(g, "{ "); ws(g, g->failure_string); wch(g, ' '); } /* when set, the failure string is emitted first and the whole action is wrapped in braces */
+    switch (g->failure_label)
+    {
+        case x_return:
+           ws(g, "return 0;");
+           break;
+        default:
+           ws(g, "goto lab");
+           wi(g, g->failure_label);
+           wch(g, ';');
+           g->label_used = 1; /* tells the code that owns the label that it is needed (see generate_or()) */
+    }
+    if (g->failure_string != 0) ws(g, " }");
+}
+
+/* Emit a test that fails when the cursor is at the limit for p's direction:
+   "if (z->c >= z->l) " in forward mode or "if (z->c <= z->lb) " otherwise,
+   followed by the failure action written by wf(). */
+static void wlim(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        ws(g, "if (z->c >= z->l) ");
+    } else {
+        ws(g, "if (z->c <= z->lb) ");
+    }
+    wf(g);
+}
+
+static void wp(struct generator * g, const char * s, struct node * p) { /* formatted write: copy s to the output, expanding the '~' escapes below; p is the node the node-specific escapes refer to (w() passes 0) */
+    int i = 0;
+    int l = strlen(s);
+    until (i >= l) {
+        int ch = s[i++];
+        if (ch != '~') wch(g, ch); else
+        switch(s[i++]) { /* the character after '~' selects the escape */
+            default:  wch(g, s[i - 1]); continue; /* not an escape letter: written as it stands */
+            case 'C': wc(g, p); continue; /* comment naming p, ends the line */
+            case 'k': wk(g, p); continue; /* declaration saving the cursor */
+            case 'K': /* keep for c_test */
+                ws(g, p->mode == m_forward ? "int c_test = z->c;" :
+                                             "int m_test = z->l - z->c;");
+                continue;
+            case 'R': /* restore for c_test */
+                ws(g, p->mode == m_forward ? "z->c = c_test;" :
+                                             "z->c = z->l - m_test;");
+                continue;
+            case 'i': winc(g, p); continue; /* step the cursor */
+            case 'l': wlim(g, p); continue; /* fail if at the limit */
+            case 'f': wf(g); continue; /* failure action */
+            case 'M': wm(g); continue; /* margin */
+            case 'N': wnl(g); continue; /* newline */
+            case '{': wbs(g); continue; /* block start */
+            case '}': wbe(g); continue; /* block end */
+            case 'S': ws(g, g->S[s[i++] - '0']); continue; /* ~Sn, ~In, ... take one further digit n selecting a slot of g->S, g->I, g->V or g->L */
+            case 'I': wi(g, g->I[s[i++] - '0']); continue;
+            case 'J': wi3(g, g->I[s[i++] - '0']); continue; /* as ~I but padded by wi3() */
+            case 'V': wv(g, g->V[s[i++] - '0']); continue; /* reference to the variable */
+            case 'W': wvn(g, g->V[s[i++] - '0']); continue; /* the variable's name only */
+            case 'L': wlitref(g, g->L[s[i++] - '0']); continue; /* reference to a literal, declaring it as needed */
+            case 'A': wlitarray(g, g->L[s[i++] - '0']); continue; /* the literal written out as an array initialiser */
+            case '+': g->margin++; continue;
+            case '-': g->margin--; continue;
+            case '$': /* insert_s, insert_v etc */
+                wch(g, p->literalstring == 0 ? 'v' : 's');
+                continue;
+            case 'p': ws(g, g->options->externals_prefix); continue; /* the externals prefix option */
+        }
+    }
+}
+
+static void w(struct generator * g, const char * s) { wp(g, s, 0); } /* wp() without a node: s must only use escapes that do not read p */
+
+/* Write the C text for arithmetic-expression node p to the output buffer.
+   Leaves and unary minus are written directly; the four binary operators are
+   written fully parenthesised as "(left op right)". Node types that are not
+   listed produce no output. */
+static void generate_AE(struct generator * g, struct node * p) {
+    const char * infix = 0;   /* operator text, set only for binary nodes */
+    switch (p->type) {
+        case c_name:     wv(g, p->name); return;
+        case c_number:   wi(g, p->number); return;
+        case c_maxint:   ws(g, "MAXINT"); return;
+        case c_minint:   ws(g, "MININT"); return;
+        case c_neg:      wch(g, '-'); generate_AE(g, p->right); return;
+        case c_sizeof:
+            g->V[0] = p->name;
+            w(g, "SIZE(~V0)");
+            return;
+        case c_cursor:   w(g, "z->c"); return;
+        case c_limit:    w(g, p->mode == m_forward ? "z->l" : "z->lb"); return;
+        case c_size:     w(g, "SIZE(z->p)"); return;
+        case c_multiply: infix = " * "; break;
+        case c_plus:     infix = " + "; break;
+        case c_minus:    infix = " - "; break;
+        case c_divide:   infix = " / "; break;
+        default:         return;
+    }
+    wch(g, '(');
+    generate_AE(g, p->left);
+    ws(g, infix);
+    generate_AE(g, p->right);
+    wch(g, ')');
+}
+
+/* K_needed() tests to see if we really need to keep c. Not true when the
+   command does not touch the cursor. This and repeat_score() could be
+   elaborated almost indefinitely.
+*/
+
+/* How many nested routine calls K_needed() follows before giving up. */
+enum { K_NEEDED_MAX_CALL_DEPTH = 100 };
+
+/* Worker for K_needed(). call_depth is the number of c_call nodes that have
+   been followed to reach p. */
+static int K_needed_at_depth(struct generator * g, struct node * p, int call_depth) {
+    until (p == 0) {
+        switch (p->type) {
+            case c_dollar:
+            case c_leftslice:
+            case c_rightslice:
+            case c_mathassign:
+            case c_plusassign:
+            case c_minusassign:
+            case c_multiplyassign:
+            case c_divideassign:
+            case c_eq:
+            case c_ne:
+            case c_gr:
+            case c_ge:
+            case c_ls:
+            case c_le:
+            case c_sliceto:
+            case c_true:
+            case c_false:
+            case c_debug:
+                break;
+
+            case c_call:
+                /* A routine that calls itself, directly or through others,
+                   would otherwise be followed until the stack ran out. Past
+                   a generous depth, make the safe assumption that the cursor
+                   has to be kept. */
+                if (call_depth >= K_NEEDED_MAX_CALL_DEPTH) return true;
+                if (K_needed_at_depth(g, p->name->definition, call_depth + 1)) return true;
+                break;
+
+            case c_bra:
+                if (K_needed_at_depth(g, p->left, call_depth)) return true;
+                break;
+
+            default: return true;
+        }
+        p = p->right;
+    }
+    return false;
+}
+
+/* Return true when the commands in the list starting at p (following ->right)
+   may move the cursor, so that code saving and restoring it is needed.
+   Returns false only when every command is one of the kinds listed in the
+   worker as leaving the cursor alone; called routines and bracketed groups
+   are examined recursively, and any other command counts as needing the
+   keep. */
+static int K_needed(struct generator * g, struct node * p) {
+    return K_needed_at_depth(g, p, 0);
+}
+
+/* How many nested routine calls repeat_score() follows before giving up. */
+enum { REPEAT_SCORE_MAX_CALL_DEPTH = 100 };
+
+/* Worker for repeat_score(). call_depth is the number of c_call nodes that
+   have been followed to reach p. */
+static int repeat_score_at_depth(struct generator * g, struct node * p, int call_depth) {
+    int score = 0;
+    until (p == 0)
+    {
+        switch (p->type) {
+            case c_dollar:
+            case c_leftslice:
+            case c_rightslice:
+            case c_mathassign:
+            case c_plusassign:
+            case c_minusassign:
+            case c_multiplyassign:
+            case c_divideassign:
+            case c_eq:
+            case c_ne:
+            case c_gr:
+            case c_ge:
+            case c_ls:
+            case c_le:
+            case c_sliceto:   /* case c_not: must not be included here! */
+            case c_debug:
+                break;
+
+            case c_call:
+                /* A routine that calls itself, directly or through others,
+                   would otherwise be followed until the stack ran out. Past
+                   a generous depth, return 2, the score at which
+                   repeat_restore() asks for the cursor to be reinstated;
+                   scores never decrease below 2 once reached, so the
+                   pessimistic answer carries up to the outermost call. */
+                if (call_depth >= REPEAT_SCORE_MAX_CALL_DEPTH) return 2;
+                score += repeat_score_at_depth(g, p->name->definition, call_depth + 1);
+                break;
+
+            case c_bra:
+                score += repeat_score_at_depth(g, p->left, call_depth);
+                break;
+
+            case c_name:
+            case c_literalstring:
+            case c_next:
+            case c_grouping:
+            case c_non:
+            case c_hop:
+                score = score + 1; break;
+
+            default: score = 2; break;
+        }
+        p = p->right;
+    }
+    return score;
+}
+
+/* Score the command list starting at p (following ->right): commands listed
+   in the first group of the worker add nothing, the simple matching commands
+   add 1 each, called routines and bracketed groups add their own score, and
+   any other command sets the score to 2. */
+static int repeat_score(struct generator * g, struct node * p) {
+    return repeat_score_at_depth(g, p, 0);
+}
+
+/* Tests whether an expression requires cursor reinstatement in a repeat:
+ * true once its repeat_score() reaches 2. */
+
+static int repeat_restore(struct generator * g, struct node * p) {
+    int score = repeat_score(g, p);
+    return score >= 2;
+}
+
+/* Generates code for every child of p, in order. */
+static void generate_bra(struct generator * g, struct node * p) {
+    struct node * child;
+    for (child = p->left; child != 0; child = child->right) {
+        generate(g, child);
+    }
+}
+
+/* Generates code for an 'and' node: every child is generated in turn and,
+ * when K_needed() says so, a wrestore() is emitted between consecutive
+ * children (but not after the last one). */
+static void generate_and(struct generator * g, struct node * p) {
+    struct node * q;
+    int slot = 0;   /* g->keep_count at the time of the keep; 0 if none */
+
+    if (!K_needed(g, p->left)) {
+        wp(g, "~M~C", p);
+    } else {
+        wp(g, "~{~k~C", p);
+        slot = g->keep_count;
+    }
+
+    for (q = p->left; q != 0; q = q->right) {
+        generate(g, q);
+        if (slot == 0 || q->right == 0) continue;
+        w(g, "~M");
+        wrestore(g, q, slot);
+        w(g, "~N");
+    }
+
+    if (slot != 0) w(g, "~}");
+}
+
+/* Generates code for an 'or' node.
+ *
+ * Every alternative except the last is generated with a fresh failure label
+ * and no failure string; wgotol(g, out_lab) is written after it, then the
+ * failure label is placed if it was used, followed by a wrestore() when a
+ * keep was emitted.  The last alternative is generated with the caller's
+ * failure state back in place, and out_lab is placed at the very end.
+ *
+ * NOTE(review): the loop dereferences p->right without a null check on
+ * p->left, so the node is assumed to have at least one child. */
+static void generate_or(struct generator * g, struct node * p) {
+    int keep_c = 0;
+
+    /* caller's failure state, reinstated before the last alternative */
+    int used = g->label_used;
+    int a0 = g->failure_label;
+    const char * a1 = g->failure_string;
+
+    int out_lab = new_label(g);
+
+    if (K_needed(g, p->left)) {
+        wp(g, "~{~k~C", p);
+        keep_c = g->keep_count;
+    } else {
+        wp(g, "~M~C", p);
+    }
+    p = p->left;
+    g->failure_string = 0;
+    /* all alternatives but the last */
+    until (p->right == 0) {
+        g->failure_label = new_label(g);
+        g->label_used = 0;
+        generate(g, p);
+        wgotol(g, out_lab);
+        if (g->label_used)
+            wsetl(g, g->failure_label);
+        if (keep_c) {
+            w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
+        }
+        p = p->right;
+    }
+    g->label_used = used;
+    g->failure_label = a0;
+    g->failure_string = a1;
+
+    /* last alternative: its failure is the failure of the whole 'or' */
+    generate(g, p);
+    if (keep_c) w(g, "~}");
+    wsetl(g, out_lab);
+}
+
+/* Emits "z->lb = z->c; z->c = z->l;", then the code for p's child, then
+ * "z->c = z->lb;". */
+static void generate_backwards(struct generator * g, struct node * p) {
+    struct node * body = p->left;
+
+    wp(g, "~Mz->lb = z->c; z->c = z->l;~C~N", p);
+    generate(g, body);
+    w(g, "~Mz->c = z->lb;~N");
+}
+
+
+/* Generates code for a 'not' node.
+ *
+ * The operand is generated with a fresh failure label and no failure string.
+ * Directly after it the caller's failure state is put back and "~f" is
+ * written with that state in force; the operand's own failure label is then
+ * placed (if it was used), followed by a wrestore() when a keep was
+ * emitted. */
+static void generate_not(struct generator * g, struct node * p) {
+    int keep_c = 0;
+
+    /* caller's failure state */
+    int used = g->label_used;
+    int a0 = g->failure_label;
+    const char * a1 = g->failure_string;
+
+    if (K_needed(g, p->left)) {
+        wp(g, "~{~k~C", p);
+        keep_c = g->keep_count;
+    } else {
+        wp(g, "~M~C", p);
+    }
+
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    g->failure_string = 0;
+    generate(g, p->left);
+
+    {
+        /* remember the operand's label before the caller's state replaces it */
+        int l = g->failure_label;
+        int u = g->label_used;
+
+        g->label_used = used;
+        g->failure_label = a0;
+        g->failure_string = a1;
+
+        /* written with the caller's failure state in force */
+        w(g, "~M~f~N");
+        if (u)
+            wsetl(g, l);
+    }
+    if (keep_c) {
+        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
+    }
+}
+
+
+/* Generates code for a 'try' node.
+ *
+ * When K_needed() reports that the operand needs it, the cursor is saved in
+ * a local (c_keep going forward, m_keep going backward) and the matching
+ * restore statement is installed as the failure string; otherwise the
+ * failure string is cleared.  The operand is generated against a fresh
+ * failure label, which is placed afterwards if it was used. */
+static void generate_try(struct generator * g, struct node * p) {
+    const int needs_keep = K_needed(g, p->left);
+
+    if (!needs_keep) {
+        wp(g, "~M~C", p);
+        g->failure_string = 0;
+    } else if (p->mode == m_forward) {
+        wp(g, "~{int c_keep = z->c;~C", p);
+        g->failure_string = "z->c = c_keep;";
+    } else {
+        wp(g, "~{int m_keep = z->l - z->c;/* (void) m_keep;*/~C", p);
+        g->failure_string = "z->c = z->l - m_keep;";
+    }
+
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    generate(g, p->left);
+
+    if (g->label_used) {
+        wsetl(g, g->failure_label);
+    }
+    if (needs_keep) {
+        w(g, "~}");
+    }
+}
+
+/* Binds V[0] to p's name and writes "~V0 = 1;". */
+static void generate_set(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    wp(g, "~M~V0 = 1;~C", p);
+}
+
+/* Binds V[0] to p's name and writes "~V0 = 0;". */
+static void generate_unset(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    wp(g, "~M~V0 = 0;~C", p);
+}
+
+/* Generates p's child and then writes an unconditional "~f". */
+static void generate_fail(struct generator * g, struct node * p) {
+    struct node * body = p->left;
+
+    generate(g, body);
+    wp(g, "~M~f~C", p);
+}
+
+/* generate_test() also implements 'reverse'.
+ *
+ * The operand is wrapped in a "~{~K" ... "~M~R~N~}" pair when K_needed()
+ * says so; otherwise it is generated bare. */
+
+static void generate_test(struct generator * g, struct node * p) {
+    if (K_needed(g, p->left)) {
+        wp(g, "~{~K~C", p);
+        generate(g, p->left);
+        wp(g, "~M~R~N~}", p);
+    } else {
+        wp(g, "~M~C", p);
+        generate(g, p->left);
+    }
+}
+
+/* Generates code for a 'do' node: the operand runs against a fresh failure
+ * label with no failure string, the label is placed afterwards if used, and
+ * a wrestore() follows when a keep was emitted. */
+static void generate_do(struct generator * g, struct node * p) {
+    int slot = 0;   /* g->keep_count at the time of the keep; 0 if none */
+
+    if (!K_needed(g, p->left)) {
+        wp(g, "~M~C", p);
+    } else {
+        wp(g, "~{~k~C", p);
+        slot = g->keep_count;
+    }
+
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    g->failure_string = 0;
+    generate(g, p->left);
+
+    if (g->label_used) {
+        wsetl(g, g->failure_label);
+    }
+    if (slot == 0) return;
+
+    w(g, "~M");
+    wrestore(g, p, slot);
+    w(g, "~N~}");
+}
+
+/* Generates code for 'next'.  With the utf8 option a skip_utf8() call is
+ * emitted (step +1 going forward, -1 going backward) whose negative result
+ * triggers "~f"; without it the "~l" / "~i" directives are written. */
+static void generate_next(struct generator * g, struct node * p) {
+    if (!g->options->utf8) {
+        wp(g, "~M~l~N"
+              "~M~i~C", p);
+        return;
+    }
+
+    w(g, p->mode == m_forward
+             ? "~{int ret = skip_utf8(z->p, z->c, 0, z->l, 1"
+             : "~{int ret = skip_utf8(z->p, z->c, z->lb, 0, -1");
+    wp(g, ");~N"
+          "~Mif (ret < 0) ~f~N"
+          "~Mz->c = ret;~C"
+          "~}", p);
+}
+
+/* Generates the special-case code for 'goto' / 'gopast' applied directly to
+ * a grouping (complement == 0) or an inverted grouping (complement != 0).
+ *
+ * A single call to {in,out}_grouping{,_b}{,_U} with a final argument of 1 is
+ * emitted.  For 'goto' only its sign is tested; for 'gopast' the result is
+ * also added to (forward) or subtracted from (backward) z->c. */
+static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
+    struct grouping * grp = p->name->grouping;
+    const int forward = p->mode == m_forward;
+
+    g->V[0] = p->name;
+    g->I[0] = grp->smallest_ch;
+    g->I[1] = grp->largest_ch;
+    g->S[0] = forward ? "" : "_b";
+    g->S[1] = complement ? "in" : "out";
+    g->S[2] = g->options->utf8 ? "_U" : "";
+
+    if (is_goto) {
+        wp(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1) < 0) ~f /* goto */~C", p);
+        return;
+    }
+
+    wp(g, "~{ /* gopast */~C"
+          "~Mint ret = ~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 1);~N"
+          "~Mif (ret < 0) ~f~N", p);
+    w(g, forward ? "~Mz->c += ret;~N" : "~Mz->c -= ret;~N");
+    w(g, "~}");
+}
+
+/* Generates code for 'goto' (style == 1) and 'gopast' (style == 0).
+ *
+ * When the operand is a grouping or an inverted grouping the work is handed
+ * to generate_GO_grouping().  Otherwise a while(1) loop is emitted which
+ * tries the operand, breaks out on success, and otherwise steps on via
+ * generate_next() before trying again.
+ *
+ * A failure of the operand is caught by the loop's own label, so the
+ * enclosing construct's failure_string (for instance the cursor restore
+ * installed by generate_try() or the limit restore installed by
+ * generate_setlimit()) must not be emitted for it.  failure_string is
+ * therefore cleared while the operand is generated, exactly as
+ * generate_repeat(), generate_do() and generate_not() do, and is put back
+ * before generate_next() so that running out of input still fails the
+ * enclosing construct with its proper clean-up. */
+static void generate_GO(struct generator * g, struct node * p, int style) {
+    int keep_c = 0;
+
+    /* caller's failure state, reinstated before the trailing 'next' */
+    int used = g->label_used;
+    int a0 = g->failure_label;
+    const char * a1 = g->failure_string;
+
+    if (p->left->type == c_grouping || p->left->type == c_non) {
+        /* Special case for "goto" or "gopast" when used on a grouping or an
+         * inverted grouping - the movement of c by the matching action is
+         * exactly what we want! */
+#ifdef OPTIMISATION_WARNINGS
+        printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
+#endif
+        generate_GO_grouping(g, p->left, style, p->left->type == c_non);
+        return;
+    }
+
+    w(g, "~Mwhile(1) {"); wp(g, "~C~+", p);
+
+    if (style == 1 || repeat_restore(g, p->left)) {
+        wp(g, "~M~k~N", p);
+        keep_c = g->keep_count;
+    }
+
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    g->failure_string = 0;   /* operand failures stay inside the loop */
+    generate(g, p->left);
+
+    if (style == 1) {
+        /* include for goto; omit for gopast */
+        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
+    }
+    w(g, "~Mbreak;~N");
+    if (g->label_used)
+        wsetl(g, g->failure_label);
+    if (keep_c) {
+        w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
+    }
+
+    g->label_used = used;
+    g->failure_label = a0;
+    g->failure_string = a1;
+
+    /* step on before the next attempt; failing here fails the caller */
+    generate_next(g, p);
+    w(g, "~}");
+}
+
+/* Emits "{ int i; for (i = <AE>; i > 0; i--) { <body> } }" where <AE> comes
+ * from generate_AE() on p->AE and <body> is the code for p's child. */
+static void generate_loop(struct generator * g, struct node * p) {
+    w(g, "~{int i; for (i = ");
+    generate_AE(g, p->AE);
+    wp(g, "; i > 0; i--)~C~{", p);
+
+    generate(g, p->left);
+
+    w(g, "~}~}");
+}
+
+/* Generates a while(1) loop around p's child.  On success the loop
+ * continues (after an "i--;" when atleast_case is set); on failure the
+ * failure label is placed, the cursor is restored if repeat_restore() asked
+ * for a keep, and the loop is left with "break;". */
+static void generate_repeat(struct generator * g, struct node * p, int atleast_case) {
+    int slot = 0;   /* g->keep_count at the time of the keep; 0 if none */
+
+    wp(g, "~Mwhile(1) {~C~+", p);
+    if (repeat_restore(g, p->left)) {
+        wp(g, "~M~k~N", p);
+        slot = g->keep_count;
+    }
+
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    g->failure_string = 0;
+    generate(g, p->left);
+
+    if (atleast_case) {
+        w(g, "~Mi--;~N");
+    }
+    w(g, "~Mcontinue;~N");
+
+    if (g->label_used) {
+        wsetl(g, g->failure_label);
+    }
+    if (slot != 0) {
+        w(g, "~M");
+        wrestore(g, p, slot);
+        w(g, "~N");
+    }
+
+    w(g, "~Mbreak;~N~}");
+}
+
+/* Emits "int i = <AE>;", then the generate_repeat() loop in its
+ * atleast form, then "if (i > 0) ~f".  The caller's failure state is saved
+ * around generate_repeat() so that the final "~f" uses it. */
+static void generate_atleast(struct generator * g, struct node * p) {
+    int saved_used;
+    int saved_label;
+    const char * saved_string;
+
+    w(g, "~{int i = ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+
+    saved_used = g->label_used;
+    saved_label = g->failure_label;
+    saved_string = g->failure_string;
+
+    generate_repeat(g, p, true);
+
+    g->label_used = saved_used;
+    g->failure_label = saved_label;
+    g->failure_string = saved_string;
+
+    w(g, "~Mif (i > 0) ~f~N~}");
+}
+
+/* Binds V[0] to p's name and writes "~V0 = z->c;". */
+static void generate_setmark(struct generator * g, struct node * p) {
+    struct name * mark = p->name;
+
+    g->V[0] = mark;
+    wp(g, "~M~V0 = z->c;~C", p);
+}
+
+/* Emits "if (z->c > <AE>) ~f" (forward) or "if (z->c < <AE>) ~f"
+ * (backward), followed by "z->c = <AE>;".  The expression is generated
+ * twice, once per statement. */
+static void generate_tomark(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = ">";
+    } else {
+        g->S[0] = "<";
+    }
+
+    w(g, "~Mif (z->c ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ") ~f~N");
+
+    w(g, "~Mz->c = ");
+    generate_AE(g, p->AE);
+    wp(g, ";~C", p);
+}
+
+/* Emits "if (z->c != <AE>) ~f" with <AE> generated from p->AE. */
+static void generate_atmark(struct generator * g, struct node * p) {
+    w(g, "~Mif (z->c != ");
+    generate_AE(g, p->AE);
+    wp(g, ") ~f~C", p);
+}
+
+/* Generates code for 'hop'.  The target position is computed into a local
+ * "ret": through skip_utf8() with the utf8 option, by plain addition or
+ * subtraction otherwise (with an explicit range check against ~S1 and
+ * z->l).  "~f" is written for the out-of-range case; otherwise z->c is set
+ * to ret. */
+static void generate_hop(struct generator * g, struct node * p) {
+    const int forward = p->mode == m_forward;
+
+    g->S[0] = forward ? "+" : "-";
+    g->S[1] = forward ? "0" : "z->lb";
+
+    if (!g->options->utf8) {
+        w(g, "~{int ret = z->c ~S0 ");
+        generate_AE(g, p->AE);
+        w(g, ";~N");
+        w(g, "~Mif (~S1 > ret || ret > z->l) ~f~N");
+    } else {
+        w(g, "~{int ret = skip_utf8(z->p, z->c, ~S1, z->l, ~S0 ");
+        generate_AE(g, p->AE);
+        w(g, ");~N");
+        w(g, "~Mif (ret < 0) ~f~N");
+    }
+
+    wp(g, "~Mz->c = ret;~C~}", p);
+}
+
+/* Emits a slice_del(z) call whose negative result is returned from the
+ * generated routine. */
+static void generate_delete(struct generator * g, struct node * p) {
+    wp(g, "~{int ret = slice_del(z);~C", p);
+    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
+}
+
+/* Emits "z->c = z->l;" going forward and "z->c = z->lb;" going backward. */
+static void generate_tolimit(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = "";
+    } else {
+        g->S[0] = "b";
+    }
+    wp(g, "~Mz->c = z->l~S0;~C", p);
+}
+
+/* Emits "if (z->c < z->l) ~f" going forward and "if (z->c > z->lb) ~f"
+ * going backward. */
+static void generate_atlimit(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = "";
+        g->S[1] = "<";
+    } else {
+        g->S[0] = "b";
+        g->S[1] = ">";
+    }
+    wp(g, "~Mif (z->c ~S1 z->l~S0) ~f~C", p);
+}
+
+/* Emits "z->bra = z->c;" going forward and "z->ket = z->c;" going backward. */
+static void generate_leftslice(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = "bra";
+    } else {
+        g->S[0] = "ket";
+    }
+    wp(g, "~Mz->~S0 = z->c;~C", p);
+}
+
+/* Emits "z->ket = z->c;" going forward and "z->bra = z->c;" going backward. */
+static void generate_rightslice(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = "ket";
+    } else {
+        g->S[0] = "bra";
+    }
+    wp(g, "~Mz->~S0 = z->c;~C", p);
+}
+
+/* Emits "~V0 = assign_to(z, ~V0);" followed by a "return -1" when the
+ * result is 0, with V[0] bound to p's name. */
+static void generate_assignto(struct generator * g, struct node * p) {
+    struct name * target = p->name;
+
+    g->V[0] = target;
+    wp(g, "~M~V0 = assign_to(z, ~V0);~C"
+          "~Mif (~V0 == 0) return -1;~C", p);
+}
+
+/* Emits "~V0 = slice_to(z, ~V0);" followed by a "return -1" when the result
+ * is 0, with V[0] bound to p's name. */
+static void generate_sliceto(struct generator * g, struct node * p) {
+    struct name * target = p->name;
+
+    g->V[0] = target;
+    wp(g, "~M~V0 = slice_to(z, ~V0);~C"
+          "~Mif (~V0 == 0) return -1;~C", p);
+}
+
+/* Writes the data argument for a string operation: "<size>, <literal ref>"
+ * when p carries a literal string, otherwise the variable named by p. */
+static void generate_data_address(struct generator * g, struct node * p) {
+    symbol * literal = p->literalstring;
+
+    if (literal == 0) {
+        wv(g, p->name);
+        return;
+    }
+    wi(g, SIZE(literal));
+    w(g, ", ");
+    wlitref(g, literal);
+}
+
+/* Generates code for 'insert' and 'attach' (style is the node type).
+ *
+ * An insert_~$() call at the cursor is emitted; its negative result is
+ * returned from the generated routine.  The cursor is saved before and put
+ * back after the call for 'attach' in forward mode and for 'insert' in
+ * backward mode. */
+static void generate_insert(struct generator * g, struct node * p, int style) {
+    /* keep when exactly one of "is attach" / "is backward" holds */
+    const int keep_c = (style == c_attach) != (p->mode == m_backward);
+
+    wp(g, "~{", p);
+    if (keep_c) {
+        w(g, "int c_keep = z->c;~N~M");
+    }
+    wp(g, "int ret = insert_~$(z, z->c, z->c, ", p);
+    generate_data_address(g, p);
+    wp(g, ");~C", p);
+    if (keep_c) {
+        w(g, "~Mz->c = c_keep;~N");
+    }
+    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
+}
+
+/* Generates code for c_assign: an insert_~$() call covering z->c..z->l
+ * (forward mode) or z->lb..z->c (backward mode), whose negative result is
+ * returned from the generated routine.  In forward mode the cursor is saved
+ * before and put back after the call.
+ *
+ * The emitted block declares its own "int ret", as the blocks emitted by
+ * generate_insert(), generate_slicefrom() and generate_delete() do;
+ * without the declaration the generated C refers to an undeclared
+ * identifier. */
+static void generate_assignfrom(struct generator * g, struct node * p) {
+
+    int keep_c = p->mode == m_forward; /* like 'attach' */
+    wp(g, "~{", p);
+    if (keep_c) wp(g, "int c_keep = z->c;~N"
+                   "~Mint ret = insert_~$(z, z->c, z->l, ", p);
+                else wp(g, "int ret = insert_~$(z, z->lb, z->c, ", p);
+    generate_data_address(g, p);
+    wp(g, ");~C", p);
+    if (keep_c) w(g, "~Mz->c = c_keep;~N");
+    wp(g, "~Mif (ret < 0) return ret;~N"
+          "~}", p);
+}
+
+/* bugs marked <======= fixed 22/7/02. Similar fixes required for Java */
+
+/* Emits a slice_from_~$() call on the data described by p; a negative
+ * result is returned from the generated routine. */
+static void generate_slicefrom(struct generator * g, struct node * p) {
+
+/*  w(g, "~Mslice_from_s(z, ");   <============= bug! should be: */
+    wp(g, "~{int ret = slice_from_~$(z, ", p);
+    generate_data_address(g, p);
+    wp(g, ");~C", p);
+    wp(g, "~Mif (ret < 0) return ret;~N~}", p);
+}
+
+/* Generates code for 'setlimit'.
+ *
+ * p->left is generated first; the emitted code then stores the old limit in
+ * "mlimit" (as a distance from the cursor going forward, as the old z->lb
+ * going backward), makes the cursor the new limit and restores the cursor
+ * with wrestore().  p->aux is generated with a failure string that undoes
+ * the limit change, and the same string is written once more after it for
+ * the success path.
+ *
+ * NOTE(review): the failure string installed here replaces the caller's
+ * rather than extending it - confirm this is intended when a setlimit sits
+ * inside a construct that installed its own failure string. */
+static void generate_setlimit(struct generator * g, struct node * p) {
+    int keep_c;
+    wp(g, "~{int mlimit;~C"
+          "~M~k~N"
+          , p);
+    keep_c = g->keep_count;
+    generate(g, p->left);
+    if (p->mode == m_forward) w(g, "~Mmlimit = z->l - z->c; z->l = z->c;~N");
+                         else w(g, "~Mmlimit = z->lb; z->lb = z->c;~N");
+    w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
+    /* undo of the limit change, used on failure inside p->aux ... */
+    g->failure_string = p->mode == m_forward ? "z->l += mlimit;" :
+                                               "z->lb = mlimit;";
+    generate(g, p->aux);
+    /* ... and written again here for the success path */
+    wms(g, g->failure_string);
+    w(g, "~N"
+      "~}");
+}
+
+/* Generates code for a '$' node applied to the variable named by p.
+ *
+ * The emitted code copies *z into a local "env", points z->p at the
+ * variable with z->lb = z->c = 0 and z->l = SIZE(z->p), runs p's child, and
+ * finally stores z->p back into the variable and restores *z from env.  A
+ * local "failure" flag starts at 1 and is cleared only when the child's
+ * code runs to its end; if it is still set, "~f" is executed with the
+ * caller's failure state. */
+static void generate_dollar(struct generator * g, struct node * p) {
+
+    /* caller's failure state, reinstated before the final "~f" */
+    int used = g->label_used;
+    int a0 = g->failure_label;
+    const char * a1 = g->failure_string;
+    g->failure_label = new_label(g);
+    g->label_used = 0;
+    g->failure_string = 0;
+
+    g->V[0] = p->name;
+    wp(g, "~{struct SN_env env = * z;~C"
+             "~Mint failure = 1; /* assume failure */~N"
+             "~Mz->p = ~V0;~N"
+             "~Mz->lb = z->c = 0;~N"
+             "~Mz->l = SIZE(z->p);~N", p);
+    generate(g, p->left);
+    w(g, "~Mfailure = 0; /* mark success */~N");
+    if (g->label_used)
+        wsetl(g, g->failure_label);
+    g->V[0] = p->name; /* necessary */
+
+    g->label_used = used;
+    g->failure_label = a0;
+    g->failure_string = a1;
+
+    w(g, "~M~V0 = z->p;~N"
+         "~M* z = env;~N"
+         "~Mif (failure) ~f~N~}");
+}
+
+/* Emits "~V0 <s> <AE>;" where s is the C assignment operator to use, V[0] is
+ * p's name and <AE> is generated from p->AE. */
+static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
+    g->S[0] = s;
+    g->V[0] = p->name;
+
+    w(g, "~M~V0 ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+}
+
+/* Emits "if (!(~V0 <s> <AE>)) ~f" where s is the C comparison operator to
+ * use, V[0] is p's name and <AE> is generated from p->AE. */
+static void generate_integer_test(struct generator * g, struct node * p, char * s) {
+    g->S[0] = s;
+    g->V[0] = p->name;
+
+    w(g, "~Mif (!(~V0 ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ")) ~f~N");
+}
+
+/* Emits a call of the routine named by p: a result of 0 triggers "~f", a
+ * negative result is returned from the generated routine. */
+static void generate_call(struct generator * g, struct node * p) {
+    struct name * callee = p->name;
+
+    g->V[0] = callee;
+    wp(g, "~{int ret = ~V0(z);~N"
+          "~Mif (ret == 0) ~f~C"
+          "~Mif (ret < 0) return ret;~N~}", p);
+}
+
+/* Emits a test against the grouping named by p: a call to
+ * {in,out}_grouping{,_b}{,_U} with a final argument of 0 whose non-zero
+ * result triggers "~f".  "out" is used when complement is set. */
+static void generate_grouping(struct generator * g, struct node * p, int complement) {
+    struct grouping * grp = p->name->grouping;
+
+    g->V[0] = p->name;
+    g->I[0] = grp->smallest_ch;
+    g->I[1] = grp->largest_ch;
+    g->S[0] = (p->mode == m_forward) ? "" : "_b";
+    g->S[1] = complement ? "out" : "in";
+    g->S[2] = g->options->utf8 ? "_U" : "";
+
+    w(g, "~Mif (~S1_grouping~S0~S2(z, ~V0, ~I0, ~I1, 0)) ~f~N");
+}
+
+/* Emits "if (!(eq_v(z, ~V0))) ~f" (eq_v_b going backward) for the string
+ * variable named by p. */
+static void generate_namedstring(struct generator * g, struct node * p) {
+    if (p->mode == m_forward) {
+        g->S[0] = "";
+    } else {
+        g->S[0] = "_b";
+    }
+    g->V[0] = p->name;
+    wp(g, "~Mif (!(eq_v~S0(z, ~V0))) ~f~C", p);
+}
+
+/* Emits "if (!(eq_s(z, <size>, <literal>))) ~f" (eq_s_b going backward) for
+ * p's literal string. */
+static void generate_literalstring(struct generator * g, struct node * p) {
+    symbol * literal = p->literalstring;
+
+    g->L[0] = literal;
+    g->I[0] = SIZE(literal);
+    g->S[0] = (p->mode == m_forward) ? "" : "_b";
+
+    w(g, "~Mif (!(eq_s~S0(z, ~I0, ~L0))) ~f~N");
+}
+
+/* Emits the C function for a routine definition: "static" for t_routine
+ * names and "extern" otherwise, an "int among_var;" local when the node asks
+ * for one, the body, and a final "return 1;".  Label numbering, the keep
+ * count and the failure state are reset for the new function, with
+ * x_return as the failure label. */
+static void generate_define(struct generator * g, struct node * p) {
+    struct name * routine = p->name;
+
+    g->next_label = 0;
+
+    g->V[0] = routine;
+    if (routine->type == t_routine) {
+        g->S[0] = "static";
+    } else {
+        g->S[0] = "extern";
+    }
+
+    w(g, "~N~S0 int ~V0(struct SN_env * z) {~N~+");
+    if (p->amongvar_needed) {
+        w(g, "~Mint among_var;~N");
+    }
+
+    g->failure_string = 0;
+    g->failure_label = x_return;
+    g->label_used = 0;
+    g->keep_count = 0;
+    generate(g, p->left);
+
+    w(g, "~Mreturn 1;~N~}");
+}
+
+/* Generates the find_among() call for an among / substring node, preceded
+ * where possible by a cheap inline pre-check on a single character.
+ *
+ * The pre-check looks at one character of the input (position
+ * shortest_size - 1 from the cursor going forward, the character just
+ * before the cursor going backward).  It is emitted when the candidate
+ * characters all share the same value of (ch >> 5), in which case a 32-bit
+ * bitmap indexed by (ch & 0x1f) is tested, or when there are at most two
+ * distinct candidate characters, which are then compared directly.  If the
+ * among contains the empty string, a failed pre-check assigns that entry's
+ * result to among_var instead of failing. */
+static void generate_substring(struct generator * g, struct node * p) {
+
+    struct among * x = p->among;
+    int block = -1;              /* shared (ch >> 5), or -1 if not shared */
+    unsigned int bitmap = 0;     /* bits (ch & 0x1f) seen within block */
+    struct amongvec * among_cases = x->b;
+    int c;
+    int empty_case = -1;         /* index of an empty string entry, if any */
+    int n_cases = 0;             /* number of distinct candidate characters */
+    symbol cases[2];             /* the first two distinct candidates */
+    int shortest_size = INT_MAX; /* shortest non-empty string length */
+
+    g->S[0] = p->mode == m_forward ? "" : "_b";
+    g->I[0] = x->number;
+    g->I[1] = x->literalstring_count;
+
+    /* In forward mode with non-ASCII UTF-8 characters, the first character
+     * of the string will often be the same, so instead look at the last
+     * common character position.
+     *
+     * In backward mode, we can't match if there are fewer characters before
+     * the current position than the minimum length.
+     */
+    for (c = 0; c < x->literalstring_count; ++c) {
+        int size = among_cases[c].size;
+        if (size != 0 && size < shortest_size) {
+            shortest_size = size;
+        }
+    }
+
+    /* collect the candidate characters */
+    for (c = 0; c < x->literalstring_count; ++c) {
+        symbol ch;
+        if (among_cases[c].size == 0) {
+            empty_case = c;
+            continue;
+        }
+        if (p->mode == m_forward) {
+            ch = among_cases[c].b[shortest_size - 1];
+        } else {
+            ch = among_cases[c].b[among_cases[c].size - 1];
+        }
+        if (n_cases == 0) {
+            block = ch >> 5;
+        } else if (ch >> 5 != block) {
+            /* candidates span more than one 32-character block */
+            block = -1;
+            if (n_cases > 2) break;
+        }
+        if (block == -1) {
+            if (ch == cases[0]) continue;
+            if (n_cases < 2) {
+            cases[n_cases++] = ch;
+            } else if (ch != cases[1]) {
+            ++n_cases;
+            break;
+            }
+        } else {
+            if ((bitmap & (1u << (ch & 0x1f))) == 0) {
+            bitmap |= 1u << (ch & 0x1f);
+            if (n_cases < 2)
+                cases[n_cases] = ch;
+            ++n_cases;
+            }
+        }
+    }
+
+    if (block != -1 || n_cases <= 2) {
+    char buf[64];
+    g->I[2] = block;
+    g->I[3] = bitmap;
+    g->I[4] = shortest_size - 1;
+    /* bounds check first, so the character read below is inside the input */
+    if (p->mode == m_forward) {
+        sprintf(buf, "z->p[z->c + %d]", shortest_size - 1);
+        g->S[1] = buf;
+        if (shortest_size == 1) {
+            wp(g, "~Mif (z->c >= z->l || ", p);
+        } else {
+            wp(g, "~Mif (z->c + ~I4 >= z->l || ", p);
+        }
+    } else {
+        g->S[1] = "z->p[z->c - 1]";
+        if (shortest_size == 1) {
+            wp(g, "~Mif (z->c <= z->lb || ", p);
+        } else {
+            wp(g, "~Mif (z->c - ~I4 <= z->lb || ", p);
+        }
+    }
+    if (n_cases == 0) {
+        /* We get this for the degenerate case: among { '' }
+         * This doesn't seem to be a useful construct, but it is
+         * syntactically valid.
+         */
+        wp(g, "0", p);
+    } else if (n_cases == 1) {
+        g->I[4] = cases[0];
+        wp(g, "~S1 != ~I4", p);
+    } else if (n_cases == 2) {
+        g->I[4] = cases[0];
+        g->I[5] = cases[1];
+        wp(g, "(~S1 != ~I4 && ~S1 != ~I5)", p);
+    } else {
+        wp(g, "~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)", p);
+    }
+    ws(g, ") ");
+    if (empty_case != -1) {
+        /* If the among includes the empty string, it can never fail
+         * so not matching the bitmap means we match the empty string.
+         */
+        g->I[4] = among_cases[empty_case].result;
+        wp(g, "among_var = ~I4; else~N", p);
+    } else {
+        wp(g, "~f~N", p);
+    }
+    } else {
+#ifdef OPTIMISATION_WARNINGS
+    printf("Couldn't shortcut among %d\n", x->number);
+#endif
+    }
+
+    /* the real lookup; among_var is only assigned when something uses it */
+    if (x->command_count == 0 && x->starter == 0)
+        wp(g, "~Mif (!(find_among~S0(z, a_~I0, ~I1))) ~f~C", p);
+    else
+        wp(g, "~Mamong_var = find_among~S0(z, a_~I0, ~I1);~C"
+              "~Mif (!(among_var)) ~f~N", p);
+}
+
+/* Generates code for an 'among' node.
+ *
+ * The lookup itself comes from generate_substring() unless x->substring is
+ * set.  If the among has neither commands nor a starter nothing more is
+ * needed.  Otherwise the starter (if any) is generated, followed by a
+ * switch on among_var with "case 0: ~f" and one numbered case per non-empty
+ * bracketed child. */
+static void generate_among(struct generator * g, struct node * p) {
+    struct among * x = p->among;
+    struct node * q;
+    int next_case = 1;
+
+    if (x->substring == 0) generate_substring(g, p);
+
+    if (x->starter == 0) {
+        if (x->command_count == 0) return;
+    } else {
+        generate(g, x->starter);
+    }
+
+    q = p->left;
+    if (q != 0 && q->type != c_literalstring) q = q->right;
+
+    w(g, "~Mswitch(among_var) {~N~+"
+             "~Mcase 0: ~f~N");
+
+    for (; q != 0; q = q->right) {
+        if (q->type != c_bra || q->left == 0) continue;
+        g->I[0] = next_case++;
+        w(g, "~Mcase ~I0:~N~+");
+        generate(g, q);
+        w(g, "~Mbreak;~N~-");
+    }
+    w(g, "~}");
+}
+
+/* Emits "if (!(~V0)) ~f" for the variable named by p. */
+static void generate_booltest(struct generator * g, struct node * p) {
+    struct name * flag = p->name;
+
+    g->V[0] = flag;
+    wp(g, "~Mif (!(~V0)) ~f~C", p);
+}
+
+/* Writes an unconditional "~f". */
+static void generate_false(struct generator * g, struct node * p) {
+    wp(g, "~M~f~C", p);
+}
+
+/* Emits "debug(z, <n>, <line>);" where <n> is the generator's running debug
+ * counter (incremented here) and <line> is p's line number. */
+static void generate_debug(struct generator * g, struct node * p) {
+    int serial = g->debug_count;
+
+    g->debug_count = serial + 1;
+    g->I[0] = serial;
+    g->I[1] = p->line_number;
+    wp(g, "~Mdebug(z, ~I0, ~I1);~C", p);
+}
+
+/* Generates code for a single node by dispatching on its type.
+ *
+ * The generator's failure state (label_used, failure_label, failure_string)
+ * is saved on entry.  After the handler returns, failure_label and
+ * failure_string are always put back; label_used is put back only when the
+ * handler left a different failure_label behind, so that uses of the
+ * caller's own label recorded by the handler are not lost.
+ *
+ * An unknown node type is reported on stderr and terminates the program. */
+static void generate(struct generator * g, struct node * p) {
+
+    int used = g->label_used;
+    int a0 = g->failure_label;
+    const char * a1 = g->failure_string;
+
+    switch (p->type)
+    {
+        case c_define:        generate_define(g, p); break;
+        case c_bra:           generate_bra(g, p); break;
+        case c_and:           generate_and(g, p); break;
+        case c_or:            generate_or(g, p); break;
+        case c_backwards:     generate_backwards(g, p); break;
+        case c_not:           generate_not(g, p); break;
+        case c_set:           generate_set(g, p); break;
+        case c_unset:         generate_unset(g, p); break;
+        case c_try:           generate_try(g, p); break;
+        case c_fail:          generate_fail(g, p); break;
+        case c_reverse:
+        case c_test:          generate_test(g, p); break;
+        case c_do:            generate_do(g, p); break;
+        case c_goto:          generate_GO(g, p, 1); break;
+        case c_gopast:        generate_GO(g, p, 0); break;
+        case c_repeat:        generate_repeat(g, p, false); break;
+        case c_loop:          generate_loop(g, p); break;
+        case c_atleast:       generate_atleast(g, p); break;
+        case c_setmark:       generate_setmark(g, p); break;
+        case c_tomark:        generate_tomark(g, p); break;
+        case c_atmark:        generate_atmark(g, p); break;
+        case c_hop:           generate_hop(g, p); break;
+        case c_delete:        generate_delete(g, p); break;
+        case c_next:          generate_next(g, p); break;
+        case c_tolimit:       generate_tolimit(g, p); break;
+        case c_atlimit:       generate_atlimit(g, p); break;
+        case c_leftslice:     generate_leftslice(g, p); break;
+        case c_rightslice:    generate_rightslice(g, p); break;
+        case c_assignto:      generate_assignto(g, p); break;
+        case c_sliceto:       generate_sliceto(g, p); break;
+        case c_assign:        generate_assignfrom(g, p); break;
+        case c_insert:
+        case c_attach:        generate_insert(g, p, p->type); break;
+        case c_slicefrom:     generate_slicefrom(g, p); break;
+        case c_setlimit:      generate_setlimit(g, p); break;
+        case c_dollar:        generate_dollar(g, p); break;
+        case c_mathassign:    generate_integer_assign(g, p, "="); break;
+        case c_plusassign:    generate_integer_assign(g, p, "+="); break;
+        case c_minusassign:   generate_integer_assign(g, p, "-="); break;
+        case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
+        case c_divideassign:  generate_integer_assign(g, p, "/="); break;
+        case c_eq:            generate_integer_test(g, p, "=="); break;
+        case c_ne:            generate_integer_test(g, p, "!="); break;
+        case c_gr:            generate_integer_test(g, p, ">"); break;
+        case c_ge:            generate_integer_test(g, p, ">="); break;
+        case c_ls:            generate_integer_test(g, p, "<"); break;
+        case c_le:            generate_integer_test(g, p, "<="); break;
+        case c_call:          generate_call(g, p); break;
+        case c_grouping:      generate_grouping(g, p, false); break;
+        case c_non:           generate_grouping(g, p, true); break;
+        case c_name:          generate_namedstring(g, p); break;
+        case c_literalstring: generate_literalstring(g, p); break;
+        case c_among:         generate_among(g, p); break;
+        case c_substring:     generate_substring(g, p); break;
+        case c_booltest:      generate_booltest(g, p); break;
+        case c_false:         generate_false(g, p); break;
+        case c_true:          break;
+        case c_debug:         generate_debug(g, p); break;
+        default: fprintf(stderr, "%d encountered\n", p->type);
+                 exit(1);
+    }
+
+    /* a changed failure_label means the handler worked with a label of its
+     * own, so its label_used value says nothing about the caller's label */
+    if (g->failure_label != a0)
+        g->label_used = used;
+    g->failure_label = a0;
+    g->failure_string = a1;
+}
+
+/* Writes the "generated automatically" banner comment. */
+static void generate_start_comment(struct generator * g) {
+    w(g, "~N/* This file was generated automatically by the Snowball to ANSI C compiler */~N");
+}
+
+/* Writes the #include line for the runtime's header.h.
+ *
+ * Without a runtime path the header is included by bare name.  With one,
+ * the path is written in front of "header.h", with a '/' added in between
+ * unless the path already ends in one.
+ *
+ * An empty runtime path is treated like a missing one: looking at its
+ * "last character" would read the byte before the start of the string. */
+static void generate_head(struct generator * g) {
+    size_t len = 0;
+
+    if (g->options->runtime_path != 0)
+        len = strlen(g->options->runtime_path);
+
+    if (len == 0) {
+        w(g, "~N#include \"header.h\"~N~N");
+        return;
+    }
+
+    w(g, "~N#include \"");
+    ws(g, g->options->runtime_path);
+    if (g->options->runtime_path[len - 1] != '/')
+        wch(g, '/');
+    w(g, "header.h\"~N~N");
+}
+
+/* Writes a prototype for every routine name known to the analyser: a static
+ * one for t_routine names, and an extern one wrapped in an
+ * extern "C" guard for t_external names.  Other names are skipped. */
+static void generate_routine_headers(struct generator * g) {
+    struct name * q;
+
+    for (q = g->analyser->names; q != 0; q = q->next) {
+        g->V[0] = q;
+        if (q->type == t_routine) {
+            w(g, "static int ~W0(struct SN_env * z);~N");
+        } else if (q->type == t_external) {
+            w(g,
+              "#ifdef __cplusplus~N"
+              "extern \"C\" {~N"
+              "#endif~N"
+              "extern int ~W0(struct SN_env * z);~N"
+              "#ifdef __cplusplus~N"
+              "}~N"
+              "#endif~N"
+              );
+        }
+    }
+}
+
+/* Writes the static data for one among: first a "s_<among>_<index>" symbol
+ * array for every non-empty string, then the "a_<among>" table of
+ * struct among entries, one per string, holding its size, its symbol array
+ * (0 for an empty string), the entry's i and result fields, and its
+ * function (0 if none). */
+static void generate_among_table(struct generator * g, struct among * x) {
+    struct amongvec * entries = x->b;
+    int k;
+
+    g->I[0] = x->number;
+
+    /* pass 1: the string data */
+    for (k = 0; k < x->literalstring_count; k++) {
+        struct amongvec * e = entries + k;
+        g->I[1] = k;
+        g->I[2] = e->size;
+        g->L[0] = e->b;
+        if (e->size != 0)
+            w(g, "static const symbol s_~I0_~I1[~I2] = ~A0;~N");
+    }
+
+    g->I[1] = x->literalstring_count;
+    w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");
+
+    /* pass 2: the table rows; every row but the last ends in a comma */
+    for (k = 0; k < x->literalstring_count; k++) {
+        struct amongvec * e = entries + k;
+        g->I[1] = k;
+        g->I[2] = e->size;
+        g->I[3] = e->i;
+        g->I[4] = e->result;
+        g->S[0] = (k == x->literalstring_count - 1) ? "" : ",";
+
+        w(g, "/*~J1 */ { ~I2, ");
+        if (e->size == 0) {
+            w(g, "0,");
+        } else {
+            w(g, "s_~I0_~I1,");
+        }
+        w(g, " ~I3, ~I4, ");
+        if (e->function == 0) {
+            w(g, "0");
+        } else {
+            wvn(g, e->function);
+        }
+        w(g, "}~S0~N");
+    }
+    w(g, "};~N~N");
+}
+
+/* Writes the table for every among known to the analyser. */
+static void generate_amongs(struct generator * g) {
+    struct among * x;
+
+    for (x = g->analyser->amongs; x != 0; x = x->next) {
+        generate_among_table(g, x);
+    }
+}
+
+/* Sets bit i of the bitmap b, which stores 8 bits per element. */
+static void set_bit(symbol * b, int i) {
+    b[i / 8] |= 1 << (i % 8);
+}
+
+/* Writes the bitmap for one grouping as a static unsigned char array.
+ * Bit (ch - smallest_ch) is set for every character ch of the grouping, and
+ * the array covers the range smallest_ch..largest_ch at 8 bits per
+ * element. */
+static void generate_grouping_table(struct generator * g, struct grouping * q) {
+    symbol * members = q->b;
+    int range = q->largest_ch - q->smallest_ch + 1;
+    int n_bytes = (range + 7) / 8;  /* assume 8 bits per symbol */
+    symbol * bits = create_b(n_bytes);
+    int k;
+
+    for (k = 0; k < n_bytes; k++) {
+        bits[k] = 0;
+    }
+    for (k = 0; k < SIZE(members); k++) {
+        set_bit(bits, members[k] - q->smallest_ch);
+    }
+
+    g->V[0] = q->name;
+    w(g, "static const unsigned char ~V0[] = { ");
+    for (k = 0; k < n_bytes; k++) {
+        wi(g, bits[k]);
+        if (k != n_bytes - 1) w(g, ", ");
+    }
+    w(g, " };~N~N");
+
+    lose_b(bits);
+}
+
+/* Writes the bitmap table for every grouping known to the analyser. */
+static void generate_groupings(struct generator * g) {
+    struct grouping * q;
+
+    for (q = g->analyser->groupings; q != 0; q = q->next) {
+        generate_grouping_table(g, q);
+    }
+}
+
+/* Writes the definition of ~pcreate_env(), which calls SN_create_env() with
+ * the analyser's counts of string, integer and boolean names. */
+static void generate_create(struct generator * g) {
+    int * counts = g->analyser->name_count;
+
+    g->I[0] = counts[t_string];
+    g->I[1] = counts[t_integer];
+    g->I[2] = counts[t_boolean];
+    w(g, "~N"
+         "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1, ~I2); }"
+         "~N");
+}
+
+/* Writes the definition of ~pclose_env(), which calls SN_close_env() with
+ * the analyser's count of string names. */
+static void generate_close(struct generator * g) {
+    int * counts = g->analyser->name_count;
+
+    g->I[0] = counts[t_string];
+    w(g, "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
+}
+
+/* Writes the prototypes of ~pcreate_env() and ~pclose_env(). */
+static void generate_create_and_close_templates(struct generator * g) {
+    w(g, "~N"
+         "extern struct SN_env * ~pcreate_env(void);~N"
+         "extern void ~pclose_env(struct SN_env * z);~N"
+         "~N");
+}
+
+/* Writes the body of the generated header file inside an extern "C" guard:
+ * the create/close prototypes, a prototype for every external routine and,
+ * when a variables prefix was given, a
+ * "#define <prefix><name> (S|I|B[<count>])" line for every string, integer
+ * and boolean name. */
+static void generate_header_file(struct generator * g) {
+    struct name * q;
+    char * vp = g->options->variables_prefix;
+
+    g->S[0] = vp;
+
+    w(g, "~N"
+         "#ifdef __cplusplus~N"
+         "extern \"C\" {~N"
+         "#endif~N");            /* for C++ */
+
+    generate_create_and_close_templates(g);
+
+    for (q = g->analyser->names; q != 0; q = q->next) {
+        char * array_name = 0;   /* "S", "I" or "B" for variables */
+
+        g->V[0] = q;
+        switch (q->type)
+        {
+            case t_external:
+                w(g, "extern int ~W0(struct SN_env * z);~N");
+                break;
+            case t_string:  array_name = "S"; break;
+            case t_integer: array_name = "I"; break;
+            case t_boolean: array_name = "B"; break;
+        }
+        if (array_name == 0) continue;
+
+        g->S[1] = array_name;
+        if (vp) {
+            g->I[0] = q->count;
+            w(g, "#define ~S0");
+            str_append_b(g->outbuf, q->b);
+            w(g, " (~S1[~I0])~N");
+        }
+    }
+
+    w(g, "~N"
+         "#ifdef __cplusplus~N"
+         "}~N"
+         "#endif~N");            /* for C++ */
+
+    w(g, "~N");
+}
+
+/* Generates the C source file and the header file for the analysed program.
+ *
+ * The declarations (banner, #include, prototypes, among and grouping
+ * tables) are built in one buffer and the routine bodies plus the
+ * create/close functions in a second one; both are then written to
+ * options->output_c in that order.  The second buffer is reused for the
+ * header file, which goes to options->output_h. */
+extern void generate_program_c(struct generator * g) {
+    struct node * p;
+
+    /* declarations */
+    g->outbuf = str_new();
+    generate_start_comment(g);
+    generate_head(g);
+    generate_routine_headers(g);
+    w(g, "#ifdef __cplusplus~N"
+         "extern \"C\" {~N"
+         "#endif~N"
+         "~N");
+    generate_create_and_close_templates(g);
+    w(g, "~N"
+         "#ifdef __cplusplus~N"
+         "}~N"
+         "#endif~N");
+    generate_amongs(g);
+    generate_groupings(g);
+    g->declarations = g->outbuf;
+
+    /* routine bodies */
+    g->outbuf = str_new();
+    g->literalstring_count = 0;
+    for (p = g->analyser->program; p != 0; p = p->right) {
+        generate(g, p);
+    }
+    generate_create(g);
+    generate_close(g);
+
+    output_str(g->options->output_c, g->declarations);
+    str_delete(g->declarations);
+    output_str(g->options->output_c, g->outbuf);
+    str_clear(g->outbuf);
+
+    /* header file */
+    generate_start_comment(g);
+    generate_header_file(g);
+    output_str(g->options->output_h, g->outbuf);
+    str_delete(g->outbuf);
+}
+
+/* Creates a generator for analyser a with options o; the margin and the
+ * debug and line counters start at 0. */
+extern struct generator * create_generator_c(struct analyser * a, struct options * o) {
+    NEW(generator, g);
+
+    g->options = o;
+    g->analyser = a;
+    g->line_count = 0;
+    g->debug_count = 0;
+    g->margin = 0;
+    return g;
+}
+
+/* Releases a generator obtained from create_generator_c(). */
+extern void close_generator_c(struct generator * g) {
+    FREE(g);
+}
+
diff --git a/snowball_code/compiler/generator_java.c b/snowball_code/compiler/generator_java.c
new file mode 100644
index 0000000..f1e0354
--- /dev/null
+++ b/snowball_code/compiler/generator_java.c
@@ -0,0 +1,1452 @@
+
+#include <stdlib.h> /* for exit */
+#include <string.h> /* for strlen */
+#include <stdio.h> /* for fprintf etc */
+#include "header.h"
+
+/* Prototypes: generate(), w() and writef() are called by helpers that are
+   defined earlier in this file than they are. */
+
+static void generate(struct generator * g, struct node * p);
+static void w(struct generator * g, const char * s);
+static void writef(struct generator * g, const char * s, struct node * p);
+
+
+/* Failure label values that are not real generated labels. x_return means
+   that failure is signalled with "return false;" rather than "break labN;"
+   (see write_failure()). */
+enum special_labels {
+    x_return = -1
+};
+
+/* Return the next unused label number and advance the generator's counter. */
+static int new_label(struct generator * g) {
+    int label = g->next_label;
+
+    g->next_label = label + 1;
+    return label;
+}
+
+/* Make a fresh temporary variable name of the form "v_<n>". A new str is
+   returned each time; callers release it with str_delete(). */
+static struct str * vars_newname(struct generator * g) {
+    struct str * name = str_new();
+
+    g->var_number += 1;
+    str_append_string(name, "v_");
+    str_append_int(name, g->var_number);
+    return name;
+}
+
+/* Output routines */
+/* Write the contents of 'str' to 'outfile'. The data is first converted to
+   a C string with b_to_s(); that temporary copy is freed again here. */
+static void output_str(FILE * outfile, struct str * str) {
+    char * text = b_to_s(str_data(str));
+
+    fputs(text, outfile);
+    free(text);
+}
+
+/* Write routines for simple entities */
+
+/* Append the single character 'ch' to the current output buffer. */
+static void write_char(struct generator * g, int ch) {
+    str_append_ch(g->outbuf, ch);
+}
+
+/* Terminate the current line of generated output. */
+static void write_newline(struct generator * g) {
+    str_append_string(g->outbuf, "\n");
+}
+
+/* Append the C string 's' verbatim (no '~' expansion) to the output buffer. */
+static void write_string(struct generator * g, const char * s) {
+    str_append_string(g->outbuf, s);
+}
+
+/* Append the symbol block 'b' to the output buffer. */
+static void write_b(struct generator * g, symbol * b) {
+    str_append_b(g->outbuf, b);
+}
+
+/* Append the contents of another str to the output buffer. */
+static void write_str(struct generator * g, struct str * str) {
+    str_append(g->outbuf, str);
+}
+
+/* Append the integer 'i' (formatted by str_append_int) to the output buffer. */
+static void write_int(struct generator * g, int i) {
+    str_append_int(g->outbuf, i);
+}
+
+
+/* Write routines for items from the syntax tree */
+
+/* Write the Java identifier for a Snowball name. Every name except an
+   external gets a "<letter>_" prefix, the letter being picked from "SBIrxg"
+   by the name's type; externals are written bare. */
+static void write_varname(struct generator * g, struct name * p) {
+    static const char prefixes[] = "SBIrxg";
+
+    if (p->type != t_external) {
+        write_char(g, prefixes[p->type]);
+        write_char(g, '_');
+    }
+    write_b(g, p->b);
+}
+
+/* Write a reference to a variable. In Java a reference is spelt exactly
+   like the name itself, so this simply defers to write_varname(). */
+static void write_varref(struct generator * g, struct name * p) {
+    write_varname(g, p);
+}
+
+/* Write 'n' as one upper-case hexadecimal digit; callers pass 0..15. */
+static void write_hexdigit(struct generator * g, int n) {
+    int digit;
+
+    if (n < 10) {
+        digit = '0' + n;
+    } else {
+        digit = 'A' + (n - 10);
+    }
+    write_char(g, digit);
+}
+
+/* Write 'ch' as a Java Unicode escape: "\u" followed by the four hex digits
+   of its low 16 bits, most significant nibble first. */
+static void write_hex(struct generator * g, int ch) {
+    int shift = 12;
+
+    write_string(g, "\\u");
+    while (shift >= 0) {
+        write_hexdigit(g, (ch >> shift) & 0xf);
+        shift -= 4;
+    }
+}
+
+/* Write the symbols of 'p' as a double-quoted Java string literal.
+
+   Printable ASCII (32..126) is copied through, with '"' and '\' preceded by
+   a backslash. Every other symbol is written as a \uXXXX escape. That
+   includes DEL (127): it is a control character, so it must not be placed
+   raw into the generated source. */
+static void write_literal_string(struct generator * g, symbol * p) {
+
+    int i;
+    write_string(g, "\"");
+    for (i = 0; i < SIZE(p); i++) {
+        int ch = p[i];
+        if (32 <= ch && ch < 127) {
+            if (ch == '\"' || ch == '\\') write_string(g, "\\");
+            write_char(g, ch);
+        } else {
+            write_hex(g, ch);
+        }
+    }
+    write_string(g, "\"");
+}
+
+/* Indent the current line: four spaces per level of g->margin. */
+static void write_margin(struct generator * g) {
+    int remaining = g->margin;
+
+    while (remaining > 0) {
+        write_string(g, "    ");
+        remaining--;
+    }
+}
+
+/* Write a variable declaration into the declarations buffer instead of the
+   code buffer. 'declaration' is a writef() format (e.g. "int ~B0"); the line
+   gets a fixed twelve-space indent and a terminating ";". The code buffer is
+   reinstated as g->outbuf before returning. */
+static void write_declare(struct generator * g,
+                          char * declaration,
+                          struct node * p) {
+    struct str * code_buffer = g->outbuf;
+
+    g->outbuf = g->declarations;
+    write_string(g, "            ");
+    writef(g, declaration, p);
+    write_char(g, ';');
+    write_newline(g);
+
+    g->outbuf = code_buffer;
+}
+
+/* Emit a one-line Java comment identifying node 'p':
+   "// <token name>[ <name>], line <n>". */
+static void write_comment(struct generator * g, struct node * p) {
+    write_margin(g);
+    write_string(g, "// ");
+    write_string(g, (const char *) name_of_token(p->type));
+
+    if (p->name != 0) {
+        write_char(g, ' ');
+        write_b(g, p->name->b);
+    }
+
+    write_string(g, ", line ");
+    write_int(g, p->line_number);
+    write_newline(g);
+}
+
+/* Open a Java block: "{" on its own line at the current margin, after which
+   the margin is one level deeper. */
+static void write_block_start(struct generator * g) {
+    write_margin(g);
+    write_char(g, '{');
+    g->margin++;
+    write_newline(g);
+}
+
+/* Close a Java block: step the margin back out one level, then write "}" on
+   its own line. */
+static void write_block_end(struct generator * g) {
+    g->margin--;
+    write_margin(g);
+    write_char(g, '}');
+    write_newline(g);
+}
+
+/* Declare the int variable named by 'savevar' and emit code saving the
+   cursor in it: "v = cursor;" in forward mode, "v = limit - cursor;"
+   otherwise. Uses g->B[0] and g->S[1] as format arguments. */
+static void write_savecursor(struct generator * g, struct node * p,
+                             struct str * savevar) {
+    g->B[0] = str_data(savevar);
+    g->S[1] = p->mode == m_forward ? "" : "limit - ";
+
+    write_declare(g, "int ~B0", p);
+    writef(g, "~M~B0 = ~S1cursor;~N", p);
+}
+
+/* Replace the contents of 'out' with the statement that reinstates the
+   cursor from 'savevar': "cursor = v;" in forward mode,
+   "cursor = limit - v;" otherwise. */
+static void restore_string(struct node * p, struct str * out, struct str * savevar) {
+    int forward = p->mode == m_forward;
+
+    str_clear(out);
+    str_append_string(out, "cursor = ");
+    if (!forward) {
+        str_append_string(out, "limit - ");
+    }
+    str_append(out, savevar);
+    str_append_ch(out, ';');
+}
+
+/* Emit, as one indented line, the cursor-restoring statement built by
+   restore_string() for 'savevar'. */
+static void write_restorecursor(struct generator * g, struct node * p,
+                                struct str * savevar) {
+    struct str * statement = str_new();
+
+    restore_string(p, statement, savevar);
+
+    write_margin(g);
+    write_str(g, statement);
+    write_newline(g);
+
+    str_delete(statement);
+}
+
+/* Emit code moving the cursor one place in the direction of travel:
+   "cursor++;" in forward mode, "cursor--;" otherwise. */
+static void write_inc_cursor(struct generator * g, struct node * p) {
+    const char * step = "cursor--;";
+
+    if (p->mode == m_forward) step = "cursor++;";
+
+    write_margin(g);
+    write_string(g, step);
+    write_newline(g);
+}
+
+/* Open a labelled region "lab<n>: do {" that generated code can leave with
+   "break lab<n>;". The margin is one level deeper afterwards. */
+static void wsetlab_begin(struct generator * g, int n) {
+    write_margin(g);
+    write_string(g, "lab");
+    write_int(g, n);
+    write_string(g, ": do {");
+    g->margin++;
+    write_newline(g);
+}
+
+/* Close the region opened by wsetlab_begin(): step the margin back out and
+   write "} while (false);". */
+static void wsetlab_end(struct generator * g) {
+    g->margin--;
+    write_margin(g);
+    write_string(g, "} while (false);");
+    write_newline(g);
+}
+
+/* Emit "break lab<n>;" on a line of its own, i.e. a jump to the end of the
+   region labelled n. */
+static void wgotol(struct generator * g, int n) {
+    w(g, "~Mbreak lab");
+    write_int(g, n);
+    w(g, ";~N");
+}
+
+/* Emit the code executed when the current command fails: first the pending
+   clean-up statement in g->failure_str (if it is non-empty), then either
+   "return false;" when the failure label is x_return, or "break lab<n>;".
+   Only the break form marks the code that follows as unreachable. */
+static void write_failure(struct generator * g) {
+    if (str_len(g->failure_str) != 0) {
+        write_margin(g);
+        write_str(g, g->failure_str);
+        write_newline(g);
+    }
+
+    write_margin(g);
+    if (g->failure_label == x_return) {
+        write_string(g, "return false;");
+    } else {
+        write_string(g, "break lab");
+        write_int(g, g->failure_label);
+        write_string(g, ";");
+        g->unreachable = true;
+    }
+    write_newline(g);
+}
+
+/* Emit "if (<s>)" followed by a block holding the failure code. 's' is a
+   writef() format. The failure is conditional, so g->unreachable is false
+   on return (the "~f" directive emits the block and resets the flag). */
+static void write_failure_if(struct generator * g, char * s, struct node * p) {
+    w(g, "~Mif (");
+    writef(g, s, p);
+    writef(g, ")~N~f", p);
+}
+
+/* Fail if the cursor is already at the limit in the direction of travel:
+   at or beyond 'limit' going forward, at or before 'limit_backward' going
+   backward. */
+static void write_check_limit(struct generator * g, struct node * p) {
+    char * at_limit = "cursor <= limit_backward";
+
+    if (p->mode == m_forward) at_limit = "cursor >= limit";
+    write_failure_if(g, at_limit, p);
+}
+
+/* Formatted write. Characters of 'input' are copied to the output except
+   for '~' escapes:
+     ~C      comment describing node p
+     ~f      block containing the failure code (clears g->unreachable)
+     ~M ~N   margin / newline
+     ~{ ~}   block start / block end
+     ~+ ~-   margin one level deeper / shallower
+     ~n      g->options->name
+     ~Sd ~Bd ~Id ~Vd ~Wd ~Ld
+             element d (one digit) of g->S, g->B, g->I, g->V (as reference),
+             g->V (as name), g->L (as a literal string)
+   Any other character following '~' is written as itself. */
+static void writef(struct generator * g, const char * input, struct node * p) {
+    int length = strlen(input);
+    int pos = 0;
+
+    while (pos < length) {
+        int ch = input[pos++];
+        int code;
+
+        if (ch != '~') {
+            write_char(g, ch);
+            continue;
+        }
+
+        code = input[pos++];
+        switch (code) {
+            case 'C': write_comment(g, p); break;
+            case 'f':
+                write_block_start(g);
+                write_failure(g);
+                g->unreachable = false;
+                write_block_end(g);
+                break;
+            case 'M': write_margin(g); break;
+            case 'N': write_newline(g); break;
+            case '{': write_block_start(g); break;
+            case '}': write_block_end(g); break;
+            case '+': g->margin++; break;
+            case '-': g->margin--; break;
+            case 'n': write_string(g, g->options->name); break;
+            case 'S': write_string(g, g->S[input[pos++] - '0']); break;
+            case 'B': write_b(g, g->B[input[pos++] - '0']); break;
+            case 'I': write_int(g, g->I[input[pos++] - '0']); break;
+            case 'V': write_varref(g, g->V[input[pos++] - '0']); break;
+            case 'W': write_varname(g, g->V[input[pos++] - '0']); break;
+            case 'L': write_literal_string(g, g->L[input[pos++] - '0']); break;
+            default: write_char(g, code); break;
+        }
+    }
+}
+
+/* Formatted write with no node argument; formats passed here must not
+   contain "~C", which reads the node. */
+static void w(struct generator * g, const char * s) {
+    struct node * no_node = 0;
+
+    writef(g, s, no_node);
+}
+
+/* Write the Java expression for an arithmetic expression tree. Binary
+   operators are written fully parenthesised as "(left op right)"; node
+   types not listed here produce no output. */
+static void generate_AE(struct generator * g, struct node * p) {
+    char * op = 0;
+
+    switch (p->type) {
+        /* leaves and unary forms are written directly */
+        case c_name:   write_varref(g, p->name); return;
+        case c_number: write_int(g, p->number); return;
+        case c_maxint: write_string(g, "MAXINT"); return;
+        case c_minint: write_string(g, "MININT"); return;
+        case c_neg:
+            write_string(g, "-");
+            generate_AE(g, p->right);
+            return;
+        case c_sizeof:
+            g->V[0] = p->name;
+            w(g, "(~V0.length())");
+            return;
+        case c_cursor: w(g, "cursor"); return;
+        case c_limit:
+            w(g, p->mode == m_forward ? "limit" : "limit_backward");
+            return;
+        case c_size:   w(g, "(current.length())"); return;
+
+        /* binary operators only select the operator text here */
+        case c_multiply: op = " * "; break;
+        case c_plus:     op = " + "; break;
+        case c_minus:    op = " - "; break;
+        case c_divide:   op = " / "; break;
+    }
+    if (op == 0) return;
+
+    write_string(g, "(");
+    generate_AE(g, p->left);
+    write_string(g, op);
+    generate_AE(g, p->right);
+    write_string(g, ")");
+}
+
+/* K_needed() tests to see if we really need to keep c. Not true when the
+   command does not touch the cursor. This and repeat_score() could be
+   elaborated almost indefinitely.
+
+   Calls are followed into the definition of the called routine. Routines
+   can be recursive, so the walk stops following calls after
+   K_NEEDED_MAX_CALL_DEPTH nested calls and pessimistically answers true.
+   That costs at most a redundant save/restore in the generated code,
+   whereas unbounded recursion would exhaust the compiler's stack.
+*/
+
+enum { K_NEEDED_MAX_CALL_DEPTH = 100 };
+
+/* Worker for K_needed(); call_depth counts the calls followed so far. */
+static int K_needed_at_depth(struct generator * g, struct node * p, int call_depth) {
+
+    while (p != 0) {
+        switch (p->type) {
+            case c_dollar:
+            case c_leftslice:
+            case c_rightslice:
+            case c_mathassign:
+            case c_plusassign:
+            case c_minusassign:
+            case c_multiplyassign:
+            case c_divideassign:
+            case c_eq:
+            case c_ne:
+            case c_gr:
+            case c_ge:
+            case c_ls:
+            case c_le:
+            case c_sliceto:
+            case c_booltest:
+            case c_true:
+            case c_false:
+            case c_debug:
+                break;
+
+            case c_call:
+                if (call_depth >= K_NEEDED_MAX_CALL_DEPTH) return true;
+                if (K_needed_at_depth(g, p->name->definition, call_depth + 1)) return true;
+                break;
+
+            case c_bra:
+                /* a bracketed group is not a call: same depth */
+                if (K_needed_at_depth(g, p->left, call_depth)) return true;
+                break;
+
+            default: return true;
+        }
+        p = p->right;
+    }
+    return false;
+}
+
+/* Returns true if the command list starting at p may move the cursor, so
+   that the cursor has to be saved around it. */
+static int K_needed(struct generator * g, struct node * p) {
+
+    return K_needed_at_depth(g, p, 0);
+}
+
+enum { REPEAT_SCORE_MAX_CALL_DEPTH = 100 };
+
+/* Worker for repeat_score(); call_depth counts the calls followed so far.
+   Routines can be recursive, so once REPEAT_SCORE_MAX_CALL_DEPTH nested
+   calls have been followed the walk gives up and returns 2, the
+   pessimistic score that makes repeat_restore() ask for the cursor to be
+   reinstated. Without the cap a recursive routine inside a repeat would
+   recurse here until the compiler's stack was exhausted. */
+static int repeat_score_at_depth(struct generator * g, struct node * p, int call_depth) {
+
+    int score = 0;
+    while (p != 0) {
+        switch (p->type) {
+            case c_dollar:
+            case c_leftslice:
+            case c_rightslice:
+            case c_mathassign:
+            case c_plusassign:
+            case c_minusassign:
+            case c_multiplyassign:
+            case c_divideassign:
+            case c_eq:
+            case c_ne:
+            case c_gr:
+            case c_ge:
+            case c_ls:
+            case c_le:
+            case c_sliceto:   /* case c_not: must not be included here! */
+            case c_debug:
+                break;
+
+            case c_call:
+                if (call_depth >= REPEAT_SCORE_MAX_CALL_DEPTH) return 2;
+                score += repeat_score_at_depth(g, p->name->definition, call_depth + 1);
+                break;
+
+            case c_bra:
+                /* a bracketed group is not a call: same depth */
+                score += repeat_score_at_depth(g, p->left, call_depth);
+                break;
+
+            case c_name:
+            case c_literalstring:
+            case c_next:
+            case c_grouping:
+            case c_non:
+            case c_hop:
+                score = score + 1;
+                break;
+
+            default:
+                score = 2;
+                break;
+        }
+        p = p->right;
+    }
+    return score;
+}
+
+/* Score the command list starting at p: commands in the first group add
+   nothing, each command in the c_name..c_hop group adds 1, calls and
+   bracketed groups add the score of their contents, and any other command
+   sets the score to 2. repeat_restore() treats a score of 2 or more as
+   "the cursor must be reinstated". */
+static int repeat_score(struct generator * g, struct node * p) {
+
+    return repeat_score_at_depth(g, p, 0);
+}
+
+/* Tests whether the body of a repeat (or goto/gopast) needs the cursor to
+   be reinstated on failure, i.e. whether its repeat_score() reaches 2. */
+static int repeat_restore(struct generator * g, struct node * p) {
+    int score = repeat_score(g, p);
+
+    return score >= 2;
+}
+
+/* Bracketed group: emit the identifying comment, then generate each child
+   command in sequence. */
+static void generate_bra(struct generator * g, struct node * p) {
+    struct node * child;
+
+    write_comment(g, p);
+    for (child = p->left; child != 0; child = child->right) {
+        generate(g, child);
+    }
+}
+
+/* 'and': generate every operand in turn, putting the cursor back to its
+   saved value between operands (not after the last one) when any operand
+   can move it. Generation stops early once the code becomes unreachable. */
+static void generate_and(struct generator * g, struct node * p) {
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+    struct node * operand;
+
+    write_comment(g, p);
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    for (operand = p->left; operand != 0; operand = operand->right) {
+        generate(g, operand);
+        if (g->unreachable) break;
+        if (keep_c && operand->right != 0) {
+            write_restorecursor(g, operand, savevar);
+        }
+    }
+
+    str_delete(savevar);
+}
+
+/* 'or': try each operand in turn inside one outer labelled region.
+
+   Every operand but the last gets a region of its own as failure target:
+   if the operand succeeds the code breaks out of the outer region,
+   otherwise control falls to the end of the inner region, the cursor is
+   reinstated (when the operands can move it) and the next operand is
+   tried. The last operand is generated with the caller's failure label and
+   failure string, so its failure is the failure of the whole 'or'. */
+static void generate_or(struct generator * g, struct node * p) {
+
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+
+    /* the caller's failure context, reinstated for the last operand */
+    int a0 = g->failure_label;
+    struct str * a1 = str_copy(g->failure_str);
+
+    int out_lab = new_label(g);
+    write_comment(g, p);
+    wsetlab_begin(g, out_lab);
+
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    p = p->left;
+    str_clear(g->failure_str);
+
+    if (p == 0) {
+        /* p should never be 0 after an or: there should be at least two
+         * sub nodes. */
+        fprintf(stderr, "Error: \"or\" node without children nodes.\n");
+        exit (1);
+    }
+    while (p->right != 0) {
+        g->failure_label = new_label(g);
+        wsetlab_begin(g, g->failure_label);
+        generate(g, p);
+        if (!g->unreachable) wgotol(g, out_lab);
+        wsetlab_end(g);
+        g->unreachable = false;
+        if (keep_c) write_restorecursor(g, p, savevar);
+        p = p->right;
+    }
+
+    g->failure_label = a0;
+    str_delete(g->failure_str);
+    g->failure_str = a1;
+
+    generate(g, p);
+    wsetlab_end(g);
+    str_delete(savevar);
+}
+
+/* 'backwards': set limit_backward to the cursor and move the cursor to
+   limit, generate the operand, then put the cursor back at limit_backward.
+   The closing statement ends with a newline so that whatever is generated
+   next starts on a line of its own. */
+static void generate_backwards(struct generator * g, struct node * p) {
+
+    write_comment(g, p);
+    writef(g,"~Mlimit_backward = cursor; cursor = limit;~N", p);
+    generate(g, p->left);
+    w(g, "~Mcursor = limit_backward;~N");
+}
+
+
+/* 'not': the operand is generated inside a labelled region that is its own
+   failure target. If the operand runs to completion the 'not' fails, using
+   the caller's failure label and failure string. If the operand fails,
+   control arrives after the region, where the cursor is reinstated (when
+   the operand can move it) and execution carries on. */
+static void generate_not(struct generator * g, struct node * p) {
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+
+    /* the caller's failure context */
+    int outer_label = g->failure_label;
+    struct str * outer_str = str_copy(g->failure_str);
+
+    write_comment(g, p);
+    if (keep_c) {
+        write_block_start(g);
+        write_savecursor(g, p, savevar);
+    }
+
+    g->failure_label = new_label(g);
+    str_clear(g->failure_str);
+    wsetlab_begin(g, g->failure_label);
+    generate(g, p->left);
+
+    /* back to the caller's context before emitting our own failure */
+    g->failure_label = outer_label;
+    str_delete(g->failure_str);
+    g->failure_str = outer_str;
+    if (!g->unreachable) write_failure(g);
+
+    wsetlab_end(g);
+    g->unreachable = false;
+
+    if (keep_c) {
+        write_restorecursor(g, p, savevar);
+        write_block_end(g);
+    }
+    str_delete(savevar);
+}
+
+
+/* 'try': generate the operand inside a labelled region that is its own
+   failure target, so a failure of the operand is absorbed here.
+
+   The failure string in force inside the region must belong to the 'try'
+   alone: the cursor-restoring statement when the operand can move the
+   cursor, nothing otherwise. The inherited string is therefore always
+   cleared first. Leaving it in place would run the enclosing construct's
+   clean-up (for instance the limit reinstatement of a setlimit) on a
+   failure that never leaves the 'try'. */
+static void generate_try(struct generator * g, struct node * p) {
+
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+
+    write_comment(g, p);
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    g->failure_label = new_label(g);
+    str_clear(g->failure_str);
+    if (keep_c) restore_string(p, g->failure_str, savevar);
+
+    wsetlab_begin(g, g->failure_label);
+    generate(g, p->left);
+    wsetlab_end(g);
+    g->unreachable = false;
+
+    str_delete(savevar);
+}
+
+/* 'set': assign true to the named boolean. */
+static void generate_set(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    writef(g, "~M~V0 = true;~N", p);
+}
+
+/* 'unset': assign false to the named boolean. */
+static void generate_unset(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    writef(g, "~M~V0 = false;~N", p);
+}
+
+/* 'fail': generate the operand, then fail unconditionally, unless the
+   operand already left the following code unreachable. */
+static void generate_fail(struct generator * g, struct node * p) {
+    write_comment(g, p);
+    generate(g, p->left);
+    if (g->unreachable) return;
+    write_failure(g);
+}
+
+/* 'test' (generate_test() also implements 'reverse'): generate the operand
+   and, if the code after it is still reachable, put the cursor back where
+   it was. Nothing is saved or restored when the operand cannot move the
+   cursor. */
+static void generate_test(struct generator * g, struct node * p) {
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+
+    write_comment(g, p);
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    generate(g, p->left);
+
+    if (keep_c && !g->unreachable) write_restorecursor(g, p, savevar);
+    str_delete(savevar);
+}
+
+/* 'do': generate the operand inside a labelled region that is its own
+   failure target and has an empty failure string, so a failure is absorbed;
+   afterwards the cursor is reinstated when the operand can move it. */
+static void generate_do(struct generator * g, struct node * p) {
+    struct str * savevar = vars_newname(g);
+    int keep_c = K_needed(g, p->left);
+    int region;
+
+    write_comment(g, p);
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    region = new_label(g);
+    g->failure_label = region;
+    str_clear(g->failure_str);
+
+    wsetlab_begin(g, region);
+    generate(g, p->left);
+    wsetlab_end(g);
+    g->unreachable = false;
+
+    if (keep_c) write_restorecursor(g, p, savevar);
+    str_delete(savevar);
+}
+
+/* 'goto' (style 1) and 'gopast' (style 0).
+
+   Emits "golab<n>: while(true) { ... }". Each pass tries the operand inside
+   a labelled region; on success the loop is left (for goto the cursor is
+   first put back to where the operand started matching). On failure the
+   cursor is reinstated if necessary, the command fails if the cursor is at
+   the limit, and otherwise the cursor is advanced one place and the operand
+   is tried again.
+
+   A failure of the operand is absorbed by the loop, so the operand is
+   generated with an empty failure string. The caller's failure label and
+   string are put back before the at-limit check, whose failure really does
+   propagate outwards. */
+static void generate_GO(struct generator * g, struct node * p, int style) {
+
+    int end_unreachable = false;
+    struct str * savevar = vars_newname(g);
+    int keep_c = style == 1 || repeat_restore(g, p->left);
+
+    /* the caller's failure context */
+    int a0 = g->failure_label;
+    struct str * a1 = str_copy(g->failure_str);
+
+    int golab = new_label(g);
+    g->I[0] = golab;
+    write_comment(g, p);
+    w(g, "~Mgolab~I0: while(true)~N");
+    w(g, "~{");
+
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    g->failure_label = new_label(g);
+    /* the enclosing clean-up must not run on a failure we absorb */
+    str_clear(g->failure_str);
+    wsetlab_begin(g, g->failure_label);
+    generate(g, p->left);
+
+    if (g->unreachable) {
+        /* Cannot break out of this loop: therefore the code after the
+         * end of the loop is unreachable.*/
+        end_unreachable = true;
+    } else {
+        /* include for goto; omit for gopast */
+        if (style == 1) write_restorecursor(g, p, savevar);
+        g->I[0] = golab;
+        w(g, "~Mbreak golab~I0;~N");
+    }
+    g->unreachable = false;
+    wsetlab_end(g);
+    if (keep_c) write_restorecursor(g, p, savevar);
+
+    g->failure_label = a0;
+    str_delete(g->failure_str);
+    g->failure_str = a1;
+
+    write_check_limit(g, p);
+    write_inc_cursor(g, p);
+    write_block_end(g);
+    str_delete(savevar);
+    g->unreachable = end_unreachable;
+}
+
+/* 'loop': emit "for (v = <AE>; v > 0; v--)" around the operand, v being a
+   freshly declared int counter. */
+static void generate_loop(struct generator * g, struct node * p) {
+    struct str * counter = vars_newname(g);
+
+    write_comment(g, p);
+
+    g->B[0] = str_data(counter);
+    write_declare(g, "int ~B0", p);
+    w(g, "~Mfor (~B0 = ");
+    generate_AE(g, p->AE);
+
+    g->B[0] = str_data(counter);
+    writef(g, "; ~B0 > 0; ~B0--)~N~{", p);
+
+    generate(g, p->left);
+    w(g, "~}");
+
+    g->unreachable = false;
+    str_delete(counter);
+}
+
+/* 'repeat' (and the looping part of 'atleast'): emit
+   "replab<n>: while(true) { ... }". Each pass runs the operand inside a
+   labelled region with an empty failure string; if it completes, the loop
+   continues (first decrementing 'loopvar' when one is given); if it fails,
+   the cursor is reinstated when necessary and the loop is left. */
+static void generate_repeat(struct generator * g, struct node * p, struct str * loopvar) {
+    struct str * savevar = vars_newname(g);
+    int keep_c = repeat_restore(g, p->left);
+    int loop_label = new_label(g);
+    int body_label;
+
+    write_comment(g, p);
+    g->I[0] = loop_label;
+    writef(g, "~Mreplab~I0: while(true)~N~{", p);
+
+    if (keep_c) write_savecursor(g, p, savevar);
+
+    body_label = new_label(g);
+    g->failure_label = body_label;
+    str_clear(g->failure_str);
+    wsetlab_begin(g, body_label);
+    generate(g, p->left);
+
+    if (!g->unreachable) {
+        /* the operand completed: count the pass and go round again */
+        if (loopvar != 0) {
+            g->B[0] = str_data(loopvar);
+            w(g, "~M~B0--;~N");
+        }
+        g->I[0] = loop_label;
+        w(g, "~Mcontinue replab~I0;~N");
+    }
+
+    wsetlab_end(g);
+    g->unreachable = false;
+
+    /* the operand failed: undo its cursor movement and leave the loop */
+    if (keep_c) write_restorecursor(g, p, savevar);
+    g->I[0] = loop_label;
+    w(g, "~Mbreak replab~I0;~N~}");
+
+    str_delete(savevar);
+}
+
+/* 'atleast': declare a counter initialised to <AE>, repeat the operand
+   (each completed pass decrements the counter), then fail if the counter is
+   still above zero. The caller's failure label and string are put back
+   after the repeat so that this final failure uses them. */
+static void generate_atleast(struct generator * g, struct node * p) {
+    struct str * loopvar = vars_newname(g);
+    int outer_label;
+    struct str * outer_str;
+
+    write_comment(g, p);
+    w(g, "~{");
+
+    g->B[0] = str_data(loopvar);
+    w(g, "~Mint ~B0 = ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+
+    outer_label = g->failure_label;
+    outer_str = str_copy(g->failure_str);
+    generate_repeat(g, p, loopvar);
+    g->failure_label = outer_label;
+    str_delete(g->failure_str);
+    g->failure_str = outer_str;
+
+    g->B[0] = str_data(loopvar);
+    write_failure_if(g, "~B0 > 0", p);
+    w(g, "~}");
+
+    str_delete(loopvar);
+}
+
+/* 'setmark': store the cursor in the named integer. */
+static void generate_setmark(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    writef(g, "~M~V0 = cursor;~N", p);
+}
+
+/* 'tomark': fail if the cursor is already beyond <AE> in the direction of
+   travel (cursor > AE forward, cursor < AE backward), otherwise set the
+   cursor to <AE>. */
+static void generate_tomark(struct generator * g, struct node * p) {
+    int forward = p->mode == m_forward;
+
+    write_comment(g, p);
+
+    g->S[0] = forward ? ">" : "<";
+    w(g, "~Mif (cursor ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ")~N~f");
+
+    w(g, "~Mcursor = ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+}
+
+/* 'atmark': fail unless the cursor equals <AE>. */
+static void generate_atmark(struct generator * g, struct node * p) {
+    write_comment(g, p);
+    w(g, "~Mif (cursor != ");
+    generate_AE(g, p->AE);
+    w(g, ")~N~f");
+}
+
+
+/* 'hop': compute c = cursor +/- <AE> (plus forward, minus backward) inside
+   a block of its own, fail if c is below the lower bound (0 forward,
+   limit_backward backward) or above limit, otherwise move the cursor to c. */
+static void generate_hop(struct generator * g, struct node * p) {
+    int forward = p->mode == m_forward;
+
+    write_comment(g, p);
+
+    g->S[0] = forward ? "+" : "-";
+    w(g, "~{~Mint c = cursor ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+
+    g->S[0] = forward ? "0" : "limit_backward";
+    write_failure_if(g, "~S0 > c || c > limit", p);
+
+    w(g, "~Mcursor = c;~N");
+    w(g, "~}");
+}
+
+/* 'delete': emit a call of slice_del(). */
+static void generate_delete(struct generator * g, struct node * p) {
+    write_comment(g, p);
+    w(g, "~Mslice_del();~N");
+}
+
+
+/* 'next': fail if the cursor is at the limit, otherwise move it one place
+   in the direction of travel. */
+static void generate_next(struct generator * g, struct node * p) {
+    write_comment(g, p);
+
+    write_check_limit(g, p);
+    write_inc_cursor(g, p);
+}
+
+/* 'tolimit': move the cursor to limit (forward) or limit_backward
+   (backward). */
+static void generate_tolimit(struct generator * g, struct node * p) {
+    const char * target = "limit_backward";
+
+    if (p->mode == m_forward) target = "limit";
+
+    write_comment(g, p);
+    g->S[0] = target;
+    writef(g, "~Mcursor = ~S0;~N", p);
+}
+
+/* 'atlimit': fail while the cursor has not yet reached the limit, i.e. if
+   cursor < limit (forward) or cursor > limit_backward (backward). */
+static void generate_atlimit(struct generator * g, struct node * p) {
+    write_comment(g, p);
+
+    if (p->mode == m_forward) {
+        g->S[0] = "limit";
+        g->S[1] = "<";
+    } else {
+        g->S[0] = "limit_backward";
+        g->S[1] = ">";
+    }
+    write_failure_if(g, "cursor ~S1 ~S0", p);
+}
+
+/* '[': record the cursor as one end of the slice, in 'bra' when going
+   forward and in 'ket' when going backward. */
+static void generate_leftslice(struct generator * g, struct node * p) {
+    const char * end = "ket";
+
+    if (p->mode == m_forward) end = "bra";
+
+    write_comment(g, p);
+    g->S[0] = end;
+    writef(g, "~M~S0 = cursor;~N", p);
+}
+
+/* ']': record the cursor as the other end of the slice, in 'ket' when
+   going forward and in 'bra' when going backward. */
+static void generate_rightslice(struct generator * g, struct node * p) {
+    const char * end = "bra";
+
+    if (p->mode == m_forward) end = "ket";
+
+    write_comment(g, p);
+    g->S[0] = end;
+    writef(g, "~M~S0 = cursor;~N", p);
+}
+
+/* Emit "<var> = assign_to(<var>);" for the named variable. */
+static void generate_assignto(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    writef(g, "~M~V0 = assign_to(~V0);~N", p);
+}
+
+/* Emit "<var> = slice_to(<var>);" for the named variable. */
+static void generate_sliceto(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    writef(g, "~M~V0 = slice_to(~V0);~N", p);
+}
+
+/* Write the string operand of node p: its literal string when it has one,
+   otherwise a reference to its named variable. */
+static void generate_address(struct generator * g, struct node * p) {
+    symbol * literal = p->literalstring;
+
+    if (literal == 0) {
+        write_varref(g, p->name);
+        return;
+    }
+    write_literal_string(g, literal);
+}
+
+/* 'insert' / 'attach' ('style' is the node type): emit
+   "insert(cursor, cursor, <string>);". The cursor is saved in a local 'c'
+   and put back afterwards for attach in forward mode and for insert in
+   backward mode. */
+static void generate_insert(struct generator * g, struct node * p, int style) {
+    int is_attach = style == c_attach;
+    int is_backward = p->mode == m_backward;
+    int keep_c = is_attach != is_backward;
+
+    write_comment(g, p);
+    if (keep_c) w(g, "~{~Mint c = cursor;~N");
+
+    w(g, "~Minsert(cursor, cursor, ");
+    generate_address(g, p);
+    w(g, ");~N");
+
+    if (keep_c) w(g, "~Mcursor = c;~N~}");
+}
+
+/* Assignment of a string to the region ahead of the cursor: emits
+   "insert(cursor, limit, <string>);" in forward mode and
+   "insert(limit_backward, cursor, <string>);" in backward mode. In forward
+   mode the cursor is saved in a local 'c' and put back afterwards (like
+   'attach'). */
+static void generate_assignfrom(struct generator * g, struct node * p) {
+    int forward = p->mode == m_forward;
+
+    write_comment(g, p);
+
+    if (forward) {
+        w(g, "~{~Mint c = cursor;~N");
+        w(g, "~Minsert(cursor, limit, ");
+    } else {
+        w(g, "~Minsert(limit_backward, cursor, ");
+    }
+    generate_address(g, p);
+    w(g, ");~N");
+
+    if (forward) w(g, "~Mcursor = c;~N~}");
+}
+
+
+/* Emit "slice_from(<string>);" with the node's string operand. */
+static void generate_slicefrom(struct generator * g, struct node * p) {
+    write_comment(g, p);
+
+    w(g, "~Mslice_from(");
+    generate_address(g, p);
+    w(g, ");~N");
+}
+
+/* 'setlimit': p->left is generated first to move the cursor to the new
+   limit position; the limit in the direction of travel is then set to the
+   cursor, the cursor is put back, and p->aux is generated under the new
+   limit. The statement that reinstates the old limit is kept in
+   g->failure_str, so it is emitted on any failure inside p->aux, and it is
+   also written once after p->aux if that code can complete. */
+static void generate_setlimit(struct generator * g, struct node * p) {
+
+    struct str * savevar = vars_newname(g);
+    struct str * varname = vars_newname(g);
+    write_comment(g, p);
+    write_savecursor(g, p, savevar);
+    generate(g, p->left);
+
+    if (!g->unreachable) {
+        /* remember the old limit in a new int: forward as its distance
+           from the cursor, backward as the value of limit_backward */
+        g->B[0] = str_data(varname);
+        write_declare(g, "int ~B0", p);
+        if (p->mode == m_forward) {
+            w(g, "~M~B0 = limit - cursor;~N");
+            w(g, "~Mlimit = cursor;~N");
+        } else {
+            w(g, "~M~B0 = limit_backward;~N");
+            w(g, "~Mlimit_backward = cursor;~N");
+        }
+        write_restorecursor(g, p, savevar);
+
+        /* the limit-reinstating statement becomes the failure string */
+        if (p->mode == m_forward) {
+            str_assign(g->failure_str, "limit += ");
+            str_append(g->failure_str, varname);
+            str_append_ch(g->failure_str, ';');
+        } else {
+            str_assign(g->failure_str, "limit_backward = ");
+            str_append(g->failure_str, varname);
+            str_append_ch(g->failure_str, ';');
+        }
+        generate(g, p->aux);
+
+        /* normal completion: reinstate the limit here as well */
+        if (!g->unreachable) {
+            write_margin(g);
+            write_str(g, g->failure_str);
+            write_newline(g);
+        }
+    }
+    str_delete(varname);
+    str_delete(savevar);
+}
+
+/* dollar sets snowball up to operate on a string variable as if it were the
+ * current string.
+ *
+ * The emitted block declares a variable of the class named by
+ * options->name, initialised to 'this', then points 'current' at a new
+ * StringBuffer holding the variable's text and resets cursor and limit for
+ * it. "copy_from(<saved>);" is installed as the failure string, so it is
+ * emitted on any failure inside p->left, and it is also written after
+ * p->left if that code can complete. */
+static void generate_dollar(struct generator * g, struct node * p) {
+
+    struct str * savevar = vars_newname(g);
+    write_comment(g, p);
+    g->V[0] = p->name;
+
+    str_assign(g->failure_str, "copy_from(");
+    str_append(g->failure_str, savevar);
+    str_append_string(g->failure_str, ");");
+    g->B[0] = str_data(savevar);
+    writef(g, "~{~M~n ~B0 = this;~N"
+             "~Mcurrent = new StringBuffer(~V0.toString());~N"
+             "~Mcursor = 0;~N"
+             "~Mlimit = (current.length());~N", p);
+    generate(g, p->left);
+    if (!g->unreachable) {
+        /* normal completion: emit the same copy_from() statement */
+        write_margin(g);
+        write_str(g, g->failure_str);
+        write_newline(g);
+    }
+    w(g, "~}");
+    str_delete(savevar);
+}
+
+/* Integer assignment: emit "<var> <s> <AE>;", where 's' is the Java
+   assignment operator ("=", "+=", ...). */
+static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
+    g->S[0] = s;
+    g->V[0] = p->name;
+
+    w(g, "~M~V0 ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, ";~N");
+}
+
+/* Integer comparison: emit "if (!(<var> <s> <AE>))" followed by the
+   failure block, where 's' is the Java relational operator. */
+static void generate_integer_test(struct generator * g, struct node * p, char * s) {
+    g->S[0] = s;
+    g->V[0] = p->name;
+
+    w(g, "~Mif (!(~V0 ~S0 ");
+    generate_AE(g, p->AE);
+    w(g, "))~N~f");
+}
+
+/* Routine call: fail if the called routine returns false. */
+static void generate_call(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    write_failure_if(g, "!~V0()", p);
+}
+
+/* Grouping test: fail unless the in/out ('complement' selects "out")
+   grouping check succeeds; the "_b" variants are used in backward mode.
+   A grouping flagged no_gaps is tested with the *_range call on its
+   smallest and largest characters alone; otherwise the grouping variable
+   is passed to the *_grouping call as well. */
+static void generate_grouping(struct generator * g, struct node * p, int complement) {
+    struct grouping * q = p->name->grouping;
+    char * test;
+
+    g->S[0] = p->mode == m_forward ? "" : "_b";
+    g->S[1] = complement ? "out" : "in";
+    g->V[0] = p->name;
+    g->I[0] = q->smallest_ch;
+    g->I[1] = q->largest_ch;
+
+    if (q->no_gaps) {
+        test = "!(~S1_range~S0(~I0, ~I1))";
+    } else {
+        test = "!(~S1_grouping~S0(~V0, ~I0, ~I1))";
+    }
+    write_failure_if(g, test, p);
+}
+
+/* String variable match: fail unless eq_v (eq_v_b in backward mode)
+   succeeds for the named variable. */
+static void generate_namedstring(struct generator * g, struct node * p) {
+    const char * suffix = "_b";
+
+    if (p->mode == m_forward) suffix = "";
+
+    write_comment(g, p);
+    g->S[0] = suffix;
+    g->V[0] = p->name;
+    write_failure_if(g, "!(eq_v~S0(~V0))", p);
+}
+
+/* Literal string match: fail unless eq_s (eq_s_b in backward mode)
+   succeeds for the literal, which is passed together with its length. */
+static void generate_literalstring(struct generator * g, struct node * p) {
+    symbol * literal = p->literalstring;
+    const char * suffix = "_b";
+
+    if (p->mode == m_forward) suffix = "";
+
+    write_comment(g, p);
+    g->S[0] = suffix;
+    g->I[0] = SIZE(literal);
+    g->L[0] = literal;
+    write_failure_if(g, "!(eq_s~S0(~I0, ~L0))", p);
+}
+
+/* Routine definition: emit "<private|public> boolean <name>() {", then the
+   local declarations collected while generating the body, then the body.
+   The body and its declarations are built in temporary buffers so that the
+   declarations can be placed ahead of the code. */
+static void generate_define(struct generator * g, struct node * p) {
+
+    struct name * q = p->name;
+
+    struct str * saved_output = g->outbuf;
+    struct str * saved_declarations = g->declarations;
+
+    /* routines are private; any other name type is emitted as public */
+    g->S[0] = q->type == t_routine ? "private" : "public";
+    g->V[0] = q;
+    w(g, "~+~+~N~M~S0 boolean ~V0() {~+~N");
+
+    g->outbuf = str_new();
+    g->declarations = str_new();
+
+    /* labels and temporaries are numbered from scratch in each routine */
+    g->next_label = 0;
+    g->var_number = 0;
+
+    if (p->amongvar_needed) write_declare(g, "int among_var", p);
+    /* at routine level a failure is "return false;" with no clean-up */
+    str_clear(g->failure_str);
+    g->failure_label = x_return;
+    g->unreachable = false;
+    generate(g, p->left);
+    if (!g->unreachable) w(g, "~Mreturn true;~N");
+    w(g, "~}~-~-");
+
+    /* declarations first, then the code, appended to the real output */
+    str_append(saved_output, g->declarations);
+    str_append(saved_output, g->outbuf);
+    str_delete(g->declarations);
+    str_delete(g->outbuf);
+    g->declarations = saved_declarations;
+    g->outbuf = saved_output;
+}
+
+/* 'substring': emit the find_among (find_among_b in backward mode) call
+   for the node's among table. When the among has neither commands nor a
+   starter the result is only tested against 0; otherwise it is stored in
+   among_var first, for the switch emitted by generate_among(). */
+static void generate_substring(struct generator * g, struct node * p) {
+    struct among * x = p->among;
+    int result_needed = !(x->command_count == 0 && x->starter == 0);
+
+    write_comment(g, p);
+
+    g->S[0] = p->mode == m_forward ? "" : "_b";
+    g->I[0] = x->number;
+    g->I[1] = x->literalstring_count;
+
+    if (result_needed) {
+        writef(g, "~Mamong_var = find_among~S0(a_~I0, ~I1);~N", p);
+        write_failure_if(g, "among_var == 0", p);
+    } else {
+        write_failure_if(g, "find_among~S0(a_~I0, ~I1) == 0", p);
+    }
+}
+
+/* 'among': emit the table lookup (unless a separate 'substring' already did
+   so) and then, if the among has commands or a starter, the starter code
+   and a "switch(among_var)" with one case per non-empty bracketed command.
+   Case 0 holds the failure code. */
+static void generate_among(struct generator * g, struct node * p) {
+
+    struct among * x = p->among;
+    int case_number = 1;
+
+    if (x->substring == 0) generate_substring(g, p);
+    /* nothing to dispatch on: the lookup alone decides success */
+    if (x->command_count == 0 && x->starter == 0) return;
+
+    if (x->starter != 0) generate(g, x->starter);
+
+    p = p->left;
+    /* skip a leading node that is not a literal string (presumably the
+       starter, which was generated above) */
+    if (p != 0 && p->type != c_literalstring) p = p->right;
+    w(g, "~Mswitch(among_var) {~N~+");
+    w(g, "~Mcase 0:~N~+");
+    write_failure(g);
+    g->unreachable = false;
+    w(g, "~-");
+
+    while (p != 0) {
+        /* each non-empty bracketed command gets the next case number */
+        if (p->type == c_bra && p->left != 0) {
+            g->I[0] = case_number++;
+            w(g, "~Mcase ~I0:~N~+");
+            generate(g, p);
+            if (!g->unreachable) w(g, "~Mbreak;~N");
+            w(g, "~-");
+            g->unreachable = false;
+        }
+        p = p->right;
+    }
+    /* closes the switch: steps the margin back out and writes "}" */
+    write_block_end(g);
+}
+
+/* Boolean test: fail unless the named boolean is true. */
+static void generate_booltest(struct generator * g, struct node * p) {
+    g->V[0] = p->name;
+    write_comment(g, p);
+    write_failure_if(g, "!(~V0)", p);
+}
+
+/* 'false': fail unconditionally. */
+static void generate_false(struct generator * g, struct node * p) {
+    write_comment(g, p);
+    write_failure(g);
+}
+
+/* '?': emit "debug(<k>, <line>);", k being a running count of the debug
+   commands generated so far and <line> the node's source line number. */
+static void generate_debug(struct generator * g, struct node * p) {
+    int serial = g->debug_count;
+
+    g->debug_count = serial + 1;
+
+    write_comment(g, p);
+    g->I[0] = serial;
+    g->I[1] = p->line_number;
+    writef(g, "~Mdebug(~I0, ~I1);~N", p);
+}
+
+static void generate(struct generator * g, struct node * p) {
+    /* Emit code for node p, dispatching on p->type; a no-op while g->unreachable is set. */
+    int a0;
+    struct str * a1;
+
+    if (g->unreachable) return;
+    /* Save the failure label and string so the handler may change them freely. */
+    a0 = g->failure_label;
+    a1 = str_copy(g->failure_str);
+
+    switch (p->type)
+    {
+        case c_define:        generate_define(g, p); break;
+        case c_bra:           generate_bra(g, p); break;
+        case c_and:           generate_and(g, p); break;
+        case c_or:            generate_or(g, p); break;
+        case c_backwards:     generate_backwards(g, p); break;
+        case c_not:           generate_not(g, p); break;
+        case c_set:           generate_set(g, p); break;
+        case c_unset:         generate_unset(g, p); break;
+        case c_try:           generate_try(g, p); break;
+        case c_fail:          generate_fail(g, p); break;
+        case c_reverse:       /* fallthrough: generated exactly like test */
+        case c_test:          generate_test(g, p); break;
+        case c_do:            generate_do(g, p); break;
+        case c_goto:          generate_GO(g, p, 1); break;
+        case c_gopast:        generate_GO(g, p, 0); break;
+        case c_repeat:        generate_repeat(g, p, 0); break;
+        case c_loop:          generate_loop(g, p); break;
+        case c_atleast:       generate_atleast(g, p); break;
+        case c_setmark:       generate_setmark(g, p); break;
+        case c_tomark:        generate_tomark(g, p); break;
+        case c_atmark:        generate_atmark(g, p); break;
+        case c_hop:           generate_hop(g, p); break;
+        case c_delete:        generate_delete(g, p); break;
+        case c_next:          generate_next(g, p); break;
+        case c_tolimit:       generate_tolimit(g, p); break;
+        case c_atlimit:       generate_atlimit(g, p); break;
+        case c_leftslice:     generate_leftslice(g, p); break;
+        case c_rightslice:    generate_rightslice(g, p); break;
+        case c_assignto:      generate_assignto(g, p); break;
+        case c_sliceto:       generate_sliceto(g, p); break;
+        case c_assign:        generate_assignfrom(g, p); break;
+        case c_insert:        /* fallthrough: generate_insert is told which one via p->type */
+        case c_attach:        generate_insert(g, p, p->type); break;
+        case c_slicefrom:     generate_slicefrom(g, p); break;
+        case c_setlimit:      generate_setlimit(g, p); break;
+        case c_dollar:        generate_dollar(g, p); break;
+        case c_mathassign:    generate_integer_assign(g, p, "="); break;
+        case c_plusassign:    generate_integer_assign(g, p, "+="); break;
+        case c_minusassign:   generate_integer_assign(g, p, "-="); break;
+        case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
+        case c_divideassign:  generate_integer_assign(g, p, "/="); break;
+        case c_eq:            generate_integer_test(g, p, "=="); break;
+        case c_ne:            generate_integer_test(g, p, "!="); break;
+        case c_gr:            generate_integer_test(g, p, ">"); break;
+        case c_ge:            generate_integer_test(g, p, ">="); break;
+        case c_ls:            generate_integer_test(g, p, "<"); break;
+        case c_le:            generate_integer_test(g, p, "<="); break;
+        case c_call:          generate_call(g, p); break;
+        case c_grouping:      generate_grouping(g, p, false); break;
+        case c_non:           generate_grouping(g, p, true); break;
+        case c_name:          generate_namedstring(g, p); break;
+        case c_literalstring: generate_literalstring(g, p); break;
+        case c_among:         generate_among(g, p); break;
+        case c_substring:     generate_substring(g, p); break;
+        case c_booltest:      generate_booltest(g, p); break;
+        case c_false:         generate_false(g, p); break;
+        case c_true:          break;   /* nothing is emitted for 'true' */
+        case c_debug:         generate_debug(g, p); break;
+        default: fprintf(stderr, "%d encountered\n", p->type);
+                 exit(1);
+    }
+    /* Put back the saved failure context; the str left behind by the handler is deleted. */
+    g->failure_label = a0;
+    str_delete(g->failure_str);
+    g->failure_str = a1;
+}
+
+static void generate_start_comment(struct generator * g) {
+    /* Write the '//' banner marking the output as generated, followed by a blank line. */
+    w(g, "// This file was generated automatically by the Snowball to Java compiler~N");
+    w(g, "~N");
+}
+
+static void generate_class_begin(struct generator * g) {
+    /* Emit the package line from options->package ... */
+    w(g, "package " );
+    w(g, g->options->package);
+    w(g, ";~N~N" );
+    /* ... then the import of options->among_class, a Javadoc banner and the class header. */
+    w(g, "import ");
+    w(g, g->options->among_class );
+    w(g, ";~N"
+         "~N"
+         " /**~N"
+         "  * This class was automatically generated by a Snowball to Java compiler ~N"
+         "  * It implements the stemming algorithm defined by a snowball script.~N"
+         "  */~N"
+         "~N"
+         "public class ~n extends ");
+
+     w(g, g->options->parent_class_name);
+     w(g, " {~N"
+          "~N"
+	  "private static final long serialVersionUID = 1L;~N"
+	  "~N"
+	  "~+~+~Mprivate final static ~n methodObject = new ~n ();~N"
+         "~N");
+}
+
+static void generate_class_end(struct generator * g) {
+    /* Write the closing brace of the class opened by generate_class_begin, then blank lines. */
+    w(g, "~N}");
+    w(g, "~N~N");
+}
+
+static void generate_equals(struct generator * g) {
+    /* Emit equals() (an instanceof test against options->name) and hashCode() (hash of that class's name). */
+    w(g, "~N"
+         "~Mpublic boolean equals( Object o ) {~N"
+         "~+~Mreturn o instanceof ");
+    w(g, g->options->name);
+	 w(g, ";~N~-~M}~N"
+	      "~N"
+	      "~Mpublic int hashCode() {~N"
+	      "~+~Mreturn ");
+    w(g, g->options->name);
+	 w(g, ".class.getName().hashCode();~N"
+	      "~-~M}~N");
+    w(g, "~N~N");
+}
+
+static void generate_among_table(struct generator * g, struct among * x) {
+    /* Emit the static Java array a_<x->number>, holding one Among object per literal string of x. */
+    struct amongvec * v = x->b;
+
+    g->I[0] = x->number;
+    g->I[1] = x->literalstring_count;
+
+    w(g, "~+~+~Mprivate final static Among a_~I0[] = {~N~+");
+    {
+        int i;
+        for (i = 0; i < x->literalstring_count; i++) {
+            g->I[0] = i;
+            g->I[1] = v->i;
+            g->I[2] = v->result;
+            g->L[0] = v->b;
+            g->S[0] = i < x->literalstring_count - 1 ? "," : "";  /* no comma after the last entry */
+            /* Constructor arguments: the string, v->i, v->result, the routine name (or ""), methodObject. */
+            w(g, "~Mnew Among ( ~L0, ~I1, ~I2, \"");
+            if (v->function != 0) {
+                write_varname(g, v->function);
+            }
+            w(g, "\", methodObject )~S0~N");
+            v++;
+        }
+    }
+    w(g, "~-~M};~-~-~N~N");
+}
+
+static void generate_amongs(struct generator * g) {
+    /* Emit one Among table per entry of the analyser's among list, in list order. */
+    struct among * a;
+
+    for (a = g->analyser->amongs; a != 0; a = a->next) {
+        generate_among_table(g, a);
+    }
+}
+
+static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }  /* bit i is kept in b[i/8] at position i%8 */
+
+static int bit_is_set(symbol * b, int i) { return b[i/8] & 1 << i%8; }  /* nonzero (not necessarily 1) when set */
+
+static void generate_grouping_table(struct generator * g, struct grouping * q) {
+    /* Emit grouping q as a Java char[] bitmap (bit i <=> character smallest_ch + i), unless it has no gaps. */
+    int range = q->largest_ch - q->smallest_ch + 1;
+    int size = (range + 7)/ 8;  /* assume 8 bits per symbol */
+    symbol * b = q->b;
+    symbol * map = create_b(size);
+    int i;
+    for (i = 0; i < size; i++) map[i] = 0;
+
+    /* Using unicode would require revision here */
+
+    for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
+    /* no_gaps stays true only if every character from smallest_ch to largest_ch is in the grouping. */
+    q->no_gaps = true;
+    for (i = 0; i < range; i++) unless (bit_is_set(map, i)) q->no_gaps = false;
+
+    unless (q->no_gaps) {
+        g->V[0] = q->name;
+
+        w(g, "~+~+~Mprivate static final char ~V0[] = {");
+        for (i = 0; i < size; i++) {
+             write_int(g, map[i]);
+             if (i < size - 1) w(g, ", ");
+        }
+        w(g, " };~N~-~-~N");
+    }
+    lose_b(map);
+}
+
+static void generate_groupings(struct generator * g) {
+    /* Visit every grouping on the analyser's list; generate_grouping_table
+       itself decides whether a table is actually written for it. */
+    struct grouping * grp;
+    for (grp = g->analyser->groupings; grp != 0; grp = grp->next)
+        generate_grouping_table(g, grp);
+}
+
+static void generate_members(struct generator * g) {
+    /* Declare one private Java field per string, integer and boolean name;
+       names of any other type (routine, external, grouping) get no field. */
+    struct name * nm;
+    for (nm = g->analyser->names; nm != 0; nm = nm->next) {
+        g->V[0] = nm;
+        switch (nm->type) {
+            case t_string:
+                w(g, "        private ");
+                w(g, g->options->string_class );
+                w(g, " ~W0 = new ");
+                w(g, g->options->string_class);
+                w(g, "();~N");
+                break;
+            case t_integer:
+                w(g, "        private int ~W0;~N");
+                break;
+            case t_boolean:
+                w(g, "        private boolean ~W0;~N");
+                break;
+        }
+    }
+    w(g, "~N");
+}
+
+static void generate_copyfrom(struct generator * g) {
+    /* Emit the Java copy_from(other) method: one assignment per string,
+       integer or boolean member, followed by a call to super.copy_from. */
+    struct name * nm;
+
+    w(g, "~+~+~Mprivate void copy_from(~n other) {~+~N");
+    for (nm = g->analyser->names; nm != 0; nm = nm->next) {
+        int is_member = nm->type == t_string ||
+                        nm->type == t_integer ||
+                        nm->type == t_boolean;
+        g->V[0] = nm;
+        if (is_member) w(g, "~M~W0 = other.~W0;~N");
+    }
+
+    w(g, "~Msuper.copy_from(other);~N");
+    w(g, "~-~M}~-~-~N");
+}
+
+static void generate_methods(struct generator * g) {
+    /* Generate each top-level node of the program in turn, clearing the
+       unreachable flag after every one so it cannot suppress the next. */
+    struct node * top;
+    for (top = g->analyser->program; top != 0; top = top->right) {
+        generate(g, top);
+        g->unreachable = false;
+    }
+}
+
+extern void generate_program_java(struct generator * g) {
+    /* Java backend entry point: build the whole class text in g->outbuf, then write it to options->output_java. */
+    g->outbuf = str_new();
+    g->failure_str = str_new();
+
+    generate_start_comment(g);
+    generate_class_begin(g);
+
+    generate_amongs(g);
+    generate_groupings(g);
+
+    generate_members(g);
+    generate_copyfrom(g);
+    generate_methods(g);
+    generate_equals(g);
+
+    generate_class_end(g);
+
+    output_str(g->options->output_java, g->outbuf);
+    str_delete(g->failure_str);
+    str_delete(g->outbuf);
+}
+
+extern struct generator * create_generator_java(struct analyser * a, struct options * o) {
+    /* Allocate a generator for the Java backend; release it with close_generator_java. */
+    NEW(generator, g);   /* NOTE(review): fields not assigned below stay uninitialised - confirm each is set before use */
+    g->analyser = a;
+    g->options = o;
+    g->margin = 0;
+    g->debug_count = 0;
+    g->unreachable = false;
+    return g;
+}
+
+extern void close_generator_java(struct generator * g) {
+    /* Frees only the struct; outbuf and failure_str are deleted by generate_program_java. */
+    FREE(g);
+}
+
diff --git a/snowball_code/compiler/header.h b/snowball_code/compiler/header.h
new file mode 100644
index 0000000..d808fb4
--- /dev/null
+++ b/snowball_code/compiler/header.h
@@ -0,0 +1,315 @@
+
+typedef unsigned char byte;
+typedef unsigned short symbol;   /* element type of the 'blocks' implemented in space.c */
+/* Home-grown booleans and control-flow spellings used throughout the compiler sources. */
+#define true 1
+#define false 0
+#define repeat while(true)
+#define unless(C) if(!(C))
+#define until(C) while(!(C))
+
+#define MALLOC check_malloc   /* counted allocation: pairs with FREE */
+#define FREE check_free
+/* NEW / NEWVEC declare p and point it at MALLOCed room for one / n structs; n is parenthesised so any expression is safe. */
+#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
+#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))
+
+#define STARTSIZE   10
+#define SIZE(p)     ((int *)(p))[-1]   /* symbols in use: the int stored just before the data */
+#define CAPACITY(p) ((int *)(p))[-2]   /* symbols available: the int stored before SIZE */
+
+extern symbol * create_b(int n);
+extern void report_b(FILE * out, symbol * p);
+extern void lose_b(symbol * p);
+extern symbol * increase_capacity(symbol * p, int n);
+extern symbol * move_to_b(symbol * p, int n, symbol * q);
+extern symbol * add_to_b(symbol * p, int n, symbol * q);
+extern symbol * copy_b(symbol * p);
+extern char * b_to_s(symbol * p);
+extern symbol * add_s_to_b(symbol * p, const char * s);
+
+struct str; /* defined in space.c */
+
+extern struct str * str_new(void);
+extern void str_delete(struct str * str);
+extern void str_append(struct str * str, struct str * add);
+extern void str_append_ch(struct str * str, char add);
+extern void str_append_b(struct str * str, symbol * q);
+extern void str_append_string(struct str * str, const char * s);
+extern void str_append_int(struct str * str, int i);
+extern void str_clear(struct str * str);
+extern void str_assign(struct str * str, char * s);
+extern struct str * str_copy(struct str * old);
+extern symbol * str_data(struct str * str);
+extern int str_len(struct str * str);
+extern int get_utf8(const symbol * p, int * slot);
+extern int put_utf8(int ch, symbol * p);
+
+struct m_pair {
+
+    struct m_pair * next;
+    symbol * name;
+    symbol * value;
+
+};
+
+struct input {
+
+    struct input * next;
+    symbol * p;
+    int c;
+    int line_number;
+
+};
+
+struct include {
+
+    struct include * next;
+    symbol * b;
+
+};
+
+struct tokeniser {
+
+    struct input * next;
+    symbol * p;
+    int c;
+    int line_number;
+    symbol * b;
+    symbol * b2;
+    int number;
+    int m_start;
+    int m_end;
+    struct m_pair * m_pairs;
+    int get_depth;
+    int error_count;
+    int token;
+    int previous_token;
+    byte token_held;
+    byte widechars;
+    byte utf8;
+
+    int omission;
+    struct include * includes;
+
+};
+
+extern symbol * get_input(symbol * p);
+extern struct tokeniser * create_tokeniser(symbol * b);
+extern int read_token(struct tokeniser * t);
+extern byte * name_of_token(int code);
+extern void close_tokeniser(struct tokeniser * t);
+
+enum token_codes {
+    /* The keyword codes (c_among = 4 onwards) are pulled in from syswords2.h. */
+#include "syswords2.h"
+
+    c_mathassign,
+    c_name,
+    c_number,
+    c_literalstring,
+    c_neg,
+    c_call,
+    c_grouping,
+    c_booltest
+};
+
+extern int space_count;
+extern void * check_malloc(int n);
+extern void check_free(void * p);
+
+struct node;
+
+struct name {
+
+    struct name * next;
+    symbol * b;
+    int type;                   /* t_string etc */
+    int mode;                   /*    )_  for routines, externals */
+    struct node * definition;   /*    )                           */
+    int count;                  /* 0, 1, 2 for each type */
+    struct grouping * grouping; /* for grouping names */
+    byte referenced;
+    byte used;
+
+};
+
+struct literalstring {
+
+    struct literalstring * next;
+    symbol * b;
+
+};
+
+struct amongvec {
+
+    symbol * b;      /* the string giving the case */
+    int size;        /* - and its size */
+    struct node * p; /* the corresponding command */
+    int i;           /* the amongvec index of the longest substring of b */
+    int result;      /* the numeric result for the case */
+    struct name * function;
+
+};
+
+struct among {
+
+    struct among * next;
+    struct amongvec * b;      /* pointer to the amongvec */
+    int number;               /* amongs are numbered 0, 1, 2 ... */
+    int literalstring_count;  /* in this among */
+    int command_count;        /* in this among */
+    struct node * starter;    /* i.e. among( (starter) 'string' ... ) */
+    struct node * substring;  /* i.e. substring ... among ( ... ) */
+};
+
+struct grouping {
+
+    struct grouping * next;
+    int number;               /* groupings are numbered 0, 1, 2 ... */
+    symbol * b;               /* the characters of this group */
+    int largest_ch;           /* character with max code */
+    int smallest_ch;          /* character with min code */
+    byte no_gaps;             /* not used in generator.c after 11/5/05 */
+    struct name * name;       /* so g->name->grouping == g */
+};
+
+struct node {
+
+    struct node * next;
+    struct node * left;
+    struct node * aux;     /* used in setlimit */
+    struct among * among;  /* used in among */
+    struct node * right;
+    int type;
+    int mode;
+    struct node * AE;
+    struct name * name;
+    symbol * literalstring;
+    int number;
+    int line_number;
+    int amongvar_needed;   /* used in routine definitions */
+};
+
+enum name_types {
+
+    t_size = 6,
+
+    t_string = 0, t_boolean = 1, t_integer = 2, t_routine = 3, t_external = 4,
+    t_grouping = 5
+
+/*  If this list is extended, adjust wvn in generator.c  */
+};
+
+/*  In name_count[i] below, remember that
+    type   is
+    ----+----
+      0 |  string
+      1 |  boolean
+      2 |  integer
+      3 |  routine
+      4 |  external
+      5 |  grouping
+*/
+
+struct analyser {
+
+    struct tokeniser * tokeniser;
+    struct node * nodes;
+    struct name * names;
+    struct literalstring * literalstrings;
+    int mode;
+    byte modifyable;          /* false inside reverse(...) */
+    struct node * program;
+    struct node * program_end;
+    int name_count[t_size];   /* name_count[i] counts the number of names of type i */
+    struct among * amongs;
+    struct among * amongs_end;
+    int among_count;
+    int amongvar_needed;      /* used in reading routine definitions */
+    struct grouping * groupings;
+    struct grouping * groupings_end;
+    struct node * substring;  /* pending 'substring' in current routine definition */
+    byte utf8;
+};
+
+enum analyser_modes {
+
+    m_forward = 0, m_backward /*, m_integer */
+
+};
+
+extern void print_program(struct analyser * a);
+extern struct analyser * create_analyser(struct tokeniser * t);
+extern void close_analyser(struct analyser * a);
+
+extern void read_program(struct analyser * a);
+
+struct generator {
+
+    struct analyser * analyser;
+    struct options * options;
+    int unreachable;           /* 0 if code can be reached, 1 if current code
+                                * is unreachable. */
+    int var_number;            /* Number of next variable to use. */
+    struct str * outbuf;       /* temporary str to store output */
+    struct str * declarations; /* str storing variable declarations */
+    int next_label;
+    int margin;
+
+    const char * failure_string;     /* String to output in case of a failure. */
+    struct str * failure_str;  /* This is used by the java generator instead of failure_string */
+
+    int label_used;     /* Keep track of whether the failure label is used. */
+    int failure_label;
+    int debug_count;
+
+    const char * S[10];        /* strings */
+    symbol * B[10];      /* blocks */
+    int I[10];           /* integers */
+    struct name * V[5];  /* variables */
+    symbol * L[5];       /* literals, used in formatted write */
+
+    int line_count;      /* counts number of lines output */
+    int line_labelled;   /* in ANSI C, will need extra ';' if it is a block end */
+    int literalstring_count;
+    int keep_count;      /* used to number keep/restore pairs to avoid compiler warnings
+                            about shadowed variables */
+};
+
+struct options {
+
+    /* for the command line: */
+
+    char * output_file;
+    char * name;
+    FILE * output_c;
+    FILE * output_h;
+    FILE * output_java;
+    byte syntax_tree;
+    byte widechars;
+    enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS } make_lang;
+    char * externals_prefix;
+    char * variables_prefix;
+    char * runtime_path;
+    char * parent_class_name;
+    char * package;
+    char * string_class;
+    char * among_class;
+    struct include * includes;
+    struct include * includes_end;
+    byte utf8;
+};
+
+/* Generator for C code. */
+extern struct generator * create_generator_c(struct analyser * a, struct options * o);
+extern void close_generator_c(struct generator * g);
+
+extern void generate_program_c(struct generator * g);
+
+/* Generator for Java code. */
+extern struct generator * create_generator_java(struct analyser * a, struct options * o);
+extern void close_generator_java(struct generator * g);
+
+extern void generate_program_java(struct generator * g);
+
diff --git a/snowball_code/compiler/space.c b/snowball_code/compiler/space.c
new file mode 100644
index 0000000..73bf2d5
--- /dev/null
+++ b/snowball_code/compiler/space.c
@@ -0,0 +1,257 @@
+
+#include <stdio.h>    /* for printf */
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* memmove */
+
+#include "header.h"
+
+#define HEAD (2*sizeof(int))   /* bytes reserved in front of the data for CAPACITY and SIZE */
+#define EXTENDER 40            /* extra symbols added by every increase_capacity call */
+
+
+/*  This module provides a simple mechanism for arbitrary length writable
+    strings, called 'blocks'. They are 'symbol *' items rather than 'char *'
+    items however.
+
+    The calls are:
+
+        symbol * b = create_b(n);
+            - create an empty block b with room for n symbols
+        b = increase_capacity(b, n);
+            - increase the capacity of block b by n symbols (b may change)
+        b2 = copy_b(b)
+            - copy block b into b2
+        lose_b(b);
+            - lose block b
+        b = move_to_b(b, n, p);
+            - set the data in b to be the n symbols at address p
+        b = add_to_b(b, n, p);
+            - add the n symbols at address p to the end of the data in b
+        SIZE(b)
+            - is the number of symbols in b
+        For example:
+
+        symbol * b = create_b(0);
+        {   int i;
+            char p[10];
+            for (i = 0; i < 100; i++) {
+                sprintf(p, " %d", i);
+                b = add_s_to_b(b, p);
+            }
+        }
+
+    and b contains " 0 1 2 ... 99" spaced out as symbols.
+*/
+
+/*  For a block b, SIZE(b) is the number of symbols so far written into it,
+    CAPACITY(b) the total number it can contain, so SIZE(b) <= CAPACITY(b).
+    In fact blocks have 1 extra character over the promised capacity so
+    they can be zero terminated by 'b[SIZE(b)] = 0;' without fear of
+    overwriting.
+*/
+
+extern symbol * create_b(int n) {
+    char * raw = (char *) MALLOC(HEAD + (n + 1) * sizeof(symbol));   /* n + 1: spare symbol for a terminator */
+    symbol * b = (symbol *) (raw + HEAD);   /* the data starts after the two-int header */
+    CAPACITY(b) = n; SIZE(b) = 0;
+    return b;
+}
+
+extern void report_b(FILE * out, symbol * p) {
+    const symbol * end = p + SIZE(p);   /* every symbol of the block is written as one char */
+    while (p < end) fprintf(out, "%c", *p++);
+}
+
+extern void lose_b(symbol * p) {
+    /* Free block p (a null p is ignored); the allocation begins HEAD bytes before p. */
+    if (p != 0) FREE((char *) p - HEAD);
+}
+
+extern symbol * increase_capacity(symbol * p, int n) {
+    int old_capacity = CAPACITY(p);
+    symbol * bigger = create_b(old_capacity + n + EXTENDER);   /* n more symbols plus some slack */
+    memmove(bigger, p, old_capacity * sizeof(symbol)); SIZE(bigger) = SIZE(p);
+    lose_b(p); return bigger;   /* the old block is gone: callers must use the result */
+}
+
+extern symbol * move_to_b(symbol * p, int n, symbol * q) {
+    if (n > CAPACITY(p)) p = increase_capacity(p, n - CAPACITY(p));   /* make room first */
+    memmove(p, q, n * sizeof(symbol));
+    SIZE(p) = n; return p;
+}
+
+extern symbol * add_to_b(symbol * p, int n, symbol * q) {
+    int used = SIZE(p);   /* unchanged by increase_capacity, which copies SIZE across */
+    if (used + n > CAPACITY(p)) p = increase_capacity(p, used + n - CAPACITY(p));
+    memmove(p + used, q, n * sizeof(symbol)); SIZE(p) = used + n;
+    return p;
+}
+
+extern symbol * copy_b(symbol * p) {
+    /* create_b(len) already has room for len symbols, so move_to_b fills the
+       new block in place and hands the same pointer back. */
+    int len = SIZE(p);
+    return move_to_b(create_b(len), len, p);
+}
+
+int space_count = 0;   /* allocations made by check_malloc and not yet given to check_free */
+
+extern void * check_malloc(int n) {
+    void * p = malloc(n);   /* callers write through the result at once, so a null must not escape */
+    if (p == 0 && n > 0) { fprintf(stderr, "out of memory\n"); exit(1); }
+    space_count++; return p;
+}
+
+extern void check_free(void * p) {
+    space_count--;   /* counterpart of check_malloc: one fewer outstanding allocation */
+    free(p);
+}
+
+/* To convert a block to a zero terminated string:  */
+
+extern char * b_to_s(symbol * p) {
+    /* The result comes from plain malloc, so the caller releases it with
+       free(), not FREE. Running out of memory is reported and fatal. */
+    int n = SIZE(p);
+    int i;
+    char * s = (char *)malloc(n + 1);
+    if (s == 0) { fprintf(stderr, "out of memory\n"); exit(1); }
+    for (i = 0; i < n; i++) s[i] = (char)p[i]; /* cast to suppress possible warnings */
+    s[n] = 0; return s;
+}
+
+/* To add a zero terminated string to a block. If p = 0 the
+   block is created. */
+
+extern symbol * add_s_to_b(symbol * p, const char * s) {
+    /* Append the chars of s (without its terminator) to block p, creating p if it is 0. */
+    int n = strlen(s);
+    int i, k;
+    if (p == 0) p = create_b(n);
+    k = SIZE(p);
+    {
+        int x = k + n - CAPACITY(p);
+        if (x > 0) p = increase_capacity(p, x);
+    }
+    /* Widen via unsigned char: where plain char is signed, a byte such as
+       0xE9 would otherwise be sign-extended and stored as 0xFFE9. */
+    for (i = 0; i < n; i++) p[i + k] = (unsigned char) s[i];
+    SIZE(p) += n;
+    return p;
+}
+
+/* The next section defines string handling capabilities in terms
+   of the lower level block handling capabilities of space.c */
+/* -------------------------------------------------------------*/
+
+struct str {
+    symbol * data;   /* the underlying block; its SIZE is the length of the string */
+};
+
+/* Create a new, empty string. Release it with str_delete. */
+extern struct str * str_new(void) {
+    /* (void) matches the prototype in header.h; plain malloc pairs with the free in str_delete. */
+    struct str * output = (struct str *) malloc(sizeof(struct str));
+    if (output == 0) { fprintf(stderr, "out of memory\n"); exit(1); }
+    output->data = create_b(0); return output;
+}
+
+/* Delete a string. */
+extern void str_delete(struct str * str) {
+    /* Release the block first, then the str wrapper that pointed at it. */
+    lose_b(str->data);
+    free(str);
+}
+
+/* Append the contents of the str 'add' to the end of 'str'. */
+extern void str_append(struct str * str, struct str * add) {
+    /* Appending a str is appending its underlying block;
+       'add' itself is left untouched. */
+    str_append_b(str, add->data);
+}
+
+/* Append a character to this str. */
+extern void str_append_ch(struct str * str, char add) {
+    /* Via unsigned char, so a negative plain char is not sign-extended into the symbol. */
+    symbol q[1];
+    q[0] = (unsigned char) add;
+    str->data = add_to_b(str->data, 1, q);
+}
+
+/* Append a low level block to a str. */
+extern void str_append_b(struct str * str, symbol * q) {
+    /* add_to_b may move the block, hence the reassignment of str->data. */
+    str->data = add_to_b(str->data, SIZE(q), q);
+}
+
+/* Append a (char *, null terminated) string to a str. */
+extern void str_append_string(struct str * str, const char * s) {
+    /* add_s_to_b may move the block, hence the reassignment of str->data. */
+    str->data = add_s_to_b(str->data, s);
+}
+
+/* Append an integer to a str. */
+extern void str_append_int(struct str * str, int i) {
+    /* The int is formatted in decimal into a local buffer, then appended as a string. */
+    char s[30];
+    sprintf(s, "%d", i);
+    str_append_string(str, s);
+}
+
+/* Clear a string */
+extern void str_clear(struct str * str) {
+    /* Only the length is reset; the block and its capacity are kept. */
+    SIZE(str->data) = 0;
+}
+
+/* Set a string */
+extern void str_assign(struct str * str, char * s) {
+    /* Replace the current contents of str with the chars of s. */
+    str_clear(str);
+    str_append_string(str, s);
+}
+
+/* Copy a string. */
+extern struct str * str_copy(struct str * old) {
+    /* The copy is a fresh str with its own block; 'old' is not modified. */
+    struct str * newstr = str_new();
+    str_append(newstr, old);
+    return newstr;
+}
+
+/* Get the data stored in this str. */
+extern symbol * str_data(struct str * str) {
+    /* The returned block is the str's own storage, not a copy. */
+    return str->data;
+}
+
+/* Get the length of the str. */
+extern int str_len(struct str * str) {
+    /* The length is counted in symbols: it is the SIZE of the underlying block. */
+    return SIZE(str->data);
+}
+
+extern int get_utf8(const symbol * p, int * slot) {   /* decodes one sequence at p into *slot; returns symbols consumed (1 to 3) */
+    int b0, b1;   /* NOTE(review): no bounds or validity checks; any lead byte >= 0xE0 is read as a 3-byte sequence */
+    b0 = *p++;
+    if (b0 < 0xC0) {   /* 1100 0000 */
+        * slot = b0; return 1;
+    }
+    b1 = *p++;
+    if (b0 < 0xE0) {   /* 1110 0000 */
+        * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2;
+    }
+    * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (*p & 0x3F); return 3;
+}
+
+extern int put_utf8(int ch, symbol * p) {   /* returns the number of symbols written to p */
+    if (ch >= 0x800) {
+        p[0] = (ch >> 12) | 0xE0;
+        p[1] = ((ch >> 6) & 0x3F) | 0x80;
+        p[2] = (ch & 0x3F) | 0x80; return 3;
+    }
+    if (ch >= 0x80) {
+        p[0] = (ch >> 6) | 0xC0;
+        p[1] = (ch & 0x3F) | 0x80; return 2;
+    }
+    p[0] = ch; return 1;   /* below 0x80: stored unchanged */
+}
+
diff --git a/snowball_code/compiler/syswords.h b/snowball_code/compiler/syswords.h
new file mode 100644
index 0000000..d0700ca
--- /dev/null
+++ b/snowball_code/compiler/syswords.h
@@ -0,0 +1,84 @@
+static struct system_word vocab[80+1] = {
+  { 0, (byte *)"", 80+1},
+
+  { 1, (byte *)"$",             c_dollar },
+  { 1, (byte *)"(",             c_bra },
+  { 1, (byte *)")",             c_ket },
+  { 1, (byte *)"*",             c_multiply },
+  { 1, (byte *)"+",             c_plus },
+  { 1, (byte *)"-",             c_minus },
+  { 1, (byte *)"/",             c_divide },
+  { 1, (byte *)"<",             c_ls },
+  { 1, (byte *)"=",             c_assign },
+  { 1, (byte *)">",             c_gr },
+  { 1, (byte *)"?",             c_debug },
+  { 1, (byte *)"[",             c_leftslice },
+  { 1, (byte *)"]",             c_rightslice },
+  { 2, (byte *)"!=",            c_ne },
+  { 2, (byte *)"*=",            c_multiplyassign },
+  { 2, (byte *)"+=",            c_plusassign },
+  { 2, (byte *)"-=",            c_minusassign },
+  { 2, (byte *)"->",            c_sliceto },
+  { 2, (byte *)"/*",            c_comment2 },
+  { 2, (byte *)"//",            c_comment1 },
+  { 2, (byte *)"/=",            c_divideassign },
+  { 2, (byte *)"<+",            c_insert },
+  { 2, (byte *)"<-",            c_slicefrom },
+  { 2, (byte *)"<=",            c_le },
+  { 2, (byte *)"==",            c_eq },
+  { 2, (byte *)"=>",            c_assignto },
+  { 2, (byte *)">=",            c_ge },
+  { 2, (byte *)"as",            c_as },
+  { 2, (byte *)"do",            c_do },
+  { 2, (byte *)"or",            c_or },
+  { 3, (byte *)"and",           c_and },
+  { 3, (byte *)"for",           c_for },
+  { 3, (byte *)"get",           c_get },
+  { 3, (byte *)"hex",           c_hex },
+  { 3, (byte *)"hop",           c_hop },
+  { 3, (byte *)"non",           c_non },
+  { 3, (byte *)"not",           c_not },
+  { 3, (byte *)"set",           c_set },
+  { 3, (byte *)"try",           c_try },
+  { 4, (byte *)"fail",          c_fail },
+  { 4, (byte *)"goto",          c_goto },
+  { 4, (byte *)"loop",          c_loop },
+  { 4, (byte *)"next",          c_next },
+  { 4, (byte *)"size",          c_size },
+  { 4, (byte *)"test",          c_test },
+  { 4, (byte *)"true",          c_true },
+  { 5, (byte *)"among",         c_among },
+  { 5, (byte *)"false",         c_false },
+  { 5, (byte *)"limit",         c_limit },
+  { 5, (byte *)"unset",         c_unset },
+  { 6, (byte *)"atmark",        c_atmark },
+  { 6, (byte *)"attach",        c_attach },
+  { 6, (byte *)"cursor",        c_cursor },
+  { 6, (byte *)"define",        c_define },
+  { 6, (byte *)"delete",        c_delete },
+  { 6, (byte *)"gopast",        c_gopast },
+  { 6, (byte *)"insert",        c_insert },
+  { 6, (byte *)"maxint",        c_maxint },
+  { 6, (byte *)"minint",        c_minint },
+  { 6, (byte *)"repeat",        c_repeat },
+  { 6, (byte *)"sizeof",        c_sizeof },
+  { 6, (byte *)"tomark",        c_tomark },
+  { 7, (byte *)"atleast",       c_atleast },
+  { 7, (byte *)"atlimit",       c_atlimit },
+  { 7, (byte *)"decimal",       c_decimal },
+  { 7, (byte *)"reverse",       c_reverse },
+  { 7, (byte *)"setmark",       c_setmark },
+  { 7, (byte *)"strings",       c_strings },
+  { 7, (byte *)"tolimit",       c_tolimit },
+  { 8, (byte *)"booleans",      c_booleans },
+  { 8, (byte *)"integers",      c_integers },
+  { 8, (byte *)"routines",      c_routines },
+  { 8, (byte *)"setlimit",      c_setlimit },
+  { 9, (byte *)"backwards",     c_backwards },
+  { 9, (byte *)"externals",     c_externals },
+  { 9, (byte *)"groupings",     c_groupings },
+  { 9, (byte *)"stringdef",     c_stringdef },
+  { 9, (byte *)"substring",     c_substring },
+ { 12, (byte *)"backwardmode",  c_backwardmode },
+ { 13, (byte *)"stringescapes", c_stringescapes }
+};
diff --git a/snowball_code/compiler/syswords2.h b/snowball_code/compiler/syswords2.h
new file mode 100644
index 0000000..7a4f273
--- /dev/null
+++ b/snowball_code/compiler/syswords2.h
@@ -0,0 +1,14 @@
+    c_among = 4, c_and, c_as, c_assign, c_assignto, c_atleast,
+    c_atlimit, c_atmark, c_attach, c_backwardmode, c_backwards,
+    c_booleans, c_bra, c_comment1, c_comment2, c_cursor, c_debug,
+    c_decimal, c_define, c_delete, c_divide, c_divideassign, c_do,
+    c_dollar, c_eq, c_externals, c_fail, c_false, c_for, c_ge, c_get,
+    c_gopast, c_goto, c_gr, c_groupings, c_hex, c_hop, c_insert,
+    c_integers, c_ket, c_le, c_leftslice, c_limit, c_loop, c_ls,
+    c_maxint, c_minint, c_minus, c_minusassign, c_multiply,
+    c_multiplyassign, c_ne, c_next, c_non, c_not, c_or, c_plus,
+    c_plusassign, c_repeat, c_reverse, c_rightslice, c_routines,
+    c_set, c_setlimit, c_setmark, c_size, c_sizeof, c_slicefrom,
+    c_sliceto, c_stringdef, c_stringescapes, c_strings, c_substring,
+    c_test, c_tolimit, c_tomark, c_true, c_try, c_unset,
+
diff --git a/snowball_code/compiler/tokeniser.c b/snowball_code/compiler/tokeniser.c
new file mode 100644
index 0000000..63c2e05
--- /dev/null
+++ b/snowball_code/compiler/tokeniser.c
@@ -0,0 +1,469 @@
+
+#include <stdio.h>   /* stderr etc */
+#include <stdlib.h>  /* malloc free */
+#include <string.h>  /* strlen */
+#include <ctype.h>   /* isalpha etc */
+#include "header.h"
+
+/* One entry of the system-word table (vocab, pulled in from syswords.h):
+ * the spelling of a reserved word plus the token code it maps to.
+ * find_word() also reads the `code` member of the first table entry as the
+ * upper bound of its search. */
+struct system_word {
+    int s_size;   /* size of system word */
+    byte * s;     /* pointer to the system word */
+    int code;     /* its internal code */
+};
+
+
+/* ASCII collating assumed in syswords.c */
+
+#include "syswords.h"
+
+/* Return the lesser of a and b (b when the two are equal). */
+static int smaller(int a, int b) {
+    if (a < b) return a;
+    return b;
+}
+
+/* Load the file whose name is held in the b-string p.
+ *
+ * The name is converted with b_to_s() (and freed again here), the file is
+ * opened in "r" mode and every character returned by getc() is stored as one
+ * symbol of a buffer obtained from create_b(STARTSIZE).
+ *
+ * Returns 0 if the file cannot be opened, otherwise the buffer with SIZE()
+ * set to the number of characters read. */
+extern symbol * get_input(symbol * p) {
+
+    char * filename = b_to_s(p);
+    FILE * input = fopen(filename, "r");
+    symbol * u;
+    int size = 0;
+    int ch;
+
+    free(filename);
+    if (input == 0) return 0;
+
+    u = create_b(STARTSIZE);
+    while ((ch = getc(input)) != EOF) {
+        /* buffer full: have it enlarged before storing the next symbol */
+        if (size >= CAPACITY(u)) u = increase_capacity(u, size/2);
+        u[size++] = ch;
+    }
+    fclose(input);
+    SIZE(u) = size;
+    return u;
+}
+
+static void error(struct tokeniser * t, char * s1, int n, symbol * p, char * s2) {
+    if (t->error_count == 20) { fprintf(stderr, "... etc\n"); exit(1); }
+    fprintf(stderr, "Line %d", t->line_number);
+    if (t->get_depth > 0) fprintf(stderr, " (of included file)");
+    fprintf(stderr, ": ");
+    unless (s1 == 0) fprintf(stderr, "%s", s1);
+    unless (p == 0) {
+        int i;
+        for (i = 0; i < n; i++) fprintf(stderr, "%c", p[i]);
+    }
+    unless (s2 == 0) fprintf(stderr, "%s", s2);
+    fprintf(stderr, "\n");
+    t->error_count++;
+}
+
+/* Report the fixed message s through error(), with no symbol text and no
+ * trailing string. */
+static void error1(struct tokeniser * t, char * s) {
+    error(t, s, 0,0, 0);
+}
+
+/* Report "unexpected end of text after <s>" through error(); s names the
+ * construct that was being read when the input ran out. */
+static void error2(struct tokeniser * t, char * s) {
+    error(t, "unexpected end of text after ", 0,0, s);
+}
+
+/* Three-way comparison of the word p (m symbols) with the system word q
+ * (n bytes).  Words of different length are ordered by length alone; words
+ * of equal length are ordered by their first differing element.
+ * Returns a negative value, 0, or a positive value. */
+static int compare_words(int m, symbol * p, int n, byte * q) {
+    int i;
+
+    if (m != n) return m - n;
+
+    for (i = 0; i < n; i++) {
+        int diff = p[i] - q[i];
+        if (diff != 0) return diff;
+    }
+    return 0;
+}
+
+/* Binary-search the system-word table vocab for the n-symbol word at p,
+ * using the ordering defined by compare_words().
+ *
+ * The search covers indices from 0 up to (but excluding) vocab->code, the
+ * bound stored in the first table entry.  Returns the token code of the
+ * matching entry, or -1 when the word is not a system word. */
+static int find_word(int n, symbol * p) {
+    int lo = 0;
+    int hi = vocab->code;
+
+    do {
+        int mid = lo + (hi - lo)/2;
+        struct system_word * entry = &vocab[mid];
+        int diff = compare_words(n, p, entry->s_size, entry->s);
+
+        if (diff == 0) return entry->code;
+        if (diff < 0) {
+            hi = mid;
+        } else {
+            lo = mid;
+        }
+        /* stop once the interval has shrunk to a single, already rejected slot */
+    } while (hi - lo != 1);
+
+    return -1;
+}
+
+/* Convert the n decimal digit symbols starting at p to an int.  The caller
+ * is expected to pass digits only; no validation or overflow check is done
+ * here. */
+static int get_number(int n, symbol * p) {
+    int value = 0;
+    int i = 0;
+
+    while (i < n) {
+        value = 10*value + p[i] - '0';
+        i++;
+    }
+    return value;
+}
+
+/* Test whether the input at the current position t->c starts with the
+ * C string s.  On a match the position is advanced past s and true is
+ * returned; otherwise the position is left alone and false is returned. */
+static int eq_s(struct tokeniser * t, char * s) {
+    int len = strlen(s);
+    int i;
+
+    /* not enough input left to hold s */
+    if (SIZE(t->p) - t->c < len) return false;
+
+    for (i = 0; i < len; i++) {
+        if (t->p[t->c + i] != s[i]) return false;
+    }
+
+    t->c += len;
+    return true;
+}
+
+/* Return true when ch is a newline, carriage return, tab or space, false
+ * otherwise.  A newline additionally increments t->line_number, so each
+ * newline must be passed through here exactly once to keep the count right. */
+static int white_space(struct tokeniser * t, int ch) {
+    if (ch == '\n') {
+        t->line_number++;
+        return true;
+    }
+    if (ch == '\r' || ch == '\t' || ch == ' ') return true;
+    return false;
+}
+
+/* Look up the n-symbol name at p in the tokeniser's list of string macros
+ * (t->m_pairs).  Returns the value stored with the first entry whose name
+ * has the same length and contents, or 0 when no entry matches. */
+static symbol * find_in_m(struct tokeniser * t, int n, symbol * p) {
+    struct m_pair * q;
+
+    for (q = t->m_pairs; q != 0; q = q->next) {
+        symbol * name = q->name;
+        if (n != SIZE(name)) continue;
+        if (memcmp(name, p, n * sizeof(symbol)) == 0) return q->value;
+    }
+    return 0;
+}
+
+/* Read the body of a literal string into t->b.
+ *
+ * c is the index of the first symbol after the opening quote.  Ordinary
+ * symbols are appended to t->b until the closing quote.  Text between the
+ * escape characters t->m_start and t->m_end is handled specially (see
+ * below).
+ *
+ * Returns the index just past the closing quote, or the index at which
+ * reading stopped after an error (end of input, or a newline inside the
+ * string). */
+static int read_literal_string(struct tokeniser * t, int c) {
+    symbol * p = t->p;
+    int ch;
+    SIZE(t->b) = 0;
+    repeat {
+        if (c >= SIZE(p)) { error2(t, "'"); return c; }
+        ch = p[c];
+        if (ch == '\n') { error1(t, "string not terminated"); return c; }
+        c++;
+        if (ch == t->m_start) {
+            /* escape sequence: scan up to the matching t->m_end */
+            int c0 = c;
+            int newlines = false; /* no newlines as yet */
+            int black_found = false; /* no printing chars as yet */
+            repeat {
+                if (c >= SIZE(p)) { error2(t, "'"); return c; }
+                ch = p[c]; c++;
+                if (ch == t->m_end) break;
+                unless (white_space(t, ch)) black_found = true;
+                if (ch == '\n') newlines = true;
+                /* an escape may span lines only if it is all white space */
+                if (newlines && black_found) {
+                    error1(t, "string not terminated");
+                    return c;
+                }
+            }
+            /* an escape containing a newline contributes nothing */
+            unless (newlines) {
+                int n = c - c0 - 1;    /* macro size */
+                int firstch = p[c0];
+                symbol * q = find_in_m(t, n, p + c0);
+                if (q == 0) {
+                    /* undefined one-symbol escapes for the quote and for
+                       m_start itself stand for that symbol */
+                    if (n == 1 && (firstch == '\'' || firstch == t->m_start))
+                        t->b = add_to_b(t->b, 1, p + c0);
+                    else
+                        error(t, "string macro '", n, p + c0, "' undeclared");
+                } else
+                    t->b = add_to_b(t->b, SIZE(q), q);
+            }
+        } else {
+            if (ch == '\'') return c;
+            t->b = add_to_b(t->b, 1, p + c - 1);
+        }
+    }
+}
+
+/* Scan the next raw token from t->p starting at t->c.
+ *
+ * Returns the token code and leaves t->c just past the token, or returns -1
+ * at end of input.  Side results: a name's text goes to t->b (c_name), a
+ * number's value to t->number (c_number), and a literal string's contents to
+ * t->b (c_literalstring).  Unrecognised symbols are reported and skipped one
+ * at a time. */
+static int next_token(struct tokeniser * t) {
+    symbol * p = t->p;
+    int c = t->c;
+    int ch;
+    int code = -1;
+    repeat {
+        if (c >= SIZE(p)) { t->c = c; return -1; }
+        ch = p[c];
+        if (white_space(t, ch)) { c++; continue; }
+        if (isalpha(ch)) {
+            /* a letter followed by letters, digits or underscores: either a
+               system word or a name */
+            int c0 = c;
+            while (c < SIZE(p) && (isalnum(p[c]) || p[c] == '_')) c++;
+            code = find_word(c - c0, p + c0);
+            if (code < 0) {
+                t->b = move_to_b(t->b, c - c0, p + c0);
+                code = c_name;
+            }
+        } else
+        if (isdigit(ch)) {
+            int c0 = c;
+            while (c < SIZE(p) && isdigit(p[c])) c++;
+            t->number = get_number(c - c0, p + c0);
+            code = c_number;
+        } else
+        if (ch == '\'') {
+            c = read_literal_string(t, c + 1);
+            code = c_literalstring;
+        } else
+        {
+            /* punctuation: try a two-symbol system word first, then a
+               one-symbol one (only one if a single symbol remains) */
+            int lim = smaller(2, SIZE(p) - c);
+            int i;
+            for (i = lim; i > 0; i--) {
+                code = find_word(i, p + c);
+                if (code >= 0) { c += i; break; }
+            }
+        }
+        if (code >= 0) {
+            t->c = c;
+            return code;
+        }
+        /* nothing matched: complain about this symbol and move on */
+        error(t, "'", 1, p + c, "' unknown");
+        c++;
+        continue;
+    }
+}
+
+/* Return the symbol at the current position and step past it, or return -1
+ * (without moving) when the input is exhausted. */
+static int next_char(struct tokeniser * t) {
+    int ch;
+
+    if (t->c >= SIZE(t->p)) return -1;
+    ch = t->p[t->c];
+    t->c++;
+    return ch;
+}
+
+/* Return the next symbol that is not white space (counting any newlines
+ * passed over), or -1 at end of input. */
+static int next_real_char(struct tokeniser * t) {
+    int ch;
+
+    do {
+        ch = next_char(t);
+    } while (white_space(t, ch));
+
+    return ch;
+}
+
+/* Read the next run of non-white-space symbols (the name following
+ * `stringdef`) into t->b2.  Leading white space is skipped; if the input
+ * ends before any symbol is found an error is reported and t->b2 is left
+ * unchanged.
+ *
+ * NOTE(review): when the run is ended by white space, t->c has already been
+ * advanced past that white-space symbol, so t->c - c0 - 1 is the run length.
+ * When the run is ended by end of input, next_char() returns -1 without
+ * advancing, so the same expression appears to drop the last symbol of the
+ * name - confirm whether that case matters. */
+static void read_chars(struct tokeniser * t) {
+    int ch = next_real_char(t);
+    if (ch < 0) { error2(t, "stringdef"); return; }
+    {
+        int c0 = t->c-1;   /* index of the first symbol of the name */
+        repeat {
+            ch = next_char(t);
+            if (white_space(t, ch) || ch < 0) break;
+        }
+        t->b2 = move_to_b(t->b2, t->c - c0 - 1, t->p + c0);
+    }
+}
+
+/* Value of the decimal digit character ch (0..9), or -1 if ch is not a
+ * decimal digit. */
+static int decimal_to_num(int ch) {
+    return (ch >= '0' && ch <= '9') ? ch - '0' : -1;
+}
+
+/* Value of the hexadecimal digit character ch (0..15), or -1 if ch is not
+ * one of 0-9 or lower-case a-f.  Upper-case letters are rejected here;
+ * callers that accept them must fold case first. */
+static int hex_to_num(int ch) {
+    if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a');
+    if (ch >= '0' && ch <= '9') return ch - '0';
+    return -1;
+}
+
+/* Convert a string of space-separated numbers into the characters they
+ * denote, rewriting p in place.
+ *
+ * Each number is read in the given base (10 selects decimal; any other value
+ * is treated as hexadecimal, upper or lower case) and must lie in 0..0xffff
+ * when t->widechars or t->utf8 is set, or 0..0xff otherwise.  Values are
+ * stored back into p one symbol each, or as the output of put_utf8() when
+ * t->utf8 is set, and SIZE(p) is set to the resulting length.
+ *
+ * On a malformed or out-of-range number an error is reported and the
+ * function returns at once; p may then be partly rewritten and SIZE(p) is
+ * left unchanged. */
+static void convert_numeric_string(struct tokeniser * t, symbol * p, int base) {
+    /* largest value a single character may take in the current mode */
+    int max_value = (t->widechars || t->utf8) ? 0xffff : 0xff;
+    int c = 0; int d = 0;
+    repeat {
+        while (c < SIZE(p) && p[c] == ' ') c++;
+        if (c == SIZE(p)) break;
+        {
+            int number = 0;
+            /* Fix: test the bound before touching p[c]; the symbol used to
+               be fetched first, reading one element past the end of the
+               string whenever a number was the last thing in it. */
+            while (c < SIZE(p) && p[c] != ' ') {
+                int ch = p[c];
+                if (base == 10) {
+                    ch = decimal_to_num(ch);
+                    if (ch < 0) {
+                        error1(t, "decimal string contains non-digits");
+                        return;
+                    }
+                } else {
+                    ch = hex_to_num(tolower(ch));
+                    if (ch < 0) {
+                        error1(t, "hex string contains non-hex characters");
+                        return;
+                    }
+                }
+                number = base * number + ch;
+                /* Fix: saturate just above the limit so that a long digit
+                   run cannot overflow int; the remaining digits are still
+                   validated and the range check below still fires. */
+                if (number > max_value) number = max_value + 1;
+                c++;
+            }
+            if (number > max_value) {
+                if (max_value == 0xffff)
+                    error1(t, "character values exceed 64K");
+                else
+                    error1(t, "character values exceed 256");
+                return;
+            }
+            if (t->utf8)
+                d += put_utf8(number, p + d);
+            else
+                p[d++] = number;
+        }
+    }
+    SIZE(p) = d;
+}
+
+/* Return the next token for the parser.
+ *
+ * If a token is being held (t->token_held) it is handed back again.
+ * Otherwise raw tokens are taken from next_token() and the ones that only
+ * concern the tokeniser are consumed here: both comment forms,
+ * `stringescapes`, `stringdef` and `get`.  Every other token is recorded in
+ * t->token (the old value moving to t->previous_token) and returned.
+ * -1 is returned at the end of the outermost input. */
+extern int read_token(struct tokeniser * t) {
+    symbol * p = t->p;
+    int held = t->token_held;
+    t->token_held = false;
+    if (held) return t->token;
+    repeat {
+        int code = next_token(t);
+        switch (code) {
+            case c_comment1: /*  slash-slash comment */
+               /* skip to, but not past, the end of the line */
+               while (t->c < SIZE(p) && p[t->c] != '\n') t->c++;
+               continue;
+            case c_comment2: /* slash-star comment */
+               repeat {
+                   if (t->c >= SIZE(p)) {
+                       error1(t, "/* comment not terminated");
+                       t->token = -1;
+                       return -1;
+                   }
+                   /* newlines inside the comment still count as lines */
+                   if (p[t->c] == '\n') t->line_number++;
+                   if (eq_s(t, "*/")) break;
+                   t->c++;
+               }
+               continue;
+            case c_stringescapes:
+               /* the next two non-blank symbols become the escape brackets */
+               {
+                   int ch1 = next_real_char(t);
+                   int ch2 = next_real_char(t);
+                   if (ch2 < 0)
+                       { error2(t, "stringescapes"); continue; }
+                   if (ch1 == '\'')
+                       { error1(t, "first stringescape cannot be '"); continue; }
+                   t->m_start = ch1;
+                   t->m_end = ch2;
+               }
+               continue;
+            case c_stringdef:
+               /* stringdef <name> [hex|decimal] '<string>': record a macro */
+               {
+                   int base = 0;
+                   read_chars(t);          /* macro name -> t->b2 */
+                   code = read_token(t);
+                   if (code == c_hex) { base = 16; code = read_token(t); } else
+                   if (code == c_decimal) { base = 10; code = read_token(t); }
+                   unless (code == c_literalstring)
+                       { error1(t, "string omitted after stringdef"); continue; }
+                   if (base > 0) convert_numeric_string(t, t->b, base);
+                   /* push the new (name, value) pair on the front of the list */
+                   {   NEW(m_pair, q);
+                       q->next = t->m_pairs;
+                       q->name = copy_b(t->b2);
+                       q->value = copy_b(t->b);
+                       t->m_pairs = q;
+                   }
+               }
+               continue;
+            case c_get:
+               /* get '<file>': switch input to the named file */
+               code = read_token(t);
+               unless (code == c_literalstring) {
+                   error1(t, "string omitted after get"); continue;
+               }
+               t->get_depth++;
+               if (t->get_depth > 10) {
+                   fprintf(stderr, "get directives go 10 deep. Looping?\n");
+                   exit(1);
+               }
+               {
+                   NEW(input, q);
+                   symbol * u = get_input(t->b);
+                   if (u == 0) {
+                       /* not found as given: retry with each entry of
+                          t->includes prefixed to the name */
+                       struct include * r = t->includes;
+                       until (r == 0) {
+                           symbol * b = copy_b(r->b);
+                           b = add_to_b(b, SIZE(t->b), t->b);
+                           u = get_input(b);
+                           lose_b(b);
+                           unless (u == 0) break;
+                           r = r->next;
+                       }
+                   }
+                   if (u == 0) {
+                       error(t, "Can't get '", SIZE(t->b), t->b, "'");
+                       exit(1);
+                   }
+                   /* save the current input state in q and chain it on
+                      t->next.  NOTE(review): copying sizeof(struct input)
+                      bytes from t presumably relies on struct tokeniser
+                      starting with the members of struct input - confirm
+                      in header.h */
+                   memmove(q, t, sizeof(struct input));
+                   t->next = q;
+                   t->p = u;
+                   t->c = 0;
+                   t->line_number = 1;
+               }
+               p = t->p;
+               continue;
+            case -1:
+               /* end of an included file: free its text and resume the
+                  saved input */
+               unless (t->next == 0) {
+                   lose_b(p);
+                   {
+                       struct input * q = t->next;
+                       memmove(t, q, sizeof(struct input)); p = t->p;
+                       FREE(q);
+                   }
+                   t->get_depth--;
+                   continue;
+               }
+               /* drop through */
+            default:
+                t->previous_token = t->token;
+                t->token = code;
+                return code;
+        }
+    }
+}
+
+/* Return a printable name for a token code.
+ *
+ * System words are looked up in vocab (entries 1 .. vocab->code - 1) and
+ * yield their own spelling; the remaining codes get a fixed description,
+ * with "?" for anything unrecognised.  The returned string must not be
+ * modified or freed. */
+extern byte * name_of_token(int code) {
+    int i = 1;
+
+    while (i < vocab->code) {
+        struct system_word * w = vocab + i;
+        if (w->code == code) return w->s;
+        i++;
+    }
+
+    switch (code) {
+        case -2:
+            return (byte *) "start of text";
+        case -1:
+            return (byte *) "end of text";
+        case c_mathassign:
+            return (byte *) "=";
+        case c_name:
+            return (byte *) "name";
+        case c_number:
+            return (byte *) "number";
+        case c_literalstring:
+            return (byte *) "literal";
+        case c_neg:
+            return (byte *) "neg";
+        case c_grouping:
+            return (byte *) "grouping";
+        case c_call:
+            return (byte *) "call";
+        case c_booltest:
+            return (byte *) "Boolean test";
+        default:
+            return (byte *) "?";
+    }
+}
+
+/* Allocate a tokeniser that reads from the symbol buffer p, positioned at
+ * the start of line 1 with no macros, no held token and no errors.
+ *
+ * Only the members assigned below are initialised; other members this file
+ * reads (m_end, number, includes, widechars, utf8) are left untouched and
+ * are presumably set by the caller or before first use - confirm.
+ * Release the result with close_tokeniser(). */
+extern struct tokeniser * create_tokeniser(symbol * p) {
+    NEW(tokeniser, t);
+
+    /* input state */
+    t->next = 0;
+    t->p = p;
+    t->c = 0;
+    t->line_number = 1;
+    t->get_depth = 0;
+
+    /* work buffers */
+    t->b = create_b(0);
+    t->b2 = create_b(0);
+
+    /* string macros: -1 means no escape character is in force */
+    t->m_start = -1;
+    t->m_pairs = 0;
+
+    /* token state: -2 stands for "start of text" */
+    t->token_held = false;
+    t->token = -2;
+    t->previous_token = -2;
+
+    t->error_count = 0;
+    return t;
+}
+
+/* Free a tokeniser: its two work buffers, every string-macro pair (with the
+ * name and value buffers), every saved input record on t->next, and finally
+ * the tokeniser itself.  The input text t->p is not freed here. */
+extern void close_tokeniser(struct tokeniser * t) {
+    struct m_pair * pair = t->m_pairs;
+    struct input * saved = t->next;
+
+    lose_b(t->b);
+    lose_b(t->b2);
+
+    while (pair != 0) {
+        struct m_pair * following = pair->next;
+        lose_b(pair->name);
+        lose_b(pair->value);
+        FREE(pair);
+        pair = following;
+    }
+
+    while (saved != 0) {
+        struct input * following = saved->next;
+        FREE(saved);
+        saved = following;
+    }
+
+    FREE(t);
+}
+
diff --git a/snowball_code/doc/TODO b/snowball_code/doc/TODO
new file mode 100644
index 0000000..0cfa1b1
--- /dev/null
+++ b/snowball_code/doc/TODO
@@ -0,0 +1,15 @@
+Things to do:
+
+ - Write documentation for how to use libstemmer (as opposed to how stemming
+   algorithms themselves work).
+   Currently, the documentation in the include/libstemmer.h header file is
+   pretty clear and comprehensive, but an overview document wouldn't go amiss.
+
+Things that would be nice to include at some point.
+
+ - Add version numbers to each stemming algorithm, and allow the interface to
+   request a specific version of the stemming algorithms.  Default to providing
+   the latest version of the algorithm.
+ - Make mkmodules.pl generate the build system, instead of being called from it.
+   This would allow it to generate the list of modules to be built, so that it's
+   not necessary to change things in more than one place to add a new algorithm.
diff --git a/snowball_code/examples/stemwords.c b/snowball_code/examples/stemwords.c
new file mode 100644
index 0000000..128ea7a
--- /dev/null
+++ b/snowball_code/examples/stemwords.c
@@ -0,0 +1,209 @@
+/* This is a simple program which uses libstemmer to provide a command
+ * line interface for stemming using any of the algorithms provided.
+ */
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc, free */
+#include <string.h> /* for memmove */
+#include <ctype.h>  /* for isupper, tolower */
+
+#include "libstemmer.h"
+
+const char * progname;
+static int pretty = 1;
+
+/* Stem every line of f_in and write the results to f_out.
+ *
+ * Each input line (without its newline) is lower-cased with tolower() where
+ * isupper() says so, stemmed with `stemmer`, and written as one output line
+ * whose layout depends on the global `pretty`:
+ *   0  the stem only;
+ *   1  "<word> -> <stem>";
+ *   2  the word, padded with spaces to column 30 (or followed by a newline
+ *      and 30 spaces when it is 30 characters or longer), then the stem;
+ *      the padding is omitted when the stem is empty.
+ * For the padding, bytes in the range 0x80..0xBF are not counted, so that
+ * UTF-8 continuation bytes do not widen the column.
+ *
+ * Running out of memory for the initial buffer, or inside the stemmer,
+ * prints "Out of memory" and exits with status 1.  If the line buffer
+ * cannot be grown the function frees it and returns silently, as before. */
+static void
+stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
+{
+#define INC 10
+    int lim = INC;
+    sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
+
+    /* Fix: the first allocation used to be written through unchecked. */
+    if (b == 0) {
+        fprintf(stderr, "Out of memory");
+        exit(1);
+    }
+
+    while(1) {
+        int ch = getc(f_in);
+        if (ch == EOF) {
+            free(b); return;
+        }
+        {
+            int i = 0;
+            int inlen = 0;
+            while(1) {
+                if (ch == '\n' || ch == EOF) break;
+                if (i == lim) {
+                    /* keep b valid if realloc fails */
+                    sb_symbol * newb;
+                    newb = (sb_symbol *)
+                            realloc(b, (lim + INC) * sizeof(sb_symbol));
+                    if (newb == 0) goto error;
+                    b = newb;
+                    lim = lim + INC;
+                }
+                /* Update count of utf-8 characters. */
+                if (ch < 0x80 || ch > 0xBF) inlen += 1;
+                /* force lower case: */
+                if (isupper(ch)) ch = tolower(ch);
+
+                b[i] = ch;
+                i++;
+                ch = getc(f_in);
+            }
+
+            {
+                const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
+                if (stemmed == NULL)
+                {
+                    fprintf(stderr, "Out of memory");
+                    exit(1);
+                }
+                else
+                {
+                    if (pretty == 1) {
+                        fwrite(b, i, 1, f_out);
+                        fputs(" -> ", f_out);
+                    } else if (pretty == 2) {
+                        fwrite(b, i, 1, f_out);
+                        if (sb_stemmer_length(stemmer) > 0) {
+                            int j;
+                            if (inlen < 30) {
+                                for (j = 30 - inlen; j > 0; j--)
+                                    fputs(" ", f_out);
+                            } else {
+                                fputs("\n", f_out);
+                                for (j = 30; j > 0; j--)
+                                    fputs(" ", f_out);
+                            }
+                        }
+                    }
+
+                    /* the stem is written as a NUL-terminated string */
+                    fputs((char *)stemmed, f_out);
+                    putc('\n', f_out);
+                }
+            }
+        }
+    }
+error:
+    free(b);   /* b is never null here, and free(NULL) is harmless anyway */
+    return;
+}
+
+/** Display the command line syntax, and then exit.
+ *
+ *  The help text is written to standard output with printf(), with the
+ *  global progname substituted for the program name.  This function does
+ *  not return: it ends by calling exit(n).
+ *
+ *  @param n The value to exit with.
+ */
+static void
+usage(int n)
+{
+    printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
+	  "\n"
+	  "The input file consists of a list of words to be stemmed, one per\n"
+	  "line. Words should be in lower case, but (for English) A-Z letters\n"
+	  "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
+	  "used.\n"
+	  "\n"
+	  "If -c is given, the argument is the character encoding of the input\n"
+          "and output files.  If it is omitted, the UTF-8 encoding is used.\n"
+	  "\n"
+	  "If -p is given the output file consists of each word of the input\n"
+	  "file followed by \"->\" followed by its stemmed equivalent.\n"
+	  "If -p2 is given the output file is a two column layout containing\n"
+	  "the input words in the first column and the stemmed eqivalents in\n"
+	  "the second column.\n"
+	  "Otherwise, the output file consists of the stemmed words, one per\n"
+	  "line.\n"
+	  "\n"
+	  "-h displays this help\n",
+	  progname);
+    exit(n);
+}
+
+/* Return the value following option `opt`: argv[*next], stepping *next past
+ * it.  If the command line ends first, complain on stderr and exit(1). */
+static char *
+option_value(const char * opt, int argc, char * argv[], int * next)
+{
+    if (*next >= argc) {
+        fprintf(stderr, "%s requires an argument\n", opt);
+        exit(1);
+    }
+    return argv[(*next)++];
+}
+
+/* Command-line driver: parse the options, open the input and output
+ * (stdin/stdout unless -i/-o name files), create the stemmer selected by
+ * -l (default "english") and -c (default: none given), and stem the input.
+ * Any failure prints a message on stderr and exits with status 1;
+ * otherwise 0 is returned. */
+int
+main(int argc, char * argv[])
+{
+    char * in = 0;
+    char * out = 0;
+    char * language = "english";
+    char * charenc = NULL;
+    FILE * f_in;
+    FILE * f_out;
+    struct sb_stemmer * stemmer;
+    int i = 1;
+
+    pretty = 0;
+    progname = argv[0];
+
+    while (i < argc) {
+        char * s = argv[i++];
+
+        if (s[0] != '-') {
+            fprintf(stderr, "unexpected parameter %s\n", s);
+            usage(1);
+        } else if (strcmp(s, "-o") == 0) {
+            out = option_value(s, argc, argv, &i);
+        } else if (strcmp(s, "-i") == 0) {
+            in = option_value(s, argc, argv, &i);
+        } else if (strcmp(s, "-l") == 0) {
+            language = option_value(s, argc, argv, &i);
+        } else if (strcmp(s, "-c") == 0) {
+            charenc = option_value(s, argc, argv, &i);
+        } else if (strcmp(s, "-p2") == 0) {
+            pretty = 2;
+        } else if (strcmp(s, "-p") == 0) {
+            pretty = 1;
+        } else if (strcmp(s, "-h") == 0) {
+            usage(0);
+        } else {
+            fprintf(stderr, "option %s unknown\n", s);
+            usage(1);
+        }
+    }
+
+    /* prepare the files */
+    f_in = stdin;
+    if (in != 0) f_in = fopen(in, "r");
+    if (f_in == 0) {
+        fprintf(stderr, "file %s not found\n", in);
+        exit(1);
+    }
+    f_out = stdout;
+    if (out != 0) f_out = fopen(out, "w");
+    if (f_out == 0) {
+        fprintf(stderr, "file %s cannot be opened\n", out);
+        exit(1);
+    }
+
+    /* do the stemming process: */
+    stemmer = sb_stemmer_new(language, charenc);
+    if (stemmer == 0) {
+        if (charenc == NULL)
+            fprintf(stderr, "language `%s' not available for stemming\n", language);
+        else
+            fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
+        exit(1);
+    }
+    stem_file(stemmer, f_in, f_out);
+    sb_stemmer_delete(stemmer);
+
+    /* only close what was opened here, never stdin/stdout */
+    if (in != 0) (void) fclose(f_in);
+    if (out != 0) (void) fclose(f_out);
+
+    return 0;
+}
+
diff --git a/snowball_code/include/libstemmer.h b/snowball_code/include/libstemmer.h
new file mode 100644
index 0000000..9d86b85
--- /dev/null
+++ b/snowball_code/include/libstemmer.h
@@ -0,0 +1,79 @@
+
+/* Make header file work when included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sb_stemmer;
+typedef unsigned char sb_symbol;
+
+/* FIXME - should be able to get a version number for each stemming
+ * algorithm (which will be incremented each time the output changes). */
+
+/** Returns an array of the names of the available stemming algorithms.
+ *  Note that these are the canonical names - aliases (ie, other names for
+ *  the same algorithm) will not be included in the list.
+ *  The list is terminated with a null pointer.
+ *
+ *  The list must not be modified in any way.
+ */
+const char ** sb_stemmer_list(void);
+
+/** Create a new stemmer object, using the specified algorithm, for the
+ *  specified character encoding.
+ *
+ *  All algorithms will usually be available in UTF-8, but may also be
+ *  available in other character encodings.
+ *
+ *  @param algorithm The algorithm name.  This is either the English
+ *  name of the algorithm, or the 2- or 3-letter ISO 639 code for the
+ *  language.  Note that case is significant in this parameter - the
+ *  value should be supplied in lower case.
+ *
+ *  @param charenc The character encoding.  NULL may be passed as
+ *  this value, in which case UTF-8 encoding will be assumed. Otherwise,
+ *  the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
+ *  "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian).  Note that
+ *  case is significant in this parameter.
+ *
+ *  @return NULL if the specified algorithm is not recognised, or the
+ *  algorithm is not available for the requested encoding.  Otherwise,
+ *  returns a pointer to a newly created stemmer for the requested algorithm.
+ *  The returned pointer must be deleted by calling sb_stemmer_delete().
+ *
+ *  @note NULL will also be returned if an out of memory error occurs.
+ */
+struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);
+
+/** Delete a stemmer object.
+ *
+ *  This frees all resources allocated for the stemmer.  After calling
+ *  this function, the supplied stemmer may no longer be used in any way.
+ *
+ *  It is safe to pass a null pointer to this function - this will have
+ *  no effect.
+ */
+void                sb_stemmer_delete(struct sb_stemmer * stemmer);
+
+/** Stem a word.
+ *
+ *  The return value is owned by the stemmer - it must not be freed or
+ *  modified, and it will become invalid when the stemmer is called again,
+ *  or if the stemmer is freed.
+ *
+ *  The length of the return value can be obtained using sb_stemmer_length().
+ *
+ *  If an out-of-memory error occurs, this will return NULL.
+ */
+const sb_symbol *   sb_stemmer_stem(struct sb_stemmer * stemmer,
+				    const sb_symbol * word, int size);
+
+/** Get the length of the result of the last stemmed word.
+ *  This should not be called before sb_stemmer_stem() has been called.
+ */
+int                 sb_stemmer_length(struct sb_stemmer * stemmer);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/snowball_code/libstemmer/libstemmer.c b/snowball_code/libstemmer/libstemmer.c
new file mode 100644
index 0000000..20c7c9d
--- /dev/null
+++ b/snowball_code/libstemmer/libstemmer.c
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "modules.h"
+
+/* A stemmer instance: the three entry points copied from the selected
+ * module in sb_stemmer_new(), plus the environment they operate on. */
+struct sb_stemmer {
+    struct SN_env * (*create)(void);   /* makes a new environment */
+    void (*close)(struct SN_env *);    /* releases an environment */
+    int (*stem)(struct SN_env *);      /* stems the word held in env */
+
+    struct SN_env * env;               /* result of create() */
+};
+
+/* Return the algorithm_names table; it is not defined in this file
+ * (presumably it comes from the included modules header). */
+extern const char **
+sb_stemmer_list(void)
+{
+    return algorithm_names;
+}
+
+/* Map a character-encoding name to its stemmer_encoding_t value by an exact
+ * strcmp() match against the `encodings` table (terminated by an entry with
+ * a null name).  NULL selects ENC_UTF_8; an unknown name gives ENC_UNKNOWN. */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * encoding = encodings;
+
+    if (charenc == NULL) return ENC_UTF_8;
+
+    while (encoding->name != 0) {
+        if (strcmp(encoding->name, charenc) == 0) return encoding->enc;
+        encoding++;
+    }
+    return ENC_UNKNOWN;
+}
+
+/* Create a stemmer for the named algorithm in the named encoding.
+ *
+ * The `modules` table (terminated by an entry with a null name) is searched
+ * for an entry whose name equals `algorithm` and whose encoding equals the
+ * one sb_getenc() derives from `charenc`.  Returns NULL if the encoding or
+ * the algorithm/encoding pair is unknown, if the wrapper cannot be
+ * allocated, or if the module's create() returns NULL. */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    struct stemmer_modules * module = modules;
+    struct sb_stemmer * stemmer;
+    stemmer_encoding_t enc = sb_getenc(charenc);
+
+    if (enc == ENC_UNKNOWN) return NULL;
+
+    while (module->name != 0) {
+        if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
+        module++;
+    }
+    if (module->name == NULL) return NULL;
+
+    stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
+    if (stemmer == NULL) return NULL;
+
+    stemmer->create = module->create;
+    stemmer->close = module->close;
+    stemmer->stem = module->stem;
+
+    stemmer->env = stemmer->create();
+    if (stemmer->env != NULL) return stemmer;
+
+    /* no environment: undo the allocation */
+    sb_stemmer_delete(stemmer);
+    return NULL;
+}
+
+/* Release a stemmer: its environment (through the module's close hook) and
+ * the wrapper itself.  Passing a null pointer is allowed and does nothing. */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == 0) return;
+    /* Fix: a null close hook used to cause an early return that leaked the
+     * wrapper; now only the hook call is skipped. */
+    if (stemmer->close) {
+        stemmer->close(stemmer->env);
+        stemmer->close = 0;
+    }
+    free(stemmer);
+}
+
+/* Stem `size` symbols of `word`.
+ *
+ * The word is loaded into the environment with SN_set_current() and the
+ * module's stem() is run on it.  On success the result buffer env->p is
+ * terminated with a 0 at index env->l and returned; it belongs to the
+ * stemmer.  NULL is returned if SN_set_current() reports failure (the
+ * stored length is then reset to 0) or if stem() returns a negative value. */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+
+    if (SN_set_current(env, size, (const symbol *)(word)))
+    {
+        env->l = 0;
+        return NULL;
+    }
+    if (stemmer->stem(env) < 0) return NULL;
+
+    env->p[env->l] = 0;
+    return (const sb_symbol *)(env->p);
+}
+
+/* Return the length field (l) of the stemmer's environment, i.e. the length
+ * of the word currently held there. */
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    return stemmer->env->l;
+}
diff --git a/snowball_code/libstemmer/libstemmer_c.in b/snowball_code/libstemmer/libstemmer_c.in
new file mode 100644
index 0000000..4de5798
--- /dev/null
+++ b/snowball_code/libstemmer/libstemmer_c.in
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "@MODULES_H@"
+
+/* A stemmer instance: the three entry points copied from the selected
+ * module in sb_stemmer_new(), plus the environment they operate on. */
+struct sb_stemmer {
+    struct SN_env * (*create)(void);   /* makes a new environment */
+    void (*close)(struct SN_env *);    /* releases an environment */
+    int (*stem)(struct SN_env *);      /* stems the word held in env */
+
+    struct SN_env * env;               /* result of create() */
+};
+
+/* Return the algorithm_names table; it is not defined in this file
+ * (presumably it comes from the included modules header). */
+extern const char **
+sb_stemmer_list(void)
+{
+    return algorithm_names;
+}
+
+/* Map a character-encoding name to its stemmer_encoding_t value by an exact
+ * strcmp() match against the `encodings` table (terminated by an entry with
+ * a null name).  NULL selects ENC_UTF_8; an unknown name gives ENC_UNKNOWN. */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * encoding = encodings;
+
+    if (charenc == NULL) return ENC_UTF_8;
+
+    while (encoding->name != 0) {
+        if (strcmp(encoding->name, charenc) == 0) return encoding->enc;
+        encoding++;
+    }
+    return ENC_UNKNOWN;
+}
+
+/* Create a stemmer for the named algorithm in the named encoding.
+ *
+ * The `modules` table (terminated by an entry with a null name) is searched
+ * for an entry whose name equals `algorithm` and whose encoding equals the
+ * one sb_getenc() derives from `charenc`.  Returns NULL if the encoding or
+ * the algorithm/encoding pair is unknown, if the wrapper cannot be
+ * allocated, or if the module's create() returns NULL. */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    struct stemmer_modules * module = modules;
+    struct sb_stemmer * stemmer;
+    stemmer_encoding_t enc = sb_getenc(charenc);
+
+    if (enc == ENC_UNKNOWN) return NULL;
+
+    while (module->name != 0) {
+        if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
+        module++;
+    }
+    if (module->name == NULL) return NULL;
+
+    stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
+    if (stemmer == NULL) return NULL;
+
+    stemmer->create = module->create;
+    stemmer->close = module->close;
+    stemmer->stem = module->stem;
+
+    stemmer->env = stemmer->create();
+    if (stemmer->env != NULL) return stemmer;
+
+    /* no environment: undo the allocation */
+    sb_stemmer_delete(stemmer);
+    return NULL;
+}
+
+/* Release a stemmer: its environment (through the module's close hook) and
+ * the wrapper itself.  Passing a null pointer is allowed and does nothing. */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == 0) return;
+    /* Fix: a null close hook used to cause an early return that leaked the
+     * wrapper; now only the hook call is skipped. */
+    if (stemmer->close) {
+        stemmer->close(stemmer->env);
+        stemmer->close = 0;
+    }
+    free(stemmer);
+}
+
+/* Stem `size` symbols of `word`.
+ *
+ * The word is loaded into the environment with SN_set_current() and the
+ * module's stem() is run on it.  On success the result buffer env->p is
+ * terminated with a 0 at index env->l and returned; it belongs to the
+ * stemmer.  NULL is returned if SN_set_current() reports failure (the
+ * stored length is then reset to 0) or if stem() returns a negative value. */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+
+    if (SN_set_current(env, size, (const symbol *)(word)))
+    {
+        env->l = 0;
+        return NULL;
+    }
+    if (stemmer->stem(env) < 0) return NULL;
+
+    env->p[env->l] = 0;
+    return (const sb_symbol *)(env->p);
+}
+
+/* Return the length field (l) of the stemmer's environment, i.e. the length
+ * of the word currently held there. */
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    return stemmer->env->l;
+}
diff --git a/snowball_code/libstemmer/libstemmer_utf8.c b/snowball_code/libstemmer/libstemmer_utf8.c
new file mode 100644
index 0000000..1cad3e6
--- /dev/null
+++ b/snowball_code/libstemmer/libstemmer_utf8.c
@@ -0,0 +1,95 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include "../include/libstemmer.h"
+#include "../runtime/api.h"
+#include "modules_utf8.h"
+
+/* A stemmer instance: the three entry points copied from the selected
+ * module in sb_stemmer_new(), plus the environment they operate on. */
+struct sb_stemmer {
+    struct SN_env * (*create)(void);   /* makes a new environment */
+    void (*close)(struct SN_env *);    /* releases an environment */
+    int (*stem)(struct SN_env *);      /* stems the word held in env */
+
+    struct SN_env * env;               /* result of create() */
+};
+
+/* Return the algorithm_names table; it is not defined in this file
+ * (presumably it comes from the included modules header). */
+extern const char **
+sb_stemmer_list(void)
+{
+    return algorithm_names;
+}
+
+/* Map a character-encoding name to its stemmer_encoding_t value by an exact
+ * strcmp() match against the `encodings` table (terminated by an entry with
+ * a null name).  NULL selects ENC_UTF_8; an unknown name gives ENC_UNKNOWN. */
+static stemmer_encoding_t
+sb_getenc(const char * charenc)
+{
+    struct stemmer_encoding * encoding = encodings;
+
+    if (charenc == NULL) return ENC_UTF_8;
+
+    while (encoding->name != 0) {
+        if (strcmp(encoding->name, charenc) == 0) return encoding->enc;
+        encoding++;
+    }
+    return ENC_UNKNOWN;
+}
+
+/* Create a stemmer for the named algorithm in the named encoding.
+ *
+ * The `modules` table (terminated by an entry with a null name) is searched
+ * for an entry whose name equals `algorithm` and whose encoding equals the
+ * one sb_getenc() derives from `charenc`.  Returns NULL if the encoding or
+ * the algorithm/encoding pair is unknown, if the wrapper cannot be
+ * allocated, or if the module's create() returns NULL. */
+extern struct sb_stemmer *
+sb_stemmer_new(const char * algorithm, const char * charenc)
+{
+    struct stemmer_modules * module = modules;
+    struct sb_stemmer * stemmer;
+    stemmer_encoding_t enc = sb_getenc(charenc);
+
+    if (enc == ENC_UNKNOWN) return NULL;
+
+    while (module->name != 0) {
+        if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;
+        module++;
+    }
+    if (module->name == NULL) return NULL;
+
+    stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
+    if (stemmer == NULL) return NULL;
+
+    stemmer->create = module->create;
+    stemmer->close = module->close;
+    stemmer->stem = module->stem;
+
+    stemmer->env = stemmer->create();
+    if (stemmer->env != NULL) return stemmer;
+
+    /* no environment: undo the allocation */
+    sb_stemmer_delete(stemmer);
+    return NULL;
+}
+
+/* Release a stemmer: its environment (through the module's close hook) and
+ * the wrapper itself.  Passing a null pointer is allowed and does nothing. */
+void
+sb_stemmer_delete(struct sb_stemmer * stemmer)
+{
+    if (stemmer == 0) return;
+    /* Fix: a null close hook used to cause an early return that leaked the
+     * wrapper; now only the hook call is skipped. */
+    if (stemmer->close) {
+        stemmer->close(stemmer->env);
+        stemmer->close = 0;
+    }
+    free(stemmer);
+}
+
+/* Stem `size` symbols of `word`.
+ *
+ * The word is loaded into the environment with SN_set_current() and the
+ * module's stem() is run on it.  On success the result buffer env->p is
+ * terminated with a 0 at index env->l and returned; it belongs to the
+ * stemmer.  NULL is returned if SN_set_current() reports failure (the
+ * stored length is then reset to 0) or if stem() returns a negative value. */
+const sb_symbol *
+sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
+{
+    struct SN_env * env = stemmer->env;
+
+    if (SN_set_current(env, size, (const symbol *)(word)))
+    {
+        env->l = 0;
+        return NULL;
+    }
+    if (stemmer->stem(env) < 0) return NULL;
+
+    env->p[env->l] = 0;
+    return (const sb_symbol *)(env->p);
+}
+
+/* Return the length field (l) of the stemmer's environment, i.e. the length
+ * of the word currently held there. */
+int
+sb_stemmer_length(struct sb_stemmer * stemmer)
+{
+    return stemmer->env->l;
+}
diff --git a/snowball_code/libstemmer/mkmodules.pl b/snowball_code/libstemmer/mkmodules.pl
new file mode 100755
index 0000000..ff8c19e
--- /dev/null
+++ b/snowball_code/libstemmer/mkmodules.pl
@@ -0,0 +1,256 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Command line handling: four mandatory arguments and one optional one.
+my $progname = $0;
+
+my $argc = scalar @ARGV;
+if ($argc < 4 || $argc > 5) {
+  print "Usage: $progname <outfile> <C source directory> <modules description file> <source list file> [<extn>]\n";
+  exit 1;
+}
+
+my ($outname, $c_src_dir, $descfile, $srclistfile) = splice(@ARGV, 0, 4);
+
+# The optional extension, prefixed with an underscore, is inserted into the
+# libstemmer source and modules header names written to the source list.
+my $extn = @ARGV ? '_' . shift(@ARGV) : '';
+
+# alias name => algorithm name
+my %aliases = ();
+# algorithm name => 1
+my %algorithms = ();
+# algorithm name => reference to a hash of (encoding name => 1)
+my %algorithm_encs = ();
+
+# encoding name => 1, for every encoding used by any algorithm
+my %encs = ();
+
+# Record that algorithm $alg is available in encoding $enc: the encoding is
+# added to the algorithm's set in %algorithm_encs and to the global %encs.
+sub addalgenc($$) {
+  my ($alg, $enc) = @_;
+
+  # Autovivification creates the per-algorithm hash on first use.
+  $algorithm_encs{$alg}{$enc} = 1;
+  $encs{$enc} = 1;
+}
+
+# Parse the modules description file ($descfile) and fill %algorithms,
+# %aliases, %algorithm_encs and %encs.
+#
+# Lines that are blank or whose first non-blank character is '#' are
+# skipped.  Every other line is split on whitespace into an algorithm name,
+# a comma separated list of encodings and a comma separated list of aliases.
+#
+# Dies if the description file cannot be opened.
+sub readinput()
+{
+    open(my $descfh, '<', $descfile)
+        or die "Can't open modules description file `$descfile': $!\n";
+
+    while (my $line = <$descfh>)
+    {
+        next if $line =~ m/^\s*#/;
+        next if $line =~ m/^\s*$/;
+
+        # split(' ') discards leading whitespace, so an indented entry does
+        # not produce an empty algorithm name.
+        my ($alg, $encstr, $aliases) = split(' ', $line);
+
+        $algorithms{$alg} = 1;
+        foreach my $alias (split(/,/, $aliases)) {
+            foreach my $enc (split(/,/, $encstr)) {
+                $aliases{$alias} = $alg;
+                addalgenc($alg, $enc);
+            }
+        }
+    }
+
+    close($descfh);
+}
+
+# Write the C modules header to $outname.
+#
+# The generated file contains, in order: a header comment listing the
+# algorithm names, one #include per algorithm/encoding pair, the
+# stemmer_encoding_t enum, the encodings table, the stemmer_modules struct
+# and its table (one entry per alias and encoding), and the
+# algorithm_names array.
+#
+# Dies if the output file cannot be opened or closed.
+sub printoutput()
+{
+    open (OUT, ">$outname") or die "Can't open output file `$outname': $!\n";
+
+    print OUT <<EOS;
+/* $outname: List of stemming modules.
+ *
+ * This file is generated by mkmodules.pl from a list of module names.
+ * Do not edit manually.
+ *
+EOS
+
+    my $line = " * Modules included by this file are: ";
+    print OUT $line;
+    my $linelen = length($line);
+
+    my $need_sep = 0;
+    my $lang;
+    my $enc;
+    my @algorithms = sort keys(%algorithms);
+    # Emit the comma separated algorithm list, starting a new comment line
+    # whenever the next name would push the line past 77 characters.
+    foreach $lang (@algorithms) {
+        if ($need_sep) {
+            if (($linelen + 2 + length($lang)) > 77) {
+                print OUT ",\n * ";
+                $linelen = 3;
+            } else {
+                print OUT ', ';
+                $linelen += 2;
+            }
+        }
+        print OUT $lang;
+        $linelen += length($lang);
+        $need_sep = 1;
+    }
+    print OUT "\n */\n\n";
+
+    # One #include per (algorithm, encoding) pair.
+    foreach $lang (@algorithms) {
+        my $hashref = $algorithm_encs{$lang};
+        foreach $enc (sort keys (%$hashref)) {
+            print OUT "#include \"../$c_src_dir/stem_${enc}_$lang.h\"\n";
+        }
+    }
+
+    print OUT <<EOS;
+
+typedef enum {
+  ENC_UNKNOWN=0,
+EOS
+    # Enum members are joined with ",\n" so the last one has no trailing comma.
+    my $neednl = 0;
+    for $enc (sort keys %encs) {
+        print OUT ",\n" if $neednl;
+        print OUT "  ENC_${enc}";
+        $neednl = 1;
+    }
+    print OUT <<EOS;
+
+} stemmer_encoding_t;
+
+struct stemmer_encoding {
+  const char * name;
+  stemmer_encoding_t enc;
+};
+static struct stemmer_encoding encodings[] = {
+EOS
+    for $enc (sort keys %encs) {
+        print OUT "  {\"${enc}\", ENC_${enc}},\n";
+    }
+    print OUT <<EOS;
+  {0,ENC_UNKNOWN}
+};
+
+struct stemmer_modules {
+  const char * name;
+  stemmer_encoding_t enc; 
+  struct SN_env * (*create)(void);
+  void (*close)(struct SN_env *);
+  int (*stem)(struct SN_env *);
+};
+static struct stemmer_modules modules[] = {
+EOS
+
+    # One table entry per alias and encoding; $l is the algorithm the alias
+    # refers to and $p the prefix of that algorithm's C function names.
+    for $lang (sort keys %aliases) {
+        my $l = $aliases{$lang};
+        my $hashref = $algorithm_encs{$l};
+        my $enc;
+        foreach $enc (sort keys (%$hashref)) {
+            my $p = "${l}_${enc}";
+            print OUT "  {\"$lang\", ENC_$enc, ${p}_create_env, ${p}_close_env, ${p}_stem},\n";
+        }
+    }
+
+    print OUT <<EOS;
+  {0,ENC_UNKNOWN,0,0,0}
+};
+EOS
+
+    print OUT <<EOS;
+static const char * algorithm_names[] = {
+EOS
+
+    # NOTE(review): $l is assigned but never used in this loop.
+    for $lang (@algorithms) {
+        my $l = $aliases{$lang};
+        print OUT "  \"$lang\", \n";
+    }
+
+    print OUT <<EOS;
+  0
+};
+EOS
+    close OUT or die "Can't close ${outname}: $!\n";
+}
+
+# Write the source list file $srclistfile.
+#
+# The generated file contains a header comment listing the algorithm names
+# followed by two backslash-continued assignments:
+#   snowball_sources - one src_c/stem_<enc>_<algorithm>.c per algorithm and
+#                      encoding, then the runtime and libstemmer sources.
+#   snowball_headers - the matching .h files, then the fixed header files.
+#
+# The per-algorithm entries are driven by the algorithm names, which are the
+# keys of %algorithm_encs.  Iterating over alias names instead would find no
+# encodings for any alias that is not also an algorithm name, and would drop
+# an algorithm that has no alias equal to its own name.
+#
+# Dies if the output file cannot be opened or closed.
+sub printsrclist()
+{
+    open (OUT, ">$srclistfile") or die "Can't open output file `$srclistfile': $!\n";
+
+    print OUT <<EOS;
+# $srclistfile: List of stemming module source files
+#
+# This file is generated by mkmodules.pl from a list of module names.
+# Do not edit manually.
+#
+EOS
+
+    my $line = "# Modules included by this file are: ";
+    print OUT $line;
+    my $linelen = length($line);
+
+    my $need_sep = 0;
+    my $lang;
+    my $srcfile;
+    my $enc;
+    my @algorithms = sort keys(%algorithms);
+    # Emit the comma separated algorithm list, starting a new comment line
+    # whenever the next name would push the line past 77 characters.
+    foreach $lang (@algorithms) {
+        if ($need_sep) {
+            if (($linelen + 2 + length($lang)) > 77) {
+                print OUT ",\n# ";
+                $linelen = 3;
+            } else {
+                print OUT ', ';
+                $linelen += 2;
+            }
+        }
+        print OUT $lang;
+        $linelen += length($lang);
+        $need_sep = 1;
+    }
+
+    print OUT "\n\nsnowball_sources= \\\n";
+    for $lang (@algorithms) {
+        my $hashref = $algorithm_encs{$lang};
+        foreach $enc (sort keys (%$hashref)) {
+            print OUT "  src_c/stem_${enc}_${lang}.c \\\n";
+        }
+    }
+
+    # Fixed sources; joined with " \\\n" so the last line has no continuation.
+    $need_sep = 0;
+    for $srcfile ('runtime/api.c',
+                  'runtime/utilities.c',
+                  "libstemmer/libstemmer${extn}.c") {
+        print OUT " \\\n" if $need_sep;
+        print OUT "  $srcfile";
+        $need_sep = 1;
+    }
+
+    print OUT "\n\nsnowball_headers= \\\n";
+    for $lang (@algorithms) {
+        my $hashref = $algorithm_encs{$lang};
+        foreach $enc (sort keys (%$hashref)) {
+            print OUT "  src_c/stem_${enc}_${lang}.h \\\n";
+        }
+    }
+
+    # Fixed headers; joined the same way as the fixed sources above.
+    $need_sep = 0;
+    for $srcfile ('include/libstemmer.h',
+                  "libstemmer/modules${extn}.h",
+                  'runtime/api.h',
+                  'runtime/header.h') {
+        print OUT " \\\n" if $need_sep;
+        print OUT "  $srcfile";
+        $need_sep = 1;
+    }
+
+    print OUT "\n\n";
+    close OUT or die "Can't close ${srclistfile}: $!\n";
+}
+
+# Main program: read the module descriptions, then write the C modules
+# header and the source list file.
+readinput();
+printoutput();
+printsrclist();
diff --git a/snowball_code/libstemmer/modules.h b/snowball_code/libstemmer/modules.h
new file mode 100644
index 0000000..7a1f685
--- /dev/null
+++ b/snowball_code/libstemmer/modules.h
@@ -0,0 +1,190 @@
+/* libstemmer/modules.h: List of stemming modules.
+ *
+ * This file is generated by mkmodules.pl from a list of module names.
+ * Do not edit manually.
+ *
+ * Modules included by this file are: danish, dutch, english, finnish, french,
+ * german, hungarian, italian, norwegian, porter, portuguese, romanian,
+ * russian, spanish, swedish, turkish
+ */
+
+#include "../src_c/stem_ISO_8859_1_danish.h"
+#include "../src_c/stem_UTF_8_danish.h"
+#include "../src_c/stem_ISO_8859_1_dutch.h"
+#include "../src_c/stem_UTF_8_dutch.h"
+#include "../src_c/stem_ISO_8859_1_english.h"
+#include "../src_c/stem_UTF_8_english.h"
+#include "../src_c/stem_ISO_8859_1_finnish.h"
+#include "../src_c/stem_UTF_8_finnish.h"
+#include "../src_c/stem_ISO_8859_1_french.h"
+#include "../src_c/stem_UTF_8_french.h"
+#include "../src_c/stem_ISO_8859_1_german.h"
+#include "../src_c/stem_UTF_8_german.h"
+#include "../src_c/stem_ISO_8859_1_hungarian.h"
+#include "../src_c/stem_UTF_8_hungarian.h"
+#include "../src_c/stem_ISO_8859_1_italian.h"
+#include "../src_c/stem_UTF_8_italian.h"
+#include "../src_c/stem_ISO_8859_1_norwegian.h"
+#include "../src_c/stem_UTF_8_norwegian.h"
+#include "../src_c/stem_ISO_8859_1_porter.h"
+#include "../src_c/stem_UTF_8_porter.h"
+#include "../src_c/stem_ISO_8859_1_portuguese.h"
+#include "../src_c/stem_UTF_8_portuguese.h"
+#include "../src_c/stem_ISO_8859_2_romanian.h"
+#include "../src_c/stem_UTF_8_romanian.h"
+#include "../src_c/stem_KOI8_R_russian.h"
+#include "../src_c/stem_UTF_8_russian.h"
+#include "../src_c/stem_ISO_8859_1_spanish.h"
+#include "../src_c/stem_UTF_8_spanish.h"
+#include "../src_c/stem_ISO_8859_1_swedish.h"
+#include "../src_c/stem_UTF_8_swedish.h"
+#include "../src_c/stem_UTF_8_turkish.h"
+
+typedef enum {
+  ENC_UNKNOWN=0,
+  ENC_ISO_8859_1,
+  ENC_ISO_8859_2,
+  ENC_KOI8_R,
+  ENC_UTF_8
+} stemmer_encoding_t;
+
+struct stemmer_encoding {
+  const char * name;
+  stemmer_encoding_t enc;
+};
+static struct stemmer_encoding encodings[] = {
+  {"ISO_8859_1", ENC_ISO_8859_1},
+  {"ISO_8859_2", ENC_ISO_8859_2},
+  {"KOI8_R", ENC_KOI8_R},
+  {"UTF_8", ENC_UTF_8},
+  {0,ENC_UNKNOWN}
+};
+
+struct stemmer_modules {
+  const char * name;
+  stemmer_encoding_t enc; 
+  struct SN_env * (*create)(void);
+  void (*close)(struct SN_env *);
+  int (*stem)(struct SN_env *);
+};
+static struct stemmer_modules modules[] = {
+  {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
+  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
+  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
+  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
+  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
+  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
+  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
+  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
+  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
+  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
+  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
+  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
+  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
+  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
+  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
+  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
+  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
+  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
+  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
+  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
+  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
+  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
+  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
+  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
+  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
+  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
+  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
+  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
+  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
+  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
+  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
+  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
+  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
+  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem},
+  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
+  {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
+  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
+  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
+  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
+  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
+  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
+  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
+  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
+  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
+  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
+  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
+  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
+  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
+  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
+  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {0,ENC_UNKNOWN,0,0,0}
+};
+static const char * algorithm_names[] = {
+  "danish", 
+  "dutch", 
+  "english", 
+  "finnish", 
+  "french", 
+  "german", 
+  "hungarian", 
+  "italian", 
+  "norwegian", 
+  "porter", 
+  "portuguese", 
+  "romanian", 
+  "russian", 
+  "spanish", 
+  "swedish", 
+  "turkish", 
+  0
+};
diff --git a/snowball_code/libstemmer/modules.txt b/snowball_code/libstemmer/modules.txt
new file mode 100644
index 0000000..ddde920
--- /dev/null
+++ b/snowball_code/libstemmer/modules.txt
@@ -0,0 +1,50 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is a set of space separated lines - on each line:
+#  First item is name of stemmer.
+#  Second item is comma separated list of character sets.
+#  Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8, and also with
+# the most commonly used encoding.
+
+danish          UTF_8,ISO_8859_1        danish,da,dan
+dutch           UTF_8,ISO_8859_1        dutch,nl,dut,nld
+english         UTF_8,ISO_8859_1        english,en,eng
+finnish         UTF_8,ISO_8859_1        finnish,fi,fin
+french          UTF_8,ISO_8859_1        french,fr,fre,fra
+german          UTF_8,ISO_8859_1        german,de,ger,deu
+hungarian       UTF_8,ISO_8859_1        hungarian,hu,hun
+italian         UTF_8,ISO_8859_1        italian,it,ita
+norwegian       UTF_8,ISO_8859_1        norwegian,no,nor
+portuguese      UTF_8,ISO_8859_1        portuguese,pt,por
+romanian        UTF_8,ISO_8859_2        romanian,ro,rum,ron
+russian         UTF_8,KOI8_R            russian,ru,rus
+spanish         UTF_8,ISO_8859_1        spanish,es,esl,spa
+swedish         UTF_8,ISO_8859_1        swedish,sv,swe
+turkish         UTF_8                   turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter          UTF_8,ISO_8859_1        porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported.  These
+# algorithms are:
+#
+# german2          - This is a slight modification of the german stemmer.
+#german2          UTF_8,ISO_8859_1        german2
+#
+# kraaij_pohlmann  - This is a different dutch stemmer.
+#kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann
+#
+# lovins           - This is an english stemmer, but fairly outdated, and
+#                    only really applicable to a restricted type of input text
+#                    (keywords in academic publications).
+#lovins           UTF_8,ISO_8859_1        lovins
diff --git a/snowball_code/libstemmer/modules_utf8.h b/snowball_code/libstemmer/modules_utf8.h
new file mode 100644
index 0000000..6a7cc92
--- /dev/null
+++ b/snowball_code/libstemmer/modules_utf8.h
@@ -0,0 +1,121 @@
+/* libstemmer/modules_utf8.h: List of stemming modules.
+ *
+ * This file is generated by mkmodules.pl from a list of module names.
+ * Do not edit manually.
+ *
+ * Modules included by this file are: danish, dutch, english, finnish, french,
+ * german, hungarian, italian, norwegian, porter, portuguese, romanian,
+ * russian, spanish, swedish, turkish
+ */
+
+#include "../src_c/stem_UTF_8_danish.h"
+#include "../src_c/stem_UTF_8_dutch.h"
+#include "../src_c/stem_UTF_8_english.h"
+#include "../src_c/stem_UTF_8_finnish.h"
+#include "../src_c/stem_UTF_8_french.h"
+#include "../src_c/stem_UTF_8_german.h"
+#include "../src_c/stem_UTF_8_hungarian.h"
+#include "../src_c/stem_UTF_8_italian.h"
+#include "../src_c/stem_UTF_8_norwegian.h"
+#include "../src_c/stem_UTF_8_porter.h"
+#include "../src_c/stem_UTF_8_portuguese.h"
+#include "../src_c/stem_UTF_8_romanian.h"
+#include "../src_c/stem_UTF_8_russian.h"
+#include "../src_c/stem_UTF_8_spanish.h"
+#include "../src_c/stem_UTF_8_swedish.h"
+#include "../src_c/stem_UTF_8_turkish.h"
+
+typedef enum {
+  ENC_UNKNOWN=0,
+  ENC_UTF_8
+} stemmer_encoding_t;
+
+struct stemmer_encoding {
+  const char * name;
+  stemmer_encoding_t enc;
+};
+static struct stemmer_encoding encodings[] = {
+  {"UTF_8", ENC_UTF_8},
+  {0,ENC_UNKNOWN}
+};
+
+struct stemmer_modules {
+  const char * name;
+  stemmer_encoding_t enc; 
+  struct SN_env * (*create)(void);
+  void (*close)(struct SN_env *);
+  int (*stem)(struct SN_env *);
+};
+static struct stemmer_modules modules[] = {
+  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
+  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
+  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
+  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
+  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
+  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
+  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
+  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
+  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
+  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
+  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
+  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
+  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
+  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
+  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
+  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
+  {0,ENC_UNKNOWN,0,0,0}
+};
+static const char * algorithm_names[] = {
+  "danish", 
+  "dutch", 
+  "english", 
+  "finnish", 
+  "french", 
+  "german", 
+  "hungarian", 
+  "italian", 
+  "norwegian", 
+  "porter", 
+  "portuguese", 
+  "romanian", 
+  "russian", 
+  "spanish", 
+  "swedish", 
+  "turkish", 
+  0
+};
diff --git a/snowball_code/libstemmer/modules_utf8.txt b/snowball_code/libstemmer/modules_utf8.txt
new file mode 100644
index 0000000..60a0e1d
--- /dev/null
+++ b/snowball_code/libstemmer/modules_utf8.txt
@@ -0,0 +1,49 @@
+# This file contains a list of stemmers to include in the distribution.
+# The format is a set of space separated lines - on each line:
+#  First item is name of stemmer.
+#  Second item is comma separated list of character sets.
+#  Third item is comma separated list of names to refer to the stemmer by.
+#
+# Lines starting with a #, or blank lines, are ignored.
+
+# List all the main algorithms for each language, in UTF-8.
+
+danish          UTF_8                   danish,da,dan
+dutch           UTF_8                   dutch,nl,dut,nld
+english         UTF_8                   english,en,eng
+finnish         UTF_8                   finnish,fi,fin
+french          UTF_8                   french,fr,fre,fra
+german          UTF_8                   german,de,ger,deu
+hungarian       UTF_8                   hungarian,hu,hun
+italian         UTF_8                   italian,it,ita
+norwegian       UTF_8                   norwegian,no,nor
+portuguese      UTF_8                   portuguese,pt,por
+romanian        UTF_8                   romanian,ro,rum,ron
+russian         UTF_8                   russian,ru,rus
+spanish         UTF_8                   spanish,es,esl,spa
+swedish         UTF_8                   swedish,sv,swe
+turkish         UTF_8                   turkish,tr,tur
+
+# Also include the traditional porter algorithm for english.
+# The porter algorithm is included in the libstemmer distribution to assist
+# with backwards compatibility, but for new systems the english algorithm
+# should be used in preference.
+porter          UTF_8                   porter
+
+# Some other stemmers in the snowball project are not included in the standard
+# distribution. To compile a libstemmer with them in, add them to this list,
+# and regenerate the distribution. (You will need a full source checkout for
+# this.) They are included in the snowball website as curiosities, but are not
+# intended for general use, and use of them is not fully supported.  These
+# algorithms are:
+#
+# german2          - This is a slight modification of the german stemmer.
+#german2          UTF_8                   german2
+#
+# kraaij_pohlmann  - This is a different dutch stemmer.
+#kraaij_pohlmann  UTF_8                   kraaij_pohlmann
+#
+# lovins           - This is an english stemmer, but fairly outdated, and
+#                    only really applicable to a restricted type of input text
+#                    (keywords in academic publications).
+#lovins           UTF_8                   lovins
diff --git a/snowball_code/runtime/api.c b/snowball_code/runtime/api.c
new file mode 100644
index 0000000..40039ef
--- /dev/null
+++ b/snowball_code/runtime/api.c
@@ -0,0 +1,66 @@
+
+#include <stdlib.h> /* for calloc, free */
+#include "header.h"
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
+{
+    /* Builds a zero-filled environment holding a freshly created current
+       string plus S_size strings, I_size ints and B_size unsigned chars (each
+       array only when its size is non-zero). Returns NULL if any allocation
+       fails, after handing the partly built environment to SN_close_env. */
+    struct SN_env * env = (struct SN_env *) calloc(1, sizeof(struct SN_env));
+    if (env == NULL) return NULL;
+
+    env->p = create_s();
+    if (env->p == NULL) goto fail;
+
+    if (S_size != 0) {
+        int slot = 0;
+        env->S = (symbol * *) calloc(S_size, sizeof(symbol *));
+        if (env->S == NULL) goto fail;
+        while (slot < S_size) {
+            env->S[slot] = create_s();
+            if (env->S[slot] == NULL) goto fail;
+            slot++;
+        }
+    }
+    if (I_size != 0) {
+        env->I = (int *) calloc(I_size, sizeof(int));
+        if (env->I == NULL) goto fail;
+    }
+    if (B_size != 0) {
+        env->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
+        if (env->B == NULL) goto fail;
+    }
+    return env;
+
+fail:
+    SN_close_env(env, S_size);
+    return NULL;
+}
+
+extern void SN_close_env(struct SN_env * z, int S_size)
+{
+    /* Frees z and everything it owns; S_size must be the count given to
+       SN_create_env. z may be NULL or only partly built, which is what
+       SN_create_env passes in when one of its allocations fails. */
+    if (z == NULL) return;
+    if (z->S != NULL)   /* NULL if creation failed before the array existed */
+    {
+        int i;
+        for (i = 0; i < S_size; i++) lose_s(z->S[i]);   /* unfilled slots are NULL */
+        free(z->S);
+    }
+    free(z->I);
+    free(z->B);
+    lose_s(z->p);   /* lose_s ignores NULL */
+    free(z);
+}
+
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
+{   /* Make the size symbols at s the whole current string, cursor at 0. */
+    const int status = replace_s(z, 0, z->l, size, s, NULL);
+    z->c = 0;
+    return status;   /* whatever replace_s reported: 0, or -1 on error */
+}
+
diff --git a/snowball_code/runtime/api.h b/snowball_code/runtime/api.h
new file mode 100644
index 0000000..8b997f0
--- /dev/null
+++ b/snowball_code/runtime/api.h
@@ -0,0 +1,26 @@
+
+typedef unsigned char symbol;
+
+/* Or replace 'char' above with 'short' for 16 bit characters.
+
+   More precisely, replace 'char' with whatever type guarantees the
+   character width you need. Note however that sizeof(symbol) should divide
+   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
+   there is an alignment problem. In the unlikely event of a problem here,
+   consult Martin Porter.
+
+*/
+
+struct SN_env {
+    symbol * p;                              /* the string being worked on */
+    int c; int l; int lb; int bra; int ket;  /* indexes into p: current position, forward limit, backward limit, start and end of the slice */
+    symbol * * S;                            /* array of strings, allocated by SN_create_env when S_size is non-zero */
+    int * I;                                 /* array of ints, allocated by SN_create_env when I_size is non-zero */
+    unsigned char * B;                       /* array of unsigned chars, allocated by SN_create_env when B_size is non-zero */
+};
+
+extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
+extern void SN_close_env(struct SN_env * z, int S_size);
+
+extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
+
diff --git a/snowball_code/runtime/header.h b/snowball_code/runtime/header.h
new file mode 100644
index 0000000..4d3078f
--- /dev/null
+++ b/snowball_code/runtime/header.h
@@ -0,0 +1,58 @@
+
+#include <limits.h>
+
+#include "api.h"
+
+#define MAXINT INT_MAX
+#define MININT INT_MIN
+
+#define HEAD (2*sizeof(int))   /* bytes kept in front of every string for its capacity and size */
+/* p points just past that header: int slot [-1] is the size, [-2] the capacity */
+#define SIZE(p)        (((int *)(p))[-1])
+#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
+#define CAPACITY(p)    (((int *)(p))[-2])
+
+struct among
+{   int s_size;     /* number of symbols in s */
+    const symbol * s;       /* search string, s_size symbols long */
+    int substring_i;/* index in the same table of the entry find_among tries next when this one is not taken; negative for none */
+    int result;     /* value find_among / find_among_b return when this entry is taken */
+    int (* function)(struct SN_env *); /* 0, or an extra test: the entry is taken only if it returns non-zero */
+};
+
+extern symbol * create_s(void);
+extern void lose_s(symbol * p);
+
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
+
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
+
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
+extern int eq_v(struct SN_env * z, const symbol * p);
+extern int eq_v_b(struct SN_env * z, const symbol * p);
+
+extern int find_among(struct SN_env * z, const struct among * v, int v_size);
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
+
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
+extern int slice_from_v(struct SN_env * z, const symbol * p);
+extern int slice_del(struct SN_env * z);
+
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
+
+extern symbol * slice_to(struct SN_env * z, symbol * p);
+extern symbol * assign_to(struct SN_env * z, symbol * p);
+
+extern void debug(struct SN_env * z, int number, int line_count);
+
diff --git a/snowball_code/runtime/utilities.c b/snowball_code/runtime/utilities.c
new file mode 100644
index 0000000..1840f02
--- /dev/null
+++ b/snowball_code/runtime/utilities.c
@@ -0,0 +1,478 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "header.h"
+
+#define unless(C) if(!(C))
+
+#define CREATE_SIZE 1
+
+extern symbol * create_s(void) {
+    /* New empty string with capacity CREATE_SIZE, or NULL if out of memory. */
+    symbol * p;
+    void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
+    if (mem == NULL) return NULL;
+    p = (symbol *) (HEAD + (char *) mem);   /* CAPACITY and SIZE sit in the HEAD bytes before p */
+    CAPACITY(p) = CREATE_SIZE; SET_SIZE(p, 0);   /* empty: SIZE must not count uninitialized symbols */
+    return p;
+}
+
+extern void lose_s(symbol * p) {
+    /* p points HEAD bytes into its allocation; a NULL p is ignored. */
+    if (p != NULL) free((char *) p - HEAD);
+}
+
+/*
+   new_c = skip_utf8(p, c, lb, l, n); steps over UTF-8 characters in p,
+   starting at position c:
+
+     n > 0   n characters forwards, never reading at or beyond l
+     n < 0   -n characters backwards, never reading before lb
+     n == 0  nothing
+
+   new_c is the new position, or -1 if the limit was reached before all the
+   characters had been skipped. Bytes are only classified by their top bits
+   (0-------, 10------, 11------); sequences are not otherwise validated.
+
+   -- used to implement hop and next in the utf8 case.
+*/
+
+extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
+    /* forwards */
+    while (n > 0) {
+        if (c >= l) return -1;
+        if (p[c++] >= 0xC0) {   /* 11------ starts a multi-byte character */
+            /* take the 10------ bytes that continue it */
+            while (c < l && p[c] >= 0x80 && p[c] < 0xC0) c++;
+        }
+        n--;
+    }
+
+    /* backwards */
+    while (n < 0) {
+        if (c <= lb) return -1;
+        if (p[--c] >= 0x80) {   /* 1------- belongs to a multi-byte character */
+            /* back up to the 11------ byte that starts it */
+            while (c > lb && p[c] < 0xC0) c--;
+        }
+        n++;
+    }
+
+    return c;
+}
+
+/* Code for character groupings: utf8 cases */
+
+static int get_utf8(const symbol * p, int c, int l, int * slot) {
+    /* Decodes the UTF-8 character at p[c] (reading no further than l) into *slot and
+       returns how many bytes it took (1 to 4), or 0 if c is already at l. */
+    int b0, b1, b2;
+    if (c >= l) return 0;
+    b0 = p[c++];
+    if (b0 < 0xC0 || c == l) { * slot = b0; return 1; }   /* 1100 0000 */
+    b1 = p[c++] & 0x3F;
+    if (b0 < 0xE0 || c == l) { * slot = (b0 & 0x1F) << 6 | b1; return 2; }   /* 1110 0000 */
+    b2 = p[c++] & 0x3F;
+    if (b0 < 0xF0 || c == l) { * slot = (b0 & 0xF) << 12 | b1 << 6 | b2; return 3; }   /* 1111 0000 */
+    * slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F); return 4;
+}
+
+static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
+    /* Decodes the UTF-8 character ending just before p[c] (reading nothing below lb)
+       into *slot; returns its width in bytes (1 to 4), or 0 if c is already at lb. */
+    int a, b;   /* a: low bits gathered so far; b: byte just read */
+    if (c <= lb) return 0;
+    b = p[--c];
+    if (b < 0x80 || c == lb) { * slot = b; return 1; }   /* 1000 0000 */
+    a = b & 0x3F; b = p[--c];
+    if (b >= 0xC0 || c == lb) { * slot = (b & 0x1F) << 6 | a; return 2; }   /* 1100 0000 */
+    a |= (b & 0x3F) << 6; b = p[--c];   /* the third byte back, not the second again */
+    if (b >= 0xE0 || c == lb) { * slot = (b & 0xF) << 12 | a; return 3; }   /* 1110 0000 */
+    * slot = (p[c - 1] & 0x7) << 18 | (b & 0x3F) << 12 | a; return 4;
+}
+
+extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* UTF-8, forwards: pass a character that is in bitmap s (every such character if repeat). Returns 0 after one pass with repeat == 0, else the byte width of the first non-member, or -1 at z->l. */
+    do {
+        int ch, bit;
+        const int w = get_utf8(z->p, z->c, z->l, & ch);
+        if (w == 0) return -1;
+        if (ch > max || (bit = ch - min) < 0 || !(s[bit >> 3] >> (bit & 0x7) & 0x1)) return w;
+        z->c += w;
+    } while (repeat);
+    return 0;
+}
+
+extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* UTF-8, backwards: pass a character that is in bitmap s (every such character if repeat). Returns 0 after one pass with repeat == 0, else the byte width of the first non-member, or -1 at z->lb. */
+    do {
+        int ch, bit;
+        const int w = get_b_utf8(z->p, z->c, z->lb, & ch);
+        if (w == 0) return -1;
+        if (ch > max || (bit = ch - min) < 0 || !(s[bit >> 3] >> (bit & 0x7) & 0x1)) return w;
+        z->c -= w;
+    } while (repeat);
+    return 0;
+}
+
+extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* UTF-8, forwards: pass a character that is NOT in bitmap s (every such character if repeat). Returns 0 after one pass with repeat == 0, else the byte width of the first member, or -1 at z->l. */
+    do {
+        int ch, bit;
+        const int w = get_utf8(z->p, z->c, z->l, & ch);
+        if (w == 0) return -1;
+        if (ch <= max && (bit = ch - min) >= 0 && (s[bit >> 3] >> (bit & 0x7) & 0x1)) return w;
+        z->c += w;
+    } while (repeat);
+    return 0;
+}
+
+extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* UTF-8, backwards: pass a character that is NOT in bitmap s (every such character if repeat). Returns 0 after one pass with repeat == 0, else the byte width of the first member, or -1 at z->lb. */
+    do {
+        int ch, bit;
+        const int w = get_b_utf8(z->p, z->c, z->lb, & ch);
+        if (w == 0) return -1;
+        if (ch <= max && (bit = ch - min) >= 0 && (s[bit >> 3] >> (bit & 0x7) & 0x1)) return w;
+        z->c -= w;
+    } while (repeat);
+    return 0;
+}
+
+/* Code for character groupings: non-utf8 cases */
+
+extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* One symbol per character, forwards: pass a symbol that is in bitmap s (every such symbol if repeat). Returns 0 after one pass with repeat == 0, else 1 at the first non-member, or -1 at z->l. */
+    do {
+        int ch, bit;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch > max || (bit = ch - min) < 0 || !(s[bit >> 3] >> (bit & 0x7) & 0x1)) return 1;
+        z->c++;
+    } while (repeat);
+    return 0;
+}
+
+extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* One symbol per character, backwards: pass a symbol that is in bitmap s (every such symbol if repeat). Returns 0 after one pass with repeat == 0, else 1 at the first non-member, or -1 at z->lb. */
+    do {
+        int ch, bit;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch > max || (bit = ch - min) < 0 || !(s[bit >> 3] >> (bit & 0x7) & 0x1)) return 1;
+        z->c--;
+    } while (repeat);
+    return 0;
+}
+
+extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* One symbol per character, forwards: pass a symbol that is NOT in bitmap s (every such symbol if repeat). Returns 0 after one pass with repeat == 0, else 1 at the first member, or -1 at z->l. */
+    do {
+        int ch, bit;
+        if (z->c >= z->l) return -1;
+        ch = z->p[z->c];
+        if (ch <= max && (bit = ch - min) >= 0 && (s[bit >> 3] >> (bit & 0x7) & 0x1)) return 1;
+        z->c++;
+    } while (repeat);
+    return 0;
+}
+
+extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
+    /* One symbol per character, backwards: pass a symbol that is NOT in bitmap s (every such symbol if repeat). Returns 0 after one pass with repeat == 0, else 1 at the first member, or -1 at z->lb. */
+    do {
+        int ch, bit;
+        if (z->c <= z->lb) return -1;
+        ch = z->p[z->c - 1];
+        if (ch <= max && (bit = ch - min) >= 0 && (s[bit >> 3] >> (bit & 0x7) & 0x1)) return 1;
+        z->c--;
+    } while (repeat);
+    return 0;
+}
+
+extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {   /* do the s_size symbols at s come next, before z->l? */
+    if (s_size <= z->l - z->c && memcmp(z->p + z->c, s, s_size * sizeof(symbol)) == 0) { z->c += s_size; return 1; }
+    return 0;   /* no match: position left alone */
+}
+
+extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {   /* do the s_size symbols at s end at the position, after z->lb? */
+    if (s_size <= z->c - z->lb && memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) == 0) { z->c -= s_size; return 1; }
+    return 0;   /* no match: position left alone */
+}
+
+extern int eq_v(struct SN_env * z, const symbol * p) {   /* eq_s for a string p that carries its own length */
+    return eq_s(z, SIZE(p), p);   /* SIZE(p) is read from the header in front of p */
+}
+
+extern int eq_v_b(struct SN_env * z, const symbol * p) {   /* eq_s_b for a string p that carries its own length */
+    return eq_s_b(z, SIZE(p), p);   /* SIZE(p) is read from the header in front of p */
+}
+
+extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
+    /* Looks the text at z->c up in table v (v_size entries); returns the result of the entry taken, or 0 if none is. */
+    int i = 0;
+    int j = v_size;
+    /* i and j bracket a binary search, which relies on v being held in increasing order of s */
+    int c = z->c; int l = z->l;
+    symbol * q = z->p + c;
+
+    const struct among * w;
+    /* common_i / common_j: how many symbols matched when entry i / entry j was last compared */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; /* smaller */
+        w = v + k;
+        {
+            int i2; for (i2 = common; i2 < w->s_size; i2++) {
+                if (c + common == l) { diff = -1; break; } /* text ran out first: it sorts before the entry */
+                diff = q[common] - w->s[i2];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1) {
+            if (i > 0) break; /* v->s has been inspected */
+            if (j == i) break; /* only one item in v */
+
+            /* - but now we need to go round once more to get
+               v->s inspected. This looks messy, but is actually
+               the optimal approach.  */
+
+            if (first_key_inspected) break;
+            first_key_inspected = 1;
+        }
+    }
+    while(1) { /* from entry i, follow substring_i until an entry is taken or the chain ends */
+        w = v + i;
+        if (common_i >= w->s_size) { /* the whole of this entry's string matched */
+            z->c = c + w->s_size;
+            if (w->function == 0) return w->result;
+            {
+                int res = w->function(z);
+                z->c = c + w->s_size; /* put the position back in case the function moved it */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+/* find_among_b is for backwards processing. Same comments apply */
+
+extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
+    /* Looks the text ending at z->c up in table v (v_size entries), comparing right to left; returns the result of the entry taken, or 0 if none is. */
+    int i = 0;
+    int j = v_size;
+    /* i and j bracket a binary search, which relies on v being held in order of s read from its last symbol */
+    int c = z->c; int lb = z->lb;
+    symbol * q = z->p + c - 1;
+
+    const struct among * w;
+    /* common_i / common_j: how many symbols matched when entry i / entry j was last compared */
+    int common_i = 0;
+    int common_j = 0;
+
+    int first_key_inspected = 0;
+
+    while(1) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        w = v + k;
+        {
+            int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) {
+                if (c - common == lb) { diff = -1; break; } /* text ran out first: it sorts before the entry */
+                diff = q[- common] - w->s[i2];
+                if (diff != 0) break;
+                common++;
+            }
+        }
+        if (diff < 0) { j = k; common_j = common; }
+                 else { i = k; common_i = common; }
+        if (j - i <= 1) {
+            if (i > 0) break; /* entry 0 has already been compared */
+            if (j == i) break; /* v is empty */
+            if (first_key_inspected) break; /* second time here: entry 0 has now been compared */
+            first_key_inspected = 1;
+        }
+    }
+    while(1) { /* from entry i, follow substring_i until an entry is taken or the chain ends */
+        w = v + i;
+        if (common_i >= w->s_size) { /* the whole of this entry's string matched */
+            z->c = c - w->s_size;
+            if (w->function == 0) return w->result;
+            {
+                int res = w->function(z);
+                z->c = c - w->s_size; /* put the position back in case the function moved it */
+                if (res) return w->result;
+            }
+        }
+        i = w->substring_i;
+        if (i < 0) return 0;
+    }
+}
+
+
+/* Grows the buffer behind p so that it can hold at least n symbols (20 spare
+ * symbols are added on top). Returns the possibly moved string, or NULL, with
+ * the old buffer freed, if memory runs out. The stored size is not changed.
+ */
+static symbol * increase_size(symbol * p, int n) {
+    const int capacity = n + 20;
+    void * block = realloc((char *) p - HEAD, HEAD + (capacity + 1) * sizeof(symbol));
+    symbol * grown;
+    if (block == NULL) {
+        lose_s(p);   /* a failed realloc leaves the old buffer allocated */
+        return NULL;
+    }
+    grown = (symbol *) (HEAD + (char *) block);
+    CAPACITY(grown) = capacity;
+    return grown;
+}
+
+/* Replaces z->p[c_bra..c_ket) by the s_size symbols at s, growing or shrinking
+   the string as needed and keeping z->l and z->c in step. If adjptr is not
+   NULL, *adjptr receives the change in length, s_size - (c_ket - c_bra).
+   Returns 0 on success, or -1 if memory runs out, in which case z->p has been
+   freed and is NULL.
+*/
+extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
+{
+    int delta;
+    int old_len;
+
+    if (z->p == NULL && (z->p = create_s()) == NULL) return -1;
+
+    delta = s_size - (c_ket - c_bra);
+    old_len = SIZE(z->p);
+    if (delta != 0) {
+        const int new_len = old_len + delta;
+        if (new_len > CAPACITY(z->p)) {
+            z->p = increase_size(z->p, new_len);
+            if (z->p == NULL) return -1;
+        }
+
+        /* slide everything after the replaced span to its new place */
+        memmove(z->p + c_ket + delta, z->p + c_ket, (old_len - c_ket) * sizeof(symbol));
+        SET_SIZE(z->p, new_len);
+        z->l += delta;
+        /* a position after the span moves with the tail; one inside it snaps to c_bra */
+        if (z->c >= c_ket) z->c += delta;
+        else if (z->c > c_bra) z->c = c_bra;
+    }
+
+    if (s_size != 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
+    if (adjptr != NULL) *adjptr = delta;
+
+    return 0;
+}
+
+static int slice_check(struct SN_env * z) {
+    /* 0 if 0 <= bra <= ket <= l, p is not NULL and l does not exceed the
+       stored size of p; -1 otherwise. */
+    const int sound = z->bra >= 0 &&
+                      z->bra <= z->ket &&
+                      z->ket <= z->l &&
+                      z->p != NULL &&
+                      z->l <= SIZE(z->p); /* this line could be removed */
+#if 0
+    if (!sound) {
+        fprintf(stderr, "faulty slice operation:\n");
+        debug(z, -1, 0);
+    }
+#endif
+    return sound ? 0 : -1;
+}
+
+extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
+    /* Overwrites the slice [bra, ket) with the s_size symbols at s; -1 if slice_check rejects the slice, else replace_s's result. */
+    return slice_check(z) != 0 ? -1 : replace_s(z, z->bra, z->ket, s_size, s, NULL);
+}
+
+extern int slice_from_v(struct SN_env * z, const symbol * p) {   /* slice_from_s for a string p that carries its own length */
+    return slice_from_s(z, SIZE(p), p);   /* SIZE(p) is read from the header in front of p */
+}
+
+extern int slice_del(struct SN_env * z) {   /* removes the slice [bra, ket) by replacing it with nothing */
+    return slice_from_s(z, 0, 0);   /* with s_size 0, replace_s never reads s */
+}
+
+extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
+    /* replace_s on [bra, ket), after which z->bra and z->ket are shifted by the length change when they lie at or after bra; 0, or -1 if replace_s fails. */
+    int delta;
+    if (replace_s(z, bra, ket, s_size, s, &delta) != 0) return -1;
+    z->bra += (bra <= z->bra) ? delta : 0;
+    z->ket += (bra <= z->ket) ? delta : 0;
+    return 0;
+}
+
+extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
+    /* insert_s for a string p that carries its own length, SIZE(p):
+       replaces [bra, ket) with p and shifts z->bra / z->ket along when they
+       lie at or after bra. Returns 0, or -1 if replace_s fails. */
+    const int p_size = SIZE(p);
+
+    return insert_s(z, bra, ket, p_size, p);
+}
+
+extern symbol * slice_to(struct SN_env * z, symbol * p) {
+    /* Copies the slice [bra, ket) of the current string into p, enlarging p
+       if necessary, and returns p (which may have moved). If slice_check
+       rejects the slice or memory runs out, p is freed and NULL is returned. */
+    int n;
+    if (slice_check(z) != 0) {
+        lose_s(p);
+        return NULL;
+    }
+
+    n = z->ket - z->bra;
+    if (n > CAPACITY(p) && (p = increase_size(p, n)) == NULL) return NULL;
+
+    memmove(p, z->p + z->bra, n * sizeof(symbol));
+    SET_SIZE(p, n);
+    return p;
+}
+
+extern symbol * assign_to(struct SN_env * z, symbol * p) {
+    /* Copies the first z->l symbols of the current string into p, enlarging p if
+       necessary; returns p (possibly moved), or NULL with p freed if memory runs out. */
+    const int n = z->l;
+
+    if (n > CAPACITY(p) && (p = increase_size(p, n)) == NULL) return NULL;
+    memmove(p, z->p, n * sizeof(symbol));
+    SET_SIZE(p, n);
+
+    return p;
+}
+
+#if 0
+extern void debug(struct SN_env * z, int number, int line_count) {
+    int i;
+    int limit = SIZE(z->p);
+    /* Prints the string with { at lb, [ at bra, | at c, ] at ket and } at l; zero symbols are shown as '#'. */
+    if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
+    for (i = 0; i <= limit; i++) { /* <= so that markers sitting at the very end are printed too */
+        if (z->lb == i) printf("{");
+        if (z->bra == i) printf("[");
+        if (z->c == i) printf("|");
+        if (z->ket == i) printf("]");
+        if (z->l == i) printf("}");
+        if (i < limit)
+        {   int ch = z->p[i];
+            if (ch == 0) ch = '#';
+            printf("%c", ch);
+        }
+    }
+    printf("'\n");
+}
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/liblingua-stem-snowball-perl.git



More information about the Pkg-perl-cvs-commits mailing list