[nltk] 01/08: Imported Upstream version 3.2.1

Gianfranco Costamagna locutusofborg at moszumanska.debian.org
Tue May 17 08:55:39 UTC 2016


This is an automated email from the git hooks/post-receive script.

locutusofborg pushed a commit to annotated tag debian/3.2.1-1
in repository nltk.

commit bbfa547c996defe179c67b7e1523b99c53c08b8f
Author: Gianfranco Costamagna <locutusofborg at debian.org>
Date:   Tue May 17 10:48:36 2016 +0200

    Imported Upstream version 3.2.1
---
 LICENSE.txt                                    |   2 +-
 PKG-INFO                                       |   2 +-
 nltk.egg-info/PKG-INFO                         |   2 +-
 nltk.egg-info/SOURCES.txt                      |   6 +
 nltk/VERSION                                   |   2 +-
 nltk/__init__.py                               |   4 +-
 nltk/app/__init__.py                           |   2 +-
 nltk/app/chartparser_app.py                    |   6 +-
 nltk/app/chunkparser_app.py                    |  15 +-
 nltk/app/collocations_app.py                   |   6 +-
 nltk/app/concordance_app.py                    |   2 +-
 nltk/app/rdparser_app.py                       |   6 +-
 nltk/app/srparser_app.py                       |   2 +-
 nltk/app/wordfreq_app.py                       |   2 +-
 nltk/app/wordnet_app.py                        |   8 +-
 nltk/book.py                                   |   2 +-
 nltk/ccg/__init__.py                           |   2 +-
 nltk/ccg/api.py                                |   6 +-
 nltk/ccg/chart.py                              |  71 +++-
 nltk/ccg/combinator.py                         |   2 +-
 nltk/ccg/lexicon.py                            |  74 +++-
 nltk/ccg/logic.py                              |  46 ++
 nltk/chat/__init__.py                          |   2 +-
 nltk/chat/eliza.py                             |   2 +-
 nltk/chat/iesha.py                             |   2 +-
 nltk/chat/rude.py                              |   2 +-
 nltk/chat/suntsu.py                            |   2 +-
 nltk/chat/util.py                              |   2 +-
 nltk/chat/zen.py                               |   2 +-
 nltk/chunk/__init__.py                         |   2 +-
 nltk/chunk/api.py                              |   2 +-
 nltk/chunk/named_entity.py                     |   2 +-
 nltk/chunk/regexp.py                           |   2 +-
 nltk/chunk/util.py                             |   8 +-
 nltk/classify/__init__.py                      |   2 +-
 nltk/classify/api.py                           |   2 +-
 nltk/classify/decisiontree.py                  |   6 +-
 nltk/classify/maxent.py                        |  12 +-
 nltk/classify/megam.py                         |   2 +-
 nltk/classify/naivebayes.py                    |   2 +-
 nltk/classify/rte_classify.py                  |   2 +-
 nltk/classify/senna.py                         |   2 +-
 nltk/classify/svm.py                           |   2 +-
 nltk/classify/tadm.py                          |   2 +-
 nltk/classify/textcat.py                       |   2 +-
 nltk/classify/util.py                          |  14 +-
 nltk/classify/weka.py                          |   4 +-
 nltk/cluster/__init__.py                       |   2 +-
 nltk/cluster/api.py                            |   2 +-
 nltk/cluster/em.py                             |   2 +-
 nltk/cluster/gaac.py                           |   2 +-
 nltk/cluster/kmeans.py                         |   8 +-
 nltk/cluster/util.py                           |   2 +-
 nltk/collocations.py                           |   2 +-
 nltk/compat.py                                 |  46 +-
 nltk/corpus/__init__.py                        |   4 +-
 nltk/corpus/europarl_raw.py                    |   2 +-
 nltk/corpus/reader/__init__.py                 |   5 +-
 nltk/corpus/reader/aligned.py                  |   2 +-
 nltk/corpus/reader/api.py                      |   4 +-
 nltk/corpus/reader/bnc.py                      |   2 +-
 nltk/corpus/reader/bracket_parse.py            |   2 +-
 nltk/corpus/reader/categorized_sents.py        |   2 +-
 nltk/corpus/reader/chasen.py                   |   2 +-
 nltk/corpus/reader/childes.py                  |  61 ++-
 nltk/corpus/reader/chunked.py                  |   2 +-
 nltk/corpus/reader/cmudict.py                  |   2 +-
 nltk/corpus/reader/comparative_sents.py        |   2 +-
 nltk/corpus/reader/conll.py                    |   2 +-
 nltk/corpus/reader/crubadan.py                 |   2 +-
 nltk/corpus/reader/dependency.py               |   2 +-
 nltk/corpus/reader/framenet.py                 |   2 +-
 nltk/corpus/reader/ieer.py                     |   2 +-
 nltk/corpus/reader/indian.py                   |   2 +-
 nltk/corpus/reader/ipipan.py                   |   2 +-
 nltk/corpus/reader/knbc.py                     |   2 +-
 nltk/corpus/reader/lin.py                      |   2 +-
 nltk/corpus/reader/mte.py                      | 172 ++++----
 nltk/corpus/reader/nkjp.py                     |   2 +-
 nltk/corpus/reader/nombank.py                  |   2 +-
 nltk/corpus/reader/nps_chat.py                 |   2 +-
 nltk/corpus/reader/opinion_lexicon.py          |   2 +-
 nltk/corpus/reader/panlex_lite.py              | 165 ++++++++
 nltk/corpus/reader/pl196x.py                   |   2 +-
 nltk/corpus/reader/plaintext.py                |   9 +-
 nltk/corpus/reader/ppattach.py                 |   2 +-
 nltk/corpus/reader/propbank.py                 |   2 +-
 nltk/corpus/reader/pros_cons.py                |   2 +-
 nltk/corpus/reader/reviews.py                  |   8 +-
 nltk/corpus/reader/rte.py                      |   2 +-
 nltk/corpus/reader/semcor.py                   |   2 +-
 nltk/corpus/reader/senseval.py                 |   2 +-
 nltk/corpus/reader/sentiwordnet.py             |  18 +-
 nltk/corpus/reader/sinica_treebank.py          |   2 +-
 nltk/corpus/reader/string_category.py          |   2 +-
 nltk/corpus/reader/switchboard.py              |   2 +-
 nltk/corpus/reader/tagged.py                   |   2 +-
 nltk/corpus/reader/toolbox.py                  |   2 +-
 nltk/corpus/reader/twitter.py                  |   2 +-
 nltk/corpus/reader/util.py                     |   5 +-
 nltk/corpus/reader/verbnet.py                  |   2 +-
 nltk/corpus/reader/wordlist.py                 |   2 +-
 nltk/corpus/reader/wordnet.py                  |   4 +-
 nltk/corpus/reader/xmldocs.py                  |   2 +-
 nltk/corpus/util.py                            |   2 +-
 nltk/data.py                                   |  25 +-
 nltk/downloader.py                             |  19 +-
 nltk/draw/__init__.py                          |   2 +-
 nltk/draw/cfg.py                               |   4 +-
 nltk/draw/dispersion.py                        |   2 +-
 nltk/draw/table.py                             |  24 +-
 nltk/draw/tree.py                              |   2 +-
 nltk/draw/util.py                              |   2 +-
 nltk/featstruct.py                             |   2 +-
 nltk/grammar.py                                |   6 +-
 nltk/help.py                                   |   2 +-
 nltk/inference/__init__.py                     |   2 +-
 nltk/inference/nonmonotonic.py                 |   2 +-
 nltk/inference/prover9.py                      |   2 +-
 nltk/inference/resolution.py                   |   2 +-
 nltk/inference/tableau.py                      |   2 +-
 nltk/internals.py                              |  69 ++-
 nltk/jsontags.py                               |   2 +-
 nltk/metrics/__init__.py                       |   2 +-
 nltk/metrics/agreement.py                      |  12 +-
 nltk/metrics/association.py                    |  23 +-
 nltk/metrics/confusionmatrix.py                |   2 +-
 nltk/metrics/distance.py                       |  13 +-
 nltk/metrics/paice.py                          |  42 +-
 nltk/metrics/scores.py                         |  20 +-
 nltk/metrics/segmentation.py                   |   2 +-
 nltk/metrics/spearman.py                       |   5 +-
 nltk/misc/__init__.py                          |   2 +-
 nltk/misc/minimalset.py                        |   2 +-
 nltk/misc/sort.py                              |   2 +-
 nltk/misc/wordfinder.py                        |   2 +-
 nltk/parse/__init__.py                         |   2 +-
 nltk/parse/api.py                              |   2 +-
 nltk/parse/bllip.py                            |   2 +-
 nltk/parse/chart.py                            |   2 +-
 nltk/parse/dependencygraph.py                  |   2 +-
 nltk/parse/earleychart.py                      |   2 +-
 nltk/parse/evaluate.py                         |   2 +-
 nltk/parse/featurechart.py                     |   2 +-
 nltk/parse/generate.py                         |   2 +-
 nltk/parse/malt.py                             |  12 +-
 nltk/parse/nonprojectivedependencyparser.py    |   2 +-
 nltk/parse/pchart.py                           |   2 +-
 nltk/parse/projectivedependencyparser.py       |   2 +-
 nltk/parse/recursivedescent.py                 |   2 +-
 nltk/parse/shiftreduce.py                      |   2 +-
 nltk/parse/stanford.py                         |  14 +-
 nltk/parse/transitionparser.py                 |   2 +-
 nltk/parse/util.py                             |   2 +-
 nltk/parse/viterbi.py                          |   2 +-
 nltk/probability.py                            | 170 ++++++--
 nltk/sem/__init__.py                           |   2 +-
 nltk/sem/boxer.py                              |   2 +-
 nltk/sem/chat80.py                             |  21 +-
 nltk/sem/cooper_storage.py                     |   2 +-
 nltk/sem/drt.py                                |   2 +-
 nltk/sem/drt_glue_demo.py                      |   2 +-
 nltk/sem/evaluate.py                           |   2 +-
 nltk/sem/glue.py                               |   6 +-
 nltk/sem/hole.py                               |   2 +-
 nltk/sem/lfg.py                                |   2 +-
 nltk/sem/linearlogic.py                        |   2 +-
 nltk/sem/logic.py                              |  24 +-
 nltk/sem/relextract.py                         |   2 +-
 nltk/sem/skolemize.py                          |   2 +-
 nltk/sem/util.py                               |  18 +-
 nltk/sentiment/__init__.py                     |   2 +-
 nltk/sentiment/sentiment_analyzer.py           |   5 +-
 nltk/sentiment/util.py                         |  12 +-
 nltk/sentiment/vader.py                        |  15 +-
 nltk/stem/__init__.py                          |   2 +-
 nltk/stem/api.py                               |   2 +-
 nltk/stem/isri.py                              |   2 +-
 nltk/stem/lancaster.py                         |   2 +-
 nltk/stem/porter.py                            |   2 +-
 nltk/stem/regexp.py                            |   2 +-
 nltk/stem/rslp.py                              |   2 +-
 nltk/stem/snowball.py                          |   2 +-
 nltk/stem/util.py                              |   2 +-
 nltk/stem/wordnet.py                           |   2 +-
 nltk/tag/__init__.py                           |   2 +-
 nltk/tag/api.py                                |   2 +-
 nltk/tag/brill.py                              |   2 +-
 nltk/tag/brill_trainer.py                      |   2 +-
 nltk/tag/crf.py                                |   8 +-
 nltk/tag/hmm.py                                |   2 +-
 nltk/tag/hunpos.py                             |   2 +-
 nltk/tag/mapping.py                            |   2 +-
 nltk/tag/perceptron.py                         |  20 +-
 nltk/tag/senna.py                              |   2 +-
 nltk/tag/sequential.py                         |  25 +-
 nltk/tag/stanford.py                           |   9 +-
 nltk/tag/tnt.py                                |  32 +-
 nltk/tag/util.py                               |   2 +-
 nltk/tbl/__init__.py                           |   2 +-
 nltk/tbl/demo.py                               |   2 +-
 nltk/tbl/erroranalysis.py                      |   2 +-
 nltk/tbl/feature.py                            |   2 +-
 nltk/tbl/rule.py                               |   2 +-
 nltk/tbl/template.py                           |   2 +-
 nltk/test/__init__.py                          |   2 +-
 nltk/test/bnc.doctest                          |   2 +-
 nltk/test/ccg.doctest                          |  44 +-
 nltk/test/ccg_semantics.doctest                | 553 +++++++++++++++++++++++++
 nltk/test/chat80.doctest                       |   2 +-
 nltk/test/chunk.doctest                        |   2 +-
 nltk/test/classify.doctest                     |   2 +-
 nltk/test/collocations.doctest                 |   2 +-
 nltk/test/corpus.doctest                       |   4 +-
 nltk/test/crubadan.doctest                     |   2 +-
 nltk/test/data.doctest                         |   2 +-
 nltk/test/dependency.doctest                   |   2 +-
 nltk/test/discourse.doctest                    |   2 +-
 nltk/test/drt.doctest                          |   2 +-
 nltk/test/featgram.doctest                     |   2 +-
 nltk/test/featstruct.doctest                   |   2 +-
 nltk/test/framenet.doctest                     |   2 +-
 nltk/test/generate.doctest                     |   2 +-
 nltk/test/gensim.doctest                       |   2 +-
 nltk/test/gluesemantics.doctest                |  18 +-
 nltk/test/gluesemantics_malt.doctest           |   2 +-
 nltk/test/grammar.doctest                      |   2 +-
 nltk/test/grammartestsuites.doctest            |   2 +-
 nltk/test/index.doctest                        |   2 +-
 nltk/test/inference.doctest                    |   2 +-
 nltk/test/internals.doctest                    |   2 +-
 nltk/test/japanese.doctest                     |   2 +-
 nltk/test/logic.doctest                        |   2 +-
 nltk/test/metrics.doctest                      |   2 +-
 nltk/test/misc.doctest                         |   2 +-
 nltk/test/nonmonotonic.doctest                 |   2 +-
 nltk/test/parse.doctest                        |   2 +-
 nltk/test/portuguese_en.doctest                |   2 +-
 nltk/test/probability.doctest                  |  50 ++-
 nltk/test/propbank.doctest                     |   2 +-
 nltk/test/relextract.doctest                   |   2 +-
 nltk/test/resolution.doctest                   |   2 +-
 nltk/test/semantics.doctest                    |   2 +-
 nltk/test/sentiment.doctest                    |   2 +-
 nltk/test/sentiwordnet.doctest                 |   8 +-
 nltk/test/simple.doctest                       |   2 +-
 nltk/test/stem.doctest                         |   2 +-
 nltk/test/tag.doctest                          |  13 +-
 nltk/test/tokenize.doctest                     |  57 ++-
 nltk/test/toolbox.doctest                      |   2 +-
 nltk/test/translate.doctest                    |   2 +-
 nltk/test/tree.doctest                         |   2 +-
 nltk/test/treeprettyprinter.doctest            |   2 +-
 nltk/test/treetransforms.doctest               |   2 +-
 nltk/test/unit/test_2x_compat.py               |  37 ++
 nltk/test/unit/test_json2csv_corpus.py         |   2 +-
 nltk/test/unit/test_tag.py                     |   2 +-
 nltk/test/unit/test_tgrep.py                   |   2 +-
 nltk/test/unit/test_tokenize.py                |  23 +
 nltk/test/unit/test_twitter_auth.py            |  14 +-
 nltk/test/unit/translate/test_bleu.py          |  91 +++-
 nltk/test/unit/translate/test_stack_decoder.py |   2 +-
 nltk/test/util.doctest                         |   2 +-
 nltk/test/wordnet.doctest                      |   2 +-
 nltk/test/wordnet_lch.doctest                  |   2 +-
 nltk/test/wsd.doctest                          |   2 +-
 nltk/text.py                                   |   4 +-
 nltk/tgrep.py                                  |   2 +-
 nltk/tokenize/__init__.py                      |   4 +-
 nltk/tokenize/api.py                           |   9 +-
 nltk/tokenize/casual.py                        |  41 +-
 nltk/tokenize/mwe.py                           |  82 ++--
 nltk/tokenize/punkt.py                         |  24 +-
 nltk/tokenize/regexp.py                        |   7 +-
 nltk/tokenize/sexpr.py                         |   2 +-
 nltk/tokenize/simple.py                        |   2 +-
 nltk/tokenize/stanford.py                      |  10 +-
 nltk/tokenize/stanford_segmenter.py            | 157 +++++++
 nltk/tokenize/texttiling.py                    |   2 +-
 nltk/tokenize/treebank.py                      |  67 +--
 nltk/tokenize/util.py                          |  10 +-
 nltk/toolbox.py                                |   2 +-
 nltk/translate/__init__.py                     |   5 +-
 nltk/translate/api.py                          |   4 +-
 nltk/translate/bleu_score.py                   | 434 ++++++++++++++++---
 nltk/translate/gdfa.py                         |   2 +-
 nltk/translate/ibm2.py                         |   2 +-
 nltk/translate/ibm3.py                         |   2 +-
 nltk/translate/ibm4.py                         |   4 +-
 nltk/translate/ibm5.py                         |   4 +-
 nltk/translate/ibm_model.py                    |   6 +-
 nltk/translate/metrics.py                      |   5 +-
 nltk/translate/phrase_based.py                 |   2 +-
 nltk/translate/ribes_score.py                  | 325 +++++++++++++++
 nltk/translate/stack_decoder.py                |   2 +-
 nltk/tree.py                                   |   2 +-
 nltk/treeprettyprinter.py                      |   2 +-
 nltk/twitter/__init__.py                       |   2 +-
 nltk/twitter/api.py                            |   2 +-
 nltk/twitter/common.py                         |   2 +-
 nltk/twitter/twitter_demo.py                   |   2 +-
 nltk/twitter/twitterclient.py                  |   2 +-
 nltk/twitter/util.py                           |   2 +-
 nltk/util.py                                   | 215 +++++++++-
 nltk/wsd.py                                    |   2 +-
 setup.py                                       |   2 +-
 306 files changed, 3270 insertions(+), 949 deletions(-)

diff --git a/LICENSE.txt b/LICENSE.txt
index c8d5879..7a5a3aa 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (C) 2001-2015 NLTK Project
+Copyright (C) 2001-2016 NLTK Project
 
 Licensed under the Apache License, Version 2.0 (the 'License');
 you may not use this file except in compliance with the License.
diff --git a/PKG-INFO b/PKG-INFO
index fd16391..a471bee 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nltk
-Version: 3.1
+Version: 3.2.1
 Summary: Natural Language Toolkit
 Home-page: http://nltk.org/
 Author: Steven Bird
diff --git a/nltk.egg-info/PKG-INFO b/nltk.egg-info/PKG-INFO
index fd16391..a471bee 100644
--- a/nltk.egg-info/PKG-INFO
+++ b/nltk.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nltk
-Version: 3.1
+Version: 3.2.1
 Summary: Natural Language Toolkit
 Home-page: http://nltk.org/
 Author: Steven Bird
diff --git a/nltk.egg-info/SOURCES.txt b/nltk.egg-info/SOURCES.txt
index f905614..023c765 100644
--- a/nltk.egg-info/SOURCES.txt
+++ b/nltk.egg-info/SOURCES.txt
@@ -47,6 +47,7 @@ nltk/ccg/api.py
 nltk/ccg/chart.py
 nltk/ccg/combinator.py
 nltk/ccg/lexicon.py
+nltk/ccg/logic.py
 nltk/chat/__init__.py
 nltk/chat/eliza.py
 nltk/chat/iesha.py
@@ -108,6 +109,7 @@ nltk/corpus/reader/nkjp.py
 nltk/corpus/reader/nombank.py
 nltk/corpus/reader/nps_chat.py
 nltk/corpus/reader/opinion_lexicon.py
+nltk/corpus/reader/panlex_lite.py
 nltk/corpus/reader/pl196x.py
 nltk/corpus/reader/plaintext.py
 nltk/corpus/reader/ppattach.py
@@ -235,6 +237,7 @@ nltk/test/all.py
 nltk/test/bleu.doctest
 nltk/test/bnc.doctest
 nltk/test/ccg.doctest
+nltk/test/ccg_semantics.doctest
 nltk/test/chat80.doctest
 nltk/test/childes.doctest
 nltk/test/childes_fixt.py
@@ -317,6 +320,7 @@ nltk/test/unit/test_seekable_unicode_stream_reader.py
 nltk/test/unit/test_stem.py
 nltk/test/unit/test_tag.py
 nltk/test/unit/test_tgrep.py
+nltk/test/unit/test_tokenize.py
 nltk/test/unit/test_twitter_auth.py
 nltk/test/unit/utils.py
 nltk/test/unit/translate/__init__.py
@@ -337,6 +341,7 @@ nltk/tokenize/regexp.py
 nltk/tokenize/sexpr.py
 nltk/tokenize/simple.py
 nltk/tokenize/stanford.py
+nltk/tokenize/stanford_segmenter.py
 nltk/tokenize/texttiling.py
 nltk/tokenize/treebank.py
 nltk/tokenize/util.py
@@ -353,6 +358,7 @@ nltk/translate/ibm5.py
 nltk/translate/ibm_model.py
 nltk/translate/metrics.py
 nltk/translate/phrase_based.py
+nltk/translate/ribes_score.py
 nltk/translate/stack_decoder.py
 nltk/twitter/__init__.py
 nltk/twitter/api.py
diff --git a/nltk/VERSION b/nltk/VERSION
index 8c50098..e4604e3 100644
--- a/nltk/VERSION
+++ b/nltk/VERSION
@@ -1 +1 @@
-3.1
+3.2.1
diff --git a/nltk/__init__.py b/nltk/__init__.py
index 5f3467d..cfa1f32 100644
--- a/nltk/__init__.py
+++ b/nltk/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK)
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -41,7 +41,7 @@ if __doc__ is not None:  # fix for the ``python -OO``
 
 # Copyright notice
 __copyright__ = """\
-Copyright (C) 2001-2015 NLTK Project.
+Copyright (C) 2001-2016 NLTK Project.
 
 Distributed and Licensed under the Apache License, Version 2.0,
 which is included by reference.
diff --git a/nltk/app/__init__.py b/nltk/app/__init__.py
index 7e02d78..882ffc2 100644
--- a/nltk/app/__init__.py
+++ b/nltk/app/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Applications package
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/app/chartparser_app.py b/nltk/app/chartparser_app.py
index da38b15..7e2b970 100644
--- a/nltk/app/chartparser_app.py
+++ b/nltk/app/chartparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chart Parser Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Jean Mark Gawron <gawron at mail.sdsu.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
@@ -37,7 +37,7 @@ edge you wish to apply a rule to.
 # widget system.
 
 
-
+from __future__ import division
 import nltk.compat
 import pickle
 from tkinter.filedialog import asksaveasfilename, askopenfilename
@@ -1190,7 +1190,7 @@ class ChartView(object):
         self._chart_canvas.yview('moveto', 1.0)
         if self._chart_height != 0:
             self._chart_canvas.yview('moveto',
-                                     float(y-dy)/self._chart_height)
+                                     (y-dy)/self._chart_height)
 
     def _draw_edge(self, edge, lvl):
         """
diff --git a/nltk/app/chunkparser_app.py b/nltk/app/chunkparser_app.py
index 23c24a3..7559c40 100644
--- a/nltk/app/chunkparser_app.py
+++ b/nltk/app/chunkparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Regexp Chunk Parser Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -15,6 +15,7 @@ parser ``nltk.chunk.RegexpChunkParser``.
 # configuration parameters to select what's being chunked (eg VP vs NP)
 # and what part of the data is being used as the development set.
 
+from __future__ import division
 import nltk.compat
 import time
 import textwrap
@@ -378,7 +379,7 @@ class RegexpChunkApp(object):
         self._font = tkinter.font.Font(family='helvetica',
                                  size=-self._size.get())
         self._smallfont = tkinter.font.Font(family='helvetica',
-                                      size=-(int(self._size.get()*14/20)))
+                                      size=-(int(self._size.get()*14//20)))
 
     def _init_menubar(self, parent):
         menubar = Menu(parent)
@@ -454,10 +455,10 @@ class RegexpChunkApp(object):
         self.evalbox.delete('all')
 
         # Draw the precision & recall labels.
-        tag = self.evalbox.create_text(10, height/2-10, justify='left',
+        tag = self.evalbox.create_text(10, height//2-10, justify='left',
                                  anchor='w', text='Precision')
         left, right = self.evalbox.bbox(tag)[2] + 5, width-10
-        tag = self.evalbox.create_text(left + (width-left)/2, height-10,
+        tag = self.evalbox.create_text(left + (width-left)//2, height-10,
                                 anchor='s', text='Recall', justify='center')
         top, bot = 10, self.evalbox.bbox(tag)[1]-10
 
@@ -984,8 +985,8 @@ class RegexpChunkApp(object):
         self.devsetbox['state'] = 'disabled'
 
         # Update the scrollbar
-        first = float(self.devset_index)/self._devset_size.get()
-        last = float(self.devset_index+2)/self._devset_size.get()
+        first = self.devset_index/self._devset_size.get()
+        last = (self.devset_index + 2) / self._devset_size.get()
         self.devset_scroll.set(first, last)
 
     def _chunks(self, tree):
@@ -1241,7 +1242,7 @@ class RegexpChunkApp(object):
         if size is not None: self._size.set(size)
         size = self._size.get()
         self._font.configure(size=-(abs(size)))
-        self._smallfont.configure(size=min(-10, -(abs(size))*14/20))
+        self._smallfont.configure(size=min(-10, -(abs(size))*14//20))
 
     def mainloop(self, *args, **kwargs):
         """
diff --git a/nltk/app/collocations_app.py b/nltk/app/collocations_app.py
index b1a2f8a..7293b73 100644
--- a/nltk/app/collocations_app.py
+++ b/nltk/app/collocations_app.py
@@ -1,12 +1,14 @@
 # Natural Language Toolkit: Collocations Application
 # Much of the GUI code is imported from concordance.py; We intend to merge these tools together
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
 
 
+from __future__ import division
+
 import nltk.compat
 import threading
 import tkinter.font
@@ -327,7 +329,7 @@ class CollocationsModel:
                 text = [w for w in words if len(w) > 2]
                 fd = FreqDist(tuple(text[i:i+2]) for i in range(len(text)-1))
                 vocab = FreqDist(text)
-                scored = [((w1,w2), fd[(w1,w2)] ** 3 / float(vocab[w1] * vocab[w2])) for w1, w2 in fd]
+                scored = [((w1,w2), fd[(w1,w2)] ** 3 / (vocab[w1] * vocab[w2])) for w1, w2 in fd]
                 scored.sort(key=itemgetter(1), reverse=True)
                 self.model.collocations = list(map(itemgetter(0), scored))
                 self.model.queue.put(CORPUS_LOADED_EVENT)
diff --git a/nltk/app/concordance_app.py b/nltk/app/concordance_app.py
index 0ccdcca..0244612 100755
--- a/nltk/app/concordance_app.py
+++ b/nltk/app/concordance_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Concordance Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/rdparser_app.py b/nltk/app/rdparser_app.py
index ca4d98d..962dc40 100644
--- a/nltk/app/rdparser_app.py
+++ b/nltk/app/rdparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Recursive Descent Parser Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -63,7 +63,7 @@ Keyboard Shortcuts::
       [Ctrl-p]\t Print
       [q]\t Quit
 """
-
+from __future__ import division
 import nltk.compat
 import tkinter.font
 from tkinter import (Listbox, IntVar, Button,
@@ -806,7 +806,7 @@ class RecursiveDescentApp(object):
     def _animate_match_backtrack(self, treeloc):
         widget = self._get(self._tree, treeloc)
         node = widget.parent().label()
-        dy = (1.0 * (node.bbox()[3] - widget.bbox()[1] + 14) /
+        dy = ((node.bbox()[3] - widget.bbox()[1] + 14) /
               max(1, self._animation_frames.get()))
         self._animate_match_backtrack_frame(self._animation_frames.get(),
                                             widget, dy)
diff --git a/nltk/app/srparser_app.py b/nltk/app/srparser_app.py
index 1de4dc3..6ae5455 100644
--- a/nltk/app/srparser_app.py
+++ b/nltk/app/srparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Shift-Reduce Parser Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/wordfreq_app.py b/nltk/app/wordfreq_app.py
index 2d9bb9b..c5cb8cf 100644
--- a/nltk/app/wordfreq_app.py
+++ b/nltk/app/wordfreq_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Wordfreq Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py
index 82506cd..a59b9eb 100644
--- a/nltk/app/wordnet_app.py
+++ b/nltk/app/wordnet_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: WordNet Browser Application
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
 #         Paul Bone <pbone at students.csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
@@ -799,7 +799,7 @@ def get_static_web_help_page():
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2015 NLTK Project
+            Copyright (C) 2001-2016 NLTK Project
             Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -870,7 +870,7 @@ def get_static_index_page(with_shutdown):
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"  "http://www.w3.org/TR/html4/frameset.dtd">
 <HTML>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2015 NLTK Project
+            Copyright (C) 2001-2016 NLTK Project
             Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -904,7 +904,7 @@ def get_static_upper_page(with_shutdown):
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
     <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-        Copyright (C) 2001-2015 NLTK Project
+        Copyright (C) 2001-2016 NLTK Project
         Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
         URL: <http://nltk.org/>
         For license information, see LICENSE.TXT -->
diff --git a/nltk/book.py b/nltk/book.py
index 7357446..d1f315c 100644
--- a/nltk/book.py
+++ b/nltk/book.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Some texts for exploration in chapter 1 of the book
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/ccg/__init__.py b/nltk/ccg/__init__.py
index 34b5acb..3b9ac14 100644
--- a/nltk/ccg/__init__.py
+++ b/nltk/ccg/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/api.py b/nltk/ccg/api.py
index bf73fc8..fb7ddb6 100644
--- a/nltk/ccg/api.py
+++ b/nltk/ccg/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CCG Categories
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -91,6 +91,10 @@ class CCGVar(AbstractCCGCategory):
         cls._maxID = cls._maxID + 1
         return cls._maxID - 1
 
+    @classmethod
+    def reset_id(cls):
+        cls._maxID = 0
+
     def is_primitive(self):
         return False
 
diff --git a/nltk/ccg/chart.py b/nltk/ccg/chart.py
index 9b89e8d..f3214ef 100644
--- a/nltk/ccg/chart.py
+++ b/nltk/ccg/chart.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -37,12 +37,15 @@ from nltk.parse import ParserI
 from nltk.parse.chart import AbstractChartRule, EdgeI, Chart
 from nltk.tree import Tree
 
-from nltk.ccg.lexicon import fromstring
+from nltk.ccg.lexicon import fromstring, Token
 from nltk.ccg.combinator import (ForwardT, BackwardT, ForwardApplication,
                                  BackwardApplication, ForwardComposition,
                                  BackwardComposition, ForwardSubstitution,
                                  BackwardBx, BackwardSx)
 from nltk.compat import python_2_unicode_compatible, string_types
+from nltk.ccg.combinator import *
+from nltk.ccg.logic import *
+from nltk.sem.logic import *
 
 # Based on the EdgeI class from NLTK.
 # A number of the properties of the EdgeI interface don't
@@ -73,14 +76,14 @@ class CCGLeafEdge(EdgeI):
     '''
     Class representing leaf edges in a CCG derivation.
     '''
-    def __init__(self, pos, categ, leaf):
+    def __init__(self, pos, token, leaf):
         self._pos = pos
-        self._categ = categ
+        self._token = token
         self._leaf = leaf
-        self._comparison_key = (pos, categ, leaf)
+        self._comparison_key = (pos, token.categ(), leaf)
 
     # Accessors
-    def lhs(self): return self._categ
+    def lhs(self): return self._token.categ()
     def span(self): return (self._pos, self._pos+1)
     def start(self): return self._pos
     def end(self): return self._pos + 1
@@ -91,7 +94,8 @@ class CCGLeafEdge(EdgeI):
     def is_incomplete(self): return False
     def nextsym(self): return None
 
-    def categ(self): return self._categ
+    def token(self): return self._token
+    def categ(self): return self._token.categ()
     def leaf(self): return self._leaf
 
 @python_2_unicode_compatible
@@ -202,8 +206,8 @@ class CCGChartParser(ParserI):
 
         # Initialize leaf edges.
         for index in range(chart.num_leaves()):
-            for cat in lex.categories(chart.leaf(index)):
-                new_edge = CCGLeafEdge(index, cat, chart.leaf(index))
+            for token in lex.categories(chart.leaf(index)):
+                new_edge = CCGLeafEdge(index, token, chart.leaf(index))
                 chart.insert(new_edge, ())
 
 
@@ -242,24 +246,48 @@ class CCGChart(Chart):
             return memo[edge]
 
         if isinstance(edge,CCGLeafEdge):
-            word = tree_class(edge.lhs(), [self._tokens[edge.start()]])
-            leaf = tree_class((edge.lhs(), "Leaf"), [word])
+            word = tree_class(edge.token(), [self._tokens[edge.start()]])
+            leaf = tree_class((edge.token(), "Leaf"), [word])
             memo[edge] = [leaf]
             return [leaf]
 
         memo[edge] = []
         trees = []
-        lhs = (edge.lhs(), "%s" % edge.rule())
 
         for cpl in self.child_pointer_lists(edge):
             child_choices = [self._trees(cp, complete, memo, tree_class)
                              for cp in cpl]
             for children in itertools.product(*child_choices):
+                lhs = (Token(self._tokens[edge.start():edge.end()], edge.lhs(), compute_semantics(children, edge)), str(edge.rule()))
                 trees.append(tree_class(lhs, children))
 
         memo[edge] = trees
         return trees
 
+           
+def compute_semantics(children, edge):
+    if children[0].label()[0].semantics() is None:
+        return None
+        
+    if len(children) is 2:
+        if isinstance(edge.rule(), BackwardCombinator):
+            children = [children[1],children[0]]
+
+        combinator = edge.rule()._combinator
+        function = children[0].label()[0].semantics()
+        argument = children[1].label()[0].semantics()
+
+        if isinstance(combinator, UndirectedFunctionApplication):
+            return compute_function_semantics(function, argument)
+        elif isinstance(combinator, UndirectedComposition):
+            return compute_composition_semantics(function, argument)
+        elif isinstance(combinator, UndirectedSubstitution):
+            return compute_substitution_semantics(function, argument)
+        else:
+            raise AssertionError('Unsupported combinator \'' + combinator + '\'')
+    else:
+        return compute_type_raised_semantics(children[0].label()[0].semantics())
+
 #--------
 # Displaying derivations
 #--------
@@ -273,8 +301,6 @@ def printCCGDerivation(tree):
     # category aligned.
     for (leaf, cat) in leafcats:
         str_cat = "%s" % cat
-#        print(cat.__class__)
-#        print("str_cat", str_cat)
         nextlen = 2 + max(len(leaf), len(str_cat))
         lcatlen = (nextlen - len(str_cat)) // 2
         rcatlen = lcatlen + (nextlen - len(str_cat)) % 2
@@ -282,8 +308,8 @@ def printCCGDerivation(tree):
         lleaflen = (nextlen - len(leaf)) // 2
         rleaflen = lleaflen + (nextlen - len(leaf)) % 2
         leafstr += ' '*lleaflen + leaf + ' '*rleaflen
-    print(leafstr)
-    print(catstr)
+    print(leafstr.rstrip())
+    print(catstr.rstrip())
 
     # Display the derivation steps
     printCCGTree(0,tree)
@@ -294,7 +320,7 @@ def printCCGTree(lwidth,tree):
 
     # Is a leaf (word).
     # Increment the span by the space occupied by the leaf.
-    if not isinstance(tree,Tree):
+    if not isinstance(tree, Tree):
         return 2 + lwidth + len(tree)
 
     # Find the width of the current derivation step
@@ -307,17 +333,22 @@ def printCCGTree(lwidth,tree):
         return max(rwidth,2 + lwidth + len("%s" % tree.label()),
                   2 + lwidth + len(tree[0]))
 
-    (res,op) = tree.label()
+    (token, op) = tree.label()
+
+    if op == u'Leaf':
+        return rwidth
+
     # Pad to the left with spaces, followed by a sequence of '-'
     # and the derivation rule.
     print(lwidth*' ' + (rwidth-lwidth)*'-' + "%s" % op)
     # Print the resulting category on a new line.
-    str_res = "%s" % res
+    str_res = "%s" % (token.categ())
+    if token.semantics() is not None:
+        str_res += " {" + str(token.semantics()) + "}"
     respadlen = (rwidth - lwidth - len(str_res)) // 2 + lwidth
     print(respadlen*' ' + str_res)
     return rwidth
 
-
 ### Demonstration code
 
 # Construct the lexicon
diff --git a/nltk/ccg/combinator.py b/nltk/ccg/combinator.py
index d79e8b9..d617b42 100644
--- a/nltk/ccg/combinator.py
+++ b/nltk/ccg/combinator.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/lexicon.py b/nltk/ccg/lexicon.py
index 6c3e12b..9ff0a9d 100644
--- a/nltk/ccg/lexicon.py
+++ b/nltk/ccg/lexicon.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -17,6 +17,8 @@ from nltk.ccg.api import PrimitiveCategory, Direction, CCGVar, FunctionalCategor
 from nltk.compat import python_2_unicode_compatible
 from nltk.internals import deprecated
 
+from nltk.sem.logic import *
+
 #------------
 # Regular expressions used for parsing components of the lexicon
 #------------
@@ -31,15 +33,50 @@ NEXTPRIM_RE = re.compile(r'''([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)''')
 # Separates the next application operator from the remainder
 APP_RE = re.compile(r'''([\\/])([.,]?)([.,]?)(.*)''')
 
-# Parses the definition of the category of either a word or a family
-LEX_RE = re.compile(r'''([\w_]+)\s*(::|[-=]+>)\s*(.+)''', re.UNICODE)
+# Parses the definition of the right-hand side (rhs) of either a word or a family
+LEX_RE = re.compile(r'''([\S_]+)\s*(::|[-=]+>)\s*(.+)''', re.UNICODE)
+
+# Parses the right hand side that contains category and maybe semantic predicate
+RHS_RE = re.compile(r'''([^{}]*[^ {}])\s*(\{[^}]+\})?''', re.UNICODE)
+
+# Parses the semantic predicate
+SEMANTICS_RE = re.compile(r'''\{([^}]+)\}''', re.UNICODE)
 
 # Strips comments from a line
 COMMENTS_RE = re.compile('''([^#]*)(?:#.*)?''')
 
-#----------
-# Lexicons
-#----------
+class Token(object):
+    """
+    Class representing a token.
+
+    token => category {semantics}
+    e.g. eat => S\\var[pl]/var {\\x y.eat(x,y)}
+
+    * `token` (string)
+    * `categ` (string)
+    * `semantics` (Expression)
+    """
+    def __init__(self, token, categ, semantics=None):
+        self._token = token
+        self._categ = categ
+        self._semantics = semantics
+        
+    def categ(self):
+        return self._categ
+    
+    def semantics(self):
+        return self._semantics
+        
+    def __str__(self):
+        semantics_str = ""
+        if self._semantics is not None:
+            semantics_str = " {" + str(self._semantics) + "}"
+        return "" + str(self._categ) + semantics_str
+    
+    def __cmp__(self, other):
+        if not isinstance(other, Token): return -1
+        return cmp((self._categ,self._semantics),
+                    other.categ(),other.semantics())
 
 @python_2_unicode_compatible
 class CCGLexicon(object):
@@ -76,7 +113,7 @@ class CCGLexicon(object):
         """
         string = ""
         first = True
-        for ident in self._entries:
+        for ident in sorted(self._entries):
             if not first:
                 string = string + "\n"
             string = string + ident + " => "
@@ -169,13 +206,6 @@ def parsePrimitiveCategory(chunks, primitives, families, var):
     raise AssertionError('String \'' + catstr + '\' is neither a family nor primitive category.')
 
 
-def parseCategory(line, primitives, families):
-    """
-    Drop the 'var' from the tuple
-    """
-    return augParseCategory(line, primitives, families)[0]
-
-
 def augParseCategory(line, primitives, families, var=None):
     """
     Parse a string representing a category, and returns a tuple with
@@ -208,11 +238,11 @@ def augParseCategory(line, primitives, families, var=None):
 
     return (res, var)
 
-
-def fromstring(lex_str):
+def fromstring(lex_str, include_semantics=False):
     """
     Convert string representation into a lexicon for CCGs.
     """
+    CCGVar.reset_id()
     primitives = []
     families = {}
     entries = defaultdict(list)
@@ -229,16 +259,24 @@ def fromstring(lex_str):
             primitives = primitives + [prim.strip() for prim in line[2:].strip().split(',')]
         else:
             # Either a family definition, or a word definition
-            (ident, sep, catstr) = LEX_RE.match(line).groups()
+            (ident, sep, rhs) = LEX_RE.match(line).groups()
+            (catstr, semantics_str) = RHS_RE.match(rhs).groups()
             (cat, var) = augParseCategory(catstr, primitives, families)
+
             if sep == '::':
                 # Family definition
                 # ie, Det :: NP/N
                 families[ident] = (cat, var)
             else:
+                semantics = None
+                if include_semantics is True:
+                    if semantics_str is None:
+                        raise AssertionError(line + " must contain semantics because include_semantics is set to True")
+                    else:
+                        semantics = Expression.fromstring(SEMANTICS_RE.match(semantics_str).groups()[0])
                 # Word definition
                 # ie, which => (N\N)/(S/NP)
-                entries[ident].append(cat)
+                entries[ident].append(Token(ident, cat, semantics))
     return CCGLexicon(primitives[0], primitives, families, entries)
 
 
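The Token class added above extends lexicon entries to the form "word => category {semantics}". A minimal sketch of loading such a lexicon, assuming only the fromstring(..., include_semantics=True) API introduced in this hunk; the toy words and predicates below are illustrative, not part of the patch:

    from nltk.ccg import lexicon

    # Entries may now carry a logic expression in braces after the category.
    lex = lexicon.fromstring('''
        :- S, NP
        dogs => NP {dogs}
        bark => S\\NP {\\x.bark(x)}
        ''', include_semantics=True)

    # categories() now yields Token objects pairing a category with an Expression.
    for tok in lex.categories('bark'):
        print(tok.categ(), tok.semantics())   # e.g. (S\NP) \x.bark(x)
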
diff --git a/nltk/ccg/logic.py b/nltk/ccg/logic.py
new file mode 100644
index 0000000..85652ff
--- /dev/null
+++ b/nltk/ccg/logic.py
@@ -0,0 +1,46 @@
+# Natural Language Toolkit: Combinatory Categorial Grammar
+#
+# Copyright (C) 2001-2016 NLTK Project
+# Author: Tanin Na Nakorn (@tanin)
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""
+Helper functions for CCG semantics computation
+"""
+
+from nltk.sem.logic import *
+
+def compute_type_raised_semantics(semantics):
+    core = semantics
+    parent = None
+    while isinstance(core, LambdaExpression):
+        parent = core
+        core = core.term
+        
+    var = Variable("F")
+    while var in core.free():
+        var = unique_variable(pattern=var)
+    core = ApplicationExpression(FunctionVariableExpression(var), core)
+    
+    if parent is not None:
+        parent.term = core
+    else:
+        semantics = core
+    
+    return LambdaExpression(var, semantics)
+
+def compute_function_semantics(function, argument):
+    return ApplicationExpression(function, argument).simplify()
+
+def compute_composition_semantics(function, argument):
+    assert isinstance(argument, LambdaExpression), "`" + str(argument) + "` must be a lambda expression"
+    return LambdaExpression(argument.variable, ApplicationExpression(function, argument.term).simplify())
+
+def compute_substitution_semantics(function, argument):
+    assert isinstance(function, LambdaExpression) and isinstance(function.term, LambdaExpression), "`" + str(function) + "` must be a lambda expression with 2 arguments"
+    assert isinstance(argument, LambdaExpression), "`" + str(argument) + "` must be a lambda expression"
+
+    new_argument = ApplicationExpression(argument, VariableExpression(function.variable)).simplify()
+    new_term = ApplicationExpression(function.term, new_argument).simplify() 
+
+    return LambdaExpression(function.variable, new_term)
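Together with compute_semantics() in chart.py, these helpers propagate lexical semantics through a derivation. A rough end-to-end sketch, assuming the CCGChartParser, DefaultRuleSet and printCCGDerivation already present in nltk.ccg.chart; the toy grammar is illustrative only:

    from nltk.ccg import chart, lexicon

    lex = lexicon.fromstring('''
        :- S, NP
        she => NP {she}
        books => NP {books}
        reads => (S\\NP)/NP {\\x y.read(y, x)}
        ''', include_semantics=True)

    parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
    for parse in parser.parse("she reads books".split()):
        # The derivation printer now appends the computed semantics in braces,
        # resolving to read(she,books) for this toy grammar.
        chart.printCCGDerivation(parse)
        break
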
diff --git a/nltk/chat/__init__.py b/nltk/chat/__init__.py
index 881a73e..e35ac1d 100644
--- a/nltk/chat/__init__.py
+++ b/nltk/chat/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chatbots
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/eliza.py b/nltk/chat/eliza.py
index 000009e..2f37e84 100644
--- a/nltk/chat/eliza.py
+++ b/nltk/chat/eliza.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Eliza
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/chat/iesha.py b/nltk/chat/iesha.py
index 8b856d3..f4321e3 100644
--- a/nltk/chat/iesha.py
+++ b/nltk/chat/iesha.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Teen Chatbot
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Selina Dennis <sjmd at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/rude.py b/nltk/chat/rude.py
index 292d54d..cac31e9 100644
--- a/nltk/chat/rude.py
+++ b/nltk/chat/rude.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Rude Chatbot
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Peter Spiller <pspiller at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/suntsu.py b/nltk/chat/suntsu.py
index f8ddd6f..7189b63 100644
--- a/nltk/chat/suntsu.py
+++ b/nltk/chat/suntsu.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sun Tsu-Bot
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Sam Huston 2007
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/util.py b/nltk/chat/util.py
index 9a2e6c4..f19374c 100644
--- a/nltk/chat/util.py
+++ b/nltk/chat/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chatbot Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/zen.py b/nltk/chat/zen.py
index a6ed163..ae119c4 100644
--- a/nltk/chat/zen.py
+++ b/nltk/chat/zen.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Zen Chatbot
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Amy Holland <amyrh at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chunk/__init__.py b/nltk/chunk/__init__.py
index 7d33a48..d54759b 100644
--- a/nltk/chunk/__init__.py
+++ b/nltk/chunk/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunkers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/api.py b/nltk/chunk/api.py
index 32d3c45..677ec8b 100644
--- a/nltk/chunk/api.py
+++ b/nltk/chunk/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/named_entity.py b/nltk/chunk/named_entity.py
index 8cf1077..91d3f4f 100644
--- a/nltk/chunk/named_entity.py
+++ b/nltk/chunk/named_entity.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chunk/regexp.py b/nltk/chunk/regexp.py
index 6e4d347..4ccdb9e 100644
--- a/nltk/chunk/regexp.py
+++ b/nltk/chunk/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Regular Expression Chunkers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/util.py b/nltk/chunk/util.py
index e6b99ac..0ef7a6d 100644
--- a/nltk/chunk/util.py
+++ b/nltk/chunk/util.py
@@ -1,11 +1,11 @@
 # Natural Language Toolkit: Chunk format conversions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 import re
 
@@ -190,7 +190,7 @@ class ChunkScore(object):
         self._updateMeasures()
         div = self._tp_num + self._fp_num
         if div == 0: return 0
-        else: return float(self._tp_num) / div
+        else: return self._tp_num / div
 
     def recall(self):
         """
@@ -202,7 +202,7 @@ class ChunkScore(object):
         self._updateMeasures()
         div = self._tp_num + self._fn_num
         if div == 0: return 0
-        else: return float(self._tp_num) / div
+        else: return self._tp_num / div
 
     def f_measure(self, alpha=0.5):
         """
diff --git a/nltk/classify/__init__.py b/nltk/classify/__init__.py
index 1f57ee8..5e2bfda 100644
--- a/nltk/classify/__init__.py
+++ b/nltk/classify/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifiers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/api.py b/nltk/classify/api.py
index 641f96f..2e70a23 100644
--- a/nltk/classify/api.py
+++ b/nltk/classify/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/classify/decisiontree.py b/nltk/classify/decisiontree.py
index bc4cb02..ab43796 100644
--- a/nltk/classify/decisiontree.py
+++ b/nltk/classify/decisiontree.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Decision Tree Classifiers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,7 +10,7 @@ A classifier model that decides which label to assign to a token on
 the basis of a tree structure, where branches correspond to conditions
 on feature values, and leaves correspond to label assignments.
 """
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 from collections import defaultdict
 
@@ -69,7 +69,7 @@ class DecisionTreeClassifier(ClassifierI):
         for featureset, label in labeled_featuresets:
             if self.classify(featureset) != label:
                 errors += 1
-        return float(errors)/len(labeled_featuresets)
+        return errors/len(labeled_featuresets)
 
     def pretty_format(self, width=70, prefix='', depth=4):
         """
diff --git a/nltk/classify/maxent.py b/nltk/classify/maxent.py
index 5465e12..c42bce1 100644
--- a/nltk/classify/maxent.py
+++ b/nltk/classify/maxent.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Maximum Entropy Classifiers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Dmitry Chichkov <dchichkov at gmail.com> (TypedMaxentFeatureEncoding)
 # URL: <http://nltk.org/>
@@ -1022,11 +1022,6 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
     log_empirical_fcount = numpy.log2(empirical_fcount)
     del empirical_fcount
 
-    # Old log-likelihood and accuracy; used to check if the change
-    # in log-likelihood or accuracy is sufficient to indicate convergence.
-    ll_old = None
-    acc_old = None
-
     if trace > 0:
         print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
     if trace > 2:
@@ -1150,11 +1145,6 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
         print('      Iteration    Log Likelihood    Accuracy')
         print('      ---------------------------------------')
 
-    # Old log-likelihood and accuracy; used to check if the change
-    # in log-likelihood or accuracy is sufficient to indicate convergence.
-    ll_old = None
-    acc_old = None
-
     # Train the classifier.
     try:
         while True:
diff --git a/nltk/classify/megam.py b/nltk/classify/megam.py
index 6b44df8..b144f31 100644
--- a/nltk/classify/megam.py
+++ b/nltk/classify/megam.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Megam Classifier
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/naivebayes.py b/nltk/classify/naivebayes.py
index 6f473e2..6719df1 100644
--- a/nltk/classify/naivebayes.py
+++ b/nltk/classify/naivebayes.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Naive Bayes Classifiers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/rte_classify.py b/nltk/classify/rte_classify.py
index 5bcca62..a4a98fe 100644
--- a/nltk/classify/rte_classify.py
+++ b/nltk/classify/rte_classify.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Classifier
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/senna.py b/nltk/classify/senna.py
index b8b0aad..87f55fd 100644
--- a/nltk/classify/senna.py
+++ b/nltk/classify/senna.py
@@ -1,7 +1,7 @@
 # encoding: utf-8
 # Natural Language Toolkit: Senna Interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/svm.py b/nltk/classify/svm.py
index 265bdec..c1d1616 100644
--- a/nltk/classify/svm.py
+++ b/nltk/classify/svm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SVM-based classifier
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Leon Derczynski <leon at dcs.shef.ac.uk>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/classify/tadm.py b/nltk/classify/tadm.py
index c019f00..4ca101c 100644
--- a/nltk/classify/tadm.py
+++ b/nltk/classify/tadm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to TADM Classifier
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Joseph Frazee <jfrazee at mail.utexas.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/textcat.py b/nltk/classify/textcat.py
index cb29805..b672e16 100644
--- a/nltk/classify/textcat.py
+++ b/nltk/classify/textcat.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Language ID module using TextCat algorithm
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Avital Pekker <avital.pekker at utoronto.ca>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/classify/util.py b/nltk/classify/util.py
index d40c173..e14707b 100644
--- a/nltk/classify/util.py
+++ b/nltk/classify/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Utility Functions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -9,7 +9,7 @@
 """
 Utility functions and classes for classifiers.
 """
-from __future__ import print_function
+from __future__ import print_function, division
 
 import math
 
@@ -81,13 +81,13 @@ def attested_labels(tokens):
 def log_likelihood(classifier, gold):
     results = classifier.prob_classify_many([fs for (fs, l) in gold])
     ll = [pdist.prob(l) for ((fs, l), pdist) in zip(gold, results)]
-    return math.log(float(sum(ll))/len(ll))
+    return math.log(sum(ll) / len(ll))
 
 def accuracy(classifier, gold):
     results = classifier.classify_many([fs for (fs, l) in gold])
     correct = [l == r for ((fs, l), r) in zip(gold, results)]
     if correct:
-        return float(sum(correct))/len(correct)
+        return sum(correct) / len(correct)
     else:
         return 0
 
@@ -195,7 +195,7 @@ def names_demo(trainer, features=names_demo_features):
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
+        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
         print()
         print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
         for ((name, gender), pdist) in list(zip(test, pdists))[:5]:
@@ -249,7 +249,7 @@ def partial_names_demo(trainer, features=names_demo_features):
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
+        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
         print()
         print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
         for ((name, is_male), pdist) in zip(test, pdists)[:5]:
@@ -303,7 +303,7 @@ def wsd_demo(trainer, word, features, n=1000):
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
+        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
     except NotImplementedError:
         pass
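
The matching `division` import in nltk/classify/util.py gives `accuracy()` and
`log_likelihood()` the same float semantics on Python 2 and 3. A small sketch,
assuming a quickly trained Naive Bayes model and a hypothetical one-item gold set:

    from nltk.classify import NaiveBayesClassifier
    from nltk.classify.util import accuracy, log_likelihood

    train = [({'a': True}, 'x'), ({'a': False}, 'y')]   # toy data
    gold = [({'a': True}, 'x')]
    nb = NaiveBayesClassifier.train(train)
    print(accuracy(nb, gold))          # fraction of correct labels, as a float
    print(log_likelihood(nb, gold))    # log of the average probability of the gold labels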
 
diff --git a/nltk/classify/weka.py b/nltk/classify/weka.py
index 643dac3..3fb9353 100644
--- a/nltk/classify/weka.py
+++ b/nltk/classify/weka.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Weka Classifiers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -63,7 +63,7 @@ def config_weka(classpath=None):
 def _check_weka_version(jar):
     try:
         zf = zipfile.ZipFile(jar)
-    except SystemExit as KeyboardInterrupt:
+    except (SystemExit, KeyboardInterrupt):
         raise
     except:
         return None
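
The old clause `except SystemExit as KeyboardInterrupt:` caught only `SystemExit`
(and bound it to the name `KeyboardInterrupt`); the tuple form re-raises both. A
generic sketch of the corrected pattern, with a hypothetical operation standing in
for reading the jar:

    try:
        version = check_version()            # hypothetical operation
    except (SystemExit, KeyboardInterrupt):
        raise                                # never swallow shutdown or Ctrl-C
    except Exception:
        version = None                       # any other failure is tolerated
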
diff --git a/nltk/cluster/__init__.py b/nltk/cluster/__init__.py
index 67e3e96..21ec307 100644
--- a/nltk/cluster/__init__.py
+++ b/nltk/cluster/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/api.py b/nltk/cluster/api.py
index 9d2999a..79c91ae 100644
--- a/nltk/cluster/api.py
+++ b/nltk/cluster/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterer Interfaces
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # Porting: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/cluster/em.py b/nltk/cluster/em.py
index 9a10ef1..0ac2a2c 100644
--- a/nltk/cluster/em.py
+++ b/nltk/cluster/em.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Expectation Maximization Clusterer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/gaac.py b/nltk/cluster/gaac.py
index 6b06a4e..729cc52 100644
--- a/nltk/cluster/gaac.py
+++ b/nltk/cluster/gaac.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Group Average Agglomerative Clusterer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/kmeans.py b/nltk/cluster/kmeans.py
index 88672ce..2b28b57 100644
--- a/nltk/cluster/kmeans.py
+++ b/nltk/cluster/kmeans.py
@@ -1,10 +1,10 @@
 # Natural Language Toolkit: K-Means Clusterer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 import copy
 import random
@@ -165,7 +165,7 @@ class KMeansClusterer(VectorSpaceClusterer):
             centroid = copy.copy(mean)
             for vector in cluster:
                 centroid += vector
-            return centroid / (1+float(len(cluster)))
+            return centroid / (1+len(cluster))
         else:
             if not len(cluster):
                 sys.stderr.write('Error: no centroid defined for empty cluster.\n')
@@ -174,7 +174,7 @@ class KMeansClusterer(VectorSpaceClusterer):
             centroid = copy.copy(cluster[0])
             for vector in cluster[1:]:
                 centroid += vector
-            return centroid / float(len(cluster))
+            return centroid / len(cluster)
 
     def __repr__(self):
         return '<KMeansClusterer means=%s repeats=%d>' % \
diff --git a/nltk/cluster/util.py b/nltk/cluster/util.py
index 422aab4..ad46242 100644
--- a/nltk/cluster/util.py
+++ b/nltk/cluster/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterer Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/collocations.py b/nltk/collocations.py
index ee48b4c..18f7569 100644
--- a/nltk/collocations.py
+++ b/nltk/collocations.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Collocations and Association Measures
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/compat.py b/nltk/compat.py
index 04ac2be..b4c269a 100755
--- a/nltk/compat.py
+++ b/nltk/compat.py
@@ -1,15 +1,17 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Compatibility
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 from __future__ import absolute_import, print_function
+import os
 import sys
 import types
 from functools import wraps
+import fractions
 
 # Python 2/3 compatibility layer. Based on six.
 
@@ -530,19 +532,14 @@ except ImportError:  # python 2.6
 # The following datasets have a /PY3 subdirectory containing
 # a full copy of the data which has been re-encoded or repickled.
 
-_PY3_DATA_UPDATES = []
+import os.path
 
-if sys.platform.startswith('win'):
-    _PY3_DATA_UPDATES = ["chunkers\maxent_ne_chunker",
-                         "help\tagsets",
-                         "taggers\maxent_treebank_pos_tagger",
-                         "tokenizers\punkt"]
-else:
-    _PY3_DATA_UPDATES = ["chunkers/maxent_ne_chunker",
-                         "help/tagsets",
-                         "taggers/maxent_treebank_pos_tagger",
-                         "tokenizers/punkt"]
+DATA_UPDATES = [("chunkers", "maxent_ne_chunker"),
+                ("help", "tagsets"),
+                ("taggers", "maxent_treebank_pos_tagger"),
+                ("tokenizers", "punkt")]
 
+_PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES]
 
 def add_py3_data(path):
     if PY3:
@@ -682,3 +679,28 @@ def _7bit(method):
 def _was_fixed(method):
     return (getattr(method, "_nltk_compat_7bit", False) or
             getattr(method, "_nltk_compat_transliterated", False))
+
+
+class Fraction(fractions.Fraction):
+    """
+    This is a simplified backwards-compatible version of fractions.Fraction
+    from Python >=3.5. It adds the `_normalize` parameter so that, when it is
+    False, the numerator and denominator are not reduced by their greatest
+    common divisor (gcd); this keeps a meaningful denominator even when the
+    numerator is 0.
+
+    This is most probably only used by nltk.translate.bleu_score, where the
+    numerator and denominator of the different ngram precisions are mutable.
+    The idea of a "mutable" fraction may not be applicable to other usages;
+    see http://stackoverflow.com/questions/34561265
+
+    This object should be deprecated once NLTK stops supporting Python < 3.5.
+    See https://github.com/nltk/nltk/issues/1330
+    """
+    def __new__(cls, numerator=0, denominator=None, _normalize=True):
+        cls = super(Fraction, cls).__new__(cls, numerator, denominator)
+        # To stay compatible with fractions.Fraction.from_float across Python >=2.7,
+        # only skip normalization when numerator is an integer and denominator is given.
+        if not _normalize and type(numerator) == int and denominator:
+            cls._numerator = numerator
+            cls._denominator = denominator
+        return cls
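
A short sketch of how the backported Fraction differs from the standard library
one when `_normalize=False`; this mirrors its intended use in
nltk.translate.bleu_score, where a zero numerator must keep its denominator:

    from fractions import Fraction as StdFraction
    from nltk.compat import Fraction

    print(StdFraction(0, 7))                  # 0 -- normalized down to 0/1
    f = Fraction(0, 7, _normalize=False)
    print(f.numerator, f.denominator)         # 0 7 -- denominator preserved
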
diff --git a/nltk/corpus/__init__.py b/nltk/corpus/__init__.py
index c0d40db..3da480e 100644
--- a/nltk/corpus/__init__.py
+++ b/nltk/corpus/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -161,6 +161,8 @@ nps_chat = LazyCorpusLoader(
 opinion_lexicon = LazyCorpusLoader(
     'opinion_lexicon', OpinionLexiconCorpusReader, r'(\w+)\-words\.txt',
     encoding='ISO-8859-2')
+panlex_lite = LazyCorpusLoader(
+    'panlex_lite', PanLexLiteCorpusReader)
 pl196x = LazyCorpusLoader(
     'pl196x', Pl196xCorpusReader, r'[a-z]-.*\.xml',
     cat_file='cats.txt', textid_file='textids.txt', encoding='utf8')
diff --git a/nltk/corpus/europarl_raw.py b/nltk/corpus/europarl_raw.py
index 9ff0b61..7a1cb8e 100644
--- a/nltk/corpus/europarl_raw.py
+++ b/nltk/corpus/europarl_raw.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Europarl Corpus Readers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author:  Nitin Madnani <nmadnani at umiacs.umd.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/__init__.py b/nltk/corpus/reader/__init__.py
index ca33bb3..ebaac13 100644
--- a/nltk/corpus/reader/__init__.py
+++ b/nltk/corpus/reader/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -102,6 +102,7 @@ from nltk.corpus.reader.opinion_lexicon import *
 from nltk.corpus.reader.pros_cons import *
 from nltk.corpus.reader.categorized_sents import *
 from nltk.corpus.reader.comparative_sents import *
+from nltk.corpus.reader.panlex_lite import *
 
 # Make sure that nltk.corpus.reader.bracket_parse gives the module, not
 # the function bracket_parse() defined in nltk.tree:
@@ -140,5 +141,5 @@ __all__ = [
     'TwitterCorpusReader', 'NKJPCorpusReader', 'CrubadanCorpusReader',
     'MTECorpusReader', 'ReviewsCorpusReader', 'OpinionLexiconCorpusReader',
     'ProsConsCorpusReader', 'CategorizedSentencesCorpusReader',
-    'ComparativeSentencesCorpusReader'
+    'ComparativeSentencesCorpusReader', 'PanLexLiteCorpusReader'
 ]
diff --git a/nltk/corpus/reader/aligned.py b/nltk/corpus/reader/aligned.py
index 133f443..00804c1 100644
--- a/nltk/corpus/reader/aligned.py
+++ b/nltk/corpus/reader/aligned.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Aligned Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/api.py b/nltk/corpus/reader/api.py
index 837e090..ea0a4ab 100644
--- a/nltk/corpus/reader/api.py
+++ b/nltk/corpus/reader/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for Corpus Readers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -30,7 +30,7 @@ class CorpusReader(object):
     identified by its ``file identifier``, which is the relative path
     to the file from the root directory.
 
-    A separate subclass is be defined for each corpus format.  These
+    A separate subclass is defined for each corpus format.  These
     subclasses define one or more methods that provide 'views' on the
     corpus contents, such as ``words()`` (for a list of words) and
     ``parsed_sents()`` (for a list of parsed sentences).  Called with
diff --git a/nltk/corpus/reader/bnc.py b/nltk/corpus/reader/bnc.py
index a36934e..847f145 100644
--- a/nltk/corpus/reader/bnc.py
+++ b/nltk/corpus/reader/bnc.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/bracket_parse.py b/nltk/corpus/reader/bracket_parse.py
index a2a1f88..dcbc97f 100644
--- a/nltk/corpus/reader/bracket_parse.py
+++ b/nltk/corpus/reader/bracket_parse.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Penn Treebank Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/categorized_sents.py b/nltk/corpus/reader/categorized_sents.py
index e74b5fe..501bbe2 100644
--- a/nltk/corpus/reader/categorized_sents.py
+++ b/nltk/corpus/reader/categorized_sents.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Categorized Sentences Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/chasen.py b/nltk/corpus/reader/chasen.py
index aac675f..aa927de 100644
--- a/nltk/corpus/reader/chasen.py
+++ b/nltk/corpus/reader/chasen.py
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Masato Hagiwara <hagisan at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/childes.py b/nltk/corpus/reader/childes.py
index 5ab4b3a..0f96e4c 100644
--- a/nltk/corpus/reader/childes.py
+++ b/nltk/corpus/reader/childes.py
@@ -1,6 +1,6 @@
 # CHILDES XML Corpus Reader
 
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tomonori Nagano <tnagano at gc.cuny.edu>
 #         Alexis Dimitriadis <A.Dimitriadis at uu.nl>
 # URL: <http://nltk.org/>
@@ -16,7 +16,7 @@ __docformat__ = 'epytext en'
 import re
 from collections import defaultdict
 
-from nltk.util import flatten
+from nltk.util import flatten, LazyMap, LazyConcatenation
 from nltk.compat import string_types
 
 from nltk.corpus.reader.util import concat
@@ -60,8 +60,13 @@ class CHILDESCorpusReader(XMLCorpusReader):
         """
         sent=None
         pos=False
-        return concat([self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace) for fileid in self.abspaths(fileids)])
+        if not self._lazy:
+            return [self._get_words(fileid, speaker, sent, stem, relation,
+                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
+
+        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
+            pos, strip_space, replace)
+        return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
     def tagged_words(self, fileids=None, speaker='ALL', stem=False,
             relation=False, strip_space=True, replace=False):
@@ -85,8 +90,13 @@ class CHILDESCorpusReader(XMLCorpusReader):
         """
         sent=None
         pos=True
-        return concat([self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace) for fileid in self.abspaths(fileids)])
+        if not self._lazy:
+            return [self._get_words(fileid, speaker, sent, stem, relation,
+                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
+
+        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
+            pos, strip_space, replace)
+        return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
     def sents(self, fileids=None, speaker='ALL', stem=False,
             relation=None, strip_space=True, replace=False):
@@ -110,8 +120,13 @@ class CHILDESCorpusReader(XMLCorpusReader):
         """
         sent=True
         pos=False
-        return concat([self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace) for fileid in self.abspaths(fileids)])
+        if not self._lazy:
+            return [self._get_words(fileid, speaker, sent, stem, relation,
+                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
+        
+        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
+            pos, strip_space, replace)
+        return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
     def tagged_sents(self, fileids=None, speaker='ALL', stem=False,
             relation=None, strip_space=True, replace=False):
@@ -135,15 +150,22 @@ class CHILDESCorpusReader(XMLCorpusReader):
         """
         sent=True
         pos=True
-        return concat([self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace) for fileid in self.abspaths(fileids)])
+        if not self._lazy:
+            return [self._get_words(fileid, speaker, sent, stem, relation,
+                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
+        
+        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
+            pos, strip_space, replace)
+        return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
     def corpus(self, fileids=None):
         """
         :return: the given file(s) as a dict of ``(corpus_property_key, value)``
         :rtype: list(dict)
         """
-        return [self._get_corpus(fileid) for fileid in self.abspaths(fileids)]
+        if not self._lazy:
+            return [self._get_corpus(fileid) for fileid in self.abspaths(fileids)]
+        return LazyMap(self._get_corpus, self.abspaths(fileids))
 
     def _get_corpus(self, fileid):
         results = dict()
@@ -158,8 +180,9 @@ class CHILDESCorpusReader(XMLCorpusReader):
             ``(participant_property_key, value)``
         :rtype: list(dict)
         """
-        return [self._get_participants(fileid)
-                            for fileid in self.abspaths(fileids)]
+        if not self._lazy:
+            return [self._get_participants(fileid) for fileid in self.abspaths(fileids)]
+        return LazyMap(self._get_participants, self.abspaths(fileids))
 
     def _get_participants(self, fileid):
         # multidimensional dicts
@@ -182,8 +205,11 @@ class CHILDESCorpusReader(XMLCorpusReader):
 
         :param month: If true, return months instead of year-month-date
         """
-        return [self._get_age(fileid, speaker, month)
+        if not self._lazy:
+            return [self._get_age(fileid, speaker, month)
                 for fileid in self.abspaths(fileids)]
+        get_age = lambda fileid: self._get_age(fileid, speaker, month)
+        return LazyMap(get_age, self.abspaths(fileids))
 
     def _get_age(self, fileid, speaker, month):
         xmldoc = ElementTree.parse(fileid).getroot()
@@ -216,8 +242,11 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :return: the given file(s) as a floating number
         :rtype: list(float)
         """
-        return [self._getMLU(fileid, speaker=speaker)
+        if not self._lazy:
+            return [self._getMLU(fileid, speaker=speaker)
                 for fileid in self.abspaths(fileids)]
+        get_MLU = lambda fileid: self._getMLU(fileid, speaker=speaker)
+        return LazyMap(get_MLU, self.abspaths(fileids))
 
     def _getMLU(self, fileid, speaker):
         sents = self._get_words(fileid, speaker=speaker, sent=True, stem=True,
@@ -374,7 +403,7 @@ class CHILDESCorpusReader(XMLCorpusReader):
                     results.append(sents)
                 else:
                     results.extend(sents)
-        return results
+        return LazyMap(lambda x: x, results)
 
 
     # Ready-to-use browser opener
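
When `self._lazy` is set, the CHILDES reader methods above now return lazy
sequences instead of eagerly concatenated lists. A minimal sketch of the
underlying pattern, where the parse function and file list are hypothetical
stand-ins for `_get_words()` and `abspaths()`:

    from nltk.util import LazyMap, LazyConcatenation

    fileids = ['a.xml', 'b.xml']             # hypothetical file list
    def parse_file(fileid):
        return [fileid, fileid.upper()]      # stands in for _get_words(fileid, ...)

    words = LazyConcatenation(LazyMap(parse_file, fileids))
    print(words[0])                          # files are parsed only as needed
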
diff --git a/nltk/corpus/reader/chunked.py b/nltk/corpus/reader/chunked.py
index 10f9ab3..f4a079f 100644
--- a/nltk/corpus/reader/chunked.py
+++ b/nltk/corpus/reader/chunked.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunked Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/cmudict.py b/nltk/corpus/reader/cmudict.py
index 7dc83a5..6ccee07 100644
--- a/nltk/corpus/reader/cmudict.py
+++ b/nltk/corpus/reader/cmudict.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/comparative_sents.py b/nltk/corpus/reader/comparative_sents.py
index f577cee..0ea92a5 100644
--- a/nltk/corpus/reader/comparative_sents.py
+++ b/nltk/corpus/reader/comparative_sents.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Comparative Sentence Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/conll.py b/nltk/corpus/reader/conll.py
index af4b0c4..4b68e61 100644
--- a/nltk/corpus/reader/conll.py
+++ b/nltk/corpus/reader/conll.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CONLL Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/crubadan.py b/nltk/corpus/reader/crubadan.py
index 73e3fbf..4642353 100644
--- a/nltk/corpus/reader/crubadan.py
+++ b/nltk/corpus/reader/crubadan.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: An Crubadan N-grams Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Avital Pekker <avital.pekker at utoronto.ca>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/dependency.py b/nltk/corpus/reader/dependency.py
index fc0a758..c72358f 100644
--- a/nltk/corpus/reader/dependency.py
+++ b/nltk/corpus/reader/dependency.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Kepa Sarasola <kepa.sarasola at ehu.es>
 #         Iker Manterola <returntothehangar at hotmail.com>
 #
diff --git a/nltk/corpus/reader/framenet.py b/nltk/corpus/reader/framenet.py
index 7c76957..9548781 100644
--- a/nltk/corpus/reader/framenet.py
+++ b/nltk/corpus/reader/framenet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Framenet Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Chuck Wooters <wooters at icsi.berkeley.edu>,
 #          Nathan Schneider <nschneid at cs.cmu.edu>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/ieer.py b/nltk/corpus/reader/ieer.py
index a44bfe0..977b285 100644
--- a/nltk/corpus/reader/ieer.py
+++ b/nltk/corpus/reader/ieer.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: IEER Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/indian.py b/nltk/corpus/reader/indian.py
index d1a2714..f7dee59 100644
--- a/nltk/corpus/reader/indian.py
+++ b/nltk/corpus/reader/indian.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/ipipan.py b/nltk/corpus/reader/ipipan.py
index dc37404..fa62e48 100644
--- a/nltk/corpus/reader/ipipan.py
+++ b/nltk/corpus/reader/ipipan.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: IPI PAN Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Konrad Goluchowski <kodie at mimuw.edu.pl>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/knbc.py b/nltk/corpus/reader/knbc.py
index af1de33..e280fc9 100644
--- a/nltk/corpus/reader/knbc.py
+++ b/nltk/corpus/reader/knbc.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # KNB Corpus reader
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Masato Hagiwara <hagisan at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/lin.py b/nltk/corpus/reader/lin.py
index 05aeb97..cb25822 100644
--- a/nltk/corpus/reader/lin.py
+++ b/nltk/corpus/reader/lin.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Lin's Thesaurus
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Dan Blanchard <dblanchard at ets.org>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.txt
diff --git a/nltk/corpus/reader/mte.py b/nltk/corpus/reader/mte.py
index 28d5767..71dd1a8 100644
--- a/nltk/corpus/reader/mte.py
+++ b/nltk/corpus/reader/mte.py
@@ -5,25 +5,24 @@ import os
 from functools import reduce
 from nltk import compat
 from nltk.corpus.reader import concat, TaggedCorpusReader
+from nltk.corpus.reader.xmldocs import XMLCorpusView
 
-lxmlAvailable = False
-try:
-    from lxml import etree
-    lxmlAvailable = True
-except ImportError:
-    #first try c version of ElementTree
-    try:
-        import xml.etree.cElementTree as etree
-    except ImportError:
-        import xml.etree.ElementTree as etree
+import xml.etree.ElementTree as etree
 import re
 
 def xpath(root, path, ns):
-    if lxmlAvailable:
-        return root.xpath(path, namespaces=ns)
-    else:
-        return root.findall(path, ns)
+    return root.findall(path, ns)
 
+class MTECorpusView(XMLCorpusView):
+    """
+    Class for lazily viewing the MTE Corpus.
+    """
+
+    def __init__(self, fileid, tagspec, elt_handler=None):
+        XMLCorpusView.__init__(self, fileid, tagspec, elt_handler)
+
+    def read_block(self, stream, tagspec=None, elt_handler=None):
+        return list(filter(lambda x: x is not None, XMLCorpusView.read_block(self, stream, tagspec, elt_handler)))
 
 class MTEFileReader:
     """
@@ -31,85 +30,105 @@ class MTEFileReader:
     parses the xml files and does some tag-filtering depending on the
     given method parameters.
     """
-    ns = {'tei': 'http://www.tei-c.org/ns/1.0', 'xml': 'http://www.w3.org/XML/1998/namespace'}
+    ns = {'tei': 'http://www.tei-c.org/ns/1.0',
+          'xml': 'http://www.w3.org/XML/1998/namespace'}
     tag_ns = '{http://www.tei-c.org/ns/1.0}'
     xml_ns = '{http://www.w3.org/XML/1998/namespace}'
+    word_path = "TEI/text/body/div/div/p/s/(w|c)"
+    sent_path = "TEI/text/body/div/div/p/s"
+    para_path = "TEI/text/body/div/div/p"
 
-    def __init__(self, file_path):
-        tree = etree.parse(file_path)
-        self.__root = xpath(tree.getroot(), './tei:text/tei:body', self.ns)[0]
 
-    @classmethod
-    def _words(self, text_root):
-        return [w.text for w in xpath(text_root, './/*', self.ns) if
-                w.tag == self.tag_ns + "w" or w.tag == self.tag_ns + "c"]
+    def __init__(self, file_path):
+        self.__file_path = file_path
 
     @classmethod
-    def _sents(self, text_root):
-        return [MTEFileReader._words(s) for s in xpath(text_root, './/tei:s', self.ns)]
+    def _word_elt(self, elt, context):
+        return elt.text
 
     @classmethod
-    def _paras(self, text_root):
-        return [MTEFileReader._sents(p) for p in xpath(text_root, './/tei:p', self.ns)]
+    def _sent_elt(self, elt, context):
+        return [self._word_elt(w, None) for w in xpath(elt, '*', self.ns)]
 
     @classmethod
-    def _lemma_words(self, text_root):
-        return [(w.text, w.attrib['lemma']) for w in xpath(text_root, './/tei:w', self.ns)]
+    def _para_elt(self, elt, context):
+        return [self._sent_elt(s, None) for s in xpath(elt, '*', self.ns)]
 
     @classmethod
-    def _tagged_words(self, text_root, tags=""):
-        if tags is None or tags == "":
-            return [(w.text, w.attrib['ana']) for w in xpath(text_root, './/tei:w', self.ns)]
-
+    def _tagged_word_elt(self, elt, context):
+        if ('ana' not in elt.attrib):
+            return (elt.text, '')
+
+        if self.__tags == "" and self.__tagset == "msd":
+            return (elt.text, elt.attrib['ana'])
+        elif self.__tags == "" and self.__tagset == "universal":
+            return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana']))
         else:
-            tags = re.compile('^' + re.sub("-",".",tags) + '.*$')
-            return [(w.text, w.attrib['ana']) for w in xpath(text_root, './/tei:w', self.ns)
-                                              if tags.match(w.attrib['ana'])]
+            tags = re.compile('^' + re.sub("-", ".", self.__tags) + '.*$')
+            if (tags.match(elt.attrib['ana'])):
+                if self.__tagset == "msd":
+                    return (elt.text, elt.attrib['ana'])
+                else:
+                    return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana']))
+            else:
+                return None
 
     @classmethod
-    def _lemma_sents(self, text_root):
-        return [MTEFileReader._lemma_words(s) for s in xpath(text_root, './/tei:s', self.ns)]
+    def _tagged_sent_elt(self, elt, context):
+        return list(filter(lambda x: x is not None, [self._tagged_word_elt(w, None) for w in xpath(elt, '*', self.ns)]))
 
     @classmethod
-    def _tagged_sents(self, text_root, tags=""):
-        # double list comprehension to remove empty sentences in case there is a sentence only containing punctuation marks
-        return [t for t in [MTEFileReader._tagged_words(s, tags) for s in xpath(text_root, './/tei:s', self.ns)] if len(t) > 0]
+    def _tagged_para_elt(self, elt, context):
+        return list(filter(lambda x: x is not None, [self._tagged_sent_elt(s, None) for s in xpath(elt, '*', self.ns)]))
 
     @classmethod
-    def _lemma_paras(self, text_root):
-        return [MTEFileReader._lemma_sents(p) for p in xpath(text_root, './/tei:p', self.ns)]
+    def _lemma_word_elt(self, elt, context):
+        if ('lemma' not in elt.attrib):
+            return (elt.text, '')
+        else:
+            return (elt.text, elt.attrib['lemma'])
 
     @classmethod
-    def _tagged_paras(self, text_root, tags=""):
-        return [t for t in [MTEFileReader._tagged_sents(p, tags) for p in xpath(text_root, './/tei:p', self.ns)] if len(t) > 0]
+    def _lemma_sent_elt(self, elt, context):
+        return [self._lemma_word_elt(w, None) for w in xpath(elt, '*', self.ns)]
 
+    @classmethod
+    def _lemma_para_elt(self, elt, context):
+        return [self._lemma_sent_elt(s, None) for s in xpath(elt, '*', self.ns)]
 
     def words(self):
-        return MTEFileReader._words(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._word_elt)
 
     def sents(self):
-        return MTEFileReader._sents(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._sent_elt)
 
     def paras(self):
-        return MTEFileReader._paras(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._para_elt)
 
     def lemma_words(self):
-        return MTEFileReader._lemma_words(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._lemma_word_elt)
 
-    def tagged_words(self, tags=""):
-        return MTEFileReader._tagged_words(self.__root, tags)
+    def tagged_words(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._tagged_word_elt)
 
     def lemma_sents(self):
-        return MTEFileReader._lemma_sents(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._lemma_sent_elt)
 
-    def tagged_sents(self, tags=""):
-        return MTEFileReader._tagged_sents(self.__root)
+    def tagged_sents(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._tagged_sent_elt)
 
     def lemma_paras(self):
-        return MTEFileReader._lemma_paras(self.__root)
+        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._lemma_para_elt)
+
+    def tagged_paras(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._tagged_para_elt)
 
-    def tagged_paras(self, tags=""):
-        return MTEFileReader._tagged_paras(self.__root)
 
 class MTETagConverter:
     """
@@ -183,7 +202,7 @@ class MTECorpusReader(TaggedCorpusReader):
         :return: the given file(s) as a single string.
         :rtype: str
         """
-        return concat([self.open(f).read() for f in self.__fileids(fileids)])
+        return reduce(lambda a, b: a + b, [self.open(f).read() for f in self.__fileids(fileids)], '')
 
     def words(self, fileids=None):
         """
@@ -191,7 +210,7 @@ class MTECorpusReader(TaggedCorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        return  reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).words() for f in self.__fileids(fileids)], [])
+        return  concat([MTEFileReader(os.path.join(self._root, f)).words() for f in self.__fileids(fileids)])
 
     def sents(self, fileids=None):
         """
@@ -200,7 +219,7 @@ class MTECorpusReader(TaggedCorpusReader):
                  each encoded as a list of word strings
         :rtype: list(list(str))
         """
-        return  reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).sents() for f in self.__fileids(fileids)], [])
+        return  concat([MTEFileReader(os.path.join(self._root, f)).sents() for f in self.__fileids(fileids)])
 
     def paras(self, fileids=None):
         """
@@ -209,7 +228,7 @@ class MTECorpusReader(TaggedCorpusReader):
                  of sentences, which are in turn encoded as lists of word string
         :rtype: list(list(list(str)))
         """
-        return  reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).paras() for f in self.__fileids(fileids)], [])
+        return  concat([MTEFileReader(os.path.join(self._root, f)).paras() for f in self.__fileids(fileids)])
 
     def lemma_words(self, fileids=None):
         """
@@ -218,9 +237,9 @@ class MTECorpusReader(TaggedCorpusReader):
                  and punctuation symbols, encoded as tuples (word, lemma)
         :rtype: list(tuple(str,str))
         """
-        return  reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).lemma_words() for f in self.__fileids(fileids)], [])
+        return  concat([MTEFileReader(os.path.join(self._root, f)).lemma_words() for f in self.__fileids(fileids)])
 
-    def tagged_words(self, fileids=None, tagset="msd", tags=None):
+    def tagged_words(self, fileids=None, tagset="msd", tags=""):
         """
 	    :param fileids: A list specifying the fileids that should be used.
         :param tagset: The tagset that should be used in the returned object,
@@ -231,11 +250,8 @@ class MTECorpusReader(TaggedCorpusReader):
                  encoded as tuples (word, tag)
         :rtype: list(tuple(str, str))
         """
-        words = reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).tagged_words(tags=tags) for f in self.__fileids(fileids)], [])
-        if tagset == "universal":
-            return map(lambda wt : (wt[0], MTETagConverter.msd_to_universal(wt[1])), words)
-        elif tagset == "msd":
-            return words
+        if tagset == "universal" or tagset == "msd":
+            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_words(tagset, tags) for f in self.__fileids(fileids)])
         else:
             print("Unknown tagset specified.")
 
@@ -247,10 +263,10 @@ class MTECorpusReader(TaggedCorpusReader):
                  lemma (word, lemma)
         :rtype: list(list(tuple(str, str)))
         """
-        return  reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).lemma_sents() for f in self.__fileids(fileids)], [])
+        return  concat([MTEFileReader(os.path.join(self._root, f)).lemma_sents() for f in self.__fileids(fileids)])
 
 
-    def tagged_sents(self, fileids=None, tagset="msd", tags=None):
+    def tagged_sents(self, fileids=None, tagset="msd", tags=""):
         """
 	    :param fileids: A list specifying the fileids that should be used.
         :param tagset: The tagset that should be used in the returned object,
@@ -261,11 +277,8 @@ class MTECorpusReader(TaggedCorpusReader):
                  each encoded as a list of (word,tag) tuples
         :rtype: list(list(tuple(str, str)))
         """
-        sents = reduce(lambda a, b : a + b, [MTEFileReader(os.path.join(self._root, f)).tagged_sents(tags=tags) for f in self.__fileids(fileids)], [])
-        if tagset == "universal":
-            return map(lambda s : map (lambda wt : (wt[0], MTETagConverter.msd_to_universal(wt[1])), s), sents)
-        elif tagset == "msd":
-            return sents
+        if tagset == "universal" or tagset == "msd":
+            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_sents(tagset, tags) for f in self.__fileids(fileids)])
         else:
             print("Unknown tagset specified.")
 
@@ -277,9 +290,9 @@ class MTECorpusReader(TaggedCorpusReader):
                  tuples of the word and the corresponding lemma (word, lemma)
         :rtype: list(List(List(tuple(str, str))))
         """
-        return reduce(lambda a, b : a + b ,[MTEFileReader(os.path.join(self._root, f)).lemma_paras() for f in self.__fileids(fileids)], [])
+        return concat([MTEFileReader(os.path.join(self._root, f)).lemma_paras() for f in self.__fileids(fileids)])
 
-    def tagged_paras(self, fileids=None, tagset="msd", tags=None):
+    def tagged_paras(self, fileids=None, tagset="msd", tags=""):
         """
 	    :param fileids: A list specifying the fileids that should be used.
         :param tagset: The tagset that should be used in the returned object,
@@ -291,10 +304,7 @@ class MTECorpusReader(TaggedCorpusReader):
                  of (word,tag) tuples
         :rtype: list(list(list(tuple(str, str))))
         """
-        paras = reduce(lambda a, b : a + b, [MTEFileReader(os.path.join(self._root, f)).tagged_paras(tags=tags) for f in self.__fileids(fileids)], [])
-        if tagset == "universal":
-            return map(lambda p : map(lambda s : map (lambda wt : (wt[0], MTETagConverter.msd_to_universal(wt[0])), s), p), paras)
-        elif tagset == "msd":
-            return paras
+        if tagset == "universal" or tagset == "msd":
+            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_paras(tagset, tags) for f in self.__fileids(fileids)])
         else:
             print("Unknown tagset specified.")
diff --git a/nltk/corpus/reader/nkjp.py b/nltk/corpus/reader/nkjp.py
index c7cd522..a6af20b 100644
--- a/nltk/corpus/reader/nkjp.py
+++ b/nltk/corpus/reader/nkjp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: NKJP Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Gabriela Kaczka
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/nombank.py b/nltk/corpus/reader/nombank.py
index 5c8e7cc..8b5db56 100644
--- a/nltk/corpus/reader/nombank.py
+++ b/nltk/corpus/reader/nombank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: NomBank Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Paul Bedaride <paul.bedaride at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/nps_chat.py b/nltk/corpus/reader/nps_chat.py
index 367efec..cf45466 100644
--- a/nltk/corpus/reader/nps_chat.py
+++ b/nltk/corpus/reader/nps_chat.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: NPS Chat Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/opinion_lexicon.py b/nltk/corpus/reader/opinion_lexicon.py
index baaf096..a436eee 100644
--- a/nltk/corpus/reader/opinion_lexicon.py
+++ b/nltk/corpus/reader/opinion_lexicon.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Opinion Lexicon Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/panlex_lite.py b/nltk/corpus/reader/panlex_lite.py
new file mode 100644
index 0000000..62cc6b2
--- /dev/null
+++ b/nltk/corpus/reader/panlex_lite.py
@@ -0,0 +1,165 @@
+# Natural Language Toolkit: PanLex Corpus Reader
+#
+# Copyright (C) 2001-2016 NLTK Project
+# Author: David Kamholz <kamholz at panlex.org>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for PanLex Lite, a stripped down version of PanLex distributed
+as an SQLite database. See the README.txt in the panlex_lite corpus directory
+for more information on PanLex Lite.
+"""
+
+import os
+import sqlite3
+
+from nltk.corpus.reader.api import CorpusReader
+
+class PanLexLiteCorpusReader(CorpusReader):
+    MEANING_Q = """
+        SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv
+        FROM dnx
+        JOIN ex ON (ex.ex = dnx.ex)
+        JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)
+        JOIN ex ex2 ON (ex2.ex = dnx2.ex)
+        WHERE dnx.ex != dnx2.ex AND ex.tt = ? AND ex.lv = ?
+        ORDER BY dnx2.uq DESC
+    """
+
+    TRANSLATION_Q = """
+        SELECT s.tt, sum(s.uq) AS trq FROM (
+            SELECT ex2.tt, max(dnx.uq) AS uq
+            FROM dnx
+            JOIN ex ON (ex.ex = dnx.ex)
+            JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)
+            JOIN ex ex2 ON (ex2.ex = dnx2.ex)
+            WHERE dnx.ex != dnx2.ex AND ex.lv = ? AND ex.tt = ? AND ex2.lv = ?
+            GROUP BY ex2.tt, dnx.ui
+        ) s
+        GROUP BY s.tt
+        ORDER BY trq DESC, s.tt
+    """
+
+    def __init__(self, root):
+        self._c = sqlite3.connect(os.path.join(root, 'db.sqlite')).cursor()
+
+        self._uid_lv = {}
+        self._lv_uid = {}
+
+        for row in self._c.execute('SELECT uid, lv FROM lv'):
+            self._uid_lv[row[0]] = row[1]
+            self._lv_uid[row[1]] = row[0]
+
+    def language_varieties(self, lc=None):
+        """
+        Return a list of PanLex language varieties.
+
+        :param lc: ISO 639 alpha-3 code. If specified, filters returned varieties
+            by this code. If unspecified, all varieties are returned.
+        :return: the specified language varieties as a list of tuples. The first
+            element is the language variety's seven-character uniform identifier,
+            and the second element is its default name.
+        :rtype: list(tuple)
+        """
+
+        if lc is None:
+            return self._c.execute('SELECT uid, tt FROM lv ORDER BY uid').fetchall()
+        else:
+            return self._c.execute('SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid', (lc,)).fetchall()
+
+    def meanings(self, expr_uid, expr_tt):
+        """
+        Return a list of meanings for an expression.
+
+        :param expr_uid: the expression's language variety, as a seven-character
+            uniform identifier.
+        :param expr_tt: the expression's text.
+        :return: a list of Meaning objects.
+        :rtype: list(Meaning)
+        """
+
+        expr_lv = self._uid_lv[expr_uid]
+
+        mn_info = {}
+
+        for i in self._c.execute(self.MEANING_Q, (expr_tt, expr_lv)):
+            mn = i[0]
+            uid = self._lv_uid[i[5]]
+
+            if not mn in mn_info:
+                mn_info[mn] = { 'uq': i[1], 'ap': i[2], 'ui': i[3], 'ex': { expr_uid: [expr_tt] } }
+
+            if not uid in mn_info[mn]['ex']:
+                mn_info[mn]['ex'][uid] = []
+
+            mn_info[mn]['ex'][uid].append(i[4])
+
+        return [ Meaning(mn, mn_info[mn]) for mn in mn_info ]
+
+    def translations(self, from_uid, from_tt, to_uid):
+        """
+        Return a list of translations for an expression into a single language
+            variety.
+
+        :param from_uid: the source expression's language variety, as a
+            seven-character uniform identifier.
+        :param from_tt: the source expression's text.
+        :param to_uid: the target language variety, as a seven-character
+            uniform identifier.
+        :return: a list of translation tuples. The first element is the expression
+            text and the second element is the translation quality.
+        :rtype: list(tuple)
+        """
+
+        from_lv = self._uid_lv[from_uid]
+        to_lv = self._uid_lv[to_uid]
+
+        return self._c.execute(self.TRANSLATION_Q, (from_lv, from_tt, to_lv)).fetchall()
+
+class Meaning(dict):
+    """
+    Represents a single PanLex meaning. A meaning is a translation set derived
+    from a single source.
+    """
+
+    def __init__(self, mn, attr):
+        super(Meaning, self).__init__(**attr)
+        self['mn'] = mn
+
+    def id(self):
+        """
+        :return: the meaning's id.
+        :rtype: int
+        """
+        return self['mn']
+
+    def quality(self):
+        """
+        :return: the meaning's source's quality (0=worst, 9=best).
+        :rtype: int
+        """
+        return self['uq']
+
+    def source(self):
+        """
+        :return: the meaning's source id.
+        :rtype: int
+        """
+        return self['ap']
+
+    def source_group(self):
+        """
+        :return: the meaning's source group id.
+        :rtype: int
+        """
+        return self['ui']
+
+    def expressions(self):
+        """
+        :return: the meaning's expressions as a dictionary whose keys are language
+            variety uniform identifiers and whose values are lists of expression
+            texts.
+        :rtype: dict
+        """
+        return self['ex']
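
A hedged usage sketch for the new reader, assuming the panlex_lite data has been
downloaded (e.g. via nltk.download('panlex_lite')); 'eng-000' and 'spa-000' are
ordinary PanLex uniform identifiers used here purely as examples:

    from nltk.corpus import panlex_lite as plx

    print(plx.language_varieties('eng')[:3])              # (uid, default name) pairs
    for tt, quality in plx.translations('eng-000', 'dog', 'spa-000')[:5]:
        print(tt, quality)
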
diff --git a/nltk/corpus/reader/pl196x.py b/nltk/corpus/reader/pl196x.py
index d51cee9..e782785 100644
--- a/nltk/corpus/reader/pl196x.py
+++ b/nltk/corpus/reader/pl196x.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit:
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Piotr Kasprzyk <p.j.kasprzyk at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/plaintext.py b/nltk/corpus/reader/plaintext.py
index b12669f..f834b7c 100644
--- a/nltk/corpus/reader/plaintext.py
+++ b/nltk/corpus/reader/plaintext.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Nitin Madnani <nmadnani at umiacs.umd.edu>
@@ -71,7 +71,12 @@ class PlaintextCorpusReader(CorpusReader):
         """
         if fileids is None: fileids = self._fileids
         elif isinstance(fileids, string_types): fileids = [fileids]
-        return concat([self.open(f).read() for f in fileids])
+        raw_texts = []
+        for f in fileids:
+            _fin = self.open(f)
+            raw_texts.append(_fin.read())
+            _fin.close() 
+        return concat(raw_texts)
 
     def words(self, fileids=None):
         """
diff --git a/nltk/corpus/reader/ppattach.py b/nltk/corpus/reader/ppattach.py
index 46db6ec..d4f4563 100644
--- a/nltk/corpus/reader/ppattach.py
+++ b/nltk/corpus/reader/ppattach.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PP Attachment Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/propbank.py b/nltk/corpus/reader/propbank.py
index 41340ca..d672e97 100644
--- a/nltk/corpus/reader/propbank.py
+++ b/nltk/corpus/reader/propbank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PropBank Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/pros_cons.py b/nltk/corpus/reader/pros_cons.py
index 5f5e2c6..6bb2eb6 100644
--- a/nltk/corpus/reader/pros_cons.py
+++ b/nltk/corpus/reader/pros_cons.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Pros and Cons Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/reviews.py b/nltk/corpus/reader/reviews.py
index accc294..783ac5c 100644
--- a/nltk/corpus/reader/reviews.py
+++ b/nltk/corpus/reader/reviews.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Product Reviews Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -58,6 +58,9 @@ Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not
     level classification and analysis, this peculiarity should be taken into
     consideration.
 """
+
+from __future__ import division
+
 import re
 
 from nltk.corpus.reader.api import *
@@ -168,10 +171,11 @@ class ReviewsCorpusReader(CorpusReader):
 
     We can compute stats for specific product features:
 
+        >>> from __future__ import division
         >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
         >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
         >>> # We use float for backward compatibility with division in Python2.7
-        >>> mean = float(tot)/n_reviews
+        >>> mean = tot / n_reviews
         >>> print(n_reviews, tot, mean)
         15 24 1.6
     """
diff --git a/nltk/corpus/reader/rte.py b/nltk/corpus/reader/rte.py
index f194ff7..98f39f5 100644
--- a/nltk/corpus/reader/rte.py
+++ b/nltk/corpus/reader/rte.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author:  Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/semcor.py b/nltk/corpus/reader/semcor.py
index 3865212..10811e6 100644
--- a/nltk/corpus/reader/semcor.py
+++ b/nltk/corpus/reader/semcor.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SemCor Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Nathan Schneider <nschneid at cs.cmu.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/senseval.py b/nltk/corpus/reader/senseval.py
index 0ac040b..fa59e90 100644
--- a/nltk/corpus/reader/senseval.py
+++ b/nltk/corpus/reader/senseval.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Senseval 2 Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Steven Bird <stevenbird1 at gmail.com> (modifications)
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/sentiwordnet.py b/nltk/corpus/reader/sentiwordnet.py
index ec678ce..c501f8d 100644
--- a/nltk/corpus/reader/sentiwordnet.py
+++ b/nltk/corpus/reader/sentiwordnet.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
-# Natural Language Toolkit: WordNet
+# Natural Language Toolkit: SentiWordNet
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Christopher Potts <cgpotts at stanford.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -20,10 +20,12 @@ http://sentiwordnet.isti.cnr.it/
     >>> print(swn.senti_synset('breakdown.n.03'))
     <breakdown.n.03: PosScore=0.0 NegScore=0.25>
     >>> list(swn.senti_synsets('slow'))
-    [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),\
-    SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),\
-    SentiSynset('slow.a.02'), SentiSynset('slow.a.04'),\
-    SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]
+    [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
+    SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
+    SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
+    SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
+    SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
+    SentiSynset('behind.r.03')]
     >>> happy = swn.senti_synsets('happy', 'a')
     >>> happy0 = list(happy)[0]
     >>> happy0.pos_score()
@@ -70,11 +72,15 @@ class SentiWordNetCorpusReader(CorpusReader):
         if tuple(vals) in self._db:
             pos_score, neg_score = self._db[tuple(vals)]
             pos, offset = vals
+            if pos == 's':
+                pos = 'a'
             synset = wn._synset_from_pos_and_offset(pos, offset)
             return SentiSynset(pos_score, neg_score, synset)
         else:
             synset = wn.synset(vals[0])
             pos = synset.pos()
+            if pos == 's':
+                pos = 'a'
             offset = synset.offset()
             if (pos, offset) in self._db:
                 pos_score, neg_score = self._db[(pos, offset)]
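
The two new branches normalise WordNet's satellite-adjective tag 's' to 'a' before the ``(pos, offset)`` lookup, which is why entries such as ``dense.s.04`` now appear in the doctest output above. A minimal sketch of the same normalisation, assuming only the public ``nltk.corpus.wordnet`` interface:

    # Normalise the satellite-adjective POS before a (pos, offset) lookup
    from nltk.corpus import wordnet as wn

    synset = wn.synset('dense.s.04')
    pos, offset = synset.pos(), synset.offset()
    if pos == 's':                # satellite adjectives are filed under 'a'
        pos = 'a'
    print(pos, offset)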
diff --git a/nltk/corpus/reader/sinica_treebank.py b/nltk/corpus/reader/sinica_treebank.py
index 27c93b9..06d609a 100644
--- a/nltk/corpus/reader/sinica_treebank.py
+++ b/nltk/corpus/reader/sinica_treebank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sinica Treebank Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/string_category.py b/nltk/corpus/reader/string_category.py
index 8335c6b..b3d9087 100644
--- a/nltk/corpus/reader/string_category.py
+++ b/nltk/corpus/reader/string_category.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: String Category Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/switchboard.py b/nltk/corpus/reader/switchboard.py
index 12f1baa..1b5555f 100644
--- a/nltk/corpus/reader/switchboard.py
+++ b/nltk/corpus/reader/switchboard.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Switchboard Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/tagged.py b/nltk/corpus/reader/tagged.py
index 69ba4dd..2a3bf75 100644
--- a/nltk/corpus/reader/tagged.py
+++ b/nltk/corpus/reader/tagged.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagged Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Jacob Perkins <japerk at gmail.com>
diff --git a/nltk/corpus/reader/toolbox.py b/nltk/corpus/reader/toolbox.py
index 380102a..bef15ca 100644
--- a/nltk/corpus/reader/toolbox.py
+++ b/nltk/corpus/reader/toolbox.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Greg Aumann <greg_aumann at sil.org>
 #         Stuart Robinson <Stuart.Robinson at mpi.nl>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/corpus/reader/twitter.py b/nltk/corpus/reader/twitter.py
index cd957a4..813f022 100644
--- a/nltk/corpus/reader/twitter.py
+++ b/nltk/corpus/reader/twitter.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Twitter Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/util.py b/nltk/corpus/reader/util.py
index bb31cb9..4357645 100644
--- a/nltk/corpus/reader/util.py
+++ b/nltk/corpus/reader/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Reader Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -334,6 +334,9 @@ class StreamBackedCorpusView(AbstractLazySequence):
 
         # If we reach this point, then we should know our length.
         assert self._len is not None
+        # Enforce closing of the stream once we have reached the end of the file.
+        # We should have reached EOF once we're out of the while loop.
+        self.close()
 
     # Use concat for these, so we can use a ConcatenatedCorpusView
     # when possible.
diff --git a/nltk/corpus/reader/verbnet.py b/nltk/corpus/reader/verbnet.py
index 78a0401..370fd2a 100644
--- a/nltk/corpus/reader/verbnet.py
+++ b/nltk/corpus/reader/verbnet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Verbnet Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/wordlist.py b/nltk/corpus/reader/wordlist.py
index 74d5f22..a8b2fcf 100644
--- a/nltk/corpus/reader/wordlist.py
+++ b/nltk/corpus/reader/wordlist.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Word List Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/wordnet.py b/nltk/corpus/reader/wordnet.py
index 7fec8d9..845e447 100644
--- a/nltk/corpus/reader/wordnet.py
+++ b/nltk/corpus/reader/wordnet.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: WordNet
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bethard <Steven.Bethard at colorado.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
@@ -1460,7 +1460,7 @@ class WordNetCorpusReader(CorpusReader):
 
     def all_lemma_names(self, pos=None, lang='eng'):
         """Return all lemma names for all synsets for the given
-        part of speech tag and langauge or languages. If pos is not specified, all synsets
+        part of speech tag and language or languages. If pos is not specified, all synsets
         for all parts of speech will be used."""
 
         if lang == 'eng':
diff --git a/nltk/corpus/reader/xmldocs.py b/nltk/corpus/reader/xmldocs.py
index 757e5b1..786b8ec 100644
--- a/nltk/corpus/reader/xmldocs.py
+++ b/nltk/corpus/reader/xmldocs.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: XML Corpus Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/util.py b/nltk/corpus/util.py
index 166dfb4..7e67d05 100644
--- a/nltk/corpus/util.py
+++ b/nltk/corpus/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Reader Utility Functions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/data.py b/nltk/data.py
index ffef466..2feb5b2 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -369,25 +369,30 @@ class BufferedGzipFile(GzipFile):
         """
         GzipFile.__init__(self, filename, mode, compresslevel, fileobj)
         self._size = kwargs.get('size', self.SIZE)
-        self._buffer = BytesIO()
+        # Note: In Python 3.5 and later, GzipFile already uses a buffered
+        # reader internally, which defines its own self._buffer attribute.
+        # See https://github.com/nltk/nltk/issues/1308
+        if sys.version.startswith('3.5'):
+            sys.stderr.write("Use the native Python gzip.GzipFile instead.")
+        self._nltk_buffer = BytesIO()
         # cStringIO does not support len.
         self._len = 0
 
     def _reset_buffer(self):
         # For some reason calling BytesIO.truncate() here will lead to
         # inconsistent writes so just set _buffer to a new BytesIO object.
-        self._buffer = BytesIO()
+        self._nltk_buffer = BytesIO()
         self._len = 0
 
     def _write_buffer(self, data):
         # Simply write to the buffer and increment the buffer size.
         if data is not None:
-            self._buffer.write(data)
+            self._nltk_buffer.write(data)
             self._len += len(data)
 
     def _write_gzip(self, data):
         # Write the current buffer to the GzipFile.
-        GzipFile.write(self, self._buffer.getvalue())
+        GzipFile.write(self, self._nltk_buffer.getvalue())
         # Then reset the buffer and write the new data to the buffer.
         self._reset_buffer()
         self._write_buffer(data)
@@ -400,7 +405,7 @@ class BufferedGzipFile(GzipFile):
         return GzipFile.close(self)
 
     def flush(self, lib_mode=FLUSH):
-        self._buffer.flush()
+        self._nltk_buffer.flush()
         GzipFile.flush(self, lib_mode)
 
     def read(self, size=None):
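
The rename from ``_buffer`` to ``_nltk_buffer`` avoids shadowing the ``_buffer`` attribute that ``gzip.GzipFile`` itself uses on Python 3.5 (see the issue linked above). A minimal, self-contained sketch of the kind of clash a subclass can run into; the class names here are illustrative only:

    # A subclass attribute silently clobbering base-class state
    class Base(object):
        def __init__(self):
            self._buffer = []      # internal state the base class relies on

    class Sub(Base):
        def __init__(self):
            Base.__init__(self)
            self._buffer = {}      # overwrites the base class's state

    # Renaming the subclass attribute (e.g. to _nltk_buffer) keeps both intact.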
@@ -974,11 +979,17 @@ class OpenOnDemandZipFile(zipfile.ZipFile):
         zipfile.ZipFile.__init__(self, filename)
         assert self.filename == filename
         self.close()
+        # After closing a ZipFile object, _fileRefCnt needs to be reset
+        # so the code works under both Python 2 and Python 3.
+        self._fileRefCnt = 0
 
     def read(self, name):
         assert self.fp is None
         self.fp = open(self.filename, 'rb')
         value = zipfile.ZipFile.read(self, name)
+        # Ensure that _fileRefCnt is updated for Python 2 and 3 compatibility.
+        # Since we only opened one file here, we add 1.
+        self._fileRefCnt += 1
         self.close()
         return value
 
@@ -1252,7 +1263,7 @@ class SeekableUnicodeStreamReader(object):
         ignoring all buffers.
 
         :param est_bytes: A hint, giving an estimate of the number of
-            bytes that will be neded to move forward by ``offset`` chars.
+            bytes that will be needed to move forward by ``offset`` chars.
             Defaults to ``offset``.
         """
         if est_bytes is None:
diff --git a/nltk/downloader.py b/nltk/downloader.py
index 9d9984a..d4ea94c 100644
--- a/nltk/downloader.py
+++ b/nltk/downloader.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus & Model Downloader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -54,9 +54,9 @@ NLTK Download Server
 Before downloading any packages, the corpus and module downloader
 contacts the NLTK download server, to retrieve an index file
 describing the available packages.  By default, this index file is
-loaded from ``http://www.nltk.org/nltk_data/``.  If necessary, it is
-possible to create a new ``Downloader`` object, specifying a different
-URL for the package index file.
+loaded from ``https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml``.
+If necessary, it is possible to create a new ``Downloader`` object,
+specifying a different URL for the package index file.
 
 Usage::
 
@@ -377,7 +377,7 @@ class Downloader(object):
        server index will be considered 'stale,' and will be
        re-downloaded."""
 
-    DEFAULT_URL = 'http://www.nltk.org/nltk_data/'
+    DEFAULT_URL = 'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml'
     """The default URL for the NLTK data server's index.  An
        alternative URL can be specified when creating a new
        ``Downloader`` object."""
@@ -568,7 +568,7 @@ class Downloader(object):
             if isinstance(item, Package):
                 delta = 1./num_packages
             else:
-                delta = float(len(item.packages))/num_packages
+                delta = len(item.packages)/num_packages
             for msg in self.incr_download(item, download_dir, force):
                 if isinstance(msg, ProgressMessage):
                     yield ProgressMessage(progress + msg.progress*delta)
@@ -613,7 +613,7 @@ class Downloader(object):
             infile = compat.urlopen(info.url)
             with open(filepath, 'wb') as outfile:
                 #print info.size
-                num_blocks = max(1, float(info.size)/(1024*16))
+                num_blocks = max(1, info.size/(1024*16))
                 for block in itertools.count():
                     s = infile.read(1024*16) # 16k blocks.
                     outfile.write(s)
@@ -1118,7 +1118,7 @@ class DownloaderShell(object):
                 if new_url in ('', 'x', 'q', 'X', 'Q'):
                     print('  Cancelled!')
                 else:
-                    if not new_url.startswith('http://'):
+                    if not new_url.startswith(('http://', 'https://')):
                         new_url = 'http://'+new_url
                     try: self._ds.url = new_url
                     except Exception as e:
@@ -2093,7 +2093,8 @@ def build_index(root, base_url):
         pkg_xml.set('checksum', '%s' % md5_hexdigest(zf.filename))
         pkg_xml.set('subdir', subdir)
         #pkg_xml.set('svn_revision', _svn_revision(zf.filename))
-        pkg_xml.set('url', url)
+        if not pkg_xml.get('url'):
+            pkg_xml.set('url', url)
 
         # Record the package.
         packages.append(pkg_xml)
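
As the updated docstring notes, a different index can still be supplied when constructing the downloader. A minimal sketch, assuming the ``server_index_url`` keyword of ``nltk.downloader.Downloader`` (check the constructor signature of your NLTK version); the URL is a placeholder:

    # Point the downloader at a custom package index
    from nltk.downloader import Downloader

    d = Downloader(server_index_url='https://example.org/my_nltk_index.xml')
    d.download('punkt')   # fetches the package as listed in the custom index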
diff --git a/nltk/draw/__init__.py b/nltk/draw/__init__.py
index 5d197fc..78088aa 100644
--- a/nltk/draw/__init__.py
+++ b/nltk/draw/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: graphical representations package
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/draw/cfg.py b/nltk/draw/cfg.py
index d691dde..2cdd7b1 100644
--- a/nltk/draw/cfg.py
+++ b/nltk/draw/cfg.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CFG visualization
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -108,7 +108,7 @@ underscores (_).  Nonterminals are colored blue.  If you place the
 mouse over any nonterminal, then all occurrences of that nonterminal
 will be highlighted.
 
-Termianals must be surrounded by single quotes (') or double
+Terminals must be surrounded by single quotes (') or double
 quotes(\").  For example, "dog" and "New York" are terminals.
 Currently, the string within the quotes must consist of alphanumeric
 characters, underscores, and spaces.
diff --git a/nltk/draw/dispersion.py b/nltk/draw/dispersion.py
index 4de4744..206c05f 100644
--- a/nltk/draw/dispersion.py
+++ b/nltk/draw/dispersion.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dispersion Plots
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/table.py b/nltk/draw/table.py
index 98fff2e..898496e 100644
--- a/nltk/draw/table.py
+++ b/nltk/draw/table.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Table widget
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,6 +9,8 @@
 Tkinter widgets for displaying multi-column listboxes and tables.
 """
 
+from __future__ import division
+
 import nltk.compat
 import operator
 
@@ -184,12 +186,12 @@ class MultiListbox(Frame):
 
     def _resize_column_motion_cb(self, event):
         lb = self._listboxes[self._resize_column_index]
-        charwidth = lb.winfo_width() / float(lb['width'])
+        charwidth = lb.winfo_width() / lb['width']
 
         x1 = event.x + event.widget.winfo_x()
         x2 = lb.winfo_x() + lb.winfo_width()
 
-        lb['width'] = max(3, lb['width'] + int((x1-x2)/charwidth))
+        lb['width'] = max(3, lb['width'] + (x1-x2) // charwidth)
 
     def _resize_column_buttonrelease_cb(self, event):
         event.widget.unbind('<ButtonRelease-%d>' % event.num)
@@ -1068,12 +1070,18 @@ def demo():
         if pos[0] != 'N': continue
         word = word.lower()
         for synset in wordnet.synsets(word):
-            hyper = (synset.hypernyms()+[''])[0]
-            hypo = (synset.hyponyms()+[''])[0]
+            try:
+                hyper_def = synset.hypernyms()[0].definition()
+            except:
+                hyper_def = '*none*'
+            try:
+                hypo_def = synset.hyponyms()[0].definition()
+            except:
+                hypo_def = '*none*'
             table.append([word,
-                          getattr(synset, 'definition', '*none*'),
-                          getattr(hyper, 'definition', '*none*'),
-                          getattr(hypo, 'definition', '*none*')])
+                          synset.definition(),
+                          hyper_def,
+                          hypo_def])
 
     table.columnconfig('Word', background='#afa')
     table.columnconfig('Synset', background='#efe')
diff --git a/nltk/draw/tree.py b/nltk/draw/tree.py
index 7a10a5d..8b4e37e 100644
--- a/nltk/draw/tree.py
+++ b/nltk/draw/tree.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Graphical Representations for Trees
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/util.py b/nltk/draw/util.py
index 468775b..c5a2e4e 100644
--- a/nltk/draw/util.py
+++ b/nltk/draw/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Drawing utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/featstruct.py b/nltk/featstruct.py
index 412dae9..81bf6b3 100644
--- a/nltk/featstruct.py
+++ b/nltk/featstruct.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Feature Structures
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>,
 #         Rob Speer,
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/grammar.py b/nltk/grammar.py
index 04301a0..8ae8d45 100644
--- a/nltk/grammar.py
+++ b/nltk/grammar.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Context Free Grammars
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Jason Narad <jason.narad at gmail.com>
@@ -68,7 +68,7 @@ The operation of replacing the left hand side (*lhs*) of a production
 with the right hand side (*rhs*) in a tree (*tree*) is known as
 "expanding" *lhs* to *rhs* in *tree*.
 """
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division 
 
 import re
 
@@ -1130,7 +1130,7 @@ def induce_pcfg(start, productions):
         pcount[prod]       = pcount.get(prod,       0) + 1
 
     prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
-                                prob=float(pcount[p]) / lcount[p.lhs()])
+                                prob=pcount[p] / lcount[p.lhs()])
              for p in pcount]
     return PCFG(start, prods)
 
diff --git a/nltk/help.py b/nltk/help.py
index e76671c..cad41d3 100644
--- a/nltk/help.py
+++ b/nltk/help.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK) Help
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/inference/__init__.py b/nltk/inference/__init__.py
index 172df36..a5d89c4 100644
--- a/nltk/inference/__init__.py
+++ b/nltk/inference/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Inference
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>
 #
diff --git a/nltk/inference/nonmonotonic.py b/nltk/inference/nonmonotonic.py
index c0c4198..0659448 100644
--- a/nltk/inference/nonmonotonic.py
+++ b/nltk/inference/nonmonotonic.py
@@ -2,7 +2,7 @@
 #
 # Author: Daniel H. Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/inference/prover9.py b/nltk/inference/prover9.py
index ac93834..83d8ae3 100644
--- a/nltk/inference/prover9.py
+++ b/nltk/inference/prover9.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to the Prover9 Theorem Prover
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>
 #
diff --git a/nltk/inference/resolution.py b/nltk/inference/resolution.py
index cf1fef9..95d757e 100755
--- a/nltk/inference/resolution.py
+++ b/nltk/inference/resolution.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/inference/tableau.py b/nltk/inference/tableau.py
index 74965eb..996faa5 100644
--- a/nltk/inference/tableau.py
+++ b/nltk/inference/tableau.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: First-Order Tableau Theorem Prover
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/internals.py b/nltk/internals.py
index 41e378d..5f95cc8 100644
--- a/nltk/internals.py
+++ b/nltk/internals.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Internal utility functions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Nitin Madnani <nmadnani at ets.org>
@@ -172,6 +172,31 @@ def read_str(s, start_position):
     given string, then return a tuple ``(val, end_position)``
     containing the value of the string literal and the position where
     it ends.  Otherwise, raise a ``ReadError``.
+
+    :param s: The string to be checked for the presence of a Python
+        string literal.
+    :type s: str
+    
+    :param start_position: The specified beginning position of the string ``s``
+        to begin regex matching.
+    :type start_position: int
+    
+    :return: A tuple containing the matched string literal evaluated as a 
+        string and the end position of the string literal.
+    :rtype: tuple(str, int)
+
+    :raise ReadError: If the ``_STRING_START_RE`` regex doesn't return a
+        match in ``s`` at ``start_position``, i.e., open quote. If the 
+        ``_STRING_END_RE`` regex doesn't return a match in ``s`` at the 
+        end of the first match, i.e., close quote.
+    :raise ValueError: If an invalid string (i.e., contains an invalid
+        escape sequence) is passed into the ``eval``.
+
+    :Example:
+    >>> from nltk.internals import read_str
+    >>> read_str('"Hello", World!', 0)
+    ('Hello', 7)
+
     """
     # Read the open quote, and any modifiers.
     m = _STRING_START_RE.match(s, start_position)
@@ -201,6 +226,27 @@ def read_int(s, start_position):
     string, then return a tuple ``(val, end_position)`` containing the
     value of the integer and the position where it ends.  Otherwise,
     raise a ``ReadError``.
+
+    :param s: The string to be checked for the presence of a Python
+        integer.
+    :type s: str
+    
+    :param start_position: The specified beginning position of the string ``s``
+        to begin regex matching.
+    :type start_position: int
+    
+    :return: A tuple containing the matched integer cast to an int,
+        and the end position of the int in ``s``.
+    :rtype: tuple(int, int)
+
+    :raise ReadError: If the ``_READ_INT_RE`` regex doesn't return a
+        match in ``s`` at ``start_position``.
+
+    :Example:
+    >>> from nltk.internals import read_int
+    >>> read_int('42 is the answer', 0)
+    (42, 2)
+    
     """
     m = _READ_INT_RE.match(s, start_position)
     if not m: raise ReadError('integer', start_position)
@@ -213,6 +259,27 @@ def read_number(s, start_position):
     given string, then return a tuple ``(val, end_position)``
     containing the value of the number and the position where it ends.
     Otherwise, raise a ``ReadError``.
+
+    :param s: The string to be checked for the presence of a Python
+        number.
+    :type s: str
+    
+    :param start_position: The specified beginning position of the string ``s``
+        to begin regex matching.
+    :type start_position: int
+    
+    :return: A tuple containing the matched number cast to a ``float``,
+        and the end position of the number in ``s``.
+    :rtype: tuple(float, int)
+
+    :raise ReadError: If the ``_READ_NUMBER_VALUE`` regex doesn't return a
+        match in ``s`` at ``start_position``.
+
+    :Example:
+    >>> from nltk.internals import read_number
+    >>> read_number('Pi is 3.14159', 6)
+    (3.14159, 13)
+    
     """
     m = _READ_NUMBER_VALUE.match(s, start_position)
     if not m or not (m.group(1) or m.group(2)):
diff --git a/nltk/jsontags.py b/nltk/jsontags.py
index 4059501..28c7ec0 100644
--- a/nltk/jsontags.py
+++ b/nltk/jsontags.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: JSON Encoder/Decoder Helpers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/__init__.py b/nltk/metrics/__init__.py
index b6b108d..e835385 100644
--- a/nltk/metrics/__init__.py
+++ b/nltk/metrics/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/agreement.py b/nltk/metrics/agreement.py
index b379a07..78b6958 100644
--- a/nltk/metrics/agreement.py
+++ b/nltk/metrics/agreement.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tom Lippincott <tom at cs.columbia.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -68,7 +68,7 @@ Expected results from the Artstein and Poesio survey paper:
     1.0
 
 """
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 import logging
 from itertools import groupby
@@ -180,7 +180,7 @@ class AnnotationTask(object):
 
         """
         data = self._grouped_data('item', (x for x in self.data if x['coder'] in (cA, cB)))
-        ret = float(sum(self.agr(cA, cB, item, item_data) for item, item_data in data)) / float(len(self.I))
+        ret = sum(self.agr(cA, cB, item, item_data) for item, item_data in data) / len(self.I)
         log.debug("Observed agreement between %s and %s: %f", cA, cB, ret)
         return ret
 
@@ -220,7 +220,7 @@ class AnnotationTask(object):
             for j, nj in iteritems(label_freqs):
                 for l, nl in iteritems(label_freqs):
                     total += float(nj * nl) * self.distance(l, j)
-        ret = (1.0 / float((len(self.I) * len(self.C) * (len(self.C) - 1)))) * total
+        ret = (1.0 / (len(self.I) * len(self.C) * (len(self.C) - 1))) * total
         log.debug("Observed disagreement: %f", ret)
         return ret
 
@@ -252,7 +252,7 @@ class AnnotationTask(object):
         """Bennett, Albert and Goldstein 1954
 
         """
-        Ae = 1.0 / float(len(self.K))
+        Ae = 1.0 / len(self.K)
         ret = (self.avg_Ao() - Ae) / (1.0 - Ae)
         return ret
 
@@ -265,7 +265,7 @@ class AnnotationTask(object):
         label_freqs = FreqDist(x['labels'] for x in self.data)
         for k, f in iteritems(label_freqs):
             total += f ** 2
-        Ae = total / float((len(self.I) * len(self.C)) ** 2)
+        Ae = total / ((len(self.I) * len(self.C)) ** 2)
         return (self.avg_Ao() - Ae) / (1 - Ae)
 
     def Ae_kappa(self, cA, cB):
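
The agreement coefficients above are now computed with true division. A short usage sketch, assuming the usual ``(coder, item, label)`` triple format accepted by ``AnnotationTask`` and its ``avg_Ao()``/``S()`` methods:

    # Inter-annotator agreement from (coder, item, label) triples
    from nltk.metrics.agreement import AnnotationTask

    data = [('c1', 'item1', 'yes'), ('c2', 'item1', 'yes'),
            ('c1', 'item2', 'yes'), ('c2', 'item2', 'no')]
    task = AnnotationTask(data=data)
    print(task.avg_Ao())   # average observed agreement; 0.5 for this toy data
    print(task.S())        # Bennett, Albert and Goldstein's S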
diff --git a/nltk/metrics/association.py b/nltk/metrics/association.py
index e57be2f..b5677e7 100644
--- a/nltk/metrics/association.py
+++ b/nltk/metrics/association.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Ngram Association Measures
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
@@ -11,6 +11,7 @@ generic, abstract implementation in ``NgramAssocMeasures``, and n-specific
 ``BigramAssocMeasures`` and ``TrigramAssocMeasures``.
 """
 
+from __future__ import division
 import math as _math
 from functools import reduce
 _log2 = lambda x: _math.log(x, 2.0)
@@ -83,12 +84,12 @@ class NgramAssocMeasures(object):
             yield (_product(sum(cont[x] for x in range(2 ** cls._n)
                                 if (x & j) == (i & j))
                             for j in bits) /
-                   float(n_all ** (cls._n - 1)))
+                   (n_all ** (cls._n - 1)))
 
     @staticmethod
     def raw_freq(*marginals):
         """Scores ngrams by their frequency"""
-        return float(marginals[NGRAM]) / marginals[TOTAL]
+        return marginals[NGRAM] / marginals[TOTAL]
 
     @classmethod
     def student_t(cls, *marginals):
@@ -97,7 +98,7 @@ class NgramAssocMeasures(object):
         """
         return ((marginals[NGRAM] -
                   _product(marginals[UNIGRAMS]) /
-                  float(marginals[TOTAL] ** (cls._n - 1))) /
+                  (marginals[TOTAL] ** (cls._n - 1))) /
                 (marginals[NGRAM] + _SMALL) ** .5)
 
     @classmethod
@@ -117,7 +118,7 @@ class NgramAssocMeasures(object):
         logarithm of the result is calculated.
         """
         return (marginals[NGRAM] ** kwargs.get('power', 3) /
-                float(_product(marginals[UNIGRAMS])))
+                _product(marginals[UNIGRAMS]))
 
     @classmethod
     def pmi(cls, *marginals):
@@ -133,21 +134,21 @@ class NgramAssocMeasures(object):
         """
         cont = cls._contingency(*marginals)
         return (cls._n *
-                sum(obs * _ln(float(obs) / (exp + _SMALL) + _SMALL)
+                sum(obs * _ln(obs / (exp + _SMALL) + _SMALL)
                     for obs, exp in zip(cont, cls._expected_values(cont))))
 
     @classmethod
     def poisson_stirling(cls, *marginals):
         """Scores ngrams using the Poisson-Stirling measure."""
         exp = (_product(marginals[UNIGRAMS]) /
-               float(marginals[TOTAL] ** (cls._n - 1)))
+               (marginals[TOTAL] ** (cls._n - 1)))
         return marginals[NGRAM] * (_log2(marginals[NGRAM] / exp) - 1)
 
     @classmethod
     def jaccard(cls, *marginals):
         """Scores ngrams using the Jaccard index."""
         cont = cls._contingency(*marginals)
-        return float(cont[0]) / sum(cont[:-1])
+        return cont[0] / sum(cont[:-1])
 
 
 class BigramAssocMeasures(NgramAssocMeasures):
@@ -199,7 +200,7 @@ class BigramAssocMeasures(NgramAssocMeasures):
         n_xx = sum(cont)
         # For each contingency table cell
         for i in range(4):
-            yield (cont[i] + cont[i ^ 1]) * (cont[i] + cont[i ^ 2]) / float(n_xx)
+            yield (cont[i] + cont[i ^ 1]) * (cont[i] + cont[i ^ 2]) / n_xx
 
     @classmethod
     def phi_sq(cls, *marginals):
@@ -208,7 +209,7 @@ class BigramAssocMeasures(NgramAssocMeasures):
         """
         n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
 
-        return (float((n_ii*n_oo - n_io*n_oi)**2) /
+        return ((n_ii*n_oo - n_io*n_oi)**2 /
                 ((n_ii + n_io) * (n_ii + n_oi) * (n_io + n_oo) * (n_oi + n_oo)))
 
     @classmethod
@@ -235,7 +236,7 @@ class BigramAssocMeasures(NgramAssocMeasures):
     def dice(n_ii, n_ix_xi_tuple, n_xx):
         """Scores bigrams using Dice's coefficient."""
         (n_ix, n_xi) = n_ix_xi_tuple
-        return 2 * float(n_ii) / (n_ix + n_xi)
+        return 2 * n_ii / (n_ix + n_xi)
 
 
 class TrigramAssocMeasures(NgramAssocMeasures):
diff --git a/nltk/metrics/confusionmatrix.py b/nltk/metrics/confusionmatrix.py
index 5ffcd24..b00118e 100644
--- a/nltk/metrics/confusionmatrix.py
+++ b/nltk/metrics/confusionmatrix.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Confusion Matrices
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/distance.py b/nltk/metrics/distance.py
index 049897a..bd72a3d 100644
--- a/nltk/metrics/distance.py
+++ b/nltk/metrics/distance.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Distance Metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Tom Lippincott <tom at cs.columbia.edu>
@@ -20,6 +20,7 @@ As metrics, they must satisfy the following three requirements:
 """
 
 from __future__ import print_function
+from __future__ import division
 
 
 def _edit_dist_init(len1, len2):
@@ -106,7 +107,7 @@ def jaccard_distance(label1, label2):
     """Distance metric comparing set-similarity.
 
     """
-    return (len(label1.union(label2)) - len(label1.intersection(label2)))/float(len(label1.union(label2)))
+    return (len(label1.union(label2)) - len(label1.intersection(label2)))/len(label1.union(label2))
 
 
 def masi_distance(label1, label2):
@@ -134,7 +135,7 @@ def masi_distance(label1, label2):
     else:
         m = 0
 
-    return 1 - (len_intersection / float(len_union)) * m
+    return 1 - (len_intersection / len_union) * m
 
 
 def interval_distance(label1,label2):
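
Both set distances above now divide directly. A small sketch with the set-based metrics from ``nltk.metrics.distance``:

    # Set distances with true division
    from nltk.metrics.distance import jaccard_distance, masi_distance

    a, b = set('abc'), set('bcd')
    print(jaccard_distance(a, b))   # (4 - 2) / 4 == 0.5
    print(masi_distance(a, b))      # Jaccard term scaled by the MASI penalty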
@@ -163,10 +164,10 @@ def presence(label):
 
 def fractional_presence(label):
     return lambda x, y:\
-        abs((float(1.0 / len(x)) - float(1.0 / len(y)))) * (label in x and label in y) \
+        abs(((1.0 / len(x)) - (1.0 / len(y)))) * (label in x and label in y) \
         or 0.0 * (label not in x and label not in y) \
-        or abs(float(1.0 / len(x))) * (label in x and label not in y) \
-        or (float(1.0 / len(y))) * (label not in x and label in y)
+        or abs((1.0 / len(x))) * (label in x and label not in y) \
+        or ((1.0 / len(y))) * (label not in x and label in y)
 
 
 def custom_distance(file):
diff --git a/nltk/metrics/paice.py b/nltk/metrics/paice.py
index e05e598..727a204 100644
--- a/nltk/metrics/paice.py
+++ b/nltk/metrics/paice.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Lauri Hallila <laurihallila at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -29,9 +29,9 @@ def get_words_from_dictionary(lemmas):
 
     :param lemmas: A dictionary where keys are lemmas and values are sets
     or lists of words corresponding to that lemma.
-    :type lemmas: dict
+    :type lemmas: dict(str): list(str)
     :return: Set of words that exist as values in the dictionary
-    :rtype: set
+    :rtype: set(str)
     '''
     words = set()
     for lemma in lemmas:
@@ -44,11 +44,11 @@ def _truncate(words, cutlength):
 
     :param words: Set of words used for analysis
     :param cutlength: Words are stemmed by cutting at this length.
-    :type words: set or list
+    :type words: set(str) or list(str)
     :type cutlength: int
     :return: Dictionary where keys are stems and values are sets of words
     corresponding to that stem.
-    :rtype: dict
+    :rtype: dict(str): set(str)
     '''
     stems = {}
     for word in words:
@@ -66,10 +66,10 @@ def _count_intersection(l1, l2):
 
     :param l1: Tuple of two coordinate pairs defining the first line segment
     :param l2: Tuple of two coordinate pairs defining the second line segment
-    :type l1: tuple
-    :type l2: tuple
+    :type l1: tuple(float, float)
+    :type l2: tuple(float, float)
     :return: Coordinates of the intersection
-    :rtype: tuple
+    :rtype: tuple(float, float)
     '''
     x1, y1 = l1[0]
     x2, y2 = l1[1]
@@ -96,7 +96,7 @@ def _get_derivative(coordinates):
     '''Get derivative of the line from (0,0) to given coordinates.
 
     :param coordinates: A coordinate pair
-    :type coordinates: tuple
+    :type coordinates: tuple(float, float)
     :return: Derivative; inf if x is zero
     :rtype: float
     '''
@@ -112,11 +112,11 @@ def _calculate_cut(lemmawords, stems):
     :param lemmawords: Set or list of words corresponding to certain lemma.
     :param stems: A dictionary where keys are stems and values are sets
     or lists of words corresponding to that stem.
-    :type lemmawords: set or list
-    :type stems: dict
+    :type lemmawords: set(str) or list(str)
+    :type stems: dict(str): set(str)
     :return: Amount of understemmed and overstemmed pairs contributed by words
     existing in both lemmawords and stems.
-    :rtype: tuple
+    :rtype: tuple(float, float)
     '''
     umt, wmt = 0.0, 0.0
     for stem in stems:
@@ -138,13 +138,13 @@ def _calculate(lemmas, stems):
     or lists of words corresponding to that lemma.
     :param stems: A dictionary where keys are stems and values are sets
     or lists of words corresponding to that stem.
-    :type lemmas: dict
-    :type stems: dict
+    :type lemmas: dict(str): list(str)
+    :type stems: dict(str): set(str)
     :return: Global unachieved merge total (gumt),
     global desired merge total (gdmt),
     global wrongly merged total (gwmt) and
     global desired non-merge total (gdnt).
-    :rtype: tuple
+    :rtype: tuple(float, float, float, float)
     '''
 
     n = sum(len(lemmas[word]) for word in lemmas)
@@ -183,7 +183,7 @@ def _indexes(gumt, gdmt, gwmt, gdnt):
     :return: Understemming Index (UI),
     Overstemming Index (OI) and
     Stemming Weight (SW).
-    :rtype: tuple
+    :rtype: tuple(float, float, float)
     '''
     # Calculate Understemming Index (UI),
     # Overstemming Index (OI) and Stemming Weight (SW)
@@ -217,8 +217,8 @@ class Paice(object):
         or lists of words corresponding to that lemma.
         :param stems: A dictionary where keys are stems and values are sets
         or lists of words corresponding to that stem.
-        :type lemmas: dict
-        :type stems: dict
+        :type lemmas: dict(str): list(str)
+        :type stems: dict(str): set(str)
         '''
         self.lemmas = lemmas
         self.stems = stems
@@ -246,10 +246,10 @@ class Paice(object):
 
         :param words: Words used for the analysis
         :param cutlength: Words are stemmed by cutting them at this length
-        :type words: set or list
+        :type words: set(str) or list(str)
         :type cutlength: int
         :return: Understemming and overstemming indexes
-        :rtype: tuple
+        :rtype: tuple(int, int)
         '''
 
         truncated = _truncate(words, cutlength)
@@ -266,7 +266,7 @@ class Paice(object):
         intersection.
         :type cutlength: int
         :return: List of coordinate pairs that define the truncation line
-        :rtype: list
+        :rtype: list(tuple(float, float))
         '''
         words = get_words_from_dictionary(self.lemmas)
         maxlength = max(len(word) for word in words)
diff --git a/nltk/metrics/scores.py b/nltk/metrics/scores.py
index 8e504ad..9113fbc 100644
--- a/nltk/metrics/scores.py
+++ b/nltk/metrics/scores.py
@@ -1,11 +1,11 @@
 # Natural Language Toolkit: Evaluation
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
+from __future__ import print_function, division
 
 from math import fabs
 import operator
@@ -37,7 +37,7 @@ def accuracy(reference, test):
     """
     if len(reference) != len(test):
         raise ValueError("Lists must have the same length.")
-    return float(sum(x == y for x, y in izip(reference, test))) / len(test)
+    return sum(x == y for x, y in izip(reference, test)) / len(test)
 
 def precision(reference, test):
     """
@@ -59,7 +59,7 @@ def precision(reference, test):
     if len(test) == 0:
         return None
     else:
-        return float(len(reference.intersection(test)))/len(test)
+        return len(reference.intersection(test)) / len(test)
 
 def recall(reference, test):
     """
@@ -81,7 +81,7 @@ def recall(reference, test):
     if len(reference) == 0:
         return None
     else:
-        return float(len(reference.intersection(test)))/len(reference)
+        return len(reference.intersection(test)) / len(reference)
 
 def f_measure(reference, test, alpha=0.5):
     """
@@ -113,7 +113,7 @@ def f_measure(reference, test, alpha=0.5):
         return None
     if p == 0 or r == 0:
         return 0
-    return 1.0/(alpha/p + (1-alpha)/r)
+    return 1.0 / (alpha / p + (1-alpha) / r)
 
 def log_likelihood(reference, test):
     """
@@ -133,7 +133,7 @@ def log_likelihood(reference, test):
     # Return the average value of dist.logprob(val).
     total_likelihood = sum(dist.logprob(val)
                             for (val, dist) in izip(reference, test))
-    return total_likelihood/len(reference)
+    return total_likelihood / len(reference)
 
 def approxrand(a, b, **kwargs):
     """
@@ -160,7 +160,7 @@ def approxrand(a, b, **kwargs):
     # there's no point in trying to shuffle beyond all possible permutations
     shuffles = \
         min(shuffles, reduce(operator.mul, xrange(1, len(a) + len(b) + 1)))
-    stat = kwargs.get('statistic', lambda lst: float(sum(lst)) / len(lst))
+    stat = kwargs.get('statistic', lambda lst: sum(lst) / len(lst))
     verbose = kwargs.get('verbose', False)
 
     if verbose:
@@ -191,10 +191,10 @@ def approxrand(a, b, **kwargs):
 
         if verbose and i % 10 == 0:
             print('pseudo-statistic: %f' % pseudo_stat)
-            print('significance: %f' % (float(c + 1) / (i + 1)))
+            print('significance: %f' % ((c + 1) / (i + 1)))
             print('-' * 60)
 
-    significance = float(c + 1) / (shuffles + 1)
+    significance = (c + 1) / (shuffles + 1)
 
     if verbose:
         print('significance: %f' % significance)
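
The same change applies throughout this module; the set-based helpers return true-division results directly. A short usage sketch:

    # Set-based precision, recall and F-measure
    from nltk.metrics.scores import precision, recall, f_measure

    reference = set(['DT', 'NN', 'VB'])
    test = set(['DT', 'NN', 'JJ'])
    print(precision(reference, test))   # 2/3
    print(recall(reference, test))      # 2/3
    print(f_measure(reference, test))   # 2/3 (harmonic mean with alpha=0.5)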
diff --git a/nltk/metrics/segmentation.py b/nltk/metrics/segmentation.py
index 12d4d87..a0e6298 100644
--- a/nltk/metrics/segmentation.py
+++ b/nltk/metrics/segmentation.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Text Segmentation Metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         David Doukhan <david.doukhan at gmail.com>
diff --git a/nltk/metrics/spearman.py b/nltk/metrics/spearman.py
index 9b9a86b..7caf055 100644
--- a/nltk/metrics/spearman.py
+++ b/nltk/metrics/spearman.py
@@ -1,9 +1,10 @@
 # Natural Language Toolkit: Spearman Rank Correlation
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
+from __future__ import division
 
 """
 Tools for comparing ranked lists.
@@ -35,7 +36,7 @@ def spearman_correlation(ranks1, ranks2):
         res += d * d
         n += 1
     try:
-        return 1 - (6 * float(res) / (n * (n*n - 1)))
+        return 1 - (6 * res / (n * (n*n - 1)))
     except ZeroDivisionError:
         # Result is undefined if only one item is ranked
         return 0.0
diff --git a/nltk/misc/__init__.py b/nltk/misc/__init__.py
index 38552ec..b382bc1 100644
--- a/nltk/misc/__init__.py
+++ b/nltk/misc/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Miscellaneous modules
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/minimalset.py b/nltk/misc/minimalset.py
index 689341a..4704589 100644
--- a/nltk/misc/minimalset.py
+++ b/nltk/misc/minimalset.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Minimal Sets
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/sort.py b/nltk/misc/sort.py
index 3b7aed2..6c993fe 100644
--- a/nltk/misc/sort.py
+++ b/nltk/misc/sort.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: List Sorting
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/wordfinder.py b/nltk/misc/wordfinder.py
index ce63311..7c28e5c 100644
--- a/nltk/misc/wordfinder.py
+++ b/nltk/misc/wordfinder.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Word Finder
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/parse/__init__.py b/nltk/parse/__init__.py
index 076b41e..ef230e6 100644
--- a/nltk/parse/__init__.py
+++ b/nltk/parse/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parsers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/api.py b/nltk/parse/api.py
index 37bcdaf..6dc0205 100644
--- a/nltk/parse/api.py
+++ b/nltk/parse/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parser API
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/bllip.py b/nltk/parse/bllip.py
index ee0f8cd..8da15ba 100644
--- a/nltk/parse/bllip.py
+++ b/nltk/parse/bllip.py
@@ -2,7 +2,7 @@
 #
 # Author: David McClosky <dmcc at bigasterisk.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/parse/chart.py b/nltk/parse/chart.py
index 233421d..5853e0e 100644
--- a/nltk/parse/chart.py
+++ b/nltk/parse/chart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: A Chart Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Jean Mark Gawron <gawron at mail.sdsu.edu>
diff --git a/nltk/parse/dependencygraph.py b/nltk/parse/dependencygraph.py
index e76cef8..f0dfe55 100755
--- a/nltk/parse/dependencygraph.py
+++ b/nltk/parse/dependencygraph.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (modifications)
 #
diff --git a/nltk/parse/earleychart.py b/nltk/parse/earleychart.py
index 8dcccd2..9e0e810 100644
--- a/nltk/parse/earleychart.py
+++ b/nltk/parse/earleychart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: An Incremental Earley Chart Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 #         Rob Speer <rspeer at mit.edu>
 #         Edward Loper <edloper at gmail.com>
diff --git a/nltk/parse/evaluate.py b/nltk/parse/evaluate.py
index bd20686..43e2ab4 100644
--- a/nltk/parse/evaluate.py
+++ b/nltk/parse/evaluate.py
@@ -2,7 +2,7 @@
 #
 # Author: Long Duong <longdt219 at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/parse/featurechart.py b/nltk/parse/featurechart.py
index 339a1a4..fe098af 100644
--- a/nltk/parse/featurechart.py
+++ b/nltk/parse/featurechart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Chart Parser for Feature-Based Grammars
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Rob Speer <rspeer at mit.edu>
 #         Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/generate.py b/nltk/parse/generate.py
index b24e3c5..eb3089d 100644
--- a/nltk/parse/generate.py
+++ b/nltk/parse/generate.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Generating from a CFG
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
index 41141fe..5799559 100644
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -2,8 +2,9 @@
 # Natural Language Toolkit: Interface to MaltParser
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
+# Contributors: Liling Tan, Mustufain, osamamukhtar11
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -12,6 +13,7 @@ from __future__ import unicode_literals
 from nltk.six import text_type
 
 import os
+import sys
 import tempfile
 import subprocess
 import inspect
@@ -66,7 +68,7 @@ def find_maltparser(parser_dirname):
     # Checks that that the found directory contains all the necessary .jar
     malt_dependencies = ['','','']
     _malt_jars = set(find_jars_within_path(_malt_dir))
-    _jars = set(jar.rpartition('/')[2] for jar in _malt_jars)
+    _jars = set(os.path.split(jar)[1] for jar in _malt_jars)
     malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
 
     assert malt_dependencies.issubset(_jars)
@@ -215,8 +217,10 @@ class MaltParser(ParserI):
         """
 
         cmd = ['java']
-        cmd+= self.additional_java_args # Adds additional java arguments.
-        cmd+= ['-cp', ':'.join(self.malt_jars)] # Adds classpaths for jars
+        cmd+= self.additional_java_args # Adds additional java arguments
+        # Join classpath entries with ";" on Windows and with ":" on Linux/Mac
+        classpaths_separator = ';' if sys.platform.startswith('win') else ':'
+        cmd+= ['-cp', classpaths_separator.join(self.malt_jars)] # Adds classpaths for jars
         cmd+= ['org.maltparser.Malt'] # Adds the main function.
 
         # Adds the model file.
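
The classpath join above now picks the separator by platform; the standard library exposes the same rule as ``os.pathsep``. A minimal, purely illustrative sketch (the jar names are just the MaltParser dependencies mentioned earlier):

    # Platform-dependent classpath separator
    import os
    import sys

    sep = ';' if sys.platform.startswith('win') else ':'
    # os.pathsep encodes the same rule on the common platforms
    classpath = sep.join(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
    print(classpath, os.pathsep)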
diff --git a/nltk/parse/nonprojectivedependencyparser.py b/nltk/parse/nonprojectivedependencyparser.py
index d880e68..bd40d6a 100644
--- a/nltk/parse/nonprojectivedependencyparser.py
+++ b/nltk/parse/nonprojectivedependencyparser.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/pchart.py b/nltk/parse/pchart.py
index 1ea0998..18c7434 100644
--- a/nltk/parse/pchart.py
+++ b/nltk/parse/pchart.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Probabilistic Chart Parsers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/projectivedependencyparser.py b/nltk/parse/projectivedependencyparser.py
index 363f53a..db3703b 100644
--- a/nltk/parse/projectivedependencyparser.py
+++ b/nltk/parse/projectivedependencyparser.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/recursivedescent.py b/nltk/parse/recursivedescent.py
index 2617aaa..1f9feba 100644
--- a/nltk/parse/recursivedescent.py
+++ b/nltk/parse/recursivedescent.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Recursive Descent Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/shiftreduce.py b/nltk/parse/shiftreduce.py
index f646412..68fcd89 100644
--- a/nltk/parse/shiftreduce.py
+++ b/nltk/parse/shiftreduce.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Shift-Reduce Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/stanford.py b/nltk/parse/stanford.py
index 63b1a4f..e7bdfe0 100644
--- a/nltk/parse/stanford.py
+++ b/nltk/parse/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
@@ -17,7 +17,7 @@ from subprocess import PIPE
 from io import StringIO
 
 from nltk import compat
-from nltk.internals import find_jar, find_jar_iter, config_java, java, _java_options
+from nltk.internals import find_jar, find_jar_iter, config_java, java, _java_options, find_jars_within_path
 
 from nltk.parse.api import ParserI
 from nltk.parse.dependencygraph import DependencyGraph
@@ -61,7 +61,11 @@ class GenericStanfordParser(ParserI):
             key=lambda model_name: re.match(self._MODEL_JAR_PATTERN, model_name)
         )
 
-        self._classpath = (stanford_jar, model_jar)
+        #self._classpath = (stanford_jar, model_jar)
+        
+        # Add all jars from the Stanford directory (including the logging jars) to the classpath
+        stanford_dir = os.path.split(stanford_jar)[0]
+        self._classpath = tuple([model_jar] + find_jars_within_path(stanford_dir))
 
         self.model_path = model_path
         self._encoding = encoding
@@ -210,7 +214,9 @@ class GenericStanfordParser(ParserI):
                 cmd.append(input_file.name)
                 stdout, stderr = java(cmd, classpath=self._classpath,
                                       stdout=PIPE, stderr=PIPE)
-
+                
+            stdout = stdout.replace(b'\xc2\xa0',b' ')
+            stdout = stdout.replace(b'\xa0',b' ')
             stdout = stdout.decode(encoding)
 
         os.unlink(input_file.name)
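
The hunk above stops hard-coding a two-jar classpath for the Stanford parser and instead collects every jar found under the directory containing stanford-parser.jar, so the logging dependencies shipped with newer Stanford releases are picked up as well; the second hunk also strips non-breaking spaces from the parser's raw output before decoding. A minimal sketch of the classpath idea, with hypothetical install paths:

    import os
    from nltk.internals import find_jars_within_path

    # hypothetical locations; point these at your own Stanford parser download
    stanford_jar = '/opt/stanford-parser/stanford-parser.jar'
    model_jar = '/opt/stanford-parser/stanford-parser-models.jar'

    stanford_dir = os.path.split(stanford_jar)[0]
    # the classpath now holds the model jar plus every jar found under stanford_dir
    classpath = tuple([model_jar] + find_jars_within_path(stanford_dir))
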
diff --git a/nltk/parse/transitionparser.py b/nltk/parse/transitionparser.py
index e9ffc37..15bac50 100644
--- a/nltk/parse/transitionparser.py
+++ b/nltk/parse/transitionparser.py
@@ -2,7 +2,7 @@
 #
 # Author: Long Duong <longdt219 at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/parse/util.py b/nltk/parse/util.py
index c117fe0..a8eec83 100644
--- a/nltk/parse/util.py
+++ b/nltk/parse/util.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/parse/viterbi.py b/nltk/parse/viterbi.py
index 282d9aa..a75b0c7 100644
--- a/nltk/parse/viterbi.py
+++ b/nltk/parse/viterbi.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Viterbi Probabilistic Parser
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/probability.py b/nltk/probability.py
index b63835f..56f5739 100644
--- a/nltk/probability.py
+++ b/nltk/probability.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Probability and Statistics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (additions)
 #         Trevor Cohn <tacohn at cs.mu.oz.au> (additions)
@@ -37,7 +37,7 @@ implementation of the ``ConditionalProbDistI`` interface is
 ``ConditionalProbDist``, a derived distribution.
 
 """
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 import math
 import random
@@ -194,7 +194,7 @@ class FreqDist(Counter):
         """
         if self.N() == 0:
             return 0
-        return float(self[sample]) / self.N()
+        return self[sample] / self.N()
 
     def max(self):
         """
@@ -265,6 +265,8 @@ class FreqDist(Counter):
 
         :param samples: The samples to plot (default is all samples)
         :type samples: list
+        :param cumulative: A flag to specify whether the freqs are cumulative (default = False)
+        :type cumulative: bool
         """
         if len(args) == 0:
             args = [len(self)]
@@ -277,11 +279,14 @@ class FreqDist(Counter):
             freqs = [self[sample] for sample in samples]
         # percents = [f * 100 for f in freqs]  only in ProbDist?
 
+        width = max(len("%s" % s) for s in samples)
+        width = max(width, max(len("%d" % f) for f in freqs))
+
         for i in range(len(samples)):
-            print("%4s" % samples[i], end=' ')
+            print("%*s" % (width, samples[i]), end=' ')
         print()
         for i in range(len(samples)):
-            print("%4d" % freqs[i], end=' ')
+            print("%*d" % (width, freqs[i]), end=' ')
         print()
 
     def copy(self):
@@ -292,6 +297,48 @@ class FreqDist(Counter):
         """
         return self.__class__(self)
 
+    # Mathematical operators
+    
+    def __add__(self, other):
+        """
+        Add counts from two counters.
+
+        >>> FreqDist('abbb') + FreqDist('bcc')
+        FreqDist({'b': 4, 'c': 2, 'a': 1})
+
+        """
+        return self.__class__(super(FreqDist, self).__add__(other))
+
+    def __sub__(self, other):
+        """
+        Subtract count, but keep only results with positive counts.
+
+        >>> FreqDist('abbbc') - FreqDist('bccd')
+        FreqDist({'b': 2, 'a': 1})
+
+        """
+        return self.__class__(super(FreqDist, self).__sub__(other))
+
+    def __or__(self, other):
+        """
+        Union is the maximum of value in either of the input counters.
+
+        >>> FreqDist('abbb') | FreqDist('bcc')
+        FreqDist({'b': 3, 'c': 2, 'a': 1})
+
+        """
+        return self.__class__(super(FreqDist, self).__or__(other))
+
+    def __and__(self, other):
+        """
+        Intersection is the minimum of corresponding counts.
+
+        >>> FreqDist('abbb') & FreqDist('bcc')
+        FreqDist({'b': 1})
+
+        """
+        return self.__class__(super(FreqDist, self).__and__(other))
+
     def __le__(self, other):
         if not isinstance(other, FreqDist):
             raise_unorderable_types("<=", self, other)
@@ -1097,12 +1144,12 @@ class WittenBellProbDist(ProbDistI):
             # if freqdist is empty, we approximate P(0) by a UniformProbDist:
             self._P0 = 1.0 / self._Z
         else:
-            self._P0 = self._T / float(self._Z * (self._N + self._T))
+            self._P0 = self._T / (self._Z * (self._N + self._T))
 
     def prob(self, sample):
         # inherit docs from ProbDistI
         c = self._freqdist[sample]
-        return (c / float(self._N + self._T) if c != 0 else self._P0)
+        return (c / (self._N + self._T) if c != 0 else self._P0)
 
     def max(self):
         return self._freqdist.max()
@@ -1266,8 +1313,8 @@ class SimpleGoodTuringProbDist(ProbDistI):
         log_zr = [math.log(i) for i in zr]
 
         xy_cov = x_var = 0.0
-        x_mean = 1.0 * sum(log_r) / len(log_r)
-        y_mean = 1.0 * sum(log_zr) / len(log_zr)
+        x_mean = sum(log_r) / len(log_r)
+        y_mean = sum(log_zr) / len(log_zr)
         for (x, y) in zip(log_r, log_zr):
             xy_cov += (x - x_mean) * (y - y_mean)
             x_var += (x - x_mean)**2
@@ -1292,7 +1339,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
 
             Sr = self.smoothedNr
             smooth_r_star = (r_ + 1) * Sr(r_+1) / Sr(r_)
-            unsmooth_r_star = 1.0 * (r_ + 1) * nr[i+1] / nr[i]
+            unsmooth_r_star = (r_ + 1) * nr[i+1] / nr[i]
 
             std = math.sqrt(self._variance(r_, nr[i], nr[i+1]))
             if abs(unsmooth_r_star-smooth_r_star) <= 1.96 * std:
@@ -1349,7 +1396,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
             if self._bins == self._freqdist.B():
                 p = 0.0
             else:
-                p = p / (1.0 * self._bins - self._freqdist.B())
+                p = p / (self._bins - self._freqdist.B())
         else:
             p = p * self._renormal
         return p
@@ -1358,11 +1405,11 @@ class SimpleGoodTuringProbDist(ProbDistI):
         if count == 0 and self._freqdist.N() == 0 :
             return 1.0
         elif count == 0 and self._freqdist.N() != 0:
-            return 1.0 * self._freqdist.Nr(1) / self._freqdist.N()
+            return self._freqdist.Nr(1) / self._freqdist.N()
 
         if self._switch_at > count:
-            Er_1 = 1.0 * self._freqdist.Nr(count+1)
-            Er = 1.0 * self._freqdist.Nr(count)
+            Er_1 = self._freqdist.Nr(count+1)
+            Er = self._freqdist.Nr(count)
         else:
             Er_1 = self.smoothedNr(count+1)
             Er = self.smoothedNr(count)
@@ -1382,7 +1429,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
         This function returns the total mass of probability transfers from the
         seen samples to the unseen samples.
         """
-        return  1.0 * self.smoothedNr(1) / self._freqdist.N()
+        return  self.smoothedNr(1) / self._freqdist.N()
 
     def max(self):
         return self._freqdist.max()
@@ -1785,10 +1832,10 @@ class ConditionalFreqDist(defaultdict):
 
         :param samples: The samples to plot
         :type samples: list
-        :param title: The title for the graph
-        :type title: str
         :param conditions: The conditions to plot (default is all)
         :type conditions: list
+        :param cumulative: A flag to specify whether the freqs are cumulative (default = False)
+        :type cumulative: bool
         """
 
         cumulative = _get_kwarg(kwargs, 'cumulative', False)
@@ -1796,22 +1843,95 @@ class ConditionalFreqDist(defaultdict):
         samples = _get_kwarg(kwargs, 'samples',
                              sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
 
+        width = max(len("%s" % s) for s in samples)
+        freqs = dict()
+        for c in conditions:
+            if cumulative:
+                freqs[c] = list(self[c]._cumulative_frequencies(samples))
+            else:
+                freqs[c] = [self[c][sample] for sample in samples]
+            width = max(width, max(len("%d" % f) for f in freqs[c]))
+
         condition_size = max(len("%s" % c) for c in conditions)
         print(' ' * condition_size, end=' ')
         for s in samples:
-            print("%4s" % s, end=' ')
+            print("%*s" % (width, s), end=' ')
         print()
         for c in conditions:
             print("%*s" % (condition_size, c), end=' ')
-            if cumulative:
-                freqs = list(self[c]._cumulative_frequencies(samples))
-            else:
-                freqs = [self[c][sample] for sample in samples]
-
-            for f in freqs:
-                print("%4d" % f, end=' ')
+            for f in freqs[c]:
+                print("%*d" % (width, f), end=' ')
             print()
 
+    # Mathematical operators
+    
+    def __add__(self, other):
+        """
+        Add counts from two ConditionalFreqDists.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] + other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count > 0:
+                        result[cond][elem] = count
+        return result
+
+    def __sub__(self, other):
+        """
+        Subtract count, but keep only results with positive counts.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] - other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count < 0:
+                        result[cond][elem] = 0 - count
+        return result
+
+    def __or__(self, other):
+        """
+        Union is the maximum of value in either of the input counters.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] | other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        for cond in other.conditions():
+            if cond not in self.conditions():
+                for elem, count in other[cond].items():
+                    if count > 0:
+                        result[cond][elem] = count
+        return result
+
+    def __and__(self, other):
+        """ 
+        Intersection is the minimum of corresponding counts.
+        """
+        if not isinstance(other, ConditionalFreqDist):
+            return NotImplemented
+        result = ConditionalFreqDist()
+        for cond in self.conditions():
+            newfreqdist = self[cond] & other[cond]
+            if newfreqdist:
+                result[cond] = newfreqdist
+        return result
+
     # @total_ordering doesn't work here, since the class inherits from a builtin class
     def __le__(self, other):
         if not isinstance(other, ConditionalFreqDist):
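
With division added to the __future__ imports, FreqDist.freq() and the smoothing estimators above now use true division on Python 2 as well, tabulate() pads its columns to the widest sample or count, and both FreqDist and ConditionalFreqDist gain Counter-style arithmetic. A short usage sketch; the operator results are the ones given in the new doctests:

    from nltk.probability import FreqDist

    fd1, fd2 = FreqDist('abbb'), FreqDist('bcc')

    fd1 + fd2                             # FreqDist({'b': 4, 'c': 2, 'a': 1}): counts are summed
    fd1 | fd2                             # FreqDist({'b': 3, 'c': 2, 'a': 1}): element-wise maximum
    fd1 & fd2                             # FreqDist({'b': 1}): element-wise minimum
    FreqDist('abbbc') - FreqDist('bccd')  # FreqDist({'b': 2, 'a': 1}): only positive counts survive

    fd1.freq('b')                         # 0.75, true division even on Python 2
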
diff --git a/nltk/sem/__init__.py b/nltk/sem/__init__.py
index 047757d..dd0f11b 100644
--- a/nltk/sem/__init__.py
+++ b/nltk/sem/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Semantic Interpretation
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/boxer.py b/nltk/sem/boxer.py
index b8685bb..8bf8c85 100644
--- a/nltk/sem/boxer.py
+++ b/nltk/sem/boxer.py
@@ -3,7 +3,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/sem/chat80.py b/nltk/sem/chat80.py
index ab7d216..25bae0c 100644
--- a/nltk/sem/chat80.py
+++ b/nltk/sem/chat80.py
@@ -1,7 +1,7 @@
 # Natural Language Toolkit: Chat-80 KB Reader
 # See http://www.w3.org/TR/swbp-skos-core-guide/
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
@@ -503,9 +503,9 @@ def process_bundle(rels):
     dictionary of concepts, indexed by the relation name.
 
     :param rels: bundle of metadata needed for constructing a concept
-    :type rels: list of dict
+    :type rels: list(dict)
     :return: a dictionary of concepts, indexed by the relation name.
-    :rtype: dict
+    :rtype: dict(str): Concept 
     """
     concepts = {}
     for rel in rels:
@@ -549,7 +549,8 @@ def make_valuation(concepts, read=False, lexicon=False):
         # add labels for individuals
         val = label_indivs(val, lexicon=lexicon)
         return val
-    else: return vals
+    else:
+        return vals
 
 
 def val_dump(rels, db):
@@ -561,7 +562,7 @@ def val_dump(rels, db):
     :type rels: list of dict
     :param db: name of file to which data is written.
                The suffix '.db' will be automatically appended.
-    :type db: string
+    :type db: str
     """
     concepts = process_bundle(rels).values()
     valuation = make_valuation(concepts, read=True)
@@ -578,7 +579,7 @@ def val_load(db):
 
     :param db: name of file from which data is read.
                The suffix '.db' should be omitted from the name.
-    :type db: string
+    :type db: str
     """
     dbname = db+".db"
 
@@ -640,8 +641,8 @@ def make_lex(symbols):
     create a lexical rule for the proper name 'Zloty'.
 
     :param symbols: a list of individual constants in the semantic representation
-    :type symbols: sequence
-    :rtype: list
+    :type symbols: sequence -- set(str) 
+    :rtype: list(str)
     """
     lex = []
     header = """
@@ -671,9 +672,9 @@ def concepts(items = items):
     Build a list of concepts corresponding to the relation names in ``items``.
 
     :param items: names of the Chat-80 relations to extract
-    :type items: list of strings
+    :type items: list(str)
     :return: the ``Concept`` objects which are extracted from the relations
-    :rtype: list
+    :rtype: list(Concept)
     """
     if isinstance(items, string_types): items = (items,)
 
diff --git a/nltk/sem/cooper_storage.py b/nltk/sem/cooper_storage.py
index 831adaf..3a1878e 100644
--- a/nltk/sem/cooper_storage.py
+++ b/nltk/sem/cooper_storage.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/drt.py b/nltk/sem/drt.py
index f433c37..0c07762 100644
--- a/nltk/sem/drt.py
+++ b/nltk/sem/drt.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, unicode_literals
diff --git a/nltk/sem/drt_glue_demo.py b/nltk/sem/drt_glue_demo.py
index ccd3c87..bb55042 100644
--- a/nltk/sem/drt_glue_demo.py
+++ b/nltk/sem/drt_glue_demo.py
@@ -3,7 +3,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/sem/evaluate.py b/nltk/sem/evaluate.py
index 2a8cd21..0c42cd5 100644
--- a/nltk/sem/evaluate.py
+++ b/nltk/sem/evaluate.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Models for first-order languages with lambda
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/glue.py b/nltk/sem/glue.py
index 7ec94a2..8c7541b 100644
--- a/nltk/sem/glue.py
+++ b/nltk/sem/glue.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, division, unicode_literals
@@ -103,6 +103,10 @@ class GlueFormula(object):
     def __ne__(self, other):
         return not self == other
 
+    # sorting for use in doctests which must be deterministic
+    def __lt__(self, other):
+        return str(self) < str(other)
+
     def __str__(self):
         assert isinstance(self.indices, set)
         accum = '%s : %s' % (self.meaning, self.glue)
diff --git a/nltk/sem/hole.py b/nltk/sem/hole.py
index 2cbe90d..25aaada 100644
--- a/nltk/sem/hole.py
+++ b/nltk/sem/hole.py
@@ -3,7 +3,7 @@
 # Author:     Peter Wang
 # Updated by: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/sem/lfg.py b/nltk/sem/lfg.py
index c8a99b3..610d641 100644
--- a/nltk/sem/lfg.py
+++ b/nltk/sem/lfg.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, division, unicode_literals
diff --git a/nltk/sem/linearlogic.py b/nltk/sem/linearlogic.py
index 483f31f..cfbb615 100644
--- a/nltk/sem/linearlogic.py
+++ b/nltk/sem/linearlogic.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, unicode_literals
diff --git a/nltk/sem/logic.py b/nltk/sem/logic.py
index efcfdd9..aa54e8e 100644
--- a/nltk/sem/logic.py
+++ b/nltk/sem/logic.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
@@ -17,6 +17,7 @@ import operator
 from collections import defaultdict
 from functools import reduce
 
+from nltk.util import Trie
 from nltk.internals import Counter
 from nltk.compat import (total_ordering, string_types,
                          python_2_unicode_compatible)
@@ -153,7 +154,7 @@ class LogicParser(object):
         """Split the data into tokens"""
         out = []
         mapping = {}
-        tokenTrie = StringTrie(self.get_all_symbols())
+        tokenTrie = Trie(self.get_all_symbols())
         token = ''
         data_idx = 0
         token_start_idx = data_idx
@@ -176,7 +177,7 @@ class LogicParser(object):
                     c = data[data_idx+len(symbol)]
                 else:
                     break
-            if StringTrie.LEAF in st:
+            if Trie.LEAF in st:
                 #token is a complete symbol
                 if token:
                     mapping[len(out)] = token_start_idx
@@ -1788,23 +1789,6 @@ class EqualityExpression(BinaryExpression):
 
 ### Utilities
 
-class StringTrie(defaultdict):
-    LEAF = "<leaf>"
-
-    def __init__(self, strings=None):
-        defaultdict.__init__(self, StringTrie)
-        if strings:
-            for string in strings:
-                self.insert(string)
-
-    def insert(self, string):
-        if len(string):
-            self[string[0]].insert(string[1:])
-        else:
-            #mark the string is complete
-            self[StringTrie.LEAF] = None
-
-
 class LogicalExpressionException(Exception):
     def __init__(self, index, message):
         self.index = index
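
The private StringTrie helper is removed in favour of the shared nltk.util.Trie, which the tokenizer walks character by character, using Trie.LEAF to mark the end of a complete symbol. A minimal sketch of that longest-match lookup, assuming the small symbol set shown:

    from nltk.util import Trie

    symbols = ['-', '->', '<->']          # assumed sample of operator symbols
    trie = Trie(symbols)

    def longest_symbol(data, start):
        """Return the longest symbol that prefixes data[start:], or None."""
        node, match, i = trie, None, start
        while i < len(data) and data[i] in node:
            node = node[data[i]]
            i += 1
            if Trie.LEAF in node:         # a complete symbol ends here
                match = data[start:i]
        return match

    longest_symbol('a <-> b', 2)          # '<->'
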
diff --git a/nltk/sem/relextract.py b/nltk/sem/relextract.py
index 630923a..26d4fec 100644
--- a/nltk/sem/relextract.py
+++ b/nltk/sem/relextract.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Relation Extraction
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/skolemize.py b/nltk/sem/skolemize.py
index 96cf6e4..f9ced7f 100644
--- a/nltk/sem/skolemize.py
+++ b/nltk/sem/skolemize.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/sem/util.py b/nltk/sem/util.py
index 6a92a97..9e3c710 100644
--- a/nltk/sem/util.py
+++ b/nltk/sem/util.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -27,12 +27,12 @@ def parse_sents(inputs, grammar, trace=0):
     Convert input sentences into syntactic trees.
 
     :param inputs: sentences to be parsed
-    :type inputs: list of str
+    :type inputs: list(str)
     :param grammar: ``FeatureGrammar`` or name of feature-based grammar
-    :rtype: dict
+    :type grammar: nltk.grammar.FeatureGrammar
+    :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)
     :return: a mapping from input sentences to a list of ``Tree``s
     """
-
     # put imports here to avoid circult dependencies
     from nltk.grammar import FeatureGrammar
     from nltk.parse import FeatureChartParser, load_parser
@@ -74,9 +74,11 @@ def interpret_sents(inputs, grammar, semkey='SEM', trace=0):
     of each input sentence.
 
     :param inputs: a list of sentences
+    :type inputs: list(str)
     :param grammar: ``FeatureGrammar`` or name of feature-based grammar
+    :type grammar: nltk.grammar.FeatureGrammar
     :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
-    :rtype: dict
+    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
     """
     return [[(syn, root_semrep(syn, semkey)) for syn in syntrees]
             for syntrees in parse_sents(inputs, grammar, trace=trace)]
@@ -87,9 +89,11 @@ def evaluate_sents(inputs, grammar, model, assignment, trace=0):
     for each syntactic parse of each input sentences.
 
     :param inputs: a list of sentences
+    :type inputs: list(str)
     :param grammar: ``FeatureGrammar`` or name of feature-based grammar
+    :type grammar: nltk.grammar.FeatureGrammar
     :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
-    :rtype: dict
+    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
     """
     return [[(syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
             for (syn, sem) in interpretations]
@@ -241,5 +245,5 @@ def demo():
                 n += 1
 
 if __name__ == "__main__":
-    #demo()
+    demo()
     demo_legacy_grammar()
diff --git a/nltk/sentiment/__init__.py b/nltk/sentiment/__init__.py
index 33477f1..08a0336 100644
--- a/nltk/sentiment/__init__.py
+++ b/nltk/sentiment/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Sentiment Analysis
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sentiment/sentiment_analyzer.py b/nltk/sentiment/sentiment_analyzer.py
index 094b8c7..9429075 100644
--- a/nltk/sentiment/sentiment_analyzer.py
+++ b/nltk/sentiment/sentiment_analyzer.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Sentiment Analyzer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -167,6 +167,7 @@ class SentimentAnalyzer(object):
         :param kwargs: additional parameters that will be passed as arguments to
             the classifier `train` function.
         :return: A classifier instance trained on the training set.
+        :rtype: 
         """
         print("Training classifier")
         self.classifier = trainer(training_set, **kwargs)
@@ -187,7 +188,7 @@ class SentimentAnalyzer(object):
         :param precision: if `True`, evaluate classifier precision.
         :param recall: if `True`, evaluate classifier recall.
         :return: evaluation results.
-        :rtype: dict
+        :rtype: dict(str): float
         """
         if classifier is None:
             classifier = self.classifier
diff --git a/nltk/sentiment/util.py b/nltk/sentiment/util.py
index 009348a..5b3dacb 100644
--- a/nltk/sentiment/util.py
+++ b/nltk/sentiment/util.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Sentiment Analyzer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -407,7 +407,7 @@ def demo_tweets(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.tokenize import TweetTokenizer
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
     from nltk.corpus import twitter_samples, stopwords
 
     # Different customizations for the TweetTokenizer
@@ -484,7 +484,7 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
     :param output: the output file where results have to be reported.
     """
     from nltk.corpus import movie_reviews
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
 
     if n_instances is not None:
         n_instances = int(n_instances/2)
@@ -536,7 +536,7 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
         and negative.
     :param output: the output file where results have to be reported.
     """
-    from sentiment_analyzer import SentimentAnalyzer
+    from nltk.sentiment import SentimentAnalyzer
     from nltk.corpus import subjectivity
 
     if n_instances is not None:
@@ -650,7 +650,7 @@ def demo_vader_instance(text):
 
     :param text: a text whose polarity has to be evaluated.
     """
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     vader_analyzer = SentimentIntensityAnalyzer()
     print(vader_analyzer.polarity_scores(text))
 
@@ -663,7 +663,7 @@ def demo_vader_tweets(n_instances=None, output=None):
     """
     from collections import defaultdict
     from nltk.corpus import twitter_samples
-    from vader import SentimentIntensityAnalyzer
+    from nltk.sentiment import SentimentIntensityAnalyzer
     from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
         recall as eval_recall, f_measure as eval_f_measure)
 
diff --git a/nltk/sentiment/vader.py b/nltk/sentiment/vader.py
index 9f826ad..9eb5514 100644
--- a/nltk/sentiment/vader.py
+++ b/nltk/sentiment/vader.py
@@ -1,7 +1,7 @@
 # coding: utf-8
 # Natural Language Toolkit: vader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: C.J. Hutto <Clayton.Hutto at gtri.gatech.edu>
 #         Ewan Klein <ewan at inf.ed.ac.uk> (modifications)
 #         Pierpaolo Pantone <24alsecondo at gmail.com> (modifications)
@@ -22,9 +22,9 @@ Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
 
 import codecs
 import math
-import os
 import re
 import string
+import nltk.data
 
 ##Constants##
 
@@ -197,8 +197,8 @@ class SentimentIntensityAnalyzer(object):
     """
     Give a sentiment intensity score to sentences.
     """
-    def __init__(self, lexicon_file="vader_lexicon.txt"):
-        self.lexicon_file = os.path.join(os.path.dirname(__file__), lexicon_file)
+    def __init__(self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt"):
+        self.lexicon_file = nltk.data.load(lexicon_file)
         self.lexicon = self.make_lex_dict()
 
     def make_lex_dict(self):
@@ -206,10 +206,9 @@ class SentimentIntensityAnalyzer(object):
         Convert lexicon file to a dictionary
         """
         lex_dict = {}
-        with codecs.open(self.lexicon_file, encoding='utf8') as infile:
-            for line in infile:
-                (word, measure) = line.strip().split('\t')[0:2]
-                lex_dict[word] = float(measure)
+        for line in self.lexicon_file.split('\n'):
+            (word, measure) = line.strip().split('\t')[0:2]
+            lex_dict[word] = float(measure)
         return lex_dict
 
     def polarity_scores(self, text):
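
SentimentIntensityAnalyzer now reads its lexicon through nltk.data.load() from the vader_lexicon.zip data package instead of from a file bundled with the module. Typical use is unchanged; a short sketch, assuming the vader_lexicon resource has been installed (for example with nltk.download('vader_lexicon')):

    from nltk.sentiment.vader import SentimentIntensityAnalyzer

    sia = SentimentIntensityAnalyzer()   # loads sentiment/vader_lexicon.zip via nltk.data
    print(sia.polarity_scores("VADER is smart, handsome, and funny!"))
    # a dict with 'neg', 'neu', 'pos' and 'compound' scores
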
diff --git a/nltk/stem/__init__.py b/nltk/stem/__init__.py
index 72fa1f3..a7deafc 100644
--- a/nltk/stem/__init__.py
+++ b/nltk/stem/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/api.py b/nltk/stem/api.py
index 1bbea64..5866a0c 100644
--- a/nltk/stem/api.py
+++ b/nltk/stem/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmer Interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/isri.py b/nltk/stem/isri.py
index bb854a1..4e1bcde 100644
--- a/nltk/stem/isri.py
+++ b/nltk/stem/isri.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: The ISRI Arabic Stemmer
 #
-# Copyright (C) 2001-2015 NLTK Proejct
+# Copyright (C) 2001-2016 NLTK Project
 # Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005)
 # Author: Hosam Algasaier <hosam_hme at yahoo.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/stem/lancaster.py b/nltk/stem/lancaster.py
index dbabcfd..37f8de5 100644
--- a/nltk/stem/lancaster.py
+++ b/nltk/stem/lancaster.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Tomcavage <stomcava at law.upenn.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/stem/porter.py b/nltk/stem/porter.py
index ac03067..2db3b18 100644
--- a/nltk/stem/porter.py
+++ b/nltk/stem/porter.py
@@ -12,7 +12,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
 # USA
 #
 # This software is maintained by Vivake (vivakeATomniscia.org) and is available at:
diff --git a/nltk/stem/regexp.py b/nltk/stem/regexp.py
index e738378..0a68df0 100644
--- a/nltk/stem/regexp.py
+++ b/nltk/stem/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/rslp.py b/nltk/stem/rslp.py
index 6487a2c..eedd09f 100644
--- a/nltk/stem/rslp.py
+++ b/nltk/stem/rslp.py
@@ -2,7 +2,7 @@
 
 # Natural Language Toolkit: RSLP Stemmer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tiago Tresoldi <tresoldi at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/stem/snowball.py b/nltk/stem/snowball.py
index 76d62e0..10811fb 100644
--- a/nltk/stem/snowball.py
+++ b/nltk/stem/snowball.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Snowball Stemmer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Peter Michael Stahl <pemistahl at gmail.com>
 #         Peter Ljunglof <peter.ljunglof at heatherleaf.se> (revisions)
 # Algorithms: Dr Martin Porter <martin at tartarus.org>
diff --git a/nltk/stem/util.py b/nltk/stem/util.py
index 174e0fb..6bb386a 100644
--- a/nltk/stem/util.py
+++ b/nltk/stem/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmer Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Helder <he7d3r at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py
index 1c8b71a..29428ab 100644
--- a/nltk/stem/wordnet.py
+++ b/nltk/stem/wordnet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: WordNet stemmer interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/__init__.py b/nltk/tag/__init__.py
index 847f6a7..1b52e63 100644
--- a/nltk/tag/__init__.py
+++ b/nltk/tag/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Taggers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/api.py b/nltk/tag/api.py
index 236a488..f6aa0c1 100644
--- a/nltk/tag/api.py
+++ b/nltk/tag/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagger Interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/brill.py b/nltk/tag/brill.py
index 5d2f646..bd74545 100644
--- a/nltk/tag/brill.py
+++ b/nltk/tag/brill.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tag/brill_trainer.py b/nltk/tag/brill_trainer.py
index 7ed4583..fde697e 100644
--- a/nltk/tag/brill_trainer.py
+++ b/nltk/tag/brill_trainer.py
@@ -102,7 +102,7 @@ class BrillTaggerTrainer(object):
         #imports
         >>> from nltk.tbl.template import Template
         >>> from nltk.tag.brill import Pos, Word
-        >>> from nltk.tag import RegexpTagger, BrillTaggerTrainer
+        >>> from nltk.tag import untag, RegexpTagger, BrillTaggerTrainer
 
         #some data
         >>> from nltk.corpus import treebank
diff --git a/nltk/tag/crf.py b/nltk/tag/crf.py
index 096426a..453e111 100644
--- a/nltk/tag/crf.py
+++ b/nltk/tag/crf.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the CRFSuite Tagger
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Long Duong <longdt219 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -111,7 +111,11 @@ class CRFTagger(TaggerI):
         """ 
         token = tokens[idx]
         
-        feature_list = []  
+        feature_list = []
+        
+        if not token:
+            return feature_list
+            
         # Capitalization 
         if token[0].isupper():
             feature_list.append('CAPITALIZATION')
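
The early return added above keeps CRFTagger._get_features from indexing into an empty token: without it, the capitalization check token[0].isupper() raises IndexError when a sentence contains an empty string. A stripped-down sketch of the changed behaviour (not the full feature extractor):

    def get_features_sketch(token):
        features = []
        if not token:                  # empty token: no features, no crash
            return features
        if token[0].isupper():
            features.append('CAPITALIZATION')
        return features

    get_features_sketch('')            # []
    get_features_sketch('Paris')       # ['CAPITALIZATION']
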
diff --git a/nltk/tag/hmm.py b/nltk/tag/hmm.py
index f9b01e0..61f2b6a 100644
--- a/nltk/tag/hmm.py
+++ b/nltk/tag/hmm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Hidden Markov Model
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Trevor Cohn <tacohn at csse.unimelb.edu.au>
 #         Philip Blunsom <pcbl at csse.unimelb.edu.au>
 #         Tiago Tresoldi <tiago at tresoldi.pro.br> (fixes)
diff --git a/nltk/tag/hunpos.py b/nltk/tag/hunpos.py
index f40ebc7..4113629 100644
--- a/nltk/tag/hunpos.py
+++ b/nltk/tag/hunpos.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the HunPos POS-tagger
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 #         Dávid Márk Nemeskey <nemeskeyd at gmail.com> (modifications)
 #         Attila Zséder <zseder at gmail.com> (modifications)
diff --git a/nltk/tag/mapping.py b/nltk/tag/mapping.py
index fea50c3..afa73e2 100644
--- a/nltk/tag/mapping.py
+++ b/nltk/tag/mapping.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagset Mapping
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Nathan Schneider <nathan at cmu.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/perceptron.py b/nltk/tag/perceptron.py
index e1e3f39..ff59e55 100644
--- a/nltk/tag/perceptron.py
+++ b/nltk/tag/perceptron.py
@@ -18,7 +18,7 @@ import pickle
 import logging
 
 from nltk.tag.api import TaggerI
-from nltk.data import find
+from nltk.data import find, load
 from nltk.compat import python_2_unicode_compatible
 
 PICKLE = "averaged_perceptron_tagger.pickle"
@@ -81,7 +81,7 @@ class AveragedPerceptron(object):
                 param = (feat, clas)
                 total = self._totals[param]
                 total += (self.i - self._tstamps[param]) * weight
-                averaged = round(total / float(self.i), 3)
+                averaged = round(total / self.i, 3)
                 if averaged:
                     new_feat_weights[clas] = averaged
             self.weights[feat] = new_feat_weights
@@ -93,8 +93,7 @@ class AveragedPerceptron(object):
 
     def load(self, path):
         '''Load the pickled model weights.'''
-        with open(path,'rb') as fin:
-            self.weights = pickle.load(fin)
+        self.weights = load(path)
 
 @python_2_unicode_compatible
 class PerceptronTagger(TaggerI):
@@ -138,7 +137,7 @@ class PerceptronTagger(TaggerI):
         self.tagdict = {}
         self.classes = set()
         if load:
-            AP_MODEL_LOC = str(find('taggers/averaged_perceptron_tagger/'+PICKLE))
+            AP_MODEL_LOC = 'file:'+str(find('taggers/averaged_perceptron_tagger/'+PICKLE))
             self.load(AP_MODEL_LOC)
 
     def tag(self, tokens):
@@ -206,11 +205,8 @@ class PerceptronTagger(TaggerI):
         :param loc: Load a pickled model at location.
         :type loc: str 
         '''
-        
-        with open(loc, 'rb') as fin:
-            w_td_c = pickle.load(fin)
-        
-        self.model.weights, self.tagdict, self.classes = w_td_c
+
+        self.model.weights, self.tagdict, self.classes = load(loc)
         self.model.classes = self.classes
         
 
@@ -276,12 +272,12 @@ class PerceptronTagger(TaggerI):
             n = sum(tag_freqs.values())
             # Don't add rare words to the tag dictionary
             # Only add quite unambiguous words
-            if n >= freq_thresh and (float(mode) / n) >= ambiguity_thresh:
+            if n >= freq_thresh and (mode / n) >= ambiguity_thresh:
                 self.tagdict[word] = tag
 
 
 def _pc(n, d):
-    return (float(n) / d) * 100
+    return (n / d) * 100
 
 def _load_data_conll_format(filename):
     print ('Read from file: ', filename)
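
AveragedPerceptron.load() and PerceptronTagger.load() now go through nltk.data.load() (hence the 'file:' prefix on the model location), and the frequency thresholds use true division. The public interface is unchanged; a short sketch, assuming the averaged_perceptron_tagger model has been downloaded:

    from nltk.tag.perceptron import PerceptronTagger

    tagger = PerceptronTagger()        # fetches the pickled model via nltk.data.load
    print(tagger.tag(['The', 'quick', 'brown', 'fox']))
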
diff --git a/nltk/tag/senna.py b/nltk/tag/senna.py
index 64b742c..93030b3 100644
--- a/nltk/tag/senna.py
+++ b/nltk/tag/senna.py
@@ -1,7 +1,7 @@
 # encoding: utf-8
 # Natural Language Toolkit: Senna POS Tagger
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/tag/sequential.py b/nltk/tag/sequential.py
index e153775..b98ae52 100644
--- a/nltk/tag/sequential.py
+++ b/nltk/tag/sequential.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sequential Backoff Taggers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 #         Tiago Tresoldi <tresoldi at users.sf.net> (original affix tagger)
@@ -529,34 +529,27 @@ class RegexpTagger(SequentialBackoffTagger):
         """
         """
         SequentialBackoffTagger.__init__(self, backoff)
-        labels = ['g'+str(i) for i in range(len(regexps))]
-        tags = [tag for regex, tag in regexps]
-        self._map = dict(zip(labels, tags))
-        regexps_labels = [(regex, label) for ((regex,tag),label) in zip(regexps,labels)]
-        self._regexs = re.compile('|'.join('(?P<%s>%s)' % (label, regex) for regex,label in regexps_labels))
-        self._size=len(regexps)
+        self._regexs = [(re.compile(regexp), tag,) for regexp, tag in regexps]
 
     def encode_json_obj(self):
-        return self._map, self._regexs.pattern, self._size, self.backoff
+        return [(regexp.pattern, tag,) for regexp, tag in self._regexs], self.backoff
 
     @classmethod
     def decode_json_obj(cls, obj):
-        _map, _regexs, _size, backoff = obj
+        regexps, backoff = obj
         self = cls(())
-        self._map = _map
-        self._regexs = re.compile(_regexs)
-        self._size = _size
+        self._regexs = [(re.compile(regexp), tag,) for regexp, tag in regexps]
         SequentialBackoffTagger.__init__(self, backoff)
         return self
 
     def choose_tag(self, tokens, index, history):
-        m = self._regexs.match(tokens[index])
-        if m:
-          return self._map[m.lastgroup]
+        for regexp, tag in self._regexs:
+            if re.match(regexp, tokens[index]):
+                return tag
         return None
 
     def __repr__(self):
-        return '<Regexp Tagger: size=%d>' % self._size
+        return '<Regexp Tagger: size=%d>' % len(self._regexs)
 
 
 @python_2_unicode_compatible
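
RegexpTagger reverts to storing its (pattern, tag) pairs as individually compiled regexps that are tried in order, rather than as one combined named-group regexp. Construction and tagging are unchanged; a small sketch with the usual illustrative patterns:

    from nltk.tag import RegexpTagger

    patterns = [
        (r'.*ing$', 'VBG'),            # gerunds
        (r'.*ed$', 'VBD'),             # simple past
        (r'[0-9]+$', 'CD'),            # cardinal numbers
        (r'.*', 'NN'),                 # default: tag everything else as a noun
    ]
    tagger = RegexpTagger(patterns)
    print(tagger.tag('the cat was purring 33 times'.split()))
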
diff --git a/nltk/tag/stanford.py b/nltk/tag/stanford.py
index 5484126..570046e 100644
--- a/nltk/tag/stanford.py
+++ b/nltk/tag/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Part-of-speech and Named-Entity Taggers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Nitin Madnani <nmadnani at ets.org>
 #         Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
 # URL: <http://nltk.org/>
@@ -22,7 +22,7 @@ import tempfile
 from subprocess import PIPE
 import warnings
 
-from nltk.internals import find_file, find_jar, config_java, java, _java_options
+from nltk.internals import find_file, find_jar, config_java, java, _java_options, find_jars_within_path
 from nltk.tag.api import TaggerI
 from nltk import compat
 
@@ -54,6 +54,11 @@ class StanfordTagger(TaggerI):
 
         self._stanford_model = find_file(model_filename,
                 env_vars=('STANFORD_MODELS',), verbose=verbose)
+        
+        # Adding logging jar files to classpath 
+        stanford_dir = os.path.split(self._stanford_jar)[0]
+        self._stanford_jar = tuple(find_jars_within_path(stanford_dir))
+        
         self._encoding = encoding
         self.java_options = java_options
 
diff --git a/nltk/tag/tnt.py b/nltk/tag/tnt.py
index e9d693f..47759d3 100755
--- a/nltk/tag/tnt.py
+++ b/nltk/tag/tnt.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: TnT Tagger
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Sam Huston <sjh900 at gmail.com>
 #
 # URL: <http://nltk.org/>
@@ -12,7 +12,7 @@ by Thorsten Brants
 
 http://acl.ldc.upenn.edu/A/A00/A00-1031.pdf
 '''
-from __future__ import print_function
+from __future__ import print_function, division
 from math import log
 
 from operator import itemgetter
@@ -237,14 +237,14 @@ class TnT(TaggerI):
 
                 # if c3, and c2 are equal and larger than c1
                 elif (c3 == c2) and (c3 > c1):
-                    tl2 += float(self._tri[history][tag]) /2.0
-                    tl3 += float(self._tri[history][tag]) /2.0
+                    tl2 += self._tri[history][tag] / 2.0
+                    tl3 += self._tri[history][tag] / 2.0
 
                 # if c1, and c2 are equal and larger than c3
                 # this might be a dumb thing to do....(not sure yet)
                 elif (c2 == c1) and (c1 > c3):
-                    tl1 += float(self._tri[history][tag]) /2.0
-                    tl2 += float(self._tri[history][tag]) /2.0
+                    tl1 += self._tri[history][tag] / 2.0
+                    tl2 += self._tri[history][tag] / 2.0
 
                 # otherwise there might be a problem
                 # eg: all values = 0
@@ -268,7 +268,7 @@ class TnT(TaggerI):
         if v2 == 0:
             return -1
         else:
-            return float(v1) / float(v2)
+            return v1 / v2
 
     def tagdata(self, data):
         '''
@@ -367,7 +367,7 @@ class TnT(TaggerI):
                     p_uni = self._uni.freq((t,C))
                     p_bi = self._bi[history[-1]].freq((t,C))
                     p_tri = self._tri[tuple(history[-2:])].freq((t,C))
-                    p_wd = float(self._wd[word][t])/float(self._uni[(t,C)])
+                    p_wd = self._wd[word][t] / self._uni[(t,C)]
                     p = self._l1 *p_uni + self._l2 *p_bi + self._l3 *p_tri
                     p2 = log(p, 2) + log(p_wd, 2)
 
@@ -514,8 +514,8 @@ def demo2():
 
     for i in range(10):
         tacc = t.evaluate(d[i*100:((i+1)*100)])
-        tp_un = float(t.unknown) / float(t.known +t.unknown)
-        tp_kn = float(t.known) / float(t.known + t.unknown)
+        tp_un = t.unknown / (t.known + t.unknown)
+        tp_kn = t.known / (t.known + t.unknown)
         t.unknown = 0
         t.known = 0
 
@@ -526,8 +526,8 @@ def demo2():
         print('Accuracy over known words:', (tacc / tp_kn))
 
         sacc = s.evaluate(d[i*100:((i+1)*100)])
-        sp_un = float(s.unknown) / float(s.known +s.unknown)
-        sp_kn = float(s.known) / float(s.known + s.unknown)
+        sp_un = s.unknown / (s.known + s.unknown)
+        sp_kn = s.known / (s.known + s.unknown)
         s.unknown = 0
         s.known = 0
 
@@ -571,15 +571,15 @@ def demo3():
         s.train(etrain)
 
         tacc = t.evaluate(dtest)
-        tp_un = float(t.unknown) / float(t.known +t.unknown)
-        tp_kn = float(t.known) / float(t.known + t.unknown)
+        tp_un = t.unknown / (t.known + t.unknown)
+        tp_kn = t.known / (t.known + t.unknown)
         tknown += tp_kn
         t.unknown = 0
         t.known = 0
 
         sacc = s.evaluate(etest)
-        sp_un = float(s.unknown) / float(s.known + s.unknown)
-        sp_kn = float(s.known) / float(s.known + s.unknown)
+        sp_un = s.unknown / (s.known + s.unknown)
+        sp_kn = s.known / (s.known + s.unknown)
         sknown += sp_kn
         s.unknown = 0
         s.known = 0
diff --git a/nltk/tag/util.py b/nltk/tag/util.py
index b60f5f8..eee98e7 100644
--- a/nltk/tag/util.py
+++ b/nltk/tag/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagger Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tbl/__init__.py b/nltk/tbl/__init__.py
index 9c85c90..cd56392 100644
--- a/nltk/tbl/__init__.py
+++ b/nltk/tbl/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/demo.py b/nltk/tbl/demo.py
index 41b0a3f..06d4b1e 100644
--- a/nltk/tbl/demo.py
+++ b/nltk/tbl/demo.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/erroranalysis.py b/nltk/tbl/erroranalysis.py
index 3ce3c89..72253d7 100644
--- a/nltk/tbl/erroranalysis.py
+++ b/nltk/tbl/erroranalysis.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/feature.py b/nltk/tbl/feature.py
index 8dd704e..1f54245 100644
--- a/nltk/tbl/feature.py
+++ b/nltk/tbl/feature.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/rule.py b/nltk/tbl/rule.py
index 8919fb7..d039718 100644
--- a/nltk/tbl/rule.py
+++ b/nltk/tbl/rule.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/template.py b/nltk/tbl/template.py
index 237807a..cb66129 100644
--- a/nltk/tbl/template.py
+++ b/nltk/tbl/template.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/test/__init__.py b/nltk/test/__init__.py
index b137d3a..91ed07a 100644
--- a/nltk/test/__init__.py
+++ b/nltk/test/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Unit Tests
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/test/bnc.doctest b/nltk/test/bnc.doctest
index 0c96b06..242a999 100644
--- a/nltk/test/bnc.doctest
+++ b/nltk/test/bnc.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
     >>> import os.path
diff --git a/nltk/test/ccg.doctest b/nltk/test/ccg.doctest
index f79b6ec..0cc13d4 100644
--- a/nltk/test/ccg.doctest
+++ b/nltk/test/ccg.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============================
@@ -64,7 +64,7 @@ Construct a lexicon:
     ...     ''')
 
     >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> for parse in parser.parse("you prefer that cake".split()):  # doctest: +SKIP
+    >>> for parse in parser.parse("you prefer that cake".split()):
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -77,7 +77,7 @@ Construct a lexicon:
     --------------------------------<
                    S
 
-    >>> for parse in parser.parse("that is the cake which you prefer".split()):  # doctest: +SKIP
+    >>> for parse in parser.parse("that is the cake which you prefer".split()):
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -114,7 +114,7 @@ Without Substitution (no output)
 
 With Substitution:
 
-    >>> for parse in parser.parse(sent):  # doctest: +SKIP
+    >>> for parse in parser.parse(sent):
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -185,9 +185,9 @@ Note that while the two derivations are different, they are semantically equival
     >>> lex = lexicon.parseLexicon(test1_lex)
     >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
     >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
-    ...     printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP
+    ...     printCCGDerivation(parse)
      I      will       cook               and                might       eat     the    mushrooms             and             parsnips
-     NP  ((S\NP)/VP)  (VP/NP)  ((_var2\.,_var2)/.,_var2)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var2\.,_var2)/.,_var2)     N
+     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
         ---------------------->B
              ((S\NP)/NP)
                                                          ---------------------->B
@@ -207,7 +207,7 @@ Note that while the two derivations are different, they are semantically equival
     -----------------------------------------------------------------------------------------------------------------------------------<
                                                                      S
      I      will       cook               and                might       eat     the    mushrooms             and             parsnips
-     NP  ((S\NP)/VP)  (VP/NP)  ((_var2\.,_var2)/.,_var2)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var2\.,_var2)/.,_var2)     N
+     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
         ---------------------->B
              ((S\NP)/NP)
                                                          ---------------------->B
@@ -234,9 +234,9 @@ Interesting to point that the two parses are clearly semantically different.
     >>> lex = lexicon.parseLexicon(test2_lex)
     >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
     >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
-    ...     printCCGDerivation(parse)  # doctest: +NORMALIZE_WHITESPACE +SKIP
+    ...     printCCGDerivation(parse)
      articles      which       I      will       file               and             forget         without           reading
-        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var3\.,_var3)/.,_var3)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                               -----------------<
                                    (S/VP)
                                                                                             ------------------------------------->B
@@ -254,7 +254,7 @@ Interesting to point that the two parses are clearly semantically different.
     -----------------------------------------------------------------------------------------------------------------------------<
                                                                   N
      articles      which       I      will       file               and             forget         without           reading
-        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var3\.,_var3)/.,_var3)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                               -----------------<
                                    (S/VP)
                                                         ------------------------------------>
@@ -345,42 +345,23 @@ Lexicons for the tests:
 
     >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
     >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
-    ...     printCCGDerivation(parse)
+    ...     printCCGDerivation(parse) # doctest: +SKIP
+    ...     # it fails on Python 2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
     ...     break
        el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
      (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
-    --------Leaf
-     (NP/N)
-            ----------Leaf
-                N
     ------------------>
             NP
     ------------------>T
         (S/(S\NP))
-                      -------------Leaf
-                       ((S\NP)/NP)
-                                   --------------------------Leaf
-                                    (((S/NP)\(S/NP))/(S/NP))
-                                                             --------Leaf
-                                                              (NP/N)
-                                                                     ------------Leaf
-                                                                          N
                                                              -------------------->
                                                                       NP
                                                              -------------------->T
                                                                   (S/(S\NP))
-                                                                                 -------------Leaf
-                                                                                  ((S\NP)/NP)
                                                              --------------------------------->B
                                                                           (S/NP)
                                    ----------------------------------------------------------->
                                                          ((S/NP)\(S/NP))
-                                                                                              --------Leaf
-                                                                                               (NP/N)
-                                                                                                      -------Leaf
-                                                                                                       (N/N)
-                                                                                                             -----Leaf
-                                                                                                               N
                                                                                                       ------------>
                                                                                                            N
                                                                                               -------------------->
@@ -393,4 +374,3 @@ Lexicons for the tests:
                                                                  (S/NP)
     -------------------------------------------------------------------------------------------------------------->
                                                           S
-
diff --git a/nltk/test/ccg_semantics.doctest b/nltk/test/ccg_semantics.doctest
new file mode 100644
index 0000000..81e4290
--- /dev/null
+++ b/nltk/test/ccg_semantics.doctest
@@ -0,0 +1,553 @@
+.. Copyright (C) 2001-2016 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==============================================
+Combinatory Categorial Grammar with semantics
+==============================================
+
+-----
+Chart
+-----
+
+
+    >>> from nltk.ccg import chart, lexicon
+    >>> from nltk.ccg.chart import printCCGDerivation
+
+No semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP
+    ...     has => (S\\NP)/NP
+    ...     books => NP
+    ...     ''',
+    ...     False)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has books".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+         -------------------->
+                (S\NP)
+    -------------------------<
+                S
+
+    >>> printCCGDerivation(parses[1])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+    ----->T
+    (S/(S\NP))
+         -------------------->
+                (S\NP)
+    ------------------------->
+                S
+
+
+    >>> printCCGDerivation(parses[2])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+    ----->T
+    (S/(S\NP))
+    ------------------>B
+          (S/NP)
+    ------------------------->
+                S
+
+Simple semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP {she}
+    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
+    ...     a => NP/N {\\P.exists z.P(z)}
+    ...     book => N {book}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has a book".split()))
+    >>> print(str(len(parses)) + " parses")
+    7 parses
+
+    >>> printCCGDerivation(parses[0])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    -----------------------------------------------------------------------------<
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[1])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    -----------------------------------------------------------------------------<
+                           S {have(she,exists z.book(z))}
+    
+    >>> printCCGDerivation(parses[2])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[3])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[4])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+    ---------------------------------------->B
+            (S/NP) {\x.have(she,x)}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[5])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+    ------------------------------------------------------------------->B
+                    (S/N) {\P.have(she,exists z.P(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[6])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+    ---------------------------------------->B
+            (S/NP) {\x.have(she,x)}
+    ------------------------------------------------------------------->B
+                    (S/N) {\P.have(she,exists z.P(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+Complex semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP {she}
+    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
+    ...     a => ((S\\NP)\\((S\\NP)/NP))/N {\\P R x.(exists z.P(z) & R(z,x))}
+    ...     book => N {book}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has a book".split()))
+    >>> print(str(len(parses)) + " parses")
+    2 parses
+
+    >>> printCCGDerivation(parses[0])
+       She                 has                                           a                                 book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+                                            ---------------------------------------------------------------------->
+                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+              ----------------------------------------------------------------------------------------------------<
+                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
+    --------------------------------------------------------------------------------------------------------------<
+                                         S {(exists z.book(z) & have(she,z))}
+
+    >>> printCCGDerivation(parses[1])
+       She                 has                                           a                                 book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+                                            ---------------------------------------------------------------------->
+                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+              ----------------------------------------------------------------------------------------------------<
+                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
+    -------------------------------------------------------------------------------------------------------------->
+                                         S {(exists z.book(z) & have(she,z))}
+
+Using conjunctions
+---------------------
+
+    # TODO: The semantics of "and" should be more flexible: the entry below is hard-coded for two-place predicates, so here it only conjoins transitive-verb-like categories.
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     I => NP {I}
+    ...     cook => (S\\NP)/NP {\\x y.cook(x,y)}
+    ...     and => var\\.,var/.,var {\\P Q x y.(P(x,y) & Q(x,y))}
+    ...     eat => (S\\NP)/NP {\\x y.eat(x,y)}
+    ...     the => NP/N {\\x.the(x)}
+    ...     bacon => N {bacon}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("I cook and eat the bacon".split()))
+    >>> print(str(len(parses)) + " parses")
+    7 parses
+
+    >>> printCCGDerivation(parses[0])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[1])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[2])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[3])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[4])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+    --------------------------------------------------------------------------------------------------------------------------->B
+                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[5])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[6])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+    --------------------------------------------------------------------------------------------------------------------------->B
+                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+Tests from published papers
+------------------------------
+
+An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP
+    ...     I => NP {I}
+    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+    ...     them => NP {them}
+    ...     money => NP {money}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("I give them money".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+            -------------------------------------------------------------->
+                            (S\NP) {\z.give(money,them,z)}
+    ----------------------------------------------------------------------<
+                            S {give(money,them,I)}
+
+    >>> printCCGDerivation(parses[1])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+            -------------------------------------------------------------->
+                            (S\NP) {\z.give(money,them,z)}
+    ---------------------------------------------------------------------->
+                            S {give(money,them,I)}
+
+    
+    >>> printCCGDerivation(parses[2])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+    ---------------------------------------------------------->B
+                    (S/NP) {\y.give(y,them,I)}
+    ---------------------------------------------------------------------->
+                            S {give(money,them,I)}
+
+
+An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- N, NP, S
+    ...     money => N {money}
+    ...     that => (N\\N)/(S/NP) {\\P Q x.(P(x) & Q(x))}
+    ...     I => NP {I}
+    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+    ...     them => NP {them}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("money that I give them".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+               ------------------------------------------------------------------------------------------------->
+                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
+    ------------------------------------------------------------------------------------------------------------<
+                                         N {\x.(give(x,them,I) & money(x))}
+
+    >>> printCCGDerivation(parses[1])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+    ----------->T
+    (N/(N\N)) {\F.F(money)}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+               ------------------------------------------------------------------------------------------------->
+                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
+    ------------------------------------------------------------------------------------------------------------>
+                                         N {\x.(give(x,them,I) & money(x))}
+
+    >>> printCCGDerivation(parses[2])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+    ----------->T
+    (N/(N\N)) {\F.F(money)}
+    -------------------------------------------------->B
+           (N/(S/NP)) {\P x.(P(x) & money(x))}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+    ------------------------------------------------------------------------------------------------------------>
+                                         N {\x.(give(x,them,I) & money(x))}
+
+
+-------
+Lexicon
+-------
+
+    >>> from nltk.ccg import lexicon
+
+Parse lexicon with semantics
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...
+    ...     IntransVsg :: S\\NP[sg]
+    ...     
+    ...     sleeps => IntransVsg {\\x.sleep(x)}
+    ...     eats => S\\NP[sg]/NP {\\x y.eat(x,y)}
+    ...        
+    ...     and => var\\var/var {\\x y.x & y}
+    ...     ''',
+    ...     True
+    ... )))
+    and => ((_var0\_var0)/_var0) {(\x y.x & y)}
+    eats => ((S\NP['sg'])/NP) {\x y.eat(x,y)}
+    sleeps => (S\NP['sg']) {\x.sleep(x)}
+
+Parse lexicon without semantics
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...
+    ...     IntransVsg :: S\\NP[sg]
+    ...     
+    ...     sleeps => IntransVsg
+    ...     eats => S\\NP[sg]/NP {sem=\\x y.eat(x,y)}
+    ...        
+    ...     and => var\\var/var
+    ...     ''',
+    ...     False
+    ... )))
+    and => ((_var0\_var0)/_var0)
+    eats => ((S\NP['sg'])/NP)
+    sleeps => (S\NP['sg'])
+
+Semantics are missing
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...     
+    ...     eats => S\\NP[sg]/NP
+    ...     ''',
+    ...     True
+    ... )))
+    Traceback (most recent call last):
+      ...
+    AssertionError: eats => S\NP[sg]/NP must contain semantics because include_semantics is set to True
+
+
+------------------------------------
+CCG combinator semantics computation
+------------------------------------
+
+    >>> from nltk.sem.logic import *
+    >>> from nltk.ccg.logic import *
+
+    >>> read_expr = Expression.fromstring
+
+Compute semantics from function application
+
+    >>> print(str(compute_function_semantics(read_expr(r'\x.P(x)'), read_expr(r'book'))))
+    P(book)
+
+    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'read'))))
+    read(book)
+
+    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'\x.read(x)'))))
+    read(book)
+
+Compute semantics from composition
+
+    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)'))))
+    \x.P(Q(x))
+
+    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+    Traceback (most recent call last):
+      ...
+    AssertionError: `read` must be a lambda expression
+
+Compute semantics from substitution
+
+    >>> print(str(compute_substitution_semantics(read_expr(r'\x y.P(x,y)'), read_expr(r'\x.Q(x)'))))
+    \x.P(x,Q(x))
+    
+    >>> print(str(compute_substitution_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+    Traceback (most recent call last):
+      ...
+    AssertionError: `\x.P(x)` must be a lambda expression with 2 arguments
+
+Compute type-raise semantics
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.P(x)'))))
+    \F x.F(P(x))
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.F(x)'))))
+    \F1 x.F1(F(x))
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x y z.P(x,y,z)'))))
+    \F x y z.F(P(x,y,z))
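+
+As a cross-check (an illustrative sketch, not part of the shipped doctest; it uses only
+the functions exercised above), composing two functions and then applying the result
+should agree with nesting the applications:
+
+    >>> composed = compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)'))
+    >>> print(str(compute_function_semantics(composed, read_expr(r'book'))))  # doctest: +SKIP
+    P(Q(book))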
+
diff --git a/nltk/test/chat80.doctest b/nltk/test/chat80.doctest
index 3080177..64b2f38 100644
--- a/nltk/test/chat80.doctest
+++ b/nltk/test/chat80.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======
diff --git a/nltk/test/chunk.doctest b/nltk/test/chunk.doctest
index b2952d5..d877083 100644
--- a/nltk/test/chunk.doctest
+++ b/nltk/test/chunk.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========
diff --git a/nltk/test/classify.doctest b/nltk/test/classify.doctest
index 0dc59a3..3f00b1d 100644
--- a/nltk/test/classify.doctest
+++ b/nltk/test/classify.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =============
diff --git a/nltk/test/collocations.doctest b/nltk/test/collocations.doctest
index c4e1fa4..01f8c23 100644
--- a/nltk/test/collocations.doctest
+++ b/nltk/test/collocations.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============
diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest
index cf38764..3976774 100644
--- a/nltk/test/corpus.doctest
+++ b/nltk/test/corpus.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ================
@@ -640,7 +640,7 @@ We can compute stats for specific product features:
     >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
     >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
     >>> # We use float for backward compatibility with division in Python2.7
-    >>> mean = float(tot)/n_reviews
+    >>> mean = tot/float(n_reviews)
     >>> print(n_reviews, tot, mean)
     15 24 1.6
 
diff --git a/nltk/test/crubadan.doctest b/nltk/test/crubadan.doctest
index c45fe91..d485ffc 100644
--- a/nltk/test/crubadan.doctest
+++ b/nltk/test/crubadan.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 Crubadan Corpus Reader
diff --git a/nltk/test/data.doctest b/nltk/test/data.doctest
index 71a3a98..9cc7663 100644
--- a/nltk/test/data.doctest
+++ b/nltk/test/data.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================================
diff --git a/nltk/test/dependency.doctest b/nltk/test/dependency.doctest
index 0972a11..80607ee 100755
--- a/nltk/test/dependency.doctest
+++ b/nltk/test/dependency.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===================
diff --git a/nltk/test/discourse.doctest b/nltk/test/discourse.doctest
index 21f40ca..e84dec0 100644
--- a/nltk/test/discourse.doctest
+++ b/nltk/test/discourse.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================
diff --git a/nltk/test/drt.doctest b/nltk/test/drt.doctest
index 0f00911..8f73283 100644
--- a/nltk/test/drt.doctest
+++ b/nltk/test/drt.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ================================
diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest
index b4a4e46..ded2f8c 100644
--- a/nltk/test/featgram.doctest
+++ b/nltk/test/featgram.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================
diff --git a/nltk/test/featstruct.doctest b/nltk/test/featstruct.doctest
index ee0c052..55e5eb8 100644
--- a/nltk/test/featstruct.doctest
+++ b/nltk/test/featstruct.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================================
diff --git a/nltk/test/framenet.doctest b/nltk/test/framenet.doctest
index 7eb496e..e1b1681 100644
--- a/nltk/test/framenet.doctest
+++ b/nltk/test/framenet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ========
diff --git a/nltk/test/generate.doctest b/nltk/test/generate.doctest
index 82489b8..3536444 100644
--- a/nltk/test/generate.doctest
+++ b/nltk/test/generate.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================================
diff --git a/nltk/test/gensim.doctest b/nltk/test/gensim.doctest
index 5df4508..eae4a6b 100644
--- a/nltk/test/gensim.doctest
+++ b/nltk/test/gensim.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======================================
diff --git a/nltk/test/gluesemantics.doctest b/nltk/test/gluesemantics.doctest
index 06ca81f..f8d6b47 100644
--- a/nltk/test/gluesemantics.doctest
+++ b/nltk/test/gluesemantics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============================================================================
@@ -343,13 +343,13 @@ Dependency Graph to Glue Formulas
     ... 4	dog	_	NN	NN	_	2	OBJ	_	_
     ... """)
     >>> gfl = GlueDict('nltk:grammars/sample_grammars/glue.semtype').to_glueformula_list(depgraph)
-    >>> for gf in gfl:
+    >>> for gf in sorted(gfl):
     ...     print(gf)
-    \x y.sees(x,y) : (f -o (i -o g))
     \P Q.exists x.(P(x) & Q(x)) : ((fv -o fr) -o ((f -o F2) -o F2))
+    \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I5) -o I5))
+    \x y.sees(x,y) : (f -o (i -o g))
     \x.John(x) : (fv -o fr)
     \x.dog(x) : (iv -o ir)
-    \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I5) -o I5))
     >>> glue = Glue()
     >>> for r in sorted([r.simplify().normalize() for r in glue.get_readings(glue.gfl_to_compiled(gfl))], key=str):
     ...     print(r)
@@ -363,7 +363,7 @@ Dependency Graph to LFG f-structure
 
     >>> fstruct = FStructure.read_depgraph(depgraph)
 
-    >>> print(fstruct)
+    >>> print(fstruct) # doctest: +SKIP
     f:[pred 'sees'
        obj h:[pred 'dog'
               spec 'a']
@@ -375,12 +375,12 @@ Dependency Graph to LFG f-structure
 ---------------------------------
 LFG f-structure to Glue
 ---------------------------------
-    >>> for gf in fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype')): # doctest: +SKIP
+    >>> for gf in sorted(fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype'))):
     ...     print(gf)
+    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G5) -o G5))
+    \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I2) -o I2))
     \x y.sees(x,y) : (i -o (g -o f))
-    \x.dog(x) : (gv -o gr)
-    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G3) -o G3))
-    \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I4) -o I4))
     \x.John(x) : (iv -o ir)
+    \x.dog(x) : (gv -o gr)
 
 .. see gluesemantics_malt.doctest for more
diff --git a/nltk/test/gluesemantics_malt.doctest b/nltk/test/gluesemantics_malt.doctest
index 599a573..308d0ec 100644
--- a/nltk/test/gluesemantics_malt.doctest
+++ b/nltk/test/gluesemantics_malt.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. see also: gluesemantics.doctest
diff --git a/nltk/test/grammar.doctest b/nltk/test/grammar.doctest
index 1c41429..de232f1 100644
--- a/nltk/test/grammar.doctest
+++ b/nltk/test/grammar.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============
diff --git a/nltk/test/grammartestsuites.doctest b/nltk/test/grammartestsuites.doctest
index 7fd09b9..731ae4a 100644
--- a/nltk/test/grammartestsuites.doctest
+++ b/nltk/test/grammartestsuites.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========================
diff --git a/nltk/test/index.doctest b/nltk/test/index.doctest
index eb504c2..a039849 100644
--- a/nltk/test/index.doctest
+++ b/nltk/test/index.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. _align howto: align.html
diff --git a/nltk/test/inference.doctest b/nltk/test/inference.doctest
index cf8191c..962e0d5 100644
--- a/nltk/test/inference.doctest
+++ b/nltk/test/inference.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ====================================
diff --git a/nltk/test/internals.doctest b/nltk/test/internals.doctest
index 84a628a..f6fc2a2 100644
--- a/nltk/test/internals.doctest
+++ b/nltk/test/internals.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========================================
diff --git a/nltk/test/japanese.doctest b/nltk/test/japanese.doctest
index 3c8eb48..08dac17 100644
--- a/nltk/test/japanese.doctest
+++ b/nltk/test/japanese.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ============================
diff --git a/nltk/test/logic.doctest b/nltk/test/logic.doctest
index a503d07..2dffc3a 100644
--- a/nltk/test/logic.doctest
+++ b/nltk/test/logic.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======================
diff --git a/nltk/test/metrics.doctest b/nltk/test/metrics.doctest
index 5cae4a0..36ee980 100644
--- a/nltk/test/metrics.doctest
+++ b/nltk/test/metrics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======
diff --git a/nltk/test/misc.doctest b/nltk/test/misc.doctest
index 141507f..6555519 100644
--- a/nltk/test/misc.doctest
+++ b/nltk/test/misc.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 --------------------------------------------------------------------------------
diff --git a/nltk/test/nonmonotonic.doctest b/nltk/test/nonmonotonic.doctest
index 4dfad9c..73ed059 100644
--- a/nltk/test/nonmonotonic.doctest
+++ b/nltk/test/nonmonotonic.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
diff --git a/nltk/test/parse.doctest b/nltk/test/parse.doctest
index 6eea819..d9dac28 100644
--- a/nltk/test/parse.doctest
+++ b/nltk/test/parse.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========
diff --git a/nltk/test/portuguese_en.doctest b/nltk/test/portuguese_en.doctest
index 0cef76b..83087af 100644
--- a/nltk/test/portuguese_en.doctest
+++ b/nltk/test/portuguese_en.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================================
diff --git a/nltk/test/probability.doctest b/nltk/test/probability.doctest
index 9569057..54d4e93 100644
--- a/nltk/test/probability.doctest
+++ b/nltk/test/probability.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===========
@@ -54,6 +54,49 @@ Note that items are sorted in order of decreasing frequency; two items of the sa
     >>> fd1 == pickle.loads(pickled)
     True
 
+Mathematical operations:
+
+    >>> FreqDist('abbb') + FreqDist('bcc')
+    FreqDist({'b': 4, 'c': 2, 'a': 1})
+    >>> FreqDist('abbbc') - FreqDist('bccd')
+    FreqDist({'b': 2, 'a': 1})
+    >>> FreqDist('abbb') | FreqDist('bcc')
+    FreqDist({'b': 3, 'c': 2, 'a': 1})
+    >>> FreqDist('abbb') & FreqDist('bcc')
+    FreqDist({'b': 1})
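+
+These operators follow `collections.Counter` arithmetic (`FreqDist` is a `Counter`
+subclass). As an illustrative sketch (not part of the shipped doctest), the same sums
+can be sanity-checked against plain counters:
+
+    >>> from collections import Counter
+    >>> dict(FreqDist('abbb') + FreqDist('bcc')) == dict(Counter('abbb') + Counter('bcc'))  # doctest: +SKIP
+    True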
+
+ConditionalFreqDist
+-------------------
+
+    >>> cfd1 = ConditionalFreqDist()
+    >>> cfd1[1] = FreqDist('abbbb')
+    >>> cfd1[2] = FreqDist('xxxxyy')
+    >>> cfd1
+    <ConditionalFreqDist with 2 conditions>
+
+    >>> cfd2 = ConditionalFreqDist()
+    >>> cfd2[1] = FreqDist('bbccc')
+    >>> cfd2[2] = FreqDist('xxxyyyzz')
+    >>> cfd2[3] = FreqDist('m')
+    >>> cfd2
+    <ConditionalFreqDist with 3 conditions>
+
+    >>> r = cfd1 + cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 6, 'c': 3, 'a': 1})), (2, FreqDist({'x': 7, 'y': 5, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+    >>> r = cfd1 - cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 2, 'a': 1})), (2, FreqDist({'x': 1}))]
+
+    >>> r = cfd1 | cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 4, 'c': 3, 'a': 1})), (2, FreqDist({'x': 4, 'y': 3, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+    >>> r = cfd1 & cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 2})), (2, FreqDist({'x': 3, 'y': 2}))]
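+
+Per condition, these operators merge the underlying `FreqDist` objects. An illustrative
+sketch (not part of the shipped doctest) of that invariant for addition:
+
+    >>> all((cfd1 + cfd2)[c] == cfd1[c] + cfd2[c] for c in (cfd1 + cfd2).conditions())  # doctest: +SKIP
+    True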
+
 Testing some HMM estimators
 ---------------------------
 
@@ -73,11 +116,6 @@ from the whole corpus, not just the training corpus
     >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
     >>> print(len(symbols))
     1464
-    >>> print(len(tag_set))
-    92
-    >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
-    >>> print(len(symbols))
-    1464
     >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
 
 We divide the corpus into 90% training and 10% testing
diff --git a/nltk/test/propbank.doctest b/nltk/test/propbank.doctest
index 23cb7fa..4d3cca3 100644
--- a/nltk/test/propbank.doctest
+++ b/nltk/test/propbank.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ========
diff --git a/nltk/test/relextract.doctest b/nltk/test/relextract.doctest
index f29e464..758c42c 100644
--- a/nltk/test/relextract.doctest
+++ b/nltk/test/relextract.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
diff --git a/nltk/test/resolution.doctest b/nltk/test/resolution.doctest
index 6bbae37..a714677 100644
--- a/nltk/test/resolution.doctest
+++ b/nltk/test/resolution.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================
diff --git a/nltk/test/semantics.doctest b/nltk/test/semantics.doctest
index a8b9f84..2aa9d6c 100644
--- a/nltk/test/semantics.doctest
+++ b/nltk/test/semantics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========
diff --git a/nltk/test/sentiment.doctest b/nltk/test/sentiment.doctest
index 660653f..e6e9430 100644
--- a/nltk/test/sentiment.doctest
+++ b/nltk/test/sentiment.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===================
diff --git a/nltk/test/sentiwordnet.doctest b/nltk/test/sentiwordnet.doctest
index f9fad04..e032482 100644
--- a/nltk/test/sentiwordnet.doctest
+++ b/nltk/test/sentiwordnet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
@@ -31,8 +31,10 @@ Lookup
     >>> list(swn.senti_synsets('slow')) # doctest: +NORMALIZE_WHITESPACE
     [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
     SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
-    SentiSynset('slow.a.02'), SentiSynset('slow.a.04'),
-    SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]
+    SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
+    SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
+    SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
+    SentiSynset('behind.r.03')]
 
     >>> happy = swn.senti_synsets('happy', 'a')
 
diff --git a/nltk/test/simple.doctest b/nltk/test/simple.doctest
index c29753f..073169a 100644
--- a/nltk/test/simple.doctest
+++ b/nltk/test/simple.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/stem.doctest b/nltk/test/stem.doctest
index 2150b64..4f99072 100644
--- a/nltk/test/stem.doctest
+++ b/nltk/test/stem.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========
diff --git a/nltk/test/tag.doctest b/nltk/test/tag.doctest
index 415aa44..1b888c7 100644
--- a/nltk/test/tag.doctest
+++ b/nltk/test/tag.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 Regression Tests
@@ -20,3 +20,14 @@ Add tests for:
     backoff tagger if the backoff tagger gets that context correct at
     *all* locations.
 
+
+Regression Testing for issue #1025
+==================================
+
+We want to ensure that a RegexpTagger can be created with more than 100 patterns
+and does not fail with:
+ "AssertionError: sorry, but this version only supports 100 named groups"
+
+    >>> from nltk.tag import RegexpTagger
+    >>> patterns = [(str(i), 'NNP',) for i in range(200)]
+    >>> tagger = RegexpTagger(patterns)
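+
+Constructing the tagger without raising is the regression check itself. A tiny usage
+sketch (illustrative only; the expected output assumes the first matching pattern wins):
+
+    >>> tagger.tag(['42', 'dog'])  # doctest: +SKIP
+    [('42', 'NNP'), ('dog', None)]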
diff --git a/nltk/test/tokenize.doctest b/nltk/test/tokenize.doctest
index f271251..8981c6d 100644
--- a/nltk/test/tokenize.doctest
+++ b/nltk/test/tokenize.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
     >>> from __future__ import print_function
@@ -109,3 +109,58 @@ A simple sentence tokenizer '\.(\s+|$)'
     >>> regexp_tokenize(s, pattern=r'\.(?:\s+|$)', gaps=True)
     ['Good muffins cost $3.88\nin New York',
      'Please buy me\ntwo of them', 'Thanks']
+
+
+Regression Tests: TweetTokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TweetTokenizer is a tokenizer designed specifically for micro-blogging text such as tweets.
+
+    >>> from nltk.tokenize import TweetTokenizer
+    >>> tknzr = TweetTokenizer()
+    >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
+    >>> tknzr.tokenize(s0)
+    ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']
+    >>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)"
+    >>> tknzr.tokenize(s1)
+    ['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)']
+    >>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn"
+    >>> tknzr.tokenize(s2)
+    ['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn']
+    >>> s3 = "@Insanomania They do... Their mentality doesn't :("
+    >>> tknzr.tokenize(s3)
+    ['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':(']
+    >>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!"
+    >>> tknzr.tokenize(s4)
+    ['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!']
+    >>> tknzr = TweetTokenizer(reduce_len=True)
+    >>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :("
+    >>> tknzr.tokenize(s5)
+    ['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':(']
+
+It is possible to specify the `strip_handles` and `reduce_len` parameters for a TweetTokenizer instance. When `strip_handles` is set to True, the tokenizer removes Twitter handles (e.g. usernames). When `reduce_len` is set to True, repeated character sequences of length 3 or greater are replaced with sequences of length 3.
+
+    >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
+    >>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!'
+    >>> tknzr.tokenize(s6)
+    [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']
+    >>> s7 = '@_willy65: No place for @chuck tonight. Sorry.'
+    >>> tknzr.tokenize(s7)
+    [':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.']
+    >>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com.'
+    >>> tknzr.tokenize(s8)
+    ['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin@email.com', '.']
+
+The `preserve_case` parameter (default: True) controls whether uppercase tokens are converted to lowercase. Emoticons are not affected:
+
+    >>> tknzr = TweetTokenizer(preserve_case=False)
+    >>> s9 = "@jrmy: I'm REALLY HAPPYYY about that! NICEEEE :D :P"
+    >>> tknzr.tokenize(s9)
+    ['@jrmy', ':', "i'm", 'really', 'happyyy', 'about', 'that', '!', 'niceeee', ':D', ':P']
+
+It should not hang on long sequences of the same punctuation character.
+
+    >>> tknzr = TweetTokenizer()
+    >>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L"
+    >>> tknzr.tokenize(s10)
+    [u'Photo', u':', u"Aujourd'hui", u'sur', u'http://t.co/0gebOFDUzn', u'Projet', u'...', u'http://t.co/bKfIUbydz2', u'...', u'http://fb.me/3b6uXpz0L']
\ No newline at end of file
diff --git a/nltk/test/toolbox.doctest b/nltk/test/toolbox.doctest
index c373f15..d1b70fe 100644
--- a/nltk/test/toolbox.doctest
+++ b/nltk/test/toolbox.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
diff --git a/nltk/test/translate.doctest b/nltk/test/translate.doctest
index 78887d9..8cf9a47 100644
--- a/nltk/test/translate.doctest
+++ b/nltk/test/translate.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. -*- coding: utf-8 -*-
diff --git a/nltk/test/tree.doctest b/nltk/test/tree.doctest
index 179f972..792dc97 100644
--- a/nltk/test/tree.doctest
+++ b/nltk/test/tree.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
diff --git a/nltk/test/treeprettyprinter.doctest b/nltk/test/treeprettyprinter.doctest
index 4cee8ff..7e8d315 100644
--- a/nltk/test/treeprettyprinter.doctest
+++ b/nltk/test/treeprettyprinter.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ========================================================
diff --git a/nltk/test/treetransforms.doctest b/nltk/test/treetransforms.doctest
index bce92e0..eb636bc 100644
--- a/nltk/test/treetransforms.doctest
+++ b/nltk/test/treetransforms.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 -------------------------------------------
diff --git a/nltk/test/unit/test_2x_compat.py b/nltk/test/unit/test_2x_compat.py
index 78329bf..4de9b53 100644
--- a/nltk/test/unit/test_2x_compat.py
+++ b/nltk/test/unit/test_2x_compat.py
@@ -24,3 +24,40 @@ class TestTextTransliteration(unittest.TestCase):
     def test_str(self):
         self.assertEqual(str(self.txt), b"<Text: Sao Tome and Principe...>")
 
+
+class TestFraction(unittest.TestCase):
+    def test_unnormalize_fraction(self):
+        from fractions import Fraction as NativePythonFraction
+        from nltk.compat import Fraction as NLTKFraction
+        
+        # The native fraction should throw a TypeError in Python < 3.5
+        with self.assertRaises(TypeError):
+            NativePythonFraction(0, 1000, _normalize=False)
+        
+        # Using nltk.compat.Fraction in Python < 3.5
+        compat_frac = NLTKFraction(0, 1000, _normalize=False)
+        # The numerator and denominator do not change.
+        assert compat_frac.numerator == 0
+        assert compat_frac.denominator == 1000
+        # The floating point value remains normalized. 
+        assert float(compat_frac) == 0.0
+        
+        # Checks that the fraction is not reduced by the
+        # greatest common divisor (gcd).
+        six_twelve = NLTKFraction(6, 12, _normalize=False)
+        assert six_twelve.numerator == 6
+        assert six_twelve.denominator == 12
+        
+        one_two = NLTKFraction(1, 2, _normalize=False)
+        assert one_two.numerator == 1
+        assert one_two.denominator == 2
+        
+        # Checks against the native fraction.
+        six_twelve_original = NativePythonFraction(6, 12)
+        # Checks that the rational values of one_two and six_twelve are the same.
+        assert float(one_two) == float(six_twelve) == float(six_twelve_original)
+        
+        # Checks that the fraction still gets normalized, even with
+        # _normalize=False, when the numerator comes from the native
+        # fractions.Fraction.from_float conversion of a float.
+        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
diff --git a/nltk/test/unit/test_json2csv_corpus.py b/nltk/test/unit/test_json2csv_corpus.py
index bcbff7e..aa13f80 100644
--- a/nltk/test/unit/test_json2csv_corpus.py
+++ b/nltk/test/unit/test_json2csv_corpus.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/test/unit/test_tag.py b/nltk/test/unit/test_tag.py
index 99d6113..095faba 100644
--- a/nltk/test/unit/test_tag.py
+++ b/nltk/test/unit/test_tag.py
@@ -8,7 +8,7 @@ def test_basic():
     result = pos_tag(word_tokenize("John's big idea isn't all that bad."))
     assert result == [('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'),
                       ('idea', 'NN'), ('is', 'VBZ'), ("n't", 'RB'),
-                      ('all', 'DT'), ('that', 'DT'), ('bad', 'JJ'),
+                      ('all', 'PDT'), ('that', 'DT'), ('bad', 'JJ'),
                       ('.', '.')]
 
 
diff --git a/nltk/test/unit/test_tgrep.py b/nltk/test/unit/test_tgrep.py
index 224fb47..9add766 100644
--- a/nltk/test/unit/test_tgrep.py
+++ b/nltk/test/unit/test_tgrep.py
@@ -3,7 +3,7 @@
 #
 # Natural Language Toolkit: TGrep search
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Will Roberts <wildwilhelm at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/test/unit/test_tokenize.py b/nltk/test/unit/test_tokenize.py
new file mode 100644
index 0000000..af55fc3
--- /dev/null
+++ b/nltk/test/unit/test_tokenize.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.tokenize.
+See also nltk/test/tokenize.doctest
+"""
+
+from __future__ import unicode_literals
+from nltk.tokenize import TweetTokenizer
+import unittest
+
+class TestTokenize(unittest.TestCase):
+
+    def test_tweet_tokenizer(self):
+        """
+        Test TweetTokenizer using words with special and accented characters.
+        """
+
+        tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
+        s9 = "@myke: Let's test these words: resumé España München français"
+        tokens = tokenizer.tokenize(s9)
+        expected = [':', "Let's", 'test', 'these', 'words', ':', 'resumé',
+                    'España', 'München', 'français']
+        self.assertEqual(tokens, expected)
diff --git a/nltk/test/unit/test_twitter_auth.py b/nltk/test/unit/test_twitter_auth.py
index e266256..47a2acc 100644
--- a/nltk/test/unit/test_twitter_auth.py
+++ b/nltk/test/unit/test_twitter_auth.py
@@ -5,6 +5,12 @@ Tests for static parts of Twitter package
 
 import os
 import unittest
+from nose import SkipTest
+
+try:
+    import twython
+except ImportError as e:
+    raise SkipTest("The twython library has not been installed.")
 
 from nltk.twitter import Authenticate
 
@@ -42,7 +48,6 @@ class TestCredentials(unittest.TestCase):
             self.fail('Unexpected exception thrown: %s' % e)
         else:
             self.fail('OSError exception not thrown.')
-            
 
     def test_empty_subdir2(self):
         """
@@ -75,7 +80,6 @@ class TestCredentials(unittest.TestCase):
         else:
             self.fail('OSError exception not thrown.')
 
-
     def test_missingfile1(self):
         """
         Defaults for authentication will fail since 'credentials.txt' not
@@ -94,7 +98,6 @@ class TestCredentials(unittest.TestCase):
         else:
             self.fail('OSError exception not thrown.')
 
-
     def test_missingfile2(self):
         """
         Credentials file 'foobar' cannot be found in default subdir.
@@ -112,8 +115,6 @@ class TestCredentials(unittest.TestCase):
         else:
             self.fail('OSError exception not thrown.')
 
-
-
     def test_incomplete_file(self):
         """
         Credentials file 'bad_oauth1-1.txt' is incomplete
@@ -128,7 +129,6 @@ class TestCredentials(unittest.TestCase):
         else:
             self.fail('ValueError exception not thrown.')
 
-
     def test_malformed_file1(self):
         """
         First key in credentials file 'bad_oauth1-2.txt' is ill-formed
@@ -165,7 +165,6 @@ class TestCredentials(unittest.TestCase):
         self.auth.load_creds(subdir=self.subdir)
         self.auth.creds_fullpath = os.path.join(self.subdir, self.auth.creds_file)
 
-
     def test_correct_file1(self):
         """
         Default credentials file is identified
@@ -173,7 +172,6 @@ class TestCredentials(unittest.TestCase):
         self.auth.load_creds(subdir=self.subdir)
         self.assertEqual(self.auth.creds_file, 'credentials.txt')
 
-
     def test_correct_file2(self):
         """
         Default credentials file has been read correctly
diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py
index 762bd73..1af711f 100644
--- a/nltk/test/unit/translate/test_bleu.py
+++ b/nltk/test/unit/translate/test_bleu.py
@@ -4,10 +4,12 @@ Tests for BLEU translation evaluation metric
 """
 
 import unittest
-from nltk.translate.bleu_score import _modified_precision
+from nltk.translate.bleu_score import modified_precision, brevity_penalty
+from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
+
 
 class TestBLEU(unittest.TestCase):
-    def test__modified_precision(self):
+    def test_modified_precision(self):
         """
         Examples from the original BLEU paper 
         http://www.aclweb.org/anthology/P02-1040.pdf
@@ -22,11 +24,13 @@ class TestBLEU(unittest.TestCase):
         references = [ref1, ref2] 
         
         # Testing modified unigram precision.
-        hyp1_unigram_precision =  _modified_precision(references, hyp1, n=1) 
+        hyp1_unigram_precision =  float(modified_precision(references, hyp1, n=1))
         assert (round(hyp1_unigram_precision, 4) == 0.2857)
+        # With assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
         
         # Testing modified bigram precision.
-        assert(_modified_precision(references, hyp1, n=2) == 0.0)
+        assert(float(modified_precision(references, hyp1, n=2)) == 0.0)
         
         
         # Example 2: the "of the" example.
@@ -42,10 +46,10 @@ class TestBLEU(unittest.TestCase):
         
         references = [ref1, ref2, ref3] 
         # Testing modified unigram precision.
-        assert (_modified_precision(references, hyp1, n=1) == 1.0)
+        assert (float(modified_precision(references, hyp1, n=1)) == 1.0)
         
         # Testing modified bigram precision.
-        assert(_modified_precision(references, hyp1, n=2) == 1.0)
+        assert(float(modified_precision(references, hyp1, n=2)) == 1.0)
         
 
         # Example 3: Proper MT outputs.
@@ -57,25 +61,78 @@ class TestBLEU(unittest.TestCase):
         references = [ref1, ref2, ref3]
         
         # Unigram precision.
-        hyp1_unigram_precision = _modified_precision(references, hyp1, n=1)
-        hyp2_unigram_precision = _modified_precision(references, hyp2, n=1)
-        # Test unigram precision without rounding.
-        assert (hyp1_unigram_precision == 0.9444444444444444)
-        assert (hyp2_unigram_precision == 0.5714285714285714)
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
+        # Test unigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
+        self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
         # Test unigram precision with rounding.
         assert (round(hyp1_unigram_precision, 4) == 0.9444)
         assert (round(hyp2_unigram_precision, 4) == 0.5714)
         
         # Bigram precision
-        hyp1_bigram_precision = _modified_precision(references, hyp1, n=2)
-        hyp2_bigram_precision = _modified_precision(references, hyp2, n=2)
-        # Test bigram precision without rounding.
-        assert (hyp1_bigram_precision == 0.5882352941176471)
-        assert (hyp2_bigram_precision == 0.07692307692307693)
+        hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
+        hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
+        # Test bigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
+        self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
         # Test bigram precision with rounding.
         assert (round(hyp1_bigram_precision, 4) == 0.5882)
         assert (round(hyp2_bigram_precision, 4) == 0.0769)
         
+    def test_zero_matches(self):
+        # Test case where there are 0 matches.
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = 'John loves Mary'.split()
+        
+        # Test BLEU for n-gram orders from 1 to len(hypothesis) - 1.
+        for n in range(1,len(hypothesis)):
+            weights = [1.0/n] * n # Uniform weights.
+            assert(sentence_bleu(references, hypothesis, weights) == 0)
+    
+    def test_full_matches(self):    
+        # Test case where there is a 100% match.
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+    
+        # Test BLEU for n-gram orders from 1 to len(hypothesis) - 1.
+        for n in range(1,len(hypothesis)):
+            weights = [1.0/n] * n # Uniform weights.
+            assert(sentence_bleu(references, hypothesis, weights) == 1.0)
+    
+    def test_partial_matches_hypothesis_longer_than_reference(self):
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary who loves Mike'.split()
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.4729, places=4)
+            
+
+@unittest.skip("Skipping fringe cases for BLEU.")
+class TestBLEUFringeCases(unittest.TestCase):
+
+    def test_case_where_n_is_bigger_than_hypothesis_length(self):
+        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
+        # TODO: Currently this test breaks the BLEU implementation (13.03.2016)
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+        n = len(hypothesis) + 1  # n is one greater than the hypothesis length.
+        weights = [1.0/n] * n # Uniform weights.
+        assert(sentence_bleu(references, hypothesis, weights) == 1.0)
+    
+    def test_empty_hypothesis(self):
+        # Test case where the hypothesis is empty.
+        # TODO: Currently this test breaks the BLEU implementation (13.03.2016)
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = []
+        assert(sentence_bleu(references, hypothesis) == 0)
+        
+    def test_empty_references(self):
+        # Test case where the reference is empty.
+        # TODO: Currently this test breaks the BLEU implementation (13.03.2016)
+        references = [[]]
+        hypothesis = 'John loves Mary'.split()
+        assert(sentence_bleu(references, hypothesis) == 0)
+        
+        
     def test_brevity_penalty(self):
         pass
-    
\ No newline at end of file
+    
diff --git a/nltk/test/unit/translate/test_stack_decoder.py b/nltk/test/unit/translate/test_stack_decoder.py
index ea20e08..5055d31 100644
--- a/nltk/test/unit/translate/test_stack_decoder.py
+++ b/nltk/test/unit/translate/test_stack_decoder.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Stack decoder
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/test/util.doctest b/nltk/test/util.doctest
index c28f7cd..472624c 100644
--- a/nltk/test/util.doctest
+++ b/nltk/test/util.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest
index 7c72631..178e968 100644
--- a/nltk/test/wordnet.doctest
+++ b/nltk/test/wordnet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/wordnet_lch.doctest b/nltk/test/wordnet_lch.doctest
index 51d676f..2f65833 100644
--- a/nltk/test/wordnet_lch.doctest
+++ b/nltk/test/wordnet_lch.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
diff --git a/nltk/test/wsd.doctest b/nltk/test/wsd.doctest
index 1e97cf7..7fea034 100644
--- a/nltk/test/wsd.doctest
+++ b/nltk/test/wsd.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2015 NLTK Project
+.. Copyright (C) 2001-2016 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. -*- coding: utf-8 -*-
diff --git a/nltk/text.py b/nltk/text.py
index 90ac7be..2340269 100644
--- a/nltk/text.py
+++ b/nltk/text.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Texts
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -565,7 +565,7 @@ class TextCollection(Text):
         if idf is None:
             matches = len([True for text in self._texts if term in text])
             # FIXME Should this raise some kind of error instead?
-            idf = (log(float(len(self._texts)) / matches) if matches else 0.0)
+            idf = (log(len(self._texts) / matches) if matches else 0.0)
             self._idf_cache[term] = idf
         return idf
 
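
A standalone sketch (hypothetical counts, not the TextCollection API) of the idf expression used in the hunk above:

    # idf = log(number of texts / number of texts containing the term),
    # or 0.0 when the term never occurs; counts below are hypothetical.
    from __future__ import division
    from math import log

    num_texts, matches = 10, 4      # hypothetical collection statistics
    idf = log(num_texts / matches) if matches else 0.0
    print(round(idf, 4))            # 0.9163
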
diff --git a/nltk/tgrep.py b/nltk/tgrep.py
index eeea37f..11fa8cf 100644
--- a/nltk/tgrep.py
+++ b/nltk/tgrep.py
@@ -3,7 +3,7 @@
 #
 # Natural Language Toolkit: TGrep search
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Will Roberts <wildwilhelm at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/tokenize/__init__.py b/nltk/tokenize/__init__.py
index 4e00346..4d36b9e 100644
--- a/nltk/tokenize/__init__.py
+++ b/nltk/tokenize/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -73,6 +73,8 @@ from nltk.tokenize.stanford import StanfordTokenizer
 from nltk.tokenize.texttiling import TextTilingTokenizer
 from nltk.tokenize.casual   import (TweetTokenizer, casual_tokenize)
 from nltk.tokenize.mwe      import MWETokenizer
+from nltk.tokenize.util     import string_span_tokenize, regexp_span_tokenize
+from nltk.tokenize.stanford_segmenter import StanfordSegmenter
 
 # Standard sentence tokenizer.
 def sent_tokenize(text, language='english'):
diff --git a/nltk/tokenize/api.py b/nltk/tokenize/api.py
index c2d8743..8a121e8 100644
--- a/nltk/tokenize/api.py
+++ b/nltk/tokenize/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizer Interface
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
@@ -10,14 +10,19 @@
 Tokenizer Interface
 """
 
+from abc import ABCMeta, abstractmethod
+from nltk.six import add_metaclass
+
 from nltk.internals import overridden
 from nltk.tokenize.util import string_span_tokenize
 
+@add_metaclass(ABCMeta)
 class TokenizerI(object):
     """
     A processing interface for tokenizing a string.
     Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
     """
+    @abstractmethod
     def tokenize(self, s):
         """
         Return a tokenized copy of *s*.
@@ -26,8 +31,6 @@ class TokenizerI(object):
         """
         if overridden(self.tokenize_sents):
             return self.tokenize_sents([s])[0]
-        else:
-            raise NotImplementedError()
 
     def span_tokenize(self, s):
         """
diff --git a/nltk/tokenize/casual.py b/nltk/tokenize/casual.py
index f45926a..8d73fcf 100644
--- a/nltk/tokenize/casual.py
+++ b/nltk/tokenize/casual.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Twitter Tokenizer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Christopher Potts <cgpotts at stanford.edu>
 #         Ewan Klein <ewan at inf.ed.ac.uk> (modifications)
 #         Pierpaolo Pantone <> (modifications)
@@ -153,10 +153,12 @@ REGEXPS = (
     # Twitter hashtags:
     r"""(?:\#+[\w_]+[\w\'_\-]*[\w_]+)"""
     ,
-
+    # email addresses
+    r"""[\w.+-]+@[\w-]+\.(?:[\w-]\.?)+[\w-]"""
+    ,
     # Remaining word types:
     r"""
-    (?:[a-z][a-z'\-_]+[a-z])       # Words with apostrophes or dashes.
+    (?:[^\W\d_](?:[^\W\d_]|['\-_])+[^\W\d_]) # Words with apostrophes or dashes.
     |
     (?:[+\-]?\d+[,/.:-]\d+[+\-]?)  # Numbers, including fractions, decimals.
     |
@@ -174,6 +176,9 @@ REGEXPS = (
 WORD_RE = re.compile(r"""(%s)""" % "|".join(REGEXPS), re.VERBOSE | re.I
                      | re.UNICODE)
 
+# WORD_RE performs poorly on these patterns: runs of four or more identical
+# non-alphanumeric characters, which HANG_RE shortens to three before tokenizing.
+HANG_RE = re.compile(r'([^a-zA-Z0-9])\1{3,}')
+
 # The emoticon string gets its own regex so that we can preserve case for
 # them as needed:
 EMOTICON_RE = re.compile(EMOTICONS, re.VERBOSE | re.I | re.UNICODE)
@@ -264,35 +269,13 @@ class TweetTokenizer:
         >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
         >>> tknzr.tokenize(s0)
         ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']
-        >>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)"
-        >>> tknzr.tokenize(s1)
-        ['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)']
-        >>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn"
-        >>> tknzr.tokenize(s2)
-        ['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn']
-        >>> s3 = "@Insanomania They do... Their mentality doesn't :("
-        >>> tknzr.tokenize(s3)
-        ['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':(']
-        >>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!"
-        >>> tknzr.tokenize(s4)
-        ['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!']
-        >>> tknzr = TweetTokenizer(reduce_len=True)
-        >>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :("
-        >>> tknzr.tokenize(s5)
-        ['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':(']
 
     Examples using `strip_handles` and `reduce_len` parameters:
 
         >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
-        >>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!'
-        >>> tknzr.tokenize(s6)
+        >>> s1 = '@remy: This is waaaaayyyy too much for you!!!!!!'
+        >>> tknzr.tokenize(s1)
         [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']
-        >>> s7 = '@_willy65: No place for @chuck tonight. Sorry.'
-        >>> tknzr.tokenize(s7)
-        [':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.']
-        >>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com'
-        >>> tknzr.tokenize(s8)
-        ['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin', '@email', '.', 'com']
     """
 
     def __init__(self, preserve_case=True, reduce_len=False, strip_handles=False):
@@ -315,8 +298,10 @@ class TweetTokenizer:
         # Normalize word lengthening
         if self.reduce_len:
             text = reduce_lengthening(text)
+        # Shorten problematic sequences of characters
+        safe_text = HANG_RE.sub(r'\1\1\1', text)
         # Tokenize:
-        words = WORD_RE.findall(text)
+        words = WORD_RE.findall(safe_text)
         # Possibly alter the case, but avoid changing emoticons like :D into :d:
         if not self.preserve_case:
             words = list(map((lambda x : x if EMOTICON_RE.search(x) else
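
A standalone sketch of the HANG_RE pre-pass added above (the pattern is copied from the hunk; the sample string is illustrative):

    # Runs of four or more identical non-alphanumeric characters are collapsed
    # to three before WORD_RE tokenizes, which avoids pathological backtracking.
    import re

    HANG_RE = re.compile(r'([^a-zA-Z0-9])\1{3,}')
    print(HANG_RE.sub(r'\1\1\1', 'wait.......... what!!!!!!!!'))
    # wait... what!!!
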
diff --git a/nltk/tokenize/mwe.py b/nltk/tokenize/mwe.py
index c13aabd..3c2dda0 100644
--- a/nltk/tokenize/mwe.py
+++ b/nltk/tokenize/mwe.py
@@ -1,6 +1,6 @@
 # Multi-Word Expression tokenizer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Rob Malouf <rmalouf at mail.sdsu.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -28,62 +28,70 @@ of MWEs:
     ['In', 'a_little', 'or', 'a_little_bit', 'or', 'a_lot', 'in_spite_of']
 
 """
+from nltk.util import Trie
 
 from nltk.tokenize.api import TokenizerI
 
 
 class MWETokenizer(TokenizerI):
-    """
-    A tokenizer that processes tokenized text and merges multi-word expressions
-    into single tokens:
-
-        >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+')
-        >>> tokenizer.tokenize("An hors d'oeuvre tonight, sir?".split())
-        ['An', "hors+d'oeuvre", 'tonight,', 'sir?']
-
-    :type mwes: list(list(str))
-    :param mwes: A sequence of multi-word expressions to be merged, where
-        each MWE is a sequence of strings.
-    :type separator: str
-    :param separator: String that should be inserted between words in a multi-word
-        expression token.
-
+    """A tokenizer that processes tokenized text and merges multi-word expressions
+    into single tokens.
     """
 
     def __init__(self, mwes=None, separator='_'):
+        """Initialize the multi-word tokenizer with a list of expressions and a
+        separator
+
+        :type mwes: list(list(str))
+        :param mwes: A sequence of multi-word expressions to be merged, where
+            each MWE is a sequence of strings.
+        :type separator: str
+        :param separator: String that should be inserted between words in a multi-word
+            expression token. (Default is '_')
 
+        """
         if not mwes:
             mwes = []
-        self._mwes = dict()
+        self._mwes = Trie(mwes)
         self._separator = separator
-        for mwe in mwes:
-            self.add_mwe(mwe)
 
-    def add_mwe(self, mwe, _trie=None):
-        """
-        Add a multi-word expression to the lexicon (stored as a word trie)
+    def add_mwe(self, mwe):
+        """Add a multi-word expression to the lexicon (stored as a word trie)
 
-        We represent the trie as a dict of dicts:
+        We use ``util.Trie`` to represent the trie. Its form is a dict of dicts. 
+        The key True marks the end of a valid MWE.
 
-            >>> tokenizer = MWETokenizer([('a', 'b'), ('a', 'b', 'c'), ('a', 'x')])
-            >>> tokenizer._mwes
-            {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        :param mwe: The multi-word expression we're adding into the word trie
+        :type mwe: tuple(str) or list(str)
 
-        The key True marks the end of a valid MWE
+        :Example:
 
-        """
+        >>> tokenizer = MWETokenizer()
+        >>> tokenizer.add_mwe(('a', 'b'))
+        >>> tokenizer.add_mwe(('a', 'b', 'c'))
+        >>> tokenizer.add_mwe(('a', 'x'))
+        >>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
+        >>> tokenizer._mwes.as_dict() == expected
+        True
 
-        if _trie is None:
-            _trie = self._mwes
-        if mwe:
-            if mwe[0] not in _trie:
-                _trie[mwe[0]] = dict()
-            self.add_mwe(mwe[1:], _trie=_trie[mwe[0]])
-        else:
-            _trie[True] = None
+        """
+        self._mwes.insert(mwe)
 
     def tokenize(self, text):
+        """
+
+        :param text: A list containing tokenized text
+        :type text: list(str)
+        :return: A list of the tokenized text with multi-words merged together
+        :rtype: list(str)
+
+        :Example:
 
+        >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+')
+        >>> tokenizer.tokenize("An hors d'oeuvre tonight, sir?".split())
+        ['An', "hors+d'oeuvre", 'tonight,', 'sir?']
+        
+        """
         i = 0
         n = len(text)
         result = []
@@ -97,7 +105,7 @@ class MWETokenizer(TokenizerI):
                     trie = trie[text[j]]
                     j = j + 1
                 else:
-                    if True in trie:
+                    if Trie.LEAF in trie:
                         # success!
                         result.append(self._separator.join(text[i:j]))
                         i = j
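
The merging loop above walks the dict-of-dicts trie greedily from each position; a standalone sketch of the same scan, where the merge helper is hypothetical and LEAF stands in for util.Trie.LEAF:

    # The key True marks the end of a valid MWE, mirroring util.Trie.
    LEAF = True
    trie = {'a': {'b': {LEAF: None, 'c': {LEAF: None}}, 'x': {LEAF: None}}}

    def merge(tokens, trie, separator='_'):
        i, n, result = 0, len(tokens), []
        while i < n:
            if tokens[i] in trie:
                node, j = trie, i
                while j < n and tokens[j] in node:
                    node = node[tokens[j]]
                    j += 1
                if LEAF in node:                 # a complete MWE ends at j
                    result.append(separator.join(tokens[i:j]))
                    i = j
                    continue
            result.append(tokens[i])
            i += 1
        return result

    print(merge(['a', 'b', 'c', 'd', 'a', 'x'], trie))   # ['a_b_c', 'd', 'a_x']
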
diff --git a/nltk/tokenize/punkt.py b/nltk/tokenize/punkt.py
index 31db861..4aa11f1 100644
--- a/nltk/tokenize/punkt.py
+++ b/nltk/tokenize/punkt.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Punkt sentence tokenizer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Algorithm: Kiss & Strunk (2006)
 # Author: Willy <willy at csse.unimelb.edu.au> (original Python port)
 #         Steven Bird <stevenbird1 at gmail.com> (additions)
@@ -99,7 +99,7 @@ The algorithm for this tokenizer is described in::
   Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence
     Boundary Detection.  Computational Linguistics 32: 485-525.
 """
-from __future__ import print_function, unicode_literals
+from __future__ import print_function, unicode_literals, division
 
 # TODO: Make orthographic heuristic less susceptible to overtraining
 # TODO: Frequent sentence starters optionally exclude always-capitalised words
@@ -1036,12 +1036,12 @@ class PunktTrainer(PunktBaseClass):
         ratio scores for abbreviation candidates.  The details of how
         this works is available in the paper.
         """
-        p1 = float(count_b) / N
+        p1 = count_b / N
         p2 = 0.99
 
-        null_hypo = (float(count_ab) * math.log(p1) +
+        null_hypo = (count_ab * math.log(p1) +
                      (count_a - count_ab) * math.log(1.0 - p1))
-        alt_hypo  = (float(count_ab) * math.log(p2) +
+        alt_hypo  = (count_ab * math.log(p2) +
                      (count_a - count_ab) * math.log(1.0 - p2))
 
         likelihood = null_hypo - alt_hypo
@@ -1060,9 +1060,9 @@ class PunktTrainer(PunktBaseClass):
         """
         import math
 
-        p = 1.0 * count_b / N
-        p1 = 1.0 * count_ab / count_a
-        p2 = 1.0 * (count_b - count_ab) / (N - count_a)
+        p = count_b / N
+        p1 = count_ab / count_a
+        p2 = (count_b - count_ab) / (N - count_a)
 
         summand1 = (count_ab * math.log(p) +
                     (count_a - count_ab) * math.log(1.0 - p))
@@ -1126,8 +1126,8 @@ class PunktTrainer(PunktBaseClass):
                                               col_count, self._type_fdist.N())
                 # Filter out the not-so-collocative
                 if (ll >= self.COLLOCATION and
-                    (float(self._type_fdist.N())/typ1_count >
-                     float(typ2_count)/col_count)):
+                    (self._type_fdist.N()/typ1_count >
+                     typ2_count/col_count)):
                     yield (typ1, typ2), ll
 
     #////////////////////////////////////////////////////////////
@@ -1166,8 +1166,8 @@ class PunktTrainer(PunktBaseClass):
                                           self._type_fdist.N())
 
             if (ll >= self.SENT_STARTER and
-                float(self._type_fdist.N())/self._sentbreak_count >
-                float(typ_count)/typ_at_break_count):
+                self._type_fdist.N()/self._sentbreak_count >
+                typ_count/typ_at_break_count):
 
                 yield typ, ll
 
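
A standalone sketch (hypothetical counts, not the PunktTrainer API) of the abbreviation log-likelihood arithmetic in the hunk above, using the same true-division style; the -2 factor follows the usual Dunning log-likelihood-ratio formulation:

    from __future__ import division
    import math

    def dunning_log_likelihood(count_a, count_b, count_ab, N):
        # count_a: occurrences of the candidate type, count_b: sentence breaks,
        # count_ab: candidate followed by a break, N: total tokens.
        p1 = count_b / N
        p2 = 0.99
        null_hypo = (count_ab * math.log(p1) +
                     (count_a - count_ab) * math.log(1.0 - p1))
        alt_hypo = (count_ab * math.log(p2) +
                    (count_a - count_ab) * math.log(1.0 - p2))
        return -2.0 * (null_hypo - alt_hypo)

    # Hypothetical counts: the candidate occurs 20 times, 18 of them before
    # a period, with 500 periods in 10000 tokens.
    print(dunning_log_likelihood(count_a=20, count_b=500, count_ab=18, N=10000))
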
diff --git a/nltk/tokenize/regexp.py b/nltk/tokenize/regexp.py
index 5385e1f..eb5a5e9 100644
--- a/nltk/tokenize/regexp.py
+++ b/nltk/tokenize/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Trevor Cohn <tacohn at csse.unimelb.edu.au>
@@ -83,7 +83,8 @@ class RegexpTokenizer(TokenizerI):
 
     :type pattern: str
     :param pattern: The pattern used to build this tokenizer.
-        (This pattern may safely contain capturing parentheses.)
+        (This pattern must not contain capturing parentheses;
+        use non-capturing parentheses, e.g. (?:...), instead.)
     :type gaps: bool
     :param gaps: True if this tokenizer's pattern should be used
         to find separators between tokens; False if this
@@ -112,7 +113,7 @@ class RegexpTokenizer(TokenizerI):
         
     def _check_regexp(self):
         if self._regexp is None:
-            self._regexp = re.compile(self._pattern)
+            self._regexp = re.compile(self._pattern, self._flags)
         
     def tokenize(self, text):
         self._check_regexp()
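
A minimal sketch of the constraint documented above (the example strings are illustrative): RegexpTokenizer patterns should avoid capturing groups, since re.findall() would return group contents rather than whole matches; (?:...) groups are safe.

    from nltk.tokenize import RegexpTokenizer

    tokenizer = RegexpTokenizer(r'\w+|\$[\d.]+|\S+')    # no capturing groups
    print(tokenizer.tokenize("Good muffins cost $3.88 in New York."))
    # ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York', '.']

    grouped = RegexpTokenizer(r'(?:\d+\.\d+)|\w+')      # non-capturing group
    print(grouped.tokenize("pi is roughly 3.14"))
    # ['pi', 'is', 'roughly', '3.14']
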
diff --git a/nltk/tokenize/sexpr.py b/nltk/tokenize/sexpr.py
index 2e818f0..18a1cdf 100644
--- a/nltk/tokenize/sexpr.py
+++ b/nltk/tokenize/sexpr.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Yoav Goldberg <yoavg at cs.bgu.ac.il>
 #         Steven Bird <stevenbird1 at gmail.com> (minor edits)
 # URL: <http://nltk.sourceforge.net>
diff --git a/nltk/tokenize/simple.py b/nltk/tokenize/simple.py
index fbf6002..e519b25 100644
--- a/nltk/tokenize/simple.py
+++ b/nltk/tokenize/simple.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Simple Tokenizers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.sourceforge.net>
diff --git a/nltk/tokenize/stanford.py b/nltk/tokenize/stanford.py
index 74a18e5..e214d21 100644
--- a/nltk/tokenize/stanford.py
+++ b/nltk/tokenize/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Tokenizer
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
@@ -15,7 +15,7 @@ import json
 from subprocess import PIPE
 
 from nltk import compat
-from nltk.internals import find_jar, config_java, java, _java_options
+from nltk.internals import find_jar, config_java, java, _java_options, find_jars_within_path
 
 from nltk.tokenize.api import TokenizerI
 
@@ -43,7 +43,11 @@ class StanfordTokenizer(TokenizerI):
             searchpath=(), url=_stanford_url,
             verbose=verbose
         )
-
+        
+        # Adding logging jar files to classpath 
+        stanford_dir = os.path.split(self._stanford_jar)[0]
+        self._stanford_jar = tuple(find_jars_within_path(stanford_dir))
+        
         self._encoding = encoding
         self.java_options = java_options
 
diff --git a/nltk/tokenize/stanford_segmenter.py b/nltk/tokenize/stanford_segmenter.py
new file mode 100644
index 0000000..54a7dbf
--- /dev/null
+++ b/nltk/tokenize/stanford_segmenter.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Interface to the Stanford Chinese Segmenter
+#
+# Copyright (C) 2001-2016 NLTK Project
+# Author: 52nlp <52nlpcn at gmail.com>
+#         Casper Lehmann-Strøm <casperlehmann at gmail.com>
+#
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import unicode_literals, print_function
+
+import tempfile
+import os
+import json
+from subprocess import PIPE
+
+from nltk import compat
+from nltk.internals import find_jar, config_java, java, _java_options
+
+from nltk.tokenize.api import TokenizerI
+
+_stanford_url = 'http://nlp.stanford.edu/software'
+
+class StanfordSegmenter(TokenizerI):
+    r"""
+    Interface to the Stanford Segmenter
+    >>> from nltk.tokenize.stanford_segmenter import StanfordSegmenter
+    >>> segmenter = StanfordSegmenter(
+    ...     path_to_jar="stanford-segmenter-3.6.0.jar",
+    ...     path_to_slf4j="slf4j-api.jar",
+    ...     path_to_sihan_corpora_dict="./data",
+    ...     path_to_model="./data/pku.gz",
+    ...     path_to_dict="./data/dict-chris6.ser.gz")
+    >>> sentence = u"这是斯坦福中文分词器测试"
+    >>> segmenter.segment(sentence)
+    u'\u8fd9 \u662f \u65af\u5766\u798f \u4e2d\u6587 \u5206\u8bcd\u5668 \u6d4b\u8bd5\n'
+    >>> segmenter.segment_file("test.simp.utf8")
+    u'\u9762\u5bf9 \u65b0 \u4e16\u7eaa \uff0c \u4e16\u754c \u5404\u56fd ...
+    """
+
+    _JAR = 'stanford-segmenter.jar'
+    _SLF4J = 'slf4j-api.jar'
+
+    def __init__(self, path_to_jar=None, path_to_slf4j=None,
+            path_to_sihan_corpora_dict=None,
+            path_to_model=None, path_to_dict=None,
+            encoding='UTF-8', options=None,
+            verbose=False, java_options='-mx2g'):
+        stanford_segmenter = find_jar(
+                self._JAR, path_to_jar,
+                env_vars=('STANFORD_SEGMENTER',),
+                searchpath=(), url=_stanford_url,
+                verbose=verbose)
+        slf4j = find_jar(
+                self._SLF4J, path_to_slf4j,
+                env_vars=('SLF4J',),
+                searchpath=(), url=_stanford_url,
+                verbose=verbose)
+
+        # This is passed to java as the -cp option, the segmenter needs slf4j.
+        self._stanford_jar = ':'.join(
+            [_ for _ in [stanford_segmenter, slf4j] if _ is not None])
+
+        self._sihan_corpora_dict = path_to_sihan_corpora_dict
+        self._model = path_to_model
+        self._dict = path_to_dict
+
+        self._encoding = encoding
+        self.java_options = java_options
+        options = {} if options is None else options
+        self._options_cmd = ','.join('{0}={1}'.format(key, json.dumps(val)) for key, val in options.items())
+
+    def tokenize(self, s):
+        # Python 2 compatible super() call; defers to the TokenizerI default.
+        super(StanfordSegmenter, self).tokenize(s)
+
+    def segment_file(self, input_file_path):
+        """
+        """
+        cmd = [
+            'edu.stanford.nlp.ie.crf.CRFClassifier',
+            '-sighanCorporaDict', self._sihan_corpora_dict,
+            '-textFile', input_file_path,
+            '-sighanPostProcessing', 'true',
+            '-keepAllWhitespaces', 'false',
+            '-loadClassifier', self._model,
+            '-serDictionary', self._dict
+        ]
+
+        stdout = self._execute(cmd)
+
+        return stdout
+
+    def segment(self, tokens):
+        return self.segment_sents([tokens])
+
+    def segment_sents(self, sentences):
+        """
+        """
+        encoding = self._encoding
+        # Create a temporary input file
+        _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
+
+        # Write the actual sentences to the temporary input file
+        _input_fh = os.fdopen(_input_fh, 'wb')
+        _input = '\n'.join((' '.join(x) for x in sentences))
+        if isinstance(_input, compat.text_type) and encoding:
+            _input = _input.encode(encoding)
+        _input_fh.write(_input)
+        _input_fh.close()
+
+        cmd = [
+            'edu.stanford.nlp.ie.crf.CRFClassifier',
+            '-sighanCorporaDict', self._sihan_corpora_dict,
+            '-textFile', self._input_file_path,
+            '-sighanPostProcessing', 'true',
+            '-keepAllWhitespaces', 'false',
+            '-loadClassifier', self._model,
+            '-serDictionary', self._dict
+        ]
+
+        stdout = self._execute(cmd)
+
+        # Delete the temporary file
+        os.unlink(self._input_file_path)
+
+        return stdout
+
+    def _execute(self, cmd, verbose=False):
+        encoding = self._encoding
+        cmd.extend(['-inputEncoding', encoding])
+        _options_cmd = self._options_cmd
+        if _options_cmd:
+            cmd.extend(['-options', self._options_cmd])
+
+        default_options = ' '.join(_java_options)
+
+        # Configure java.
+        config_java(options=self.java_options, verbose=verbose)
+
+        stdout, _stderr = java(
+            cmd,classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE)
+        stdout = stdout.decode(encoding)
+
+        # Return java configurations to their default values.
+        config_java(options=default_options, verbose=False)
+
+        return stdout
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        StanfordSegmenter()
+    except LookupError:
+        raise SkipTest('doctests from nltk.tokenize.stanford_segmenter are skipped because the stanford segmenter jar doesn\'t exist')
diff --git a/nltk/tokenize/texttiling.py b/nltk/tokenize/texttiling.py
index 8e7bcf2..9fa359e 100644
--- a/nltk/tokenize/texttiling.py
+++ b/nltk/tokenize/texttiling.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: TextTiling
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: George Boutsioukis
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/tokenize/treebank.py b/nltk/tokenize/treebank.py
index 55f4e62..73216d9 100644
--- a/nltk/tokenize/treebank.py
+++ b/nltk/tokenize/treebank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Michael Heilman <mheilman at cmu.edu> (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed)
 #
@@ -45,6 +45,40 @@ class TreebankWordTokenizer(TokenizerI):
         ['hi', ',', 'my', 'name', 'ca', "n't", 'hello', ',']
     """
 
+    #starting quotes
+    STARTING_QUOTES = [
+        (re.compile(r'^\"'), r'``'),
+        (re.compile(r'(``)'), r' \1 '),
+        (re.compile(r'([ (\[{<])"'), r'\1 `` '),
+    ]
+
+    #punctuation
+    PUNCTUATION = [
+        (re.compile(r'([:,])([^\d])'), r' \1 \2'),
+        (re.compile(r'([:,])$'), r' \1 '),
+        (re.compile(r'\.\.\.'), r' ... '),
+        (re.compile(r'[;@#$%&]'), r' \g<0> '),
+        (re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'), r'\1 \2\3 '),
+        (re.compile(r'[?!]'), r' \g<0> '),
+
+        (re.compile(r"([^'])' "), r"\1 ' "),
+    ]
+
+    #parens, brackets, etc.
+    PARENS_BRACKETS = [
+        (re.compile(r'[\]\[\(\)\{\}\<\>]'), r' \g<0> '),
+        (re.compile(r'--'), r' -- '),
+    ]
+
+    #ending quotes
+    ENDING_QUOTES = [
+        (re.compile(r'"'), " '' "),
+        (re.compile(r'(\S)(\'\')'), r'\1 \2 '),
+
+        (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "),
+        (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "),
+    ]
+
     # List of contractions adapted from Robert MacIntyre's tokenizer.
     CONTRACTIONS2 = [re.compile(r"(?i)\b(can)(not)\b"),
                      re.compile(r"(?i)\b(d)('ye)\b"),
@@ -60,35 +94,20 @@ class TreebankWordTokenizer(TokenizerI):
                      re.compile(r"(?i)\b(wha)(t)(cha)\b")]
 
     def tokenize(self, text):
-        #starting quotes
-        text = re.sub(r'^\"', r'``', text)
-        text = re.sub(r'(``)', r' \1 ', text)
-        text = re.sub(r'([ (\[{<])"', r'\1 `` ', text)
-
-        #punctuation
-        text = re.sub(r'([:,])([^\d])', r' \1 \2', text)
-        text = re.sub(r'([:,])$', r' \1 ', text)
-        text = re.sub(r'\.\.\.', r' ... ', text)
-        text = re.sub(r'[;@#$%&]', r' \g<0> ', text)
-        text = re.sub(r'([^\.])(\.)([\]\)}>"\']*)\s*$', r'\1 \2\3 ', text)
-        text = re.sub(r'[?!]', r' \g<0> ', text)
+        for regexp, substitution in self.STARTING_QUOTES:
+            text = regexp.sub(substitution, text)
 
-        text = re.sub(r"([^'])' ", r"\1 ' ", text)
+        for regexp, substitution in self.PUNCTUATION:
+            text = regexp.sub(substitution, text)
 
-        #parens, brackets, etc.
-        text = re.sub(r'[\]\[\(\)\{\}\<\>]', r' \g<0> ', text)
-        text = re.sub(r'--', r' -- ', text)
+        for regexp, substitution in self.PARENS_BRACKETS:
+            text = regexp.sub(substitution, text)
 
         #add extra space to make things easier
         text = " " + text + " "
 
-        #ending quotes
-        text = re.sub(r'"', " '' ", text)
-        text = re.sub(r'(\S)(\'\')', r'\1 \2 ', text)
-
-        text = re.sub(r"([^' ])('[sS]|'[mM]|'[dD]|') ", r"\1 \2 ", text)
-        text = re.sub(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) ", r"\1 \2 ",
-                      text)
+        for regexp, substitution in self.ENDING_QUOTES:
+            text = regexp.sub(substitution, text)
 
         for regexp in self.CONTRACTIONS2:
             text = regexp.sub(r' \1 \2 ', text)
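
A minimal sketch of the refactoring above: (compiled regexp, substitution) pairs are built once and applied in a loop, instead of re.sub() calls with string patterns on every tokenize() invocation. The apply_rules helper is hypothetical; the two rules are copied from the PUNCTUATION list.

    import re

    PUNCTUATION = [
        (re.compile(r'([:,])([^\d])'), r' \1 \2'),
        (re.compile(r'[;@#$%&]'), r' \g<0> '),
    ]

    def apply_rules(text, rules):
        for regexp, substitution in rules:
            text = regexp.sub(substitution, text)
        return text

    print(apply_rules("hello, world; fine", PUNCTUATION).split())
    # ['hello', ',', 'world', ';', 'fine']
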
diff --git a/nltk/tokenize/util.py b/nltk/tokenize/util.py
index 08b73fe..a49900b 100644
--- a/nltk/tokenize/util.py
+++ b/nltk/tokenize/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizer Utilities
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
@@ -45,23 +45,23 @@ def regexp_span_tokenize(s, regexp):
     Return the offsets of the tokens in *s*, as a sequence of ``(start, end)``
     tuples, by splitting the string at each successive match of *regexp*.
 
-        >>> from nltk.tokenize import WhitespaceTokenizer
+        >>> from nltk.tokenize.util import regexp_span_tokenize
         >>> s = '''Good muffins cost $3.88\nin New York.  Please buy me
         ... two of them.\n\nThanks.'''
-        >>> list(WhitespaceTokenizer().span_tokenize(s))
+        >>> list(regexp_span_tokenize(s, r'\s'))
         [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36),
         (38, 44), (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
 
     :param s: the string to be tokenized
     :type s: str
-    :param regexp: regular expression that matches token separators
+    :param regexp: regular expression that matches token separators (must not be empty)
     :type regexp: str
     :rtype: iter(tuple(int, int))
     """
     left = 0
     for m in finditer(regexp, s):
         right, next = m.span()
-        if right != 0:
+        if right != left:
             yield left, right
         left = next
     yield left, len(s)
diff --git a/nltk/toolbox.py b/nltk/toolbox.py
index 8b86fc6..cecb282 100644
--- a/nltk/toolbox.py
+++ b/nltk/toolbox.py
@@ -1,7 +1,7 @@
 # coding: utf-8
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Greg Aumann <greg_aumann at sil.org>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/translate/__init__.py b/nltk/translate/__init__.py
index 4f83e17..6ec96a0 100644
--- a/nltk/translate/__init__.py
+++ b/nltk/translate/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Machine Translation
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>, Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -18,6 +18,7 @@ from nltk.translate.ibm2 import IBMModel2
 from nltk.translate.ibm3 import IBMModel3
 from nltk.translate.ibm4 import IBMModel4
 from nltk.translate.ibm5 import IBMModel5
-from nltk.translate.bleu_score import bleu
+from nltk.translate.bleu_score import sentence_bleu as bleu
+from nltk.translate.ribes_score import sentence_ribes as ribes
 from nltk.translate.metrics import alignment_error_rate
 from nltk.translate.stack_decoder import StackDecoder
diff --git a/nltk/translate/api.py b/nltk/translate/api.py
index 2302331..d90bd50 100644
--- a/nltk/translate/api.py
+++ b/nltk/translate/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for alignment and translation objects 
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Will Zhang <wilzzha at gmail.com>
 #         Guan Gui <ggui at student.unimelb.edu.au>
 #         Steven Bird <stevenbird1 at gmail.com>
@@ -127,7 +127,7 @@ class AlignedSent(object):
             raise Exception('Cannot find the dot binary from Graphviz package')
         out, err = process.communicate(dot_string)
          
-        return out
+        return out.decode('utf8')
     
     
     def __str__(self):
diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
index d028130..668149b 100644
--- a/nltk/translate/bleu_score.py
+++ b/nltk/translate/bleu_score.py
@@ -1,31 +1,36 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: BLEU Score
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
-# Contributors: Dmitrijs Milajevs
+# Contributors: Dmitrijs Milajevs, Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-"""BLEU score implementation."""
 
+"""BLEU score implementation."""
 from __future__ import division
 
 import math
+import fractions
+from collections import Counter
 
-from nltk.tokenize import word_tokenize
-from nltk.compat import Counter
 from nltk.util import ngrams
 
+try:
+    fractions.Fraction(0, 1000, _normalize=False)
+    from fractions import Fraction
+except TypeError:
+    from nltk.compat import Fraction
+    
 
-def bleu(references, hypothesis, weights):
+def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
+                  smoothing_function=None):
     """
     Calculate BLEU score (Bilingual Evaluation Understudy) from
     Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
     "BLEU: a method for automatic evaluation of machine translation." 
     In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf
 
-
-    >>> weights = [0.25, 0.25, 0.25, 0.25]
     >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
     ...               'ensures', 'that', 'the', 'military', 'always',
     ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
@@ -47,41 +52,146 @@ def bleu(references, hypothesis, weights):
     ...               'army', 'always', 'to', 'heed', 'the', 'directions',
     ...               'of', 'the', 'party']
 
-    >>> bleu([reference1, reference2, reference3], hypothesis1, weights)
-    0.5045666840058485
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
+    0.5045...
 
-    >>> bleu([reference1, reference2, reference3], hypothesis2, weights)
-    0
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
+    0.3969...
 
+    The default BLEU calculates a score for up to 4-grams using uniform
+    weights. To evaluate your translations with higher- or lower-order n-grams,
+    use customized weights. E.g. when accounting for up to 5-grams with uniform
+    weights:
+
+    >>> weights = (0.1666, 0.1666, 0.1666, 0.1666, 0.1666)
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights)
+    0.45838627164939455
+    
     :param references: reference sentences
     :type references: list(list(str))
     :param hypothesis: a hypothesis sentence
     :type hypothesis: list(str)
     :param weights: weights for unigrams, bigrams, trigrams and so on
     :type weights: list(float)
+    :return: The sentence-level BLEU score.
+    :rtype: float
     """
-    p_ns = (
-        _modified_precision(references, hypothesis, i)
-        for i, _ in enumerate(weights, start=1)
-    )
-
-    try:
-        s = math.fsum(w * math.log(p_n) for w, p_n in zip(weights, p_ns))
-    except ValueError:
-        # some p_ns is 0
-        return 0
+    return corpus_bleu([references], [hypothesis], weights, smoothing_function)
+
+
+def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
+                smoothing_function=None):
+    """
+    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all 
+    the hypotheses and their respective references.  
+
+    Instead of averaging the sentence level BLEU scores (i.e. macro-average
+    precision), the original BLEU metric (Papineni et al. 2002) accounts for
+    the micro-average precision (i.e. summing the numerators and denominators
+    of each hypothesis-reference(s) pair before the division).
+    
+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...         'ensures', 'that', 'the', 'military', 'always',
+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...          'heed', 'Party', 'commands']
+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...          'guarantees', 'the', 'military', 'forces', 'always',
+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...          'of', 'the', 'party']
+    
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', 
+    ...         'interested', 'in', 'world', 'history']
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', 
+    ...          'because', 'he', 'read', 'the', 'book']
+    
+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+    >>> hypotheses = [hyp1, hyp2]
+    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
+    0.5920...
+    
+    The example below shows that corpus_bleu() is different from averaging
+    sentence_bleu() over the hypotheses:
+    
+    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
+    >>> score2 = sentence_bleu([ref2a], hyp2)
+    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
+    0.6223...
+    
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param weights: weights for unigrams, bigrams, trigrams and so on
+    :type weights: list(float)
+    :return: The corpus-level BLEU score.
+    :rtype: float
+    """
+    # Before proceeding to compute BLEU, perform sanity checks.
 
-    bp = _brevity_penalty(references, hypothesis)
-    return bp * math.exp(s)
+    p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
+    p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
+    hyp_lengths, ref_lengths = 0, 0
+    
+    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"
+    
+    # Iterate through each hypothesis and their corresponding references.
+    for references, hypothesis in zip(list_of_references, hypotheses):
+        # For each order of ngram, calculate the numerator and
+        # denominator for the corpus-level modified precision.
+        for i, _ in enumerate(weights, start=1): 
+            p_i = modified_precision(references, hypothesis, i)
+            p_numerators[i] += p_i.numerator
+            p_denominators[i] += p_i.denominator
+            
+        # Calculate the hypothesis length and the closest reference length.
+        # Adds them to the corpus-level hypothesis and reference counts.
+        hyp_len =  len(hypothesis)
+        hyp_lengths += hyp_len
+        ref_lengths += closest_ref_length(references, hyp_len)
+    
+    # Calculate corpus-level brevity penalty.
+    bp = brevity_penalty(ref_lengths, hyp_lengths)
+    
+    # Collects the various precision values for the different ngram orders.
+    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False) 
+           for i, _ in enumerate(weights, start=1)]
+    
+    # Returns 0 if there are no matching n-grams.
+    # We only need to check p_numerators[1] == 0, since if there are no
+    # unigram matches, there won't be any higher order ngram matches.
+    if p_numerators[1] == 0:
+        return 0
+    
+    # Smooth the modified precision values.
+    # Note: the smoothing function may convert the values into floats.
+    if smoothing_function:
+        p_n = smoothing_function(p_n, references=references, 
+                                 hypothesis=hypothesis, hyp_len=hyp_len)
+    
+    # Calculates the overall modified precision for all ngrams
+    # as the sum of the products of the weights and the respective log *p_n*.
+    s = (w * math.log(p_i) for w, p_i in zip(weights, p_n) 
+         if p_i.numerator != 0)
+        
+    return bp * math.exp(math.fsum(s))
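
To make the micro- vs. macro-average distinction in the docstring above concrete, here is a minimal, self-contained sketch with made-up clipped n-gram counts (the numbers are purely illustrative and are not produced by the functions in this patch):

    from fractions import Fraction

    # Hypothetical (matched n-grams, total n-grams in hypothesis) for two pairs.
    per_sentence = [(7, 10), (2, 8)]

    # Macro-average: compute each sentence's precision, then average the ratios.
    macro = sum(Fraction(num, den) for num, den in per_sentence) / len(per_sentence)

    # Micro-average (what corpus_bleu does): sum numerators and denominators
    # across the corpus first, then divide once.
    micro = Fraction(sum(num for num, _ in per_sentence),
                     sum(den for _, den in per_sentence))

    print(float(macro), float(micro))  # 0.475 0.5
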
 
 
-def _modified_precision(references, hypothesis, n):
+def modified_precision(references, hypothesis, n):
     """
     Calculate modified ngram precision.
 
     The normal precision method may lead to some wrong translations with
     high-precision, e.g., the translation, in which a word of reference
-    repeats several times, has very high precision. 
+    repeats several times, has very high precision.     
+
+    This function only returns the Fraction object that contains the numerator 
+    and denominator necessary to calculate the corpus-level precision. 
+    To calculate the modified precision for a single pair of hypothesis and 
+    references, cast the Fraction object into a float. 
     
+    The famous "the the the ... " example shows that you can get an inflated
+    precision score by duplicating high frequency words.
@@ -90,8 +200,8 @@ def _modified_precision(references, hypothesis, n):
         >>> reference2 = 'there is a cat on the mat'.split()
         >>> hypothesis1 = 'the the the the the the the'.split()
         >>> references = [reference1, reference2]
-        >>> _modified_precision(references, hypothesis1, n=1)
-        0.2857142857142857
+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+        0.2857...
     
     In the modified n-gram precision, a reference word will be considered 
     exhausted after a matching hypothesis word is identified, e.g.
@@ -108,9 +218,9 @@ def _modified_precision(references, hypothesis, n):
         ...               'of', 'the', 'party']
         >>> hypothesis = 'of the'.split()
         >>> references = [reference1, reference2, reference3]
-        >>> _modified_precision(references, hypothesis, n=1)
+        >>> float(modified_precision(references, hypothesis, n=1))
         1.0
-        >>> _modified_precision(references, hypothesis, n=2)
+        >>> float(modified_precision(references, hypothesis, n=2))
         1.0
         
     An example of a normal machine translation hypothesis:
@@ -136,39 +246,67 @@ def _modified_precision(references, hypothesis, n):
         ...               'army', 'always', 'to', 'heed', 'the', 'directions',
         ...               'of', 'the', 'party']
         >>> references = [reference1, reference2, reference3]
-        >>> _modified_precision(references, hypothesis1, n=1)
-        0.9444444444444444
-        >>> _modified_precision(references, hypothesis2, n=1)
-        0.5714285714285714
-        >>> _modified_precision(references, hypothesis1, n=2)
+        >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
+        0.9444...
+        >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
+        0.5714...
+        >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
         0.5882352941176471
-        >>> _modified_precision(references, hypothesis2, n=2)
-        0.07692307692307693
-
+        >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
+        0.07692...
+     
+    
     :param references: A list of reference translations.
     :type references: list(list(str))
     :param hypothesis: A hypothesis translation.
     :type hypothesis: list(str)
     :param n: The ngram order.
     :type n: int
+    :return: BLEU's modified precision for the nth order ngram.
+    :rtype: Fraction
     """
+    # Extracts all ngrams in hypothesis.
     counts = Counter(ngrams(hypothesis, n))
 
-    if not counts:
-        return 0
-
+    # Extract a union of references' counts.
+    ## max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
     max_counts = {}
     for reference in references:
         reference_counts = Counter(ngrams(reference, n))
         for ngram in counts:
-            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
+            max_counts[ngram] = max(max_counts.get(ngram, 0), 
+                                    reference_counts[ngram])
+    
+    # Assigns the intersection between hypothesis and references' counts.
+    clipped_counts = {ngram: min(count, max_counts[ngram]) 
+                      for ngram, count in counts.items()}
 
-    clipped_counts = dict((ngram, min(count, max_counts[ngram])) for ngram, count in counts.items())
+    numerator = sum(clipped_counts.values())
+    denominator = sum(counts.values())
+    
+    return Fraction(numerator, denominator, _normalize=False)  
+    
 
-    return sum(clipped_counts.values()) / sum(counts.values())
+def closest_ref_length(references, hyp_len):
+    """
+    This function finds the reference length that is closest to the length of
+    the hypothesis. The closest reference length is referred to as the *r*
+    variable in the brevity penalty formula from Papineni et al. (2002).
+
+    :param references: A list of reference translations.
+    :type references: list(list(str))
+    :param hyp_len: The length of the hypothesis.
+    :type hyp_len: int
+    :return: The length of the reference that's closest to the hypothesis.
+    :rtype: int
+    """
+    ref_lens = (len(reference) for reference in references)
+    closest_ref_len = min(ref_lens, key=lambda ref_len: 
+                          (abs(ref_len - hyp_len), ref_len))
+    return closest_ref_len
 
 
-def _brevity_penalty(references, hypothesis):
+def brevity_penalty(closest_ref_len, hyp_len):
     """
     Calculate brevity penalty.
 
@@ -184,7 +322,9 @@ def _brevity_penalty(references, hypothesis):
         >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
         >>> hypothesis = list('aaaaaaaaaaaa')      # i.e. ['a'] * 12
         >>> references = [reference1, reference2, reference3]
-        >>> _brevity_penalty(references, hypothesis)
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> brevity_penalty(closest_ref_len, hyp_len)
         1.0
 
     In case a hypothesis translation is shorter than the references, penalty is
@@ -192,7 +332,9 @@ def _brevity_penalty(references, hypothesis):
 
         >>> references = [['a'] * 28, ['a'] * 28]
         >>> hypothesis = ['a'] * 12
-        >>> _brevity_penalty(references, hypothesis)
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> brevity_penalty(closest_ref_len, hyp_len)
         0.2635971381157267
 
     The length of the closest reference is used to compute the penalty. If the
@@ -202,8 +344,10 @@ def _brevity_penalty(references, hypothesis):
 
         >>> references = [['a'] * 13, ['a'] * 2]
         >>> hypothesis = ['a'] * 12
-        >>> _brevity_penalty(references, hypothesis)
-        0.9200444146293233
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+        0.9200...
 
     The brevity penalty doesn't depend on reference order. More importantly,
     when two reference sentences are at the same distance, the shortest
@@ -211,8 +355,12 @@ def _brevity_penalty(references, hypothesis):
 
         >>> references = [['a'] * 13, ['a'] * 11]
         >>> hypothesis = ['a'] * 12
-        >>> bp1 = _brevity_penalty(references, hypothesis)  
-        >>> bp2 = _brevity_penalty(reversed(references),hypothesis) 
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(reversed(references), hyp_len)
+        >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
         >>> bp1 == bp2 == 1
         True
 
@@ -220,25 +368,183 @@ def _brevity_penalty(references, hypothesis):
 
         >>> references = [['a'] * 11, ['a'] * 8]
         >>> hypothesis = ['a'] * 7
-        >>> _brevity_penalty(references, hypothesis)
-        0.8668778997501817
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
+        0.8668...
 
         >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
         >>> hypothesis = ['a'] * 7
-        >>> _brevity_penalty(references, hypothesis)
+        >>> hyp_len = len(hypothesis)
+        >>> closest_ref_len =  closest_ref_length(references, hyp_len)
+        >>> brevity_penalty(closest_ref_len, hyp_len)
         1.0
     
-    :param references: A list of reference translations.
-    :type references: list(list(str))
-    :param hypothesis: A hypothesis translation.
-    :type hypothesis: list(str)
+    :param hyp_len: The length of the hypothesis for a single sentence OR the
+        sum of all the hypotheses' lengths for a corpus.
+    :type hyp_len: int
+    :param closest_ref_len: The length of the closest reference for a single
+        hypothesis OR the sum of all the closest reference lengths for every hypothesis.
+    :type closest_ref_len: int
+    :return: BLEU's brevity penalty.
+    :rtype: float
     """
-    c = len(hypothesis)
-    ref_lens = (len(reference) for reference in references)
-    r = min(ref_lens, key=lambda ref_len: (abs(ref_len - c), ref_len))
-
-    if c > r:
+    if hyp_len > closest_ref_len:
         return 1
     else:
-        return math.exp(1 - r / c)
+        return math.exp(1 - closest_ref_len / hyp_len)
 
+
+class SmoothingFunction:
+    """
+    This is an implementation of the smoothing techniques
+    for segment-level BLEU scores that was presented in
+    Boxing Chen and Colin Cherry (2014) A Systematic Comparison of
+    Smoothing Techniques for Sentence-Level BLEU. In WMT14.
+    http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
+    """
+    def __init__(self, epsilon=0.1, alpha=5, k=5):
+        """
+        This will initialize the parameters required for the various smoothing
+        techniques, the default values are set to the numbers used in the
+        experiments from Chen and Cherry (2014).
+
+        >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures', 
+        ...                 'that', 'the', 'military', 'always', 'obeys', 'the', 
+        ...                 'commands', 'of', 'the', 'party']
+        >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures', 
+        ...               'that', 'the', 'military', 'will', 'forever', 'heed', 
+        ...               'Party', 'commands']
+                
+        >>> chencherry = SmoothingFunction()
+        >>> print (sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
+        0.4489...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
+        0.4118...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
+        0.4905...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
+        0.1801...
+        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
+        0.4905...
+
+        :param epsilon: the epsilon value used in method 1
+        :type epsilon: float
+        :param alpha: the alpha value used in method 6
+        :type alpha: int
+        :param k: the k value used in method 4
+        :type k: int
+        """
+        self.epsilon = epsilon
+        self.alpha = alpha
+        self.k = k
+        
+    def method0(self, p_n, *args, **kwargs):
+        """ No smoothing. """
+        return p_n
+        
+    def method1(self, p_n, *args, **kwargs):
+        """ 
+        Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
+        """ 
+        return [(p_i.numerator + self.epsilon)/ p_i.denominator 
+                if p_i.numerator == 0 else p_i for p_i in p_n]
+        
+    def method2(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 2: Add 1 to both numerator and denominator from 
+        Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of 
+        machine translation quality using longest common subsequence and 
+        skip-bigram statistics. In ACL04.
+        """
+        return [Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False) for p_i in p_n]
+        
+    def method3(self, p_n, *args, **kwargs):
+        """
+        Smoothing method 3: NIST geometric sequence smoothing 
+        The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each 
+        precision score whose matching n-gram count is null.
+        k is 1 for the first 'n' value for which the n-gram match count is null.
+        For example, if the text contains:
+         - one 2-gram match
+         - and (consequently) two 1-gram matches
+        the n-gram count for each individual precision score would be:
+         - n=1  =>  prec_count = 2     (two unigrams)
+         - n=2  =>  prec_count = 1     (one bigram)
+         - n=3  =>  prec_count = 1/2   (no trigram,  taking 'smoothed' value of 1 / ( 2^k ), with k=1)
+         - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
+        """
+        incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0:
+                p_n[i] = 1 / (2**incvnt * p_i.denominator)
+                incvnt+=1
+        return p_n
+    
+    def method4(self, p_n, references, hypothesis, hyp_len):
+        """
+        Smoothing method 4: 
+        Shorter translations may have inflated precision values due to having 
+        smaller denominators; therefore, we give them proportionally
+        smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
+        suggest dividing by 1/ln(len(T)), where T is the length of the translation.
+        """
+        incvnt = 1 
+        for i, p_i in enumerate(p_n):
+            if p_i.numerator == 0 and hyp_len != 0:
+                p_n[i] = incvnt * self.k / math.log(hyp_len) # Note that this K is different from the K from NIST.
+                incvnt+=1
+        return p_n
+
+
+    def method5(self, p_n, references, hypothesis, hyp_len):
+        """
+        Smoothing method 5:
+        The matched counts for similar values of n should be similar. To
+        calculate the n-gram matched count, this method averages the n−1, n and
+        n+1 gram matched counts.
+        """
+        m = {}
+        # Requires a precision value for an additional ngram order.
+        p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
+        m[-1] = p_n[0] + 1
+        for i, p_i in enumerate(p_n):
+            p_n[i] = (m[i-1] + p_i + p_n_plus1[i+1]) / 3
+            m[i] = p_n[i] 
+        return p_n
+        
+    def method6(self, p_n, references, hypothesis, hyp_len):
+        """
+        Smoothing method 6:
+        Interpolates the maximum likelihood estimate of the precision *p_n* with 
+        a prior estimate *pi0*. The prior is estimated by assuming that the ratio 
+        between pn and pn−1 will be the same as that between pn−1 and pn−2.
+        """
+        for i, p_i in enumerate(p_n):
+            if i in [1,2]: # Skips the first 2 orders of ngrams.
+                continue
+            else:
+                pi0 = 0 if p_n[i-2] == 0 else p_n[i-1]**2 / p_n[i-2] 
+                # No. of ngrams in translation.
+                l = sum(1 for _ in ngrams(hypothesis, i+1))
+                p_n[i] = (p_i + self.alpha * pi0) / (l + self.alpha)
+        return p_n
+    
+    def method7(self, p_n, references, hypothesis, hyp_len):
+        """
+        Smoothing method 7:
+        Interpolates methods 4 and 5: first applies the shorter-translation
+        smoothing from method 4, then the n-gram count averaging from method 5.
+        """
+        p_n = self.method4(p_n, references, hypothesis, hyp_len)
+        p_n = self.method5(p_n, references, hypothesis, hyp_len)
+        return p_n
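
For completeness, a short usage sketch combining the new corpus_bleu() with one of the Chen & Cherry smoothing methods (assuming this version of nltk.translate.bleu_score is importable; the toy sentences are made up):

    from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

    hyp = ['the', 'cat', 'sat', 'on', 'the', 'mat']
    ref = ['there', 'is', 'a', 'cat', 'on', 'the', 'mat']

    chencherry = SmoothingFunction()
    # method2 adds 1 to the numerator and denominator of each n-gram precision,
    # so hypotheses with missing higher-order matches still get a non-zero score.
    score = corpus_bleu([[ref]], [hyp], smoothing_function=chencherry.method2)
    print(score)
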
diff --git a/nltk/translate/gdfa.py b/nltk/translate/gdfa.py
index 23b7868..464ee15 100644
--- a/nltk/translate/gdfa.py
+++ b/nltk/translate/gdfa.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: GDFA word alignment symmetrization
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/translate/ibm2.py b/nltk/translate/ibm2.py
index 64b7fa4..f2c17a7 100644
--- a/nltk/translate/ibm2.py
+++ b/nltk/translate/ibm2.py
@@ -146,7 +146,7 @@ class IBMModel2(IBMModel):
             m = len(aligned_sentence.words)
             if (l, m) not in l_m_combinations:
                 l_m_combinations.add((l, m))
-                initial_prob = 1 / float(l + 1)
+                initial_prob = 1 / (l + 1)
                 if initial_prob < IBMModel.MIN_PROB:
                     warnings.warn("A source sentence is too long (" + str(l) +
                                   " words). Results may be less accurate.")
diff --git a/nltk/translate/ibm3.py b/nltk/translate/ibm3.py
index 2bac7d2..8af6059 100644
--- a/nltk/translate/ibm3.py
+++ b/nltk/translate/ibm3.py
@@ -197,7 +197,7 @@ class IBMModel3(IBMModel):
             m = len(aligned_sentence.words)
             if (l, m) not in l_m_combinations:
                 l_m_combinations.add((l, m))
-                initial_prob = 1 / float(m)
+                initial_prob = 1 / m
                 if initial_prob < IBMModel.MIN_PROB:
                     warnings.warn("A target sentence is too long (" + str(m) +
                                   " words). Results may be less accurate.")
diff --git a/nltk/translate/ibm4.py b/nltk/translate/ibm4.py
index 36cf197..6a93c7e 100644
--- a/nltk/translate/ibm4.py
+++ b/nltk/translate/ibm4.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model 4
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -261,7 +261,7 @@ class IBMModel4(IBMModel):
         if max_m <= 1:
             initial_prob = IBMModel.MIN_PROB
         else:
-            initial_prob = float(1) / (2 * (max_m - 1))
+            initial_prob = 1 / (2 * (max_m - 1))
         if initial_prob < IBMModel.MIN_PROB:
             warnings.warn("A target sentence is too long (" + str(max_m) +
                           " words). Results may be less accurate.")
diff --git a/nltk/translate/ibm5.py b/nltk/translate/ibm5.py
index 94fa631..ca49fc6 100644
--- a/nltk/translate/ibm5.py
+++ b/nltk/translate/ibm5.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model 5
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -275,7 +275,7 @@ class IBMModel5(IBMModel):
         # last available position.
         # Thus, the number of possible vacancy difference values is
         # (max_v) - (1-max_v) + 1 = 2 * max_v.
-        if max_m > 0 and (float(1) / (2 * max_m)) < IBMModel.MIN_PROB:
+        if max_m > 0 and (1 / (2 * max_m)) < IBMModel.MIN_PROB:
             warnings.warn("A target sentence is too long (" + str(max_m) +
                           " words). Results may be less accurate.")
 
diff --git a/nltk/translate/ibm_model.py b/nltk/translate/ibm_model.py
index c249826..1d70b56 100644
--- a/nltk/translate/ibm_model.py
+++ b/nltk/translate/ibm_model.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model Core
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -37,7 +37,7 @@ Robert L. Mercer. 1993. The Mathematics of Statistical Machine
 Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
-
+from __future__ import division
 from bisect import insort_left
 from collections import defaultdict
 from copy import deepcopy
@@ -447,7 +447,7 @@ class AlignmentInfo(object):
         if i is None:
             return 0
 
-        average_position = float(sum(self.cepts[i])) / len(self.cepts[i])
+        average_position = sum(self.cepts[i]) / len(self.cepts[i])
         return int(ceil(average_position))
 
     def previous_cept(self, j):
diff --git a/nltk/translate/metrics.py b/nltk/translate/metrics.py
index 27c55a5..5d54b97 100644
--- a/nltk/translate/metrics.py
+++ b/nltk/translate/metrics.py
@@ -1,11 +1,12 @@
 # Natural Language Toolkit: Translation metrics
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Will Zhang <wilzzha at gmail.com>
 #         Guan Gui <ggui at student.unimelb.edu.au>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
+from __future__ import division
 
 def alignment_error_rate(reference, hypothesis, possible=None):
     """
@@ -35,5 +36,5 @@ def alignment_error_rate(reference, hypothesis, possible=None):
     else:
         assert(reference.issubset(possible)) # sanity check
 
-    return (1.0 - float(len(hypothesis & reference) + len(hypothesis & possible)) /
+    return (1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) /
             float(len(hypothesis) + len(reference)))
diff --git a/nltk/translate/phrase_based.py b/nltk/translate/phrase_based.py
index bec641b..71b1008 100644
--- a/nltk/translate/phrase_based.py
+++ b/nltk/translate/phrase_based.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Phrase Extraction Algorithm
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Authors: Liling Tan, Fredrik Hedman, Petra Barancikova
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/translate/ribes_score.py b/nltk/translate/ribes_score.py
new file mode 100644
index 0000000..5c8e531
--- /dev/null
+++ b/nltk/translate/ribes_score.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: RIBES Score
+#
+# Copyright (C) 2001-2016 NLTK Project
+# Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian
+#               Mark Byers, ekhumoro, P. Ortiz
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+""" RIBES score implementation """
+from __future__ import division
+from itertools import islice
+import math
+
+from nltk.util import ngrams, choose
+
+
+def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10):
+    """
+    The RIBES (Rank-based Intuitive Bilingual Evaluation Score) from 
+    Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and 
+    Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for 
+    Distant Language Pairs". In Proceedings of EMNLP. 
+    http://www.aclweb.org/anthology/D/D10/D10-1092.pdf 
+    
+    The generic RIBES scores used in shared tasks, e.g. the Workshop on
+    Asian Translation (WAT), use the following RIBES calculation:
+
+        RIBES = kendall_tau * (p1**alpha) * (bp**beta)
+    
+    Please note that this re-implementation differs from the official
+    RIBES implementation; although it emulates the results described
+    in the original paper, there are further optimizations implemented
+    in the official RIBES script.
+
+    Users are encouraged to use the official RIBES script instead of this
+    implementation when evaluating their machine translation systems. Refer
+    to http://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script.
+    
+    :param references: a list of reference sentences
+    :type references: list(list(str))
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    :param alpha: hyperparameter used as a prior for the unigram precision.
+    :type alpha: float
+    :param beta: hyperparameter used as a prior for the brevity penalty.
+    :type beta: float
+    :return: The best ribes score from one of the references.
+    :rtype: float
+    """
+    best_ribes = -1.0
+    # Calculates RIBES for each reference and returns the best score.
+    for reference in references:
+        # Collects the *worder* from the ranked correlation alignments.
+        worder = word_rank_alignment(reference, hypothesis)
+        nkt = kendall_tau(worder)
+            
+        # Calculates the brevity penalty
+        bp = min(1.0, math.exp(1.0 - len(reference)/len(hypothesis)))
+        
+        # Calculates the unigram precision, *p1*
+        p1 = len(worder) / len(hypothesis)
+        
+        _ribes = nkt * (p1 ** alpha) *  (bp ** beta)
+        
+        if _ribes > best_ribes: # Keeps the best score.
+            best_ribes = _ribes
+        
+    return best_ribes
+
+
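As a cross-check of the RIBES formula in the docstring above, a small sketch (assuming this new nltk.translate.ribes_score module is importable) rebuilds the single-reference score from its components, using the (H0, R0) pair from the paper:

    import math
    from nltk.translate.ribes_score import word_rank_alignment, kendall_tau

    ref = 'he was interested in world history because he read the book'.split()
    hyp = 'he read the book because he was interested in world history'.split()

    worder = word_rank_alignment(ref, hyp)
    nkt = kendall_tau(worder)                                  # normalized Kendall's tau
    bp = min(1.0, math.exp(1.0 - len(ref) / float(len(hyp))))  # brevity penalty
    p1 = len(worder) / float(len(hyp))                         # unigram precision
    print(nkt * (p1 ** 0.25) * (bp ** 0.10))                   # matches sentence_ribes([ref], hyp)
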
+def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10):
+    """
+    This function "calculates RIBES for a system output (hypothesis) with
+    multiple references, and returns "best" score among multi-references and
+    individual scores. The scores are corpus-wise, i.e., averaged by the number
+    of sentences." (cf. RIBES version 1.03.1 code).
+
+    Unlike BLEU's micro-average precision, RIBES calculates the
+    macro-average precision by averaging the best RIBES score for each pair of
+    hypothesis and its corresponding references.
+
+    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...         'ensures', 'that', 'the', 'military', 'always',
+    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
+    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...          'heed', 'Party', 'commands']
+    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...          'guarantees', 'the', 'military', 'forces', 'always',
+    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...          'of', 'the', 'party']
+    
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', 
+    ...         'interested', 'in', 'world', 'history']
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', 
+    ...          'because', 'he', 'read', 'the', 'book']
+    
+    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+    >>> hypotheses = [hyp1, hyp2]
+    >>> round(corpus_ribes(list_of_references, hypotheses),4)
+    0.3597
+    
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
+    :param hypotheses: a list of hypothesis sentences
+    :type hypotheses: list(list(str))
+    :param alpha: hyperparameter used as a prior for the unigram precision.
+    :type alpha: float
+    :param beta: hyperparameter used as a prior for the brevity penalty.
+    :type beta: float
+    :return: The best ribes score from one of the references.
+    :rtype: float
+    """
+    corpus_best_ribes = 0.0
+    # Iterate through each hypothesis and their corresponding references.
+    for references, hypothesis in zip(list_of_references, hypotheses):
+        corpus_best_ribes += sentence_ribes(references, hypothesis, alpha, beta)
+    return corpus_best_ribes / len(hypotheses)
+    
+        
+def position_of_ngram(ngram, sentence):
+    """
+    This function returns the position of the first instance of the ngram 
+    appearing in a sentence.
+    
+    Note that one could also work with strings as follows, but the code is a
+    little convoluted with the type casting back and forth:
+        
+        char_pos = ' '.join(sent)[:' '.join(sent).index(' '.join(ngram))]
+        word_pos = char_pos.count(' ')
+        
+    Another way to conceive this is:
+    
+        return next(i for i, ng in enumerate(ngrams(sentence, len(ngram))) 
+                    if ng == ngram)
+                    
+    :param ngram: The ngram that needs to be searched for.
+    :type ngram: tuple
+    :param sentence: The list of tokens to search in.
+    :type sentence: list(str)
+    :return: The word position of the first match, or None if the ngram is not found.
+    :rtype: int
+    """
+    # Iterates through the ngrams in sentence.
+    for i,sublist in enumerate(ngrams(sentence, len(ngram))):
+        # Returns the index of the word when ngram matches.
+        if ngram == sublist:
+            return i
+
+
+def word_rank_alignment(reference, hypothesis, character_based=False):
+    """    
+    This is the word rank alignment algorithm described in the paper to produce
+    the *worder* list, i.e. a list of word indices of the hypothesis word orders 
+    w.r.t. the list of reference words.
+    
+    Below is the (H0, R0) example from the Isozaki et al. 2010 paper; note that
+    the examples in the paper are indexed from 1 but the results here are indexed from 0:
+    
+        >>> ref = str('he was interested in world history because he '
+        ... 'read the book').split()
+        >>> hyp = str('he read the book because he was interested in world '
+        ... 'history').split()
+        >>> word_rank_alignment(ref, hyp)
+        [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        
+    The (H1, R1) example from the paper, note the 0th index:
+    
+        >>> ref = 'John hit Bob yesterday'.split()
+        >>> hyp = 'Bob hit John yesterday'.split()
+        >>> word_rank_alignment(ref, hyp)
+        [2, 1, 0, 3]
+
+    Here is the (H2, R2) example from the paper, note the 0th index here too:
+    
+        >>> ref = 'the boy read the book'.split()
+        >>> hyp = 'the book was read by the boy'.split()
+        >>> word_rank_alignment(ref, hyp)
+        [3, 4, 2, 0, 1]
+        
+    :param reference: a reference sentence
+    :type reference: list(str)
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: list(str)
+    """
+    worder = []
+    hyp_len = len(hypothesis)
+    # Stores a list of possible ngrams from the reference sentence.
+    # This is used for matching context window later in the algorithm.
+    ref_ngrams = []
+    hyp_ngrams = []
+    for n in range(1, len(reference)+1):
+        for ng in ngrams(reference, n):
+            ref_ngrams.append(ng)
+        for ng in ngrams(hypothesis, n):
+            hyp_ngrams.append(ng)
+    for i, h_word in enumerate(hypothesis):
+        # If word is not in the reference, continue.
+        if h_word not in reference:
+            continue
+        # If we can determine one-to-one word correspondence for unigrams that 
+        # only appear once in both the reference and hypothesis.
+        elif hypothesis.count(h_word) == reference.count(h_word) == 1:
+            worder.append(reference.index(h_word))
+        else:
+            max_window_size = max(i, hyp_len-i+1)
+            for window in range(1, max_window_size):
+                if i+window < hyp_len: # If searching the right context is possible.
+                    # Retrieve the right context window.
+                    right_context_ngram = tuple(islice(hypothesis, i, i+window+1))
+                    num_times_in_ref = ref_ngrams.count(right_context_ngram)
+                    num_times_in_hyp = hyp_ngrams.count(right_context_ngram) 
+                    # If ngram appears only once in both ref and hyp.
+                    if num_times_in_ref == num_times_in_hyp == 1:
+                        # Find the position of ngram that matched the reference.
+                        pos = position_of_ngram(right_context_ngram, reference)
+                        worder.append(pos)  # Add the positions of the ngram.
+                        break
+                if window <= i: # If searching the left context is possible.
+                    # Retrieve the left context window.
+                    left_context_ngram = tuple(islice(hypothesis, i-window, i+1))
+                    num_times_in_ref = ref_ngrams.count(left_context_ngram)
+                    num_times_in_hyp = hyp_ngrams.count(left_context_ngram)
+                    if num_times_in_ref == num_times_in_hyp == 1:
+                        # Find the position of ngram that matched the reference.
+                        pos = position_of_ngram(left_context_ngram, reference)
+                        # Add the positions of the ngram.
+                        worder.append(pos+ len(left_context_ngram) -1)  
+                        break
+    return worder
+
+    
+def find_increasing_sequences(worder):
+    """
+    Given the *worder* list, this function groups monotonic +1 sequences. 
+    
+        >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> list(find_increasing_sequences(worder))
+        [(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)]
+    
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    """
+    items = iter(worder)
+    a, b = None, next(items, None)
+    result = [b]
+    while b is not None:
+        a, b = b, next(items, None)
+        if b is not None and a + 1 == b:
+            result.append(b)
+        else:
+            if len(result) > 1:
+                yield tuple(result)
+            result = [b]
+
+
+def kendall_tau(worder, normalize=True):
+    """
+    Calculates the Kendall's Tau correlation coefficient given the *worder*
+    list of word alignments from word_rank_alignment(), using the formula:
+    
+        tau = 2 * num_increasing_pairs / num_possible_pairs - 1
+
+    Note that the increasing pairs can be discontinuous in the *worder*
+    list and each increasing sequence contributes choose(len(seq), 2)
+    increasing pairs, e.g.
+    
+        >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> number_possible_pairs = choose(len(worder), 2)
+        >>> round(kendall_tau(worder, normalize=False),3)
+        -0.236
+        >>> round(kendall_tau(worder),3)
+        0.382
+    
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    :param normalize: Flag to indicate normalization
+    :type normalize: boolean
+    :return: The Kendall's Tau correlation coefficient.
+    :rtype: float
+    """
+    worder_len = len(worder)
+    # Extract the groups of increasing/monotonic sequences.
+    increasing_sequences = find_increasing_sequences(worder)
+    # Calculate no. of increasing_pairs in *worder* list.
+    num_increasing_pairs = sum(choose(len(seq),2) for seq in increasing_sequences) 
+    # Calculate no. of possible pairs.
+    num_possible_pairs = choose(worder_len, 2)
+    # Kendall's Tau computation.
+    tau = 2 * num_increasing_pairs / num_possible_pairs -1
+    if normalize: # If normalized, the tau output falls between 0.0 and 1.0
+        return (tau + 1) / 2
+    else: # Otherwise, the tau output falls between -1.0 and +1.0
+        return tau
+
+
+def spearman_rho(worder, normalize=True):
+    """
+    Calculates the Spearman's Rho correlation coefficient given the *worder* 
+    list of word alignment from word_rank_alignment(), using the formula:
+    
+        rho = 1 - sum(d**2) / choose(len(worder)+1, 3)
+
+    where d is the difference between the *worder* indices and the original
+    word indices from the reference sentence.
+    
+    Using the (H0,R0) and (H5, R5) example from the paper
+    
+        >>> worder =  [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
+        >>> round(spearman_rho(worder, normalize=False), 3)
+        -0.591
+        >>> round(spearman_rho(worder), 3)
+        0.205
+    
+    :param worder: The worder list output from word_rank_alignment
+    :type worder: list(int)
+    """
+    worder_len = len(worder)
+    sum_d_square = sum((wi - i)**2 for wi, i in zip(worder, range(worder_len)))
+    rho = 1 - sum_d_square / choose(worder_len+1, 3)
+    
+    if normalize: # If normalized, the rho output falls between 0.0 and 1.0
+        return (rho + 1) / 2
+    else: # Otherwise, the rho output falls between -1.0 and +1.0
+        return rho
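
To illustrate the kendall_tau() arithmetic above by hand, a small sketch (assuming nltk.util.choose from this same commit) reproduces the -0.236 / 0.382 doctest values:

    from nltk.util import choose  # binomial coefficient helper added in this commit

    worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
    # Monotonic +1 runs found by find_increasing_sequences(worder):
    runs = [(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)]

    num_increasing_pairs = sum(choose(len(r), 2) for r in runs)  # 6 + 15 = 21
    num_possible_pairs = choose(len(worder), 2)                  # 55
    tau = 2.0 * num_increasing_pairs / num_possible_pairs - 1    # ~ -0.236
    print(round(tau, 3), round((tau + 1) / 2, 3))                # -0.236 0.382
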
diff --git a/nltk/translate/stack_decoder.py b/nltk/translate/stack_decoder.py
index 0db00c5..eda5f41 100644
--- a/nltk/translate/stack_decoder.py
+++ b/nltk/translate/stack_decoder.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Stack decoder
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/tree.py b/nltk/tree.py
index e697d6f..c38eacb 100644
--- a/nltk/tree.py
+++ b/nltk/tree.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Text Trees
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Peter Ljunglöf <peter.ljunglof at gu.se>
diff --git a/nltk/treeprettyprinter.py b/nltk/treeprettyprinter.py
index d1f3ac1..02d83e1 100644
--- a/nltk/treeprettyprinter.py
+++ b/nltk/treeprettyprinter.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: ASCII visualization of NLTK trees
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Andreas van Cranenburgh <A.W.vanCranenburgh at uva.nl>
 #         Peter Ljunglöf <peter.ljunglof at gu.se>
 # URL: <http://nltk.org/>
diff --git a/nltk/twitter/__init__.py b/nltk/twitter/__init__.py
index 4ffb61f..edf5204 100644
--- a/nltk/twitter/__init__.py
+++ b/nltk/twitter/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/twitter/api.py b/nltk/twitter/api.py
index 1ff9be4..9b4c2c6 100644
--- a/nltk/twitter/api.py
+++ b/nltk/twitter/api.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter API
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/twitter/common.py b/nltk/twitter/common.py
index 9428c64..bae7be1 100644
--- a/nltk/twitter/common.py
+++ b/nltk/twitter/common.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/twitter/twitter_demo.py b/nltk/twitter/twitter_demo.py
index 6567ba3..dad0c55 100644
--- a/nltk/twitter/twitter_demo.py
+++ b/nltk/twitter/twitter_demo.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/twitter/twitterclient.py b/nltk/twitter/twitterclient.py
index 36abdc2..d78d270 100644
--- a/nltk/twitter/twitterclient.py
+++ b/nltk/twitter/twitterclient.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/twitter/util.py b/nltk/twitter/util.py
index 2aff979..8861132 100644
--- a/nltk/twitter/util.py
+++ b/nltk/twitter/util.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/util.py b/nltk/util.py
index eec7c3d..d16883e 100644
--- a/nltk/util.py
+++ b/nltk/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -378,9 +378,43 @@ def flatten(*args):
 # Ngram iteration
 ##########################################################################
 
+def pad_sequence(sequence, n, pad_left=False, pad_right=False, 
+                 left_pad_symbol=None, right_pad_symbol=None):
+    """
+    Returns a padded sequence of items before ngram extraction.
+    
+        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
+        ['<s>', 1, 2, 3, 4, 5, '</s>']
+        >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
+        ['<s>', 1, 2, 3, 4, 5]
+        >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
+        [1, 2, 3, 4, 5, '</s>']
+    
+    :param sequence: the source data to be padded
+    :type sequence: sequence or iter
+    :param n: the degree of the ngrams
+    :type n: int
+    :param pad_left: whether the ngrams should be left-padded
+    :type pad_left: bool
+    :param pad_right: whether the ngrams should be right-padded
+    :type pad_right: bool
+    :param left_pad_symbol: the symbol to use for left padding (default is None)
+    :type left_pad_symbol: any
+    :param right_pad_symbol: the symbol to use for right padding (default is None)
+    :type right_pad_symbol: any
+    :rtype: sequence or iter
+    """
+    sequence = iter(sequence)
+    if pad_left:
+        sequence = chain((left_pad_symbol,) * (n-1), sequence)
+    if pad_right:
+        sequence = chain(sequence, (right_pad_symbol,) * (n-1))
+    return sequence
+
 # add a flag to pad the sequence so we get peripheral ngrams?
 
-def ngrams(sequence, n, pad_left=False, pad_right=False, pad_symbol=None):
+def ngrams(sequence, n, pad_left=False, pad_right=False, 
+           left_pad_symbol=None, right_pad_symbol=None):
     """
     Return the ngrams generated from a sequence of items, as an iterator.
     For example:
@@ -394,6 +428,13 @@ def ngrams(sequence, n, pad_left=False, pad_right=False, pad_symbol=None):
 
         >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
         [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
+        >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
+        [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
+        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
+        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
+        >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
+        [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
+
 
     :param sequence: the source data to be converted into ngrams
     :type sequence: sequence or iter
@@ -403,17 +444,15 @@ def ngrams(sequence, n, pad_left=False, pad_right=False, pad_symbol=None):
     :type pad_left: bool
     :param pad_right: whether the ngrams should be right-padded
     :type pad_right: bool
-    :param pad_symbol: the symbol to use for padding (default is None)
-    :type pad_symbol: any
-    :rtype: iter(tuple)
+    :param left_pad_symbol: the symbol to use for left padding (default is None)
+    :type left_pad_symbol: any
+    :param right_pad_symbol: the symbol to use for right padding (default is None)
+    :type right_pad_symbol: any
+    :rtype: sequence or iter
     """
-
-    sequence = iter(sequence)
-    if pad_left:
-        sequence = chain((pad_symbol,) * (n-1), sequence)
-    if pad_right:
-        sequence = chain(sequence, (pad_symbol,) * (n-1))
-
+    sequence = pad_sequence(sequence, n, pad_left, pad_right,
+                            left_pad_symbol, right_pad_symbol)
+        
     history = []
     while n > 1:
         history.append(next(sequence))
@@ -461,7 +500,7 @@ def trigrams(sequence, **kwargs):
     for item in ngrams(sequence, 3, **kwargs):
         yield item
 
-def everygrams(sequence, min_len=1, max_len=-1):
+def everygrams(sequence, min_len=1, max_len=-1, **kwargs):
     """
     Returns all possible ngrams generated from a sequence of items, as an iterator.
     
@@ -479,13 +518,14 @@ def everygrams(sequence, min_len=1, max_len=-1):
     :type  max_len: int
     :rtype: iter(tuple)
     """
+    
     if max_len == -1:
-    	max_len = len(sequence)
+        max_len = len(sequence)
     for n in range(min_len, max_len+1):
-        for ng in ngrams(sequence, n):
+        for ng in ngrams(sequence, n, **kwargs):
             yield ng
 
-def skipgrams(sequence, n, k):
+def skipgrams(sequence, n, k, **kwargs):
     """
     Returns all possible skipgrams generated from a sequence of items, as an iterator.
     Skipgrams are ngrams that allow tokens to be skipped.
@@ -505,11 +545,20 @@ def skipgrams(sequence, n, k):
     :type  k: int
     :rtype: iter(tuple)
     """
-    for ngram in ngrams(sequence, n + k, pad_right=True):
+    
+    # Pads the sequence as desired by **kwargs.
+    if 'pad_left' in kwargs or 'pad_right' in kwargs:
+        sequence = pad_sequence(sequence, n, **kwargs)
+    
+    # Note when iterating through the ngrams, the pad_right here is not
+    # the **kwargs padding, it's for the algorithm to detect the SENTINEL 
+    # object on the right pad to stop inner loop.
+    SENTINEL = object()
+    for ngram in ngrams(sequence, n + k, pad_right=True, right_pad_symbol=SENTINEL):
         head = ngram[:1]
         tail = ngram[1:]
         for skip_tail in combinations(tail, n - 1):
-            if skip_tail[-1] is None:
+            if skip_tail[-1] is SENTINEL:
                 continue
             yield head + skip_tail
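
A quick sketch of why the SENTINEL object matters (assuming this version of nltk.util): with the old None padding, pairs ending in a literal None token would have been mistaken for padding and dropped, whereas the sentinel keeps them:

    from nltk.util import skipgrams

    tokens = ['a', None, 'b', 'c']
    # Bigrams with up to one skipped token; pairs ending in the real None are retained.
    print(list(skipgrams(tokens, 2, 1)))
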
 
@@ -1183,3 +1232,133 @@ def elementtree_indent(elem, level=0):
         if level and (not elem.tail or not elem.tail.strip()):
             elem.tail = i
 
+######################################################################
+# Mathematical approximations
+######################################################################
+
+def choose(n, k):
+    """
+    This function is a fast way to calculate binomial coefficients, commonly
+    known as nCk, i.e. the number of combinations of n things taken k at a time.
+    (https://en.wikipedia.org/wiki/Binomial_coefficient).
+
+    This is equivalent to *scipy.special.comb()* with exact long integer
+    computation, but this implementation is faster; see
+    https://github.com/nltk/nltk/issues/1181
+    
+        >>> choose(4, 2)
+        6
+        >>> choose(6, 2)
+        15
+    
+    :param n: The number of things.
+    :type n: int
+    :param k: The number of things taken at a time.
+    :type k: int
+    """
+    if 0 <= k <= n:
+        ntok, ktok = 1, 1
+        for t in range(1, min(k, n - k) + 1):
+            ntok *= n
+            ktok *= t
+            n -= 1
+        return ntok // ktok
+    else:
+        return 0
+
+######################################################################
+# Trie Implementation
+######################################################################
+class Trie(defaultdict):
+    """A Trie implementation for strings"""
+    LEAF = True
+
+    def __init__(self, strings=None):
+        """Builds a Trie object, which is built around a ``defaultdict``
+        
+        If ``strings`` is provided, it will add the ``strings``, given as a
+        ``list`` of ``strings``, to the Trie.
+        Otherwise, it will construct an empty Trie.
+
+        :param strings: List of strings to insert into the trie 
+            (Default is ``None``)
+        :type strings: list(str)
+
+        """
+        defaultdict.__init__(self, Trie)
+        if strings:
+            for string in strings:
+                self.insert(string)
+
+    def insert(self, string):
+        """Inserts ``string`` into the Trie
+
+        :param string: String to insert into the trie
+        :type string: str
+
+        :Example:
+
+        >>> from nltk.util import Trie
+        >>> trie = Trie(["ab"])
+        >>> trie
+        defaultdict(<class 'nltk.util.Trie'>, {'a': defaultdict(<class 'nltk.util.Trie'>, {'b': defaultdict(<class 'nltk.util.Trie'>, {True: None})})})
+
+        """
+        if len(string):
+            self[string[0]].insert(string[1:])
+        else:
+            # mark that the string is complete
+            self[Trie.LEAF] = None
+
+    def __str__(self):
+        return str(self.as_dict())
+
+    def as_dict(self, d=None):
+        """Convert ``defaultdict`` to common ``dict`` representation.
+
+        :param d: A defaultdict containing strings mapped to nested defaultdicts.
+            This is the structure of the trie. (Default is None)
+        :type d: defaultdict(str -> defaultdict)
+        :return: Even though ``defaultdict`` is a subclass of ``dict`` and thus
+            can be converted to a simple ``dict`` using ``dict()``, in our case
+            it's a nested ``defaultdict``, so this method recursively converts it
+            into the plain ``dict`` representation of the ``Trie`` without the
+            ``defaultdict(<class 'nltk.util.Trie'>, ...`` wrappers.
+        :rtype: dict(str -> dict(bool -> None))
+            Note: there can be an arbitrarily deeply nested 
+            ``dict(str -> dict(str -> dict(..))``, but the last
+            level will have ``dict(str -> dict(bool -> None))``
+
+        :Example:
+
+        >>> from nltk.util import Trie
+        >>> trie = Trie(["abc", "def"])
+        >>> expected = {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
+        >>> trie.as_dict() == expected
+        True
+
+        """
+        def _default_to_regular(d):
+            """
+            Source: http://stackoverflow.com/a/26496899/4760801
+
+            :param d: Nested ``defaultdict`` to convert to regular ``dict``
+            :type d: defaultdict(str -> defaultdict(...))
+            :return: A dict representation of the defaultdict
+            :rtype: dict(str -> dict(str -> ...))
+
+            :Example:
+
+            >>> from collections import defaultdict
+            >>> d = defaultdict(defaultdict)
+            >>> d["one"]["two"] = "three"
+            >>> d
+            defaultdict(<type 'collections.defaultdict'>, {'one': defaultdict(None, {'two': 'three'})})
+            >>> _default_to_regular(d)
+            {'one': {'two': 'three'}}
+
+            """
+            if isinstance(d, defaultdict):
+                d = {k: _default_to_regular(v) for k, v in d.items()}
+            return d
+        
+        return _default_to_regular(self)
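
The Trie class as added only supports insertion and dict conversion; a hypothetical lookup helper (not part of this commit) that walks the nested defaultdicts might look like this:

    from nltk.util import Trie

    def contains(trie, string):
        """Hypothetical helper: walk the trie character by character."""
        node = trie
        for ch in string:
            if ch not in node:  # plain indexing would auto-create an empty child
                return False
            node = node[ch]
        return Trie.LEAF in node

    t = Trie(["abc", "abd"])
    print(contains(t, "abc"), contains(t, "ab"), contains(t, "abe"))  # True False False
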
diff --git a/nltk/wsd.py b/nltk/wsd.py
index 86ad773..7524244 100644
--- a/nltk/wsd.py
+++ b/nltk/wsd.py
@@ -3,7 +3,7 @@
 # Authors: Liling Tan <alvations at gmail.com>,
 #          Dmitrijs Milajevs <dimazest at gmail.com>
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/setup.py b/setup.py
index 9c59e43..3d1adc7 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 #
 # Setup script for the Natural Language Toolkit
 #
-# Copyright (C) 2001-2015 NLTK Project
+# Copyright (C) 2001-2016 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/nltk.git



More information about the debian-science-commits mailing list