[nltk] 01/06: Imported Upstream version 3.0.2

Daniel Stender danstender-guest at moszumanska.debian.org
Tue Mar 17 16:24:55 UTC 2015


This is an automated email from the git hooks/post-receive script.

danstender-guest pushed a commit to branch master
in repository nltk.

commit 979e24737ad8d8e19b49e743e1ab95697a1549da
Author: Daniel Stender <debian at danielstender.com>
Date:   Mon Mar 16 00:20:03 2015 +0100

    Imported Upstream version 3.0.2
---
 LICENSE.txt                                 |   2 +-
 PKG-INFO                                    |   2 +-
 README.txt                                  |   2 +-
 nltk.egg-info/PKG-INFO                      |   2 +-
 nltk.egg-info/SOURCES.txt                   |  12 +-
 nltk/VERSION                                |   2 +-
 nltk/__init__.py                            |   6 +-
 nltk/align/__init__.py                      |   4 +-
 nltk/align/api.py                           |   3 +-
 nltk/align/bleu.py                          | 209 --------
 nltk/align/bleu_score.py                    | 219 ++++++++
 nltk/align/gdfa.py                          |   2 +-
 nltk/align/ibm2.py                          |   2 +-
 nltk/align/phrase_based.py                  |   2 +-
 nltk/app/__init__.py                        |   2 +-
 nltk/app/chartparser_app.py                 |   2 +-
 nltk/app/chunkparser_app.py                 |   8 +-
 nltk/app/collocations_app.py                |   2 +-
 nltk/app/concordance_app.py                 |   2 +-
 nltk/app/rdparser_app.py                    |   2 +-
 nltk/app/srparser_app.py                    |   2 +-
 nltk/app/wordfreq_app.py                    |   2 +-
 nltk/app/wordnet_app.py                     |   8 +-
 nltk/book.py                                |   2 +-
 nltk/ccg/__init__.py                        |   2 +-
 nltk/ccg/api.py                             |   2 +-
 nltk/ccg/chart.py                           |   2 +-
 nltk/ccg/combinator.py                      |   2 +-
 nltk/ccg/lexicon.py                         |   2 +-
 nltk/chat/__init__.py                       |   2 +-
 nltk/chat/eliza.py                          |   2 +-
 nltk/chat/iesha.py                          |   2 +-
 nltk/chat/rude.py                           |   2 +-
 nltk/chat/suntsu.py                         |   2 +-
 nltk/chat/util.py                           |   2 +-
 nltk/chat/zen.py                            |   2 +-
 nltk/chunk/__init__.py                      |   2 +-
 nltk/chunk/api.py                           |   2 +-
 nltk/chunk/named_entity.py                  |   2 +-
 nltk/chunk/regexp.py                        |   2 +-
 nltk/chunk/util.py                          |   6 +-
 nltk/classify/__init__.py                   |   3 +-
 nltk/classify/api.py                        |   2 +-
 nltk/classify/decisiontree.py               |  20 +-
 nltk/classify/maxent.py                     | 108 ++--
 nltk/classify/megam.py                      |   9 +-
 nltk/classify/naivebayes.py                 |  24 +-
 nltk/classify/rte_classify.py               |  28 +-
 nltk/classify/scikitlearn.py                |   1 -
 nltk/classify/senna.py                      | 184 +++++++
 nltk/classify/svm.py                        |   2 +-
 nltk/classify/tadm.py                       |   2 +-
 nltk/classify/util.py                       |  37 +-
 nltk/classify/weka.py                       |  12 +-
 nltk/cluster/__init__.py                    |   2 +-
 nltk/cluster/api.py                         |   2 +-
 nltk/cluster/em.py                          |   2 +-
 nltk/cluster/gaac.py                        |   2 +-
 nltk/cluster/kmeans.py                      |   2 +-
 nltk/cluster/util.py                        |   2 +-
 nltk/collocations.py                        |   2 +-
 nltk/compat.py                              |  41 +-
 nltk/corpus/__init__.py                     |  14 +-
 nltk/corpus/europarl_raw.py                 |   2 +-
 nltk/corpus/reader/__init__.py              |   6 +-
 nltk/corpus/reader/aligned.py               |   2 +-
 nltk/corpus/reader/api.py                   |   2 +-
 nltk/corpus/reader/bnc.py                   |   2 +-
 nltk/corpus/reader/bracket_parse.py         |   2 +-
 nltk/corpus/reader/chasen.py                |   2 +-
 nltk/corpus/reader/childes.py               |   2 +-
 nltk/corpus/reader/chunked.py               |   2 +-
 nltk/corpus/reader/cmudict.py               |   2 +-
 nltk/corpus/reader/conll.py                 |   2 +-
 nltk/corpus/reader/dependency.py            |   2 +-
 nltk/corpus/reader/framenet.py              |  33 +-
 nltk/corpus/reader/ieer.py                  |   2 +-
 nltk/corpus/reader/indian.py                |   2 +-
 nltk/corpus/reader/ipipan.py                |   2 +-
 nltk/corpus/reader/knbc.py                  |  87 ++--
 nltk/corpus/reader/lin.py                   |   2 +-
 nltk/corpus/reader/nkjp.py                  | 428 +++++++++++++++
 nltk/corpus/reader/nombank.py               |   2 +-
 nltk/corpus/reader/nps_chat.py              |   2 +-
 nltk/corpus/reader/pl196x.py                |   2 +-
 nltk/corpus/reader/plaintext.py             |   2 +-
 nltk/corpus/reader/ppattach.py              |   2 +-
 nltk/corpus/reader/propbank.py              |   2 +-
 nltk/corpus/reader/rte.py                   |   2 +-
 nltk/corpus/reader/semcor.py                |   2 +-
 nltk/corpus/reader/senseval.py              |   2 +-
 nltk/corpus/reader/sentiwordnet.py          |   2 +-
 nltk/corpus/reader/sinica_treebank.py       |   2 +-
 nltk/corpus/reader/string_category.py       |   2 +-
 nltk/corpus/reader/switchboard.py           |   2 +-
 nltk/corpus/reader/tagged.py                |   2 +-
 nltk/corpus/reader/toolbox.py               |   2 +-
 nltk/corpus/reader/util.py                  |   2 +-
 nltk/corpus/reader/verbnet.py               |   2 +-
 nltk/corpus/reader/wordlist.py              |   2 +-
 nltk/corpus/reader/wordnet.py               |   8 +-
 nltk/corpus/reader/xmldocs.py               |   2 +-
 nltk/corpus/reader/ycoe.py                  |   2 +-
 nltk/corpus/util.py                         |   2 +-
 nltk/data.py                                |  21 +-
 nltk/downloader.py                          |  30 +-
 nltk/draw/__init__.py                       |   2 +-
 nltk/draw/cfg.py                            |   2 +-
 nltk/draw/dispersion.py                     |   2 +-
 nltk/draw/table.py                          |   2 +-
 nltk/draw/tree.py                           |   2 +-
 nltk/draw/util.py                           |   2 +-
 nltk/featstruct.py                          |   2 +-
 nltk/grammar.py                             |   2 +-
 nltk/help.py                                |   2 +-
 nltk/inference/__init__.py                  |   2 +-
 nltk/inference/nonmonotonic.py              |   2 +-
 nltk/inference/prover9.py                   |   2 +-
 nltk/inference/resolution.py                |   2 +-
 nltk/inference/tableau.py                   |   2 +-
 nltk/internals.py                           |  22 +-
 nltk/jsontags.py                            |   2 +-
 nltk/metrics/__init__.py                    |   2 +-
 nltk/metrics/agreement.py                   |   2 +-
 nltk/metrics/association.py                 |   2 +-
 nltk/metrics/confusionmatrix.py             |   8 +-
 nltk/metrics/distance.py                    |  31 +-
 nltk/metrics/paice.py                       |   2 +-
 nltk/metrics/scores.py                      |   2 +-
 nltk/metrics/segmentation.py                |   2 +-
 nltk/metrics/spearman.py                    |   2 +-
 nltk/misc/__init__.py                       |   2 +-
 nltk/misc/minimalset.py                     |   2 +-
 nltk/misc/sort.py                           |   2 +-
 nltk/misc/wordfinder.py                     |   2 +-
 nltk/parse/__init__.py                      |   7 +-
 nltk/parse/api.py                           |  22 +-
 nltk/parse/bllip.py                         | 285 ++++++++++
 nltk/parse/chart.py                         |  28 +-
 nltk/parse/dependencygraph.py               | 458 ++++++++++------
 nltk/parse/earleychart.py                   |   4 +-
 nltk/parse/evaluate.py                      | 132 +++++
 nltk/parse/featurechart.py                  |  14 +-
 nltk/parse/generate.py                      |   2 +-
 nltk/parse/malt.py                          |  62 +--
 nltk/parse/nonprojectivedependencyparser.py | 392 +++++++++-----
 nltk/parse/pchart.py                        |  47 +-
 nltk/parse/projectivedependencyparser.py    | 124 +++--
 nltk/parse/recursivedescent.py              |   2 +-
 nltk/parse/shiftreduce.py                   |   2 +-
 nltk/parse/stanford.py                      |  56 +-
 nltk/parse/transitionparser.py              | 773 ++++++++++++++++++++++++++++
 nltk/parse/util.py                          |   2 +-
 nltk/parse/viterbi.py                       |   2 +-
 nltk/probability.py                         |  18 +-
 nltk/sem/__init__.py                        |   2 +-
 nltk/sem/boxer.py                           |  16 +-
 nltk/sem/chat80.py                          |   2 +-
 nltk/sem/cooper_storage.py                  |   2 +-
 nltk/sem/drt.py                             |  19 +-
 nltk/sem/drt_glue_demo.py                   |  16 +-
 nltk/sem/evaluate.py                        |   2 +-
 nltk/sem/glue.py                            |  84 +--
 nltk/sem/hole.py                            |  46 +-
 nltk/sem/lfg.py                             |  75 +--
 nltk/sem/linearlogic.py                     |   2 +-
 nltk/sem/logic.py                           |   6 +-
 nltk/sem/relextract.py                      |   2 +-
 nltk/sem/skolemize.py                       |   2 +-
 nltk/sem/util.py                            |   2 +-
 nltk/stem/__init__.py                       |   2 +-
 nltk/stem/api.py                            |   2 +-
 nltk/stem/isri.py                           |   2 +-
 nltk/stem/lancaster.py                      |   2 +-
 nltk/stem/porter.py                         |   5 +-
 nltk/stem/regexp.py                         |   2 +-
 nltk/stem/rslp.py                           |   2 +-
 nltk/stem/snowball.py                       | 428 ++++++++-------
 nltk/stem/util.py                           |  12 +
 nltk/stem/wordnet.py                        |   2 +-
 nltk/tag/__init__.py                        |   5 +-
 nltk/tag/api.py                             |   2 +-
 nltk/tag/brill.py                           |  70 +--
 nltk/tag/brill_trainer.py                   | 133 +++--
 nltk/tag/crf.py                             | 203 ++++++++
 nltk/tag/hmm.py                             |   2 +-
 nltk/tag/hunpos.py                          |  35 +-
 nltk/tag/mapping.py                         |   2 +-
 nltk/tag/senna.py                           | 292 ++---------
 nltk/tag/sequential.py                      |   2 +-
 nltk/tag/stanford.py                        |  21 +-
 nltk/tag/tnt.py                             |   2 +-
 nltk/tag/util.py                            |   2 +-
 nltk/tbl/__init__.py                        |   2 +-
 nltk/tbl/demo.py                            |   2 +-
 nltk/tbl/erroranalysis.py                   |   2 +-
 nltk/tbl/feature.py                         |  45 +-
 nltk/tbl/rule.py                            |  64 ++-
 nltk/tbl/template.py                        |   2 +-
 nltk/test/__init__.py                       |   2 +-
 nltk/test/align.doctest                     |   6 +-
 nltk/test/bleu.doctest                      |  14 +
 nltk/test/bnc.doctest                       |   2 +-
 nltk/test/ccg.doctest                       |  16 +-
 nltk/test/chat80.doctest                    |   2 +-
 nltk/test/chunk.doctest                     |   2 +-
 nltk/test/classify.doctest                  |   2 +-
 nltk/test/collocations.doctest              |   2 +-
 nltk/test/corpus.doctest                    |  33 +-
 nltk/test/data.doctest                      |   8 +-
 nltk/test/dependency.doctest                | 101 +++-
 nltk/test/discourse.doctest                 |   2 +-
 nltk/test/drt.doctest                       |  18 +-
 nltk/test/featgram.doctest                  |   2 +-
 nltk/test/featstruct.doctest                |   2 +-
 nltk/test/framenet.doctest                  |   2 +-
 nltk/test/generate.doctest                  |   2 +-
 nltk/test/gluesemantics.doctest             |  31 +-
 nltk/test/gluesemantics_malt.doctest        |   2 +-
 nltk/test/grammar.doctest                   |   2 +-
 nltk/test/grammartestsuites.doctest         |   2 +-
 nltk/test/index.doctest                     |   2 +-
 nltk/test/inference.doctest                 |   2 +-
 nltk/test/internals.doctest                 |   2 +-
 nltk/test/japanese.doctest                  |   2 +-
 nltk/test/logic.doctest                     |   2 +-
 nltk/test/metrics.doctest                   |   8 +-
 nltk/test/misc.doctest                      |   2 +-
 nltk/test/nonmonotonic.doctest              |   2 +-
 nltk/test/paice.doctest                     |   9 +-
 nltk/test/parse.doctest                     |   2 +-
 nltk/test/portuguese_en.doctest             |   2 +-
 nltk/test/portuguese_en_fixt.py             |   9 +-
 nltk/test/probability.doctest               |   2 +-
 nltk/test/propbank.doctest                  |   4 +-
 nltk/test/relextract.doctest                |   2 +-
 nltk/test/resolution.doctest                |   2 +-
 nltk/test/semantics.doctest                 |  40 +-
 nltk/test/sentiwordnet.doctest              |   2 +-
 nltk/test/simple.doctest                    |   2 +-
 nltk/test/stem.doctest                      |   2 +-
 nltk/test/tag.doctest                       |   2 +-
 nltk/test/tokenize.doctest                  |  17 +-
 nltk/test/toolbox.doctest                   |   2 +-
 nltk/test/tree.doctest                      |  28 +-
 nltk/test/treeprettyprinter.doctest         | 127 +++++
 nltk/test/treetransforms.doctest            |   2 +-
 nltk/test/unit/test_stem.py                 |   9 +
 nltk/test/util.doctest                      |   2 +-
 nltk/test/wordnet.doctest                   |   2 +-
 nltk/test/wordnet_lch.doctest               |   2 +-
 nltk/test/wsd.doctest                       |  78 +--
 nltk/text.py                                |   4 +-
 nltk/tokenize/__init__.py                   |  24 +-
 nltk/tokenize/api.py                        |   2 +-
 nltk/tokenize/punkt.py                      |  46 +-
 nltk/tokenize/regexp.py                     |  26 +-
 nltk/tokenize/sexpr.py                      |   2 +-
 nltk/tokenize/simple.py                     |   2 +-
 nltk/tokenize/stanford.py                   |  12 +-
 nltk/tokenize/texttiling.py                 |  18 +-
 nltk/tokenize/treebank.py                   |   2 +-
 nltk/tokenize/util.py                       |   2 +-
 nltk/toolbox.py                             |   2 +-
 nltk/tree.py                                |  55 +-
 nltk/treeprettyprinter.py                   | 566 ++++++++++++++++++++
 nltk/util.py                                |   2 +-
 nltk/wsd.py                                 |  88 ++--
 setup.cfg                                   |   4 +-
 setup.py                                    |   2 +-
 270 files changed, 5300 insertions(+), 2080 deletions(-)
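The headline changes in this upstream release are the replacement of nltk/align/bleu.py
by nltk/align/bleu_score.py and a batch of new modules (classify/senna.py,
corpus/reader/nkjp.py, parse/bllip.py, parse/evaluate.py, parse/transitionparser.py,
tag/crf.py, treeprettyprinter.py). As a quick, purely illustrative check that the
imported tree really is 3.0.2 (nltk exposes the VERSION file as nltk.__version__):

    >>> import nltk
    >>> nltk.__version__
    '3.0.2'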

diff --git a/LICENSE.txt b/LICENSE.txt
index 3172938..c8d5879 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (C) 2001-2014 NLTK Project
+Copyright (C) 2001-2015 NLTK Project
 
 Licensed under the Apache License, Version 2.0 (the 'License');
 you may not use this file except in compliance with the License.
diff --git a/PKG-INFO b/PKG-INFO
index edd6f1e..b5bf363 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nltk
-Version: 3.0.1
+Version: 3.0.2
 Summary: Natural Language Toolkit
 Home-page: http://nltk.org/
 Author: Steven Bird
diff --git a/README.txt b/README.txt
index cbc5379..5e5e9bd 100644
--- a/README.txt
+++ b/README.txt
@@ -4,7 +4,7 @@ Authors: Steven Bird <stevenbird1 at gmail.com>
          Edward Loper <edloper at gmail.com>
          Ewan Klein <ewan at inf.ed.ac.uk>
 
-Copyright (C) 2001-2014 NLTK Project
+Copyright (C) 2001-2015 NLTK Project
 
 For license information, see LICENSE.txt
 
diff --git a/nltk.egg-info/PKG-INFO b/nltk.egg-info/PKG-INFO
index edd6f1e..b5bf363 100644
--- a/nltk.egg-info/PKG-INFO
+++ b/nltk.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: nltk
-Version: 3.0.1
+Version: 3.0.2
 Summary: Natural Language Toolkit
 Home-page: http://nltk.org/
 Author: Steven Bird
diff --git a/nltk.egg-info/SOURCES.txt b/nltk.egg-info/SOURCES.txt
index f20a498..d0587af 100644
--- a/nltk.egg-info/SOURCES.txt
+++ b/nltk.egg-info/SOURCES.txt
@@ -22,6 +22,7 @@ nltk/probability.py
 nltk/text.py
 nltk/toolbox.py
 nltk/tree.py
+nltk/treeprettyprinter.py
 nltk/treetransforms.py
 nltk/util.py
 nltk/wsd.py
@@ -32,7 +33,7 @@ nltk.egg-info/not-zip-safe
 nltk.egg-info/top_level.txt
 nltk/align/__init__.py
 nltk/align/api.py
-nltk/align/bleu.py
+nltk/align/bleu_score.py
 nltk/align/gale_church.py
 nltk/align/gdfa.py
 nltk/align/ibm1.py
@@ -76,6 +77,7 @@ nltk/classify/naivebayes.py
 nltk/classify/positivenaivebayes.py
 nltk/classify/rte_classify.py
 nltk/classify/scikitlearn.py
+nltk/classify/senna.py
 nltk/classify/svm.py
 nltk/classify/tadm.py
 nltk/classify/util.py
@@ -106,6 +108,7 @@ nltk/corpus/reader/indian.py
 nltk/corpus/reader/ipipan.py
 nltk/corpus/reader/knbc.py
 nltk/corpus/reader/lin.py
+nltk/corpus/reader/nkjp.py
 nltk/corpus/reader/nombank.py
 nltk/corpus/reader/nps_chat.py
 nltk/corpus/reader/pl196x.py
@@ -160,9 +163,11 @@ nltk/misc/sort.py
 nltk/misc/wordfinder.py
 nltk/parse/__init__.py
 nltk/parse/api.py
+nltk/parse/bllip.py
 nltk/parse/chart.py
 nltk/parse/dependencygraph.py
 nltk/parse/earleychart.py
+nltk/parse/evaluate.py
 nltk/parse/featurechart.py
 nltk/parse/generate.py
 nltk/parse/malt.py
@@ -172,6 +177,7 @@ nltk/parse/projectivedependencyparser.py
 nltk/parse/recursivedescent.py
 nltk/parse/shiftreduce.py
 nltk/parse/stanford.py
+nltk/parse/transitionparser.py
 nltk/parse/util.py
 nltk/parse/viterbi.py
 nltk/sem/__init__.py
@@ -197,12 +203,14 @@ nltk/stem/porter.py
 nltk/stem/regexp.py
 nltk/stem/rslp.py
 nltk/stem/snowball.py
+nltk/stem/util.py
 nltk/stem/wordnet.py
 nltk/tag/__init__.py
 nltk/tag/api.py
 nltk/tag/brill.py
 nltk/tag/brill_trainer.py
 nltk/tag/brill_trainer_orig.py
+nltk/tag/crf.py
 nltk/tag/hmm.py
 nltk/tag/hunpos.py
 nltk/tag/mapping.py
@@ -222,6 +230,7 @@ nltk/test/__init__.py
 nltk/test/align.doctest
 nltk/test/align_fixt.py
 nltk/test/all.py
+nltk/test/bleu.doctest
 nltk/test/bnc.doctest
 nltk/test/ccg.doctest
 nltk/test/chat80.doctest
@@ -280,6 +289,7 @@ nltk/test/tag.doctest
 nltk/test/tokenize.doctest
 nltk/test/toolbox.doctest
 nltk/test/tree.doctest
+nltk/test/treeprettyprinter.doctest
 nltk/test/treetransforms.doctest
 nltk/test/util.doctest
 nltk/test/wordnet.doctest
diff --git a/nltk/VERSION b/nltk/VERSION
index cb2b00e..b502146 100644
--- a/nltk/VERSION
+++ b/nltk/VERSION
@@ -1 +1 @@
-3.0.1
+3.0.2
diff --git a/nltk/__init__.py b/nltk/__init__.py
index 85f862a..0882a57 100644
--- a/nltk/__init__.py
+++ b/nltk/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK)
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -41,7 +41,7 @@ if __doc__ is not None: # fix for the ``python -OO``
 
 # Copyright notice
 __copyright__ = """\
-Copyright (C) 2001-2014 NLTK Project.
+Copyright (C) 2001-2015 NLTK Project.
 
 Distributed and Licensed under the Apache License, Version 2.0,
 which is included by reference.
@@ -144,7 +144,7 @@ try:
 except ImportError:
     pass
 else:
-    from nltk import cluster; from .cluster import *
+    from nltk import cluster
 
 from nltk.downloader import download, download_shell
 try:
diff --git a/nltk/align/__init__.py b/nltk/align/__init__.py
index 6db9d6a..3f53c1c 100644
--- a/nltk/align/__init__.py
+++ b/nltk/align/__init__.py
@@ -11,9 +11,11 @@ Experimental functionality for bitext alignment.
 These interfaces are prone to change.
 """
 
-from nltk.align.api  import AlignedSent, Alignment
+from nltk.align.api import AlignedSent, Alignment
 from nltk.align.ibm1 import IBMModel1
 from nltk.align.ibm2 import IBMModel2
 from nltk.align.ibm3 import IBMModel3
+from nltk.align.bleu_score import bleu
+
 
 
diff --git a/nltk/align/api.py b/nltk/align/api.py
index 33bd35d..ec8d890 100644
--- a/nltk/align/api.py
+++ b/nltk/align/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Aligned Sentences
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Will Zhang <wilzzha at gmail.com>
 #         Guan Gui <ggui at student.unimelb.edu.au>
 #         Steven Bird <stevenbird1 at gmail.com>
@@ -59,6 +59,7 @@ class AlignedSent(object):
 
     def _get_alignment(self):
         return self._alignment
+        
     def _set_alignment(self, alignment):
         if not isinstance(alignment, Alignment):
             alignment = Alignment(alignment)
diff --git a/nltk/align/bleu.py b/nltk/align/bleu.py
deleted file mode 100644
index ac70f0f..0000000
--- a/nltk/align/bleu.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: BLEU
-#
-# Copyright (C) 2001-2013 NLTK Project
-# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-from __future__ import division
-
-import math
-
-from nltk import word_tokenize
-from nltk.compat import Counter
-from nltk.util import ngrams
-
-
-class BLEU(object):
-    """
-    This class implements the BLEU method, which is used to evaluate
-    the quality of machine translation. [1]
-
-    Consider an example:
-
-    >>> weights = [0.25, 0.25, 0.25, 0.25]
-    >>> candidate1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
-    ...               'ensures', 'that', 'the', 'military', 'always',
-    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
-
-    >>> candidate2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
-    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
-    ...               'that', 'party', 'direct']
-
-    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
-    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
-    ...               'heed', 'Party', 'commands']
-
-    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
-    ...               'guarantees', 'the', 'military', 'forces', 'always',
-    ...               'being', 'under', 'the', 'command', 'of', 'the',
-    ...               'Party']
-
-    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
-    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
-    ...               'of', 'the', 'party']
-
-    The BLEU method mainly consists of two parts:
-
-    Part 1 - modified n-gram precision
-
-    The normal precision method may lead to some wrong translations with
-    high-precision, e.g., the translation, in which a word of reference
-    repeats several times, has very high precision. So in the modified
-    n-gram precision, a reference word will be considered exhausted after
-    a matching candidate word is identified.
-
-    Unigrams:
-
-    >>> BLEU.modified_precision(
-    ...    candidate1,
-    ...    [reference1, reference2, reference3],
-    ...    n=1,
-    ... )
-    0.94...
-
-    >>> BLEU.modified_precision(
-    ...    candidate2,
-    ...    [reference1, reference2, reference3],
-    ...    n=1,
-    ... )
-    0.57...
-
-    Bigrmas:
-
-    >>> BLEU.modified_precision(
-    ...    candidate1,
-    ...    [reference1, reference2, reference3],
-    ...    n=2,
-    ... )
-    0.58...
-
-    >>> BLEU.modified_precision(
-    ...    candidate2,
-    ...    [reference1, reference2, reference3],
-    ...    n=2,
-    ... )
-    0.07...
-
-
-    Part 2 - brevity penalty
-
-    As the modified n-gram precision still has the problem from the short
-    length sentence, brevity penalty is used to modify the overall BLEU
-    score according to length.
-
-    >>> BLEU.compute(candidate1, [reference1, reference2, reference3], weights)
-    0.504...
-
-    >>> BLEU.compute(candidate2, [reference1, reference2, reference3], weights)
-    0.457...
-
-    2. Test with two corpus that one is a reference and another is
-    an output from translation system:
-
-    >>> weights = [0.25, 0.25, 0.25, 0.25]
-    >>> ref_file = open('newstest2012-ref.en')  # doctest: +SKIP
-    >>> candidate_file = open('newstest2012.fr-en.cmu-avenue')  # doctest: +SKIP
-
-    >>> total = 0.0
-    >>> count = 0
-
-    >>> for candi_raw in candidate_file:  # doctest: +SKIP
-    ...		ref_raw = ref_file.readline()
-    ...		ref_tokens = word_tokenize(ref_raw)
-    ...		candi_tokens = word_tokenize(candi_raw)
-    ...		total = BLEU.compute(candi_tokens, [ref_tokens], weights)
-    ...		count += 1
-
-    >>> total / count  # doctest: +SKIP
-    2.787504437460048e-05
-
-    [1] Papineni, Kishore, et al. "BLEU: a method for automatic evaluation of
-    machine translation." Proceedings of the 40th annual meeting on
-    association for computational linguistics. Association for Computational
-    Linguistics, 2002.
-
-    """
-
-    @staticmethod
-    def compute(candidate, references, weights):
-        candidate = [c.lower() for c in candidate]
-        references = [[r.lower() for r in reference] for reference in references]
-
-        p_ns = (BLEU.modified_precision(candidate, references, i) for i, _ in enumerate(weights, start=1))
-        s = math.fsum(w * math.log(p_n) for w, p_n in zip(weights, p_ns) if p_n)
-
-        bp = BLEU.brevity_penalty(candidate, references)
-        return bp * math.exp(s)
-
-    @staticmethod
-    def modified_precision(candidate, references, n):
-        """ Calculate modified ngram precision.
-
-        >>> BLEU.modified_precision(
-        ...    'the the the the the the the'.split(),
-        ...    ['the cat is on the mat'.split(), 'there is a cat on the mat'.split()],
-        ...    n=1,
-        ... )
-        0.28...
-
-        >>> BLEU.modified_precision(
-        ...    'the the the the the the the'.split(),
-        ...    ['the cat is on the mat'.split(), 'there is a cat on the mat'.split()],
-        ...    n=2,
-        ... )
-        0.0
-
-        >>> BLEU.modified_precision(
-        ...    'of the'.split(),
-        ...    [
-        ...        'It is a guide to action that ensures that the military will forever heed Party commands.'.split(),
-        ...        'It is the guiding principle which guarantees the military forces always being under the command of the Party.'.split(),
-        ...        'It is the practical guide for the army always to heed the directions of the party'.split(),
-        ...    ],
-        ...    n=1,
-        ... )
-        1.0
-
-        >>> BLEU.modified_precision(
-        ...    'of the'.split(),
-        ...    [
-        ...        'It is a guide to action that ensures that the military will forever heed Party commands.'.split(),
-        ...        'It is the guiding principle which guarantees the military forces always being under the command of the Party.'.split(),
-        ...        'It is the practical guide for the army always to heed the directions of the party'.split(),
-        ...    ],
-        ...    n=2,
-        ... )
-        1.0
-
-        """
-        counts = Counter(ngrams(candidate, n))
-
-        if not counts:
-            return 0
-
-        max_counts = {}
-        for reference in references:
-            reference_counts = Counter(ngrams(reference, n))
-            for ngram in counts:
-                max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
-
-        clipped_counts = dict((ngram, min(count, max_counts[ngram])) for ngram, count in counts.items())
-
-        return sum(clipped_counts.values()) / sum(counts.values())
-
-    @staticmethod
-    def brevity_penalty(candidate, references):
-        c = len(candidate)
-        r = min(abs(len(r) - c) for r in references)
-
-        if c > r:
-            return 1
-        else:
-            return math.exp(1 - r / c)
-
-# run doctests
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
diff --git a/nltk/align/bleu_score.py b/nltk/align/bleu_score.py
new file mode 100644
index 0000000..5472509
--- /dev/null
+++ b/nltk/align/bleu_score.py
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: BLEU Score
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
+# Contributors: Dmitrijs Milajevs
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""BLEU score implementation."""
+
+from __future__ import division
+
+import math
+
+from nltk.tokenize import word_tokenize
+from nltk.compat import Counter
+from nltk.util import ngrams
+
+
+def bleu(candidate, references, weights):
+    """Calculate BLEU score (Bilingual Evaluation Understudy)
+
+    :param candidate: a candidate sentence
+    :type candidate: list(str)
+    :param references: reference sentences
+    :type references: list(list(str))
+    :param weights: weights for unigrams, bigrams, trigrams and so on
+    :type weights: list(float)
+
+    >>> weights = [0.25, 0.25, 0.25, 0.25]
+    >>> candidate1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...               'ensures', 'that', 'the', 'military', 'always',
+    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> candidate2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
+    ...               'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...               'heed', 'Party', 'commands']
+
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+
+    >>> bleu(candidate1, [reference1, reference2, reference3], weights)
+    0.504...
+
+    >>> bleu(candidate2, [reference1, reference2, reference3], weights)
+    0
+
+    Papineni, Kishore, et al. "BLEU: A method for automatic evaluation of
+    machine translation." Proceedings of the 40th annual meeting on association for
+    computational linguistics. Association for Computational Linguistics, 2002.
+    http://www.aclweb.org/anthology/P02-1040.pdf
+
+    """
+    p_ns = (
+        _modified_precision(candidate, references, i)
+        for i, _ in enumerate(weights, start=1)
+    )
+
+    try:
+        s = math.fsum(w * math.log(p_n) for w, p_n in zip(weights, p_ns))
+    except ValueError:
+        # some p_ns is 0
+        return 0
+
+    bp = _brevity_penalty(candidate, references)
+    return bp * math.exp(s)
+
+
+def _modified_precision(candidate, references, n):
+    """Calculate modified ngram precision.
+
+    The normal precision method may lead to some wrong translations with
+    high-precision, e.g., the translation, in which a word of reference
+    repeats several times, has very high precision. So in the modified
+    n-gram precision, a reference word will be considered exhausted after
+    a matching candidate word is identified.
+
+    Paper examples:
+
+    >>> _modified_precision(
+    ...    'the the the the the the the'.split(),
+    ...    ['the cat is on the mat'.split(), 'there is a cat on the mat'.split()],
+    ...    n=1,
+    ... )
+    0.28...
+
+    >>> _modified_precision(
+    ...    'the the the the the the the'.split(),
+    ...    ['the cat is on the mat'.split(), 'there is a cat on the mat'.split()],
+    ...    n=2,
+    ... )
+    0.0
+
+    >>> _modified_precision(
+    ...    'of the'.split(),
+    ...    [
+    ...        'It is a guide to action that ensures that the military will forever heed Party commands.'.split(),
+    ...        'It is the guiding principle which guarantees the military forces always being under the command of the Party.'.split(),
+    ...        'It is the practical guide for the army always to heed the directions of the party'.split(),
+    ...    ],
+    ...    n=1,
+    ... )
+    1.0
+
+    >>> _modified_precision(
+    ...    'of the'.split(),
+    ...    [
+    ...        'It is a guide to action that ensures that the military will forever heed Party commands.'.split(),
+    ...        'It is the guiding principle which guarantees the military forces always being under the command of the Party.'.split(),
+    ...        'It is the practical guide for the army always to heed the directions of the party'.split(),
+    ...    ],
+    ...    n=2,
+    ... )
+    1.0
+
+    More examples:
+
+    >>> weights = [0.25, 0.25, 0.25, 0.25]
+    >>> candidate1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+    ...               'ensures', 'that', 'the', 'military', 'always',
+    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']
+
+    >>> candidate2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
+    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
+    ...               'that', 'party', 'direct']
+
+    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
+    ...               'heed', 'Party', 'commands']
+
+    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+    ...               'guarantees', 'the', 'military', 'forces', 'always',
+    ...               'being', 'under', 'the', 'command', 'of', 'the',
+    ...               'Party']
+
+    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
+    ...               'of', 'the', 'party']
+
+    Unigrams:
+
+    >>> _modified_precision(
+    ...    candidate1,
+    ...    [reference1, reference2, reference3],
+    ...    n=1,
+    ... )
+    0.94...
+
+    >>> _modified_precision(
+    ...    candidate2,
+    ...    [reference1, reference2, reference3],
+    ...    n=1,
+    ... )
+    0.57...
+
+    Bigrams:
+
+    >>> _modified_precision(
+    ...    candidate1,
+    ...    [reference1, reference2, reference3],
+    ...    n=2,
+    ... )
+    0.58...
+
+    >>> _modified_precision(
+    ...    candidate2,
+    ...    [reference1, reference2, reference3],
+    ...    n=2,
+    ... )
+    0.07...
+
+    """
+    counts = Counter(ngrams(candidate, n))
+
+    if not counts:
+        return 0
+
+    max_counts = {}
+    for reference in references:
+        reference_counts = Counter(ngrams(reference, n))
+        for ngram in counts:
+            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
+
+    clipped_counts = dict((ngram, min(count, max_counts[ngram])) for ngram, count in counts.items())
+
+    return sum(clipped_counts.values()) / sum(counts.values())
+
+
+def _brevity_penalty(candidate, references):
+    """Calculate brevity penalty.
+
+    As the modified n-gram precision still has the problem from the short
+    length sentence, brevity penalty is used to modify the overall BLEU
+    score according to length.
+
+    """
+    c = len(candidate)
+    r = min(abs(len(r) - c) for r in references)
+
+    if c > r:
+        return 1
+    else:
+        return math.exp(1 - r / c)
+
+
+# run doctests
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.ELLIPSIS)
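
With the class-based nltk.align.bleu.BLEU API removed, callers use the module-level
bleu() function instead, which nltk/align/__init__.py (above) also re-exports. A short
interactive sketch, reusing candidate1 and the reference sentences defined in the
doctest above:

    >>> from nltk.align import bleu
    >>> weights = [0.25, 0.25, 0.25, 0.25]
    >>> bleu(candidate1, [reference1, reference2, reference3], weights)  # doctest: +ELLIPSIS
    0.504...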
diff --git a/nltk/align/gdfa.py b/nltk/align/gdfa.py
index bd254a9..f2e9743 100644
--- a/nltk/align/gdfa.py
+++ b/nltk/align/gdfa.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: GDFA word alignment symmetrization
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/align/ibm2.py b/nltk/align/ibm2.py
index 1aca80c..f2f4b35 100644
--- a/nltk/align/ibm2.py
+++ b/nltk/align/ibm2.py
@@ -20,7 +20,7 @@ class IBMModel2(object):
     Step 1 - Run a number of iterations of IBM Model 1 and get the initial
              distribution of translation probability. 
 
-    Step 2 - Collect the evidence of a English word being translated by a 
+    Step 2 - Collect the evidence of an English word being translated by a 
              foreign language word.
 
     Step 3 - Estimate the probability of translation and alignment according 
diff --git a/nltk/align/phrase_based.py b/nltk/align/phrase_based.py
index a93e752..87de8f3 100644
--- a/nltk/align/phrase_based.py
+++ b/nltk/align/phrase_based.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Phrase Extraction Algorithm
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Liling Tan and Fredrik Hedman
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/__init__.py b/nltk/app/__init__.py
index 437d25e..4297a51 100644
--- a/nltk/app/__init__.py
+++ b/nltk/app/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Applications package
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/app/chartparser_app.py b/nltk/app/chartparser_app.py
index 17d2f99..da38b15 100644
--- a/nltk/app/chartparser_app.py
+++ b/nltk/app/chartparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chart Parser Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Jean Mark Gawron <gawron at mail.sdsu.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/app/chunkparser_app.py b/nltk/app/chunkparser_app.py
index f0b3c4b..23c24a3 100644
--- a/nltk/app/chunkparser_app.py
+++ b/nltk/app/chunkparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Regexp Chunk Parser Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -902,7 +902,7 @@ class RegexpChunkApp(object):
         self.normalized_grammar = self.normalize_grammar(
             self._history[index][0])
         if self.normalized_grammar:
-            rules = [RegexpChunkRule.parse(line)
+            rules = [RegexpChunkRule.fromstring(line)
                      for line in self.normalized_grammar.split('\n')]
         else:
             rules = []
@@ -1035,7 +1035,7 @@ class RegexpChunkApp(object):
             line = line.strip()
             if line:
                 try:
-                    RegexpChunkRule.parse(line)
+                    RegexpChunkRule.fromstring(line)
                 except ValueError as e:
                     self.grammarbox.tag_add('error', '%s.0' % (lineno+1),
                                             '%s.0 lineend' % (lineno+1))
@@ -1068,7 +1068,7 @@ class RegexpChunkApp(object):
         try:
             # Note: the normalized grammar has no blank lines.
             if normalized_grammar:
-                rules = [RegexpChunkRule.parse(line)
+                rules = [RegexpChunkRule.fromstring(line)
                          for line in normalized_grammar.split('\n')]
             else:
                 rules = []
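
The three hunks above track an upstream rename: RegexpChunkRule.parse() is now
RegexpChunkRule.fromstring(). Outside the GUI the new name is used the same way;
an illustrative call with a standard NP chunk-rule string:

    from nltk.chunk.regexp import RegexpChunkRule

    # formerly RegexpChunkRule.parse('{<DT>?<JJ>*<NN>*}')
    rule = RegexpChunkRule.fromstring('{<DT>?<JJ>*<NN>*}')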
diff --git a/nltk/app/collocations_app.py b/nltk/app/collocations_app.py
index adf9444..b1a2f8a 100644
--- a/nltk/app/collocations_app.py
+++ b/nltk/app/collocations_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Collocations Application
 # Much of the GUI code is imported from concordance.py; We intend to merge these tools together
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/concordance_app.py b/nltk/app/concordance_app.py
index 5e58a96..0ccdcca 100755
--- a/nltk/app/concordance_app.py
+++ b/nltk/app/concordance_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Concordance Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/rdparser_app.py b/nltk/app/rdparser_app.py
index d5f3526..ca4d98d 100644
--- a/nltk/app/rdparser_app.py
+++ b/nltk/app/rdparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Recursive Descent Parser Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/srparser_app.py b/nltk/app/srparser_app.py
index b3ff44a..1de4dc3 100644
--- a/nltk/app/srparser_app.py
+++ b/nltk/app/srparser_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Shift-Reduce Parser Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/wordfreq_app.py b/nltk/app/wordfreq_app.py
index a9128be..3ced28a 100644
--- a/nltk/app/wordfreq_app.py
+++ b/nltk/app/wordfreq_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Wordfreq Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Sumukh Ghodke <sghodke at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/app/wordnet_app.py b/nltk/app/wordnet_app.py
index 687ba1f..82506cd 100644
--- a/nltk/app/wordnet_app.py
+++ b/nltk/app/wordnet_app.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: WordNet Browser Application
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
 #         Paul Bone <pbone at students.csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
@@ -799,7 +799,7 @@ def get_static_web_help_page():
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2014 NLTK Project
+            Copyright (C) 2001-2015 NLTK Project
             Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -870,7 +870,7 @@ def get_static_index_page(with_shutdown):
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"  "http://www.w3.org/TR/html4/frameset.dtd">
 <HTML>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2014 NLTK Project
+            Copyright (C) 2001-2015 NLTK Project
             Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -904,7 +904,7 @@ def get_static_upper_page(with_shutdown):
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
     <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-        Copyright (C) 2001-2014 NLTK Project
+        Copyright (C) 2001-2015 NLTK Project
         Author: Jussi Salmela <jtsalmela at users.sourceforge.net>
         URL: <http://nltk.org/>
         For license information, see LICENSE.TXT -->
diff --git a/nltk/book.py b/nltk/book.py
index 588dec4..3d6777e 100644
--- a/nltk/book.py
+++ b/nltk/book.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Some texts for exploration in chapter 1 of the book
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/ccg/__init__.py b/nltk/ccg/__init__.py
index ebd883a..34b5acb 100644
--- a/nltk/ccg/__init__.py
+++ b/nltk/ccg/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/api.py b/nltk/ccg/api.py
index 8578dd1..bf73fc8 100644
--- a/nltk/ccg/api.py
+++ b/nltk/ccg/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CCG Categories
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/chart.py b/nltk/ccg/chart.py
index 905d7e7..bdd5cf9 100644
--- a/nltk/ccg/chart.py
+++ b/nltk/ccg/chart.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/combinator.py b/nltk/ccg/combinator.py
index 2e98879..a41dccd 100644
--- a/nltk/ccg/combinator.py
+++ b/nltk/ccg/combinator.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/ccg/lexicon.py b/nltk/ccg/lexicon.py
index f41b53e..60badb5 100644
--- a/nltk/ccg/lexicon.py
+++ b/nltk/ccg/lexicon.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Graeme Gange <ggange at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/__init__.py b/nltk/chat/__init__.py
index e00ccfd..881a73e 100644
--- a/nltk/chat/__init__.py
+++ b/nltk/chat/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chatbots
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/eliza.py b/nltk/chat/eliza.py
index c2f869a..000009e 100644
--- a/nltk/chat/eliza.py
+++ b/nltk/chat/eliza.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Eliza
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/chat/iesha.py b/nltk/chat/iesha.py
index 527c39d..8b856d3 100644
--- a/nltk/chat/iesha.py
+++ b/nltk/chat/iesha.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Teen Chatbot
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Selina Dennis <sjmd at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/rude.py b/nltk/chat/rude.py
index 2fad2ff..292d54d 100644
--- a/nltk/chat/rude.py
+++ b/nltk/chat/rude.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Rude Chatbot
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Peter Spiller <pspiller at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/suntsu.py b/nltk/chat/suntsu.py
index 090d04b..f8ddd6f 100644
--- a/nltk/chat/suntsu.py
+++ b/nltk/chat/suntsu.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sun Tsu-Bot
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Sam Huston 2007
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/util.py b/nltk/chat/util.py
index fc11f9d..9a2e6c4 100644
--- a/nltk/chat/util.py
+++ b/nltk/chat/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chatbot Utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chat/zen.py b/nltk/chat/zen.py
index 0c01bd5..a6ed163 100644
--- a/nltk/chat/zen.py
+++ b/nltk/chat/zen.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Zen Chatbot
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Amy Holland <amyrh at csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chunk/__init__.py b/nltk/chunk/__init__.py
index 23c4b2e..7d33a48 100644
--- a/nltk/chunk/__init__.py
+++ b/nltk/chunk/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunkers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/api.py b/nltk/chunk/api.py
index abc7347..32d3c45 100644
--- a/nltk/chunk/api.py
+++ b/nltk/chunk/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/named_entity.py b/nltk/chunk/named_entity.py
index ee14cb2..8cf1077 100644
--- a/nltk/chunk/named_entity.py
+++ b/nltk/chunk/named_entity.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/chunk/regexp.py b/nltk/chunk/regexp.py
index 5fd7b3d..6e4d347 100644
--- a/nltk/chunk/regexp.py
+++ b/nltk/chunk/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Regular Expression Chunkers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/chunk/util.py b/nltk/chunk/util.py
index 6162979..7fb13f1 100644
--- a/nltk/chunk/util.py
+++ b/nltk/chunk/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk format conversions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -548,7 +548,7 @@ def demo():
     s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
     import nltk
     t = nltk.chunk.tagstr2tree(s, chunk_label='NP')
-    print(t.pprint())
+    t.pprint()
     print()
 
     s = """
@@ -582,7 +582,7 @@ better JJR I-ADJP
 """
 
     conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP'))
-    print(conll_tree.pprint())
+    conll_tree.pprint()
 
     # Demonstrate CoNLL output
     print("CoNLL output:")
diff --git a/nltk/classify/__init__.py b/nltk/classify/__init__.py
index 8d6fda2..972995b 100644
--- a/nltk/classify/__init__.py
+++ b/nltk/classify/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifiers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -94,3 +94,4 @@ from nltk.classify.scikitlearn import SklearnClassifier
 from nltk.classify.maxent import (MaxentClassifier, BinaryMaxentFeatureEncoding,
                                   TypedMaxentFeatureEncoding,
                                   ConditionalExponentialClassifier)
+from nltk.classify.senna import Senna
diff --git a/nltk/classify/api.py b/nltk/classify/api.py
index 3b977cc..641f96f 100644
--- a/nltk/classify/api.py
+++ b/nltk/classify/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Interface
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/classify/decisiontree.py b/nltk/classify/decisiontree.py
index c95bbe0..bc4cb02 100644
--- a/nltk/classify/decisiontree.py
+++ b/nltk/classify/decisiontree.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Decision Tree Classifiers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -71,7 +71,7 @@ class DecisionTreeClassifier(ClassifierI):
                 errors += 1
         return float(errors)/len(labeled_featuresets)
 
-    def pp(self, width=70, prefix='', depth=4):
+    def pretty_format(self, width=70, prefix='', depth=4):
         """
         Return a string containing a pretty-printed version of this
         decision tree.  Each line in this string corresponds to a
@@ -88,12 +88,12 @@ class DecisionTreeClassifier(ClassifierI):
             n = width-15-len(hdr)
             s += '%s%s %s\n' % (hdr, '.'*(n), result._label)
             if result._fname is not None and depth>1:
-                s += result.pp(width, prefix+'  ', depth-1)
+                s += result.pretty_format(width, prefix+'  ', depth-1)
         if self._default is not None:
             n = width-len(prefix)-21
             s += '%selse: %s %s\n' % (prefix, '.'*n, self._default._label)
             if self._default._fname is not None and depth>1:
-                s += self._default.pp(width, prefix+'  ', depth-1)
+                s += self._default.pretty_format(width, prefix+'  ', depth-1)
         return s
 
     def pseudocode(self, prefix='', depth=4):
@@ -124,7 +124,7 @@ class DecisionTreeClassifier(ClassifierI):
         return s
 
     def __str__(self):
-        return self.pp()
+        return self.pretty_format()
 
     @staticmethod
     def train(labeled_featuresets, entropy_cutoff=0.05, depth_cutoff=100,
@@ -165,13 +165,13 @@ class DecisionTreeClassifier(ClassifierI):
 
     @staticmethod
     def leaf(labeled_featuresets):
-        label = FreqDist(label for (featureset,label)
+        label = FreqDist(label for (featureset, label)
                          in labeled_featuresets).max()
         return DecisionTreeClassifier(label)
 
     @staticmethod
     def stump(feature_name, labeled_featuresets):
-        label = FreqDist(label for (featureset,label)
+        label = FreqDist(label for (featureset, label)
                          in labeled_featuresets).max()
 
         # Find the best label for each value.
@@ -191,11 +191,11 @@ class DecisionTreeClassifier(ClassifierI):
         if self._fname is None: return
         if depth_cutoff <= 0: return
         for fval in self._decisions:
-            fval_featuresets = [(featureset,label) for (featureset,label)
+            fval_featuresets = [(featureset, label) for (featureset, label)
                                 in labeled_featuresets
                                 if featureset.get(self._fname) == fval]
 
-            label_freqs = FreqDist(label for (featureset,label)
+            label_freqs = FreqDist(label for (featureset, label)
                                    in fval_featuresets)
             if entropy(MLEProbDist(label_freqs)) > entropy_cutoff:
                 self._decisions[fval] = DecisionTreeClassifier.train(
@@ -206,7 +206,7 @@ class DecisionTreeClassifier(ClassifierI):
                                    in labeled_featuresets
                                    if featureset.get(self._fname) not in
                                    self._decisions]
-            label_freqs = FreqDist(label for (featureset,label)
+            label_freqs = FreqDist(label for (featureset, label)
                                    in default_featuresets)
             if entropy(MLEProbDist(label_freqs)) > entropy_cutoff:
                 self._default = DecisionTreeClassifier.train(
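
The decisiontree.py hunks above rename DecisionTreeClassifier.pp() to
pretty_format() (with __str__ now delegating to it) and tidy tuple spacing.
A minimal usage sketch of the renamed method; the toy featuresets are
invented for illustration and are not part of this patch:

    from nltk.classify.decisiontree import DecisionTreeClassifier

    # toy labelled featuresets, invented for illustration only
    train_data = [({'ends_in_a': True},  'female'),
                  ({'ends_in_a': False}, 'male'),
                  ({'ends_in_a': True},  'female'),
                  ({'ends_in_a': False}, 'male')]
    tree = DecisionTreeClassifier.train(train_data)
    print(tree.pretty_format(width=70, depth=4))   # formerly tree.pp(...)
    print(tree.classify({'ends_in_a': True}))
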
diff --git a/nltk/classify/maxent.py b/nltk/classify/maxent.py
index 755e585..5465e12 100644
--- a/nltk/classify/maxent.py
+++ b/nltk/classify/maxent.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Maximum Entropy Classifiers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Dmitry Chichkov <dchichkov at gmail.com> (TypedMaxentFeatureEncoding)
 # URL: <http://nltk.org/>
@@ -59,10 +59,8 @@ try:
 except ImportError:
     pass
 
-import time
 import tempfile
 import os
-import gzip
 from collections import defaultdict
 
 from nltk import compat
@@ -72,7 +70,8 @@ from nltk.probability import DictionaryProbDist
 
 from nltk.classify.api import ClassifierI
 from nltk.classify.util import CutoffChecker, accuracy, log_likelihood
-from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights
+from nltk.classify.megam import (call_megam,
+                                 write_megam_file, parse_megam_weights)
 from nltk.classify.tadm import call_tadm, write_tadm_file, parse_tadm_weights
 
 ######################################################################
@@ -131,7 +130,7 @@ class MaxentClassifier(ClassifierI):
         :type new_weights: list of float
         """
         self._weights = new_weights
-        assert (self._encoding.length() == len(new_weights))
+        assert self._encoding.length() == len(new_weights)
 
     def weights(self):
         """
@@ -185,12 +184,14 @@ class MaxentClassifier(ClassifierI):
             feature_vector.sort(key=lambda fid__: abs(self._weights[fid__[0]]),
                                 reverse=True)
             for (f_id, f_val) in feature_vector:
-                if self._logarithmic: score = self._weights[f_id] * f_val
+                if self._logarithmic:
+                    score = self._weights[f_id] * f_val
                 else: score = self._weights[f_id] ** f_val
                 descr = self._encoding.describe(f_id)
                 descr = descr.split(' and label is ')[0] # hack
                 descr += ' (%s)' % f_val                 # hack
-                if len(descr) > 47: descr = descr[:44]+'...'
+                if len(descr) > 47:
+                    descr = descr[:44]+'...'
                 print(TEMPLATE % (descr, i*8*' ', score))
                 sums[label] += score
         print('  '+'-'*(descr_width-1+8*len(labels)))
@@ -207,9 +208,9 @@ class MaxentClassifier(ClassifierI):
                       key=lambda fid: abs(self._weights[fid]),
                       reverse=True)
         if show == 'pos':
-            fids = [fid for fid in fids if self._weights[fid]>0]
+            fids = [fid for fid in fids if self._weights[fid] > 0]
         elif show == 'neg':
-            fids = [fid for fid in fids if self._weights[fid]<0]
+            fids = [fid for fid in fids if self._weights[fid] < 0]
         for fid in fids[:n]:
             print('%8.3f %s' % (self._weights[fid],
                                 self._encoding.describe(fid)))
@@ -224,7 +225,7 @@ class MaxentClassifier(ClassifierI):
 
     @classmethod
     def train(cls, train_toks, algorithm=None, trace=3, encoding=None,
-              labels=None, sparse=None, gaussian_prior_sigma=0, **cutoffs):
+              labels=None, gaussian_prior_sigma=0, **cutoffs):
         """
         Train a new maxent classifier based on the given corpus of
         training samples.  This classifier will have its weights
@@ -507,14 +508,14 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
         """dict mapping from fname -> fid"""
 
         if alwayson_features:
-            self._alwayson = dict((label,i+self._length)
-                                   for (i,label) in enumerate(labels))
+            self._alwayson = dict((label, i+self._length)
+                                  for (i, label) in enumerate(labels))
             self._length += len(self._alwayson)
 
         if unseen_features:
             fnames = set(fname for (fname, fval, label) in mapping)
             self._unseen = dict((fname, i+self._length)
-                                 for (i, fname) in enumerate(fnames))
+                                for (i, fname) in enumerate(fnames))
             self._length += len(fnames)
 
     def encode(self, featureset, label):
@@ -560,10 +561,12 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
             return '%s==%r and label is %r' % (fname, fval, label)
         elif self._alwayson and f_id in self._alwayson.values():
             for (label, f_id2) in self._alwayson.items():
-                if f_id==f_id2: return 'label is %r' % label
+                if f_id == f_id2:
+                    return 'label is %r' % label
         elif self._unseen and f_id in self._unseen.values():
             for (fname, f_id2) in self._unseen.items():
-                if f_id==f_id2: return '%s is unseen' % fname
+                if f_id == f_id2:
+                    return '%s is unseen' % fname
         else:
             raise ValueError('Bad feature id')
 
@@ -618,12 +621,13 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
                 # If a count cutoff is given, then only add a joint
                 # feature once the corresponding (fname, fval, label)
                 # tuple exceeds that cutoff.
-                count[fname,fval] += 1
-                if count[fname,fval] >= count_cutoff:
+                count[fname, fval] += 1
+                if count[fname, fval] >= count_cutoff:
                     if (fname, fval, label) not in mapping:
                         mapping[fname, fval, label] = len(mapping)
 
-        if labels is None: labels = seen_labels
+        if labels is None:
+            labels = seen_labels
         return cls(labels, mapping, **options)
 
 class GISEncoding(BinaryMaxentFeatureEncoding):
@@ -651,7 +655,7 @@ class GISEncoding(BinaryMaxentFeatureEncoding):
         BinaryMaxentFeatureEncoding.__init__(
             self, labels, mapping, unseen_features, alwayson_features)
         if C is None:
-            C = len(set(fname for (fname,fval,label) in mapping))+1
+            C = len(set(fname for (fname, fval, label) in mapping))+1
         self._C = C
 
     @property
@@ -666,10 +670,10 @@ class GISEncoding(BinaryMaxentFeatureEncoding):
         base_length = BinaryMaxentFeatureEncoding.length(self)
 
         # Add a correction feature.
-        total = sum(v for (f,v) in encoding)
+        total = sum(v for (f, v) in encoding)
         if total >= self._C:
             raise ValueError('Correction feature is not high enough!')
-        encoding.append( (base_length, self._C-total) )
+        encoding.append((base_length, self._C-total))
 
         # Return the result
         return encoding
@@ -690,8 +694,8 @@ class TadmEventMaxentFeatureEncoding(BinaryMaxentFeatureEncoding):
         self._mapping = OrderedDict(mapping)
         self._label_mapping = OrderedDict()
         BinaryMaxentFeatureEncoding.__init__(self, labels, self._mapping,
-                                                   unseen_features,
-                                                   alwayson_features)
+                                             unseen_features,
+                                             alwayson_features)
 
     def encode(self, featureset, label):
         encoding = []
@@ -835,7 +839,7 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
 
         if alwayson_features:
             self._alwayson = dict((label, i+self._length)
-                                  for (i,label) in enumerate(labels))
+                                  for (i, label) in enumerate(labels))
             self._length += len(self._alwayson)
 
         if unseen_features:
@@ -853,7 +857,8 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
             if isinstance(fval, (compat.integer_types, float)):
                 # Known feature name & value:
                 if (fname, type(fval), label) in self._mapping:
-                    encoding.append((self._mapping[fname, type(fval), label], fval))
+                    encoding.append((self._mapping[fname, type(fval),
+                                                   label], fval))
             else:
                 # Known feature name & value:
                 if (fname, fval, label) in self._mapping:
@@ -893,10 +898,12 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
             return '%s==%r and label is %r' % (fname, fval, label)
         elif self._alwayson and f_id in self._alwayson.values():
             for (label, f_id2) in self._alwayson.items():
-                if f_id==f_id2: return 'label is %r' % label
+                if f_id == f_id2:
+                    return 'label is %r' % label
         elif self._unseen and f_id in self._unseen.values():
             for (fname, f_id2) in self._unseen.items():
-                if f_id==f_id2: return '%s is unseen' % fname
+                if f_id == f_id2:
+                    return '%s is unseen' % fname
         else:
             raise ValueError('Bad feature id')
 
@@ -950,16 +957,18 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
 
             # Record each of the features.
             for (fname, fval) in tok.items():
-                if(type(fval) in (int, float)): fval = type(fval)
+                if type(fval) in (int, float):
+                    fval = type(fval)
                 # If a count cutoff is given, then only add a joint
                 # feature once the corresponding (fname, fval, label)
                 # tuple exceeds that cutoff.
-                count[fname,fval] += 1
-                if count[fname,fval] >= count_cutoff:
+                count[fname, fval] += 1
+                if count[fname, fval] >= count_cutoff:
                     if (fname, fval, label) not in mapping:
                         mapping[fname, fval, label] = len(mapping)
 
-        if labels is None: labels = seen_labels
+        if labels is None:
+            labels = seen_labels
         return cls(labels, mapping, **options)
 
 
@@ -1000,12 +1009,13 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
     empirical_fcount = calculate_empirical_fcount(train_toks, encoding)
 
     # Check for any features that are not attested in train_toks.
-    unattested = set(numpy.nonzero(empirical_fcount==0)[0])
+    unattested = set(numpy.nonzero(empirical_fcount == 0)[0])
 
     # Build the classifier.  Start with weight=0 for each attested
     # feature, and weight=-infinity for each unattested feature.
     weights = numpy.zeros(len(empirical_fcount), 'd')
-    for fid in unattested: weights[fid] = numpy.NINF
+    for fid in unattested:
+        weights[fid] = numpy.NINF
     classifier = ConditionalExponentialClassifier(encoding, weights)
 
     # Take the log of the empirical fcount.
@@ -1017,7 +1027,8 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
     ll_old = None
     acc_old = None
 
-    if trace > 0: print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
+    if trace > 0:
+        print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
     if trace > 2:
         print()
         print('      Iteration    Log Likelihood    Accuracy')
@@ -1038,7 +1049,8 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
                 classifier, train_toks, encoding)
 
             # Take the log of estimated fcount (avoid taking log(0).)
-            for fid in unattested: estimated_fcount[fid] += 1
+            for fid in unattested:
+                estimated_fcount[fid] += 1
             log_estimated_fcount = numpy.log2(estimated_fcount)
             del estimated_fcount
 
@@ -1122,15 +1134,17 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
     nftranspose = numpy.reshape(nfarray, (len(nfarray), 1))
 
     # Check for any features that are not attested in train_toks.
-    unattested = set(numpy.nonzero(empirical_ffreq==0)[0])
+    unattested = set(numpy.nonzero(empirical_ffreq == 0)[0])
 
     # Build the classifier.  Start with weight=0 for each attested
     # feature, and weight=-infinity for each unattested feature.
     weights = numpy.zeros(len(empirical_ffreq), 'd')
-    for fid in unattested: weights[fid] = numpy.NINF
+    for fid in unattested:
+        weights[fid] = numpy.NINF
     classifier = ConditionalExponentialClassifier(encoding, weights)
 
-    if trace > 0: print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
+    if trace > 0:
+        print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
     if trace > 2:
         print()
         print('      Iteration    Log Likelihood    Accuracy')
@@ -1202,7 +1216,7 @@ def calculate_nfmap(train_toks, encoding):
     nfset = set()
     for tok, _ in train_toks:
         for label in encoding.labels():
-            nfset.add(sum(val for (id,val) in encoding.encode(tok,label)))
+            nfset.add(sum(val for (id, val) in encoding.encode(tok, label)))
     return dict((nf, i) for (i, nf) in enumerate(nfset))
 
 def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
@@ -1285,7 +1299,7 @@ def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
 
         for label in encoding.labels():
             # Generate the feature vector
-            feature_vector = encoding.encode(tok,label)
+            feature_vector = encoding.encode(tok, label)
             # Find the number of active features
             nf = sum(val for (id, val) in feature_vector)
             # Update the A matrix
@@ -1309,7 +1323,8 @@ def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
         sum2 = numpy.sum(nf_exp_nf_delta * A, axis=0)
 
         # Avoid division by zero.
-        for fid in unattested: sum2[fid] += 1
+        for fid in unattested:
+            sum2[fid] += 1
 
         # Update the deltas.
         deltas -= (ffreq_empirical - sum1) / -sum2
@@ -1346,8 +1361,10 @@ def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
 
     explicit = True
     bernoulli = True
-    if 'explicit' in kwargs: explicit = kwargs['explicit']
-    if 'bernoulli' in kwargs: bernoulli = kwargs['bernoulli']
+    if 'explicit' in kwargs:
+        explicit = kwargs['explicit']
+    if 'bernoulli' in kwargs:
+        bernoulli = kwargs['bernoulli']
 
     # Construct an encoding from the training data.
     if encoding is None:
@@ -1365,7 +1382,7 @@ def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
         fd, trainfile_name = tempfile.mkstemp(prefix='nltk-')
         with open(trainfile_name, 'w') as trainfile:
             write_megam_file(train_toks, encoding, trainfile,
-                                explicit=explicit, bernoulli=bernoulli)
+                             explicit=explicit, bernoulli=bernoulli)
         os.close(fd)
     except (OSError, IOError, ValueError) as e:
         raise ValueError('Error while creating megam training file: %s' % e)
@@ -1399,7 +1416,8 @@ def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
     stdout = call_megam(options)
     # print './megam_i686.opt ', ' '.join(options)
     # Delete the training file
-    try: os.remove(trainfile_name)
+    try:
+        os.remove(trainfile_name)
     except (OSError, IOError) as e:
         print('Warning: unable to delete %s: %s' % (trainfile_name, e))
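
The maxent.py hunks above remove the sparse argument from
MaxentClassifier.train(), drop two now-unused imports, and reflow the
GIS/IIS/megam training helpers.  A minimal sketch of training through the
reformatted GIS path; it assumes numpy is installed, and the toy
featuresets are invented:

    from nltk.classify.maxent import MaxentClassifier

    train_toks = [({'outlook': 'sunny', 'windy': False}, 'play'),
                  ({'outlook': 'rain',  'windy': True},  'stay'),
                  ({'outlook': 'sunny', 'windy': True},  'play'),
                  ({'outlook': 'rain',  'windy': False}, 'stay')]
    classifier = MaxentClassifier.train(train_toks, algorithm='gis',
                                        trace=0, max_iter=10)
    print(classifier.classify({'outlook': 'sunny', 'windy': False}))
    classifier.show_most_informative_features(n=4)
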
 
diff --git a/nltk/classify/megam.py b/nltk/classify/megam.py
index 6b01af3..6b44df8 100644
--- a/nltk/classify/megam.py
+++ b/nltk/classify/megam.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Megam Classifier
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -24,8 +24,6 @@ for details.
 """
 from __future__ import print_function
 
-import os
-import os.path
 import subprocess
 
 from nltk import compat
@@ -99,8 +97,9 @@ def write_megam_file(train_toks, encoding, stream,
     # Write the file, which contains one line per instance.
     for featureset, label in train_toks:
         # First, the instance number (or, in the weighted multiclass case, the cost of each label).
-        if hasattr(encoding,'cost'):
-            stream.write(':'.join(str(encoding.cost(featureset, label, l)) for l in labels))
+        if hasattr(encoding, 'cost'):
+            stream.write(':'.join(str(encoding.cost(featureset, label, l))
+                                  for l in labels))
         else:
             stream.write('%d' % labelnum[label])
 
diff --git a/nltk/classify/naivebayes.py b/nltk/classify/naivebayes.py
index dafb449..5f1cffb 100644
--- a/nltk/classify/naivebayes.py
+++ b/nltk/classify/naivebayes.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Naive Bayes Classifiers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -110,7 +110,7 @@ class NaiveBayesClassifier(ClassifierI):
         for label in self._labels:
             for (fname, fval) in featureset.items():
                 if (label, fname) in self._feature_probdist:
-                    feature_probs = self._feature_probdist[label,fname]
+                    feature_probs = self._feature_probdist[label, fname]
                     logprob[label] += feature_probs.logprob(fval)
                 else:
                     # nb: This case will never come up if the
@@ -127,19 +127,20 @@ class NaiveBayesClassifier(ClassifierI):
 
         for (fname, fval) in self.most_informative_features(n):
             def labelprob(l):
-                return cpdist[l,fname].prob(fval)
+                return cpdist[l, fname].prob(fval)
 
             labels = sorted([l for l in self._labels
-                             if fval in cpdist[l,fname].samples()],
+                             if fval in cpdist[l, fname].samples()],
                             key=labelprob)
-            if len(labels) == 1: continue
+            if len(labels) == 1:
+                continue
             l0 = labels[0]
             l1 = labels[-1]
-            if cpdist[l0,fname].prob(fval) == 0:
+            if cpdist[l0, fname].prob(fval) == 0:
                 ratio = 'INF'
             else:
-                ratio = '%8.1f' % (cpdist[l1,fname].prob(fval) /
-                                  cpdist[l0,fname].prob(fval))
+                ratio = '%8.1f' % (cpdist[l1, fname].prob(fval) /
+                                   cpdist[l0, fname].prob(fval))
             print(('%24s = %-14r %6s : %-6s = %s : 1.0' %
                    (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio)))
 
@@ -163,7 +164,7 @@ class NaiveBayesClassifier(ClassifierI):
         for (label, fname), probdist in self._feature_probdist.items():
             for fval in probdist.samples():
                 feature = (fname, fval)
-                features.add( feature )
+                features.add(feature)
                 p = probdist.prob(fval)
                 maxprob[feature] = max(p, maxprob[feature])
                 minprob[feature] = min(p, minprob[feature])
@@ -173,7 +174,8 @@ class NaiveBayesClassifier(ClassifierI):
         # Convert features to a list, & sort it by how informative
         # features are.
         features = sorted(features,
-            key=lambda feature_: minprob[feature_]/maxprob[feature_])
+                          key=lambda feature_:
+                          minprob[feature_]/maxprob[feature_])
         return features[:n]
 
     @staticmethod
@@ -221,7 +223,7 @@ class NaiveBayesClassifier(ClassifierI):
         feature_probdist = {}
         for ((label, fname), freqdist) in feature_freqdist.items():
             probdist = estimator(freqdist, bins=len(feature_values[fname]))
-            feature_probdist[label,fname] = probdist
+            feature_probdist[label, fname] = probdist
 
         return NaiveBayesClassifier(label_probdist, feature_probdist)
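
The naivebayes.py hunks above are whitespace and line-length cleanups
around classification and the most-informative-features report.  A minimal
sketch exercising those code paths; the toy featuresets are invented:

    from nltk.classify.naivebayes import NaiveBayesClassifier

    train_data = [({'last_letter': 'a'}, 'female'),
                  ({'last_letter': 'a'}, 'male'),
                  ({'last_letter': 'k'}, 'male'),
                  ({'last_letter': 'a'}, 'female')]
    nb = NaiveBayesClassifier.train(train_data)
    print(nb.classify({'last_letter': 'k'}))
    nb.show_most_informative_features(5)
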
 
diff --git a/nltk/classify/rte_classify.py b/nltk/classify/rte_classify.py
index 09f69cb..5f34545 100644
--- a/nltk/classify/rte_classify.py
+++ b/nltk/classify/rte_classify.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Classifier
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -53,10 +53,11 @@ class RTEFeatureExtractor(object):
         :type stop: bool
         """
         self.stop = stop
-        self.stopwords = set(['a', 'the', 'it', 'they', 'of', 'in', 'to',
-                              'have', 'is', 'are', 'were', 'and', 'very', '.',','])
+        self.stopwords = set(['a', 'the', 'it', 'they', 'of', 'in', 'to', 'is',
+                              'have', 'are', 'were', 'and', 'very', '.', ','])
 
-        self.negwords = set(['no', 'not', 'never', 'failed' 'rejected', 'denied'])
+        self.negwords = set(['no', 'not', 'never', 'failed', 'rejected',
+                             'denied'])
         # Try to tokenize so that abbreviations like U.S.and monetary amounts
         # like "$23.00" are kept as tokens.
         from nltk.tokenize import RegexpTokenizer
@@ -90,10 +91,12 @@ class RTEFeatureExtractor(object):
         """
         ne_overlap = set(token for token in self._overlap if ne(token))
         if toktype == 'ne':
-            if debug: print("ne overlap", ne_overlap)
+            if debug:
+                print("ne overlap", ne_overlap)
             return ne_overlap
         elif toktype == 'word':
-            if debug: print("word overlap", self._overlap - ne_overlap)
+            if debug:
+                print("word overlap", self._overlap - ne_overlap)
             return self._overlap - ne_overlap
         else:
             raise ValueError("Type not recognized:'%s'" % toktype)
@@ -131,16 +134,21 @@ def rte_classifier(trainer, features=rte_features):
     """
     Classify RTEPairs
     """
-    train = ((pair, pair.value) for pair in nltk.corpus.rte.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml']))
-    test = ((pair, pair.value) for pair in nltk.corpus.rte.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml']))
+    train = ((pair, pair.value) for pair in
+             nltk.corpus.rte.pairs(['rte1_dev.xml', 'rte2_dev.xml',
+                                    'rte3_dev.xml']))
+    test = ((pair, pair.value) for pair in
+            nltk.corpus.rte.pairs(['rte1_test.xml', 'rte2_test.xml',
+                                   'rte3_test.xml']))
 
     # Train up a classifier.
     print('Training classifier...')
-    classifier = trainer( [(features(pair), label) for (pair,label) in train] )
+    classifier = trainer([(features(pair), label) for (pair, label) in train])
 
     # Run the classifier on the test data.
     print('Testing classifier...')
-    acc = accuracy(classifier, [(features(pair), label) for (pair,label) in test])
+    acc = accuracy(classifier, [(features(pair), label)
+                                for (pair, label) in test])
     print('Accuracy: %6.4f' % acc)
 
     # Return the classifier
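
rte_classify.py above reflows rte_classifier(trainer, features=rte_features),
the demo that trains on the RTE 1-3 dev pairs and reports accuracy on the
test pairs.  A minimal sketch of driving it with the Naive Bayes trainer;
it assumes the RTE data is installed (the download package id is assumed
to be 'rte'):

    import nltk
    from nltk.classify.rte_classify import rte_classifier

    nltk.download('rte')   # fetch the RTE challenge data (id assumed)
    clf = rte_classifier(nltk.NaiveBayesClassifier.train)
    # prints progress and, per the comment above, returns the classifier
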
diff --git a/nltk/classify/scikitlearn.py b/nltk/classify/scikitlearn.py
index a317df9..387adbf 100644
--- a/nltk/classify/scikitlearn.py
+++ b/nltk/classify/scikitlearn.py
@@ -35,7 +35,6 @@ from __future__ import print_function, unicode_literals
 from nltk.classify.api import ClassifierI
 from nltk.probability import DictionaryProbDist
 from nltk import compat
-from warnings import warn
 
 try:
     from sklearn.feature_extraction import DictVectorizer
diff --git a/nltk/classify/senna.py b/nltk/classify/senna.py
new file mode 100644
index 0000000..f5b9c5d
--- /dev/null
+++ b/nltk/classify/senna.py
@@ -0,0 +1,184 @@
+# encoding: utf-8
+# Natural Language Toolkit: Senna Interface
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Author: Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A general interface to the SENNA pipeline that supports any of the
+operations specified in SUPPORTED_OPERATIONS.
+
+Applying multiple operations at once has the speed advantage. For example,
+Senna will automatically determine POS tags if you are extracting named
+entities. Applying both of the operations will cost only the time of
+extracting the named entities.
+
+The SENNA pipeline has a fixed maximum size of the sentences that it can read.
+By default it is 1024 token/sentence. If you have larger sentences, changing
+the MAX_SENTENCE_SIZE value in SENNA_main.c should be considered and your
+system specific binary should be rebuilt. Otherwise this could introduce
+misalignment errors.
+
+The input is:
+- path to the directory that contains SENNA executables. If the path is incorrect, 
+   Senna will automatically search for executable file specified in SENNA environment variable
+- List of the operations needed to be performed.
+- (optionally) the encoding of the input data (default:utf-8)
+
+    >>> from __future__ import unicode_literals
+    >>> from nltk.classify import Senna
+    >>> pipeline = Senna('/usr/share/senna-v2.0', ['pos', 'chk', 'ner'])
+    >>> sent = 'Dusseldorf is an international business center'.split()
+    >>> [(token['word'], token['chk'], token['ner'], token['pos']) for token in pipeline.tag(sent)]
+    [('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'), ('is', 'B-VP', 'O', 'VBZ'), ('an', 'B-NP', 'O', 'DT'), 
+    ('international', 'I-NP', 'O', 'JJ'), ('business', 'I-NP', 'O', 'NN'), ('center', 'I-NP', 'O', 'NN')]
+"""
+
+
+from __future__ import unicode_literals
+from os import path, sep, environ
+from subprocess import Popen, PIPE
+from platform import architecture, system
+
+from nltk.tag.api import TaggerI
+from nltk.compat import text_type, python_2_unicode_compatible
+
+_senna_url = 'http://ml.nec-labs.com/senna/'
+
+
+@python_2_unicode_compatible
+class Senna(TaggerI):
+
+    SUPPORTED_OPERATIONS = ['pos', 'chk', 'ner']
+
+    def __init__(self, senna_path, operations, encoding='utf-8'):
+        self._encoding = encoding
+        self._path = path.normpath(senna_path) + sep 
+        
+        # Verifies the existence of the executable on the self._path first    
+        #senna_binary_file_1 = self.executable(self._path)
+        exe_file_1 = self.executable(self._path)
+        if not path.isfile(exe_file_1):
+            # Check for the system environment 
+            if 'SENNA' in environ:
+                #self._path = path.join(environ['SENNA'],'')  
+                self._path = path.normpath(environ['SENNA']) + sep 
+                exe_file_2 = self.executable(self._path)
+                if not path.isfile(exe_file_2):
+                    raise OSError("Senna executable expected at %s or %s but not found" % (exe_file_1,exe_file_2))
+        
+        self.operations = operations
+
+    
+    def executable(self, base_path):
+        """
+        The function that determines the system specific binary that should be
+        used in the pipeline. In case, the system is not known the default senna binary will
+        be used.
+        """ 
+        os_name = system()
+        if os_name == 'Linux':
+            bits = architecture()[0]
+            if bits == '64bit':
+                return path.join(base_path, 'senna-linux64')
+            return path.join(base_path, 'senna-linux32')
+        if os_name == 'Windows':
+            return path.join(base_path, 'senna-win32.exe')
+        if os_name == 'Darwin':
+            return path.join(base_path, 'senna-osx')
+        return path.join(base_path, 'senna')
+        
+    def _map(self):
+        """
+        A method that calculates the order of the columns that SENNA pipeline
+        will output the tags into. This depends on the operations being ordered.
+        """
+        _map = {}
+        i = 1
+        for operation in Senna.SUPPORTED_OPERATIONS:
+            if operation in self.operations:
+                _map[operation] = i
+                i+= 1
+        return _map
+
+    def tag(self, tokens):
+        """
+        Applies the specified operation(s) on a list of tokens.
+        """
+        return self.tag_sents([tokens])[0]
+
+    def tag_sents(self, sentences):
+        """
+        Applies the tag method over a list of sentences. This method will return a
+        list of dictionaries. Every dictionary will contain a word with its
+        calculated annotations/tags.
+        """
+        encoding = self._encoding
+        
+        if not path.isfile(self.executable(self._path)):
+            raise OSError("Senna executable expected at %s but not found" % self.executable(self._path))
+        
+         
+        # Build the senna command to run the tagger
+        _senna_cmd = [self.executable(self._path), '-path', self._path, '-usrtokens', '-iobtags']
+        _senna_cmd.extend(['-'+op for op in self.operations])
+
+        # Serialize the actual sentences to a temporary string
+        _input = '\n'.join((' '.join(x) for x in sentences))+'\n'
+        if isinstance(_input, text_type) and encoding:
+            _input = _input.encode(encoding)
+
+        # Run the tagger and get the output
+        p = Popen(_senna_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        (stdout, stderr) = p.communicate(input=_input)
+        senna_output = stdout
+
+        # Check the return code.
+        if p.returncode != 0:
+            raise RuntimeError('Senna command failed! Details: %s' % stderr)
+
+        if encoding:
+            senna_output = stdout.decode(encoding)
+
+        # Output the tagged sentences
+        map_ = self._map()
+        tagged_sentences = [[]]
+        sentence_index = 0
+        token_index = 0
+        for tagged_word in senna_output.strip().split("\n"):
+            if not tagged_word:
+                tagged_sentences.append([])
+                sentence_index += 1
+                token_index = 0
+                continue
+            tags = tagged_word.split('\t')
+            result = {}
+            for tag in map_:
+              result[tag] = tags[map_[tag]].strip()
+            try:
+              result['word'] = sentences[sentence_index][token_index]
+            except IndexError:
+              raise IndexError(
+                "Misalignment error occurred at sentence number %d. Possible reason"
+                " is that the sentence size exceeded the maximum size. Check the "
+                "documentation of Senna class for more information."
+                % sentence_index)
+            tagged_sentences[-1].append(result)
+            token_index += 1
+        return tagged_sentences
+
+
+# skip doctests if Senna is not installed
+def setup_module(module):
+    from nose import SkipTest
+    try:
+        tagger = Senna('/usr/share/senna-v2.0', ['pos', 'chk', 'ner'])
+    except OSError:
+        raise SkipTest("Senna executable not found")
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
+
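
The new senna.py module above wraps the external SENNA binaries and exposes
the pos/chk/ner columns through tag() and tag_sents().  A sketch of tagging
several sentences in one subprocess call; the installation path below is
hypothetical, and (as the constructor shows) the SENNA environment variable
is consulted as a fallback:

    from nltk.classify import Senna

    pipeline = Senna('/usr/share/senna-v2.0', ['pos', 'chk', 'ner'])
    sents = ['Dusseldorf is an international business center'.split(),
             'The pipeline runs once for all sentences'.split()]
    for tagged in pipeline.tag_sents(sents):
        print([(tok['word'], tok['pos'], tok['ner']) for tok in tagged])
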
diff --git a/nltk/classify/svm.py b/nltk/classify/svm.py
index a77c1a8..265bdec 100644
--- a/nltk/classify/svm.py
+++ b/nltk/classify/svm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SVM-based classifier
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Leon Derczynski <leon at dcs.shef.ac.uk>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/classify/tadm.py b/nltk/classify/tadm.py
index dc2191f..6437d98 100644
--- a/nltk/classify/tadm.py
+++ b/nltk/classify/tadm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to TADM Classifier
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Joseph Frazee <jfrazee at mail.utexas.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/classify/util.py b/nltk/classify/util.py
index 147d1f3..d40c173 100644
--- a/nltk/classify/util.py
+++ b/nltk/classify/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Utility Functions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -76,16 +76,16 @@ def attested_labels(tokens):
         labels.  A classified token has the form ``(token, label)``.
     :type tokens: list
     """
-    return tuple(set(label for (tok,label) in tokens))
+    return tuple(set(label for (tok, label) in tokens))
 
 def log_likelihood(classifier, gold):
-    results = classifier.prob_classify_many([fs for (fs,l) in gold])
-    ll = [pdist.prob(l) for ((fs,l), pdist) in zip(gold, results)]
+    results = classifier.prob_classify_many([fs for (fs, l) in gold])
+    ll = [pdist.prob(l) for ((fs, l), pdist) in zip(gold, results)]
     return math.log(float(sum(ll))/len(ll))
 
 def accuracy(classifier, gold):
-    results = classifier.classify_many([fs for (fs,l) in gold])
-    correct = [l==r for ((fs,l), r) in zip(gold, results)]
+    results = classifier.classify_many([fs for (fs, l) in gold])
+    correct = [l == r for ((fs, l), r) in zip(gold, results)]
     if correct:
         return float(sum(correct))/len(correct)
     else:
@@ -161,8 +161,8 @@ def binary_names_demo_features(name):
     for letter in 'abcdefghijklmnopqrstuvwxyz':
         features['count(%s)' % letter] = name.lower().count(letter)
         features['has(%s)' % letter] = letter in name.lower()
-        features['startswith(%s)' % letter] = (letter==name[0].lower())
-        features['endswith(%s)' % letter] = (letter==name[-1].lower())
+        features['startswith(%s)' % letter] = (letter == name[0].lower())
+        features['endswith(%s)' % letter] = (letter == name[-1].lower())
     return features
 
 def names_demo(trainer, features=names_demo_features):
@@ -181,17 +181,17 @@ def names_demo(trainer, features=names_demo_features):
 
     # Train up a classifier.
     print('Training classifier...')
-    classifier = trainer( [(features(n), g) for (n,g) in train] )
+    classifier = trainer( [(features(n), g) for (n, g) in train] )
 
     # Run the classifier on the test data.
     print('Testing classifier...')
-    acc = accuracy(classifier, [(features(n),g) for (n,g) in test])
+    acc = accuracy(classifier, [(features(n), g) for (n, g) in test])
     print('Accuracy: %6.4f' % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
-        test_featuresets = [features(n) for (n,g) in test]
+        test_featuresets = [features(n) for (n, g) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
@@ -239,13 +239,13 @@ def partial_names_demo(trainer, features=names_demo_features):
 
     # Run the classifier on the test data.
     print('Testing classifier...')
-    acc = accuracy(classifier, [(features(n),m) for (n,m) in test])
+    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
     print('Accuracy: %6.4f' % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
-        test_featuresets = [features(n) for (n,m) in test]
+        test_featuresets = [features(n) for (n, m) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
@@ -275,8 +275,9 @@ def wsd_demo(trainer, word, features, n=1000):
     if word not in _inst_cache:
         _inst_cache[word] = [(i, i.senses[0]) for i in senseval.instances(word)]
     instances = _inst_cache[word][:]
-    if n> len(instances): n = len(instances)
-    senses = list(set(l for (i,l) in instances))
+    if n > len(instances):
+        n = len(instances)
+    senses = list(set(l for (i, l) in instances))
     print('  Senses: ' + ' '.join(senses))
 
     # Randomly split the names into a test & train set.
@@ -288,17 +289,17 @@ def wsd_demo(trainer, word, features, n=1000):
 
     # Train up a classifier.
     print('Training classifier...')
-    classifier = trainer( [(features(i), l) for (i,l) in train] )
+    classifier = trainer([(features(i), l) for (i, l) in train])
 
     # Run the classifier on the test data.
     print('Testing classifier...')
-    acc = accuracy(classifier, [(features(i),l) for (i,l) in test])
+    acc = accuracy(classifier, [(features(i), l) for (i, l) in test])
     print('Accuracy: %6.4f' % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
-        test_featuresets = [features(i) for (i,n) in test]
+        test_featuresets = [features(i) for (i, n) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
         ll = [pdist.logprob(gold)
               for ((name, gold), pdist) in zip(test, pdists)]
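
The classify/util.py hunks above reformat accuracy(), log_likelihood(),
and the demo drivers around them.  A minimal sketch of the two scoring
helpers; the classifier and gold data are invented:

    from nltk.classify.naivebayes import NaiveBayesClassifier
    from nltk.classify.util import accuracy, log_likelihood

    train_data = [({'f': 1}, 'A'), ({'f': 2}, 'B'),
                  ({'f': 1}, 'A'), ({'f': 2}, 'B')]
    gold = [({'f': 1}, 'A'), ({'f': 2}, 'B')]
    clf = NaiveBayesClassifier.train(train_data)
    print(accuracy(clf, gold))        # fraction of gold labels predicted correctly
    print(log_likelihood(clf, gold))  # log of the mean probability of the gold labels
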
diff --git a/nltk/classify/weka.py b/nltk/classify/weka.py
index 87b4cfa..643dac3 100644
--- a/nltk/classify/weka.py
+++ b/nltk/classify/weka.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Weka Classsifiers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -138,7 +138,7 @@ class WekaClassifier(ClassifierI):
             return [line.split()[2].split(':')[1]
                     for line in lines[1:] if line.strip()]
         elif lines[0].split() == ['inst#', 'actual', 'predicted',
-                                'error', 'distribution']:
+                                  'error', 'distribution']:
             return [self.parse_weka_distribution(line.split()[-1])
                     for line in lines[1:] if line.strip()]
 
@@ -147,7 +147,8 @@ class WekaClassifier(ClassifierI):
             return [line.split()[1] for line in lines if line.strip()]
 
         else:
-            for line in lines[:10]: print(line)
+            for line in lines[:10]:
+                print(line)
             raise ValueError('Unhandled output format -- your version '
                              'of weka may not be supported.\n'
                              '  Header: %s' % lines[0])
@@ -199,7 +200,8 @@ class WekaClassifier(ClassifierI):
             # Train the weka model.
             cmd = [javaclass, '-d', model_filename, '-t', train_filename]
             cmd += list(options)
-            if quiet: stdout = subprocess.PIPE
+            if quiet:
+                stdout = subprocess.PIPE
             else: stdout = None
             java(cmd, classpath=_weka_classpath, stdout=stdout)
 
@@ -255,7 +257,7 @@ class ARFF_Formatter:
         string (note: not nominal) types.
         """
         # Find the set of all attested labels.
-        labels = set(label for (tok,label) in tokens)
+        labels = set(label for (tok, label) in tokens)
 
         # Determine the types of all features.
         features = {}
diff --git a/nltk/cluster/__init__.py b/nltk/cluster/__init__.py
index 39fe32a..67e3e96 100644
--- a/nltk/cluster/__init__.py
+++ b/nltk/cluster/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/api.py b/nltk/cluster/api.py
index e194d91..9d2999a 100644
--- a/nltk/cluster/api.py
+++ b/nltk/cluster/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterer Interfaces
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # Porting: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/cluster/em.py b/nltk/cluster/em.py
index 8e12579..9a10ef1 100644
--- a/nltk/cluster/em.py
+++ b/nltk/cluster/em.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Expectation Maximization Clusterer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/gaac.py b/nltk/cluster/gaac.py
index 82c871d..6b06a4e 100644
--- a/nltk/cluster/gaac.py
+++ b/nltk/cluster/gaac.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Group Average Agglomerative Clusterer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/kmeans.py b/nltk/cluster/kmeans.py
index 7e630da..88672ce 100644
--- a/nltk/cluster/kmeans.py
+++ b/nltk/cluster/kmeans.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: K-Means Clusterer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/cluster/util.py b/nltk/cluster/util.py
index fc88c29..fe81e68 100644
--- a/nltk/cluster/util.py
+++ b/nltk/cluster/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterer Utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/collocations.py b/nltk/collocations.py
index 1407577..9d02b9c 100644
--- a/nltk/collocations.py
+++ b/nltk/collocations.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Collocations and Association Measures
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/compat.py b/nltk/compat.py
index c3649f5..6e4cdce 100755
--- a/nltk/compat.py
+++ b/nltk/compat.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Compatibility
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -358,25 +358,36 @@ except ImportError: # python 2.6
 
 # The following datasets have a /PY3 subdirectory containing
 # a full copy of the data which has been re-encoded or repickled.
-_PY3_DATA_UPDATES = ["chunkers/maxent_ne_chunker",
-                     "help/tagsets",
-                     "taggers/maxent_treebank_pos_tagger",
-                     "tokenizers/punkt"]
+
+_PY3_DATA_UPDATES = []
+
+if sys.platform.startswith('win'):
+    _PY3_DATA_UPDATES = ["chunkers\maxent_ne_chunker",
+                         "help\tagsets",
+                         "taggers\maxent_treebank_pos_tagger",
+                         "tokenizers\punkt"]
+else:
+    _PY3_DATA_UPDATES = ["chunkers/maxent_ne_chunker",
+                        "help/tagsets",
+                        "taggers/maxent_treebank_pos_tagger",
+                        "tokenizers/punkt"]
+
+def add_py3_data(path):
+    if PY3:
+        for item in _PY3_DATA_UPDATES:
+            if item in str(path) and "/PY3" not in str(path):
+                pos = path.index(item) + len(item)
+                if path[pos:pos+4] == ".zip":
+                    pos += 4
+                path = path[:pos] + "/PY3" + path[pos:]
+                break
+    return path
 
 # for use in adding /PY3 to the second (filename) argument
 # of the file pointers in data.py
 def py3_data(init_func):
     def _decorator(*args, **kwargs):
-        if PY3:
-            path = args[1]
-            for item in _PY3_DATA_UPDATES:
-                if item in str(path) and "/PY3" not in str(path):
-                    pos = path.index(item) + len(item)
-                    if path[pos:pos+4] == ".zip":
-                        pos += 4
-                    path = path[:pos] + "/PY3" + path[pos:]
-                    args = (args[0], path) + args[2:]
-                    break
+        args = (args[0], add_py3_data(args[1])) + args[2:]
         return init_func(*args, **kwargs)
     return wraps(init_func)(_decorator)
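
compat.py above factors the /PY3 path rewriting out of the py3_data
decorator into a standalone add_py3_data() helper and adds a
backslash-separated variant of the data-directory list for Windows.
A minimal sketch of the helper; the path is illustrative, and the rewrite
only applies under Python 3:

    from nltk.compat import add_py3_data

    p = 'tokenizers/punkt/english.pickle'
    print(add_py3_data(p))
    # Python 3 (POSIX paths): 'tokenizers/punkt/PY3/english.pickle'
    # Python 2: the path is returned unchanged
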
 
diff --git a/nltk/corpus/__init__.py b/nltk/corpus/__init__.py
index 0323994..101dd26 100644
--- a/nltk/corpus/__init__.py
+++ b/nltk/corpus/__init__.py
@@ -1,11 +1,11 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-# [xx] this docstring isnt' up-to-date!
+# TODO this docstring isn't up-to-date!
 """
 NLTK corpus readers.  The modules in this package provide functions
 that can be used to read corpus files in a variety of formats.  These
@@ -16,8 +16,8 @@ of external corpora.
 Available Corpora
 =================
 
-Please see http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml
-for a complete list.  Install corpora using nltk.download().
+Please see http://www.nltk.org/nltk_data/ for a complete list.
+Install corpora using nltk.download().
 
 Corpus Reader Functions
 =======================
@@ -147,6 +147,8 @@ movie_reviews = LazyCorpusLoader(
     encoding='ascii')
 names = LazyCorpusLoader(
     'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii')
+nkjp = LazyCorpusLoader(
+    'nkjp', NKJPCorpusReader, r'', encoding='utf8')
 nps_chat = LazyCorpusLoader(
     'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj')
 pl196x = LazyCorpusLoader(
@@ -202,6 +204,10 @@ udhr = LazyCorpusLoader(
     'udhr', UdhrCorpusReader)
 udhr2 = LazyCorpusLoader(
     'udhr2', PlaintextCorpusReader, r'.*\.txt', encoding='utf8')
+universal_treebanks = LazyCorpusLoader(
+    'universal_treebanks_v20', ConllCorpusReader, r'.*\.conll',
+    columntypes = ('ignore', 'words', 'ignore', 'ignore', 'pos',
+                   'ignore', 'ignore', 'ignore', 'ignore', 'ignore'))
 verbnet = LazyCorpusLoader(
     'verbnet', VerbnetCorpusReader, r'(?!\.).*\.xml')
 webtext = LazyCorpusLoader(
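
corpus/__init__.py above registers two new lazy loaders: nkjp (the National
Corpus of Polish, via the new NKJPCorpusReader) and universal_treebanks
(a ConllCorpusReader configured with words and pos columns).  A sketch of
the latter, assuming the universal_treebanks_v20 data package has been
downloaded into nltk_data:

    from nltk.corpus import universal_treebanks

    print(universal_treebanks.fileids()[:3])
    print(universal_treebanks.tagged_words()[:10])
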
diff --git a/nltk/corpus/europarl_raw.py b/nltk/corpus/europarl_raw.py
index 578fe5a..9ff0b61 100644
--- a/nltk/corpus/europarl_raw.py
+++ b/nltk/corpus/europarl_raw.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Europarl Corpus Readers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author:  Nitin Madnani <nmadnani at umiacs.umd.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/__init__.py b/nltk/corpus/reader/__init__.py
index ef3e9d9..e48be76 100644
--- a/nltk/corpus/reader/__init__.py
+++ b/nltk/corpus/reader/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -93,6 +93,7 @@ from nltk.corpus.reader.framenet import *
 from nltk.corpus.reader.udhr import *
 from nltk.corpus.reader.bnc import *
 from nltk.corpus.reader.sentiwordnet import *
+from nltk.corpus.reader.nkjp import *
 
 # Make sure that nltk.corpus.reader.bracket_parse gives the module, not
 # the function bracket_parse() defined in nltk.tree:
@@ -127,5 +128,6 @@ __all__ = [
     'CHILDESCorpusReader', 'AlignedCorpusReader',
     'TimitTaggedCorpusReader', 'LinThesaurusCorpusReader',
     'SemcorCorpusReader', 'FramenetCorpusReader', 'UdhrCorpusReader',
-    'BNCCorpusReader', 'SentiWordNetCorpusReader', 'SentiSynset'
+    'BNCCorpusReader', 'SentiWordNetCorpusReader', 'SentiSynset',
+    'NKJPCorpusReader'
 ]
diff --git a/nltk/corpus/reader/aligned.py b/nltk/corpus/reader/aligned.py
index 1e11011..62a76eb 100644
--- a/nltk/corpus/reader/aligned.py
+++ b/nltk/corpus/reader/aligned.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Aligned Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/api.py b/nltk/corpus/reader/api.py
index 9ff8c8b..fc8d97e 100644
--- a/nltk/corpus/reader/api.py
+++ b/nltk/corpus/reader/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for Corpus Readers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/bnc.py b/nltk/corpus/reader/bnc.py
index 16d037a..d0e692d 100644
--- a/nltk/corpus/reader/bnc.py
+++ b/nltk/corpus/reader/bnc.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/bracket_parse.py b/nltk/corpus/reader/bracket_parse.py
index 5fd29a0..999382b 100644
--- a/nltk/corpus/reader/bracket_parse.py
+++ b/nltk/corpus/reader/bracket_parse.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Penn Treebank Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/chasen.py b/nltk/corpus/reader/chasen.py
index dd12228..aac675f 100644
--- a/nltk/corpus/reader/chasen.py
+++ b/nltk/corpus/reader/chasen.py
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Masato Hagiwara <hagisan at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/childes.py b/nltk/corpus/reader/childes.py
index a9b3cb0..eaf373a 100644
--- a/nltk/corpus/reader/childes.py
+++ b/nltk/corpus/reader/childes.py
@@ -1,6 +1,6 @@
 # CHILDES XML Corpus Reader
 
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Tomonori Nagano <tnagano at gc.cuny.edu>
 #         Alexis Dimitriadis <A.Dimitriadis at uu.nl>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/chunked.py b/nltk/corpus/reader/chunked.py
index a702917..7a666d6 100644
--- a/nltk/corpus/reader/chunked.py
+++ b/nltk/corpus/reader/chunked.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunked Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/cmudict.py b/nltk/corpus/reader/cmudict.py
index bf76eac..7dc83a5 100644
--- a/nltk/corpus/reader/cmudict.py
+++ b/nltk/corpus/reader/cmudict.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/conll.py b/nltk/corpus/reader/conll.py
index ce55c00..af4b0c4 100644
--- a/nltk/corpus/reader/conll.py
+++ b/nltk/corpus/reader/conll.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CONLL Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/dependency.py b/nltk/corpus/reader/dependency.py
index a43553b..fc0a758 100644
--- a/nltk/corpus/reader/dependency.py
+++ b/nltk/corpus/reader/dependency.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Kepa Sarasola <kepa.sarasola at ehu.es>
 #         Iker Manterola <returntothehangar at hotmail.com>
 #
diff --git a/nltk/corpus/reader/framenet.py b/nltk/corpus/reader/framenet.py
index 567f92e..7c76957 100644
--- a/nltk/corpus/reader/framenet.py
+++ b/nltk/corpus/reader/framenet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Framenet Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Chuck Wooters <wooters at icsi.berkeley.edu>,
 #          Nathan Schneider <nschneid at cs.cmu.edu>
 # URL: <http://nltk.org/>
@@ -1198,6 +1198,37 @@ class FramenetCorpusReader(XMLCorpusReader):
             self._buildframeindex()
         return dict((fID, finfo.name) for fID,finfo in self._frame_idx.items() if name is None or re.search(name, finfo.name) is not None)
 
+    def fes(self, name=None):
+        '''
+        Lists frame element objects. If 'name' is provided, this is treated as 
+        a case-insensitive regular expression to filter by frame name. 
+        (Case-insensitivity is because casing of frame element names is not always 
+        consistent across frames.)
+        
+        >>> from nltk.corpus import framenet as fn
+        >>> fn.fes('Noise_maker')
+        [<fe ID=6043 name=Noise_maker>]
+        >>> sorted([(fe.frame.name,fe.name) for fe in fn.fes('sound')])
+        [('Cause_to_make_noise', 'Sound_maker'), ('Make_noise', 'Sound'), 
+         ('Make_noise', 'Sound_source'), ('Sound_movement', 'Location_of_sound_source'), 
+         ('Sound_movement', 'Sound'), ('Sound_movement', 'Sound_source'), 
+         ('Sounds', 'Component_sound'), ('Sounds', 'Location_of_sound_source'), 
+         ('Sounds', 'Sound_source'), ('Vocalizations', 'Location_of_sound_source'), 
+         ('Vocalizations', 'Sound_source')]
+        >>> sorted(set(fe.name for fe in fn.fes('^sound')))
+        ['Sound', 'Sound_maker', 'Sound_source']
+        >>> len(fn.fes('^sound$'))
+        2
+        
+        :param name: A regular expression pattern used to match against
+            frame element names. If 'name' is None, then a list of all
+            frame elements will be returned.
+        :type name: str
+        :return: A list of matching frame elements
+        :rtype: list(AttrDict)
+        '''
+        return PrettyList(fe for f in self.frames() for fename,fe in f.FE.items() if name is None or re.search(name, fename, re.I))
+
     def lus(self, name=None):
         """
         Obtain details for a specific lexical unit.
diff --git a/nltk/corpus/reader/ieer.py b/nltk/corpus/reader/ieer.py
index f73f4ec..a44bfe0 100644
--- a/nltk/corpus/reader/ieer.py
+++ b/nltk/corpus/reader/ieer.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: IEER Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/indian.py b/nltk/corpus/reader/indian.py
index f777670..d1a2714 100644
--- a/nltk/corpus/reader/indian.py
+++ b/nltk/corpus/reader/indian.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/ipipan.py b/nltk/corpus/reader/ipipan.py
index 0d11ea8..dc37404 100644
--- a/nltk/corpus/reader/ipipan.py
+++ b/nltk/corpus/reader/ipipan.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: IPI PAN Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Konrad Goluchowski <kodie at mimuw.edu.pl>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/knbc.py b/nltk/corpus/reader/knbc.py
index 7f56b43..af1de33 100644
--- a/nltk/corpus/reader/knbc.py
+++ b/nltk/corpus/reader/knbc.py
@@ -1,6 +1,6 @@
 #! /usr/bin/env python
 # KNB Corpus reader
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Masato Hagiwara <hagisan at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,18 +8,22 @@
 # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html
 from __future__ import print_function
 
-import sys
+import re
 
-from nltk import compat
-from nltk.tree import bracket_parse, Tree
+from nltk.compat import string_types
 from nltk.parse import DependencyGraph
 
-from nltk.corpus.reader.util import *
-from nltk.corpus.reader.api import *
+from nltk.corpus.reader.util import (
+    FileSystemPathPointer,
+    find_corpus_fileids,
+    read_blankline_block,
+)
+from nltk.corpus.reader.api import SyntaxCorpusReader, CorpusReader
 
 # default function to convert morphlist to str for tree representation
 _morphs2str_default = lambda morphs: '/'.join(m[0] for m in morphs if m[0] != 'EOS')
 
+
 class KNBCorpusReader(SyntaxCorpusReader):
     """
     This class implements:
@@ -35,6 +39,21 @@ class KNBCorpusReader(SyntaxCorpusReader):
     The structure of tagged words:
       tagged_word = (word(str), tags(tuple))
       tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...)
+
+    Usage example
+    -------------
+
+    >>> from nltk.corpus.util import LazyCorpusLoader
+    >>> knbc = LazyCorpusLoader(
+    ...     'knbc/corpus1',
+    ...     KNBCorpusReader,
+    ...     r'.*/KN.*',
+    ...     encoding='euc-jp',
+    ... )
+
+    >>> len(knbc.sents()[0])
+    9
+
     """
 
     def __init__(self, root, fileids, encoding='utf8', morphs2str=_morphs2str_default):
@@ -68,7 +87,7 @@ class KNBCorpusReader(SyntaxCorpusReader):
             if not re.match(r"EOS|\*|\#|\+", line):
                 cells = line.strip().split(" ")
                 # convert cells to morph tuples
-                res.append( (cells[0], ' '.join(cells[1:])) )
+                res.append((cells[0], ' '.join(cells[1:])))
 
         return res
 
@@ -76,7 +95,7 @@ class KNBCorpusReader(SyntaxCorpusReader):
         dg = DependencyGraph()
         i = 0
         for line in t.splitlines():
-            if line.startswith("*") or line.startswith("+"):
+            if line[0] in '*+':
                 # start of bunsetsu or tag
 
                 cells = line.strip().split(" ", 3)
@@ -84,32 +103,32 @@ class KNBCorpusReader(SyntaxCorpusReader):
 
                 assert m is not None
 
-                node = dg.nodelist[i]
-                node['address'] = i
-                node['rel'] = m.group(2)  # dep_type
-
-                node['word'] = []
+                node = dg.nodes[i]
+                node.update(
+                    {
+                        'address': i,
+                        'rel': m.group(2),
+                        'word': [],
+                    }
+                )
 
                 dep_parent = int(m.group(1))
 
-                while len(dg.nodelist) < i+1 or len(dg.nodelist) < dep_parent+1:
-                    dg.nodelist.append({'word':[], 'deps':[]})
-
                 if dep_parent == -1:
                     dg.root = node
                 else:
-                    dg.nodelist[dep_parent]['deps'].append(i)
+                    dg.nodes[dep_parent]['deps'].append(i)
 
                 i += 1
-            elif not line.startswith("#"):
+            elif line[0] != '#':
                 # normal morph
                 cells = line.strip().split(" ")
                 # convert cells to morph tuples
-                morph = ( cells[0], ' '.join(cells[1:]) )
-                dg.nodelist[i-1]['word'].append(morph)
+                morph = cells[0], ' '.join(cells[1:])
+                dg.nodes[i - 1]['word'].append(morph)
 
         if self.morphs2str:
-            for node in dg.nodelist:
+            for node in dg.nodes.values():
                 node['word'] = self.morphs2str(node['word'])
 
         return dg.tree()
@@ -118,6 +137,7 @@ class KNBCorpusReader(SyntaxCorpusReader):
 # Demo
 ######################################################################
 
+
 def demo():
 
     import nltk
@@ -135,30 +155,33 @@ def demo():
                             sorted(fileids, key=_knbc_fileids_sort), encoding='euc-jp')
 
     print(knbc.fileids()[:10])
-    print(''.join( knbc.words()[:100] ))
+    print(''.join(knbc.words()[:100]))
 
-    print('\n\n'.join( '%s' % tree for tree in knbc.parsed_sents()[:2] ))
+    print('\n\n'.join(str(tree) for tree in knbc.parsed_sents()[:2]))
 
     knbc.morphs2str = lambda morphs: '/'.join(
-        "%s(%s)"%(m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS'
-        ).encode('utf-8')
+        "%s(%s)" % (m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS'
+    ).encode('utf-8')
 
-    print('\n\n'.join( '%s' % tree for tree in knbc.parsed_sents()[:2] ))
+    print('\n\n'.join('%s' % tree for tree in knbc.parsed_sents()[:2]))
+
+    print(
+        '\n'.join(
+            ' '.join("%s/%s" % (w[0], w[1].split(' ')[2]) for w in sent)
+            for sent in knbc.tagged_sents()[0:2]
+        )
+    )
 
-    print('\n'.join( ' '.join("%s/%s"%(w[0], w[1].split(' ')[2]) for w in sent)
-                     for sent in knbc.tagged_sents()[0:2] ))
 
 def test():
 
     from nltk.corpus.util import LazyCorpusLoader
-
     knbc = LazyCorpusLoader(
         'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp')
-    assert isinstance(knbc.words()[0], compat.string_types)
-    assert isinstance(knbc.sents()[0][0], compat.string_types)
+    assert isinstance(knbc.words()[0], string_types)
+    assert isinstance(knbc.sents()[0][0], string_types)
     assert isinstance(knbc.tagged_words()[0], tuple)
     assert isinstance(knbc.tagged_sents()[0][0], tuple)
 
 if __name__ == '__main__':
     demo()
-    # test()
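The knbc.py changes above port the reader from the old dg.nodelist list to the dg.nodes mapping keyed by token address, which is why the manual list-growing loop could be dropped. A small sketch of the difference, under the assumption that DependencyGraph.nodes behaves like a defaultdict of per-address dicts (as the updated code implies):

    from collections import defaultdict

    i, dep_parent = 3, 1

    # Old style: a plain list that had to be padded by hand before indexing.
    nodelist = [{'word': [], 'deps': []}]
    while len(nodelist) < max(i, dep_parent) + 1:
        nodelist.append({'word': [], 'deps': []})
    nodelist[dep_parent]['deps'].append(i)

    # New style: missing addresses spring into existence on first access.
    nodes = defaultdict(lambda: {'word': [], 'deps': [], 'rel': None, 'address': None})
    nodes[i].update({'address': i, 'rel': 'D', 'word': []})
    nodes[dep_parent]['deps'].append(i)
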
diff --git a/nltk/corpus/reader/lin.py b/nltk/corpus/reader/lin.py
index d8646e9..d6b3d65 100644
--- a/nltk/corpus/reader/lin.py
+++ b/nltk/corpus/reader/lin.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Lin's Thesaurus
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Dan Blanchard <dblanchard at ets.org>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.txt
diff --git a/nltk/corpus/reader/nkjp.py b/nltk/corpus/reader/nkjp.py
new file mode 100644
index 0000000..c7cd522
--- /dev/null
+++ b/nltk/corpus/reader/nkjp.py
@@ -0,0 +1,428 @@
+# Natural Language Toolkit: NKJP Corpus Reader
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Author: Gabriela Kaczka
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import functools
+import os
+import tempfile
+from nltk import compat
+
+from nltk.corpus.reader.util import concat
+from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView
+import re
+
+
+def _parse_args(fun):
+    """
+    Wraps function arguments:
+    if fileids are not specified, the function falls back to the NKJPCorpusReader's default paths.
+    """
+    @functools.wraps(fun)
+    def decorator(self, fileids=None, **kwargs):
+        if not fileids:
+            fileids = self._paths
+        return fun(self, fileids, **kwargs)
+
+    return decorator
+
+
+class NKJPCorpusReader(XMLCorpusReader):
+    WORDS_MODE = 0
+    SENTS_MODE = 1
+    HEADER_MODE = 2
+    RAW_MODE = 3
+
+    def __init__(self, root, fileids='.*'):
+        """
+        Corpus reader designed to work with the National Corpus of Polish.
+        See http://nkjp.pl/ for more details about NKJP.
+        Usage example:
+        import nltk
+        import nkjp
+        from nkjp import NKJPCorpusReader
+        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='') # obtain the whole corpus
+        x.header()
+        x.raw()
+        x.words()
+        x.tagged_words(tags=['subst', 'comp'])  #Link to find more tags: nkjp.pl/poliqarp/help/ense2.html
+        x.sents()
+        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*') # obtain particular file(s)
+        x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
+        x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp'])
+        """
+        if isinstance(fileids, compat.string_types):
+            XMLCorpusReader.__init__(self, root, fileids + '.*/header.xml')
+        else:
+            XMLCorpusReader.__init__(self, root, [fileid + '/header.xml' for fileid in fileids])
+        self._paths = self.get_paths()
+
+    def get_paths(self):
+        return [os.path.join(str(self._root), f.split("header.xml")[0]) for f in self._fileids]
+
+    def fileids(self):
+        """
+        Returns a list of file identifiers for the fileids that make up
+        this corpus.
+        """
+        return [f.split("header.xml")[0] for f in self._fileids]
+
+    def _view(self, filename, tags=None, **kwargs):
+        """
+        Returns a view specialised for use with a particular corpus file.
+        """
+        mode = kwargs.pop('mode', NKJPCorpusReader.WORDS_MODE)
+        if mode is NKJPCorpusReader.WORDS_MODE:
+            return NKJPCorpus_Morph_View(filename, tags=tags)
+        elif mode is NKJPCorpusReader.SENTS_MODE:
+            return NKJPCorpus_Segmentation_View(filename, tags=tags)
+        elif mode is NKJPCorpusReader.HEADER_MODE:
+            return NKJPCorpus_Header_View(filename, tags=tags)
+        elif mode is NKJPCorpusReader.RAW_MODE:
+            return NKJPCorpus_Text_View(filename, tags=tags, mode=NKJPCorpus_Text_View.RAW_MODE)
+
+        else:
+            raise NameError('No such mode!')
+
+    def add_root(self, fileid):
+        """
+        Prepend the corpus root to the specified fileid if it is not already present.
+        """
+        if self.root in fileid:
+            return fileid
+        return self.root + fileid
+
+    @_parse_args
+    def header(self, fileids=None, **kwargs):
+        """
+        Returns header(s) of specified fileids.
+        """
+        return concat([self._view(self.add_root(fileid),
+                                  mode=NKJPCorpusReader.HEADER_MODE, **kwargs).handle_query()
+                       for fileid in fileids])
+
+    @_parse_args
+    def sents(self, fileids=None, **kwargs):
+        """
+        Returns sentences in specified fileids.
+        """
+        return concat([self._view(self.add_root(fileid),
+                                  mode=NKJPCorpusReader.SENTS_MODE, **kwargs).handle_query()
+                       for fileid in fileids])
+
+    @_parse_args
+    def words(self, fileids=None, **kwargs):
+        """
+        Returns words in specified fileids.
+        """
+
+        return concat([self._view(self.add_root(fileid),
+                                  mode=NKJPCorpusReader.WORDS_MODE, **kwargs).handle_query()
+                       for fileid in fileids])
+
+    @_parse_args
+    def tagged_words(self, fileids=None, **kwargs):
+        """
+        Call with specified tags as a list, e.g. tags=['subst', 'comp'].
+        Returns tagged words in specified fileids.
+        """
+        tags = kwargs.pop('tags', [])
+        return concat([self._view(self.add_root(fileid),
+                                  mode=NKJPCorpusReader.WORDS_MODE, tags=tags, **kwargs).handle_query()
+                       for fileid in fileids])
+
+    @_parse_args
+    def raw(self, fileids=None, **kwargs):
+        """
+        Returns raw text of the specified fileids.
+        """
+        return concat([self._view(self.add_root(fileid),
+                                  mode=NKJPCorpusReader.RAW_MODE, **kwargs).handle_query()
+                       for fileid in fileids])
+
+
+class NKJPCorpus_Header_View(XMLCorpusView):
+
+    def __init__(self, filename, **kwargs):
+        """
+        HEADER_MODE
+        A stream backed corpus view specialized for use with
+        header.xml files in NKJP corpus.
+        """
+        self.tagspec = ".*/sourceDesc$"
+        XMLCorpusView.__init__(self, filename + 'header.xml', self.tagspec)
+
+    def handle_query(self):
+        self._open()
+        header = []
+        while True:
+            segm = XMLCorpusView.read_block(self, self._stream)
+            if len(segm) == 0:
+                break
+            header.extend(segm)
+        self.close()
+        return header
+
+    def handle_elt(self, elt, context):
+        titles = elt.findall('bibl/title')
+        title = []
+        if titles:
+            title = '\n'.join(title.text.strip() for title in titles)
+
+        authors = elt.findall('bibl/author')
+        author = []
+        if authors:
+            author = '\n'.join(author.text.strip() for author in authors)
+
+        dates = elt.findall('bibl/date')
+        date = []
+        if dates:
+            date = '\n'.join(date.text.strip() for date in dates)
+
+        publishers = elt.findall('bibl/publisher')
+        publisher = []
+        if publishers:
+            publisher = '\n'.join(publisher.text.strip() for publisher in publishers)
+
+        idnos = elt.findall('bibl/idno')
+        idno = []
+        if idnos:
+            idno = '\n'.join(idno.text.strip() for idno in idnos)
+
+        notes = elt.findall('bibl/note')
+        note = []
+        if notes:
+            note = '\n'.join(note.text.strip() for note in notes)
+
+        return {'title': title, 'author': author, 'date': date, 'publisher': publisher,
+                'idno': idno, 'note': note}
+
+
+class XML_Tool():
+    """
+    Helper class that creates a copy of an xml file with all references to the
+    nkjp: namespace removed. That is needed because XMLCorpusView assumes that one
+    can find short substrings of XML that are valid XML, which is not true if a
+    namespace is declared at top level.
+    """
+    def __init__(self, root, filename):
+        self.read_file = os.path.join(root, filename)
+        self.write_file = tempfile.NamedTemporaryFile(delete=False)
+
+    def build_preprocessed_file(self):
+        try:
+            fr = open(self.read_file, 'r')
+            fw = self.write_file
+            line = ' '
+            while len(line):
+                line = fr.readline()
+                x = re.split(r'nkjp:[^ ]* ', line)  #in all files
+                ret = ' '.join(x)
+                x = re.split('<nkjp:paren>', ret)   #in ann_segmentation.xml
+                ret = ' '.join(x)
+                x = re.split('</nkjp:paren>', ret)  #in ann_segmentation.xml
+                ret = ' '.join(x)
+                x = re.split('<choice>', ret)   #in ann_segmentation.xml
+                ret = ' '.join(x)
+                x = re.split('</choice>', ret)  #in ann_segmentation.xml
+                ret = ' '.join(x)
+                fw.write(ret)
+            fr.close()
+            fw.close()
+            return self.write_file.name
+        except Exception:
+            self.remove_preprocessed_file()
+            raise Exception
+
+    def remove_preprocessed_file(self):
+        os.remove(self.write_file.name)
+        pass
+
+
+class NKJPCorpus_Segmentation_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    ann_segmentation.xml files in NKJP corpus.
+    """
+
+    def __init__(self, filename, **kwargs):
+        self.tagspec = '.*p/.*s'
+        #intersperse NKJPCorpus_Text_View
+        self.text_view = NKJPCorpus_Text_View(filename, mode=NKJPCorpus_Text_View.SENTS_MODE)
+        self.text_view.handle_query()
+        #xml preprocessing
+        self.xml_tool = XML_Tool(filename, 'ann_segmentation.xml')
+        #base class init
+        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+
+    def get_segm_id(self, example_word):
+        return example_word.split('(')[1].split(',')[0]
+
+    def get_sent_beg(self, beg_word):
+        #returns index of beginning letter in sentence
+        return int(beg_word.split(',')[1])
+
+    def get_sent_end(self, end_word):
+        #returns index of end letter in sentence
+        splitted = end_word.split(')')[0].split(',')
+        return int(splitted[1]) + int(splitted[2])
+
+    def get_sentences(self, sent_segm):
+        #returns one sentence
+        id = self.get_segm_id(sent_segm[0])
+        segm = self.text_view.segm_dict[id]    #text segment
+        beg = self.get_sent_beg(sent_segm[0])
+        end = self.get_sent_end(sent_segm[len(sent_segm)-1])
+        return segm[beg:end]
+
+    def remove_choice(self, segm):
+        ret = []
+        prev_txt_end = -1
+        prev_txt_nr = -1
+        for word in segm:
+            txt_nr = self.get_segm_id(word)
+            #get increasing sequence of ids: in case of choice get first possibility
+            if self.get_sent_beg(word) > prev_txt_end-1 or prev_txt_nr != txt_nr:
+                ret.append(word)
+                prev_txt_end = self.get_sent_end(word)
+            prev_txt_nr = txt_nr
+
+        return ret
+
+    def handle_query(self):
+        try:
+            self._open()
+            sentences = []
+            while True:
+                sent_segm = XMLCorpusView.read_block(self, self._stream)
+                if len(sent_segm) == 0:
+                    break
+                for segm in sent_segm:
+                    segm = self.remove_choice(segm)
+                    sentences.append(self.get_sentences(segm))
+            self.close()
+            self.xml_tool.remove_preprocessed_file()
+            return sentences
+        except Exception:
+            self.xml_tool.remove_preprocessed_file()
+            raise Exception
+
+    def handle_elt(self, elt, context):
+        ret = []
+        for seg in elt:
+            ret.append(seg.get('corresp'))
+        return ret
+
+
+class NKJPCorpus_Text_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    text.xml files in NKJP corpus.
+    """
+    SENTS_MODE = 0
+    RAW_MODE = 1
+
+    def __init__(self, filename, **kwargs):
+        self.mode = kwargs.pop('mode', 0)
+        self.tagspec = '.*/div/ab'
+        self.segm_dict = dict()
+        #xml preprocessing
+        self.xml_tool = XML_Tool(filename, 'text.xml')
+        #base class init
+        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+
+    def handle_query(self):
+        try:
+            self._open()
+            x = self.read_block(self._stream)
+            self.close()
+            self.xml_tool.remove_preprocessed_file()
+            return x
+        except Exception:
+            self.xml_tool.remove_preprocessed_file()
+            raise Exception
+
+    def read_block(self, stream, tagspec=None, elt_handler=None):
+        """
+        Returns text as a list of sentences.
+        """
+        txt = []
+        while True:
+            segm = XMLCorpusView.read_block(self, stream)
+            if len(segm) == 0:
+                break
+            for part in segm:
+                txt.append(part)
+
+        return [' '.join([segm for segm in txt])]
+
+    def get_segm_id(self, elt):
+        for attr in elt.attrib:
+            if attr.endswith('id'):
+                return elt.get(attr)
+
+    def handle_elt(self, elt, context):
+        #fill dictionary to use later in sents mode
+        if self.mode is NKJPCorpus_Text_View.SENTS_MODE:
+            self.segm_dict[self.get_segm_id(elt)] = elt.text
+        return elt.text
+
+
+class NKJPCorpus_Morph_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    ann_morphosyntax.xml files in NKJP corpus.
+    """
+
+    def __init__(self, filename, **kwargs):
+        self.tags = kwargs.pop('tags', None)
+        self.tagspec = '.*/seg/fs'
+        self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml')
+        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+
+    def handle_query(self):
+        try:
+            self._open()
+            words = []
+            while True:
+                segm = XMLCorpusView.read_block(self, self._stream)
+                if len(segm) == 0:
+                    break
+                for part in segm:
+                    if part is not None:
+                        words.append(part)
+            self.close()
+            self.xml_tool.remove_preprocessed_file()
+            return words
+        except Exception:
+            self.xml_tool.remove_preprocessed_file()
+            raise Exception
+
+    def handle_elt(self, elt, context):
+        word = ''
+        flag = False
+        is_not_interp = True
+        #if tags not specified, then always return word
+        if self.tags is None:
+            flag = True
+
+        for child in elt:
+
+            #get word
+            if 'name' in child.keys() and child.attrib['name'] == 'orth':
+                for symbol in child:
+                    if symbol.tag == 'string':
+                        word = symbol.text
+            elif 'name' in child.keys() and child.attrib['name'] == 'interps':
+                for symbol in child:
+                    if 'type' in symbol.keys() and symbol.attrib['type'] == 'lex':
+                        for symbol2 in symbol:
+                            if 'name' in symbol2.keys() and symbol2.attrib['name'] == 'ctag':
+                                for symbol3 in symbol2:
+                                    if 'value' in symbol3.keys() and self.tags is not None and symbol3.attrib['value'] in self.tags:
+                                        flag = True
+                                    elif 'value' in symbol3.keys() and symbol3.attrib['value'] == 'interp':
+                                        is_not_interp = False
+        if flag and is_not_interp:
+            return word
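The NKJPCorpusReader docstring above describes the intended usage in prose; restated here as a self-contained sketch (the corpus root is a placeholder path, and the NKJP data must already be unpacked there):

    from nltk.corpus.reader.nkjp import NKJPCorpusReader

    # Placeholder path; point this at the directory holding the NKJP files.
    nkjp = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='')

    print(nkjp.header()[:1])                                # bibliographic metadata
    print(nkjp.words()[:10])                                # plain tokens
    print(nkjp.tagged_words(tags=['subst', 'comp'])[:10])   # only the listed ctags
    print(nkjp.sents()[:2])                                 # segmented sentences
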
diff --git a/nltk/corpus/reader/nombank.py b/nltk/corpus/reader/nombank.py
index dc033ea..5c8e7cc 100644
--- a/nltk/corpus/reader/nombank.py
+++ b/nltk/corpus/reader/nombank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: NomBank Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Paul Bedaride <paul.bedaride at gmail.com>
 #          Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/nps_chat.py b/nltk/corpus/reader/nps_chat.py
index 0a2ede5..367efec 100644
--- a/nltk/corpus/reader/nps_chat.py
+++ b/nltk/corpus/reader/nps_chat.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: NPS Chat Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/pl196x.py b/nltk/corpus/reader/pl196x.py
index aa86698..d51cee9 100644
--- a/nltk/corpus/reader/pl196x.py
+++ b/nltk/corpus/reader/pl196x.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit:
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Piotr Kasprzyk <p.j.kasprzyk at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/plaintext.py b/nltk/corpus/reader/plaintext.py
index a456912..b12669f 100644
--- a/nltk/corpus/reader/plaintext.py
+++ b/nltk/corpus/reader/plaintext.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Nitin Madnani <nmadnani at umiacs.umd.edu>
diff --git a/nltk/corpus/reader/ppattach.py b/nltk/corpus/reader/ppattach.py
index 9462308..46db6ec 100644
--- a/nltk/corpus/reader/ppattach.py
+++ b/nltk/corpus/reader/ppattach.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PP Attachment Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/propbank.py b/nltk/corpus/reader/propbank.py
index 49bc361..41340ca 100644
--- a/nltk/corpus/reader/propbank.py
+++ b/nltk/corpus/reader/propbank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PropBank Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/rte.py b/nltk/corpus/reader/rte.py
index 83e88a5..f194ff7 100644
--- a/nltk/corpus/reader/rte.py
+++ b/nltk/corpus/reader/rte.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author:  Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/semcor.py b/nltk/corpus/reader/semcor.py
index 15dc544..3865212 100644
--- a/nltk/corpus/reader/semcor.py
+++ b/nltk/corpus/reader/semcor.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SemCor Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Nathan Schneider <nschneid at cs.cmu.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/senseval.py b/nltk/corpus/reader/senseval.py
index bbbead9..0ac040b 100644
--- a/nltk/corpus/reader/senseval.py
+++ b/nltk/corpus/reader/senseval.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Senseval 2 Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Steven Bird <stevenbird1 at gmail.com> (modifications)
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/sentiwordnet.py b/nltk/corpus/reader/sentiwordnet.py
index 3d357b8..b53ec56 100644
--- a/nltk/corpus/reader/sentiwordnet.py
+++ b/nltk/corpus/reader/sentiwordnet.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: WordNet
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Christopher Potts <cgpotts at stanford.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/sinica_treebank.py b/nltk/corpus/reader/sinica_treebank.py
index 3d872db..27c93b9 100644
--- a/nltk/corpus/reader/sinica_treebank.py
+++ b/nltk/corpus/reader/sinica_treebank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sinica Treebank Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/string_category.py b/nltk/corpus/reader/string_category.py
index 3b0160b..8335c6b 100644
--- a/nltk/corpus/reader/string_category.py
+++ b/nltk/corpus/reader/string_category.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: String Category Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/switchboard.py b/nltk/corpus/reader/switchboard.py
index 460da31..12f1baa 100644
--- a/nltk/corpus/reader/switchboard.py
+++ b/nltk/corpus/reader/switchboard.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Switchboard Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/tagged.py b/nltk/corpus/reader/tagged.py
index 1e23774..69ba4dd 100644
--- a/nltk/corpus/reader/tagged.py
+++ b/nltk/corpus/reader/tagged.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagged Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Jacob Perkins <japerk at gmail.com>
diff --git a/nltk/corpus/reader/toolbox.py b/nltk/corpus/reader/toolbox.py
index cc8dfad..380102a 100644
--- a/nltk/corpus/reader/toolbox.py
+++ b/nltk/corpus/reader/toolbox.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Greg Aumann <greg_aumann at sil.org>
 #         Stuart Robinson <Stuart.Robinson at mpi.nl>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/corpus/reader/util.py b/nltk/corpus/reader/util.py
index 7124176..bb31cb9 100644
--- a/nltk/corpus/reader/util.py
+++ b/nltk/corpus/reader/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Reader Utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/verbnet.py b/nltk/corpus/reader/verbnet.py
index 2f9ae93..35a3851 100644
--- a/nltk/corpus/reader/verbnet.py
+++ b/nltk/corpus/reader/verbnet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Verbnet Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/wordlist.py b/nltk/corpus/reader/wordlist.py
index b658697..74d5f22 100644
--- a/nltk/corpus/reader/wordlist.py
+++ b/nltk/corpus/reader/wordlist.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Word List Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/corpus/reader/wordnet.py b/nltk/corpus/reader/wordnet.py
index 2503cdc..477dea7 100644
--- a/nltk/corpus/reader/wordnet.py
+++ b/nltk/corpus/reader/wordnet.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: WordNet
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bethard <Steven.Bethard at colorado.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
@@ -1177,7 +1177,9 @@ class WordNetCorpusReader(CorpusReader):
 
     def lemma(self, name, lang='en'):
         '''Return lemma object that matches the name'''
-        synset_name, lemma_name = name.rsplit('.', 1)
+        # cannot simply split on first '.', e.g.: '.45_caliber.a.01..45_caliber'
+        separator = SENSENUM_RE.search(name).start()
+        synset_name, lemma_name = name[:separator+3], name[separator+4:]
         synset = self.synset(synset_name)
         for lemma in synset.lemmas(lang):
             if lemma._name == lemma_name:
@@ -1615,6 +1617,8 @@ class WordNetCorpusReader(CorpusReader):
         ADJ: [('er', ''), ('est', ''), ('er', 'e'), ('est', 'e')],
         ADV: []}
 
+    MORPHOLOGICAL_SUBSTITUTIONS[ADJ_SAT] = MORPHOLOGICAL_SUBSTITUTIONS[ADJ]
+
     def _morphy(self, form, pos):
         # from jordanbg:
         # Given an original string x
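The lemma() change above replaces the naive split on '.' with a search for the sense number, so lemma names that themselves contain dots parse correctly. A worked example of the new slicing; the regular expression below is only an assumed stand-in for wordnet.py's SENSENUM_RE:

    import re

    SENSENUM_RE = re.compile(r'\.\d\d\.')   # assumed shape of the real pattern

    name = '.45_caliber.a.01..45_caliber'
    separator = SENSENUM_RE.search(name).start()             # index of '.01.'
    synset_name, lemma_name = name[:separator + 3], name[separator + 4:]

    print(synset_name)   # '.45_caliber.a.01'
    print(lemma_name)    # '.45_caliber'

    # A plain name.rsplit('.', 1) would instead yield '.45_caliber.a.01.' / '45_caliber'.
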
diff --git a/nltk/corpus/reader/xmldocs.py b/nltk/corpus/reader/xmldocs.py
index d55001b..757e5b1 100644
--- a/nltk/corpus/reader/xmldocs.py
+++ b/nltk/corpus/reader/xmldocs.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: XML Corpus Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/reader/ycoe.py b/nltk/corpus/reader/ycoe.py
index bd2c022..be6716c 100644
--- a/nltk/corpus/reader/ycoe.py
+++ b/nltk/corpus/reader/ycoe.py
@@ -2,7 +2,7 @@
 
 # Natural Language Toolkit: York-Toronto-Helsinki Parsed Corpus of Old English Prose (YCOE)
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Selina Dennis <selina at tranzfusion.net>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/corpus/util.py b/nltk/corpus/util.py
index f7449b8..166dfb4 100644
--- a/nltk/corpus/util.py
+++ b/nltk/corpus/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Reader Utility Functions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/data.py b/nltk/data.py
index 1a6426e..e9e9fa4 100644
--- a/nltk/data.py
+++ b/nltk/data.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -56,7 +56,9 @@ except ImportError:
 # this import should be more specific:
 import nltk
 
-from nltk.compat import py3_data, text_type, string_types, BytesIO, urlopen, url2pathname
+from nltk.compat import py3_data, add_py3_data
+from nltk.compat import text_type, string_types, BytesIO, urlopen, url2pathname
+
 
 ######################################################################
 # Search Path
@@ -205,6 +207,10 @@ def normalize_resource_name(resource_name, allow_relative=True, relative_path=No
     True
     >>> windows or normalize_resource_name('../dir/file', False, '/') == '/dir/file'
     True
+    >>> not windows or normalize_resource_name('/dir/file', True, '/') == 'dir/file'
+    True
+    >>> windows or normalize_resource_name('/dir/file', True, '/') == '/dir/file'
+    True
     """
     is_dir = bool(re.search(r'[\\/.]$', resource_name)) or resource_name.endswith(os.path.sep)
     if sys.platform.startswith('win'):
@@ -448,8 +454,8 @@ class ZipFilePathPointer(PathPointer):
         if isinstance(zipfile, string_types):
             zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
 
-        # Normalize the entry string, it should be absolute:
-        entry = normalize_resource_name(entry, False, '/').lstrip('/')
+        # Normalize the entry string, it should be relative:
+        entry = normalize_resource_name(entry, True, '/').lstrip('/')
 
         # Check that the entry exists:
         if entry:
@@ -560,7 +566,7 @@ def find(resource_name, paths=None):
 
     # Resolve default paths at runtime in-case the user overrides nltk.data.path
     if paths is None:
-        paths=path
+        paths = path
 
     # Check if the resource name includes a zipfile name
     m = re.match(r'(.*\.zip)/?(.*)$|', resource_name)
@@ -740,7 +746,8 @@ def load(resource_url, format='auto', cache=True, verbose=False,
     :type encoding: str
     :param encoding: the encoding of the input; only used for text formats.
     """
-    resource_url=normalize_resource_url(resource_url)
+    resource_url = normalize_resource_url(resource_url)
+    resource_url = add_py3_data(resource_url)
 
     # Determine the format of the resource.
     if format == 'auto':
@@ -1184,7 +1191,7 @@ class SeekableUnicodeStreamReader(object):
     def seek(self, offset, whence=0):
         """
         Move the stream to a new file position.  If the reader is
-        maintaining any buffers, tehn they will be cleared.
+        maintaining any buffers, then they will be cleared.
 
         :param offset: A byte count offset.
         :param whence: If 0, then the offset is from the start of the file
diff --git a/nltk/downloader.py b/nltk/downloader.py
index e441006..08390d4 100644
--- a/nltk/downloader.py
+++ b/nltk/downloader.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus & Model Downloader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -54,9 +54,9 @@ NLTK Download Server
 Before downloading any packages, the corpus and module downloader
 contacts the NLTK download server, to retrieve an index file
 describing the available packages.  By default, this index file is
-loaded from ``http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml``.
-If necessary, it is possible to create a new ``Downloader`` object,
-specifying a different URL for the package index file.
+loaded from ``http://www.nltk.org/nltk_data/``.  If necessary, it is
+possible to create a new ``Downloader`` object, specifying a different
+URL for the package index file.
 
 Usage::
 
@@ -377,8 +377,7 @@ class Downloader(object):
        server index will be considered 'stale,' and will be
        re-downloaded."""
 
-    # DEFAULT_URL = 'http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml'
-    DEFAULT_URL = 'http://nltk.github.com/nltk_data/'
+    DEFAULT_URL = 'http://www.nltk.org/nltk_data/'
     """The default URL for the NLTK data server's index.  An
        alternative URL can be specified when creating a new
        ``Downloader`` object."""
@@ -841,8 +840,11 @@ class Downloader(object):
             for i, child_id in enumerate(collection.children):
                 if child_id in self._packages:
                     collection.children[i] = self._packages[child_id]
-                if child_id in self._collections:
+                elif child_id in self._collections:
                     collection.children[i] = self._collections[child_id]
+                else:
+                    print('removing collection member with no package: {}'.format(child_id))
+                    del collection.children[i]
 
         # Fill in collection.packages for each collection.
         for collection in self._collections.values():
@@ -1099,8 +1101,8 @@ class DownloaderShell(object):
             if user_input == 's':
                 self._show_config()
             elif user_input == 'd':
-                new_dl_dir = compat.raw_input('  New Directory> ').strip().lower()
-                if new_dl_dir in ('', 'x', 'q'):
+                new_dl_dir = compat.raw_input('  New Directory> ').strip()
+                if new_dl_dir in ('', 'x', 'q', 'X', 'Q'):
                     print('  Cancelled!')
                 elif os.path.isdir(new_dl_dir):
                     self._ds.download_dir = new_dl_dir
@@ -1108,8 +1110,8 @@ class DownloaderShell(object):
                     print(('Directory %r not found!  Create it first.' %
                            new_dl_dir))
             elif user_input == 'u':
-                new_url = compat.raw_input('  New URL> ').strip().lower()
-                if new_url in ('', 'x', 'q'):
+                new_url = compat.raw_input('  New URL> ').strip()
+                if new_url in ('', 'x', 'q', 'X', 'Q'):
                     print('  Cancelled!')
                 else:
                     if not new_url.startswith('http://'):
@@ -1750,7 +1752,11 @@ class DownloaderGUI(object):
             from tkMessageBox import Message
             Message(message=ABOUT, title=TITLE).show()
         except ImportError:
-            ShowText(self._top, TITLE, ABOUT)
+            try:
+                from tkinter.messagebox import Message
+                Message(message=ABOUT, title=TITLE).show()
+            except ImportError:
+                ShowText(self.top, TITLE, ABOUT)
 
     #/////////////////////////////////////////////////////////////////
     # Progress Bar
diff --git a/nltk/draw/__init__.py b/nltk/draw/__init__.py
index ad6dec0..5d197fc 100644
--- a/nltk/draw/__init__.py
+++ b/nltk/draw/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: graphical representations package
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/draw/cfg.py b/nltk/draw/cfg.py
index 1458e63..d691dde 100644
--- a/nltk/draw/cfg.py
+++ b/nltk/draw/cfg.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CFG visualization
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/dispersion.py b/nltk/draw/dispersion.py
index 170f556..2ba89e7 100644
--- a/nltk/draw/dispersion.py
+++ b/nltk/draw/dispersion.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dispersion Plots
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/table.py b/nltk/draw/table.py
index a36c208..98fff2e 100644
--- a/nltk/draw/table.py
+++ b/nltk/draw/table.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Table widget
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/tree.py b/nltk/draw/tree.py
index 00bdabe..7a10a5d 100644
--- a/nltk/draw/tree.py
+++ b/nltk/draw/tree.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Graphical Representations for Trees
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/draw/util.py b/nltk/draw/util.py
index 760f56d..0d5c83c 100644
--- a/nltk/draw/util.py
+++ b/nltk/draw/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Drawing utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/featstruct.py b/nltk/featstruct.py
index 46a0052..412dae9 100644
--- a/nltk/featstruct.py
+++ b/nltk/featstruct.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Feature Structures
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>,
 #         Rob Speer,
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/grammar.py b/nltk/grammar.py
index 632c234..04301a0 100644
--- a/nltk/grammar.py
+++ b/nltk/grammar.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Context Free Grammars
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Jason Narad <jason.narad at gmail.com>
diff --git a/nltk/help.py b/nltk/help.py
index 736b18f..e76671c 100644
--- a/nltk/help.py
+++ b/nltk/help.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK) Help
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Authors: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/inference/__init__.py b/nltk/inference/__init__.py
index 7ba76e0..172df36 100644
--- a/nltk/inference/__init__.py
+++ b/nltk/inference/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Inference
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>
 #
diff --git a/nltk/inference/nonmonotonic.py b/nltk/inference/nonmonotonic.py
index 0d168a5..c0c4198 100644
--- a/nltk/inference/nonmonotonic.py
+++ b/nltk/inference/nonmonotonic.py
@@ -2,7 +2,7 @@
 #
 # Author: Daniel H. Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/inference/prover9.py b/nltk/inference/prover9.py
index f99fe7b..ac93834 100644
--- a/nltk/inference/prover9.py
+++ b/nltk/inference/prover9.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to the Prover9 Theorem Prover
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>
 #
diff --git a/nltk/inference/resolution.py b/nltk/inference/resolution.py
index 4751cf8..cf1fef9 100755
--- a/nltk/inference/resolution.py
+++ b/nltk/inference/resolution.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/inference/tableau.py b/nltk/inference/tableau.py
index 2b32d1f..74965eb 100644
--- a/nltk/inference/tableau.py
+++ b/nltk/inference/tableau.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: First-Order Tableau Theorem Prover
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/internals.py b/nltk/internals.py
index 823423b..4674d9e 100644
--- a/nltk/internals.py
+++ b/nltk/internals.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Internal utility functions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Nitin Madnani <nmadnani at ets.org>
@@ -16,6 +16,7 @@ import textwrap
 import types
 import sys
 import stat
+import locale
 
 # Use the c version of ElementTree, which is faster, if possible:
 try:
@@ -156,8 +157,8 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
 
     # Check the return code.
     if p.returncode != 0:
-        print(stderr.decode(sys.stdout.encoding))
-        raise OSError('Java command failed!')
+        print(_decode_stdoutdata(stderr))
+        raise OSError('Java command failed : ' + str(cmd))
 
     return (stdout, stderr)
 
@@ -490,9 +491,10 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
     if os.name == 'posix':
         for alternative in file_names:
             try:
-                p = subprocess.Popen(['which', alternative], stdout=subprocess.PIPE)
+                p = subprocess.Popen(['which', alternative],
+                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                 stdout, stderr = p.communicate()
-                path = stdout.decode(sys.stdout.encoding).strip()
+                path = _decode_stdoutdata(stdout).strip()
                 if path.endswith(alternative) and os.path.exists(path):
                     if verbose:
                         print('[Found %s: %s]' % (filename, path))
@@ -643,6 +645,16 @@ def find_jar(name_pattern, path_to_jar=None, env_vars=(),
     return next(find_jar_iter(name_pattern, path_to_jar, env_vars,
                          searchpath, url, verbose, is_regex))
 
+def _decode_stdoutdata(stdoutdata):
+    """ Convert data read from stdout/stderr to unicode """
+    if not isinstance(stdoutdata, bytes):
+        return stdoutdata
+    
+    encoding = getattr(sys.__stdout__, "encoding", locale.getpreferredencoding())
+    if encoding is None:
+        return stdoutdata.decode()
+    return stdoutdata.decode(encoding)
+
 ##########################################################################
 # Import Stdlib Module
 ##########################################################################
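The new _decode_stdoutdata helper decodes subprocess output with the interpreter's stdout encoding when one is available, falling back to the locale's preferred encoding (or a plain .decode()) otherwise. A sketch of the intended call pattern on a POSIX system; note that the helper is private to nltk.internals:

    import subprocess
    from nltk.internals import _decode_stdoutdata   # private helper added above

    p = subprocess.Popen(['which', 'python'],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    path = _decode_stdoutdata(stdout).strip()       # bytes -> text on any platform
    print(path)
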
diff --git a/nltk/jsontags.py b/nltk/jsontags.py
index 084c649..3053568 100644
--- a/nltk/jsontags.py
+++ b/nltk/jsontags.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: JSON Encoder/Decoder Helpers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/__init__.py b/nltk/metrics/__init__.py
index 96be878..b6b108d 100644
--- a/nltk/metrics/__init__.py
+++ b/nltk/metrics/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Metrics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/agreement.py b/nltk/metrics/agreement.py
index 5ea7d98..3a7f0ea 100644
--- a/nltk/metrics/agreement.py
+++ b/nltk/metrics/agreement.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Tom Lippincott <tom at cs.columbia.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/metrics/association.py b/nltk/metrics/association.py
index febdb7d..e57be2f 100644
--- a/nltk/metrics/association.py
+++ b/nltk/metrics/association.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Ngram Association Measures
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/metrics/confusionmatrix.py b/nltk/metrics/confusionmatrix.py
index 22a6b5f..5ffcd24 100644
--- a/nltk/metrics/confusionmatrix.py
+++ b/nltk/metrics/confusionmatrix.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Confusion Matrices
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
@@ -93,9 +93,9 @@ class ConfusionMatrix(object):
                                                      self._total)
 
     def __str__(self):
-        return self.pp()
+        return self.pretty_format()
 
-    def pp(self, show_percents=False, values_in_chart=True,
+    def pretty_format(self, show_percents=False, values_in_chart=True,
            truncate=None, sort_by_count=False):
         """
         :return: A multi-line string representation of this confusion matrix.
@@ -200,7 +200,7 @@ def demo():
     print('Test    =', test)
     print('Confusion matrix:')
     print(ConfusionMatrix(reference, test))
-    print(ConfusionMatrix(reference, test).pp(sort_by_count=True))
+    print(ConfusionMatrix(reference, test).pretty_format(sort_by_count=True))
 
 if __name__ == '__main__':
     demo()
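With pp() renamed to pretty_format(), callers only need to swap the method name; the keyword arguments are unchanged. A short sketch mirroring the demo above:

    from nltk.metrics import ConfusionMatrix

    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test      = 'DET VB VB DET NN NN NN IN DET NN'.split()

    cm = ConfusionMatrix(reference, test)
    print(cm.pretty_format(sort_by_count=True))   # formerly cm.pp(sort_by_count=True)
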
diff --git a/nltk/metrics/distance.py b/nltk/metrics/distance.py
index 0e1c36e..049897a 100644
--- a/nltk/metrics/distance.py
+++ b/nltk/metrics/distance.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Distance Metrics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Tom Lippincott <tom at cs.columbia.edu>
@@ -17,15 +17,15 @@ As metrics, they must satisfy the following three requirements:
 1. d(a, a) = 0
 2. d(a, b) >= 0
 3. d(a, c) <= d(a, b) + d(b, c)
-
 """
+
 from __future__ import print_function
 
 
 def _edit_dist_init(len1, len2):
     lev = []
     for i in range(len1):
-        lev.append([0] * len2)  # initialize 2-D array to zero
+        lev.append([0] * len2)  # initialize 2D array to zero
     for i in range(len1):
         lev[i][0] = i           # column 0: 0,1,2,3,4,...
     for j in range(len2):
@@ -114,11 +114,13 @@ def masi_distance(label1, label2):
     labels are assigned.
 
     >>> from nltk.metrics import masi_distance
-    >>> masi_distance(set([1,2]), set([1,2,3,4]))
+    >>> masi_distance(set([1, 2]), set([1, 2, 3, 4]))
     0.665...
 
-    Passonneau 2006, Measuring Agreement on Set-Valued Items (MASI) for Semantic and Pragmatic Annotation.
+    Passonneau 2006, Measuring Agreement on Set-Valued Items (MASI)
+    for Semantic and Pragmatic Annotation.
     """
+
     len_intersection = len(label1.intersection(label2))
     len_union = len(label1.union(label2))
     len_label1 = len(label1)
@@ -136,7 +138,7 @@ def masi_distance(label1, label2):
 
 
 def interval_distance(label1,label2):
-    """Krippendorff'1 interval distance metric
+    """Krippendorff's interval distance metric
 
     >>> from nltk.metrics import interval_distance
     >>> interval_distance(1,10)
@@ -144,8 +146,9 @@ def interval_distance(label1,label2):
 
     Krippendorff 1980, Content Analysis: An Introduction to its Methodology
     """
+
     try:
-        return pow(label1-label2,2)
+        return pow(label1 - label2, 2)
 #        return pow(list(label1)[0]-list(label2)[0],2)
     except:
         print("non-numeric labels not supported with interval distance")
@@ -153,13 +156,17 @@ def interval_distance(label1,label2):
 
 def presence(label):
     """Higher-order function to test presence of a given label
-
     """
-    return lambda x,y: 1.0*((label in x) == (label in y))
+
+    return lambda x, y: 1.0 * ((label in x) == (label in y))
 
 
 def fractional_presence(label):
-    return lambda x,y:abs((float(1.0/len(x)) - float(1.0/len(y))))*(label in x and label in y) or 0.0*(label not in x and label not in y) or abs((float(1.0/len(x))))*(label in x and label not in y) or ((float(1.0/len(y))))*(label not in x and label in y)
+    return lambda x, y:\
+        abs((float(1.0 / len(x)) - float(1.0 / len(y)))) * (label in x and label in y) \
+        or 0.0 * (label not in x and label not in y) \
+        or abs(float(1.0 / len(x))) * (label in x and label not in y) \
+        or (float(1.0 / len(y))) * (label not in x and label in y)
 
 
 def custom_distance(file):
@@ -174,7 +181,9 @@ def custom_distance(file):
 
 
 def demo():
-    edit_distance_examples = [("rain", "shine"), ("abcdef", "acbdef"), ("language", "lnaguaeg"), ("language", "lnaugage"), ("language", "lngauage")]
+    edit_distance_examples = [
+        ("rain", "shine"), ("abcdef", "acbdef"), ("language", "lnaguaeg"),
+        ("language", "lnaugage"), ("language", "lngauage")]
     for s1, s2 in edit_distance_examples:
         print("Edit distance between '%s' and '%s':" % (s1, s2), edit_distance(s1, s2))
     for s1, s2 in edit_distance_examples:
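presence() and fractional_presence() above are higher-order functions: each returns a distance callable over two label sets, suitable for the agreement metrics. A quick check of what the definitions shown actually evaluate to:

    from nltk.metrics.distance import presence, fractional_presence

    d = presence('cat')
    print(d({'cat', 'dog'}, {'cat'}))    # 1.0: 'cat' is present in both sets
    print(d({'cat', 'dog'}, {'dog'}))    # 0.0: present in only one of them

    fd = fractional_presence('cat')
    print(fd({'cat', 'dog'}, {'cat'}))   # abs(1/2 - 1/1) = 0.5, label in both
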
diff --git a/nltk/metrics/paice.py b/nltk/metrics/paice.py
index 834171e..e05e598 100644
--- a/nltk/metrics/paice.py
+++ b/nltk/metrics/paice.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Lauri Hallila <laurihallila at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/metrics/scores.py b/nltk/metrics/scores.py
index 53cf384..8e504ad 100644
--- a/nltk/metrics/scores.py
+++ b/nltk/metrics/scores.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Evaluation
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/metrics/segmentation.py b/nltk/metrics/segmentation.py
index aac4d67..8d9f745 100644
--- a/nltk/metrics/segmentation.py
+++ b/nltk/metrics/segmentation.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Text Segmentation Metrics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         David Doukhan <david.doukhan at gmail.com>
diff --git a/nltk/metrics/spearman.py b/nltk/metrics/spearman.py
index cc95d17..9b9a86b 100644
--- a/nltk/metrics/spearman.py
+++ b/nltk/metrics/spearman.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Spearman Rank Correlation
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Joel Nothman <jnothman at student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/__init__.py b/nltk/misc/__init__.py
index bc9210e..38552ec 100644
--- a/nltk/misc/__init__.py
+++ b/nltk/misc/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Miscellaneous modules
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/minimalset.py b/nltk/misc/minimalset.py
index 42615f4..689341a 100644
--- a/nltk/misc/minimalset.py
+++ b/nltk/misc/minimalset.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Minimal Sets
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/sort.py b/nltk/misc/sort.py
index 4972a5b..3b7aed2 100644
--- a/nltk/misc/sort.py
+++ b/nltk/misc/sort.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: List Sorting
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/misc/wordfinder.py b/nltk/misc/wordfinder.py
index a8d45ca..ce63311 100644
--- a/nltk/misc/wordfinder.py
+++ b/nltk/misc/wordfinder.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Word Finder
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/parse/__init__.py b/nltk/parse/__init__.py
index 22bcb88..076b41e 100644
--- a/nltk/parse/__init__.py
+++ b/nltk/parse/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parsers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -68,10 +68,13 @@ from nltk.parse.recursivedescent import (RecursiveDescentParser,
 from nltk.parse.shiftreduce import (ShiftReduceParser, SteppingShiftReduceParser)
 from nltk.parse.util import load_parser, TestGrammar, extract_test_sentences
 from nltk.parse.viterbi import ViterbiParser
-from nltk.parse.dependencygraph import DependencyGraph, nx_graph
+from nltk.parse.dependencygraph import DependencyGraph
 from nltk.parse.projectivedependencyparser import (ProjectiveDependencyParser,
                                                    ProbabilisticProjectiveDependencyParser)
 from nltk.parse.nonprojectivedependencyparser import (NonprojectiveDependencyParser,
                                                       NaiveBayesDependencyScorer,
                                                       ProbabilisticNonprojectiveParser)
 from nltk.parse.malt import MaltParser
+from nltk.parse.evaluate import DependencyEvaluator
+from nltk.parse.transitionparser import TransitionParser
+from nltk.parse.bllip import BllipParser
diff --git a/nltk/parse/api.py b/nltk/parse/api.py
index 9ec29c8..37bcdaf 100644
--- a/nltk/parse/api.py
+++ b/nltk/parse/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parser API
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -32,7 +32,7 @@ class ParserI(object):
         """
         raise NotImplementedError()
 
-    def parse(self, sent):
+    def parse(self, sent, *args, **kwargs):
         """
         :return: An iterator that generates parse trees for the sentence.
         When possible this list is sorted from most likely to least likely.
@@ -42,25 +42,25 @@ class ParserI(object):
         :rtype: iter(Tree)
         """
         if overridden(self.parse_sents):
-            return next(self.parse_sents([sent]))
+            return next(self.parse_sents([sent], *args, **kwargs))
         elif overridden(self.parse_one):
-            return (tree for tree in [self.parse_one(sent)] if tree is not None)
+            return (tree for tree in [self.parse_one(sent, *args, **kwargs)] if tree is not None)
         elif overridden(self.parse_all):
-            return iter(self.parse_all(sent))
+            return iter(self.parse_all(sent, *args, **kwargs))
         else:
             raise NotImplementedError()
 
-    def parse_sents(self, sents):
+    def parse_sents(self, sents, *args, **kwargs):
         """
         Apply ``self.parse()`` to each element of ``sents``.
         :rtype: iter(iter(Tree))
         """
-        return (self.parse(sent) for sent in sents)
+        return (self.parse(sent, *args, **kwargs) for sent in sents)
 
-    def parse_all(self, sent):
+    def parse_all(self, sent, *args, **kwargs):
         """:rtype: list(Tree)"""
-        return list(self.parse(sent))
+        return list(self.parse(sent, *args, **kwargs))
 
-    def parse_one(self, sent):
+    def parse_one(self, sent, *args, **kwargs):
         """:rtype: Tree or None"""
-        return next(self.parse(sent), None)
+        return next(self.parse(sent, *args, **kwargs), None)
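
The api.py hunk above threads *args and **kwargs through ParserI's delegation chain, so a subclass that overrides only parse_sents() still gets parse(), parse_all() and parse_one() for free. A minimal sketch under that assumption; EchoParser is a hypothetical toy class, not part of NLTK:

    from nltk.parse.api import ParserI
    from nltk.tree import Tree

    class EchoParser(ParserI):
        """Toy parser: wraps each sentence in a flat S tree."""
        def parse_sents(self, sents, *args, **kwargs):
            # one iterator of candidate trees per input sentence
            return (iter([Tree('S', list(sent))]) for sent in sents)

    p = EchoParser()
    print(p.parse_one('a toy sentence'.split()))   # delegated through ParserI.parse()
    print(p.parse_all('another example'.split()))  # [Tree('S', ['another', 'example'])]
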
diff --git a/nltk/parse/bllip.py b/nltk/parse/bllip.py
new file mode 100644
index 0000000..739ef1b
--- /dev/null
+++ b/nltk/parse/bllip.py
@@ -0,0 +1,285 @@
+# Natural Language Toolkit: Interface to BLLIP Parser
+#
+# Author: David McClosky <dmcc at bigasterisk.com>
+#
+# Copyright (C) 2001-2015 NLTK Project
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import print_function
+
+from nltk.parse.api import ParserI
+from nltk.tree import Tree
+
+"""
+Interface for parsing with BLLIP Parser. Requires the Python
+bllipparser module. BllipParser objects can be constructed with the
+``BllipParser.from_unified_model_dir`` class method or manually using the
+``BllipParser`` constructor. The former is generally easier if you have
+a BLLIP Parser unified model directory -- a basic model can be obtained
+from NLTK's downloader. More unified parsing models can be obtained with
+BLLIP Parser's ModelFetcher (run ``python -m bllipparser.ModelFetcher``
+or see docs for ``bllipparser.ModelFetcher.download_and_install_model``).
+
+Basic usage::
+
+    # download and install a basic unified parsing model (Wall Street Journal)
+    # sudo python -m nltk.downloader bllip_wsj_no_aux
+
+    >>> from nltk.data import find
+    >>> model_dir = find('models/bllip_wsj_no_aux').path
+    >>> bllip = BllipParser.from_unified_model_dir(model_dir)
+
+    # 1-best parsing
+    >>> sentence1 = 'British left waffles on Falklands .'.split()
+    >>> top_parse = bllip.parse_one(sentence1)
+    >>> print(top_parse)
+    (S1
+      (S
+        (NP (JJ British) (NN left))
+        (VP (VBZ waffles) (PP (IN on) (NP (NNP Falklands))))
+        (. .)))
+
+    # n-best parsing
+    >>> sentence2 = 'Time flies'.split()
+    >>> all_parses = bllip.parse_all(sentence2)
+    >>> print(len(all_parses))
+    50
+    >>> print(all_parses[0])
+    (S1 (S (NP (NNP Time)) (VP (VBZ flies))))
+
+    # incorporating external tagging constraints (None means unconstrained tag)
+    >>> constrained1 = bllip.tagged_parse([('Time', 'VB'), ('flies', 'NNS')])
+    >>> print(next(constrained1))
+    (S1 (NP (VB Time) (NNS flies)))
+    >>> constrained2 = bllip.tagged_parse([('Time', 'NN'), ('flies', None)])
+    >>> print(next(constrained2))
+    (S1 (NP (NN Time) (VBZ flies)))
+
+References
+----------
+
+- Charniak, Eugene. "A maximum-entropy-inspired parser." Proceedings of
+  the 1st North American chapter of the Association for Computational
+  Linguistics conference. Association for Computational Linguistics,
+  2000.
+
+- Charniak, Eugene, and Mark Johnson. "Coarse-to-fine n-best parsing
+  and MaxEnt discriminative reranking." Proceedings of the 43rd Annual
+  Meeting on Association for Computational Linguistics. Association
+  for Computational Linguistics, 2005.
+
+Known issues
+------------
+
+Note that BLLIP Parser is not currently threadsafe. Since this module
+uses a SWIG interface, it is potentially unsafe to create multiple
+``BllipParser`` objects in the same process. BLLIP Parser currently
+has issues with non-ASCII text and will raise an error if given any.
+
+See http://pypi.python.org/pypi/bllipparser/ for more information
+on BLLIP Parser's Python interface.
+"""
+
+__all__ = ['BllipParser']
+
+# this block allows this module to be imported even if bllipparser isn't
+# available
+try:
+    from bllipparser import RerankingParser
+    from bllipparser.RerankingParser import get_unified_model_parameters
+
+    def _ensure_bllip_import_or_error():
+        pass
+except ImportError as ie:
+    def _ensure_bllip_import_or_error(ie=ie):
+        raise ImportError("Couldn't import bllipparser module: %s" % ie)
+
+def _ensure_ascii(words):
+    try:
+        for i, word in enumerate(words):
+            word.decode('ascii')
+    except UnicodeDecodeError:
+        raise ValueError("Token %d (%r) is non-ASCII. BLLIP Parser "
+                         "currently doesn't support non-ASCII inputs." %
+                         (i, word))
+
+def _scored_parse_to_nltk_tree(scored_parse):
+    return Tree.fromstring(str(scored_parse.ptb_parse))
+
+class BllipParser(ParserI):
+    """
+    Interface for parsing with BLLIP Parser. BllipParser objects can be
+    constructed with the ``BllipParser.from_unified_model_dir`` class
+    method or manually using the ``BllipParser`` constructor.
+    """
+    def __init__(self, parser_model=None, reranker_features=None,
+                 reranker_weights=None, parser_options=None,
+                 reranker_options=None):
+        """
+        Load a BLLIP Parser model from scratch. You'll typically want to
+        use the ``from_unified_model_dir()`` class method to construct
+        this object.
+
+        :param parser_model: Path to parser model directory
+        :type parser_model: str
+
+        :param reranker_features: Path to the reranker model's features file
+        :type reranker_features: str
+
+        :param reranker_weights: Path to the reranker model's weights file
+        :type reranker_weights: str
+
+        :param parser_options: optional dictionary of parser options, see
+        ``bllipparser.RerankingParser.RerankingParser.load_parser_options()``
+        for more information.
+        :type parser_options: dict(str)
+
+        :param reranker_options: optional
+        dictionary of reranker options, see
+        ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
+        for more information.
+        :type reranker_options: dict(str)
+        """
+        _ensure_bllip_import_or_error()
+
+        parser_options = parser_options or {}
+        reranker_options = reranker_options or {}
+
+        self.rrp = RerankingParser()
+        self.rrp.load_parser_model(parser_model, **parser_options)
+        if reranker_features and reranker_weights:
+            self.rrp.load_reranker_model(features_filename=reranker_features,
+                                         weights_filename=reranker_weights,
+                                         **reranker_options)
+
+    def parse(self, sentence):
+        """
+        Use BLLIP Parser to parse a sentence. Takes a sentence as a list
+        of words; it will be automatically tagged with this BLLIP Parser
+        instance's tagger.
+
+        :return: An iterator that generates parse trees for the sentence
+        from most likely to least likely.
+
+        :param sentence: The sentence to be parsed
+        :type sentence: list(str)
+        :rtype: iter(Tree)
+        """
+        _ensure_ascii(sentence)
+        nbest_list = self.rrp.parse(sentence)
+        for scored_parse in nbest_list:
+            yield _scored_parse_to_nltk_tree(scored_parse)
+
+    def tagged_parse(self, word_and_tag_pairs):
+        """
+        Use BLLIP to parse a sentence. Takes a sentence as a list of
+        (word, tag) tuples; the sentence must have already been tokenized
+        and tagged. BLLIP will attempt to use the tags provided but may
+        use others if it can't come up with a complete parse subject
+        to those constraints. You may also specify a tag as ``None``
+        to leave a token's tag unconstrained.
+
+        :return: An iterator that generates parse trees for the sentence
+        from most likely to least likely.
+
+        :param word_and_tag_pairs: Input sentence to parse as (word, tag) pairs
+        :type word_and_tag_pairs: list(tuple(str, str))
+        :rtype: iter(Tree)
+        """
+        words = []
+        tag_map = {}
+        for i, (word, tag) in enumerate(word_and_tag_pairs):
+            words.append(word)
+            if tag is not None:
+                tag_map[i] = tag
+
+        _ensure_ascii(words)
+        nbest_list = self.rrp.parse_tagged(words, tag_map)
+        for scored_parse in nbest_list:
+            yield _scored_parse_to_nltk_tree(scored_parse)
+
+    @classmethod
+    def from_unified_model_dir(this_class, model_dir, parser_options=None,
+                               reranker_options=None):
+        """
+        Create a ``BllipParser`` object from a unified parsing model
+        directory. Unified parsing model directories are a standardized
+        way of storing BLLIP parser and reranker models together on disk.
+        See ``bllipparser.RerankingParser.get_unified_model_parameters()``
+        for more information about unified model directories.
+
+        :return: A ``BllipParser`` object using the parser and reranker
+        models in the model directory.
+
+        :param model_dir: Path to the unified model directory.
+        :type model_dir: str
+        :param parser_options: optional dictionary of parser options, see
+        ``bllipparser.RerankingParser.RerankingParser.load_parser_options()``
+        for more information.
+        :type parser_options: dict(str)
+        :param reranker_options: optional dictionary of reranker options, see
+        ``bllipparser.RerankingParser.RerankingParser.load_reranker_model()``
+        for more information.
+        :type reranker_options: dict(str)
+        :rtype: BllipParser
+        """
+        (parser_model_dir, reranker_features_filename,
+         reranker_weights_filename) = get_unified_model_parameters(model_dir)
+        return this_class(parser_model_dir, reranker_features_filename,
+                          reranker_weights_filename, parser_options,
+                          reranker_options)
+
+def demo():
+    """This assumes the Python module bllipparser is installed."""
+
+    # download and install a basic unified parsing model (Wall Street Journal)
+    # sudo python -m nltk.downloader bllip_wsj_no_aux
+
+    from nltk.data import find
+    model_dir = find('models/bllip_wsj_no_aux').path
+
+    print('Loading BLLIP Parsing models...')
+    # the easiest way to get started is to use a unified model
+    bllip = BllipParser.from_unified_model_dir(model_dir)
+    print('Done.')
+
+    sentence1 = 'British left waffles on Falklands .'.split()
+    sentence2 = 'I saw the man with the telescope .'.split()
+    # this sentence is known to fail under the WSJ parsing model
+    fail1 = '# ! ? : -'.split()
+    for sentence in (sentence1, sentence2, fail1):
+        print('Sentence: %r' % ' '.join(sentence))
+        try:
+            tree = next(bllip.parse(sentence))
+            print(tree)
+        except StopIteration:
+            print("(parse failed)")
+
+    # n-best parsing demo
+    for i, parse in enumerate(bllip.parse(sentence1)):
+        print('parse %d:\n%s' % (i, parse))
+
+    # using external POS tag constraints
+    print("forcing 'tree' to be 'NN':",
+          next(bllip.tagged_parse([('A', None), ('tree', 'NN')])))
+    print("forcing 'A' to be 'DT' and 'tree' to be 'NNP':",
+          next(bllip.tagged_parse([('A', 'DT'), ('tree', 'NNP')])))
+    # constraints don't have to make sense... (though on more complicated
+    # sentences, they may cause the parse to fail)
+    print("forcing 'A' to be 'NNP':",
+          next(bllip.tagged_parse([('A', 'NNP'), ('tree', None)])))
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        _ensure_bllip_import_or_error()
+    except ImportError:
+        raise SkipTest('doctests from nltk.parse.bllip are skipped because '
+                       'the bllipparser module is not installed')
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
+
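
The try/except block near the top of bllip.py is a deferred-import guard: the module always imports cleanly, and a missing bllipparser dependency only surfaces as an ImportError when a BllipParser is actually constructed. A generic sketch of the same pattern, using a hypothetical optional dependency called fancyparser:

    try:
        import fancyparser  # hypothetical optional dependency

        def _ensure_fancy_import_or_error():
            pass
    except ImportError as ie:
        def _ensure_fancy_import_or_error(ie=ie):
            # re-raise the original error only when the feature is used
            raise ImportError("Couldn't import fancyparser module: %s" % ie)

    def parse_with_fancyparser(sentence):
        _ensure_fancy_import_or_error()  # fails here, not at module import time
        return fancyparser.parse(sentence)  # hypothetical API
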
diff --git a/nltk/parse/chart.py b/nltk/parse/chart.py
index 51be1cc..233421d 100644
--- a/nltk/parse/chart.py
+++ b/nltk/parse/chart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: A Chart Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Jean Mark Gawron <gawron at mail.sdsu.edu>
@@ -716,7 +716,7 @@ class Chart(object):
     #////////////////////////////////////////////////////////////
     # Display
     #////////////////////////////////////////////////////////////
-    def pp_edge(self, edge, width=None):
+    def pretty_format_edge(self, edge, width=None):
         """
         Return a pretty-printed string representation of a given edge
         in this chart.
@@ -747,11 +747,11 @@ class Chart(object):
         str += (' '*(width-1)+'.')*(self._num_leaves-end)
         return str + '| %s' % edge
 
-    def pp_leaves(self, width=None):
+    def pretty_format_leaves(self, width=None):
         """
         Return a pretty-printed string representation of this
         chart's leaves.  This string can be used as a header
-        for calls to ``pp_edge``.
+        for calls to ``pretty_format_edge``.
         """
         if width is None: width = 50 // (self.num_leaves()+1)
 
@@ -765,7 +765,7 @@ class Chart(object):
 
         return header
 
-    def pp(self, width=None):
+    def pretty_format(self, width=None):
         """
         Return a pretty-printed string representation of this chart.
 
@@ -779,8 +779,8 @@ class Chart(object):
         edges = sorted([(e.length(), e.start(), e) for e in self])
         edges = [e for (_,_,e) in edges]
 
-        return (self.pp_leaves(width) + '\n' +
-                '\n'.join(self.pp_edge(edge, width) for edge in edges))
+        return (self.pretty_format_leaves(width) + '\n' +
+                '\n'.join(self.pretty_format_edge(edge, width) for edge in edges))
 
     #////////////////////////////////////////////////////////////
     # Display: Dot (AT&T Graphviz)
@@ -1291,7 +1291,7 @@ class ChartParser(ParserI):
             if print_rule_header:
                 print('%s:' % rule)
                 print_rule_header = False
-            print(chart.pp_edge(edge, edge_width))
+            print(chart.pretty_format_edge(edge, edge_width))
 
     def chart_parse(self, tokens, trace=None):
         """
@@ -1312,8 +1312,8 @@ class ChartParser(ParserI):
 
         # Width, for printing trace edges.
         trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1)
-        if trace: print(chart.pp_leaves(trace_edge_width))
-
+        if trace: print(chart.pretty_format_leaves(trace_edge_width))
+        
         if self._use_agenda:
             # Use an agenda-based algorithm.
             for axiom in self._axioms:
@@ -1346,9 +1346,9 @@ class ChartParser(ParserI):
         # Return the final chart.
         return chart
 
-    def parse_all(self, tokens, tree_class=Tree):
+    def parse(self, tokens, tree_class=Tree):
         chart = self.chart_parse(tokens)
-        return chart.parses(self._grammar.start(), tree_class=tree_class)
+        return iter(chart.parses(self._grammar.start(), tree_class=tree_class))
 
 class TopDownChartParser(ChartParser):
     """
@@ -1449,7 +1449,7 @@ class SteppingChartParser(ChartParser):
 
             for e in self._parse():
                 if self._trace > 1: print(self._current_chartrule)
-                if self._trace > 0: print(self._chart.pp_edge(e,w))
+                if self._trace > 0: print(self._chart.pretty_format_edge(e,w))
                 yield e
                 if self._restart: break
             else:
@@ -1628,9 +1628,9 @@ def demo(choice=None,
         print()
         cp = ChartParser(grammar, strategies[strategy][1], trace=trace)
         t = time.time()
-        # parses = cp.parse_all(tokens)
         chart = cp.chart_parse(tokens)
         parses = list(chart.parses(grammar.start()))
+        
         times[strategies[strategy][0]] = time.time()-t
         print("Nr edges in chart:", len(chart.edges()))
         if numparses:
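
With the chart.py changes above, ChartParser.parse() returns a lazy iterator of trees and the old pp_* helpers become pretty_format_*. A short usage sketch (the toy grammar is made up; assumes an NLTK at this version):

    import nltk

    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    NP -> 'John' | 'Mary'
    VP -> V NP
    V -> 'sees'
    """)
    parser = nltk.ChartParser(grammar)
    tokens = 'John sees Mary'.split()

    for tree in parser.parse(tokens):   # parse() now yields trees lazily
        print(tree)

    chart = parser.chart_parse(tokens)
    print(chart.pretty_format())        # formerly chart.pp()
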
diff --git a/nltk/parse/dependencygraph.py b/nltk/parse/dependencygraph.py
old mode 100644
new mode 100755
index 918753e..5646914
--- a/nltk/parse/dependencygraph.py
+++ b/nltk/parse/dependencygraph.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (modifications)
 #
@@ -15,53 +15,77 @@ The input is assumed to be in Malt-TAB format
 """
 from __future__ import print_function, unicode_literals
 
-import re
+from collections import defaultdict
+from itertools import chain
 from pprint import pformat
+import subprocess
 
 from nltk.tree import Tree
-from nltk.compat import python_2_unicode_compatible
+from nltk.compat import python_2_unicode_compatible, string_types
+
 
 #################################################################
 # DependencyGraph Class
 #################################################################
 
+
 @python_2_unicode_compatible
 class DependencyGraph(object):
     """
     A container for the nodes and labelled edges of a dependency structure.
     """
-    def __init__(self, tree_str=None):
-        """
-        We place a dummy 'top' node in the first position
-        in the nodelist, since the root node is often assigned '0'
-        as its head. This also means that the indexing of the nodelist
-        corresponds directly to the Malt-TAB format, which starts at 1.
+
+    def __init__(self, tree_str=None, cell_extractor=None, zero_based=False, cell_separator=None):
+        """Dependency graph.
+
+        We place a dummy `TOP` node with the index 0, since the root node is
+        often assigned 0 as its head. This also means that the indexing of the
+        nodes corresponds directly to the Malt-TAB format, which starts at 1.
+
+        If zero_based is True, Malt-TAB-like input is assumed, with node
+        numbers starting at 0 and the root node assigned -1 (as produced
+        by, e.g., zpar).
+
+        :param str cell_separator: the cell separator. If not provided, cells
+        are split by whitespace.
+
         """
-        top = {'word': None, 'lemma': None, 'ctag': 'TOP', 'tag': 'TOP', 'feats': None, 'rel': 'TOP', 'deps': [], 'address': 0}
-        self.nodelist = [top]
+        self.nodes = defaultdict(lambda: {'deps': defaultdict(list)})
+        self.nodes[0].update(
+            {
+                'word': None,
+                'lemma': None,
+                'ctag': 'TOP',
+                'tag': 'TOP',
+                'feats': None,
+                'rel': 'TOP',
+                'address': 0,
+            }
+        )
+
         self.root = None
-        self.stream = None
+
         if tree_str:
-            self._parse(tree_str)
+            self._parse(
+                tree_str,
+                cell_extractor=cell_extractor,
+                zero_based=zero_based,
+                cell_separator=cell_separator,
+            )
 
     def remove_by_address(self, address):
         """
         Removes the node with the given address.  References
         to this node in others will still exist.
         """
-        node_index = len(self.nodelist) - 1
-        while(node_index >= 0):
-            node = self.nodelist[node_index]
-            if node['address'] == address:
-                self.nodelist.pop(node_index)
-            node_index -= 1
+        del self.nodes[address]
 
     def redirect_arcs(self, originals, redirect):
         """
         Redirects arcs to any of the nodes in the originals list
         to the redirect node address.
         """
-        for node in self.nodelist:
+        for node in self.nodes.values():
             new_deps = []
             for dep in node['deps']:
                 if dep in originals:
@@ -75,72 +99,106 @@ class DependencyGraph(object):
         Adds an arc from the node specified by head_address to the
         node specified by the mod address.
         """
-        for node in self.nodelist:
-            if node['address'] == head_address and (mod_address not in node['deps']):
-                node['deps'].append(mod_address)
+        relation = self.nodes[mod_address]['rel']
+        self.nodes[head_address]['deps'].setdefault(relation, [])
+        self.nodes[head_address]['deps'][relation].append(mod_address)
+        #self.nodes[head_address]['deps'].append(mod_address)
+
 
     def connect_graph(self):
         """
         Fully connects all non-root nodes.  All nodes are set to be dependents
         of the root node.
         """
-        for node1 in self.nodelist:
-            for node2 in self.nodelist:
+        for node1 in self.nodes.values():
+            for node2 in self.nodes.values():
                 if node1['address'] != node2['address'] and node2['rel'] != 'TOP':
-                    node1['deps'].append(node2['address'])
+                    relation = node2['rel']
+                    node1['deps'].setdefault(relation, [])
+                    node1['deps'][relation].append(node2['address'])
+                    #node1['deps'].append(node2['address'])
 
-    # fix error and return
     def get_by_address(self, node_address):
-        """
-        Returns the node with the given address.
-        """
-        for node in self.nodelist:
-            if node['address'] == node_address:
-                return node
-        print('THROW ERROR: address not found in -get_by_address-')
-        return -1
+        """Return the node with the given address."""
+        return self.nodes[node_address]
 
     def contains_address(self, node_address):
         """
         Returns true if the graph contains a node with the given node
         address, false otherwise.
         """
-        for node in self.nodelist:
-            if node['address'] == node_address:
-                return True
-        return False
+        return node_address in self.nodes
+
+    def to_dot(self):
+        """
+        Return a dot representation suitable for use with Graphviz.
+
+        :rtype: str
+        """
+        # Start the digraph specification
+        s = 'digraph G{\n'
+        s += 'edge [dir=forward]\n'
+        s += 'node [shape=plaintext]\n'
+        # Draw the remaining nodes
+        for node in sorted(self.nodes.values(), key=lambda v: v['address']):
+            s += '\n%s [label="%s (%s)"]' % (node['address'], node['address'], node['word'])
+            for rel, deps in node['deps'].items():
+                for dep in deps:
+                    if rel is not None:
+                        s += '\n%s -> %s [label="%s"]' % (node['address'], dep, rel)
+                    else:
+                        s += '\n%s -> %s ' % (node['address'], dep)
+        s += "\n}"
+        return s
+
+    def _repr_svg_(self):
+        """IPython magic: show an SVG representation of the dependency graph."""
+        dot_string = self.to_dot()
+        format = 'svg'
+        try:
+            process = subprocess.Popen(['dot', '-T%s' % format], stdin=subprocess.PIPE,
+                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        except OSError:
+            raise Exception('Cannot find the dot binary from Graphviz package')
+        out, err = process.communicate(dot_string)
+        if err:
+            raise Exception('Cannot create %s representation by running dot from string\n:%s' % (format, dot_string))
+        return out
 
     def __str__(self):
-        return pformat(self.nodelist)
+        return pformat(self.nodes)
 
     def __repr__(self):
-        return "<DependencyGraph with %d nodes>" % len(self.nodelist)
+        return "<DependencyGraph with {0} nodes>".format(len(self.nodes))
 
     @staticmethod
-    def load(file):
+    def load(filename, zero_based=False, cell_separator=None):
         """
-        :param file: a file in Malt-TAB format
+        :param filename: a name of a file in Malt-TAB format
+        :param zero_based: nodes in the input file are numbered starting from 0
+        rather than 1 (as produced by, e.g., zpar)
+        :param str cell_separator: the cell separator. If not provided, cells
+        are split by whitespace.
+
         :return: a list of DependencyGraphs
-        """
-        with open(file) as infile:
-            return [DependencyGraph(tree_str) for tree_str in
-                                                  infile.read().split('\n\n')]
 
-    @staticmethod
-    def _normalize(line):
-        """
-        Deal with lines in which spaces are used rather than tabs.
         """
-        SPC = re.compile(' +')
-        return re.sub(SPC, '\t', line).strip()
+        with open(filename) as infile:
+            return [
+                DependencyGraph(
+                    tree_str,
+                    zero_based=zero_based,
+                    cell_separator=cell_separator,
+                )
+                for tree_str in infile.read().split('\n\n')
+            ]
 
     def left_children(self, node_index):
         """
         Returns the number of left children under the node specified
         by the given address.
         """
-        children = self.nodelist[node_index]['deps']
-        index = self.nodelist[node_index]['address']
+        children = chain.from_iterable(self.nodes[node_index]['deps'].values())
+        index = self.nodes[node_index]['address']
         return sum(1 for c in children if c < index)
 
     def right_children(self, node_index):
@@ -148,68 +206,120 @@ class DependencyGraph(object):
         Returns the number of right children under the node specified
         by the given address.
         """
-        children = self.nodelist[node_index]['deps']
-        index = self.nodelist[node_index]['address']
+        children = chain.from_iterable(self.nodes[node_index]['deps'].values())
+        index = self.nodes[node_index]['address']
         return sum(1 for c in children if c > index)
 
     def add_node(self, node):
         if not self.contains_address(node['address']):
-            self.nodelist.append(node)
-
-    def _parse(self, input):
-        lines = [DependencyGraph._normalize(line) for line in input.split('\n') if line.strip()]
-        temp = []
-        for index, line in enumerate(lines):
-#           print line
-            try:
-                cells = line.split('\t')
-                nrCells = len(cells)
-                if nrCells == 3:
-                    word, tag, head = cells
-                    lemma, ctag, feats, rel = word, tag, '', ''
-                elif nrCells == 4:
-                    word, tag, head, rel = cells
-                    lemma, ctag, feats = word, tag, ''
-                elif nrCells == 10:
-                    _, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
-                else:
-                    raise ValueError('Number of tab-delimited fields (%d) not supported by CoNLL(10) or Malt-Tab(4) format' % (nrCells))
+            self.nodes[node['address']].update(node)
 
-                head = int(head)
-                self.nodelist.append({'address': index+1, 'word': word, 'lemma': lemma, 'ctag': ctag, 'tag': tag, 'feats': feats, 'head': head, 'rel': rel,
-                                      'deps': [d for (d,h) in temp if h == index+1]})
+    def _parse(self, input_, cell_extractor=None, zero_based=False, cell_separator=None):
+        """Parse a sentence.
 
-                try:
-                    self.nodelist[head]['deps'].append(index+1)
-                except IndexError:
-                    temp.append((index+1, head))
+        :param cell_extractor: a function that given a tuple of cells returns a
+        7-tuple, where the values are ``word, lemma, ctag, tag, feats, head,
+        rel``.
+
+        :param str cell_separator: the cell separator. If not provided, cells
+        are split by whitespace.
+
+        """
+
+        def extract_3_cells(cells):
+            word, tag, head = cells
+            return word, word, tag, tag, '', head, ''
+
+        def extract_4_cells(cells):
+            word, tag, head, rel = cells
+            return word, word, tag, tag, '', head, rel
+
+        def extract_10_cells(cells):
+            _, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
+            return word, lemma, ctag, tag, feats, head, rel
+
+        extractors = {
+            3: extract_3_cells,
+            4: extract_4_cells,
+            10: extract_10_cells,
+        }
 
-            except ValueError:
-                break
+        if isinstance(input_, string_types):
+            input_ = (line for line in input_.split('\n'))
 
-        root_address = self.nodelist[0]['deps'][0]
-        self.root = self.nodelist[root_address]
+        lines = (l.rstrip() for l in input_)
+        lines = (l for l in lines if l)
+
+        cell_number = None
+        for index, line in enumerate(lines, start=1):
+            cells = line.split(cell_separator)
+            if cell_number is None:
+                cell_number = len(cells)
+            else:
+                assert cell_number == len(cells)
+
+            if cell_extractor is None:
+                try:
+                    cell_extractor = extractors[cell_number]
+                except KeyError:
+                    raise ValueError(
+                        'Number of tab-delimited fields ({0}) not supported by '
+                        'CoNLL(10) or Malt-Tab(4) format'.format(cell_number)
+                    )
+
+            word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)
+
+            head = int(head)
+            if zero_based:
+                head += 1
+
+            self.nodes[index].update(
+                {
+                    'address': index,
+                    'word': word,
+                    'lemma': lemma,
+                    'ctag': ctag,
+                    'tag': tag,
+                    'feats': feats,
+                    'head': head,
+                    'rel': rel,
+                }
+            )
+
+            # Make sure that the fake root node has labeled dependencies.
+            if (cell_number == 3) and (head == 0):
+                rel = 'ROOT'
+            self.nodes[head]['deps'][rel].append(index)
+
+        if not self.nodes[0]['deps']['ROOT']:
+            raise DependencyGraphError(
+                "The graph does'n contain a node "
+                "that depends on the root element."
+            )
+        root_address = self.nodes[0]['deps']['ROOT'][0]
+        self.root = self.nodes[root_address]
 
     def _word(self, node, filter=True):
         w = node['word']
         if filter:
-            if w != ',': return w
+            if w != ',':
+                return w
         return w
 
     def _tree(self, i):
-        """
-        Recursive function for turning dependency graphs into
-        NLTK trees.
-        :type i: int
-        :param i: index of a node in ``nodelist``
-        :return: either a word (if the indexed node
-        is a leaf) or a ``Tree``.
+        """ Turn dependency graphs into NLTK trees.
+
+        :param int i: index of a node
+        :return: either a word (if the indexed node is a leaf) or a ``Tree``.
         """
         node = self.get_by_address(i)
         word = node['word']
-        deps = node['deps']
+        deps = sorted(chain.from_iterable(node['deps'].values()))
 
-        return (Tree(word, [self._tree(j) for j in deps]) if len(deps) != 0 else word)
+        if deps:
+            return Tree(word, [self._tree(dep) for dep in deps])
+        else:
+            return word
 
     def tree(self):
         """
@@ -217,51 +327,96 @@ class DependencyGraph(object):
         ``Tree`` constructor. Dependency labels are omitted.
         """
         node = self.root
+
         word = node['word']
-        deps = node['deps']
-        return Tree(word, [self._tree(i) for i in deps])
+        deps = sorted(chain.from_iterable(node['deps'].values()))
+        return Tree(word, [self._tree(dep) for dep in deps])
+
+    def triples(self, node=None):
+        """
+        Extract dependency triples of the form:
+        ((head word, head tag), rel, (dep word, dep tag))
+        """
+
+        if not node:
+            node = self.root
+
+        head = (node['word'], node['ctag'])
+        for i in sorted(chain.from_iterable(node['deps'].values())):
+            dep = self.get_by_address(i)
+            yield (head, dep['rel'], (dep['word'], dep['ctag']))
+            for triple in self.triples(node=dep):
+                yield triple
 
     def _hd(self, i):
         try:
-            return self.nodelist[i]['head']
+            return self.nodes[i]['head']
         except IndexError:
             return None
 
     def _rel(self, i):
         try:
-            return self.nodelist[i]['rel']
+            return self.nodes[i]['rel']
         except IndexError:
             return None
 
     # what's the return type?  Boolean or list?
     def contains_cycle(self):
+        """Check whether there are cycles.
+
+        >>> dg = DependencyGraph(treebank_data)
+        >>> dg.contains_cycle()
+        False
+
+        >>> cyclic_dg = DependencyGraph()
+        >>> top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
+        >>> child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
+        >>> child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
+        >>> child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
+        >>> child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
+        >>> cyclic_dg.nodes = {
+        ...     0: top,
+        ...     1: child1,
+        ...     2: child2,
+        ...     3: child3,
+        ...     4: child4,
+        ... }
+        >>> cyclic_dg.root = top
+
+        >>> cyclic_dg.contains_cycle()
+        [3, 1, 2, 4]
+
+        """
         distances = {}
-        for node in self.nodelist:
+
+        for node in self.nodes.values():
             for dep in node['deps']:
-                key = tuple([node['address'], dep]) #'%d -> %d' % (node['address'], dep)
+                key = tuple([node['address'], dep])
                 distances[key] = 1
-        for n in range(len(self.nodelist)):
+
+        for _ in self.nodes:
             new_entries = {}
+
             for pair1 in distances:
                 for pair2 in distances:
                     if pair1[1] == pair2[0]:
                         key = tuple([pair1[0], pair2[1]])
                         new_entries[key] = distances[pair1] + distances[pair2]
+
             for pair in new_entries:
                 distances[pair] = new_entries[pair]
                 if pair[0] == pair[1]:
-                    print(pair[0])
-                    path = self.get_cycle_path(self.get_by_address(pair[0]), pair[0]) #self.nodelist[pair[0]], pair[0])
+                    path = self.get_cycle_path(self.get_by_address(pair[0]), pair[0])
                     return path
-        return False  # return []?
 
+        return False  # return []?
 
     def get_cycle_path(self, curr_node, goal_node_index):
         for dep in curr_node['deps']:
             if dep == goal_node_index:
                 return [curr_node['address']]
         for dep in curr_node['deps']:
-            path = self.get_cycle_path(self.get_by_address(dep), goal_node_index)#self.nodelist[dep], goal_node_index)
+            path = self.get_cycle_path(self.get_by_address(dep), goal_node_index)
             if len(path) > 0:
                 path.insert(0, curr_node['address'])
                 return path
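
The new triples() generator above walks the graph from the root and yields ((head word, head tag), rel, (dep word, dep tag)) tuples. A small sketch on a made-up three-token Malt-TAB string:

    from nltk.parse.dependencygraph import DependencyGraph

    dg = DependencyGraph("""John NNP 2 SUB
    sees VB 0 ROOT
    Mary NNP 2 OBJ""")
    for head, rel, dep in dg.triples():
        print(head, rel, dep)   # e.g. ('sees', 'VB') SUB ('John', 'NNP')
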
@@ -276,38 +431,43 @@ class DependencyGraph(object):
         :rtype: str
         """
 
-        lines = []
-        for i, node in enumerate(self.nodelist[1:]):
-            word, lemma, ctag, tag, feats, head, rel = node['word'], node['lemma'], node['ctag'], node['tag'], node['feats'], node['head'], node['rel']
-            if style == 3:
-                lines.append('%s\t%s\t%s\n' % (word, tag, head))
-            elif style == 4:
-                lines.append('%s\t%s\t%s\t%s\n' % (word, tag, head, rel))
-            elif style == 10:
-                lines.append('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t_\t_\n' % (i+1, word, lemma, ctag, tag, feats, head, rel))
-            else:
-                raise ValueError('Number of tab-delimited fields (%d) not supported by CoNLL(10) or Malt-Tab(4) format' % (style))
-        return ''.join(lines)
+        if style == 3:
+            template = '{word}\t{tag}\t{head}\n'
+        elif style == 4:
+            template = '{word}\t{tag}\t{head}\t{rel}\n'
+        elif style == 10:
+            template = '{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n'
+        else:
+            raise ValueError(
+                'Number of tab-delimited fields ({0}) not supported by '
+                'CoNLL(10) or Malt-Tab(4) format'.format(style)
+            )
+
+        return ''.join(template.format(i=i, **node) for i, node in sorted(self.nodes.items()) if node['tag'] != 'TOP')
+
+    def nx_graph(self):
+        """Convert the data in a ``nodelist`` into a networkx labeled directed graph."""
+        import networkx as NX
 
+        nx_nodelist = list(range(1, len(self.nodes)))
+        nx_edgelist = [
+            (n, self._hd(n), self._rel(n))
+            for n in nx_nodelist if self._hd(n)
+        ]
+        self.nx_labels = {}
+        for n in nx_nodelist:
+            self.nx_labels[n] = self.nodes[n]['word']
+
+        g = NX.XDiGraph()
+        g.add_nodes_from(nx_nodelist)
+        g.add_edges_from(nx_edgelist)
+
+        return g
 
-def nx_graph(self):
-    """
-    Convert the data in a ``nodelist`` into a networkx
-    labeled directed graph.
-    :rtype: XDigraph
-    """
-    nx_nodelist = list(range(1, len(self.nodelist)))
-    nx_edgelist = [(n, self._hd(n), self._rel(n))
-                        for n in nx_nodelist if self._hd(n)]
-    self.nx_labels = {}
-    for n in nx_nodelist:
-        self.nx_labels[n] = self.nodelist[n]['word']
 
-    g = NX.XDiGraph()
-    g.add_nodes_from(nx_nodelist)
-    g.add_edges_from(nx_edgelist)
+class DependencyGraphError(Exception):
+    """Dependency graph exception."""
 
-    return g
 
 def demo():
     malt_demo()
@@ -315,6 +475,7 @@ def demo():
     conll_file_demo()
     cycle_finding_demo()
 
+
 def malt_demo(nx=False):
     """
     A demonstration of the result of reading a dependency
@@ -340,9 +501,9 @@ Nov.    NNP     9       VMOD
 .       .       9       VMOD
 """)
     tree = dg.tree()
-    print(tree.pprint())
+    tree.pprint()
     if nx:
-        #currently doesn't work
+        # currently doesn't work
         import networkx as NX
         import pylab as P
 
@@ -350,7 +511,7 @@ Nov.    NNP     9       VMOD
         g.info()
         pos = NX.spring_layout(g, dim=1)
         NX.draw_networkx_nodes(g, pos, node_size=50)
-        #NX.draw_networkx_edges(g, pos, edge_color='k', width=8)
+        # NX.draw_networkx_edges(g, pos, edge_color='k', width=8)
         NX.draw_networkx_labels(g, pos, dg.nx_labels)
         P.xticks([])
         P.yticks([])
@@ -365,27 +526,30 @@ def conll_demo():
     """
     dg = DependencyGraph(conll_data1)
     tree = dg.tree()
-    print(tree.pprint())
+    tree.pprint()
     print(dg)
     print(dg.to_conll(4))
 
+
 def conll_file_demo():
     print('Mass conll_read demo...')
     graphs = [DependencyGraph(entry)
               for entry in conll_data2.split('\n\n') if entry]
     for graph in graphs:
         tree = graph.tree()
-        print('\n' + tree.pprint())
+        print('\n')
+        tree.pprint()
+
 
 def cycle_finding_demo():
     dg = DependencyGraph(treebank_data)
     print(dg.contains_cycle())
     cyclic_dg = DependencyGraph()
-    top =    {'word':None, 'deps':[1], 'rel': 'TOP', 'address': 0}
-    child1 = {'word':None, 'deps':[2], 'rel': 'NTOP', 'address': 1}
-    child2 = {'word':None, 'deps':[4], 'rel': 'NTOP', 'address': 2}
-    child3 = {'word':None, 'deps':[1], 'rel': 'NTOP', 'address': 3}
-    child4 = {'word':None, 'deps':[3], 'rel': 'NTOP', 'address': 4}
+    top = {'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}
+    child1 = {'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}
+    child2 = {'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}
+    child3 = {'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}
+    child4 = {'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}
     cyclic_dg.nodes = {0: top, 1: child1, 2: child2, 3: child3, 4: child4}
     cyclic_dg.root = top
     print(cyclic_dg.contains_cycle())
diff --git a/nltk/parse/earleychart.py b/nltk/parse/earleychart.py
index 1a4b81c..8dcccd2 100644
--- a/nltk/parse/earleychart.py
+++ b/nltk/parse/earleychart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: An Incremental Earley Chart Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 #         Rob Speer <rspeer at mit.edu>
 #         Edward Loper <edloper at gmail.com>
@@ -309,7 +309,7 @@ class IncrementalChartParser(ChartParser):
 
         # Width, for printing trace edges.
         trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1)
-        if trace: print(chart.pp_leaves(trace_edge_width))
+        if trace: print(chart.pretty_format_leaves(trace_edge_width))
 
         for axiom in self._axioms:
             new_edges = list(axiom.apply(chart, grammar))
diff --git a/nltk/parse/evaluate.py b/nltk/parse/evaluate.py
new file mode 100644
index 0000000..88a47f5
--- /dev/null
+++ b/nltk/parse/evaluate.py
@@ -0,0 +1,132 @@
+# Natural Language Toolkit: evaluation of dependency parser
+#
+# Author: Long Duong <longdt219 at gmail.com>
+#
+# Copyright (C) 2001-2015 NLTK Project
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import division
+
+import unicodedata
+
+
+class DependencyEvaluator(object):
+    """
+    Class for measuring labelled and unlabelled attachment score for
+    dependency parsing. Note that the evaluation ignores punctuation.
+
+    >>> from nltk.parse import DependencyGraph, DependencyEvaluator
+
+    >>> gold_sent = DependencyGraph(\"""
+    ... Pierre  NNP     2       NMOD
+    ... Vinken  NNP     8       SUB
+    ... ,       ,       2       P
+    ... 61      CD      5       NMOD
+    ... years   NNS     6       AMOD
+    ... old     JJ      2       NMOD
+    ... ,       ,       2       P
+    ... will    MD      0       ROOT
+    ... join    VB      8       VC
+    ... the     DT      11      NMOD
+    ... board   NN      9       OBJ
+    ... as      IN      9       VMOD
+    ... a       DT      15      NMOD
+    ... nonexecutive    JJ      15      NMOD
+    ... director        NN      12      PMOD
+    ... Nov.    NNP     9       VMOD
+    ... 29      CD      16      NMOD
+    ... .       .       9       VMOD
+    ... \""")
+
+    >>> parsed_sent = DependencyGraph(\"""
+    ... Pierre  NNP     8       NMOD
+    ... Vinken  NNP     1       SUB
+    ... ,       ,       3       P
+    ... 61      CD      6       NMOD
+    ... years   NNS     6       AMOD
+    ... old     JJ      2       NMOD
+    ... ,       ,       3       AMOD
+    ... will    MD      0       ROOT
+    ... join    VB      8       VC
+    ... the     DT      11      AMOD
+    ... board   NN      9       OBJECT
+    ... as      IN      9       NMOD
+    ... a       DT      15      NMOD
+    ... nonexecutive    JJ      15      NMOD
+    ... director        NN      12      PMOD
+    ... Nov.    NNP     9       VMOD
+    ... 29      CD      16      NMOD
+    ... .       .       9       VMOD
+    ... \""")
+
+    >>> de = DependencyEvaluator([parsed_sent], [gold_sent])
+    >>> uas, las = de.eval()
+    >>> uas
+    0.8...
+    >>> abs(las - 0.6) < 0.00001
+    True
+    """
+
+    def __init__(self, parsed_sents, gold_sents):
+        """
+        :param parsed_sents: the list of parsed sentences as output by the parser
+        :type parsed_sents: list(DependencyGraph)
+        :param gold_sents: the list of gold-standard sentences
+        :type gold_sents: list(DependencyGraph)
+        """
+        self._parsed_sents = parsed_sents
+        self._gold_sents = gold_sents
+
+    def _remove_punct(self, inStr):
+        """
+        Remove punctuation from a Unicode string.
+        :param inStr: the input string
+        :return: Unicode string with all punctuation removed
+        """
+        punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"])
+        return "".join(x for x in inStr if unicodedata.category(x) not in punc_cat)
+
+    def eval(self):
+        """
+        Return the Unlabeled Attachment Score (UAS) and Labeled Attachment Score (LAS)
+
+        :return: tuple(float, float)
+        """
+        if len(self._parsed_sents) != len(self._gold_sents):
+            raise ValueError("Number of parsed sentences differs from number of gold sentences.")
+
+        corr = 0
+        corrL = 0
+        total = 0
+
+        for i in range(len(self._parsed_sents)):
+            parsed_sent_nodes = self._parsed_sents[i].nodes
+            gold_sent_nodes = self._gold_sents[i].nodes
+
+            if (len(parsed_sent_nodes) != len(gold_sent_nodes)):
+                raise ValueError("Sentences must have equal length.")
+
+            for parsed_node_address, parsed_node in parsed_sent_nodes.items():
+                gold_node = gold_sent_nodes[parsed_node_address]
+
+                if parsed_node["word"] is None:
+                    continue
+                if parsed_node["word"] != gold_node["word"]:
+                    raise ValueError("Sentence sequence is not matched.")
+
+                # Ignore if word is punctuation by default
+                # if (parsed_sent[j]["word"] in string.punctuation):
+                if self._remove_punct(parsed_node["word"]) == "":
+                    continue
+
+                total += 1
+                if parsed_node["head"] == gold_node["head"]:
+                    corr += 1
+                    if parsed_node["rel"] == gold_node["rel"]:
+                        corrL += 1
+
+        return corr / total, corrL / total
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
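
For intuition, both attachment scores are plain ratios over the scored (non-punctuation) tokens. The counts below are hypothetical, chosen only to reproduce the 0.8/0.6 pair from the doctest above:

    total = 15            # hypothetical number of scored (non-punctuation) tokens
    correct_heads = 12    # hypothetical tokens whose head matches the gold head
    correct_labeled = 9   # of those, tokens whose relation label also matches

    uas = correct_heads / float(total)    # 0.8  (unlabeled attachment score)
    las = correct_labeled / float(total)  # 0.6  (labeled attachment score)
    print(uas, las)
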
diff --git a/nltk/parse/featurechart.py b/nltk/parse/featurechart.py
index 90ff716..339a1a4 100644
--- a/nltk/parse/featurechart.py
+++ b/nltk/parse/featurechart.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Chart Parser for Feature-Based Grammars
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Rob Speer <rspeer at mit.edu>
 #         Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 # URL: <http://nltk.org/>
@@ -333,10 +333,12 @@ class FeatureTopDownPredictRule(CachedTopDownPredictRule):
         # If we've already applied this rule to an edge with the same
         # next & end, and the chart & grammar have not changed, then
         # just return (no new edges to add).
-        done = self._done.get((nextsym, index), (None,None))
-        if done[0] is chart and done[1] is grammar: return
+        nextsym_with_bindings = edge.next_with_bindings()
+        done = self._done.get((nextsym_with_bindings, index), (None, None))
+        if done[0] is chart and done[1] is grammar:
+            return
 
-        for prod in grammar.productions(lhs=edge.nextsym()):
+        for prod in grammar.productions(lhs=nextsym):
             # If the left corner in the predicted production is
             # leaf, it must match with the input.
             if prod.rhs():
@@ -347,13 +349,13 @@ class FeatureTopDownPredictRule(CachedTopDownPredictRule):
 
             # We rename vars here, because we don't want variables
             # from the two different productions to match.
-            if unify(prod.lhs(), edge.next_with_bindings(), rename_vars=True):
+            if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True):
                 new_edge = FeatureTreeEdge.from_production(prod, edge.end())
                 if chart.insert(new_edge, ()):
                     yield new_edge
 
         # Record the fact that we've applied this rule.
-        self._done[nextsym, index] = (chart, grammar)
+        self._done[nextsym_with_bindings, index] = (chart, grammar)
 
 
 #////////////////////////////////////////////////////////////
diff --git a/nltk/parse/generate.py b/nltk/parse/generate.py
index ab5ae1f..b24e3c5 100644
--- a/nltk/parse/generate.py
+++ b/nltk/parse/generate.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Generating from a CFG
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/malt.py b/nltk/parse/malt.py
index be27d23..e86dde4 100644
--- a/nltk/parse/malt.py
+++ b/nltk/parse/malt.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function
@@ -87,56 +87,19 @@ class MaltParser(ParserI):
             url='http://www.maltparser.org/',
             verbose=verbose)
 
-    def parse_all(self, sentence, verbose=False):
-        """
-        Use MaltParser to parse a sentence. Takes a sentence as a list of
-        words; it will be automatically tagged with this MaltParser instance's
-        tagger.
-
-        :param sentence: Input sentence to parse
-        :type sentence: list(str)
-        :return: list(DependencyGraph)
-        """
-        return self.parse_sents([sentence], verbose)
-
     def parse_sents(self, sentences, verbose=False):
         """
-        Use MaltParser to parse multiple sentence. Takes multiple sentences as a
+        Use MaltParser to parse multiple sentences. Takes multiple sentences as a
         list where each sentence is a list of words.
         Each sentence will be automatically tagged with this MaltParser instance's
         tagger.
 
         :param sentences: Input sentences to parse
         :type sentences: list(list(str))
-        :return: list(DependencyGraph)
+        :return: iter(DependencyGraph)
         """
         tagged_sentences = [self.tagger.tag(sentence) for sentence in sentences]
-        return self.tagged_parse_sents(tagged_sentences, verbose)
-
-    def parse(self, sentence, verbose=False):
-        """
-        Use MaltParser to parse a sentence. Takes a sentence as a list of words.
-        The sentence will be automatically tagged with this MaltParser instance's
-        tagger.
-
-        :param sentence: Input sentence to parse
-        :type sentence: list(str)
-        :return: ``DependencyGraph`` the dependency graph representation of the sentence
-        """
-        return self.parse_sents([sentence], verbose)[0]
-
-    def raw_parse(self, sentence, verbose=False):
-        """
-        Use MaltParser to parse a sentence. Takes a sentence as a string;
-        before parsing, it will be automatically tokenized and tagged with this
-        MaltParser instance's tagger.
-
-        :param sentence: Input sentence to parse
-        :type sentence: str
-        :return: list(DependencyGraph)
-        """
-        words = word_tokenize(sentence)
-        return self.parse(words, verbose)
+        return iter(self.tagged_parse_sents(tagged_sentences, verbose))
 
     def tagged_parse(self, sentence, verbose=False):
         """
@@ -146,9 +109,9 @@ class MaltParser(ParserI):
 
         :param sentence: Input sentence to parse
         :type sentence: list(tuple(str, str))
-        :return: ``DependencyGraph`` the dependency graph representation of the sentence
+        :return: iter(DependencyGraph) the possible dependency graph representations of the sentence
         """
-        return self.tagged_parse_sents([sentence], verbose)[0]
+        return next(self.tagged_parse_sents([sentence], verbose))
 
     def tagged_parse_sents(self, sentences, verbose=False):
         """
@@ -158,7 +121,7 @@ class MaltParser(ParserI):
 
         :param sentences: Input sentences to parse
         :type sentences: list(list(tuple(str, str)))
-        :return: list(``DependencyGraph``) the dependency graph representation
+        :return: iter(iter(``DependencyGraph``)) the dependency graph representation
                  of each sentence
         """
 
@@ -193,7 +156,8 @@ class MaltParser(ParserI):
                 raise Exception("MaltParser parsing (%s) failed with exit "
                                 "code %d" % (' '.join(cmd), ret))
 
-            return DependencyGraph.load(output_file.name)
+            # Must return iter(iter(DependencyGraph))
+            return (iter([dep_graph]) for dep_graph in DependencyGraph.load(output_file.name))
         finally:
             input_file.close()
             os.remove(input_file.name)
@@ -276,8 +240,10 @@ def demo():
     maltParser = MaltParser()
     maltParser.train([dg1,dg2], verbose=verbose)
 
-    print(maltParser.raw_parse('John sees Mary', verbose=verbose).tree().pprint())
-    print(maltParser.raw_parse('a man runs', verbose=verbose).tree().pprint())
-
+    maltParser.parse_one(['John','sees','Mary'], verbose=verbose).tree().pprint()
+    maltParser.parse_one(['a','man','runs'], verbose=verbose).tree().pprint()
+
+    next(maltParser.tagged_parse([('John','NNP'),('sees','VB'),('Mary','NNP')], verbose)).tree().pprint()
+
 if __name__ == '__main__':
     demo()
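
The MaltParser API now returns iterators rather than lists: parse_sents() and tagged_parse_sents() yield one iterator of DependencyGraph analyses per input sentence, and tagged_parse() yields the analyses for a single sentence. A minimal sketch of the new calling convention, assuming a working MaltParser installation and a trained model (neither is set up here):

    from nltk.parse.malt import MaltParser

    parser = MaltParser()  # assumes malt.jar and a trained model are available

    # tagged_parse() now returns an iterator over DependencyGraph analyses,
    # so take the first analysis with next() instead of indexing a list.
    graph = next(parser.tagged_parse([('John', 'NNP'), ('sees', 'VB'), ('Mary', 'NNP')]))
    print(graph.tree())

    # parse_sents() yields one iterator of analyses per input sentence.
    for analyses in parser.parse_sents([['a', 'man', 'runs']]):
        print(next(analyses).tree())
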
diff --git a/nltk/parse/nonprojectivedependencyparser.py b/nltk/parse/nonprojectivedependencyparser.py
index 1186b2d..6de271e 100644
--- a/nltk/parse/nonprojectivedependencyparser.py
+++ b/nltk/parse/nonprojectivedependencyparser.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #
 # URL: <http://nltk.org/>
@@ -9,15 +9,20 @@
 from __future__ import print_function
 
 import math
+import logging
 
 from nltk.compat import xrange
 
-from nltk.parse.dependencygraph import DependencyGraph, conll_data2
+from nltk.parse.dependencygraph import DependencyGraph
+from nltk.classify import NaiveBayesClassifier
+
+logger = logging.getLogger(__name__)
 
 #################################################################
 # DependencyScorerI - Interface for Graph-Edge Weight Calculation
 #################################################################
 
+
 class DependencyScorerI(object):
     """
     A scorer for calculated the weights on the edges of a weighted
@@ -69,22 +74,31 @@ class DependencyScorerI(object):
         """
         raise NotImplementedError()
 
-
-
 #################################################################
 # NaiveBayesDependencyScorer
 #################################################################
 
+
 class NaiveBayesDependencyScorer(DependencyScorerI):
     """
     A dependency scorer built around a MaxEnt classifier.  In this
     particular class that classifier is a ``NaiveBayesClassifier``.
     It uses head-word, head-tag, child-word, and child-tag features
     for classification.
+
+    >>> from nltk.parse.dependencygraph import DependencyGraph, conll_data2
+
+    >>> graphs = [DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry]
+    >>> npp = ProbabilisticNonprojectiveParser()
+    >>> npp.train(graphs, NaiveBayesDependencyScorer())
+    >>> parses = npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'])
+    >>> len(list(parses))
+    1
+
     """
 
     def __init__(self):
-        pass # Do nothing without throwing error
+        pass  # Do nothing without throwing error
 
     def train(self, graphs):
         """
@@ -100,18 +114,25 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
         # Create training labeled training examples
         labeled_examples = []
         for graph in graphs:
-            for head_node in graph.nodelist:
-                for child_index in range(len(graph.nodelist)):
-                    child_node = graph.get_by_address(child_index)
+            for head_node in graph.nodes.values():
+                for child_index, child_node in graph.nodes.items():
                     if child_index in head_node['deps']:
                         label = "T"
                     else:
                         label = "F"
-                    labeled_examples.append((dict(a=head_node['word'],b=head_node['tag'],c=child_node['word'],d=child_node['tag']), label))
-        # Train the classifier
-        import nltk
-        nltk.usage(nltk.ClassifierI)
-        self.classifier = nltk.classify.NaiveBayesClassifier.train(labeled_examples)
+                    labeled_examples.append(
+                        (
+                            dict(
+                                a=head_node['word'],
+                                b=head_node['tag'],
+                                c=child_node['word'],
+                                d=child_node['tag'],
+                            ),
+                            label,
+                        )
+                    )
+
+        self.classifier = NaiveBayesClassifier.train(labeled_examples)
 
     def score(self, graph):
         """
@@ -127,22 +148,29 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
         """
         # Convert graph to feature representation
         edges = []
-        for i in range(len(graph.nodelist)):
-            for j in range(len(graph.nodelist)):
-                head_node = graph.get_by_address(i)
-                child_node = graph.get_by_address(j)
-                print(head_node)
-                print(child_node)
-                edges.append((dict(a=head_node['word'],b=head_node['tag'],c=child_node['word'],d=child_node['tag'])))
+        for head_node in graph.nodes.values():
+            for child_node in graph.nodes.values():
+                edges.append(
+                    (
+                        dict(
+                            a=head_node['word'],
+                            b=head_node['tag'],
+                            c=child_node['word'],
+                            d=child_node['tag'],
+                        )
+                    )
+                )
+
         # Score edges
         edge_scores = []
         row = []
         count = 0
         for pdist in self.classifier.prob_classify_many(edges):
-            print('%.4f %.4f' % (pdist.prob('T'), pdist.prob('F')))
-            row.append([math.log(pdist.prob("T"))])
+            logger.debug('%.4f %.4f', pdist.prob('T'), pdist.prob('F'))
+            # smoothing in case the probability = 0
+            row.append([math.log(pdist.prob("T")+0.00000000001)])
             count += 1
-            if count == len(graph.nodelist):
+            if count == len(graph.nodes):
                 edge_scores.append(row)
                 row = []
                 count = 0
@@ -168,21 +196,62 @@ class DemoScorer(DependencyScorerI):
 # Non-Projective Probabilistic Parsing
 #################################################################
 
+
 class ProbabilisticNonprojectiveParser(object):
-    """
-    A probabilistic non-projective dependency parser.  Nonprojective
-    dependencies allows for "crossing branches" in the parse tree
-    which is necessary for representing particular linguistic
-    phenomena, or even typical parses in some languages.  This parser
-    follows the MST parsing algorithm, outlined in McDonald(2005),
-    which likens the search for the best non-projective parse to
-    finding the maximum spanning tree in a weighted directed graph.
+    """A probabilistic non-projective dependency parser.
+
+    Nonprojective dependencies allow for "crossing branches" in the parse tree,
+    which is necessary for representing particular linguistic phenomena, or even
+    typical parses in some languages.  This parser follows the MST parsing
+    algorithm, outlined in McDonald (2005), which likens the search for the best
+    non-projective parse to finding the maximum spanning tree in a weighted
+    directed graph.
+
+    >>> class Scorer(DependencyScorerI):
+    ...     def train(self, graphs):
+    ...         pass
+    ...
+    ...     def score(self, graph):
+    ...         return [
+    ...             [[], [5],  [1],  [1]],
+    ...             [[], [],   [11], [4]],
+    ...             [[], [10], [],   [5]],
+    ...             [[], [8],  [8],  []],
+    ...         ]
+
+
+    >>> npp = ProbabilisticNonprojectiveParser()
+    >>> npp.train([], Scorer())
+
+    >>> parses = npp.parse(['v1', 'v2', 'v3'], [None, None, None])
+    >>> len(list(parses))
+    1
+
+    Rule based example
+    ------------------
+
+    >>> from nltk.grammar import DependencyGrammar
+
+    >>> grammar = DependencyGrammar.fromstring('''
+    ... 'taught' -> 'play' | 'man'
+    ... 'man' -> 'the' | 'in'
+    ... 'in' -> 'corner'
+    ... 'corner' -> 'the'
+    ... 'play' -> 'golf' | 'dachshund' | 'to'
+    ... 'dachshund' -> 'his'
+    ... ''')
+
+    >>> ndp = NonprojectiveDependencyParser(grammar)
+    >>> parses = ndp.parse(['the', 'man', 'in', 'the', 'corner', 'taught', 'his', 'dachshund', 'to', 'play', 'golf'])
+    >>> len(list(parses))
+    4
+
     """
     def __init__(self):
         """
         Creates a new non-projective parser.
         """
-        print('initializing prob. nonprojective...')
+        logger.debug('initializing prob. nonprojective...')
 
     def train(self, graphs, dependency_scorer):
         """
@@ -224,11 +293,11 @@ class ProbabilisticNonprojectiveParser(object):
         :type g_graph, b_graph, c_graph: DependencyGraph
         :param g_graph, b_graph, c_graph: Graphs which need to be updated.
         """
-        print('Collapsing nodes...')
+        logger.debug('Collapsing nodes...')
         # Collapse all cycle nodes into v_n+1 in G_Graph
         for cycle_node_index in cycle_path:
             g_graph.remove_by_address(cycle_node_index)
-        g_graph.nodelist.append(new_node)
+        g_graph.add_node(new_node)
         g_graph.redirect_arcs(cycle_path, new_node['address'])
 
     def update_edge_scores(self, new_node, cycle_path):
@@ -241,25 +310,37 @@ class ProbabilisticNonprojectiveParser(object):
         :type cycle_path: A list of integers.
         :param cycle_path: A list of node addresses that belong to the cycle.
         """
-        print('cycle', cycle_path)
+        logger.debug('cycle %s', cycle_path)
+
         cycle_path = self.compute_original_indexes(cycle_path)
-        print('old cycle ', cycle_path)
-        print('Prior to update:\n', self.scores)
+
+        logger.debug('old cycle %s', cycle_path)
+        logger.debug('Prior to update: %s', self.scores)
+
         for i, row in enumerate(self.scores):
             for j, column in enumerate(self.scores[i]):
-                print(self.scores[i][j])
-                if j in cycle_path and not i in cycle_path and len(self.scores[i][j]) > 0:
-                    new_vals = []
+                logger.debug(self.scores[i][j])
+                if (
+                    j in cycle_path
+                    and i not in cycle_path
+                    and self.scores[i][j]
+                ):
                     subtract_val = self.compute_max_subtract_score(j, cycle_path)
-                    print(self.scores[i][j], ' - ', subtract_val)
+
+                    logger.debug('%s - %s', self.scores[i][j], subtract_val)
+
+                    new_vals = []
                     for cur_val in self.scores[i][j]:
                         new_vals.append(cur_val - subtract_val)
+
                     self.scores[i][j] = new_vals
+
         for i, row in enumerate(self.scores):
             for j, cell in enumerate(self.scores[i]):
                 if i in cycle_path and j in cycle_path:
                     self.scores[i][j] = []
-        print('After update:\n', self.scores)
+
+        logger.debug('After update: %s', self.scores)
 
     def compute_original_indexes(self, new_indexes):
         """
@@ -280,7 +361,7 @@ class ProbabilisticNonprojectiveParser(object):
             for new_index in new_indexes:
                 if new_index in self.inner_nodes:
                     for old_val in self.inner_nodes[new_index]:
-                        if not old_val in originals:
+                        if old_val not in originals:
                             originals.append(old_val)
                             swapped = True
                 else:
@@ -308,7 +389,6 @@ class ProbabilisticNonprojectiveParser(object):
                     max_score = subtract_val
         return max_score
 
-
     def best_incoming_arc(self, node_index):
         """
         Returns the source of the best incoming arc to the
@@ -319,27 +399,28 @@ class ProbabilisticNonprojectiveParser(object):
         the node that is arced to.
         """
         originals = self.compute_original_indexes([node_index])
-        print('originals:', originals)
+        logger.debug('originals: %s', originals)
+
         max_arc = None
         max_score = None
         for row_index in range(len(self.scores)):
             for col_index in range(len(self.scores[row_index])):
-#               print self.scores[row_index][col_index]
+                # print self.scores[row_index][col_index]
                 if col_index in originals and (max_score is None or self.scores[row_index][col_index] > max_score):
                     max_score = self.scores[row_index][col_index]
                     max_arc = row_index
-                    print(row_index, ',', col_index)
-        print(max_score)
+                    logger.debug('%s, %s', row_index, col_index)
+
+        logger.debug(max_score)
+
         for key in self.inner_nodes:
             replaced_nodes = self.inner_nodes[key]
             if max_arc in replaced_nodes:
                 return key
+
         return max_arc
 
     def original_best_arc(self, node_index):
-        """
-        ???
-        """
         originals = self.compute_original_indexes([node_index])
         max_arc = None
         max_score = None
@@ -352,7 +433,6 @@ class ProbabilisticNonprojectiveParser(object):
                     max_orig = col_index
         return [max_arc, max_orig]
 
-
     def parse(self, tokens, tags):
         """
         Parses a list of tokens in accordance to the MST parsing algorithm
@@ -369,101 +449,140 @@ class ProbabilisticNonprojectiveParser(object):
         :rtype: iter(DependencyGraph)
         """
         self.inner_nodes = {}
+
         # Initialize g_graph
         g_graph = DependencyGraph()
         for index, token in enumerate(tokens):
-            g_graph.nodelist.append({'word':token, 'tag':tags[index], 'deps':[], 'rel':'NTOP', 'address':index+1})
+            g_graph.nodes[index + 1].update(
+                {
+                    'word': token,
+                    'tag': tags[index],
+                    'rel': 'NTOP',
+                    'address': index + 1,
+                }
+            )
+
         # Fully connect non-root nodes in g_graph
         g_graph.connect_graph()
         original_graph = DependencyGraph()
         for index, token in enumerate(tokens):
-            original_graph.nodelist.append({'word':token, 'tag':tags[index], 'deps':[], 'rel':'NTOP', 'address':index+1})
+            original_graph.nodes[index + 1].update(
+                {
+                    'word': token,
+                    'tag': tags[index],
+                    'rel': 'NTOP',
+                    'address': index+1,
+                }
+            )
 
-        # Initialize b_graph
         b_graph = DependencyGraph()
-        b_graph.nodelist = []
-        # Initialize c_graph
         c_graph = DependencyGraph()
-        c_graph.nodelist = [{'word':token, 'tag':tags[index], 'deps':[],
-                             'rel':'NTOP', 'address':index+1}
-                            for index, token in enumerate(tokens)]
+
+        for index, token in enumerate(tokens):
+            c_graph.nodes[index + 1].update(
+                {
+                    'word': token,
+                    'tag': tags[index],
+                    'rel': 'NTOP',
+                    'address': index + 1,
+                }
+            )
+
         # Assign initial scores to g_graph edges
         self.initialize_edge_scores(g_graph)
-        print(self.scores)
+        logger.debug(self.scores)
         # Initialize a list of unvisited vertices (by node address)
-        unvisited_vertices = [vertex['address'] for vertex in c_graph.nodelist]
+        unvisited_vertices = [
+            vertex['address'] for vertex in c_graph.nodes.values()
+        ]
         # Iterate over unvisited vertices
         nr_vertices = len(tokens)
         betas = {}
-        while len(unvisited_vertices) > 0:
+        while unvisited_vertices:
             # Mark current node as visited
             current_vertex = unvisited_vertices.pop(0)
-            print('current_vertex:', current_vertex)
+            logger.debug('current_vertex: %s', current_vertex)
             # Get corresponding node n_i to vertex v_i
             current_node = g_graph.get_by_address(current_vertex)
-            print('current_node:', current_node)
+            logger.debug('current_node: %s', current_node)
             # Get best in-edge node b for current node
             best_in_edge = self.best_incoming_arc(current_vertex)
             betas[current_vertex] = self.original_best_arc(current_vertex)
-            print('best in arc: ', best_in_edge, ' --> ', current_vertex)
+            logger.debug('best in arc: %s --> %s', best_in_edge, current_vertex)
             # b_graph = Union(b_graph, b)
             for new_vertex in [current_vertex, best_in_edge]:
-                b_graph.add_node({'word':'TEMP', 'deps':[], 'rel': 'NTOP', 'address': new_vertex})
+                b_graph.nodes[new_vertex].update(
+                    {
+                        'word': 'TEMP',
+                        'rel': 'NTOP',
+                        'address': new_vertex,
+                    }
+                )
             b_graph.add_arc(best_in_edge, current_vertex)
             # Beta(current node) = b  - stored for parse recovery
             # If b_graph contains a cycle, collapse it
             cycle_path = b_graph.contains_cycle()
             if cycle_path:
-            # Create a new node v_n+1 with address = len(nodes) + 1
-                new_node = {'word': 'NONE', 'deps':[], 'rel': 'NTOP', 'address': nr_vertices + 1}
-            # c_graph = Union(c_graph, v_n+1)
+                # Create a new node v_n+1 with address = len(nodes) + 1
+                new_node = {
+                    'word': 'NONE',
+                    'rel': 'NTOP',
+                    'address': nr_vertices + 1,
+                }
+                # c_graph = Union(c_graph, v_n+1)
                 c_graph.add_node(new_node)
-            # Collapse all nodes in cycle C into v_n+1
+                # Collapse all nodes in cycle C into v_n+1
                 self.update_edge_scores(new_node, cycle_path)
                 self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph)
                 for cycle_index in cycle_path:
                     c_graph.add_arc(new_node['address'], cycle_index)
-#                   self.replaced_by[cycle_index] = new_node['address']
+                    # self.replaced_by[cycle_index] = new_node['address']
 
                 self.inner_nodes[new_node['address']] = cycle_path
 
-            # Add v_n+1 to list of unvisited vertices
+                # Add v_n+1 to list of unvisited vertices
                 unvisited_vertices.insert(0, nr_vertices + 1)
-            # increment # of nodes counter
+
+                # increment # of nodes counter
                 nr_vertices += 1
-            # Remove cycle nodes from b_graph; B = B - cycle c
+
+                # Remove cycle nodes from b_graph; B = B - cycle c
                 for cycle_node_address in cycle_path:
                     b_graph.remove_by_address(cycle_node_address)
-            print('g_graph:\n', g_graph)
-            print()
-            print('b_graph:\n', b_graph)
-            print()
-            print('c_graph:\n', c_graph)
-            print()
-            print('Betas:\n', betas)
-            print('replaced nodes', self.inner_nodes)
-            print()
-        #Recover parse tree
-        print('Final scores:\n', self.scores)
-        print('Recovering parse...')
+
+            logger.debug('g_graph: %s', g_graph)
+            logger.debug('b_graph: %s', b_graph)
+            logger.debug('c_graph: %s', c_graph)
+            logger.debug('Betas: %s', betas)
+            logger.debug('replaced nodes %s', self.inner_nodes)
+
+        # Recover parse tree
+        logger.debug('Final scores: %s', self.scores)
+
+        logger.debug('Recovering parse...')
         for i in range(len(tokens) + 1, nr_vertices + 1):
             betas[betas[i][1]] = betas[i]
-        print('Betas: ', betas)
-        for node in original_graph.nodelist:
-            node['deps'] = []
+
+        logger.debug('Betas: %s', betas)
+        for node in original_graph.nodes.values():
+            # TODO: It's dangerous to assume that deps is a dictionary just
+            # because it's a default dictionary. Ideally, we should not be
+            # concerned here with how dependencies are stored inside a
+            # dependency graph.
+            node['deps'] = {}
         for i in range(1, len(tokens) + 1):
-#           print i, betas[i]
             original_graph.add_arc(betas[i][0], betas[i][1])
-#       print original_graph
-        print('Done.')
-        yield original_graph
-
 
+        logger.debug('Done.')
+        yield original_graph
 
 #################################################################
 # Rule-based Non-Projective Parser
 #################################################################
 
+
 class NonprojectiveDependencyParser(object):
     """
     A non-projective, rule-based, dependency parser.  This parser
@@ -480,7 +599,7 @@ class NonprojectiveDependencyParser(object):
 
         :param dependency_grammar: a grammar of word-to-word relations.
         :type dependency_grammar: DependencyGrammar
-	    """
+        """
         self._grammar = dependency_grammar
 
     def parse(self, tokens):
@@ -500,15 +619,25 @@ class NonprojectiveDependencyParser(object):
         """
         # Create graph representation of tokens
         self._graph = DependencyGraph()
-        self._graph.nodelist = []  # Remove the default root
+
         for index, token in enumerate(tokens):
-            self._graph.nodelist.append({'word':token, 'deps':[], 'rel':'NTOP', 'address':index})
-        for head_node in self._graph.nodelist:
+            self._graph.nodes[index] = {
+                'word': token,
+                'deps': [],
+                'rel': 'NTOP',
+                'address': index,
+            }
+
+        for head_node in self._graph.nodes.values():
             deps = []
-            for dep_node in self._graph.nodelist:
-                if self._grammar.contains(head_node['word'], dep_node['word']) and not head_node['word'] == dep_node['word']:
+            for dep_node in self._graph.nodes.values():
+                if (
+                    self._grammar.contains(head_node['word'], dep_node['word'])
+                    and head_node['word'] != dep_node['word']
+                ):
                     deps.append(dep_node['address'])
             head_node['deps'] = deps
+
         # Create lattice of possible heads
         roots = []
         possible_heads = []
@@ -547,24 +676,22 @@ class NonprojectiveDependencyParser(object):
                 if not forward:
                     index_on_stack = False
                     for stack_item in stack:
-#                       print stack_item
                         if stack_item[0] == i:
                             index_on_stack = True
                     orig_length = len(possible_heads[i])
-#                   print len(possible_heads[i])
+
                     if index_on_stack and orig_length == 0:
-                        for j in xrange(len(stack) -1, -1, -1):
+                        for j in xrange(len(stack) - 1, -1, -1):
                             stack_item = stack[j]
                             if stack_item[0] == i:
                                 possible_heads[i].append(stack.pop(j)[1])
-#                       print stack
+
                     elif index_on_stack and orig_length > 0:
                         head = possible_heads[i].pop()
                         analysis[i] = head
                         stack.append([i, head])
                         forward = True
 
-#                   print 'Index on stack:', i, index_on_stack
                 if i + 1 == len(possible_heads):
                     analyses.append(analysis[:])
                     forward = False
@@ -576,22 +703,32 @@ class NonprojectiveDependencyParser(object):
         # Filter parses
         # ensure 1 root, every thing has 1 head
         for analysis in analyses:
-            root_count = 0
-            root = []
-            for i, cell in enumerate(analysis):
-                if cell == -1:
-                    root_count += 1
-                    root = i
-            if root_count == 1:
-                graph = DependencyGraph()
-                graph.nodelist[0]['deps'] = root + 1
-                for i in range(len(tokens)):
-                    node = {'word': tokens[i], 'address': i+1}
-                    node['deps'] = [j+1 for j in range(len(tokens)) if analysis[j] == i]
-                    graph.nodelist.append(node)
-#               cycle = graph.contains_cycle()
-#               if not cycle:
-                yield graph
+            if analysis.count(-1) > 1:
+                # there are several root elements!
+                continue
+
+            graph = DependencyGraph()
+            graph.root = graph.nodes[analysis.index(-1) + 1]
+
+            for address, (token, head_index) in enumerate(zip(tokens, analysis), start=1):
+                head_address = head_index + 1
+
+                node = graph.nodes[address]
+                node.update(
+                    {
+                        'word': token,
+                        'address': address,
+                    }
+                )
+
+                if head_address == 0:
+                    rel = 'ROOT'
+                else:
+                    rel = ''
+                graph.nodes[head_index + 1]['deps'][rel].append(address)
+
+            # TODO: check for cycles
+            yield graph
 
 
 #################################################################
@@ -599,7 +736,7 @@ class NonprojectiveDependencyParser(object):
 #################################################################
 
 def demo():
-#   hall_demo()
+    # hall_demo()
     nonprojective_conll_parse_demo()
     rule_based_demo()
 
@@ -610,14 +747,19 @@ def hall_demo():
     for parse_graph in npp.parse(['v1', 'v2', 'v3'], [None, None, None]):
         print(parse_graph)
 
+
 def nonprojective_conll_parse_demo():
-    graphs = [DependencyGraph(entry)
-              for entry in conll_data2.split('\n\n') if entry]
+    from nltk.parse.dependencygraph import conll_data2
+
+    graphs = [
+        DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry
+    ]
     npp = ProbabilisticNonprojectiveParser()
     npp.train(graphs, NaiveBayesDependencyScorer())
     for parse_graph in npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc']):
         print(parse_graph)
 
+
 def rule_based_demo():
     from nltk.grammar import DependencyGrammar
 
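
Most of the changes in this file replace the old list-based DependencyGraph.nodelist attribute with the dict-based nodes mapping, keyed by node address. A minimal sketch of the construction pattern used throughout the patch; node 0 is the implicit root that DependencyGraph creates on its own:

    from nltk.parse.dependencygraph import DependencyGraph

    tokens = ['a', 'man', 'runs']
    tags = ['Det', 'N', 'V']

    g = DependencyGraph()
    for index, token in enumerate(tokens):
        # nodes is a defaultdict keyed by address: first access creates a
        # template node, which update() then fills in.
        g.nodes[index + 1].update(
            {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1}
        )

    for address in range(1, len(tokens) + 1):
        node = g.nodes[address]
        print(address, node['word'], node['tag'])
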
diff --git a/nltk/parse/pchart.py b/nltk/parse/pchart.py
index cacfd3f..1ea0998 100644
--- a/nltk/parse/pchart.py
+++ b/nltk/parse/pchart.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Probabilistic Chart Parsers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
@@ -212,7 +212,7 @@ class BottomUpProbabilisticChartParser(ParserI):
         # Initialize the chart.
         for edge in bu_init.apply(chart, grammar):
             if self._trace > 1:
-                print('  %-50s [%s]' % (chart.pp_edge(edge,width=2),
+                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
                                         edge.prob()))
             queue.append(edge)
 
@@ -227,7 +227,7 @@ class BottomUpProbabilisticChartParser(ParserI):
             # Get the best edge.
             edge = queue.pop()
             if self._trace > 0:
-                print('  %-50s [%s]' % (chart.pp_edge(edge,width=2),
+                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
                                         edge.prob()))
 
             # Apply BU & FR to it.
@@ -294,7 +294,7 @@ class BottomUpProbabilisticChartParser(ParserI):
             split = len(queue)-self.beam_size
             if self._trace > 2:
                 for edge in queue[:split]:
-                    print('  %-50s [DISCARDED]' % chart.pp_edge(edge,2))
+                    print('  %-50s [DISCARDED]' % chart.pretty_format_edge(edge,2))
             del queue[:split]
 
 class InsideChartParser(BottomUpProbabilisticChartParser):
@@ -397,10 +397,47 @@ def demo(choice=None, draw_parses=None, print_parses=None):
     summary of the results are displayed.
     """
     import sys, time
-    from nltk import tokenize, toy_pcfg1, toy_pcfg2
+    from nltk import tokenize
     from nltk.parse import pchart
 
     # Define two demos.  Each demo has a sentence and a grammar.
+    toy_pcfg1 = PCFG.fromstring("""
+    S -> NP VP [1.0]
+    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
+    Det -> 'the' [0.8] | 'my' [0.2]
+    N -> 'man' [0.5] | 'telescope' [0.5]
+    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
+    V -> 'ate' [0.35] | 'saw' [0.65]
+    PP -> P NP [1.0]
+    P -> 'with' [0.61] | 'under' [0.39]
+    """)
+
+    toy_pcfg2 = PCFG.fromstring("""
+    S    -> NP VP         [1.0]
+    VP   -> V NP          [.59]
+    VP   -> V             [.40]
+    VP   -> VP PP         [.01]
+    NP   -> Det N         [.41]
+    NP   -> Name          [.28]
+    NP   -> NP PP         [.31]
+    PP   -> P NP          [1.0]
+    V    -> 'saw'         [.21]
+    V    -> 'ate'         [.51]
+    V    -> 'ran'         [.28]
+    N    -> 'boy'         [.11]
+    N    -> 'cookie'      [.12]
+    N    -> 'table'       [.13]
+    N    -> 'telescope'   [.14]
+    N    -> 'hill'        [.5]
+    Name -> 'Jack'        [.52]
+    Name -> 'Bob'         [.48]
+    P    -> 'with'        [.61]
+    P    -> 'under'       [.39]
+    Det  -> 'the'         [.41]
+    Det  -> 'a'           [.31]
+    Det  -> 'my'          [.28]
+    """)
+
     demos = [('I saw John with my telescope', toy_pcfg1),
              ('the boy saw Jack with Bob under the table with a telescope',
               toy_pcfg2)]
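
The changes to pchart.py are mostly cosmetic: chart.pp_edge() becomes chart.pretty_format_edge(), and the toy grammars are now defined inline in the demo. For reference, a minimal sketch of running one of the probabilistic chart parsers over the first demo sentence with the toy_pcfg1 grammar shown above:

    from nltk.grammar import PCFG
    from nltk.parse import pchart

    toy_pcfg1 = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """)

    parser = pchart.InsideChartParser(toy_pcfg1)
    for tree in parser.parse('I saw John with my telescope'.split()):
        print(tree)
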
diff --git a/nltk/parse/projectivedependencyparser.py b/nltk/parse/projectivedependencyparser.py
index 3398cd7..363f53a 100644
--- a/nltk/parse/projectivedependencyparser.py
+++ b/nltk/parse/projectivedependencyparser.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Jason Narad <jason.narad at gmail.com>
 #
 # URL: <http://nltk.org/>
@@ -12,7 +12,7 @@ from collections import defaultdict
 
 from nltk.grammar import (DependencyProduction, DependencyGrammar,
                           ProbabilisticDependencyGrammar)
-from nltk.parse.dependencygraph import DependencyGraph, conll_data2
+from nltk.parse.dependencygraph import DependencyGraph
 from nltk.internals import raise_unorderable_types
 from nltk.compat import total_ordering, python_2_unicode_compatible
 
@@ -191,7 +191,9 @@ class ProjectiveDependencyParser(object):
 #            malt_format = ""
             for i in range(len(tokens)):
 #                malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
-                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-')
+                # Modified to comply with the new DependencyGraph requirement:
+                # there must be at least one ROOT element.
+                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'ROOT', '-', '-')
             dg = DependencyGraph(conll_format)
 #           if self.meets_arity(dg):
             yield dg.tree()
@@ -232,20 +234,36 @@ class ProjectiveDependencyParser(object):
         return spans
 
 
-
 #################################################################
 # Parsing  with Probabilistic Dependency Grammars
 #################################################################
 
+
 class ProbabilisticProjectiveDependencyParser(object):
-    """
-    A probabilistic, projective dependency parser.  This parser returns
-    the most probable projective parse derived from the probabilistic
-    dependency grammar derived from the train() method.  The probabilistic
-    model is an implementation of Eisner's (1996) Model C, which conditions
-    on head-word, head-tag, child-word, and child-tag.  The decoding
-    uses a bottom-up chart-based span concatenation algorithm that's
-    identical to the one utilized by the rule-based projective parser.
+    """A probabilistic, projective dependency parser.
+
+    This parser returns the most probable projective parse derived from the
+    probabilistic dependency grammar derived from the train() method.  The
+    probabilistic model is an implementation of Eisner's (1996) Model C, which
+    conditions on head-word, head-tag, child-word, and child-tag.  The decoding
+    uses a bottom-up chart-based span concatenation algorithm that's identical
+    to the one utilized by the rule-based projective parser.
+
+    Usage example
+    -------------
+    >>> from nltk.parse.dependencygraph import conll_data2
+
+    >>> graphs = [
+    ... DependencyGraph(entry) for entry in conll_data2.split('\\n\\n') if entry
+    ... ]
+
+    >>> ppdp = ProbabilisticProjectiveDependencyParser()
+    >>> ppdp.train(graphs)
+
+    >>> sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.']
+    >>> list(ppdp.parse(sent))
+    [Tree('zag', ['Cathy', 'hen', Tree('zwaaien', ['wild', '.'])])]
+
     """
 
     def __init__(self):
@@ -253,7 +271,6 @@ class ProbabilisticProjectiveDependencyParser(object):
         Create a new probabilistic dependency parser.  No additional
         operations are necessary.
         """
-        print('')
 
     def parse(self, tokens):
         """
@@ -291,9 +308,11 @@ class ProbabilisticProjectiveDependencyParser(object):
             malt_format = ""
             for i in range(len(tokens)):
                 malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
-                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-')
+                # Modified to comply with the recent DependencyGraph change:
+                # there must be a ROOT element.
+                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'ROOT', '-', '-')
             dg = DependencyGraph(conll_format)
-            score = self.compute_prob(dg)
+            score = self.compute_prob(dg)
             trees.append((score, dg.tree()))
         trees.sort()
         return (tree for (score, tree) in trees)
@@ -346,14 +365,17 @@ class ProbabilisticProjectiveDependencyParser(object):
         events = defaultdict(int)
         tags = {}
         for dg in graphs:
-            for node_index in range(1,len(dg.nodelist)):
-                children = dg.nodelist[node_index]['deps']
+            for node_index in range(1, len(dg.nodes)):
+                # Flatten the deps dict into a list of child addresses;
+                # wrap values() in list() so this also works in Python 3.
+                children = sum(list(dg.nodes[node_index]['deps'].values()), [])
+
                 nr_left_children = dg.left_children(node_index)
                 nr_right_children = dg.right_children(node_index)
                 nr_children = nr_left_children + nr_right_children
                 for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2):
-                    head_word = dg.nodelist[node_index]['word']
-                    head_tag = dg.nodelist[node_index]['tag']
+                    head_word = dg.nodes[node_index]['word']
+                    head_tag = dg.nodes[node_index]['tag']
                     if head_word in tags:
                         tags[head_word].add(head_tag)
                     else:
@@ -365,11 +387,11 @@ class ProbabilisticProjectiveDependencyParser(object):
                     if child_index < 0:
                         array_index = child_index + nr_left_children
                         if array_index >= 0:
-                            child = dg.nodelist[children[array_index]]['word']
-                            child_tag = dg.nodelist[children[array_index]]['tag']
+                            child = dg.nodes[children[array_index]]['word']
+                            child_tag = dg.nodes[children[array_index]]['tag']
                         if child_index != -1:
-                            prev_word = dg.nodelist[children[array_index + 1]]['word']
-                            prev_tag =  dg.nodelist[children[array_index + 1]]['tag']
+                            prev_word = dg.nodes[children[array_index + 1]]['word']
+                            prev_tag = dg.nodes[children[array_index + 1]]['tag']
                         if child != 'STOP':
                             productions.append(DependencyProduction(head_word, [child]))
                         head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % (child, child_tag, prev_tag, head_word, head_tag)
@@ -379,11 +401,11 @@ class ProbabilisticProjectiveDependencyParser(object):
                     elif child_index > 0:
                         array_index = child_index + nr_left_children - 1
                         if array_index < nr_children:
-                            child = dg.nodelist[children[array_index]]['word']
-                            child_tag = dg.nodelist[children[array_index]]['tag']
+                            child = dg.nodes[children[array_index]]['word']
+                            child_tag = dg.nodes[children[array_index]]['tag']
                         if child_index != 1:
-                            prev_word = dg.nodelist[children[array_index - 1]]['word']
-                            prev_tag =  dg.nodelist[children[array_index - 1]]['tag']
+                            prev_word = dg.nodes[children[array_index - 1]]['word']
+                            prev_tag =  dg.nodes[children[array_index - 1]]['tag']
                         if child != 'STOP':
                             productions.append(DependencyProduction(head_word, [child]))
                         head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % (child, child_tag, prev_tag, head_word, head_tag)
@@ -391,7 +413,6 @@ class ProbabilisticProjectiveDependencyParser(object):
                         events[head_event] += 1
                         events[mod_event] += 1
         self._grammar = ProbabilisticDependencyGrammar(productions, events, tags)
-#        print self._grammar
 
     def compute_prob(self, dg):
         """
@@ -405,14 +426,16 @@ class ProbabilisticProjectiveDependencyParser(object):
         :rtype: int
         """
         prob = 1.0
-        for node_index in range(1,len(dg.nodelist)):
-            children = dg.nodelist[node_index]['deps']
+        for node_index in range(1, len(dg.nodes)):
+            # Flatten the deps dict into a list of child addresses (see train()).
+            children = sum(list(dg.nodes[node_index]['deps'].values()), [])
+
             nr_left_children = dg.left_children(node_index)
             nr_right_children = dg.right_children(node_index)
             nr_children = nr_left_children + nr_right_children
             for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2):
-                head_word = dg.nodelist[node_index]['word']
-                head_tag = dg.nodelist[node_index]['tag']
+                head_word = dg.nodes[node_index]['word']
+                head_tag = dg.nodes[node_index]['tag']
                 child = 'STOP'
                 child_tag = 'STOP'
                 prev_word = 'START'
@@ -420,29 +443,40 @@ class ProbabilisticProjectiveDependencyParser(object):
                 if child_index < 0:
                     array_index = child_index + nr_left_children
                     if array_index >= 0:
-                        child = dg.nodelist[children[array_index]]['word']
-                        child_tag = dg.nodelist[children[array_index]]['tag']
+                        child = dg.nodes[children[array_index]]['word']
+                        child_tag = dg.nodes[children[array_index]]['tag']
                     if child_index != -1:
-                        prev_word = dg.nodelist[children[array_index + 1]]['word']
-                        prev_tag =  dg.nodelist[children[array_index + 1]]['tag']
+                        prev_word = dg.nodes[children[array_index + 1]]['word']
+                        prev_tag = dg.nodes[children[array_index + 1]]['tag']
                     head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % (child, child_tag, prev_tag, head_word, head_tag)
                     mod_event = '(mods (%s, %s, %s) left))' % (prev_tag, head_word, head_tag)
                     h_count = self._grammar._events[head_event]
                     m_count = self._grammar._events[mod_event]
-                    prob *= (h_count / m_count)
+
+                    # Guard against events that the grammar does not cover.
+                    if m_count != 0:
+                        prob *= (h_count / m_count)
+                    else:
+                        prob = 0.00000001  # Back off to a very small probability.
+
                 elif child_index > 0:
                     array_index = child_index + nr_left_children - 1
                     if array_index < nr_children:
-                        child = dg.nodelist[children[array_index]]['word']
-                        child_tag = dg.nodelist[children[array_index]]['tag']
+                        child = dg.nodes[children[array_index]]['word']
+                        child_tag = dg.nodes[children[array_index]]['tag']
                     if child_index != 1:
-                        prev_word = dg.nodelist[children[array_index - 1]]['word']
-                        prev_tag =  dg.nodelist[children[array_index - 1]]['tag']
+                        prev_word = dg.nodes[children[array_index - 1]]['word']
+                        prev_tag = dg.nodes[children[array_index - 1]]['tag']
                     head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % (child, child_tag, prev_tag, head_word, head_tag)
                     mod_event = '(mods (%s, %s, %s) right))' % (prev_tag, head_word, head_tag)
                     h_count = self._grammar._events[head_event]
                     m_count = self._grammar._events[mod_event]
-                    prob *= (h_count / m_count)
+
+                    if m_count != 0:
+                        prob *= (h_count / m_count)
+                    else:
+                        prob = 0.00000001  # Back off to a very small probability.
+
         return prob
 
 
@@ -520,21 +554,25 @@ def arity_parse_demo():
     for tree in trees:
         print(tree)
 
+
 def projective_prob_parse_demo():
     """
     A demo showing the training and use of a projective
     dependency parser.
     """
+    from nltk.parse.dependencygraph import conll_data2
+
     graphs = [DependencyGraph(entry)
               for entry in conll_data2.split('\n\n') if entry]
     ppdp = ProbabilisticProjectiveDependencyParser()
     print('Training Probabilistic Projective Dependency Parser...')
     ppdp.train(graphs)
+
     sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.']
     print('Parsing \'', " ".join(sent), '\'...')
     print('Parse:')
     for tree in ppdp.parse(sent):
-    	print(tree)
+        print(tree)
 
 if __name__ == '__main__':
     demo()
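
Besides the nodelist-to-nodes migration, the substantive change in this file is the zero-count guard in compute_prob(): head/modifier events never seen in training no longer cause a ZeroDivisionError; the parse simply receives a very small probability. A minimal sketch of that back-off logic in isolation (scaled_probability is a hypothetical helper, not part of the module):

    from __future__ import division

    def scaled_probability(prob, h_count, m_count):
        # Mirrors the guard added above: multiply in the event probability
        # only when the modifier event was actually observed in training.
        if m_count != 0:
            return prob * (h_count / m_count)
        return 0.00000001  # back off to a very small probability

    print(scaled_probability(0.5, 3, 10))  # 0.15
    print(scaled_probability(0.5, 0, 0))   # 1e-08
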
diff --git a/nltk/parse/recursivedescent.py b/nltk/parse/recursivedescent.py
index 540dfde..2617aaa 100644
--- a/nltk/parse/recursivedescent.py
+++ b/nltk/parse/recursivedescent.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Recursive Descent Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/shiftreduce.py b/nltk/parse/shiftreduce.py
index f1f7ecb..f646412 100644
--- a/nltk/parse/shiftreduce.py
+++ b/nltk/parse/shiftreduce.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Shift-Reduce Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/parse/stanford.py b/nltk/parse/stanford.py
index 284a432..d385700 100644
--- a/nltk/parse/stanford.py
+++ b/nltk/parse/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
@@ -29,10 +29,16 @@ class StanfordParser(ParserI):
     >>> parser=StanfordParser(
     ...     model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
     ... )
-    >>> parser.raw_parse_sents((
+
+    >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog"))
+    [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), 
+    Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), 
+    Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])]
+
+    >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents((
     ...     "the quick brown fox jumps over the lazy dog",
     ...     "the quick grey wolf jumps over the lazy fox"
-    ... ))
+    ... ))], [])
     [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
     Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']),
     Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP',
@@ -40,17 +46,17 @@ class StanfordParser(ParserI):
     [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']),
     Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])]
 
-    >>> parser.parse_sents((
+    >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents((
     ...     "I 'm a dog".split(),
     ...     "This is my friends ' cat ( the tabby )".split(),
-    ... ))
+    ... ))], [])
     [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
     Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
     [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
     Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
     Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
 
-    >>> parser.tagged_parse_sents((
+    >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
     ...     (
     ...         ("The", "DT"),
     ...         ("quick", "JJ"),
@@ -63,7 +69,7 @@ class StanfordParser(ParserI):
     ...         ("dog", "NN"),
     ...         (".", "."),
     ...     ),
-    ... ))
+    ... ))],[])
     [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']),
     Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP',
     [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
@@ -73,7 +79,7 @@ class StanfordParser(ParserI):
 
     def __init__(self, path_to_jar=None, path_to_models_jar=None,
                  model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
-                 encoding='UTF-8', verbose=False, java_options='-mx1000m'):
+                 encoding='utf8', verbose=False, java_options='-mx1000m'):
 
         self._stanford_jar = find_jar(
             self._JAR, path_to_jar,
@@ -103,23 +109,11 @@ class StanfordParser(ParserI):
         cur_lines = []
         for line in output_.splitlines(False):
             if line == '':
-                res.append(Tree.fromstring('\n'.join(cur_lines)))
+                res.append(iter([Tree.fromstring('\n'.join(cur_lines))]))
                 cur_lines = []
             else:
                 cur_lines.append(line)
-        return res
-
-    def parse_all(self, sentence, verbose=False):
-        """
-        Use StanfordParser to parse a sentence. Takes a sentence as a list of
-        words; it will be automatically tagged with this StanfordParser instance's
-        tagger.
-
-        :param sentence: Input sentence to parse
-        :type sentence: list(str)
-        :rtype: Tree
-        """
-        return self.parse_sents([sentence], verbose)
+        return iter(res)
 
     def parse_sents(self, sentences, verbose=False):
         """
@@ -132,7 +126,7 @@ class StanfordParser(ParserI):
 
         :param sentences: Input sentences to parse
         :type sentences: list(list(str))
-        :rtype: list(Tree)
+        :rtype: iter(iter(Tree))
         """
         cmd = [
             'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
@@ -153,9 +147,9 @@ class StanfordParser(ParserI):
 
         :param sentence: Input sentence to parse
         :type sentence: str
-        :rtype: Tree
+        :rtype: iter(Tree)
         """
-        return self.raw_parse_sents((sentence,), verbose)
+        return next(self.raw_parse_sents([sentence], verbose))
 
     def raw_parse_sents(self, sentences, verbose=False):
         """
@@ -165,7 +159,7 @@ class StanfordParser(ParserI):
 
         :param sentences: Input sentences to parse
         :type sentences: list(str)
-        :rtype: list(Tree)
+        :rtype: iter(iter(Tree))
         """
         cmd = [
             'edu.stanford.nlp.parser.lexparser.LexicalizedParser',
@@ -183,9 +177,9 @@ class StanfordParser(ParserI):
 
         :param sentence: Input sentence to parse
         :type sentence: list(tuple(str, str))
-        :rtype: Tree
+        :rtype: iter(Tree)
         """
-        return self.tagged_parse_sents([sentence], verbose)[0]
+        return next(self.tagged_parse_sents([sentence], verbose))
 
     def tagged_parse_sents(self, sentences, verbose=False):
         """
@@ -195,7 +189,7 @@ class StanfordParser(ParserI):
 
         :param sentences: Input sentences to parse
         :type sentences: list(list(tuple(str, str)))
-        :rtype: Tree
+        :rtype: iter(iter(Tree))
         """
         tag_separator = '/'
         cmd = [
@@ -253,3 +247,7 @@ def setup_module(module):
         )
     except LookupError:
         raise SkipTest('doctests from nltk.parse.stanford are skipped because the stanford parser jar doesn\'t exist')
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
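
As with MaltParser, the StanfordParser methods now return iterators: raw_parse() and tagged_parse() yield Tree objects for a single sentence, while the *_sents() variants yield one iterator of trees per input sentence. A minimal consumption sketch, assuming the Stanford parser jars are installed and discoverable (e.g. via CLASSPATH):

    from nltk.parse.stanford import StanfordParser

    parser = StanfordParser(
        model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz'
    )

    # raw_parse() returns iter(Tree).
    for tree in parser.raw_parse('the quick brown fox jumps over the lazy dog'):
        tree.pprint()

    # raw_parse_sents() returns iter(iter(Tree)): one inner iterator per sentence.
    for sentence_trees in parser.raw_parse_sents([
        'the quick brown fox jumps over the lazy dog',
        'the quick grey wolf jumps over the lazy fox',
    ]):
        for tree in sentence_trees:
            tree.pprint()
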
diff --git a/nltk/parse/transitionparser.py b/nltk/parse/transitionparser.py
new file mode 100644
index 0000000..8a678d6
--- /dev/null
+++ b/nltk/parse/transitionparser.py
@@ -0,0 +1,773 @@
+# Natural Language Toolkit: Arc-Standard and Arc-eager Transition Based Parsers
+#
+# Author: Long Duong <longdt219 at gmail.com>
+#
+# Copyright (C) 2001-2015 NLTK Project
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tempfile
+import pickle
+
+from os import remove
+from copy import deepcopy
+from operator import itemgetter
+try:
+    from scipy import sparse
+    from numpy import array
+    from sklearn.datasets import load_svmlight_file
+    from sklearn import svm
+except ImportError:
+    pass
+
+from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
+
+
+
+class Configuration(object):
+    """
+    Class for holding a configuration, which is a partial analysis of the input sentence.
+    The transition-based parser aims at finding a set of operators that transfer the initial
+    configuration to the terminal configuration.
+
+    The configuration includes:
+        - Stack: for storing partially processed words
+        - Buffer: for storing the remaining input words
+        - Set of arcs: for storing the partially built dependency tree
+
+    This class also provides a method to represent a configuration as a list of features.
+    """
+
+    def __init__(self, dep_graph):
+        """
+        :param dep_graph: the representation of an input in the form of a dependency graph.
+        :type dep_graph: DependencyGraph where the dependencies are not specified.
+        """
+        # dep_graph.nodes contains the tokens of the sentence, keyed by address
+        self.stack = [0]  # The root element
+        self.buffer = list(range(1, len(dep_graph.nodes)))  # The rest is in the buffer
+        self.arcs = []  # Empty set of arcs
+        self._tokens = dep_graph.nodes
+        self._max_address = len(self.buffer)
+
+    def __str__(self):
+        return 'Stack : ' + \
+            str(self.stack) + '  Buffer : ' + str(self.buffer) + '   Arcs : ' + str(self.arcs)
+
+    def _check_informative(self, feat, flag=False):
+        """
+        Check whether a feature is informative.
+        The flag controls whether "_" counts as informative.
+        """
+        if feat is None:
+            return False
+        if feat == '':
+            return False
+        if flag is False:
+            if feat == '_':
+                return False
+        return True
+
+    def extract_features(self):
+        """
+        Extract the set of features for the current configuration. Implements the standard
+        features described in Table 3.2 (page 31) of Dependency Parsing by Sandra Kubler,
+        Ryan McDonald, and Joakim Nivre.
+        Please note that these features are very basic.
+        :return: list(str)
+        """
+        result = []
+        # TODO: a richer feature set could be used for better
+        # performance.
+        if len(self.stack) > 0:
+            # Stack 0
+            stack_idx0 = self.stack[len(self.stack) - 1]
+            token = self._tokens[stack_idx0]
+            if self._check_informative(token['word'], True):
+                result.append('STK_0_FORM_' + token['word'])
+            if 'lemma' in token and self._check_informative(token['lemma']):
+                result.append('STK_0_LEMMA_' + token['lemma'])
+            if self._check_informative(token['tag']):
+                result.append('STK_0_POS_' + token['tag'])
+            if 'feats' in token and self._check_informative(token['feats']):
+                feats = token['feats'].split("|")
+                for feat in feats:
+                    result.append('STK_0_FEATS_' + feat)
+            # Stack 1
+            if len(self.stack) > 1:
+                stack_idx1 = self.stack[len(self.stack) - 2]
+                token = self._tokens[stack_idx1]
+                if self._check_informative(token['tag']):
+                    result.append('STK_1_POS_' + token['tag'])
+
+            # Left most, right most dependency of stack[0]
+            left_most = 1000000
+            right_most = -1
+            dep_left_most = ''
+            dep_right_most = ''
+            for (wi, r, wj) in self.arcs:
+                if wi == stack_idx0:
+                    if (wj > wi) and (wj > right_most):
+                        right_most = wj
+                        dep_right_most = r
+                    if (wj < wi) and (wj < left_most):
+                        left_most = wj
+                        dep_left_most = r
+            if self._check_informative(dep_left_most):
+                result.append('STK_0_LDEP_' + dep_left_most)
+            if self._check_informative(dep_right_most):
+                result.append('STK_0_RDEP_' + dep_right_most)
+
+        # Check buffer[0]
+        if len(self.buffer) > 0:
+            # Buffer 0
+            buffer_idx0 = self.buffer[0]
+            token = self._tokens[buffer_idx0]
+            if self._check_informative(token['word'], True):
+                result.append('BUF_0_FORM_' + token['word'])
+            if 'lemma' in token and self._check_informative(token['lemma']):
+                result.append('BUF_0_LEMMA_' + token['lemma'])
+            if self._check_informative(token['tag']):
+                result.append('BUF_0_POS_' + token['tag'])
+            if 'feats' in token and self._check_informative(token['feats']):
+                feats = token['feats'].split("|")
+                for feat in feats:
+                    result.append('BUF_0_FEATS_' + feat)
+            # Buffer 1
+            if len(self.buffer) > 1:
+                buffer_idx1 = self.buffer[1]
+                token = self._tokens[buffer_idx1]
+                if self._check_informative(token['word'], True):
+                    result.append('BUF_1_FORM_' + token['word'])
+                if self._check_informative(token['tag']):
+                    result.append('BUF_1_POS_' + token['tag'])
+            if len(self.buffer) > 2:
+                buffer_idx2 = self.buffer[2]
+                token = self._tokens[buffer_idx2]
+                if self._check_informative(token['tag']):
+                    result.append('BUF_2_POS_' + token['tag'])
+            if len(self.buffer) > 3:
+                buffer_idx3 = self.buffer[3]
+                token = self._tokens[buffer_idx3]
+                if self._check_informative(token['tag']):
+                    result.append('BUF_3_POS_' + token['tag'])
+            # Left-most and right-most dependents of buffer[0]
+            left_most = 1000000
+            right_most = -1
+            dep_left_most = ''
+            dep_right_most = ''
+            for (wi, r, wj) in self.arcs:
+                if wi == buffer_idx0:
+                    if (wj > wi) and (wj > right_most):
+                        right_most = wj
+                        dep_right_most = r
+                    if (wj < wi) and (wj < left_most):
+                        left_most = wj
+                        dep_left_most = r
+            if self._check_informative(dep_left_most):
+                result.append('BUF_0_LDEP_' + dep_left_most)
+            if self._check_informative(dep_right_most):
+                result.append('BUF_0_RDEP_' + dep_right_most)
+
+        return result
+
+
+class Transition(object):
+    """
+    This class defines the set of transitions which are applied to a configuration to obtain another configuration.
+    Note that the set of transitions differs between parsing algorithms.
+    """
+    # Define set of transitions
+    LEFT_ARC = 'LEFTARC'
+    RIGHT_ARC = 'RIGHTARC'
+    SHIFT = 'SHIFT'
+    REDUCE = 'REDUCE'
+
+    def __init__(self, alg_option):
+        """
+        :param alg_option: the algorithm option of this parser. Currently supports the `arc-standard` and `arc-eager` algorithms.
+        :type alg_option: str
+        """
+        self._algo = alg_option
+        if alg_option not in [
+                TransitionParser.ARC_STANDARD,
+                TransitionParser.ARC_EAGER]:
+            raise ValueError(" Currently we only support %s and %s " %
+                                        (TransitionParser.ARC_STANDARD, TransitionParser.ARC_EAGER))
+
+    def left_arc(self, conf, relation):
+        """
+        Note that the left-arc operation is the same for arc-standard and arc-eager except for its precondition.
+            :param conf: the current configuration, which is modified in place
+            :return: None, or -1 if the precondition is not satisfied
+        """
+        if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0):
+            return -1
+        if conf.buffer[0] == 0:
+            # buffer[0] is the Root element
+            return -1
+
+        idx_wi = conf.stack[len(conf.stack) - 1]
+
+        flag = True
+        if self._algo == TransitionParser.ARC_EAGER:
+            for (idx_parent, r, idx_child) in conf.arcs:
+                if idx_child == idx_wi:
+                    flag = False
+
+        if flag:
+            conf.stack.pop()
+            idx_wj = conf.buffer[0]
+            conf.arcs.append((idx_wj, relation, idx_wi))
+        else:
+            return -1
+
+    def right_arc(self, conf, relation):
+        """
+        Note that the right-arc operation is DIFFERENT for arc-standard and arc-eager.
+            :param conf: the current configuration, which is modified in place
+            :return: None, or -1 if the precondition is not satisfied
+        """
+        if (len(conf.buffer) <= 0) or (len(conf.stack) <= 0):
+            return -1
+        if self._algo == TransitionParser.ARC_STANDARD:
+            idx_wi = conf.stack.pop()
+            idx_wj = conf.buffer[0]
+            conf.buffer[0] = idx_wi
+            conf.arcs.append((idx_wi, relation, idx_wj))
+        else:  # arc-eager
+            idx_wi = conf.stack[len(conf.stack) - 1]
+            idx_wj = conf.buffer.pop(0)
+            conf.stack.append(idx_wj)
+            conf.arcs.append((idx_wi, relation, idx_wj))
+
+    def reduce(self, conf):
+        """
+        Note that the reduce operation is only available for arc-eager.
+            :param conf: the current configuration, which is modified in place
+            :return: None, or -1 if the precondition is not satisfied
+        """
+
+        if self._algo != TransitionParser.ARC_EAGER:
+            return -1
+        if len(conf.stack) <= 0:
+            return -1
+
+        idx_wi = conf.stack[len(conf.stack) - 1]
+        flag = False
+        for (idx_parent, r, idx_child) in conf.arcs:
+            if idx_child == idx_wi:
+                flag = True
+        if flag:
+            conf.stack.pop()  # reduce it
+        else:
+            return -1
+
+    def shift(self, conf):
+        """
+        Note that the shift operation is the SAME for arc-standard and arc-eager.
+            :param conf: the current configuration, which is modified in place
+            :return: None, or -1 if the precondition is not satisfied
+        """
+        if len(conf.buffer) <= 0:
+            return -1
+        idx_wi = conf.buffer.pop(0)
+        conf.stack.append(idx_wi)
+
+
+class TransitionParser(ParserI):
+
+    """
+    Class for the transition-based parser. Implements two algorithms: "arc-standard" and "arc-eager".
+    """
+    ARC_STANDARD = 'arc-standard'
+    ARC_EAGER = 'arc-eager'
+
+    def __init__(self, algorithm):
+        """
+        :param algorithm: the algorithm option of this parser. Currently supports the `arc-standard` and `arc-eager` algorithms.
+        :type algorithm: str
+        """
+        if not(algorithm in [self.ARC_STANDARD, self.ARC_EAGER]):
+            raise ValueError(" Currently we only support %s and %s " %
+                                        (self.ARC_STANDARD, self.ARC_EAGER))
+        self._algorithm = algorithm
+
+        self._dictionary = {}
+        self._transition = {}
+        self._match_transition = {}
+
+    def _get_dep_relation(self, idx_parent, idx_child, depgraph):
+        p_node = depgraph.nodes[idx_parent]
+        c_node = depgraph.nodes[idx_child]
+
+        if c_node['word'] is None:
+            return None  # Root word
+
+        if c_node['head'] == p_node['address']:
+            return c_node['rel']
+        else:
+            return None
+
+    def _convert_to_binary_features(self, features):
+        """
+        :param features: list of feature strings to convert to binary features
+        :type features: list(str)
+        :return: string of binary features in libsvm format, i.e. 'featureID:value' pairs
+        """
+        unsorted_result = []
+        for feature in features:
+            self._dictionary.setdefault(feature, len(self._dictionary))
+            unsorted_result.append(self._dictionary[feature])
+
+        # Default value of each feature is 1.0
+        return ' '.join(str(featureID) + ':1.0' for featureID in sorted(unsorted_result))
+
+    def _is_projective(self, depgraph):
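+        # A dependency graph is projective if no two arcs cross: for every arc
+        # (parent, child), no word strictly between them may be linked to a word
+        # outside the [child, parent] span.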
+        arc_list = []
+        for key in depgraph.nodes:
+            node = depgraph.nodes[key]
+            if 'head' in node:
+                childIdx = node['address']
+                parentIdx = node['head']
+                arc_list.append((parentIdx, childIdx))
+
+        for (parentIdx, childIdx) in arc_list:
+            # Ensure that childIdx < parentIdx
+            if childIdx > parentIdx:
+                temp = childIdx
+                childIdx = parentIdx
+                parentIdx = temp
+            for k in range(childIdx + 1, parentIdx):
+                for m in range(len(depgraph.nodes)):
+                    if (m < childIdx) or (m > parentIdx):
+                        if (k, m) in arc_list:
+                            return False
+                        if (m, k) in arc_list:
+                            return False
+        return True
+
+    def _write_to_file(self, key, binary_features, input_file):
+        """
+        Write the binary features to the input file and update the transition dictionary.
+        """
+        self._transition.setdefault(key, len(self._transition) + 1)
+        self._match_transition[self._transition[key]] = key
+
+        input_str = str(self._transition[key]) + ' ' + binary_features + '\n'
+        input_file.write(input_str.encode('utf-8'))
+
+    def _create_training_examples_arc_std(self, depgraphs, input_file):
+        """
+        Create the training examples in libsvm format and write them to input_file.
+        Reference: page 32, chapter 3 of Dependency Parsing by Sandra Kubler, Ryan McDonald and Joakim Nivre (2009)
+        """
+        operation = Transition(self.ARC_STANDARD)
+        count_proj = 0
+        training_seq = []
+
+        for depgraph in depgraphs:
+            if not self._is_projective(depgraph):
+                continue
+
+            count_proj += 1
+            conf = Configuration(depgraph)
+            while len(conf.buffer) > 0:
+                b0 = conf.buffer[0]
+                features = conf.extract_features()
+                binary_features = self._convert_to_binary_features(features)
+
+                if len(conf.stack) > 0:
+                    s0 = conf.stack[len(conf.stack) - 1]
+                    # Left-arc operation
+                    rel = self._get_dep_relation(b0, s0, depgraph)
+                    if rel is not None:
+                        key = Transition.LEFT_ARC + ':' + rel
+                        self._write_to_file(key, binary_features, input_file)
+                        operation.left_arc(conf, rel)
+                        training_seq.append(key)
+                        continue
+
+                    # Right-arc operation
+                    rel = self._get_dep_relation(s0, b0, depgraph)
+                    if rel is not None:
+                        precondition = True
+                        # Get the max-index of buffer
+                        maxID = conf._max_address
+
+                        for w in range(maxID + 1):
+                            if w != b0:
+                                relw = self._get_dep_relation(b0, w, depgraph)
+                                if relw is not None:
+                                    if (b0, relw, w) not in conf.arcs:
+                                        precondition = False
+
+                        if precondition:
+                            key = Transition.RIGHT_ARC + ':' + rel
+                            self._write_to_file(
+                                key,
+                                binary_features,
+                                input_file)
+                            operation.right_arc(conf, rel)
+                            training_seq.append(key)
+                            continue
+
+                # Shift operation as the default
+                key = Transition.SHIFT
+                self._write_to_file(key, binary_features, input_file)
+                operation.shift(conf)
+                training_seq.append(key)
+
+        print(" Number of training examples : " + str(len(depgraphs)))
+        print(" Number of valid (projective) examples : " + str(count_proj))
+        return training_seq
+
+    def _create_training_examples_arc_eager(self, depgraphs, input_file):
+        """
+        Create the training examples in libsvm format and write them to input_file.
+        Reference: 'A Dynamic Oracle for Arc-Eager Dependency Parsing' by Yoav Goldberg and Joakim Nivre
+        """
+        operation = Transition(self.ARC_EAGER)
+        countProj = 0
+        training_seq = []
+
+        for depgraph in depgraphs:
+            if not self._is_projective(depgraph):
+                continue
+
+            countProj += 1
+            conf = Configuration(depgraph)
+            while len(conf.buffer) > 0:
+                b0 = conf.buffer[0]
+                features = conf.extract_features()
+                binary_features = self._convert_to_binary_features(features)
+
+                if len(conf.stack) > 0:
+                    s0 = conf.stack[len(conf.stack) - 1]
+                    # Left-arc operation
+                    rel = self._get_dep_relation(b0, s0, depgraph)
+                    if rel is not None:
+                        key = Transition.LEFT_ARC + ':' + rel
+                        self._write_to_file(key, binary_features, input_file)
+                        operation.left_arc(conf, rel)
+                        training_seq.append(key)
+                        continue
+
+                    # Right-arc operation
+                    rel = self._get_dep_relation(s0, b0, depgraph)
+                    if rel is not None:
+                        key = Transition.RIGHT_ARC + ':' + rel
+                        self._write_to_file(key, binary_features, input_file)
+                        operation.right_arc(conf, rel)
+                        training_seq.append(key)
+                        continue
+
+                    # reduce operation
+                    flag = False
+                    for k in range(s0):
+                        if self._get_dep_relation(k, b0, depgraph) is not None:
+                            flag = True
+                        if self._get_dep_relation(b0, k, depgraph) is not None:
+                            flag = True
+                    if flag:
+                        key = Transition.REDUCE
+                        self._write_to_file(key, binary_features, input_file)
+                        operation.reduce(conf)
+                        training_seq.append(key)
+                        continue
+
+                # Shift operation as the default
+                key = Transition.SHIFT
+                self._write_to_file(key, binary_features, input_file)
+                operation.shift(conf)
+                training_seq.append(key)
+
+        print(" Number of training examples : " + str(len(depgraphs)))
+        print(" Number of valid (projective) examples : " + str(countProj))
+        return training_seq
+
+    def train(self, depgraphs, modelfile):
+        """
+        :param depgraphs: list of DependencyGraph objects as the training data
+        :type depgraphs: list(DependencyGraph)
+        :param modelfile: file name under which to save the trained model
+        :type modelfile: str
+        """
+
+        try:
+            input_file = tempfile.NamedTemporaryFile(
+                prefix='transition_parse.train',
+                dir=tempfile.gettempdir(),
+                delete=False)
+
+            if self._algorithm == self.ARC_STANDARD:
+                self._create_training_examples_arc_std(depgraphs, input_file)
+            else:
+                self._create_training_examples_arc_eager(depgraphs, input_file)
+
+            input_file.close()
+            # Using the temporary file to train the libsvm classifier
+            x_train, y_train = load_svmlight_file(input_file.name)
+            # The parameters are set according to the paper:
+            # 'Algorithms for Deterministic Incremental Dependency Parsing' by Joakim Nivre
+            # TODO: probability=True is very slow because of the internal
+            # cross-validation; the speed here needs to be improved.
+            model = svm.SVC(
+                kernel='poly',
+                degree=2,
+                coef0=0,
+                gamma=0.2,
+                C=0.5,
+                verbose=True,
+                probability=True)
+
+            model.fit(x_train, y_train)
+            # Save the model to file name (as pickle)
+            pickle.dump(model, open(modelfile, 'wb'))
+        finally:
+            remove(input_file.name)
+
+    def parse(self, depgraphs, modelFile):
+        """
+        :param depgraphs: the list of test sentences, each represented as a dependency graph whose 'head' information is a dummy value
+        :type depgraphs: list(DependencyGraph)
+        :param modelFile: the model file
+        :type modelFile: str
+        :return: list(DependencyGraph) with the 'head' and 'rel' information filled in
+        """
+        result = []
+        # First load the model
+        model = pickle.load(open(modelFile, 'rb'))
+        operation = Transition(self._algorithm)
+
+        for depgraph in depgraphs:
+            conf = Configuration(depgraph)
+            while len(conf.buffer) > 0:
+                features = conf.extract_features()
+                col = []
+                row = []
+                data = []
+                for feature in features:
+                    if feature in self._dictionary:
+                        col.append(self._dictionary[feature])
+                        row.append(0)
+                        data.append(1.0)
+                np_col = array(sorted(col))  # NB : index must be sorted
+                np_row = array(row)
+                np_data = array(data)
+
+                x_test = sparse.csr_matrix((np_data, (np_row, np_col)), shape=(1, len(self._dictionary)))
+
+                # It would be best to use the decision function as follows, BUT it is not yet supported for sparse SVM.
+                # Using the decision function to build the votes array:
+                #dec_func = model.decision_function(x_test)[0]
+                #votes = {}
+                #k = 0
+                # for i in range(len(model.classes_)):
+                #    for j in range(i+1, len(model.classes_)):
+                #        #if  dec_func[k] > 0:
+                #            votes.setdefault(i,0)
+                #            votes[i] +=1
+                #        else:
+                #           votes.setdefault(j,0)
+                #           votes[j] +=1
+                #        k +=1
+                # Sort votes according to the values
+                #sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True)
+
+                # We will use predict_proba instead of decision_function
+                prob_dict = {}
+                pred_prob = model.predict_proba(x_test)[0]
+                for i in range(len(pred_prob)):
+                    prob_dict[i] = pred_prob[i]
+                sorted_Prob = sorted(
+                    prob_dict.items(),
+                    key=itemgetter(1),
+                    reverse=True)
+
+                # Note that SHIFT is always a valid operation
+                for (y_pred_idx, confidence) in sorted_Prob:
+                    #y_pred = model.predict(x_test)[0]
+                    # Map the prediction back to a transition
+                    y_pred = model.classes_[y_pred_idx]
+
+                    if y_pred in self._match_transition:
+                        strTransition = self._match_transition[y_pred]
+                        baseTransition = strTransition.split(":")[0]
+
+                        if baseTransition == Transition.LEFT_ARC:
+                            if operation.left_arc(conf, strTransition.split(":")[1]) != -1:
+                                break
+                        elif baseTransition == Transition.RIGHT_ARC:
+                            if operation.right_arc(conf, strTransition.split(":")[1]) != -1:
+                                break
+                        elif baseTransition == Transition.REDUCE:
+                            if operation.reduce(conf) != -1:
+                                break
+                        elif baseTransition == Transition.SHIFT:
+                            if operation.shift(conf) != -1:
+                                break
+                    else:
+                        raise ValueError("The predicted transition is not recognized, expected errors")
+
+            # Finished with the operations; build the dependency graph from conf.arcs
+
+            new_depgraph = deepcopy(depgraph)
+            for key in new_depgraph.nodes:
+                node = new_depgraph.nodes[key]
+                node['rel'] = ''
+                # By default, every token depends on the Root
+                node['head'] = 0
+            for (head, rel, child) in conf.arcs:
+                c_node = new_depgraph.nodes[child]
+                c_node['head'] = head
+                c_node['rel'] = rel
+            result.append(new_depgraph)
+
+        return result
+
+
+def demo():
+    """
+    >>> from nltk.parse import DependencyGraph, DependencyEvaluator
+    >>> from nltk.parse.transitionparser import TransitionParser, Configuration, Transition
+    >>> gold_sent = DependencyGraph(\"""
+    ... Economic  JJ     2      ATT
+    ... news  NN     3       SBJ
+    ... has       VBD       0       ROOT
+    ... little      JJ      5       ATT
+    ... effect   NN     3       OBJ
+    ... on     IN      5       ATT
+    ... financial       JJ       8       ATT
+    ... markets    NNS      6       PC
+    ... .    .      3       PU
+    ... \""")
+
+    >>> conf = Configuration(gold_sent)
+
+    ###################### Check the Initial Feature ########################
+
+    >>> print(', '.join(conf.extract_features()))
+    STK_0_POS_TOP, BUF_0_FORM_Economic, BUF_0_LEMMA_Economic, BUF_0_POS_JJ, BUF_1_FORM_news, BUF_1_POS_NN, BUF_2_POS_VBD, BUF_3_POS_JJ
+
+    ###################### Check The Transition #######################
+    Check the Initialized Configuration
+    >>> print(conf)
+    Stack : [0]  Buffer : [1, 2, 3, 4, 5, 6, 7, 8, 9]   Arcs : []
+
+    A. Do some transition checks for ARC-STANDARD
+
+    >>> operation = Transition('arc-standard')
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf, "ATT")
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf,"SBJ")
+    >>> operation.shift(conf)
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf, "ATT")
+    >>> operation.shift(conf)
+    >>> operation.shift(conf)
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf, "ATT")
+
+    Middle Configuration and Features Check
+    >>> print(conf)
+    Stack : [0, 3, 5, 6]  Buffer : [8, 9]   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7)]
+
+    >>> print(', '.join(conf.extract_features()))
+    STK_0_FORM_on, STK_0_LEMMA_on, STK_0_POS_IN, STK_1_POS_NN, BUF_0_FORM_markets, BUF_0_LEMMA_markets, BUF_0_POS_NNS, BUF_1_FORM_., BUF_1_POS_., BUF_0_LDEP_ATT
+
+    >>> operation.right_arc(conf, "PC")
+    >>> operation.right_arc(conf, "ATT")
+    >>> operation.right_arc(conf, "OBJ")
+    >>> operation.shift(conf)
+    >>> operation.right_arc(conf, "PU")
+    >>> operation.right_arc(conf, "ROOT")
+    >>> operation.shift(conf)
+
+    Terminated Configuration Check
+    >>> print(conf)
+    Stack : [0]  Buffer : []   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (5, 'ATT', 4), (8, 'ATT', 7), (6, 'PC', 8), (5, 'ATT', 6), (3, 'OBJ', 5), (3, 'PU', 9), (0, 'ROOT', 3)]
+
+
+    B. Do some transition checks for ARC-EAGER
+
+    >>> conf = Configuration(gold_sent)
+    >>> operation = Transition('arc-eager')
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf,'ATT')
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf,'SBJ')
+    >>> operation.right_arc(conf,'ROOT')
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf,'ATT')
+    >>> operation.right_arc(conf,'OBJ')
+    >>> operation.right_arc(conf,'ATT')
+    >>> operation.shift(conf)
+    >>> operation.left_arc(conf,'ATT')
+    >>> operation.right_arc(conf,'PC')
+    >>> operation.reduce(conf)
+    >>> operation.reduce(conf)
+    >>> operation.reduce(conf)
+    >>> operation.right_arc(conf,'PU')
+    >>> print(conf)
+    Stack : [0, 3, 9]  Buffer : []   Arcs : [(2, 'ATT', 1), (3, 'SBJ', 2), (0, 'ROOT', 3), (5, 'ATT', 4), (3, 'OBJ', 5), (5, 'ATT', 6), (8, 'ATT', 7), (6, 'PC', 8), (3, 'PU', 9)]
+
+    ###################### Check The Training Function #######################
+
+    A. Check the ARC-STANDARD training
+    >>> import tempfile
+    >>> import os
+    >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False)
+
+    >>> parser_std = TransitionParser('arc-standard')
+    >>> print(', '.join(parser_std._create_training_examples_arc_std([gold_sent], input_file)))
+     Number of training examples : 1
+     Number of valid (projective) examples : 1
+    SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, SHIFT, SHIFT, LEFTARC:ATT, SHIFT, SHIFT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, RIGHTARC:ATT, RIGHTARC:OBJ, SHIFT, RIGHTARC:PU, RIGHTARC:ROOT, SHIFT
+
+    >>> parser_std.train([gold_sent],'temp.arcstd.model')
+     Number of training examples : 1
+     Number of valid (projective) examples : 1
+    ...
+    >>> remove(input_file.name)
+
+    B. Check the ARC-EAGER training
+
+    >>> input_file = tempfile.NamedTemporaryFile(prefix='transition_parse.train', dir=tempfile.gettempdir(),delete=False)
+    >>> parser_eager = TransitionParser('arc-eager')
+    >>> print(', '.join(parser_eager._create_training_examples_arc_eager([gold_sent], input_file)))
+     Number of training examples : 1
+     Number of valid (projective) examples : 1
+    SHIFT, LEFTARC:ATT, SHIFT, LEFTARC:SBJ, RIGHTARC:ROOT, SHIFT, LEFTARC:ATT, RIGHTARC:OBJ, RIGHTARC:ATT, SHIFT, LEFTARC:ATT, RIGHTARC:PC, REDUCE, REDUCE, REDUCE, RIGHTARC:PU
+
+    >>> parser_eager.train([gold_sent],'temp.arceager.model')
+     Number of training examples : 1
+     Number of valid (projective) examples : 1
+    ...
+
+    >>> remove(input_file.name)
+
+    ###################### Check The Parsing Function ########################
+
+    A. Check the ARC-STANDARD parser
+
+    >>> result = parser_std.parse([gold_sent], 'temp.arcstd.model')
+    >>> de = DependencyEvaluator(result, [gold_sent])
+    >>> print(de.eval())
+    (0.125, 0.0)
+
+    B. Check the ARC-EAGER parser
+    >>> result = parser_eager.parse([gold_sent], 'temp.arceager.model')
+    >>> de = DependencyEvaluator(result, [gold_sent])
+    >>> print(de.eval())
+    (0.0, 0.0)
+
+    Note that the results are very poor because there is only one training example.
+    """
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
diff --git a/nltk/parse/util.py b/nltk/parse/util.py
index acf6151..6120760 100644
--- a/nltk/parse/util.py
+++ b/nltk/parse/util.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/parse/viterbi.py b/nltk/parse/viterbi.py
index 9e7c102..282d9aa 100644
--- a/nltk/parse/viterbi.py
+++ b/nltk/parse/viterbi.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Viterbi Probabilistic Parser
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/probability.py b/nltk/probability.py
index 2ce29e2..1ae001f 100644
--- a/nltk/probability.py
+++ b/nltk/probability.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Probability and Statistics
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (additions)
 #         Trevor Cohn <tacohn at cs.mu.oz.au> (additions)
@@ -308,9 +308,19 @@ class FreqDist(Counter):
 
         :rtype: string
         """
-        return self.pprint()
+        return self.pformat()
 
-    def pprint(self, maxlen=10):
+    def pprint(self, maxlen=10, stream=None):
+        """
+        Print a string representation of this FreqDist to 'stream'
+
+        :param maxlen: The maximum number of items to print
+        :type maxlen: int
+        :param stream: The stream to print to. stdout by default
+        """
+        print(self.pformat(maxlen=maxlen), file=stream)
+
+    def pformat(self, maxlen=10):
         """
         Return a string representation of this FreqDist.
 
@@ -1219,7 +1229,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
         r_Nr = self._freqdist.r_Nr()
         del r_Nr[0]
         return r_Nr
- 
+
     def _r_Nr(self):
         """
         Split the frequency distribution in two list (r, Nr), where Nr(r) > 0
diff --git a/nltk/sem/__init__.py b/nltk/sem/__init__.py
index a5be8f6..047757d 100644
--- a/nltk/sem/__init__.py
+++ b/nltk/sem/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Semantic Interpretation
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/boxer.py b/nltk/sem/boxer.py
index 5164c2d..19a66e1 100644
--- a/nltk/sem/boxer.py
+++ b/nltk/sem/boxer.py
@@ -3,7 +3,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -81,7 +81,7 @@ class Boxer(object):
         :param input: str Input sentence to parse
         :param occur_index: bool Should predicates be occurrence indexed?
         :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate.
-        :return: ``drt.AbstractDrs``
+        :return: ``drt.DrtExpression``
         """
         discourse_ids = ([discourse_id] if discourse_id is not None else None)
         d, = self.interpret_multi_sents([[input]], discourse_ids, question, verbose)
@@ -96,7 +96,7 @@ class Boxer(object):
         :param input: list of str Input sentences to parse as a single discourse
         :param occur_index: bool Should predicates be occurrence indexed?
         :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate.
-        :return: ``drt.AbstractDrs``
+        :return: ``drt.DrtExpression``
         """
         discourse_ids = ([discourse_id] if discourse_id is not None else None)
         d, = self.interpret_multi_sents([input], discourse_ids, question, verbose)
@@ -111,7 +111,7 @@ class Boxer(object):
         :param inputs: list of str Input sentences to parse as individual discourses
         :param occur_index: bool Should predicates be occurrence indexed?
         :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate.
-        :return: list of ``drt.AbstractDrs``
+        :return: list of ``drt.DrtExpression``
         """
         return self.interpret_multi_sents([[input] for input in inputs], discourse_ids, question, verbose)
 
@@ -122,7 +122,7 @@ class Boxer(object):
         :param inputs: list of list of str Input discourses to parse
         :param occur_index: bool Should predicates be occurrence indexed?
         :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate.
-        :return: ``drt.AbstractDrs``
+        :return: ``drt.DrtExpression``
         """
         if discourse_ids is not None:
             assert len(inputs) == len(discourse_ids)
@@ -291,7 +291,7 @@ class BoxerOutputDrsParser(DrtParser):
         """
         Parse a DRS condition
 
-        :return: list of ``AbstractDrs``
+        :return: list of ``DrtExpression``
         """
         tok = self.token()
         accum = self.handle_condition(tok, indices)
@@ -312,7 +312,7 @@ class BoxerOutputDrsParser(DrtParser):
         Handle a DRS condition
 
         :param indices: list of int
-        :return: list of ``AbstractDrs``
+        :return: list of ``DrtExpression``
         """
         if tok == 'not':
             return [self._handle_not()]
@@ -1148,7 +1148,7 @@ class NltkDrtBoxerDrsInterpreter(object):
     def interpret(self, ex):
         """
         :param ex: ``AbstractBoxerDrs``
-        :return: ``AbstractDrs``
+        :return: ``DrtExpression``
         """
         if isinstance(ex, BoxerDrs):
             drs = DRS([Variable('x%d' % r) for r in ex.refs], list(map(self.interpret, ex.conds)))
diff --git a/nltk/sem/chat80.py b/nltk/sem/chat80.py
index 7da0353..ab7d216 100644
--- a/nltk/sem/chat80.py
+++ b/nltk/sem/chat80.py
@@ -1,7 +1,7 @@
 # Natural Language Toolkit: Chat-80 KB Reader
 # See http://www.w3.org/TR/swbp-skos-core-guide/
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/cooper_storage.py b/nltk/sem/cooper_storage.py
index f4f9090..831adaf 100644
--- a/nltk/sem/cooper_storage.py
+++ b/nltk/sem/cooper_storage.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/drt.py b/nltk/sem/drt.py
index 72226b5..3e157de 100644
--- a/nltk/sem/drt.py
+++ b/nltk/sem/drt.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, unicode_literals
@@ -258,19 +258,16 @@ class DrtExpression(object):
         return self.visit_structured(lambda e: e.eliminate_equality(),
                                      self.__class__)
 
-    def pprint(self):
-        """
-        Draw the DRS
-        """
-        print(self.pretty())
-
-    def pretty(self):
+    def pretty_format(self):
         """
         Draw the DRS
         :return: the pretty print string
         """
         return '\n'.join(self._pretty())
 
+    def pretty_print(self):
+        print(self.pretty_format())
+
     def draw(self):
         DrsDrawer(self).draw()
 
@@ -1229,6 +1226,12 @@ def demo():
 
 
 def test_draw():
+    try:
+        from tkinter import Tk
+    except ImportError:
+        from nose import SkipTest
+        raise SkipTest("tkinter is required, but it's not available.")
+
     expressions = [
             r'x',
             r'([],[])',
diff --git a/nltk/sem/drt_glue_demo.py b/nltk/sem/drt_glue_demo.py
index 0f7f828..ccd3c87 100644
--- a/nltk/sem/drt_glue_demo.py
+++ b/nltk/sem/drt_glue_demo.py
@@ -3,18 +3,23 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
+
 from nltk import compat  # this fixes tkinter imports for Python 2.x
 
-from tkinter.font import Font
+try:
+    from tkinter.font import Font
+
+    from tkinter import (Button, Frame, IntVar, Label,
+                         Listbox, Menu, Scrollbar, Tk)
+    from nltk.draw.util import CanvasFrame, ShowText
 
-from tkinter import (Button, Frame, IntVar, Label,
-                     Listbox, Menu, Scrollbar, Tk)
+except ImportError:
+    """Ignore ImportError because tkinter might not be available."""
 
-from nltk.draw.util import CanvasFrame, ShowText
 from nltk.util import in_idle
 from nltk.tag import RegexpTagger
 from nltk.parse import MaltParser
@@ -22,6 +27,7 @@ from nltk.sem.logic import Variable
 from nltk.sem.drt import DrsDrawer, DrtVariableExpression
 from nltk.sem.glue import DrtGlue
 
+
 class DrtGlueDemo(object):
     def __init__(self, examples):
         # Set up the main window.
diff --git a/nltk/sem/evaluate.py b/nltk/sem/evaluate.py
index 4f234df..2a8cd21 100644
--- a/nltk/sem/evaluate.py
+++ b/nltk/sem/evaluate.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Models for first-order languages with lambda
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/glue.py b/nltk/sem/glue.py
index 8a5eedd..69166b3 100644
--- a/nltk/sem/glue.py
+++ b/nltk/sem/glue.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, division, unicode_literals
@@ -210,7 +210,6 @@ class GlueDict(dict):
                     self[sem][relationships].extend(self[supertype][relationships])
                 self[sem][relationships].extend(glue_formulas) # add the glue entry to the dictionary
 
-
     def __str__(self):
         accum = ''
         for pos in self:
@@ -218,12 +217,12 @@ class GlueDict(dict):
             for relset in self[pos]:
                 i = 1
                 for gf in self[pos][relset]:
-                    if i==1:
+                    if i == 1:
                         accum += str_pos + ': '
                     else:
                         accum += ' '*(len(str_pos)+2)
                     accum += "%s" % gf
-                    if relset and i==len(self[pos][relset]):
+                    if relset and i == len(self[pos][relset]):
                         accum += ' : %s' % relset
                     accum += '\n'
                     i += 1
@@ -231,13 +230,15 @@ class GlueDict(dict):
 
     def to_glueformula_list(self, depgraph, node=None, counter=None, verbose=False):
         if node is None:
-            top = depgraph.nodelist[0]
-            root = depgraph.nodelist[top['deps'][0]]
+            top = depgraph.nodes[0]
+            depList = sum(list(top['deps'].values()), [])
+            root = depgraph.nodes[depList[0]]
+            #print (root) 
             return self.to_glueformula_list(depgraph, root, Counter(), verbose)
 
         glueformulas = self.lookup(node, depgraph, counter)
-        for dep_idx in node['deps']:
-            dep = depgraph.nodelist[dep_idx]
+        for dep_idx in sum(list(node['deps'].values()), []):
+            dep = depgraph.nodes[dep_idx]
             glueformulas.extend(self.to_glueformula_list(depgraph, dep, counter, verbose))
         return glueformulas
 
@@ -250,7 +251,7 @@ class GlueDict(dict):
                 semtype = self[name]
                 break
         if semtype is None:
-#            raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
+            # raise KeyError, "There is no GlueDict entry for sem type '%s' (for '%s')" % (sem, word)
             return []
 
         self.add_missing_dependencies(node, depgraph)
@@ -258,9 +259,11 @@ class GlueDict(dict):
         lookup = self._lookup_semtype_option(semtype, node, depgraph)
 
         if not len(lookup):
-            raise KeyError("There is no GlueDict entry for sem type of '%s'"\
-                    " with tag '%s', and rel '%s'" %\
-                    (node['word'], node['tag'], node['rel']))
+            raise KeyError(
+                "There is no GlueDict entry for sem type of '%s' "
+                "with tag '%s', and rel '%s'" %
+                (node['word'], node['tag'], node['rel'])
+                )
 
         return self.get_glueformulas_from_semtype_entry(lookup, node['word'], node, depgraph, counter)
 
@@ -268,15 +271,19 @@ class GlueDict(dict):
         rel = node['rel'].lower()
 
         if rel == 'main':
-            headnode = depgraph.nodelist[node['head']]
+            headnode = depgraph.nodes[node['head']]
             subj = self.lookup_unique('subj', headnode, depgraph)
-            node['deps'].append(subj['address'])
+            relation = subj['rel']
+            node['deps'].setdefault(relation,[])
+            node['deps'][relation].append(subj['address'])
+            #node['deps'].append(subj['address'])
 
     def _lookup_semtype_option(self, semtype, node, depgraph):
-        relationships = frozenset(depgraph.nodelist[dep]['rel'].lower()
-                                   for dep in node['deps']
-                                   if depgraph.nodelist[dep]['rel'].lower()
-                                       not in OPTIONAL_RELATIONSHIPS)
+        relationships = frozenset(
+            depgraph.nodes[dep]['rel'].lower()
+            for dep in sum(list(node['deps'].values()), [])
+            if depgraph.nodes[dep]['rel'].lower() not in OPTIONAL_RELATIONSHIPS
+        )
 
         try:
             lookup = semtype[relationships]
@@ -351,8 +358,9 @@ class GlueDict(dict):
                 return linearlogic.ConstantExpression(name)
         else:
             return linearlogic.ImpExpression(
-                       self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
-                       self.initialize_labels(expr.consequent, node, depgraph, unique_index))
+                self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
+                self.initialize_labels(expr.consequent, node, depgraph, unique_index)
+            )
 
     def find_label_name(self, name, node, depgraph, unique_index):
         try:
@@ -361,19 +369,27 @@ class GlueDict(dict):
             before_dot = name[:dot]
             after_dot = name[dot+1:]
             if before_dot == 'super':
-                return self.find_label_name(after_dot, depgraph.nodelist[node['head']], depgraph, unique_index)
+                return self.find_label_name(after_dot, depgraph.nodes[node['head']], depgraph, unique_index)
             else:
                 return self.find_label_name(after_dot, self.lookup_unique(before_dot, node, depgraph), depgraph, unique_index)
         except ValueError:
             lbl = self.get_label(node)
-            if   name=='f':     return lbl
-            elif name=='v':     return '%sv' % lbl
-            elif name=='r':     return '%sr' % lbl
-            elif name=='super': return self.get_label(depgraph.nodelist[node['head']])
-            elif name=='var':   return '%s%s' % (lbl.upper(), unique_index)
-            elif name=='a':     return self.get_label(self.lookup_unique('conja', node, depgraph))
-            elif name=='b':     return self.get_label(self.lookup_unique('conjb', node, depgraph))
-            else:               return self.get_label(self.lookup_unique(name, node, depgraph))
+            if name == 'f':
+                return lbl
+            elif name == 'v':
+                return '%sv' % lbl
+            elif name == 'r':
+                return '%sr' % lbl
+            elif name == 'super':
+                return self.get_label(depgraph.nodes[node['head']])
+            elif name == 'var':
+                return '%s%s' % (lbl.upper(), unique_index)
+            elif name == 'a':
+                return self.get_label(self.lookup_unique('conja', node, depgraph))
+            elif name == 'b':
+                return self.get_label(self.lookup_unique('conjb', node, depgraph))
+            else:
+                return self.get_label(self.lookup_unique(name, node, depgraph))
 
     def get_label(self, node):
         """
@@ -396,8 +412,11 @@ class GlueDict(dict):
         """
         Lookup 'key'. There should be exactly one item in the associated relation.
         """
-        deps = [depgraph.nodelist[dep] for dep in node['deps']
-                if depgraph.nodelist[dep]['rel'].lower() == rel.lower()]
+        deps = [
+            depgraph.nodes[dep]
+            for dep in sum(list(node['deps'].values()), [])
+            if depgraph.nodes[dep]['rel'].lower() == rel.lower()
+        ]
 
         if len(deps) == 0:
             raise KeyError("'%s' doesn't contain a feature '%s'" % (node['word'], rel))
@@ -409,6 +428,7 @@ class GlueDict(dict):
     def get_GlueFormula_factory(self):
         return GlueFormula
 
+
 class Glue(object):
     def __init__(self, semtype_file=None, remove_duplicates=False,
                  depparser=None, verbose=False):
@@ -580,7 +600,7 @@ class DrtGlueFormula(GlueFormula):
 
         if isinstance(meaning, string_types):
             self.meaning = drt.DrtExpression.fromstring(meaning)
-        elif isinstance(meaning, drt.AbstractDrs):
+        elif isinstance(meaning, drt.DrtExpression):
             self.meaning = meaning
         else:
             raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))
diff --git a/nltk/sem/hole.py b/nltk/sem/hole.py
index daf337c..2cbe90d 100644
--- a/nltk/sem/hole.py
+++ b/nltk/sem/hole.py
@@ -3,7 +3,7 @@
 # Author:     Peter Wang
 # Updated by: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
@@ -25,7 +25,7 @@ from functools import reduce
 
 from nltk import compat
 from nltk.parse import load_parser
-from nltk.draw.tree import draw_trees
+
 from nltk.sem.skolemize import skolemize
 from nltk.sem.logic import (AllExpression, AndExpression, ApplicationExpression,
                             ExistsExpression, IffExpression, ImpExpression,
@@ -53,8 +53,8 @@ class Constants(object):
     HOLE = 'HOLE'
     LABEL = 'LABEL'
 
-    MAP = {ALL: lambda v,e: AllExpression(v.variable, e),
-           EXISTS: lambda v,e: ExistsExpression(v.variable, e),
+    MAP = {ALL: lambda v, e: AllExpression(v.variable, e),
+           EXISTS: lambda v, e: ExistsExpression(v.variable, e),
            NOT: NegatedExpression,
            AND: AndExpression,
            OR: OrExpression,
@@ -62,6 +62,7 @@ class Constants(object):
            IFF: IffExpression,
            PRED: ApplicationExpression}
 
+
 class HoleSemantics(object):
     """
     This class holds the broken-down components of a hole semantics, i.e. it
@@ -90,8 +91,8 @@ class HoleSemantics(object):
         """
         self.holes = set()
         self.labels = set()
-        self.fragments = {}     # mapping of label -> formula fragment
-        self.constraints = set() # set of Constraints
+        self.fragments = {}  # mapping of label -> formula fragment
+        self.constraints = set()  # set of Constraints
         self._break_down(usr)
         self.top_most_labels = self._find_top_most_labels()
         self.top_hole = self._find_top_hole()
@@ -129,7 +130,7 @@ class HoleSemantics(object):
     def _find_top_nodes(self, node_list):
         top_nodes = node_list.copy()
         for f in compat.itervalues(self.fragments):
-            #the label is the first argument of the predicate
+            # the label is the first argument of the predicate
             args = f[1]
             for arg in args:
                 if arg in node_list:
@@ -149,7 +150,7 @@ class HoleSemantics(object):
         Return the hole that will be the top of the formula tree.
         """
         top_holes = self._find_top_nodes(self.holes)
-        assert len(top_holes) == 1   # it must be unique
+        assert len(top_holes) == 1  # it must be unique
         return top_holes.pop()
 
     def pluggings(self):
@@ -277,7 +278,7 @@ class HoleSemantics(object):
         if node in plugging:
             return self._formula_tree(plugging, plugging[node])
         elif node in self.fragments:
-            pred,args = self.fragments[node]
+            pred, args = self.fragments[node]
             children = [self._formula_tree(plugging, arg) for arg in args]
             return reduce(Constants.MAP[pred.variable.name], children)
         else:
@@ -293,15 +294,19 @@ class Constraint(object):
     def __init__(self, lhs, rhs):
         self.lhs = lhs
         self.rhs = rhs
+
     def __eq__(self, other):
         if self.__class__ == other.__class__:
             return self.lhs == other.lhs and self.rhs == other.rhs
         else:
             return False
+
     def __ne__(self, other):
         return not (self == other)
+
     def __hash__(self):
         return hash(repr(self))
+
     def __repr__(self):
         return '(%s < %s)' % (self.lhs, self.rhs)
 
@@ -310,14 +315,16 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
     if not grammar_filename:
         grammar_filename = 'grammars/sample_grammars/hole.fcfg'
 
-    if verbose: print('Reading grammar file', grammar_filename)
+    if verbose:
+        print('Reading grammar file', grammar_filename)
 
     parser = load_parser(grammar_filename)
 
     # Parse the sentence.
     tokens = sentence.split()
     trees = list(parser.parse(tokens))
-    if verbose: print('Got %d different parses' % len(trees))
+    if verbose:
+        print('Got %d different parses' % len(trees))
 
     all_readings = []
     for tree in trees:
@@ -325,14 +332,16 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
         sem = tree.label()['SEM'].simplify()
 
         # Print the raw semantic representation.
-        if verbose: print('Raw:       ', sem)
+        if verbose:
+            print('Raw:       ', sem)
 
         # Skolemize away all quantifiers.  All variables become unique.
         while isinstance(sem, LambdaExpression):
             sem = sem.term
         skolemized = skolemize(sem)
 
-        if verbose: print('Skolemized:', skolemized)
+        if verbose:
+            print('Skolemized:', skolemized)
 
         # Break the hole semantics representation down into its components
         # i.e. holes, labels, formula fragments and constraints.
@@ -346,7 +355,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
             print('Top hole:    ', hole_sem.top_hole)
             print('Top labels:  ', hole_sem.top_most_labels)
             print('Fragments:')
-            for (l,f) in hole_sem.fragments.items():
+            for l, f in hole_sem.fragments.items():
                 print('\t%s: %s' % (l, f))
 
         # Find all the possible ways to plug the formulas together.
@@ -357,7 +366,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
 
         # Print out the formulas in a textual format.
         if verbose:
-            for i,r in enumerate(readings):
+            for i, r in enumerate(readings):
                 print()
                 print('%d. %s' % (i, r))
             print()
@@ -368,7 +377,8 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
 
 
 if __name__ == '__main__':
-    for r in hole_readings('a dog barks'): print(r)
+    for r in hole_readings('a dog barks'):
+        print(r)
     print()
-    for r in hole_readings('every girl chases a dog'): print(r)
-
+    for r in hole_readings('every girl chases a dog'):
+        print(r)
diff --git a/nltk/sem/lfg.py b/nltk/sem/lfg.py
index 1e1d82e..c8a99b3 100644
--- a/nltk/sem/lfg.py
+++ b/nltk/sem/lfg.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, division, unicode_literals
@@ -10,6 +10,7 @@ from __future__ import print_function, division, unicode_literals
 from nltk.internals import Counter
 from nltk.compat import python_2_unicode_compatible
 
+
 @python_2_unicode_compatible
 class FStructure(dict):
     def safeappend(self, key, item):
@@ -37,45 +38,54 @@ class FStructure(dict):
     def to_depgraph(self, rel=None):
         from nltk.parse.dependencygraph import DependencyGraph
         depgraph = DependencyGraph()
-        nodelist = depgraph.nodelist
+        nodes = depgraph.nodes
 
-        self._to_depgraph(nodelist, 0, 'ROOT')
+        self._to_depgraph(nodes, 0, 'ROOT')
 
-        #Add all the dependencies for all the nodes
-        for node_addr, node in enumerate(nodelist):
-            for n2 in nodelist[1:]:
-                if n2['head'] == node_addr:
-                    node['deps'].append(n2['address'])
+        # Add all the dependencies for all the nodes
+        for address, node in nodes.items():
+            for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'):
+                if n2['head'] == address:
+                    relation = n2['rel']
+                    node['deps'].setdefault(relation,[])
+                    node['deps'][relation].append(n2['address'])
 
-        depgraph.root = nodelist[1]
+        depgraph.root = nodes[1]
 
         return depgraph
 
-    def _to_depgraph(self, nodelist, head, rel):
-        index = len(nodelist)
+    def _to_depgraph(self, nodes, head, rel):
+        index = len(nodes)
 
-        nodelist.append({'address': index,
-                         'word': self.pred[0],
-                         'tag': self.pred[1],
-                         'head': head,
-                         'rel': rel,
-                         'deps': []})
+        nodes[index].update(
+            {
+                'address': index,
+                'word': self.pred[0],
+                'tag': self.pred[1],
+                'head': head,
+                'rel': rel,
+            }
+        )
 
-        for feature in self:
-            for item in self[feature]:
+        for feature in sorted(self):
+            for item in sorted(self[feature]):
                 if isinstance(item, FStructure):
-                    item._to_depgraph(nodelist, index, feature)
+                    item._to_depgraph(nodes, index, feature)
                 elif isinstance(item, tuple):
-                    nodelist.append({'address': len(nodelist),
-                                     'word': item[0],
-                                     'tag': item[1],
-                                     'head': index,
-                                     'rel': feature,
-                                     'deps': []})
+                    new_index = len(nodes)
+                    nodes[new_index].update(
+                        {
+                            'address': new_index,
+                            'word': item[0],
+                            'tag': item[1],
+                            'head': index,
+                            'rel': feature,
+                        }
+                    )
                 elif isinstance(item, list):
                     for n in item:
-                        n._to_depgraph(nodelist, index, feature)
-                else: # ERROR
+                        n._to_depgraph(nodes, index, feature)
+                else:
                     raise Exception('feature %s is not an FStruct, a list, or a tuple' % feature)
 
     @staticmethod
@@ -107,7 +117,7 @@ class FStructure(dict):
             if not fstruct.pred:
                 fstruct.pred = (word, tag)
 
-            children = [depgraph.nodelist[idx] for idx in node['deps']]
+            children = [depgraph.nodes[idx] for idx in sum(list(node['deps'].values()), [])]
             for child in children:
                 fstruct.safeappend(child['rel'], FStructure._read_depgraph(child, depgraph, label_counter, fstruct))
 
@@ -133,9 +143,9 @@ class FStructure(dict):
         return self.__unicode__().replace('\n', '')
 
     def __str__(self):
-        return self.pprint()
+        return self.pretty_format()
 
-    def pprint(self, indent=3):
+    def pretty_format(self, indent=3):
         try:
             accum = '%s:[' % self.label
         except NameError:
@@ -149,7 +159,7 @@ class FStructure(dict):
             for item in self[feature]:
                 if isinstance(item, FStructure):
                     next_indent = indent+len(feature)+3+len(self.label)
-                    accum += '\n%s%s %s' % (' '*(indent), feature, item.pprint(next_indent))
+                    accum += '\n%s%s %s' % (' '*(indent), feature, item.pretty_format(next_indent))
                 elif isinstance(item, tuple):
                     accum += '\n%s%s \'%s\'' % (' '*(indent), feature, item[0])
                 elif isinstance(item, list):
@@ -196,4 +206,3 @@ dog     NN      3       OBJ
 
 if __name__ == '__main__':
     demo_read_depgraph()
-
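
To make the lfg.py change above easier to follow: the old code appended plain dicts to `depgraph.nodelist` and kept `deps` as a flat list of addresses, while the new code writes into the address-keyed `nodes` mapping of newer `DependencyGraph` objects, where `deps` maps each relation label to a list of dependent addresses. Below is a minimal standalone sketch of that target shape; the defaultdict factory and the toy sentence are illustrative assumptions, not NLTK internals.

    from collections import defaultdict

    # Address-keyed node table; each missing address yields an empty node
    # skeleton on first access, mirroring nodes[index].update(...) in the diff.
    nodes = defaultdict(lambda: {'address': None, 'word': None, 'tag': None,
                                 'head': None, 'rel': None, 'deps': {}})

    nodes[0].update({'address': 0, 'rel': 'TOP'})
    nodes[1].update({'address': 1, 'word': 'barks', 'tag': 'VB',
                     'head': 0, 'rel': 'ROOT'})
    nodes[2].update({'address': 2, 'word': 'dog', 'tag': 'NN',
                     'head': 1, 'rel': 'SUBJ'})

    # Register every dependency under its relation, as the rewritten loop does.
    for address, node in nodes.items():
        for other in [n for n in nodes.values() if n['rel'] != 'TOP']:
            if other['head'] == address:
                node['deps'].setdefault(other['rel'], []).append(other['address'])

    print(nodes[1]['deps'])   # {'SUBJ': [2]}
    print(nodes[0]['deps'])   # {'ROOT': [1]}
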
diff --git a/nltk/sem/linearlogic.py b/nltk/sem/linearlogic.py
index 4547a8a..483f31f 100644
--- a/nltk/sem/linearlogic.py
+++ b/nltk/sem/linearlogic.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 from __future__ import print_function, unicode_literals
diff --git a/nltk/sem/logic.py b/nltk/sem/logic.py
index 1730ba2..efcfdd9 100644
--- a/nltk/sem/logic.py
+++ b/nltk/sem/logic.py
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
@@ -1890,7 +1890,7 @@ def demo():
     print(lexpr(r'(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))').simplify())
 
     print('='*20 + 'Test alpha conversion and binder expression equality' + '='*20)
-    e1 = p('exists x.P(x)')
+    e1 = lexpr('exists x.P(x)')
     print(e1)
     e2 = e1.alpha_convert(Variable('z'))
     print(e2)
@@ -1923,4 +1923,4 @@ def printtype(ex):
 
 if __name__ == '__main__':
     demo()
-    demo_errors()
+#    demo_errors()
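
The logic.py hunk above fixes the demo to use `lexpr` (rather than the undefined `p`) when testing alpha conversion and binder equality. A minimal hedged sketch of that check, assuming `lexpr` is `Expression.fromstring` as the demo builds it:

    from nltk.sem.logic import Expression, Variable

    lexpr = Expression.fromstring
    e1 = lexpr('exists x.P(x)')
    e2 = e1.alpha_convert(Variable('z'))   # exists z.P(z)

    # Binder expressions are expected to compare equal up to alpha conversion.
    print(e1 == e2)   # expected: True
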
diff --git a/nltk/sem/relextract.py b/nltk/sem/relextract.py
index 1a9ca44..630923a 100644
--- a/nltk/sem/relextract.py
+++ b/nltk/sem/relextract.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Relation Extraction
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/sem/skolemize.py b/nltk/sem/skolemize.py
index 120fd9b..96cf6e4 100644
--- a/nltk/sem/skolemize.py
+++ b/nltk/sem/skolemize.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/sem/util.py b/nltk/sem/util.py
index 70d54fc..6a92a97 100644
--- a/nltk/sem/util.py
+++ b/nltk/sem/util.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan at inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
diff --git a/nltk/stem/__init__.py b/nltk/stem/__init__.py
index 294b018..f495c0a 100644
--- a/nltk/stem/__init__.py
+++ b/nltk/stem/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/api.py b/nltk/stem/api.py
index c603242..09a82e2 100644
--- a/nltk/stem/api.py
+++ b/nltk/stem/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmer Interface
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/isri.py b/nltk/stem/isri.py
index 8ce4ee6..2ddc6ca 100644
--- a/nltk/stem/isri.py
+++ b/nltk/stem/isri.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: The ISRI Arabic Stemmer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005)
 # Author: Hosam Algasaier <hosam_hme at yahoo.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/stem/lancaster.py b/nltk/stem/lancaster.py
index bd169bd..185d4ff 100644
--- a/nltk/stem/lancaster.py
+++ b/nltk/stem/lancaster.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Tomcavage <stomcava at law.upenn.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/stem/porter.py b/nltk/stem/porter.py
index 467a4d4..721c90b 100644
--- a/nltk/stem/porter.py
+++ b/nltk/stem/porter.py
@@ -81,10 +81,9 @@ Later additions:
    Invariants proceed, succeed, exceed. Also suggested by Hiranmay Ghosh.
 
 Additional modifications were made to incorporate this module into
-nltk.  All such modifications are marked with \"--NLTK--\".  The nltk
-version of this module is maintained by the NLTK developers, and is
-available from <http://nltk.sourceforge.net>
+nltk.  All such modifications are marked with \"--NLTK--\".
 """
+
 from __future__ import print_function, unicode_literals
 
 ## --NLTK--
diff --git a/nltk/stem/regexp.py b/nltk/stem/regexp.py
index 55e3305..ee51cd8 100644
--- a/nltk/stem/regexp.py
+++ b/nltk/stem/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at cs.mu.oz.au>
 #         Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
diff --git a/nltk/stem/rslp.py b/nltk/stem/rslp.py
index 805c44b..9e429a2 100644
--- a/nltk/stem/rslp.py
+++ b/nltk/stem/rslp.py
@@ -2,7 +2,7 @@
 
 # Natural Language Toolkit: RSLP Stemmer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Tiago Tresoldi <tresoldi at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/stem/snowball.py b/nltk/stem/snowball.py
index 75ba993..3fea2b3 100644
--- a/nltk/stem/snowball.py
+++ b/nltk/stem/snowball.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Snowball Stemmer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Peter Michael Stahl <pemistahl at gmail.com>
 #         Peter Ljunglof <peter.ljunglof at heatherleaf.se> (revisions)
 # Algorithms: Dr Martin Porter <martin at tartarus.org>
@@ -23,9 +23,11 @@ from __future__ import unicode_literals, print_function
 from nltk import compat
 from nltk.corpus import stopwords
 from nltk.stem import porter
+from nltk.stem.util import suffix_replace
 
 from nltk.stem.api import StemmerI
 
+
 class SnowballStemmer(StemmerI):
 
     """
@@ -189,7 +191,6 @@ class _ScandinavianStemmer(_LanguageSpecificStemmer):
         return r1
 
 
-
 class _StandardStemmer(_LanguageSpecificStemmer):
 
     """
@@ -272,7 +273,7 @@ class _StandardStemmer(_LanguageSpecificStemmer):
                         rv = word[i+1:]
                         break
 
-            elif word[:2] in vowels:
+            elif word[0] in vowels and word[1] in vowels:
                 for i in range(2, len(word)):
                     if word[i] not in vowels:
                         rv = word[i+1:]
@@ -282,8 +283,6 @@ class _StandardStemmer(_LanguageSpecificStemmer):
 
         return rv
 
-
-
 class DanishStemmer(_ScandinavianStemmer):
 
     """
@@ -477,10 +476,10 @@ class DutchStemmer(_StandardStemmer):
         for suffix in self.__step1_suffixes:
             if r1.endswith(suffix):
                 if suffix == "heden":
-                    word = "".join((word[:-5], "heid"))
-                    r1 = "".join((r1[:-5], "heid"))
+                    word = suffix_replace(word, suffix, "heid")
+                    r1 = suffix_replace(r1, suffix, "heid")
                     if r2.endswith("heden"):
-                        r2 = "".join((r2[:-5], "heid"))
+                        r2 = suffix_replace(r2, suffix, "heid")
 
                 elif (suffix in ("ene", "en") and
                       not word.endswith("heden") and
@@ -769,15 +768,15 @@ class EnglishStemmer(_StandardStemmer):
                 if suffix in ("eed", "eedly"):
 
                     if r1.endswith(suffix):
-                        word = "".join((word[:-len(suffix)], "ee"))
+                        word = suffix_replace(word, suffix, "ee")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ee"))
+                            r1 = suffix_replace(r1, suffix, "ee")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ee"))
+                            r2 = suffix_replace(r2, suffix, "ee")
                         else:
                             r2 = ""
                 else:
@@ -863,41 +862,41 @@ class EnglishStemmer(_StandardStemmer):
                         r2 = r2[:-2]
 
                     elif suffix in ("izer", "ization"):
-                        word = "".join((word[:-len(suffix)], "ize"))
+                        word = suffix_replace(word, suffix, "ize")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ize"))
+                            r1 = suffix_replace(r1, suffix, "ize")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ize"))
+                            r2 = suffix_replace(r2, suffix, "ize")
                         else:
                             r2 = ""
 
                     elif suffix in ("ational", "ation", "ator"):
-                        word = "".join((word[:-len(suffix)], "ate"))
+                        word = suffix_replace(word, suffix, "ate")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ate"))
+                            r1 = suffix_replace(r1, suffix, "ate")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ate"))
+                            r2 = suffix_replace(r2, suffix, "ate")
                         else:
                             r2 = "e"
 
                     elif suffix in ("alism", "aliti", "alli"):
-                        word = "".join((word[:-len(suffix)], "al"))
+                        word = suffix_replace(word, suffix, "al")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "al"))
+                            r1 = suffix_replace(r1, suffix, "al")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "al"))
+                            r2 = suffix_replace(r2, suffix, "al")
                         else:
                             r2 = ""
 
@@ -907,41 +906,41 @@ class EnglishStemmer(_StandardStemmer):
                         r2 = r2[:-4]
 
                     elif suffix in ("ousli", "ousness"):
-                        word = "".join((word[:-len(suffix)], "ous"))
+                        word = suffix_replace(word, suffix, "ous")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ous"))
+                            r1 = suffix_replace(r1, suffix, "ous")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ous"))
+                            r2 = suffix_replace(r2, suffix, "ous")
                         else:
                             r2 = ""
 
                     elif suffix in ("iveness", "iviti"):
-                        word = "".join((word[:-len(suffix)], "ive"))
+                        word = suffix_replace(word, suffix, "ive")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ive"))
+                            r1 = suffix_replace(r1, suffix, "ive")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ive"))
+                            r2 = suffix_replace(r2, suffix, "ive")
                         else:
                             r2 = "e"
 
                     elif suffix in ("biliti", "bli"):
-                        word = "".join((word[:-len(suffix)], "ble"))
+                        word = suffix_replace(word, suffix, "ble")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ble"))
+                            r1 = suffix_replace(r1, suffix, "ble")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ble"))
+                            r2 = suffix_replace(r2, suffix, "ble")
                         else:
                             r2 = ""
 
@@ -971,15 +970,15 @@ class EnglishStemmer(_StandardStemmer):
                         r2 = r2[:-2]
 
                     elif suffix == "ational":
-                        word = "".join((word[:-len(suffix)], "ate"))
+                        word = suffix_replace(word, suffix, "ate")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ate"))
+                            r1 = suffix_replace(r1, suffix, "ate")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ate"))
+                            r2 = suffix_replace(r2, suffix, "ate")
                         else:
                             r2 = ""
 
@@ -989,15 +988,15 @@ class EnglishStemmer(_StandardStemmer):
                         r2 = r2[:-3]
 
                     elif suffix in ("icate", "iciti", "ical"):
-                        word = "".join((word[:-len(suffix)], "ic"))
+                        word = suffix_replace(word, suffix, "ic")
 
                         if len(r1) >= len(suffix):
-                            r1 = "".join((r1[:-len(suffix)], "ic"))
+                            r1 = suffix_replace(r1, suffix, "ic")
                         else:
                             r1 = ""
 
                         if len(r2) >= len(suffix):
-                            r2 = "".join((r2[:-len(suffix)], "ic"))
+                            r2 = suffix_replace(r2, suffix, "ic")
                         else:
                             r2 = ""
 
@@ -1146,13 +1145,13 @@ class FinnishStemmer(_StandardStemmer):
                     r1 = r1[:-2]
                     r2 = r2[:-2]
                     if word.endswith("kse"):
-                        word = "".join((word[:-3], "ksi"))
+                        word = suffix_replace(word, "kse", "ksi")
 
                     if r1.endswith("kse"):
-                        r1 = "".join((r1[:-3], "ksi"))
+                        r1 = suffix_replace(r1, "kse", "ksi")
 
                     if r2.endswith("kse"):
-                        r2 = "".join((r2[:-3], "ksi"))
+                        r2 = suffix_replace(r2, "kse", "ksi")
 
                 elif suffix == "an":
                     if (word[-4:-2] in ("ta", "na") or
@@ -1421,7 +1420,7 @@ class FrenchStemmer(_StandardStemmer):
                         step1_success = True
 
                     elif suffix in r1:
-                        word = "".join((word[:-len(suffix)], "eux"))
+                        word = suffix_replace(word, suffix, "eux")
                         step1_success = True
 
                 elif suffix in ("ement", "ements") and suffix in rv:
@@ -1449,12 +1448,12 @@ class FrenchStemmer(_StandardStemmer):
                             word = "".join((word[:-3], "i"))
 
                 elif suffix == "amment" and suffix in rv:
-                    word = "".join((word[:-6], "ant"))
-                    rv = "".join((rv[:-6], "ant"))
+                    word = suffix_replace(word, "amment", "ant")
+                    rv = suffix_replace(rv, "amment", "ant")
                     rv_ending_found = True
 
                 elif suffix == "emment" and suffix in rv:
-                    word = "".join((word[:-6], "ent"))
+                    word = suffix_replace(word, "emment", "ent")
                     rv_ending_found = True
 
                 elif (suffix in ("ment", "ments") and suffix in rv and
@@ -1491,16 +1490,16 @@ class FrenchStemmer(_StandardStemmer):
                             word = "".join((word[:-2], "iqU"))
 
                 elif suffix in ("logie", "logies") and suffix in r2:
-                    word = "".join((word[:-len(suffix)], "log"))
+                    word = suffix_replace(word, suffix, "log")
                     step1_success = True
 
                 elif (suffix in ("usion", "ution", "usions", "utions") and
                       suffix in r2):
-                    word = "".join((word[:-len(suffix)], "u"))
+                    word = suffix_replace(word, suffix, "u")
                     step1_success = True
 
                 elif suffix in ("ence", "ences") and suffix in r2:
-                    word = "".join((word[:-len(suffix)], "ent"))
+                    word = suffix_replace(word, suffix, "ent")
                     step1_success = True
 
                 elif suffix in ("it\xE9", "it\xE9s") and suffix in r2:
@@ -1600,7 +1599,7 @@ class FrenchStemmer(_StandardStemmer):
 
                         elif suffix in ("ier", "i\xE8re", "Ier",
                                         "I\xE8re"):
-                            word = "".join((word[:-len(suffix)], "i"))
+                            word = suffix_replace(word, suffix, "i")
 
                         elif suffix == "e":
                             word = word[:-1]
@@ -1931,34 +1930,34 @@ class HungarianStemmer(_LanguageSpecificStemmer):
 
                     if r1.endswith("\xE1"):
                         word = "".join((word[:-1], "a"))
-                        r1 = "".join((r1[:-1], "a"))
+                        r1 = suffix_replace(r1, "\xE1", "a")
 
                     elif r1.endswith("\xE9"):
                         word = "".join((word[:-1], "e"))
-                        r1 = "".join((r1[:-1], "e"))
+                        r1 = suffix_replace(r1, "\xE9", "e")
                 break
 
         # STEP 3: Remove special cases
         for suffix in self.__step3_suffixes:
             if r1.endswith(suffix):
                 if suffix == "\xE9n":
-                    word = "".join((word[:-2], "e"))
-                    r1 = "".join((r1[:-2], "e"))
+                    word = suffix_replace(word, suffix, "e")
+                    r1 = suffix_replace(r1, suffix, "e")
                 else:
-                    word = "".join((word[:-len(suffix)], "a"))
-                    r1 = "".join((r1[:-len(suffix)], "a"))
+                    word = suffix_replace(word, suffix, "a")
+                    r1 = suffix_replace(r1, suffix, "a")
                 break
 
         # STEP 4: Remove other cases
         for suffix in self.__step4_suffixes:
             if r1.endswith(suffix):
                 if suffix == "\xE1stul":
-                    word = "".join((word[:-5], "a"))
-                    r1 = "".join((r1[:-5], "a"))
+                    word = suffix_replace(word, suffix, "a")
+                    r1 = suffix_replace(r1, suffix, "a")
 
                 elif suffix == "\xE9st\xFCl":
-                    word = "".join((word[:-5], "e"))
-                    r1 = "".join((r1[:-5], "e"))
+                    word = suffix_replace(word, suffix, "e")
+                    r1 = suffix_replace(r1, suffix, "e")
                 else:
                     word = word[:-len(suffix)]
                     r1 = r1[:-len(suffix)]
@@ -1979,13 +1978,13 @@ class HungarianStemmer(_LanguageSpecificStemmer):
         for suffix in self.__step6_suffixes:
             if r1.endswith(suffix):
                 if suffix in ("\xE1k\xE9", "\xE1\xE9i"):
-                    word = "".join((word[:-3], "a"))
-                    r1 = "".join((r1[:-3], "a"))
+                    word = suffix_replace(word, suffix, "a")
+                    r1 = suffix_replace(r1, suffix, "a")
 
                 elif suffix in ("\xE9k\xE9", "\xE9\xE9i",
                                 "\xE9\xE9"):
-                    word = "".join((word[:-len(suffix)], "e"))
-                    r1 = "".join((r1[:-len(suffix)], "e"))
+                    word = suffix_replace(word, suffix, "e")
+                    r1 = suffix_replace(r1, suffix, "e")
                 else:
                     word = word[:-len(suffix)]
                     r1 = r1[:-len(suffix)]
@@ -1997,13 +1996,13 @@ class HungarianStemmer(_LanguageSpecificStemmer):
                 if r1.endswith(suffix):
                     if suffix in ("\xE1nk", "\xE1juk", "\xE1m",
                                   "\xE1d", "\xE1"):
-                        word = "".join((word[:-len(suffix)], "a"))
-                        r1 = "".join((r1[:-len(suffix)], "a"))
+                        word = suffix_replace(word, suffix, "a")
+                        r1 = suffix_replace(r1, suffix, "a")
 
                     elif suffix in ("\xE9nk", "\xE9j\xFCk",
                                     "\xE9m", "\xE9d", "\xE9"):
-                        word = "".join((word[:-len(suffix)], "e"))
-                        r1 = "".join((r1[:-len(suffix)], "e"))
+                        word = suffix_replace(word, suffix, "e")
+                        r1 = suffix_replace(r1, suffix, "e")
                     else:
                         word = word[:-len(suffix)]
                         r1 = r1[:-len(suffix)]
@@ -2015,13 +2014,13 @@ class HungarianStemmer(_LanguageSpecificStemmer):
                 if r1.endswith(suffix):
                     if suffix in ("\xE1im", "\xE1id", "\xE1i",
                                   "\xE1ink", "\xE1itok", "\xE1ik"):
-                        word = "".join((word[:-len(suffix)], "a"))
-                        r1 = "".join((r1[:-len(suffix)], "a"))
+                        word = suffix_replace(word, suffix, "a")
+                        r1 = suffix_replace(r1, suffix, "a")
 
                     elif suffix in ("\xE9im", "\xE9id", "\xE9i",
                                     "\xE9ink", "\xE9itek", "\xE9ik"):
-                        word = "".join((word[:-len(suffix)], "e"))
-                        r1 = "".join((r1[:-len(suffix)], "e"))
+                        word = suffix_replace(word, suffix, "e")
+                        r1 = suffix_replace(r1, suffix, "e")
                     else:
                         word = word[:-len(suffix)]
                         r1 = r1[:-len(suffix)]
@@ -2032,9 +2031,9 @@ class HungarianStemmer(_LanguageSpecificStemmer):
             if word.endswith(suffix):
                 if r1.endswith(suffix):
                     if suffix == "\xE1k":
-                        word = "".join((word[:-2], "a"))
+                        word = suffix_replace(word, suffix, "a")
                     elif suffix == "\xE9k":
-                        word = "".join((word[:-2], "e"))
+                        word = suffix_replace(word, suffix, "e")
                     else:
                         word = word[:-len(suffix)]
                 break
@@ -2201,10 +2200,10 @@ class ItalianStemmer(_StandardStemmer):
 
                 elif (rv[-len(suffix)-2:-len(suffix)] in
                       ("ar", "er", "ir")):
-                    word = "".join((word[:-len(suffix)], "e"))
-                    r1 = "".join((r1[:-len(suffix)], "e"))
-                    r2 = "".join((r2[:-len(suffix)], "e"))
-                    rv = "".join((rv[:-len(suffix)], "e"))
+                    word = suffix_replace(word, suffix, "e")
+                    r1 = suffix_replace(r1, suffix, "e")
+                    r2 = suffix_replace(r2, suffix, "e")
+                    rv = suffix_replace(rv, suffix, "e")
                 break
 
         # STEP 1: Standard suffix removal
@@ -2261,8 +2260,8 @@ class ItalianStemmer(_StandardStemmer):
                         rv = rv[:-5]
 
                     elif suffix in ("enza", "enze"):
-                        word = "".join((word[:-2], "te"))
-                        rv = "".join((rv[:-2], "te"))
+                        word = suffix_replace(word, suffix, "te")
+                        rv = suffix_replace(rv, suffix, "te")
 
                     elif suffix == "it\xE0":
                         word = word[:-3]
@@ -2379,8 +2378,8 @@ class NorwegianStemmer(_ScandinavianStemmer):
         for suffix in self.__step1_suffixes:
             if r1.endswith(suffix):
                 if suffix in ("erte", "ert"):
-                    word = "".join((word[:-len(suffix)], "er"))
-                    r1 = "".join((r1[:-len(suffix)], "er"))
+                    word = suffix_replace(word, suffix, "er")
+                    r1 = suffix_replace(r1, suffix, "er")
 
                 elif suffix == "s":
                     if (word[-2] in self.__s_ending or
@@ -2430,13 +2429,13 @@ class PortugueseStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4"
-    __step1_suffixes = ('amentos', 'imentos', 'uciones', 'amento',
+    __step1_suffixes = ('amentos', 'imentos', 'uço~es', 'amento',
                         'imento', 'adoras', 'adores', 'a\xE7o~es',
-                        'log\xEDas', '\xEAncias', 'amente',
-                        'idades', 'ismos', 'istas', 'adora',
+                        'logias', '\xEAncias', 'amente',
+                        'idades', 'an\xE7as', 'ismos', 'istas', 'adora',
                         'a\xE7a~o', 'antes', '\xE2ncia',
-                        'log\xEDa', 'uci\xF3n', '\xEAncia',
-                        'mente', 'idade', 'ezas', 'icos', 'icas',
+                        'logia', 'uça~o', '\xEAncia',
+                        'mente', 'idade', 'an\xE7a', 'ezas', 'icos', 'icas',
                         'ismo', '\xE1vel', '\xEDvel', 'ista',
                         'osos', 'osas', 'ador', 'ante', 'ivas',
                         'ivos', 'iras', 'eza', 'ico', 'ica',
@@ -2490,7 +2489,9 @@ class PortugueseStemmer(_StandardStemmer):
         step2_success = False
 
         word = (word.replace("\xE3", "a~")
-                    .replace("\xF5", "o~"))
+                    .replace("\xF5", "o~")
+                    .replace("q\xFC", "qu")
+                    .replace("g\xFC", "gu"))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
         rv = self._rv_standard(word, self.__vowels)
@@ -2522,30 +2523,30 @@ class PortugueseStemmer(_StandardStemmer):
                       word[-len(suffix)-1:-len(suffix)] == "e"):
                     step1_success = True
 
-                    word = "".join((word[:-len(suffix)], "ir"))
-                    rv = "".join((rv[:-len(suffix)], "ir"))
+                    word = suffix_replace(word, suffix, "ir")
+                    rv = suffix_replace(rv, suffix, "ir")
 
                 elif r2.endswith(suffix):
                     step1_success = True
 
-                    if suffix in ("log\xEDa", "log\xEDas"):
-                        word = word[:-2]
-                        rv = rv[:-2]
+                    if suffix in ("logia", "logias"):
+                        word = suffix_replace(word, suffix, "log")
+                        rv = suffix_replace(rv, suffix, "log")
 
-                    elif suffix in ("uci\xF3n", "uciones"):
-                        word = "".join((word[:-len(suffix)], "u"))
-                        rv = "".join((rv[:-len(suffix)], "u"))
+                    elif suffix in ("ução", "uções"):
+                        word = suffix_replace(word, suffix, "u")
+                        rv = suffix_replace(rv, suffix, "u")
 
                     elif suffix in ("\xEAncia", "\xEAncias"):
-                        word = "".join((word[:-len(suffix)], "ente"))
-                        rv = "".join((rv[:-len(suffix)], "ente"))
+                        word = suffix_replace(word, suffix, "ente")
+                        rv = suffix_replace(rv, suffix, "ente")
 
                     elif suffix == "mente":
                         word = word[:-5]
                         r2 = r2[:-5]
                         rv = rv[:-5]
 
-                        if r2.endswith(("ante", "avel", "\xEDvel")):
+                        if r2.endswith(("ante", "avel", "ivel")):
                             word = word[:-4]
                             rv = rv[:-4]
 
@@ -2609,7 +2610,7 @@ class PortugueseStemmer(_StandardStemmer):
                 word = word[:-1]
 
         elif word.endswith("\xE7"):
-            word = "".join((word[:-1], "c"))
+            word = suffix_replace(word, "\xE7", "c")
 
         word = word.replace("a~", "\xE3").replace("o~", "\xF5")
 
@@ -2745,19 +2746,19 @@ class RomanianStemmer(_StandardStemmer):
                         word = word[:-2]
 
                     elif suffix in ("ea", "ele", "elor"):
-                        word = "".join((word[:-len(suffix)], "e"))
+                        word = suffix_replace(word, suffix, "e")
 
                         if suffix in rv:
-                            rv = "".join((rv[:-len(suffix)], "e"))
+                            rv = suffix_replace(rv, suffix, "e")
                         else:
                             rv = ""
 
                     elif suffix in ("ii", "iua", "iei",
                                     "iile", "iilor", "ilor"):
-                        word = "".join((word[:-len(suffix)], "i"))
+                        word = suffix_replace(word, suffix, "i")
 
                         if suffix in rv:
-                            rv = "".join((rv[:-len(suffix)], "i"))
+                            rv = suffix_replace(rv, suffix, "i")
                         else:
                             rv = ""
 
@@ -2779,7 +2780,7 @@ class RomanianStemmer(_StandardStemmer):
                         if suffix in ("abilitate", "abilitati",
                                       "abilit\u0103i",
                                       "abilit\u0103\u0163i"):
-                            word = "".join((word[:-len(suffix)], "abil"))
+                            word = suffix_replace(word, suffix, "abil")
 
                         elif suffix == "ibilitate":
                             word = word[:-5]
@@ -2787,7 +2788,7 @@ class RomanianStemmer(_StandardStemmer):
                         elif suffix in ("ivitate", "ivitati",
                                         "ivit\u0103i",
                                         "ivit\u0103\u0163i"):
-                            word = "".join((word[:-len(suffix)], "iv"))
+                            word = suffix_replace(word, suffix, "iv")
 
                         elif suffix in ("icitate", "icitati", "icit\u0103i",
                                         "icit\u0103\u0163i", "icator",
@@ -2795,25 +2796,25 @@ class RomanianStemmer(_StandardStemmer):
                                         "icive", "icivi", "iciv\u0103",
                                         "ical", "icala", "icale", "icali",
                                         "ical\u0103"):
-                            word = "".join((word[:-len(suffix)], "ic"))
+                            word = suffix_replace(word, suffix, "ic")
 
                         elif suffix in ("ativ", "ativa", "ative", "ativi",
                                         "ativ\u0103", "a\u0163iune",
                                         "atoare", "ator", "atori",
                                         "\u0103toare",
                                         "\u0103tor", "\u0103tori"):
-                            word = "".join((word[:-len(suffix)], "at"))
+                            word = suffix_replace(word, suffix, "at")
 
                             if suffix in r2:
-                                r2 = "".join((r2[:-len(suffix)], "at"))
+                                r2 = suffix_replace(r2, suffix, "at")
 
                         elif suffix in ("itiv", "itiva", "itive", "itivi",
                                         "itiv\u0103", "i\u0163iune",
                                         "itoare", "itor", "itori"):
-                            word = "".join((word[:-len(suffix)], "it"))
+                            word = suffix_replace(word, suffix, "it")
 
                             if suffix in r2:
-                                r2 = "".join((r2[:-len(suffix)], "it"))
+                                r2 = suffix_replace(r2, suffix, "it")
                     else:
                         step1_success = False
                     break
@@ -2833,7 +2834,7 @@ class RomanianStemmer(_StandardStemmer):
 
                     elif suffix in ("ism", "isme", "ist", "ista", "iste",
                                     "isti", "ist\u0103", "i\u015Fti"):
-                        word = "".join((word[:-len(suffix)], "ist"))
+                        word = suffix_replace(word, suffix, "ist")
 
                     else:
                         word = word[:-len(suffix)]
@@ -3314,7 +3315,6 @@ class RussianStemmer(_LanguageSpecificStemmer):
         return word
 
 
-
 class SpanishStemmer(_StandardStemmer):
 
     """
@@ -3406,123 +3406,108 @@ class SpanishStemmer(_StandardStemmer):
 
         # STEP 0: Attached pronoun
         for suffix in self.__step0_suffixes:
-            if word.endswith(suffix):
-                if rv.endswith(suffix):
-                    if rv[:-len(suffix)].endswith(("i\xE9ndo",
-                                                   "\xE1ndo",
-                                                   "\xE1r", "\xE9r",
-                                                   "\xEDr")):
-                        word = (word[:-len(suffix)].replace("\xE1", "a")
-                                                   .replace("\xE9", "e")
-                                                   .replace("\xED", "i"))
-                        r1 = (r1[:-len(suffix)].replace("\xE1", "a")
-                                               .replace("\xE9", "e")
-                                               .replace("\xED", "i"))
-                        r2 = (r2[:-len(suffix)].replace("\xE1", "a")
-                                               .replace("\xE9", "e")
-                                               .replace("\xED", "i"))
-                        rv = (rv[:-len(suffix)].replace("\xE1", "a")
-                                               .replace("\xE9", "e")
-                                               .replace("\xED", "i"))
-
-                    elif rv[:-len(suffix)].endswith(("ando", "iendo",
-                                                     "ar", "er", "ir")):
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+            if not (word.endswith(suffix) and rv.endswith(suffix)):
+                continue
 
-                    elif (rv[:-len(suffix)].endswith("yendo") and
-                          word[:-len(suffix)].endswith("uyendo")):
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
-                break
+            if ((rv[:-len(suffix)].endswith(("ando", "\xE1ndo",
+                                             "ar", "\xE1r",
+                                             "er", "\xE9r",
+                                             "iendo", "i\xE9ndo",
+                                             "ir", "\xEDr"))) or
+                (rv[:-len(suffix)].endswith("yendo") and
+                    word[:-len(suffix)].endswith("uyendo"))):
+
+                word = self.__replace_accented(word[:-len(suffix)])
+                r1 = self.__replace_accented(r1[:-len(suffix)])
+                r2 = self.__replace_accented(r2[:-len(suffix)])
+                rv = self.__replace_accented(rv[:-len(suffix)])
+            break
 
         # STEP 1: Standard suffix removal
         for suffix in self.__step1_suffixes:
-            if word.endswith(suffix):
-                if suffix == "amente" and r1.endswith(suffix):
-                    step1_success = True
-                    word = word[:-6]
-                    r2 = r2[:-6]
-                    rv = rv[:-6]
+            if not word.endswith(suffix):
+                continue
 
-                    if r2.endswith("iv"):
-                        word = word[:-2]
-                        r2 = r2[:-2]
-                        rv = rv[:-2]
+            if suffix == "amente" and r1.endswith(suffix):
+                step1_success = True
+                word = word[:-6]
+                r2 = r2[:-6]
+                rv = rv[:-6]
 
-                        if r2.endswith("at"):
-                            word = word[:-2]
-                            rv = rv[:-2]
+                if r2.endswith("iv"):
+                    word = word[:-2]
+                    r2 = r2[:-2]
+                    rv = rv[:-2]
 
-                    elif r2.endswith(("os", "ic", "ad")):
+                    if r2.endswith("at"):
                         word = word[:-2]
                         rv = rv[:-2]
 
-                elif r2.endswith(suffix):
-                    step1_success = True
-                    if suffix in ("adora", "ador", "aci\xF3n", "adoras",
-                                  "adores", "aciones", "ante", "antes",
-                                  "ancia", "ancias"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                elif r2.endswith(("os", "ic", "ad")):
+                    word = word[:-2]
+                    rv = rv[:-2]
 
-                        if r2.endswith("ic"):
-                            word = word[:-2]
-                            rv = rv[:-2]
+            elif r2.endswith(suffix):
+                step1_success = True
+                if suffix in ("adora", "ador", "aci\xF3n", "adoras",
+                              "adores", "aciones", "ante", "antes",
+                              "ancia", "ancias"):
+                    word = word[:-len(suffix)]
+                    r2 = r2[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
 
-                    elif suffix in ("log\xEDa", "log\xEDas"):
-                        word = word.replace(suffix, "log")
-                        rv = rv.replace(suffix, "log")
+                    if r2.endswith("ic"):
+                        word = word[:-2]
+                        rv = rv[:-2]
 
-                    elif suffix in ("uci\xF3n", "uciones"):
-                        word = word.replace(suffix, "u")
-                        rv = rv.replace(suffix, "u")
+                elif suffix in ("log\xEDa", "log\xEDas"):
+                    word = suffix_replace(word, suffix, "log")
+                    rv = suffix_replace(rv, suffix, "log")
 
-                    elif suffix in ("encia", "encias"):
-                        word = word.replace(suffix, "ente")
-                        rv = rv.replace(suffix, "ente")
+                elif suffix in ("uci\xF3n", "uciones"):
+                    word = suffix_replace(word, suffix, "u")
+                    rv = suffix_replace(rv, suffix, "u")
 
-                    elif suffix == "mente":
-                        word = word[:-5]
-                        r2 = r2[:-5]
-                        rv = rv[:-5]
+                elif suffix in ("encia", "encias"):
+                    word = suffix_replace(word, suffix, "ente")
+                    rv = suffix_replace(rv, suffix, "ente")
 
-                        if r2.endswith(("ante", "able", "ible")):
-                            word = word[:-4]
-                            rv = rv[:-4]
+                elif suffix == "mente":
+                    word = word[:-len(suffix)]
+                    r2 = r2[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
 
-                    elif suffix in ("idad", "idades"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                    if r2.endswith(("ante", "able", "ible")):
+                        word = word[:-4]
+                        rv = rv[:-4]
 
-                        for pre_suff in ("abil", "ic", "iv"):
-                            if r2.endswith(pre_suff):
-                                word = word[:-len(pre_suff)]
-                                rv = rv[:-len(pre_suff)]
+                elif suffix in ("idad", "idades"):
+                    word = word[:-len(suffix)]
+                    r2 = r2[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
 
-                    elif suffix in ("ivo", "iva", "ivos", "ivas"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
-                        if r2.endswith("at"):
-                            word = word[:-2]
-                            rv = rv[:-2]
-                    else:
-                        word = word[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
-                break
+                    for pre_suff in ("abil", "ic", "iv"):
+                        if r2.endswith(pre_suff):
+                            word = word[:-len(pre_suff)]
+                            rv = rv[:-len(pre_suff)]
+
+                elif suffix in ("ivo", "iva", "ivos", "ivas"):
+                    word = word[:-len(suffix)]
+                    r2 = r2[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
+                    if r2.endswith("at"):
+                        word = word[:-2]
+                        rv = rv[:-2]
+                else:
+                    word = word[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
+            break
 
         # STEP 2a: Verb suffixes beginning 'y'
         if not step1_success:
             for suffix in self.__step2a_suffixes:
                 if (rv.endswith(suffix) and
-                    word[-len(suffix)-1:-len(suffix)] == "u"):
+                        word[-len(suffix)-1:-len(suffix)] == "u"):
                     word = word[:-len(suffix)]
                     rv = rv[:-len(suffix)]
                     break
@@ -3530,40 +3515,47 @@ class SpanishStemmer(_StandardStemmer):
         # STEP 2b: Other verb suffixes
             for suffix in self.__step2b_suffixes:
                 if rv.endswith(suffix):
+                    word = word[:-len(suffix)]
+                    rv = rv[:-len(suffix)]
                     if suffix in ("en", "es", "\xE9is", "emos"):
-                        word = word[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
-
                         if word.endswith("gu"):
                             word = word[:-1]
 
                         if rv.endswith("gu"):
                             rv = rv[:-1]
-                    else:
-                        word = word[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
                     break
 
         # STEP 3: Residual suffix
         for suffix in self.__step3_suffixes:
             if rv.endswith(suffix):
+                word = word[:-len(suffix)]
                 if suffix in ("e", "\xE9"):
-                    word = word[:-len(suffix)]
                     rv = rv[:-len(suffix)]
 
-                    if word[-2:] == "gu" and rv[-1] == "u":
+                    if word[-2:] == "gu" and rv.endswith("u"):
                         word = word[:-1]
-                else:
-                    word = word[:-len(suffix)]
                 break
 
-        word = (word.replace("\xE1", "a").replace("\xE9", "e")
-                    .replace("\xED", "i").replace("\xF3", "o")
-                    .replace("\xFA", "u"))
-
+        word = self.__replace_accented(word)
 
         return word
 
+    def __replace_accented(self, word):
+        """
+        Replaces all accented letters on a word with their non-accented
+        counterparts.
+
+        :param word: A spanish word, with or without accents
+        :type word: str or unicode
+        :return: a word with the accented letters (á, é, í, ó, ú) replaced with
+                 their non-accented counterparts (a, e, i, o, u)
+        :rtype: str or unicode
+        """
+        return (word.replace("\xE1", "a")
+                .replace("\xE9", "e")
+                .replace("\xED", "i")
+                .replace("\xF3", "o")
+                .replace("\xFA", "u"))
 
 
 class SwedishStemmer(_ScandinavianStemmer):
@@ -3644,11 +3636,9 @@ class SwedishStemmer(_ScandinavianStemmer):
                     word = word[:-1]
                 break
 
-
         return word
 
 
-
 def demo():
     """
     This function provides a demonstration of the Snowball stemmers.
@@ -3720,8 +3710,6 @@ def demo():
         print("\n")
 
 
-
 if __name__ == "__main__":
     import doctest
     doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
-
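
Before the new helper module that follows, a quick orientation on how the Snowball stemmers touched above are used. A minimal sketch; the commented results are the expected English (Porter2) behaviour, stated as an expectation rather than a captured run:

    from nltk.stem.snowball import SnowballStemmer

    stemmer = SnowballStemmer("english")
    print(stemmer.stem("running"))       # expected: run
    print(stemmer.stem("generously"))    # expected: generous

    # Languages supported by the wrapper class.
    print(SnowballStemmer.languages)
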
diff --git a/nltk/stem/util.py b/nltk/stem/util.py
new file mode 100644
index 0000000..174e0fb
--- /dev/null
+++ b/nltk/stem/util.py
@@ -0,0 +1,12 @@
+# Natural Language Toolkit: Stemmer Utilities
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Author: Helder <he7d3r at gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+def suffix_replace(original, old, new):
+    """
+    Replaces the old suffix of the original string by a new suffix
+    """
+    return original[:-len(old)] + new
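
This helper is what the many snowball.py hunks above swap in for the old string-surgery idiom. A tiny self-contained sketch of the equivalence (the word and suffix are illustrative; the function body mirrors the new nltk/stem/util.py):

    def suffix_replace(original, old, new):
        """Replace the old suffix of the original string with a new suffix."""
        return original[:-len(old)] + new

    word = "amorosamente"
    suffix = "amente"

    # Old idiom used throughout snowball.py before this change:
    old_style = "".join((word[:-len(suffix)], "e"))
    # New helper introduced in nltk/stem/util.py:
    new_style = suffix_replace(word, suffix, "e")

    assert old_style == new_style == "amorose"
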
diff --git a/nltk/stem/wordnet.py b/nltk/stem/wordnet.py
index 7df2fee..092a449 100644
--- a/nltk/stem/wordnet.py
+++ b/nltk/stem/wordnet.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: WordNet stemmer interface
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/__init__.py b/nltk/tag/__init__.py
index 6262eb5..c9ce8d5 100644
--- a/nltk/tag/__init__.py
+++ b/nltk/tag/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Taggers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -73,13 +73,16 @@ from nltk.tag.tnt           import TnT
 from nltk.tag.hunpos        import HunposTagger
 from nltk.tag.stanford      import StanfordTagger
 from nltk.tag.hmm           import HiddenMarkovModelTagger, HiddenMarkovModelTrainer
+from nltk.tag.senna         import SennaTagger, SennaChunkTagger, SennaNERTagger
 from nltk.tag.mapping       import tagset_mapping, map_tag
+from nltk.tag.crf           import CRFTagger
 
 from nltk.data import load
 
 
 # Standard treebank POS tagger
 _POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
+
 def pos_tag(tokens):
     """
     Use NLTK's currently recommended part of speech tagger to
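
The truncated docstring above belongs to `pos_tag`, the convenience wrapper that loads the pickled treebank tagger named by `_POS_TAGGER`. A minimal usage sketch; the exact tags depend on the installed model, so the commented output is indicative only:

    import nltk

    tokens = nltk.word_tokenize("And now for something completely different")
    print(nltk.pos_tag(tokens))
    # indicative output:
    # [('And', 'CC'), ('now', 'RB'), ('for', 'IN'), ('something', 'NN'),
    #  ('completely', 'RB'), ('different', 'JJ')]
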
diff --git a/nltk/tag/api.py b/nltk/tag/api.py
index 8fc3efd..fab972a 100644
--- a/nltk/tag/api.py
+++ b/nltk/tag/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagger Interface
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/brill.py b/nltk/tag/brill.py
index ea236fa..3aa46f8 100644
--- a/nltk/tag/brill.py
+++ b/nltk/tag/brill.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
@@ -19,7 +19,7 @@ from nltk import jsontags
 
 
 ######################################################################
-## Brill Templates
+# Brill Templates
 ######################################################################
 
 @jsontags.register_tag
@@ -75,6 +75,7 @@ def nltkdemo18():
         Template(Word([-1]), Word([1])),
     ]
 
+
 def nltkdemo18plus():
     """
     Return 18 templates, from the original nltk demo, and additionally a few
@@ -88,6 +89,7 @@ def nltkdemo18plus():
         Template(Pos([-1]), Word([0]), Pos([1])),
     ]
 
+
 def fntbl37():
     """
     Return 37 templates taken from the postagging task of the
@@ -135,6 +137,7 @@ def fntbl37():
         Template(Pos([1]), Pos([2]), Word([1]))
     ]
 
+
 def brill24():
     """
     Return 24 templates of the seminal TBL paper, Brill (1995)
@@ -171,9 +174,10 @@ def describe_template_sets():
     """
     Print the available template sets in this demo, with a short description
     """
-    import inspect, sys
+    import inspect
+    import sys
 
-    #a bit of magic to get all functions in this module
+    # a bit of magic to get all functions in this module
     templatesets = inspect.getmembers(sys.modules[__name__], inspect.isfunction)
     for (name, obj) in templatesets:
         if name == "describe_template_sets":
@@ -182,7 +186,7 @@ def describe_template_sets():
 
 
 ######################################################################
-## The Brill Tagger
+# The Brill Tagger
 ######################################################################
 
 @jsontags.register_tag
@@ -201,7 +205,7 @@ class BrillTagger(TaggerI):
     of the TaggerTrainers available.
     """
 
-    json_tag='nltk.tag.BrillTagger'
+    json_tag = 'nltk.tag.BrillTagger'
 
     def __init__(self, initial_tagger, rules, training_stats=None):
         """
@@ -310,35 +314,40 @@ class BrillTagger(TaggerI):
             weighted_traincounts[tid] += score
         tottrainscores = sum(trainscores)
 
-        #det_tplsort() is for deterministic sorting;
-        #the otherwise convenient Counter.most_common() unfortunately
-        #does not break ties deterministically
-        #between python versions and will break cross-version tests
+        # det_tplsort() is for deterministic sorting;
+        # the otherwise convenient Counter.most_common() unfortunately
+        # does not break ties deterministically
+        # between python versions and will break cross-version tests
         def det_tplsort(tpl_value):
             return (tpl_value[1], repr(tpl_value[0]))
 
         def print_train_stats():
             print("TEMPLATE STATISTICS (TRAIN)  {0} templates, {1} rules)".format(
-                                              len(template_counts),len(tids)))
+                len(template_counts),
+                len(tids))
+            )
             print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                   "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
             head = "#ID | Score (train) |  #Rules     | Template"
             print(head, "\n", "-" * len(head), sep="")
             train_tplscores = sorted(weighted_traincounts.items(), key=det_tplsort, reverse=True)
             for (tid, trainscore) in train_tplscores:
-                s = "{0:s} | {1:5d}   {2:5.3f} |{3:4d}   {4:.3f} | {5:s}".format(
-                 tid,
-                 trainscore,
-                 trainscore/tottrainscores,
-                 template_counts[tid],
-                 template_counts[tid]/len(tids),
-                 Template.ALLTEMPLATES[int(tid)])
+                s = "{0} | {1:5d}   {2:5.3f} |{3:4d}   {4:.3f} | {5}".format(
+                    tid,
+                    trainscore,
+                    trainscore/tottrainscores,
+                    template_counts[tid],
+                    template_counts[tid]/len(tids),
+                    Template.ALLTEMPLATES[int(tid)],
+                )
                 print(s)
 
         def print_testtrain_stats():
             testscores = test_stats['rulescores']
             print("TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
-                                                  len(template_counts),len(tids)))
+                len(template_counts),
+                len(tids)),
+            )
             print("TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
                   "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats))
             print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
@@ -352,14 +361,15 @@ class BrillTagger(TaggerI):
             test_tplscores = sorted(weighted_testcounts.items(), key=det_tplsort, reverse=True)
             for (tid, testscore) in test_tplscores:
                 s = "{0:s} |{1:5d}  {2:6.3f} |  {3:4d}   {4:.3f} |{5:4d}   {6:.3f} | {7:s}".format(
-                 tid,
-                 testscore,
-                 testscore/tottestscores,
-                 weighted_traincounts[tid],
-                 weighted_traincounts[tid]/tottrainscores,
-                 template_counts[tid],
-                 template_counts[tid]/len(tids),
-                 Template.ALLTEMPLATES[int(tid)])
+                    tid,
+                    testscore,
+                    testscore/tottestscores,
+                    weighted_traincounts[tid],
+                    weighted_traincounts[tid]/tottrainscores,
+                    template_counts[tid],
+                    template_counts[tid]/len(tids),
+                    Template.ALLTEMPLATES[int(tid)],
+                )
                 print(s)
 
         def print_unused_templates():
@@ -395,15 +405,13 @@ class BrillTagger(TaggerI):
         :returns: tuple of (tagged_sequences, ordered list of rule scores (one for each rule))
         """
         def counterrors(xs):
-            return sum(t[1] != g[1]
-                       for pair in zip(xs, gold)
-                          for (t, g) in zip(*pair))
+            return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair))
         testing_stats = {}
         testing_stats['tokencount'] = sum(len(t) for t in sequences)
         testing_stats['sequencecount'] = len(sequences)
         tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences]
         testing_stats['initialerrors'] = counterrors(tagged_tokenses)
-        testing_stats['initialacc'] = 1- testing_stats['initialerrors']/testing_stats['tokencount']
+        testing_stats['initialacc'] = 1 - testing_stats['initialerrors']/testing_stats['tokencount']
         # Apply each rule to the entire corpus, in order
         errors = [testing_stats['initialerrors']]
         for rule in self._rules:
diff --git a/nltk/tag/brill_trainer.py b/nltk/tag/brill_trainer.py
index 7e835ed..5fa43e5 100644
--- a/nltk/tag/brill_trainer.py
+++ b/nltk/tag/brill_trainer.py
@@ -12,17 +12,17 @@ from __future__ import print_function, division
 
 import bisect
 from collections import defaultdict
-import os.path
-from codecs import open
+
 import textwrap
 
 from nltk.tag.util import untag
 from nltk.tag.brill import BrillTagger
 
 ######################################################################
-## Brill Tagger Trainer
+#  Brill Tagger Trainer
 ######################################################################
 
+
 class BrillTaggerTrainer(object):
     """
     A trainer for tbl taggers.
@@ -91,13 +91,9 @@ class BrillTaggerTrainer(object):
            if the rule applies.  This records the next position we
            need to check to see if the rule messed anything up."""
 
-
-    #////////////////////////////////////////////////////////////
     # Training
-    #////////////////////////////////////////////////////////////
 
     def train(self, train_sents, max_rules=200, min_score=2, min_acc=None):
-
         """
         Trains the Brill tagger on the corpus *train_sents*,
         producing at most *max_rules* transformations, each of which
@@ -140,6 +136,7 @@ class BrillTaggerTrainer(object):
 
         #construct a BrillTaggerTrainer
         >>> tt = BrillTaggerTrainer(baseline, templates, trace=3)
+
         >>> tagger1 = tt.train(training_data, max_rules=10)
         TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: None)
         Finding initial useful rules...
@@ -163,45 +160,37 @@ class BrillTaggerTrainer(object):
           22  27   5  24  | NN->-NONE- if Pos:VBD@[-1]
           17  17   0   0  | NN->CC if Pos:NN@[-1] & Word:and@[0]
 
-
-
         >>> tagger1.rules()[1:3]
         (Rule('001', 'NN', ',', [(Pos([-1]),'NN'), (Word([0]),',')]), Rule('001', 'NN', '.', [(Pos([-1]),'NN'), (Word([0]),'.')]))
 
-
         >>> train_stats = tagger1.train_stats()
         >>> [train_stats[stat] for stat in ['initialerrors', 'finalerrors', 'rulescores']]
         [1775, 1269, [132, 85, 69, 51, 47, 33, 26, 24, 22, 17]]
 
-
-        ##FIXME: the following test fails -- why?
-        #
-        #>>> tagger1.print_template_statistics(printunused=False)
-        #TEMPLATE STATISTICS (TRAIN)  2 templates, 10 rules)
-        #TRAIN (   3163 tokens) initial  2358 0.2545 final:  1719 0.4565
-        ##ID | Score (train) |  #Rules     | Template
-        #--------------------------------------------
-        #001 |   404   0.632 |   7   0.700 | Template(Pos([-1]),Word([0]))
-        #000 |   235   0.368 |   3   0.300 | Template(Pos([-1]))
-        #<BLANKLINE>
-        #<BLANKLINE>
+        >>> tagger1.print_template_statistics(printunused=False)
+        TEMPLATE STATISTICS (TRAIN)  2 templates, 10 rules)
+        TRAIN (   2417 tokens) initial  1775 0.2656 final:  1269 0.4750
+        #ID | Score (train) |  #Rules     | Template
+        --------------------------------------------
+        001 |   305   0.603 |   7   0.700 | Template(Pos([-1]),Word([0]))
+        000 |   201   0.397 |   3   0.300 | Template(Pos([-1]))
+        <BLANKLINE>
+        <BLANKLINE>
 
         >>> tagger1.evaluate(gold_data) # doctest: +ELLIPSIS
         0.43996...
 
-        >>> (tagged, test_stats) = tagger1.batch_tag_incremental(testing_data, gold_data)
-
+        >>> tagged, test_stats = tagger1.batch_tag_incremental(testing_data, gold_data)
 
         >>> tagged[33][12:] == [('foreign', 'IN'), ('debt', 'NN'), ('of', 'IN'), ('$', 'NN'), ('64', 'CD'),
         ... ('billion', 'NN'), ('*U*', 'NN'), ('--', 'NN'), ('the', 'DT'), ('third-highest', 'NN'), ('in', 'NN'),
         ... ('the', 'DT'), ('developing', 'VBG'), ('world', 'NN'), ('.', '.')]
         True
 
-
         >>> [test_stats[stat] for stat in ['initialerrors', 'finalerrors', 'rulescores']]
         [1855, 1376, [100, 85, 67, 58, 27, 36, 27, 16, 31, 32]]
 
-        ##a high-accuracy tagger
+        # a high-accuracy tagger
         >>> tagger2 = tt.train(training_data, max_rules=10, min_acc=0.99)
         TBL train (fast) (seqs: 100; tokens: 2417; tpls: 2; min score: 2; min acc: 0.99)
         Finding initial useful rules...
@@ -225,20 +214,17 @@ class BrillTaggerTrainer(object):
           18  18   0   0  | CD->-NONE- if Pos:NN@[-1] & Word:0@[0]
           18  18   0   0  | NN->CC if Pos:NN@[-1] & Word:and@[0]
 
-
-        >>> tagger2.evaluate(gold_data) # doctest: +ELLIPSIS
+        >>> tagger2.evaluate(gold_data)  # doctest: +ELLIPSIS
         0.44159544...
-
         >>> tagger2.rules()[2:4]
         (Rule('001', 'NN', '.', [(Pos([-1]),'NN'), (Word([0]),'.')]), Rule('001', 'NN', 'IN', [(Pos([-1]),'NN'), (Word([0]),'of')]))
 
-        #NOTE1: (!!FIXME) A far better baseline uses nltk.tag.UnigramTagger,
-        #with a RegexpTagger only as backoff. For instance,
-        #>>> baseline = UnigramTagger(baseline_data, backoff=backoff)
-        #However, as of Nov 2013, nltk.tag.UnigramTagger does not yield consistent results
-        #between python versions. The simplistic backoff above is a workaround to make doctests
-        #get consistent input.
-
+        # NOTE1: (!!FIXME) A far better baseline uses nltk.tag.UnigramTagger,
+        # with a RegexpTagger only as backoff. For instance,
+        # >>> baseline = UnigramTagger(baseline_data, backoff=backoff)
+        # However, as of Nov 2013, nltk.tag.UnigramTagger does not yield consistent results
+        # between python versions. The simplistic backoff above is a workaround to make doctests
+        # get consistent input.
 
         :param train_sents: training data
         :type train_sents: list(list(tuple))
@@ -252,9 +238,8 @@ class BrillTaggerTrainer(object):
         :rtype: BrillTagger
 
         """
-        #!! FIXME: several tests are a bit too dependent on tracing format
-        #!! FIXME: tests in trainer.fast and trainer.brillorig are exact duplicates
-
+        # FIXME: several tests are a bit too dependent on tracing format
+        # FIXME: tests in trainer.fast and trainer.brillorig are exact duplicates
 
         # Basic idea: Keep track of the rules that apply at each position.
         # And keep track of the positions to which each rule applies.
@@ -273,9 +258,11 @@ class BrillTaggerTrainer(object):
         trainstats['sequencecount'] = len(test_sents)
         trainstats['templatecount'] = len(self._templates)
         trainstats['rulescores'] = []
-        trainstats['initialerrors'] = sum(tag[1] != truth[1]
-                                                    for paired in zip(test_sents, train_sents)
-                                                    for (tag, truth) in zip(*paired))
+        trainstats['initialerrors'] = sum(
+            tag[1] != truth[1]
+            for paired in zip(test_sents, train_sents)
+            for (tag, truth) in zip(*paired)
+        )
         trainstats['initialacc'] = 1 - trainstats['initialerrors']/trainstats['tokencount']
         if self._trace > 0:
             print("TBL train (fast) (seqs: {sequencecount}; tokens: {tokencount}; "
@@ -284,14 +271,17 @@ class BrillTaggerTrainer(object):
         # Initialize our mappings.  This will find any errors made
         # by the initial tagger, and use those to generate repair
         # rules, which are added to the rule mappings.
-        if self._trace > 0: print("Finding initial useful rules...")
+        if self._trace:
+            print("Finding initial useful rules...")
         self._init_mappings(test_sents, train_sents)
-        if self._trace > 0: print(("    Found %d useful rules." %
-                                   len(self._rule_scores)))
+        if self._trace:
+            print(("    Found %d useful rules." % len(self._rule_scores)))
 
         # Let the user know what we're up to.
-        if self._trace > 2: self._trace_header()
-        elif self._trace == 1: print("Selecting rules...")
+        if self._trace > 2:
+            self._trace_header()
+        elif self._trace == 1:
+            print("Selecting rules...")
 
         # Repeatedly select the best rule, and add it to `rules`.
         rules = []
@@ -304,10 +294,11 @@ class BrillTaggerTrainer(object):
                     score = self._rule_scores[rule]
                     trainstats['rulescores'].append(score)
                 else:
-                    break # No more good rules left!
+                    break  # No more good rules left!
 
                 # Report the rule that we found.
-                if self._trace > 1: self._trace_rule(rule)
+                if self._trace > 1:
+                    self._trace_rule(rule)
 
                 # Apply the new rule at the relevant sites
                 self._apply_rule(rule, test_sents)
@@ -349,7 +340,7 @@ class BrillTaggerTrainer(object):
             for wordnum, (word, tag) in enumerate(sent):
 
                 # Initialize tag_positions
-                self._tag_positions[tag].append( (sentnum,wordnum) )
+                self._tag_positions[tag].append((sentnum, wordnum))
 
                 # If it's an error token, update the rule-related mappings.
                 correct_tag = train_sents[sentnum][wordnum][1]
@@ -393,7 +384,7 @@ class BrillTaggerTrainer(object):
             self._positions_by_rule[rule][pos] = 1
         elif rule.original_tag == correct_tag:
             self._positions_by_rule[rule][pos] = -1
-        else: # was wrong, remains wrong
+        else:  # was wrong, remains wrong
             self._positions_by_rule[rule][pos] = 0
 
         # Update _rules_by_position
@@ -450,7 +441,7 @@ class BrillTaggerTrainer(object):
             for rule in best_rules:
                 positions = self._tag_positions[rule.original_tag]
 
-                unk = self._first_unknown_position.get(rule, (0,-1))
+                unk = self._first_unknown_position.get(rule, (0, -1))
                 start = bisect.bisect_left(positions, unk)
 
                 for i in range(start, len(positions)):
@@ -461,23 +452,23 @@ class BrillTaggerTrainer(object):
                         if self._rule_scores[rule] < max_score:
                             self._first_unknown_position[rule] = (sentnum,
                                                                   wordnum+1)
-                            break # The update demoted the rule.
+                            break  # The update demoted the rule.
 
                 if self._rule_scores[rule] == max_score:
-                    self._first_unknown_position[rule] = (len(train_sents)+1,0)
-                    #optimization: if no min_acc threshold given, don't bother computing accuracy
+                    self._first_unknown_position[rule] = (len(train_sents) + 1, 0)
+                    # optimization: if no min_acc threshold given, don't bother computing accuracy
                     if min_acc is None:
                         return rule
                     else:
                         changes = self._positions_by_rule[rule].values()
-                        num_fixed = len([c for c in changes if c==1])
-                        num_broken = len([c for c in changes if c==-1])
-                        #acc here is fixed/(fixed+broken); could also be
-                        #fixed/(fixed+broken+other) == num_fixed/len(changes)
+                        num_fixed = len([c for c in changes if c == 1])
+                        num_broken = len([c for c in changes if c == -1])
+                        # acc here is fixed/(fixed+broken); could also be
+                        # fixed/(fixed+broken+other) == num_fixed/len(changes)
                         acc = num_fixed/(num_fixed+num_broken)
                         if acc >= min_acc:
                             return rule
-                        #else: rule too inaccurate, discard and try next
+                        # else: rule too inaccurate, discard and try next
 
             # We demoted (or skipped due to < min_acc, if that was given)
             # all the rules with score==max_score.
@@ -486,19 +477,16 @@ class BrillTaggerTrainer(object):
             if not self._rules_by_score[max_score]:
                 del self._rules_by_score[max_score]
 
-
-
-
     def _apply_rule(self, rule, test_sents):
         """
         Update *test_sents* by applying *rule* everywhere where its
         conditions are met.
         """
         update_positions = set(self._positions_by_rule[rule])
-        old_tag = rule.original_tag
         new_tag = rule.replacement_tag
 
-        if self._trace > 3: self._trace_apply(len(update_positions))
+        if self._trace > 3:
+            self._trace_apply(len(update_positions))
 
         # Update test_sents.
         for (sentnum, wordnum) in update_positions:
@@ -549,7 +537,6 @@ class BrillTaggerTrainer(object):
 
             # Check if the change causes our templates to propose any
             # new rules for this position.
-            site_rules = set()
             for template in self._templates:
                 for new_rule in template.applicable_rules(test_sent, wordnum,
                                                           correct_tag):
@@ -577,9 +564,7 @@ class BrillTaggerTrainer(object):
         if self._trace > 3:
             self._trace_update_rules(num_obsolete, num_new, num_unseen)
 
-    #////////////////////////////////////////////////////////////
     # Tracing
-    #////////////////////////////////////////////////////////////
 
     def _trace_header(self):
         print("""
@@ -593,19 +578,17 @@ class BrillTaggerTrainer(object):
         """.rstrip())
 
     def _trace_rule(self, rule):
-        assert self._rule_scores[rule] == \
-               sum(self._positions_by_rule[rule].values())
+        assert self._rule_scores[rule] == sum(self._positions_by_rule[rule].values())
 
         changes = self._positions_by_rule[rule].values()
-        num_changed = len(changes)
-        num_fixed = len([c for c in changes if c==1])
-        num_broken = len([c for c in changes if c==-1])
-        num_other = len([c for c in changes if c==0])
+        num_fixed = len([c for c in changes if c == 1])
+        num_broken = len([c for c in changes if c == -1])
+        num_other = len([c for c in changes if c == 0])
         score = self._rule_scores[rule]
 
         rulestr = rule.format(self._ruleformat)
         if self._trace > 2:
-            print('%4d%4d%4d%4d  |' % (score,num_fixed,num_broken,num_other), end=' ')
+            print('%4d%4d%4d%4d  |' % (score, num_fixed, num_broken, num_other), end=' ')
             print(textwrap.fill(rulestr, initial_indent=' '*20, width=79,
                                 subsequent_indent=' '*18+'|   ').strip())
         else:
diff --git a/nltk/tag/crf.py b/nltk/tag/crf.py
new file mode 100644
index 0000000..3842d87
--- /dev/null
+++ b/nltk/tag/crf.py
@@ -0,0 +1,203 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Interface to the CRFSuite Tagger
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Author: Long Duong <longdt219 at gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A module for POS tagging using CRFSuite
+"""
+from __future__ import absolute_import
+from __future__ import unicode_literals
+import unicodedata
+import re 
+from nltk.tag.api import TaggerI
+
+try:
+    import pycrfsuite
+except ImportError:
+    pass
+
+class CRFTagger(TaggerI):
+    """
+    An interface for POS tagging using CRFSuite: https://pypi.python.org/pypi/python-crfsuite
+    
+    >>> from nltk.tag.crf import CRFTagger
+    >>> ct = CRFTagger()
+ 
+    >>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')],
+    ... [('dog','Noun'),('eat','Verb'),('meat','Noun')]]
+    
+    >>> ct.train(train_data,'model.crf.tagger')
+    >>> ct.tag_sents([['dog','is','good'], ['Cat','eat','meat']])
+    [[('dog', 'Noun'), ('is', 'Verb'), ('good', 'Adj')], [('Cat', 'Noun'), ('eat', 'Verb'), ('meat', 'Noun')]]
+    
+    >>> gold_sentences = [[('dog','Noun'),('is','Verb'),('good','Adj')] , [('Cat','Noun'),('eat','Verb'), ('meat','Noun')]] 
+    >>> ct.evaluate(gold_sentences) 
+    1.0
+    
+    Setting learned model file  
+    >>> ct = CRFTagger() 
+    >>> ct.set_model_file('model.crf.tagger')
+    >>> ct.evaluate(gold_sentences)
+    1.0
+    
+    """
+    
+    
+    def __init__(self,  feature_func = None, verbose = False, training_opt = {}):
+        """
+        Initialize the CRFSuite tagger 
+        :param feature_func: The function that extracts features for each token of a sentence. This function should take
+        two parameters, tokens and index, and return the features for the token at the given index of the token list.
+        See the built-in _get_features function for more detail.
+        :param verbose: output debugging messages during training.
+        :type verbose: boolean
+        :param training_opt: python-crfsuite training options
+        :type training_opt: dictionary
+        
+        Set of possible training options (using LBFGS training algorithm).  
+         'feature.minfreq' : The minimum frequency of features.
+         'feature.possible_states' : Force to generate possible state features.
+         'feature.possible_transitions' : Force to generate possible transition features.
+         'c1' : Coefficient for L1 regularization.
+         'c2' : Coefficient for L2 regularization.
+         'max_iterations' : The maximum number of iterations for L-BFGS optimization.
+         'num_memories' : The number of limited memories for approximating the inverse hessian matrix.
+         'epsilon' : Epsilon for testing the convergence of the objective.
+         'period' : The duration of iterations to test the stopping criterion.
+         'delta' : The threshold for the stopping criterion; an L-BFGS iteration stops when the
+                    improvement of the log likelihood over the last ${period} iterations is no greater than this threshold.
+         'linesearch' : The line search algorithm used in L-BFGS updates:
+                           { 'MoreThuente': More and Thuente's method,
+                              'Backtracking': Backtracking method with regular Wolfe condition,
+                              'StrongBacktracking': Backtracking method with strong Wolfe condition
+                           } 
+         'max_linesearch' :  The maximum number of trials for the line search algorithm.
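+
+        A minimal construction sketch; the option values below are illustrative
+        rather than defaults, ``simple_features`` is a hypothetical user-defined
+        function, and the last line requires python-crfsuite to be installed:
+
+        >>> def simple_features(tokens, idx):   # a hypothetical, minimal feature function
+        ...     return ['WORD_' + tokens[idx]]
+        >>> opts = {'c1': 0.1, 'c2': 0.01, 'max_iterations': 50}   # illustrative values, not defaults
+        >>> ct = CRFTagger(feature_func=simple_features, training_opt=opts)  # doctest: +SKIP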
+         
+        """
+                   
+        self._model_file = ''
+        self._tagger = pycrfsuite.Tagger()
+        
+        if feature_func is None:
+            self._feature_func =  self._get_features
+        else:
+            self._feature_func =  feature_func
+        
+        self._verbose = verbose 
+        self._training_options = training_opt
+        self._pattern = re.compile(r'\d')
+        
+    def set_model_file(self, model_file):
+        self._model_file = model_file
+        self._tagger.open(self._model_file)
+            
+    def _get_features(self, tokens, idx):
+        """
+        Extract basic features about this word, including
+             - Current word
+             - Is it capitalized?
+             - Does it contain punctuation?
+             - Does it contain a number?
+             - Suffixes up to length 3
+        Note that we might also include features over the previous word, the next word, etc.
+
+        :return: a list which contains the features
+        :rtype: list(str)
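+
+        For example, for ``tokens = ['Bond007', 'is', 'here']`` and ``idx = 0`` the
+        returned list is (an illustrative trace of the code below, not an executed
+        doctest): ['CAPITALIZATION', 'HAS_NUM', 'SUF_7', 'SUF_07', 'SUF_007', 'WORD_Bond007']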
+        
+        """ 
+        token = tokens[idx]
+        
+        feature_list = []  
+        # Capitalization 
+        if token[0].isupper():
+            feature_list.append('CAPITALIZATION')
+        
+        # Number 
+        if re.search(self._pattern, token) is not None:
+            feature_list.append('HAS_NUM') 
+        
+        # Punctuation
+        punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"])
+        if all (unicodedata.category(x) in punc_cat for x in token):
+            feature_list.append('PUNCTUATION')
+        
+        # Suffix up to length 3
+        if len(token) > 1:
+            feature_list.append('SUF_' + token[-1:]) 
+        if len(token) > 2: 
+            feature_list.append('SUF_' + token[-2:])    
+        if len(token) > 3: 
+            feature_list.append('SUF_' + token[-3:])
+            
+        feature_list.append('WORD_' + token )
+        
+        return feature_list
+        
+    def tag_sents(self, sents):
+        '''
+        Tag a list of sentences. NB: before using this function, the user should specify the model_file either by
+                       - training a new model using the ``train`` function, or
+                       - loading a pre-trained model via the ``set_model_file`` function.
+        :param sents: list of sentences to tag.
+        :type sents: list(list(str))
+        :return: list of tagged sentences.
+        :rtype: list(list(tuple(str,str)))
+        '''
+        if self._model_file == '':
+            raise Exception(' No model file is found !! Please use train or set_model_file function')
+        
+        # We need a list of sentences (rather than a generator) so that input and output can be matched up
+        result = []  
+        for tokens in sents:
+            features = [self._feature_func(tokens,i) for i in range(len(tokens))]
+            labels = self._tagger.tag(features)
+                
+            if len(labels) != len(tokens):
+                raise Exception(' Predicted Length Not Matched, Expect Errors !')
+            
+            tagged_sent = list(zip(tokens,labels))
+            result.append(tagged_sent)
+            
+        return result 
+    
+    def train(self, train_data, model_file):
+        '''
+        Train the CRF tagger using CRFSuite.
+        :param train_data: the list of annotated sentences.
+        :type train_data: list(list(tuple(str,str)))
+        :param model_file: the model will be saved to this file.
+         
+        '''
+        trainer = pycrfsuite.Trainer(verbose=self._verbose)
+        trainer.set_params(self._training_options)
+        
+        for sent in train_data:
+            tokens,labels = zip(*sent)
+            features = [self._feature_func(tokens,i) for i in range(len(tokens))]
+            trainer.append(features,labels)
+                        
+        # Now train the model, the output should be model_file
+        trainer.train(model_file)
+        # Save the model file
+        self.set_model_file(model_file) 
+
+    def tag(self, tokens):
+        '''
+        Tag a sentence using the Python CRFSuite tagger. NB: before using this function, the user should specify the model_file either by
+                       - training a new model using the ``train`` function, or
+                       - loading a pre-trained model via the ``set_model_file`` function.
+        :param tokens: list of tokens to tag.
+        :type tokens: list(str)
+        :return: list of tagged tokens.
+        :rtype: list(tuple(str,str))
+        '''
+        
+        return self.tag_sents([tokens])[0]
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
diff --git a/nltk/tag/hmm.py b/nltk/tag/hmm.py
index a4401ab..58127f2 100644
--- a/nltk/tag/hmm.py
+++ b/nltk/tag/hmm.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Hidden Markov Model
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Trevor Cohn <tacohn at csse.unimelb.edu.au>
 #         Philip Blunsom <pcbl at csse.unimelb.edu.au>
 #         Tiago Tresoldi <tiago at tresoldi.pro.br> (fixes)
diff --git a/nltk/tag/hunpos.py b/nltk/tag/hunpos.py
index 916c773..82e0560 100644
--- a/nltk/tag/hunpos.py
+++ b/nltk/tag/hunpos.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the HunPos POS-tagger
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof at heatherleaf.se>
-#         David Nemeskey <nemeskeyd at gmail.com> (modifications)
-#         Attila Zseder <zseder at gmail.com> (modifications)
+#         Dávid Márk Nemeskey <nemeskeyd at gmail.com> (modifications)
+#         Attila Zséder <zseder at gmail.com> (modifications)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -34,7 +34,7 @@ class HunposTagger(TaggerI):
     Example:
 
         >>> from nltk.tag.hunpos import HunposTagger
-        >>> ht = HunposTagger('english.model')
+        >>> ht = HunposTagger('en_wsj.model')
         >>> ht.tag('What is the airspeed of an unladen swallow ?'.split())
         [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]
         >>> ht.close()
@@ -44,7 +44,7 @@ class HunposTagger(TaggerI):
     free system resources. The class supports the context manager interface; if
     used in a with statement, the close() method is invoked automatically:
 
-        >>> with HunposTagger('english.model') as ht:
+        >>> with HunposTagger('en_wsj.model') as ht:
         ...     ht.tag('What is the airspeed of an unladen swallow ?'.split())
         ...
         [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]
@@ -71,14 +71,15 @@ class HunposTagger(TaggerI):
         hunpos_paths = list(map(os.path.expanduser, hunpos_paths))
 
         self._hunpos_bin = find_binary(
-                'hunpos-tag', path_to_bin,
-                env_vars=('HUNPOS_TAGGER',),
-                searchpath=hunpos_paths,
-                url=_hunpos_url,
-                verbose=verbose)
-
-        self._hunpos_model = find_file(path_to_model,
-                env_vars=('HUNPOS_TAGGER',), verbose=verbose)
+            'hunpos-tag', path_to_bin,
+            env_vars=('HUNPOS_TAGGER',),
+            searchpath=hunpos_paths,
+            url=_hunpos_url,
+            verbose=verbose
+        )
+
+        self._hunpos_model = find_file(
+            path_to_model, env_vars=('HUNPOS_TAGGER',), verbose=verbose)
         self._encoding = encoding
         self._hunpos = Popen([self._hunpos_bin, self._hunpos_model],
                              shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
@@ -106,14 +107,14 @@ class HunposTagger(TaggerI):
             assert "\n" not in token, "Tokens should not contain newlines"
             if isinstance(token, compat.text_type):
                 token = token.encode(self._encoding)
-            self._hunpos.stdin.write(token + "\n")
+            self._hunpos.stdin.write(token + b"\n")
         # We write a final empty line to tell hunpos that the sentence is finished:
-        self._hunpos.stdin.write("\n")
+        self._hunpos.stdin.write(b"\n")
         self._hunpos.stdin.flush()
 
         tagged_tokens = []
         for token in tokens:
-            tagged = self._hunpos.stdout.readline().strip().split("\t")
+            tagged = self._hunpos.stdout.readline().strip().split(b"\t")
             tag = (tagged[1] if len(tagged) > 1 else None)
             tagged_tokens.append((token, tag))
         # We have to read (and dismiss) the final empty line:
@@ -125,7 +126,7 @@ class HunposTagger(TaggerI):
 def setup_module(module):
     from nose import SkipTest
     try:
-        HunposTagger('english.model')
+        HunposTagger('en_wsj.model')
     except LookupError:
         raise SkipTest("HunposTagger is not available")
 
diff --git a/nltk/tag/mapping.py b/nltk/tag/mapping.py
index 847b2ab..d9a7fe3 100644
--- a/nltk/tag/mapping.py
+++ b/nltk/tag/mapping.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagset Mapping
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Nathan Schneider <nathan at cmu.edu>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/senna.py b/nltk/tag/senna.py
index e7897a1..85fd7e9 100644
--- a/nltk/tag/senna.py
+++ b/nltk/tag/senna.py
@@ -1,286 +1,106 @@
 # encoding: utf-8
-# Natural Language Toolkit: Interface to the Senna tagger
+# Natural Language Toolkit: Senna POS Tagger
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 """
-A module for interfacing with the SENNA pipeline.
+Senna POS tagger, NER Tagger, Chunk Tagger
+
+The input is:
+- path to the directory that contains SENNA executables. If the path is incorrect,
+   SennaTagger will automatically search for the executable file specified in the SENNA environment variable
+- (optionally) the encoding of the input data (default: utf-8)
+
+    >>> from nltk.tag import SennaTagger
+    >>> tagger = SennaTagger('/usr/share/senna-v2.0')
+    >>> tagger.tag('What is the airspeed of an unladen swallow ?'.split())
+    [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'),
+    ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'NN'), ('?', '.')]
+
+    >>> from nltk.tag import SennaChunkTagger
+    >>> chktagger = SennaChunkTagger('/usr/share/senna-v2.0')
+    >>> chktagger.tag('What is the airspeed of an unladen swallow ?'.split())
+    [('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'),
+    ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'),
+    ('?', 'O')]
+
+    >>> from nltk.tag import SennaNERTagger
+    >>> nertagger = SennaNERTagger('/usr/share/senna-v2.0')
+    >>> nertagger.tag('Shakespeare theatre was in London .'.split())
+    [('Shakespeare', 'B-PER'), ('theatre', 'O'), ('was', 'O'), ('in', 'O'),
+    ('London', 'B-LOC'), ('.', 'O')]
+    >>> nertagger.tag('UN headquarters are in NY , USA .'.split())
+    [('UN', 'B-ORG'), ('headquarters', 'O'), ('are', 'O'), ('in', 'O'),
+    ('NY', 'B-LOC'), (',', 'O'), ('USA', 'B-LOC'), ('.', 'O')]
 """
 
-from os import path, sep
-from subprocess import Popen, PIPE
-from platform import architecture, system
-from nltk.tag.api import TaggerI
-from nltk import compat
+from nltk.compat import python_2_unicode_compatible
+from nltk.classify import Senna
 
-_senna_url = 'http://ml.nec-labs.com/senna/'
-
-
-class Error(Exception):
-    """Basic error handling class to be extended by the module specific
-    exceptions"""
-
-
-class ExecutableNotFound(Error):
-    """Raised if the senna executable does not exist"""
-
-
-class RunFailure(Error):
-    """Raised if the pipeline fails to execute"""
-
-
-class SentenceMisalignment(Error):
-    """Raised if the new sentence is shorter than the original one or the number
-    of sentences in the result is less than the input."""
-
-
-class SennaTagger(TaggerI):
-    r"""
-    A general interface of the SENNA pipeline that supports any of the
-    operations specified in SUPPORTED_OPERATIONS.
-
-    Applying multiple operations at once has the speed advantage. For example,
-    senna v2.0 will calculate the POS tags in case you are extracting the named
-    entities. Applying both of the operations will cost only the time of
-    extracting the named entities.
-
-    SENNA pipeline has a fixed maximum size of the sentences that it can read.
-    By default it is 1024 token/sentence. If you have larger sentences, changing
-    the MAX_SENTENCE_SIZE value in SENNA_main.c should be considered and your
-    system specific binary should be rebuilt. Otherwise this could introduce
-    misalignment errors.
-
-    The input is:
-    - path to the directory that contains SENNA executables.
-    - List of the operations needed to be performed.
-    - (optionally) the encoding of the input data (default:utf-8)
-
-    Example:
-
-        >>> from nltk.tag.senna import SennaTagger
-        >>> pipeline = SennaTagger('/usr/share/senna-v2.0', ['pos', 'chk', 'ner'])
-        >>> sent = u'Düsseldorf is an international business center'.split()
-        >>> pipeline.tag(sent)
-        [{'word': u'D\xfcsseldorf', 'chk': u'B-NP', 'ner': u'B-PER', 'pos': u'NNP'},
-        {'word': u'is', 'chk': u'B-VP', 'ner': u'O', 'pos': u'VBZ'},
-        {'word': u'an', 'chk': u'B-NP', 'ner': u'O', 'pos': u'DT'},
-        {'word': u'international', 'chk': u'I-NP', 'ner': u'O', 'pos': u'JJ'},
-        {'word': u'business', 'chk': u'I-NP', 'ner': u'O', 'pos': u'NN'},
-        {'word': u'center', 'chk': u'I-NP', 'ner': u'O','pos': u'NN'}]
-    """
-
-    SUPPORTED_OPERATIONS = ['pos', 'chk', 'ner']
-
-    def __init__(self, senna_path, operations, encoding='utf-8'):
-        self._encoding = encoding
-        self._path = path.normpath(senna_path) + sep
-        self.operations = operations
-
-    @property
-    def executable(self):
-        """
-        A property that determines the system specific binary that should be
-        used in the pipeline. In case, the system is not known the senna binary will
-        be used.
-        """
-        os_name = system()
-        if os_name == 'Linux':
-            bits = architecture()[0]
-            if bits == '64bit':
-                return path.join(self._path, 'senna-linux64')
-            return path.join(self._path, 'senna-linux32')
-        if os_name == 'Windows':
-            return path.join(self._path, 'senna-win32.exe')
-        if os_name == 'Darwin':
-            return path.join(self._path, 'senna-osx')
-        return path.join(self._path, 'senna')
-
-    def _map(self):
-        """
-        A method that calculates the order of the columns that SENNA pipeline
-        will output the tags into. This depends on the operations being ordered.
-        """
-        _map = {}
-        i = 1
-        for operation in SennaTagger.SUPPORTED_OPERATIONS:
-            if operation in self.operations:
-                _map[operation] = i
-                i+= 1
-        return _map
-
-    def tag(self, tokens):
-        """
-        Applies the specified operation(s) on a list of tokens.
-        """
-        return self.tag_sents([tokens])[0]
-
-    def tag_sents(self, sentences):
-        """
-        Applies the tag method over a list of sentences. This method will return a
-        list of dictionaries. Every dictionary will contain a word with its
-        calculated annotations/tags.
-        """
-        encoding = self._encoding
-
-        # Verifies the existence of the executable
-        if not path.isfile(self.executable):
-          raise ExecutableNotFound("Senna executable expected at %s but not found" %
-                                   self.executable)
-
-        # Build the senna command to run the tagger
-        _senna_cmd = [self.executable, '-path', self._path, '-usrtokens', '-iobtags']
-        _senna_cmd.extend(['-'+op for op in self.operations])
-
-        # Serialize the actual sentences to a temporary string
-        _input = '\n'.join((' '.join(x) for x in sentences))+'\n'
-        if isinstance(_input, compat.text_type) and encoding:
-            _input = _input.encode(encoding)
-
-        # Run the tagger and get the output
-        p = Popen(_senna_cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
-        (stdout, stderr) = p.communicate(input=_input)
-        senna_output = stdout
-
-        # Check the return code.
-        if p.returncode != 0:
-            raise RunFailure('Senna command failed! Details: %s' % stderr)
-
-        if encoding:
-            senna_output = stdout.decode(encoding)
-
-        # Output the tagged sentences
-        map_ = self._map()
-        tagged_sentences = [[]]
-        sentence_index = 0
-        token_index = 0
-        for tagged_word in senna_output.strip().split("\n"):
-            if not tagged_word:
-                tagged_sentences.append([])
-                sentence_index += 1
-                token_index = 0
-                continue
-            tags = tagged_word.split('\t')
-            result = {}
-            for tag in map_:
-              result[tag] = tags[map_[tag]].strip()
-            try:
-              result['word'] = sentences[sentence_index][token_index]
-            except IndexError:
-              raise SentenceMisalignment(
-                "Misalignment error occurred at sentence number %d. Possible reason"
-                " is that the sentence size exceeded the maximum size. Check the "
-                "documentation of SennaTagger class for more information."
-                % sentence_index)
-            tagged_sentences[-1].append(result)
-            token_index += 1
-        return tagged_sentences
-
-
-class POSTagger(SennaTagger):
-    """
-    A Part of Speech tagger.
-
-    The input is:
-    - path to the directory that contains SENNA executables.
-    - (optionally) the encoding of the input data (default:utf-8)
-
-    Example:
-
-        >>> from nltk.tag.senna import POSTagger
-        >>> postagger = POSTagger('/usr/share/senna-v2.0')
-        >>> postagger.tag('What is the airspeed of an unladen swallow ?'.split())
-        [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'),
-        ('of', 'IN'), ('an', 'DT'), ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
-    """
+@python_2_unicode_compatible
+class SennaTagger(Senna):
     def __init__(self, path, encoding='utf-8'):
-        super(POSTagger, self).__init__(path, ['pos'], encoding)
+        super(SennaTagger, self).__init__(path, ['pos'], encoding)
 
     def tag_sents(self, sentences):
         """
         Applies the tag method over a list of sentences. This method will return
         for each sentence a list of tuples of (word, tag).
         """
-        tagged_sents = super(POSTagger, self).tag_sents(sentences)
+        tagged_sents = super(SennaTagger, self).tag_sents(sentences)
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
                 tagged_sents[i][j] = (annotations['word'], annotations['pos'])
         return tagged_sents
 
-
-class NERTagger(SennaTagger):
-    """
-    A named entity extractor.
-
-    The input is:
-    - path to the directory that contains SENNA executables.
-    - (optionally) the encoding of the input data (default:utf-8)
-
-    Example:
-
-        >>> from nltk.tag.senna import NERTagger
-        >>> nertagger = NERTagger('/usr/share/senna-v2.0')
-        >>> nertagger.tag('Shakespeare theatre was in London .'.split())
-        [('Shakespeare', u'B-PER'), ('theatre', u'O'), ('was', u'O'), ('in', u'O'),
-        ('London', u'B-LOC'), ('.', u'O')]
-        >>> nertagger.tag('UN headquarters are in NY , USA .'.split())
-        [('UN', u'B-ORG'), ('headquarters', u'O'), ('are', u'O'), ('in', u'O'),
-        ('NY', u'B-LOC'), (',', u'O'), ('USA', u'B-LOC'), ('.', u'O')]
-    """
+@python_2_unicode_compatible
+class SennaChunkTagger(Senna):
     def __init__(self, path, encoding='utf-8'):
-        super(NERTagger, self).__init__(path, ['ner'], encoding)
+        super(SennaChunkTagger, self).__init__(path, ['chk'], encoding)
 
     def tag_sents(self, sentences):
         """
         Applies the tag method over a list of sentences. This method will return
         for each sentence a list of tuples of (word, tag).
         """
-        tagged_sents = super(NERTagger, self).tag_sents(sentences)
+        tagged_sents = super(SennaChunkTagger, self).tag_sents(sentences)
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
-                tagged_sents[i][j] = (annotations['word'], annotations['ner'])
+                tagged_sents[i][j] = (annotations['word'], annotations['chk'])
         return tagged_sents
 
-
-class CHKTagger(SennaTagger):
-    """
-    A chunker.
-
-    The input is:
-    - path to the directory that contains SENNA executables.
-    - (optionally) the encoding of the input data (default:utf-8)
-
-    Example:
-
-        >>> from nltk.tag.senna import CHKTagger
-        >>> chktagger = CHKTagger('/usr/share/senna-v2.0')
-        >>> chktagger.tag('What is the airspeed of an unladen swallow ?'.split())
-        [('What', u'B-NP'), ('is', u'B-VP'), ('the', u'B-NP'), ('airspeed', u'I-NP'),
-        ('of', u'B-PP'), ('an', u'B-NP'), ('unladen', u'I-NP'), ('swallow',u'I-NP'),
-        ('?', u'O')]
-    """
+@python_2_unicode_compatible
+class SennaNERTagger(Senna):
     def __init__(self, path, encoding='utf-8'):
-        super(CHKTagger, self).__init__(path, ['chk'], encoding)
+        super(SennaNERTagger, self).__init__(path, ['ner'], encoding)
 
     def tag_sents(self, sentences):
         """
         Applies the tag method over a list of sentences. This method will return
         for each sentence a list of tuples of (word, tag).
         """
-        tagged_sents = super(CHKTagger, self).tag_sents(sentences)
+        tagged_sents = super(SennaNERTagger, self).tag_sents(sentences)
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
-                tagged_sents[i][j] = (annotations['word'], annotations['chk'])
+                tagged_sents[i][j] = (annotations['word'], annotations['ner'])
         return tagged_sents
 
+
+
 # skip doctests if Senna is not installed
 def setup_module(module):
     from nose import SkipTest
-    tagger = POSTagger('/usr/share/senna-v2.0')
-    if not path.isfile(tagger.executable):
-        raise SkipTest("Senna executable expected at /usr/share/senna-v2.0/senna-osx but not found")
+    try:
+        tagger = Senna('/usr/share/senna-v2.0', ['pos', 'chk', 'ner'])
+    except OSError:
+        raise SkipTest("Senna executable not found")
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
diff --git a/nltk/tag/sequential.py b/nltk/tag/sequential.py
index a93a5bd..df2283f 100644
--- a/nltk/tag/sequential.py
+++ b/nltk/tag/sequential.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sequential Backoff Taggers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 #         Tiago Tresoldi <tresoldi at users.sf.net> (original affix tagger)
diff --git a/nltk/tag/stanford.py b/nltk/tag/stanford.py
index 5f732e8..3ce7575 100644
--- a/nltk/tag/stanford.py
+++ b/nltk/tag/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford NER-tagger
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Nitin Madnani <nmadnani at ets.org>
 #         Rami Al-Rfou' <ralrfou at cs.stonybrook.edu>
 # URL: <http://nltk.org/>
@@ -36,11 +36,11 @@ class StanfordTagger(TaggerI):
     _SEPARATOR = ''
     _JAR = ''
 
-    def __init__(self, path_to_model, path_to_jar=None, encoding='ascii', verbose=False, java_options='-mx1000m'):
+    def __init__(self, path_to_model, path_to_jar=None, encoding='utf8', verbose=False, java_options='-mx1000m'):
 
         if not self._JAR:
             warnings.warn('The StanfordTagger class is not meant to be '
-                    'instanciated directly. Did you mean POS- or NERTagger?')
+                    'instantiated directly. Did you mean POS- or NERTagger?')
         self._stanford_jar = find_jar(
                 self._JAR, path_to_jar,
                 searchpath=(), url=_stanford_url,
@@ -56,7 +56,7 @@ class StanfordTagger(TaggerI):
       raise NotImplementedError
 
     def tag(self, tokens):
-        return self.tag_sents([tokens])[0]
+        return list(self.tag_sents([tokens]))
 
     def tag_sents(self, sentences):
         encoding = self._encoding
@@ -67,7 +67,7 @@ class StanfordTagger(TaggerI):
         _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
 
         self._cmd.extend(['-encoding', encoding])
-
+        
         # Write the actual sentences to the temporary input file
         _input_fh = os.fdopen(_input_fh, 'wb')
         _input = '\n'.join((' '.join(x) for x in sentences))
@@ -75,18 +75,18 @@ class StanfordTagger(TaggerI):
             _input = _input.encode(encoding)
         _input_fh.write(_input)
         _input_fh.close()
-
+        
         # Run the tagger and get the output
         stanpos_output, _stderr = java(self._cmd,classpath=self._stanford_jar,
                                                        stdout=PIPE, stderr=PIPE)
         stanpos_output = stanpos_output.decode(encoding)
 
         # Delete the temporary file
-        os.unlink(self._input_file_path)
+        os.unlink(self._input_file_path) 
 
         # Return java configurations to their default values
         config_java(options=default_options, verbose=False)
-
+                
         return self.parse_output(stanpos_output)
 
     def parse_output(self, text):
@@ -127,7 +127,7 @@ class POSTagger(StanfordTagger):
     def _cmd(self):
         return ['edu.stanford.nlp.tagger.maxent.MaxentTagger',
                 '-model', self._stanford_model, '-textFile',
-                self._input_file_path, '-tokenize', 'false']
+                self._input_file_path, '-tokenize', 'false','-outputFormatOptions', 'keepEmptySentences']
 
 class NERTagger(StanfordTagger):
     """
@@ -158,9 +158,10 @@ class NERTagger(StanfordTagger):
 
     @property
     def _cmd(self):
+        # Add -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer and -tokenizerOptions tokenizeNLs=false so that the Stanford tokenizer is not used (the input is already tokenized)
         return ['edu.stanford.nlp.ie.crf.CRFClassifier',
                 '-loadClassifier', self._stanford_model, '-textFile',
-                self._input_file_path, '-outputFormat', self._FORMAT]
+                self._input_file_path, '-outputFormat', self._FORMAT, '-tokenizerFactory', 'edu.stanford.nlp.process.WhitespaceTokenizer', '-tokenizerOptions','\"tokenizeNLs=false\"']
 
     def parse_output(self, text):
       if self._FORMAT == 'slashTags':
diff --git a/nltk/tag/tnt.py b/nltk/tag/tnt.py
index 6faf04a..0dd7103 100755
--- a/nltk/tag/tnt.py
+++ b/nltk/tag/tnt.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: TnT Tagger
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Sam Huston <sjh900 at gmail.com>
 #
 # URL: <http://nltk.org/>
diff --git a/nltk/tag/util.py b/nltk/tag/util.py
index 1bd0ec5..a791cc4 100644
--- a/nltk/tag/util.py
+++ b/nltk/tag/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagger Utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tbl/__init__.py b/nltk/tbl/__init__.py
index ed7292b..9c85c90 100644
--- a/nltk/tbl/__init__.py
+++ b/nltk/tbl/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/demo.py b/nltk/tbl/demo.py
index 6df31c0..eee2c39 100644
--- a/nltk/tbl/demo.py
+++ b/nltk/tbl/demo.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/erroranalysis.py b/nltk/tbl/erroranalysis.py
index 1cbbf14..3ce3c89 100644
--- a/nltk/tbl/erroranalysis.py
+++ b/nltk/tbl/erroranalysis.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/tbl/feature.py b/nltk/tbl/feature.py
index 2c0f3cc..5988f07 100644
--- a/nltk/tbl/feature.py
+++ b/nltk/tbl/feature.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
@@ -29,9 +29,9 @@ class Feature(object):
     to the classname.
 
     """
-    #!!FOR_FUTURE: when targeting python3 only, consider @abc.abstractmethod
+    # !!FOR_FUTURE: when targeting python3 only, consider @abc.abstractmethod
     # and metaclass=abc.ABCMeta rather than NotImplementedError
-    #http://julien.danjou.info/blog/2013/guide-python-static-class-abstract-methods
+    # http://julien.danjou.info/blog/2013/guide-python-static-class-abstract-methods
 
     json_tag = 'nltk.tbl.Feature'
     PROPERTY_NAME = None
@@ -77,19 +77,19 @@ class Feature(object):
         :param end: end of range (NOTE: inclusive!) where this feature should apply
 
         """
-        self.positions = None #to avoid warnings
+        self.positions = None  # to avoid warnings
         if end is None:
             self.positions = tuple(sorted(set([int(i) for i in positions])))
-        else:                #positions was actually not a list, but only the start index
+        else:                # positions was actually not a list, but only the start index
             try:
                 if positions > end:
                     raise TypeError
                 self.positions = tuple(range(positions, end+1))
             except TypeError:
-                #let any kind of erroneous spec raise ValueError
+                # let any kind of erroneous spec raise ValueError
                 raise ValueError("illegal interval specification: (start={0}, end={1})".format(positions, end))
 
-        #set property name given in subclass, or otherwise name of subclass
+        # set property name given in subclass, or otherwise name of subclass
         self.PROPERTY_NAME = self.__class__.PROPERTY_NAME or self.__class__.__name__
 
     def encode_json_obj(self):
@@ -113,11 +113,11 @@ class Feature(object):
         (many tbl trainers have a special representation for the
         target feature at [0])
 
-        #For instance, importing a concrete subclass (Feature is abstract)
+        For instance, importing a concrete subclass (Feature is abstract)
         >>> from nltk.tag.brill import Word
 
-        #First argument gives the possible start positions, second the
-        #possible window lengths
+        First argument gives the possible start positions, second the
+        possible window lengths
         >>> Word.expand([-3,-2,-1], [1])
         [Word([-3]), Word([-2]), Word([-1])]
 
@@ -130,14 +130,14 @@ class Feature(object):
         >>> Word.expand([-2,-1], [1])
         [Word([-2]), Word([-1])]
 
-        #a third optional argument excludes all Features whose positions contain zero
+        a third optional argument excludes all Features whose positions contain zero
         >>> Word.expand([-2,-1,0], [1,2], excludezero=False)
         [Word([-2]), Word([-1]), Word([0]), Word([-2, -1]), Word([-1, 0])]
 
         >>> Word.expand([-2,-1,0], [1,2], excludezero=True)
         [Word([-2]), Word([-1]), Word([-2, -1])]
 
-        #All window lengths must be positive
+        All window lengths must be positive
         >>> Word.expand([-2,-1], [0])
         Traceback (most recent call last):
           File "<stdin>", line 1, in <module>
@@ -155,7 +155,7 @@ class Feature(object):
         :raises ValueError: for non-positive window lengths
         """
         if not all(x > 0 for x in winlens):
-            raise ValueError("non-positive window length in {0:s}".format(winlens))
+            raise ValueError("non-positive window length in {0}".format(winlens))
         xs = (starts[i:i+w] for w in winlens for i in range(len(starts)-w+1))
         return [cls(x) for x in xs if not (excludezero and 0 in x)]
 
@@ -187,8 +187,7 @@ class Feature(object):
 
 
         """
-        return (self.__class__ is other.__class__ and
-               set(self.positions) >= set(other.positions))
+        return self.__class__ is other.__class__ and set(self.positions) >= set(other.positions)
 
     def intersects(self, other):
         """
@@ -219,19 +218,19 @@ class Feature(object):
         :rtype: bool
         """
 
-        return bool((self.__class__ is other.__class__ and
-               set(self.positions) & set(other.positions)))
+        return bool((self.__class__ is other.__class__ and set(self.positions) & set(other.positions)))
 
-    #Rich comparisons for Features. With @functools.total_ordering (Python 2.7+),
+    # Rich comparisons for Features. With @functools.total_ordering (Python 2.7+),
     # it will be enough to define __lt__ and __eq__
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-               self.positions == other.positions)
+        return (self.__class__ is other.__class__ and self.positions == other.positions)
 
     def __lt__(self, other):
-        return (self.__class__.__name__ < other.__class__.__name__ or
-               #self.positions is a sorted tuple of ints
-               self.positions < other.positions)
+        return (
+            self.__class__.__name__ < other.__class__.__name__ or
+            #    self.positions is a sorted tuple of ints
+            self.positions < other.positions
+        )
 
     def __ne__(self, other):
         return not (self == other)
diff --git a/nltk/tbl/rule.py b/nltk/tbl/rule.py
index d23c780..63dee4f 100644
--- a/nltk/tbl/rule.py
+++ b/nltk/tbl/rule.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
@@ -14,7 +14,7 @@ from nltk.compat import python_2_unicode_compatible, unicode_repr
 from nltk import jsontags
 
 ######################################################################
-## Tag Rules
+# Tag Rules
 ######################################################################
 
 
@@ -115,7 +115,7 @@ class Rule(TagRule):
 
     """
 
-    json_tag='nltk.tbl.Rule'
+    json_tag = 'nltk.tbl.Rule'
 
     def __init__(self, templateid, original_tag, replacement_tag, conditions):
         """
@@ -182,14 +182,14 @@ class Rule(TagRule):
                  self._conditions == other._conditions))
 
     def __ne__(self, other):
-        return not (self==other)
+        return not (self == other)
 
     def __hash__(self):
 
         # Cache our hash value (justified by profiling.)
         try:
             return self.__hash
-        except:
+        except AttributeError:
             self.__hash = hash(repr(self))
             return self.__hash
 
@@ -198,16 +198,19 @@ class Rule(TagRule):
         # a sort key when deterministic=True.)
         try:
             return self.__repr
-        except:
-            self.__repr = ('%s(%r, %s, %s, [%s])' % (
-                self.__class__.__name__,
-                self.templateid,
-                unicode_repr(self.original_tag),
-                unicode_repr(self.replacement_tag),
-
-                # list(self._conditions) would be simpler but will not generate
-                # the same Rule.__repr__ in python 2 and 3 and thus break some tests
-                ", ".join("({0:s},{1:s})".format(f,unicode_repr(v)) for (f,v) in self._conditions)))
+        except AttributeError:
+            self.__repr = (
+                "{0}('{1}', {2}, {3}, [{4}])".format(
+                    self.__class__.__name__,
+                    self.templateid,
+                    unicode_repr(self.original_tag),
+                    unicode_repr(self.replacement_tag),
+
+                    # list(self._conditions) would be simpler but will not generate
+                    # the same Rule.__repr__ in python 2 and 3 and thus break some tests
+                    ', '.join("({0},{1})".format(f, unicode_repr(v)) for (f, v) in self._conditions)
+                )
+            )
 
             return self.__repr
 
@@ -217,16 +220,20 @@ class Rule(TagRule):
             Return a compact, predicate-logic styled string representation
             of the given condition.
             """
-            return ('%s:%s@[%s]' %
-                (feature.PROPERTY_NAME, value, ",".join(str(w) for w in feature.positions)))
-
-        conditions = ' & '.join([_condition_to_logic(f,v) for (f,v) in self._conditions])
-        s = ('%s->%s if %s' % (
+            return '{0}:{1}@[{2}]'.format(
+                feature.PROPERTY_NAME,
+                value,
+                ",".join(str(w) for w in feature.positions)
+            )
+
+        conditions = ' & '.join([_condition_to_logic(f, v) for (f, v) in self._conditions])
+        s = '{0}->{1} if {2}'.format(
             self.original_tag,
             self.replacement_tag,
-            conditions))
-        return s
+            conditions
+        )
 
+        return s
 
     def format(self, fmt):
         """
@@ -235,15 +242,17 @@ class Rule(TagRule):
         >>> from nltk.tbl.rule import Rule
         >>> from nltk.tag.brill import Pos
 
-        >>> r = Rule(23, "VB", "NN", [(Pos([-2,-1]), 'DT')])
+        >>> r = Rule("23", "VB", "NN", [(Pos([-2,-1]), 'DT')])
 
-        #r.format("str") == str(r)
+        >>> r.format("str") == str(r)
+        True
         >>> r.format("str")
         'VB->NN if Pos:DT@[-2,-1]'
 
-        #r.format("repr") == repr(r)
+        >>> r.format("repr") == repr(r)
+        True
         >>> r.format("repr")
-        "Rule(23, 'VB', 'NN', [(Pos([-2, -1]),'DT')])"
+        "Rule('23', 'VB', 'NN', [(Pos([-2, -1]),'DT')])"
 
         >>> r.format("verbose")
         'VB -> NN if the Pos of words i-2...i-1 is "DT"'
@@ -305,7 +314,8 @@ class Rule(TagRule):
 
         replacement = '%s -> %s' % (self.original_tag, self.replacement_tag)
         conditions = (' if ' if self._conditions else "") + ', and '.join(
-            [condition_to_str(f,v) for (f,v) in self._conditions])
+            condition_to_str(f, v) for (f, v) in self._conditions
+        )
         return replacement + conditions
 
 
diff --git a/nltk/tbl/template.py b/nltk/tbl/template.py
index 648dfde..7142733 100644
--- a/nltk/tbl/template.py
+++ b/nltk/tbl/template.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Marcus Uneson <marcus.uneson at gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
diff --git a/nltk/test/__init__.py b/nltk/test/__init__.py
index 98d7815..b137d3a 100644
--- a/nltk/test/__init__.py
+++ b/nltk/test/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Unit Tests
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/test/align.doctest b/nltk/test/align.doctest
index 9f1db41..38b5caf 100644
--- a/nltk/test/align.doctest
+++ b/nltk/test/align.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. -*- coding: utf-8 -*-
@@ -162,7 +162,7 @@ Here are some examples:
 
 
 .. _nltk.metrics.scores.precision:
-    http://nltk.googlecode.com/svn/trunk/doc/api/nltk.metrics.scores-module.html#precision
+    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision
 
 
 Recall
@@ -191,7 +191,7 @@ Here are some examples:
 
 
 .. _nltk.metrics.scores.recall:
-    http://nltk.googlecode.com/svn/trunk/doc/api/nltk.metrics.scores-module.html#recall
+    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall
 
 
 Alignment Error Rate (AER)
diff --git a/nltk/test/bleu.doctest b/nltk/test/bleu.doctest
new file mode 100644
index 0000000..9827062
--- /dev/null
+++ b/nltk/test/bleu.doctest
@@ -0,0 +1,14 @@
+==========
+BLEU tests
+==========
+
+>>> from nltk.align import bleu
+
+If the candidate has no alignment to any of the references, the BLEU score is 0.
+
+>>> bleu(
+...     'John loves Mary'.split(),
+...     ['The candidate has no alignment to any of the references'.split()],
+...     [1],
+... )
+0
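
A quick sketch for contrast with the zero-score case in the new bleu.doctest
above (not part of the patch): when the candidate does overlap with one of the
references, the same nltk.align.bleu helper returns a positive score. The
weights [1] follow the doctest above (unigram precision only); the sentences
themselves are only illustrative.

    from nltk.align import bleu

    candidate = 'It is a guide to action which ensures that the military'.split()
    reference = 'It is a guide to action that ensures that the military'.split()

    # Ten of the eleven candidate tokens also occur in the reference, so with
    # unigram precision only the score should come out close to 10/11.
    score = bleu(candidate, [reference], [1])
    print(score)
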
diff --git a/nltk/test/bnc.doctest b/nltk/test/bnc.doctest
index 9350b48..acfc44e 100644
--- a/nltk/test/bnc.doctest
+++ b/nltk/test/bnc.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
     >>> from nltk.corpus.reader import BNCCorpusReader
diff --git a/nltk/test/ccg.doctest b/nltk/test/ccg.doctest
index 857c846..716da2c 100644
--- a/nltk/test/ccg.doctest
+++ b/nltk/test/ccg.doctest
@@ -1,13 +1,10 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============================
 Combinatory Categorial Grammar
 ==============================
 
-For more information, please see:
-http://nltk.googlecode.com/svn/trunk/doc/contrib/ccg/ccg.pdf
-
 Relative Clauses
 ----------------
 
@@ -67,7 +64,7 @@ Construct a lexicon:
     ...     ''')
 
     >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> for parse in parser.parse("you prefer that cake".split()):
+    >>> for parse in parser.parse("you prefer that cake".split()):  # doctest: +SKIP
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -80,7 +77,7 @@ Construct a lexicon:
     --------------------------------<
                    S
 
-    >>> for parse in parser.parse("that is the cake which you prefer".split()):
+    >>> for parse in parser.parse("that is the cake which you prefer".split()):  # doctest: +SKIP
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -117,7 +114,7 @@ Without Substitution (no output)
 
 With Substitution:
 
-    >>> for parse in parser.parse(sent):
+    >>> for parse in parser.parse(sent):  # doctest: +SKIP
     ...     chart.printCCGDerivation(parse)
     ...     break
     ...
@@ -188,7 +185,7 @@ Note that while the two derivations are different, they are semantically equival
     >>> lex = lexicon.parseLexicon(test1_lex)
     >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
     >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
-    ...     printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE
+    ...     printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE +SKIP
      I      will       cook               and                might       eat     the    mushrooms             and             parsnips
      NP  ((S\NP)/VP)  (VP/NP)  ((_var2\.,_var2)/.,_var2)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var2\.,_var2)/.,_var2)     N
         ---------------------->B
@@ -237,7 +234,7 @@ Interesting to point that the two parses are clearly semantically different.
     >>> lex = lexicon.parseLexicon(test2_lex)
     >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
     >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
-    ...     printCCGDerivation(parse)  # doctest: +NORMALIZE_WHITESPACE
+    ...     printCCGDerivation(parse)  # doctest: +NORMALIZE_WHITESPACE +SKIP
      articles      which       I      will       file               and             forget         without           reading
         N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var3\.,_var3)/.,_var3)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
                               -----------------<
@@ -274,4 +271,3 @@ Interesting to point that the two parses are clearly semantically different.
                                                                      (N\N)
     -----------------------------------------------------------------------------------------------------------------------------<
                                                                   N
-
diff --git a/nltk/test/chat80.doctest b/nltk/test/chat80.doctest
index 74b0ba8..3080177 100644
--- a/nltk/test/chat80.doctest
+++ b/nltk/test/chat80.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======
diff --git a/nltk/test/chunk.doctest b/nltk/test/chunk.doctest
index 24a08ba..b2952d5 100644
--- a/nltk/test/chunk.doctest
+++ b/nltk/test/chunk.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========
diff --git a/nltk/test/classify.doctest b/nltk/test/classify.doctest
index 892603b..0dc59a3 100644
--- a/nltk/test/classify.doctest
+++ b/nltk/test/classify.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =============
diff --git a/nltk/test/collocations.doctest b/nltk/test/collocations.doctest
index a390714..c4e1fa4 100644
--- a/nltk/test/collocations.doctest
+++ b/nltk/test/collocations.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============
diff --git a/nltk/test/corpus.doctest b/nltk/test/corpus.doctest
index 23bb290..6048961 100644
--- a/nltk/test/corpus.doctest
+++ b/nltk/test/corpus.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ================
@@ -9,7 +9,7 @@ The `nltk.corpus` package defines a collection of *corpus reader*
 classes, which can be used to access the contents of a diverse set of
 corpora.  The list of available corpora is given at:
 
-http://nltk.googlecode.com/svn/trunk/nltk_data/index.xml
+http://www.nltk.org/nltk_data/
 
 Each corpus reader class is specialized to handle a specific
 corpus format.  In addition, the `nltk.corpus` package automatically
@@ -149,7 +149,7 @@ than just bare word strings.
      [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...],
       [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...]
 
-Similarly, the Indian Langauge POS-Tagged Corpus includes samples of
+Similarly, the Indian Language POS-Tagged Corpus includes samples of
 Indian text annotated with part-of-speech tags:
 
     >>> from nltk.corpus import indian
@@ -555,7 +555,7 @@ senseval
 The Senseval 2 corpus is a word sense disambiguation corpus.  Each
 item in the corpus corresponds to a single ambiguous word.  For each
 of these words, the corpus contains a list of instances, corresponding
-to occurences of that word.  Each instance provides the word; a list
+to occurrences of that word.  Each instance provides the word; a list
 of word senses that apply to the word occurrence; and the word's
 context.
 
@@ -859,7 +859,7 @@ The VerbNet corpus is a lexicon that divides verbs into classes, based
 on their syntax-semantics linking behavior.  The basic elements in the
 lexicon are verb lemmas, such as 'abandon' and 'accept', and verb
 classes, which have identifiers such as 'remove-10.1' and
-'admire-31.2-1'.  These class identifiers consist of a representitive
+'admire-31.2-1'.  These class identifiers consist of a representative
 verb selected from the class, followed by a numerical identifier.  The
 list of verb lemmas, and the list of class identifiers, can be
 retrieved with the following methods:
@@ -904,7 +904,7 @@ list of thematic roles for a given Verbnet class:
 
 The Verbnet corpus also provides a variety of pretty printing
 functions that can be used to display the xml contents in a more
-consise form.  The simplest such method is `pprint()`:
+concise form.  The simplest such method is `pprint()`:
 
     >>> print(verbnet.pprint('57'))
     weather-57
@@ -980,7 +980,7 @@ parenthesis-delineated parse trees.
 Automatically Created Corpus Reader Instances
 =============================================
 
-When then `nltk.corpus` module is imported, it automatically creates a
+When the `nltk.corpus` module is imported, it automatically creates a
 set of corpus reader instances that can be used to access the corpora
 in the NLTK data distribution.  Here is a small sample of those
 corpus reader instances:
@@ -1033,7 +1033,7 @@ we can create a customized corpus reader for the genesis corpus that
 uses a different sentence tokenizer as follows:
 
     >>> # Find the directory where the corpus lives.
-    >>> genesis_dir = nltk.data.find('corpora/genesis.zip').join('genesis/')
+    >>> genesis_dir = nltk.data.find('corpora/genesis')
     >>> # Create our custom sentence tokenizer.
     >>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
     >>> # Create the new corpus reader object.
@@ -1107,7 +1107,7 @@ path to the root directory is stored in the ``root`` property:
 
 Each file within the corpus is identified by a platform-independent
 identifier, which is basically a path string that uses ``/`` as the
-path seperator.  I.e., this identifier can be converted to a relative
+path separator.  I.e., this identifier can be converted to a relative
 path as follows:
 
     >>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0]
@@ -1247,7 +1247,7 @@ list), but does not store the data elements in memory; instead, data
 elements are read from the underlying data files on an as-needed
 basis.
 
-By only loading items from the file on an as-needesd basis, corpus
+By only loading items from the file on an as-needed basis, corpus
 views maintain both memory efficiency and responsiveness.  The memory
 efficiency of corpus readers is important because some corpora contain
 very large amounts of data, and storing the entire data set in memory
@@ -1270,7 +1270,7 @@ Writing New Corpus Readers
 
 In order to add support for new corpus formats, it is necessary to
 define new corpus reader classes.  For many corpus formats, writing
-new corpus readers is relatively streight-forward.  In this section,
+new corpus readers is relatively straight-forward.  In this section,
 we'll describe what's involved in creating a new corpus reader.  If
 you do create a new corpus reader, we encourage you to contribute it
 back to the NLTK project.
@@ -1332,11 +1332,12 @@ then you can often just inherit the base class's constructor.
 
 Data Access Methods
 ~~~~~~~~~~~~~~~~~~~
+
 The most common type of data access method takes an argument
 identifying which files to access, and returns a view covering those
-files.  This argument may be a a single file identifier string (to get
-a view for a specific file); a list of file identifier strings (to get
-a view for a specific list of files); or None (to get a view for the
+files.  This argument may be a single file identifier string (to get a
+view for a specific file); a list of file identifier strings (to get a
+view for a specific list of files); or None (to get a view for the
 entire corpus).  The method's implementation converts this argument to
 a list of path names using the `abspaths()` method, which handles all
 three value types (string, list, and None):
@@ -1402,7 +1403,7 @@ This simple block reader reads a single line at a time, and returns a
 single token (consisting of a string) for each whitespace-separated
 substring on the line.  A `StreamBackedCorpusView` built from this
 block reader will act like a read-only list of all the
-whitespace-seperated tokens in an underlying file.
+whitespace-separated tokens in an underlying file.
 
 When deciding how to define the block reader for a given corpus,
 careful consideration should be given to the size of blocks handled by
@@ -1470,7 +1471,7 @@ Plaintext Corpus Reader
 The plaintext corpus reader is used to access corpora that consist of
 unprocessed plaintext data.  It assumes that paragraph breaks are
 indicated by blank lines.  Sentences and words can be tokenized using
-the default tokenizers, or by custom tokenizers specificed as
+the default tokenizers, or by custom tokenizers specified as
 parameters to the constructor.
 
     >>> root = make_testcorpus(ext='.txt',
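
As a rough sketch of the block-reader pattern described in the corpus.doctest
hunks above (a block reader that returns the whitespace-separated tokens of one
line, wrapped in a StreamBackedCorpusView). The file used here is just a
convenient stand-in and assumes the sample_grammars data package is installed.

    import nltk
    from nltk.corpus.reader.util import StreamBackedCorpusView

    def whitespace_block_reader(stream):
        # One block per line: the whitespace-separated tokens on that line.
        return stream.readline().split()

    path = nltk.data.find('grammars/sample_grammars/toy.cfg')
    view = StreamBackedCorpusView(path, whitespace_block_reader)
    print(view[:5])  # the view behaves like a read-only list of tokens
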
diff --git a/nltk/test/data.doctest b/nltk/test/data.doctest
index 7b95551..71a3a98 100644
--- a/nltk/test/data.doctest
+++ b/nltk/test/data.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================================
@@ -91,7 +91,7 @@ to format names is specified by `nltk.data.AUTO_FORMATS`:
     .cfg     -> cfg
     .fcfg    -> fcfg
     .fol     -> fol
-	.json    -> json
+    .json    -> json
     .logic   -> logic
     .pcfg    -> pcfg
     .pickle  -> pickle
@@ -277,13 +277,13 @@ Create a temp dir for tests that write files:
 
 The `retrieve()` function accepts all url types:
 
-    >>> urls = ['http://nltk.googlecode.com/svn/trunk/nltk/nltk/test/toy.cfg',
+    >>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg',
     ...         'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'),
     ...         'nltk:grammars/sample_grammars/toy.cfg',
     ...         'grammars/sample_grammars/toy.cfg']
     >>> for i, url in enumerate(urls):
     ...     nltk.data.retrieve(url, 'toy-%d.cfg' % i) # doctest: +ELLIPSIS
-    Retrieving 'http://nltk.googlecode.com/svn/trunk/nltk/nltk/test/toy.cfg', saving to 'toy-0.cfg'
+    Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg'
     Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg'
     Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg'
     Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg'
diff --git a/nltk/test/dependency.doctest b/nltk/test/dependency.doctest
old mode 100644
new mode 100755
index 9cb8374..56b5625
--- a/nltk/test/dependency.doctest
+++ b/nltk/test/dependency.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===================
@@ -6,7 +6,11 @@ Dependency Grammars
 ===================
 
     >>> from nltk.grammar import DependencyGrammar
-    >>> from nltk.parse import *
+    >>> from nltk.parse import (
+    ...     DependencyGraph,
+    ...     ProjectiveDependencyParser,
+    ...     NonprojectiveDependencyParser,
+    ... )
 
 CoNLL Data
 ----------
@@ -31,12 +35,33 @@ CoNLL Data
     ... .       .       9       VMOD
     ... """
 
-
     >>> dg = DependencyGraph(treebank_data)
-    >>> print(dg.tree().pprint())
+    >>> dg.tree().pprint()
     (will
       (Vinken Pierre , (old (years 61)) ,)
       (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
+    >>> for head, rel, dep in dg.triples():
+    ...     print(
+    ...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
+    ...         .format(h=head, r=rel, d=dep)
+    ...     )
+    (will, MD), SUB, (Vinken, NNP)
+    (Vinken, NNP), NMOD, (Pierre, NNP)
+    (Vinken, NNP), P, (,, ,)
+    (Vinken, NNP), NMOD, (old, JJ)
+    (old, JJ), AMOD, (years, NNS)
+    (years, NNS), NMOD, (61, CD)
+    (Vinken, NNP), P, (,, ,)
+    (will, MD), VC, (join, VB)
+    (join, VB), OBJ, (board, NN)
+    (board, NN), NMOD, (the, DT)
+    (join, VB), VMOD, (as, IN)
+    (as, IN), PMOD, (director, NN)
+    (director, NN), NMOD, (a, DT)
+    (director, NN), NMOD, (nonexecutive, JJ)
+    (join, VB), VMOD, (Nov., NNP)
+    (Nov., NNP), NMOD, (29, CD)
+    (join, VB), VMOD, (., .)
 
 Using the dependency-parsed version of the Penn Treebank corpus sample.
 
@@ -62,6 +87,37 @@ Using the dependency-parsed version of the Penn Treebank corpus sample.
     29  CD      16
     .   .       8
 
+Using the output of zpar (like Malt-TAB but with zero-based indexing)
+
+    >>> zpar_data = """
+    ... Pierre	NNP	1	NMOD
+    ... Vinken	NNP	7	SUB
+    ... ,	,	1	P
+    ... 61	CD	4	NMOD
+    ... years	NNS	5	AMOD
+    ... old	JJ	1	NMOD
+    ... ,	,	1	P
+    ... will	MD	-1	ROOT
+    ... join	VB	7	VC
+    ... the	DT	10	NMOD
+    ... board	NN	8	OBJ
+    ... as	IN	8	VMOD
+    ... a	DT	14	NMOD
+    ... nonexecutive	JJ	14	NMOD
+    ... director	NN	11	PMOD
+    ... Nov.	NNP	8	VMOD
+    ... 29	CD	15	NMOD
+    ... .	.	7	P
+    ... """
+
+    >>> zdg = DependencyGraph(zpar_data, zero_based=True)
+    >>> print(zdg.tree())
+    (will
+      (Vinken Pierre , (old (years 61)) ,)
+      (join (board the) (as (director a nonexecutive)) (Nov. 29))
+      .)
+
+
 Projective Dependency Parsing
 -----------------------------
 
@@ -106,21 +162,22 @@ Non-Projective Dependency Parsing
       'dog' -> 'his'
 
     >>> dp = NonprojectiveDependencyParser(grammar)
-    >>> for g in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
-    ...     print(g)  # doctest: +NORMALIZE_WHITESPACE
-    [{'address': 0,
-      'ctag': 'TOP',
-      'deps': 3,
-      'feats': None,
-      'lemma': None,
-      'rel': 'TOP',
-      'tag': 'TOP',
-      'word': None},
-     {'address': 1, 'deps': [], 'word': 'the'},
-     {'address': 2, 'deps': [1], 'word': 'man'},
-     {'address': 3, 'deps': [2, 7], 'word': 'taught'},
-     {'address': 4, 'deps': [], 'word': 'his'},
-     {'address': 5, 'deps': [4], 'word': 'dog'},
-     {'address': 6, 'deps': [], 'word': 'to'},
-     {'address': 7, 'deps': [5, 6, 8], 'word': 'play'},
-     {'address': 8, 'deps': [], 'word': 'golf'}]
+    >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
+
+    >>> print(g.root['word'])
+    taught
+
+    >>> for _, node in sorted(g.nodes.items()):
+    ...     if node['word'] is not None:
+    ...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
+    1 the: []
+    2 man: [1]
+    3 taught: [2, 7]
+    4 his: []
+    5 dog: [4]
+    6 to: []
+    7 play: [5, 6, 8]
+    8 golf: []
+
+    >>> print(g.tree())
+    (taught (man the) (play (dog his) to golf))
diff --git a/nltk/test/discourse.doctest b/nltk/test/discourse.doctest
index 66cb1f4..21f40ca 100644
--- a/nltk/test/discourse.doctest
+++ b/nltk/test/discourse.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================
diff --git a/nltk/test/drt.doctest b/nltk/test/drt.doctest
index daa0933..0f00911 100644
--- a/nltk/test/drt.doctest
+++ b/nltk/test/drt.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ================================
@@ -52,9 +52,9 @@ The ``fol()`` method converts DRSs into FOL formulae.
     >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
     all x.(man(x) -> walks(x))
 
-In order to visualize a DRS, the ``pprint()`` method can be use.
+In order to visualize a DRS, the ``pretty_format()`` method can be used.
 
-    >>> drs3.pprint()
+    >>> print(drs3.pretty_format())
       _________     __________
      | x       |   | y        |
     (|---------| + |----------|)
@@ -342,7 +342,7 @@ regression tests
     >>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])')
     >>> print(d)
     ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> d.pprint()
+    >>> print(d.pretty_format())
      ____________________________________
     | x                                  |
     |------------------------------------|
@@ -464,13 +464,13 @@ Parse errors
 Pretty Printing
 ===============
 
-    >>> dexpr(r"([],[])").pprint()
+    >>> dexpr(r"([],[])").pretty_print()
      __
     |  |
     |--|
     |__|
 
-    >>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pprint()
+    >>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
      _____________________________
     |                             |
     |-----------------------------|
@@ -487,7 +487,7 @@ Pretty Printing
     |     |_________|             |
     |_____________________________|
 
-    >>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pprint()
+    >>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
       _________     _________
      | x y     |   | z       |
     (|---------| + |---------|)
@@ -495,7 +495,7 @@ Pretty Printing
      |_________|   | walk(z) |
                    |_________|
 
-    >>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pprint()
+    >>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
      _______________________________
     |                               |
     |-------------------------------|
@@ -507,7 +507,7 @@ Pretty Printing
     |                  |_________|  |
     |_______________________________|
 
-    >>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pprint()
+    >>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
               ___                        ________
      \       | x |                 \    |        |
      /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|)
diff --git a/nltk/test/featgram.doctest b/nltk/test/featgram.doctest
index d0dccb5..b4a4e46 100644
--- a/nltk/test/featgram.doctest
+++ b/nltk/test/featgram.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================
diff --git a/nltk/test/featstruct.doctest b/nltk/test/featstruct.doctest
index 9ede986..ee0c052 100644
--- a/nltk/test/featstruct.doctest
+++ b/nltk/test/featstruct.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================================
diff --git a/nltk/test/framenet.doctest b/nltk/test/framenet.doctest
index b68d982..7eb496e 100644
--- a/nltk/test/framenet.doctest
+++ b/nltk/test/framenet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ========
diff --git a/nltk/test/generate.doctest b/nltk/test/generate.doctest
index 4840599..82489b8 100644
--- a/nltk/test/generate.doctest
+++ b/nltk/test/generate.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================================
diff --git a/nltk/test/gluesemantics.doctest b/nltk/test/gluesemantics.doctest
index 814a231..06ca81f 100644
--- a/nltk/test/gluesemantics.doctest
+++ b/nltk/test/gluesemantics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==============================================================================
@@ -70,9 +70,9 @@ Test BindingDict
     >>> all12.sort()
     >>> print(all12)
     ['F: f', 'G: f', 'H: h']
-	
-	>>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h})
-	True
+
+    >>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h})
+    True
 
     >>> d4 = BindingDict({F: f})
     >>> try: d4[F] = g
@@ -87,37 +87,37 @@ Test Unify
     Cannot unify f with g given {}
 
     >>> f.unify(G, BindingDict()) == BindingDict({G: f})
-	True
+    True
     >>> try: f.unify(G, BindingDict({G: h}))
     ... except UnificationException as e: print(e)
     ...
     Cannot unify f with G given {G: h}
     >>> f.unify(G, BindingDict({G: f})) == BindingDict({G: f})
-	True
+    True
     >>> f.unify(G, BindingDict({H: f})) == BindingDict({G: f, H: f})
-	True
+    True
 
     >>> G.unify(f, BindingDict()) == BindingDict({G: f})
-	True
+    True
     >>> try: G.unify(f, BindingDict({G: h}))
     ... except UnificationException as e: print(e)
     ...
     Cannot unify G with f given {G: h}
     >>> G.unify(f, BindingDict({G: f})) == BindingDict({G: f})
-	True
+    True
     >>> G.unify(f, BindingDict({H: f})) == BindingDict({G: f, H: f})
-	True
+    True
 
     >>> G.unify(F, BindingDict()) == BindingDict({G: F})
-	True
+    True
     >>> try: G.unify(F, BindingDict({G: H}))
     ... except UnificationException as e: print(e)
     ...
     Cannot unify G with F given {G: H}
     >>> G.unify(F, BindingDict({G: F})) == BindingDict({G: F})
-	True
+    True
     >>> G.unify(F, BindingDict({H: F})) == BindingDict({G: F, H: F})
-	True
+    True
 
 Test Compile
 
@@ -360,13 +360,18 @@ Dependency Graph to Glue Formulas
 Dependency Graph to LFG f-structure
 -----------------------------------
     >>> from nltk.sem.lfg import FStructure
+
     >>> fstruct = FStructure.read_depgraph(depgraph)
+
     >>> print(fstruct)
     f:[pred 'sees'
        obj h:[pred 'dog'
               spec 'a']
        subj g:[pred 'John']]
 
+    >>> fstruct.to_depgraph().tree().pprint()
+    (sees (dog a) John)
+
 ---------------------------------
 LFG f-structure to Glue
 ---------------------------------
diff --git a/nltk/test/gluesemantics_malt.doctest b/nltk/test/gluesemantics_malt.doctest
index 87b2b17..599a573 100644
--- a/nltk/test/gluesemantics_malt.doctest
+++ b/nltk/test/gluesemantics_malt.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. see also: gluesemantics.doctest
diff --git a/nltk/test/grammar.doctest b/nltk/test/grammar.doctest
index 0a9f394..1c41429 100644
--- a/nltk/test/grammar.doctest
+++ b/nltk/test/grammar.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============
diff --git a/nltk/test/grammartestsuites.doctest b/nltk/test/grammartestsuites.doctest
index 3c18e46..7fd09b9 100644
--- a/nltk/test/grammartestsuites.doctest
+++ b/nltk/test/grammartestsuites.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========================
diff --git a/nltk/test/index.doctest b/nltk/test/index.doctest
index cdfd104..eb504c2 100644
--- a/nltk/test/index.doctest
+++ b/nltk/test/index.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. _align howto: align.html
diff --git a/nltk/test/inference.doctest b/nltk/test/inference.doctest
index edb5f88..cf8191c 100644
--- a/nltk/test/inference.doctest
+++ b/nltk/test/inference.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ====================================
diff --git a/nltk/test/internals.doctest b/nltk/test/internals.doctest
index bc1d673..84a628a 100644
--- a/nltk/test/internals.doctest
+++ b/nltk/test/internals.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========================================
diff --git a/nltk/test/japanese.doctest b/nltk/test/japanese.doctest
index 28c6f99..3c8eb48 100644
--- a/nltk/test/japanese.doctest
+++ b/nltk/test/japanese.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ============================
diff --git a/nltk/test/logic.doctest b/nltk/test/logic.doctest
index 5ec455a..a503d07 100644
--- a/nltk/test/logic.doctest
+++ b/nltk/test/logic.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======================
diff --git a/nltk/test/metrics.doctest b/nltk/test/metrics.doctest
index 2736348..5cae4a0 100644
--- a/nltk/test/metrics.doctest
+++ b/nltk/test/metrics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =======
@@ -128,7 +128,7 @@ Confusion Matrix
     <BLANKLINE>
 
     >>> cm = ConfusionMatrix(reference, test)
-    >>> print(cm.pp(sort_by_count=True))
+    >>> print(cm.pretty_format(sort_by_count=True))
       |   e a i o s t . T h n r 1 2 3 c d f g _ z |
     --+-------------------------------------------+
       |<8>. . . . . . . . . . . . . . . . . . 1 . |
@@ -156,7 +156,7 @@ Confusion Matrix
     (row = reference; col = test)
     <BLANKLINE>
 
-    >>> print(cm.pp(sort_by_count=True, truncate=10))
+    >>> print(cm.pretty_format(sort_by_count=True, truncate=10))
       |   e a i o s t . T h |
     --+---------------------+
       |<8>. . . . . . . . . |
@@ -173,7 +173,7 @@ Confusion Matrix
     (row = reference; col = test)
     <BLANKLINE>
 
-    >>> print(cm.pp(sort_by_count=True, truncate=10, values_in_chart=False))
+    >>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
        |                   1 |
        | 1 2 3 4 5 6 7 8 9 0 |
     ---+---------------------+
diff --git a/nltk/test/misc.doctest b/nltk/test/misc.doctest
index b14e62d..141507f 100644
--- a/nltk/test/misc.doctest
+++ b/nltk/test/misc.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 --------------------------------------------------------------------------------
diff --git a/nltk/test/nonmonotonic.doctest b/nltk/test/nonmonotonic.doctest
index ac28978..4dfad9c 100644
--- a/nltk/test/nonmonotonic.doctest
+++ b/nltk/test/nonmonotonic.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
diff --git a/nltk/test/paice.doctest b/nltk/test/paice.doctest
index 318781f..1e3a65c 100644
--- a/nltk/test/paice.doctest
+++ b/nltk/test/paice.doctest
@@ -8,7 +8,7 @@ counts Understemming Index (UI), Overstemming Index (OI), Stemming Weight (SW) a
 
    >>> from nltk.metrics import Paice
 
-  
+
 -------------------------------------
 Understemming and Overstemming values
 -------------------------------------
@@ -26,11 +26,10 @@ Understemming and Overstemming values
     (4.0, 5.0, 2.0, 16.0)
 
     >>> p.ui, p.oi, p.sw
-    (0.8, 0.125, 0.15625)
+    (0.8..., 0.125..., 0.15625...)
 
     >>> p.errt
     1.0
 
-    >>> p.coords
-    [(0.0, 1.0), (0.0, 0.375), (0.6, 0.125), (0.8, 0.125)]
-	
\ No newline at end of file
+    >>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords]
+    [('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')]
diff --git a/nltk/test/parse.doctest b/nltk/test/parse.doctest
index f792cba..6eea819 100644
--- a/nltk/test/parse.doctest
+++ b/nltk/test/parse.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========
diff --git a/nltk/test/portuguese_en.doctest b/nltk/test/portuguese_en.doctest
index ab27cf2..0cef76b 100644
--- a/nltk/test/portuguese_en.doctest
+++ b/nltk/test/portuguese_en.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==================================
diff --git a/nltk/test/portuguese_en_fixt.py b/nltk/test/portuguese_en_fixt.py
index a2e26a3..a33953b 100644
--- a/nltk/test/portuguese_en_fixt.py
+++ b/nltk/test/portuguese_en_fixt.py
@@ -4,8 +4,13 @@ from nltk.compat import PY3
 
 from nltk.corpus import teardown_module
 
+
 def setup_module(module):
     from nose import SkipTest
+
+    raise SkipTest("portuguese_en.doctest imports nltk.examples.pt which doesn't exist!")
+
     if not PY3:
-        raise SkipTest("portuguese_en.doctest was skipped because "
-                       "non-ascii doctests are not supported under Python 2.x")
\ No newline at end of file
+        raise SkipTest(
+            "portuguese_en.doctest was skipped because non-ascii doctests are not supported under Python 2.x"
+        )
diff --git a/nltk/test/probability.doctest b/nltk/test/probability.doctest
index 97f3b8b..9569057 100644
--- a/nltk/test/probability.doctest
+++ b/nltk/test/probability.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===========
diff --git a/nltk/test/propbank.doctest b/nltk/test/propbank.doctest
index fceea1e..23cb7fa 100644
--- a/nltk/test/propbank.doctest
+++ b/nltk/test/propbank.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ========
@@ -71,7 +71,7 @@ trees for 10% of the standard PropBank Corpus are contained in the
     >>> inst.predicate.select(tree)
     Tree('VBD', ['rose'])
     >>> for (argloc, argid) in inst.arguments:
-    ...     print('%-10s %s' % (argid, argloc.select(tree).pprint(500)[:50]))
+    ...     print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50]))
     ARG1       (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP (
     ARGM-DIS   (PP (IN for) (NP (NN example)))
     ARG4-to    (PP-DIR (TO to) (NP (CD 8.04) (NN %)))
diff --git a/nltk/test/relextract.doctest b/nltk/test/relextract.doctest
index 588551e..f29e464 100644
--- a/nltk/test/relextract.doctest
+++ b/nltk/test/relextract.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
diff --git a/nltk/test/resolution.doctest b/nltk/test/resolution.doctest
index 4633f49..6bbae37 100644
--- a/nltk/test/resolution.doctest
+++ b/nltk/test/resolution.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========================
diff --git a/nltk/test/semantics.doctest b/nltk/test/semantics.doctest
index 3daf7b0..a8b9f84 100644
--- a/nltk/test/semantics.doctest
+++ b/nltk/test/semantics.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =========
@@ -425,43 +425,43 @@ Satisfier Tests
     ...     g.purge()
     ...     print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g))))
     Satisfiers of 'boy(x)':
-    	['b1', 'b2']
+    ['b1', 'b2']
     Satisfiers of '(x = x)':
-    	['b1', 'b2', 'd1', 'g1', 'g2']
+    ['b1', 'b2', 'd1', 'g1', 'g2']
     Satisfiers of '(boy(x) | girl(x))':
-    	['b1', 'b2', 'g1', 'g2']
+    ['b1', 'b2', 'g1', 'g2']
     Satisfiers of '(boy(x) & girl(x))':
-    	[]
+    []
     Satisfiers of 'love(adam,x)':
-    	['g1']
+    ['g1']
     Satisfiers of 'love(x,adam)':
-    	['g1', 'g2']
+    ['g1', 'g2']
     Satisfiers of '-(x = adam)':
-    	['b2', 'd1', 'g1', 'g2']
+    ['b2', 'd1', 'g1', 'g2']
     Satisfiers of 'exists z22.love(x,z22)':
-    	['b1', 'b2', 'g1', 'g2']
+    ['b1', 'b2', 'g1', 'g2']
     Satisfiers of 'exists y.love(y,x)':
-    	['b1', 'g1', 'g2']
+    ['b1', 'g1', 'g2']
     Satisfiers of 'all y.(girl(y) -> love(x,y))':
-    	[]
+    []
     Satisfiers of 'all y.(girl(y) -> love(y,x))':
-    	['b1']
+    ['b1']
     Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))':
-    	['b1']
+    ['b1']
     Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))':
-    	[]
+    []
     Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))':
-    	['b1']
+    ['b1']
     Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))':
-    	['b1']
+    ['b1']
     Satisfiers of '(girl(x) -> dog(x))':
-    	['b1', 'b2', 'd1']
+    ['b1', 'b2', 'd1']
     Satisfiers of 'all y.(dog(y) -> (x = y))':
-    	['d1']
+    ['d1']
     Satisfiers of '-exists y.love(y,x)':
-    	['b2', 'd1']
+    ['b2', 'd1']
     Satisfiers of 'exists y.(love(adam,y) & love(y,x))':
-    	['b1']
+    ['b1']
 
 
 Tests based on the Blackburn & Bos testsuite
diff --git a/nltk/test/sentiwordnet.doctest b/nltk/test/sentiwordnet.doctest
index 8e49b8d..f9fad04 100644
--- a/nltk/test/sentiwordnet.doctest
+++ b/nltk/test/sentiwordnet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ======================
diff --git a/nltk/test/simple.doctest b/nltk/test/simple.doctest
index 71e8c40..c29753f 100644
--- a/nltk/test/simple.doctest
+++ b/nltk/test/simple.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/stem.doctest b/nltk/test/stem.doctest
index fdca6dc..c95378b 100644
--- a/nltk/test/stem.doctest
+++ b/nltk/test/stem.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ==========
diff --git a/nltk/test/tag.doctest b/nltk/test/tag.doctest
index b365c3c..415aa44 100644
--- a/nltk/test/tag.doctest
+++ b/nltk/test/tag.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 Regression Tests
diff --git a/nltk/test/tokenize.doctest b/nltk/test/tokenize.doctest
index c554e5f..135a864 100644
--- a/nltk/test/tokenize.doctest
+++ b/nltk/test/tokenize.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
     >>> from __future__ import print_function
@@ -40,6 +40,21 @@ Some test strings.
     >>> word_tokenize(s10)
     ['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.']
 
+Sentence tokenization in word_tokenize:
+
+    >>> s11 = "I called Dr. Jones. I called Dr. Jones."
+    >>> word_tokenize(s11)
+    ['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.']
+    >>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen "
+    ...        "Kuchen einzukaufen. Ich muss.")
+    >>> word_tokenize(s12)
+    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw',
+     '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+    >>> word_tokenize(s12, 'german')
+    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.',
+     'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+
+
 Regression Tests: Regexp Tokenizer
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
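The word_tokenize examples added above rely on sentence tokenization happening
first, which is why 'Dr.' survives as a single token. A small sketch of roughly
the same pipeline done explicitly (assumes the punkt model is installed):

    from nltk.tokenize import sent_tokenize, TreebankWordTokenizer

    text = "I called Dr. Jones. I called Dr. Jones."
    word_tokenizer = TreebankWordTokenizer()

    # Sentence-split first (Punkt treats 'Dr.' as an abbreviation), then
    # word-tokenize each sentence.
    tokens = [token
              for sentence in sent_tokenize(text)
              for token in word_tokenizer.tokenize(sentence)]
    print(tokens)
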
diff --git a/nltk/test/toolbox.doctest b/nltk/test/toolbox.doctest
index 4cf27f5..c373f15 100644
--- a/nltk/test/toolbox.doctest
+++ b/nltk/test/toolbox.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
diff --git a/nltk/test/tree.doctest b/nltk/test/tree.doctest
index bcfbffb..179f972 100644
--- a/nltk/test/tree.doctest
+++ b/nltk/test/tree.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
@@ -34,11 +34,35 @@ every tree, subtree, and leaf, in prefix order:
 In addition to `str` and `repr`, several methods exist to convert a
 tree object to one of several standard tree encodings:
 
-    >>> print(tree.pprint_latex_qtree())
+    >>> print(tree.pformat_latex_qtree())
     \Tree [.s
             [.dp [.d the ] [.np dog ] ]
             [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]
 
+There is also a fancy ASCII art representation:
+
+    >>> tree.pretty_print()
+                  s               
+          ________|_____           
+         |              vp        
+         |         _____|___       
+         dp       |         dp    
+      ___|___     |      ___|___   
+     d       np   v     d       np
+     |       |    |     |       |  
+    the     dog chased the     cat
+
+    >>> tree.pretty_print(unicodelines=True, nodedist=4)
+                           s                        
+            ┌──────────────┴────────┐                   
+            │                       vp              
+            │              ┌────────┴──────┐            
+            dp             │               dp       
+     ┌──────┴──────┐       │        ┌──────┴──────┐     
+     d             np      v        d             np
+     │             │       │        │             │     
+    the           dog    chased    the           cat
+
 Trees can be initialized from treebank strings:
 
     >>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
diff --git a/nltk/test/treeprettyprinter.doctest b/nltk/test/treeprettyprinter.doctest
new file mode 100644
index 0000000..4cee8ff
--- /dev/null
+++ b/nltk/test/treeprettyprinter.doctest
@@ -0,0 +1,127 @@
+.. Copyright (C) 2001-2015 NLTK Project
+.. For license information, see LICENSE.TXT
+
+========================================================
+ Unit tests for nltk.treeprettyprinter.TreePrettyPrinter
+========================================================
+
+    >>> from nltk.tree import Tree
+    >>> from nltk.treeprettyprinter import TreePrettyPrinter
+
+Tree nr 2170 from nltk.corpus.treebank:
+
+    >>> tree = Tree.fromstring(
+    ...     '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) '
+    ...     '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))')
+    >>> tpp = TreePrettyPrinter(tree)
+    >>> print(tpp.text())
+                                 S                       
+       __________________________|_____________________   
+      |                          VP                    | 
+      |      ____________________|___________          |  
+      |     |             |                PP-CLR      | 
+      |     |             |             _____|_____    |  
+    NP-SBJ  |          ADJP-PRD        |           NP  | 
+      |     |      _______|______      |           |   |  
+     PRP   VBP    RB             JJ    IN         PRP  . 
+      |     |     |              |     |           |   |  
+      I    feel pretty          good about         it  . 
+
+    >>> print(tpp.text(unicodelines=True))
+                                 S                       
+      ┌──────────────────────────┼─────────────────────┐  
+      │                          VP                    │ 
+      │     ┌─────────────┬──────┴───────────┐         │  
+      │     │             │                PP-CLR      │ 
+      │     │             │            ┌─────┴─────┐   │  
+    NP-SBJ  │          ADJP-PRD        │           NP  │ 
+      │     │     ┌───────┴──────┐     │           │   │  
+     PRP   VBP    RB             JJ    IN         PRP  . 
+      │     │     │              │     │           │   │  
+      I    feel pretty          good about         it  . 
+
+A tree with long labels:
+
+    >>> tree = Tree.fromstring(
+    ...     '(sentence (plural-noun-phrase (plural-noun Superconductors)) '
+    ...     '(verb-phrase (plural-verb conduct) '
+    ...     '(noun-phrase (singular-noun electricity))))')
+    >>> tpp = TreePrettyPrinter(tree)
+    >>> print(tpp.text(abbreviate=8, nodedist=2))
+                sentence                      
+         __________|__________                  
+        |                 verb-phr.           
+        |           __________|__________       
+    plural-n.      |                 noun-phr.
+        |          |                     |      
+    plural-n.  plural-v.             singular.
+        |          |                     |      
+    Supercon.   conduct              electric.
+
+    >>> print(tpp.text(maxwidth=8, nodedist=2))
+              sentence                   
+        _________|________                 
+       |                verb-            
+       |                phrase           
+       |          ________|_________       
+    plural-      |                noun-  
+     noun-       |                phrase 
+     phrase      |                  |    
+       |         |                  |      
+    plural-   plural-           singular-
+      noun      verb               noun  
+       |         |                  |      
+    Supercon  conduct            electric
+    ductors                        ity   
+
+A discontinuous tree:
+
+    >>> tree = Tree.fromstring(
+    ...     '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
+    ...     '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
+    ...     '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
+    >>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
+    ...             ' zwemmen of terrassen .'.split())
+    >>> tpp = TreePrettyPrinter(tree, sentence)
+    >>> print(tpp.text())
+                                          top                                                
+                                      _____|______________________________________________    
+                                   smain                      |                           |  
+      _______________________________|_____                   |                           |   
+     |    |                               inf                 |                           |  
+     |    |                           _____|____              |                           |   
+     |    |                          |         inf            |                           |  
+     |    |                          |      ____|_____        |                           |   
+     |    |                          |     |         conj     |                           |  
+     |    |                    _____ | ___ | _________|______ | __________________        |   
+     |    |                  inf     |     |                  |      |     |      |       |  
+     |    |          _________|_____ | ___ | _________        |      |     |      |       |   
+     |    |         pp               |     |          |       |      |     |      |       |  
+     |    |     ____|____            |     |          |       |      |     |      |       |   
+     |    |    |         np          |     |          |       |     inf    |     inf      |  
+     |    |    |     ____|____       |     |          |       |      |     |      |       |   
+    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+     |    |    |    |         |      |     |          |       |      |     |      |       |   
+     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
+
+    >>> print(tpp.text(unicodelines=True))
+                                          top                                                
+                                     ┌─────┴──────────────────┬───────────────────────────┐   
+                                   smain                      │                           │  
+     ┌────┬──────────────────────────┴─────┐                  │                           │   
+     │    │                               inf                 │                           │  
+     │    │                          ┌─────┴────┐             │                           │   
+     │    │                          │         inf            │                           │  
+     │    │                          │     ┌────┴─────┐       │                           │   
+     │    │                          │     │         conj     │                           │  
+     │    │                   ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐       │   
+     │    │                  inf     │     │                  │      │     │      │       │  
+     │    │         ┌─────────┴───── │ ─── │ ─────────┐       │      │     │      │       │   
+     │    │         pp               │     │          │       │      │     │      │       │  
+     │    │    ┌────┴────┐           │     │          │       │      │     │      │       │   
+     │    │    │         np          │     │          │       │     inf    │     inf      │  
+     │    │    │    ┌────┴────┐      │     │          │       │      │     │      │       │   
+    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+     │    │    │    │         │      │     │          │       │      │     │      │       │   
+     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
+
diff --git a/nltk/test/treetransforms.doctest b/nltk/test/treetransforms.doctest
index 6cc4764..bce92e0 100644
--- a/nltk/test/treetransforms.doctest
+++ b/nltk/test/treetransforms.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 -------------------------------------------
diff --git a/nltk/test/unit/test_stem.py b/nltk/test/unit/test_stem.py
index 8e78576..13ca2fa 100644
--- a/nltk/test/unit/test_stem.py
+++ b/nltk/test/unit/test_stem.py
@@ -3,6 +3,7 @@ from __future__ import print_function, unicode_literals
 import unittest
 from nltk.stem.snowball import SnowballStemmer
 
+
 class SnowballTest(unittest.TestCase):
 
     def test_russian(self):
@@ -22,6 +23,14 @@ class SnowballTest(unittest.TestCase):
         assert stemmer_german.stem("keinen") == 'kein'
         assert stemmer_german2.stem("keinen") == 'keinen'
 
+    def test_spanish(self):
+        stemmer = SnowballStemmer('spanish')
+
+        assert stemmer.stem("Visionado") == 'vision'
+
+        # The word 'algue' was raising an IndexError
+        assert stemmer.stem("algue") == 'algu'
+
     def test_short_strings_bug(self):
         stemmer = SnowballStemmer('english')
         assert stemmer.stem("y's") == 'y'
diff --git a/nltk/test/util.doctest b/nltk/test/util.doctest
index 0aafce2..c28f7cd 100644
--- a/nltk/test/util.doctest
+++ b/nltk/test/util.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest
index f2c614c..ea442de 100644
--- a/nltk/test/wordnet.doctest
+++ b/nltk/test/wordnet.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 =================
diff --git a/nltk/test/wordnet_lch.doctest b/nltk/test/wordnet_lch.doctest
index 80cae25..51d676f 100644
--- a/nltk/test/wordnet_lch.doctest
+++ b/nltk/test/wordnet_lch.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 ===============================
diff --git a/nltk/test/wsd.doctest b/nltk/test/wsd.doctest
index e4d9eda..1e97cf7 100644
--- a/nltk/test/wsd.doctest
+++ b/nltk/test/wsd.doctest
@@ -1,4 +1,4 @@
-.. Copyright (C) 2001-2014 NLTK Project
+.. Copyright (C) 2001-2015 NLTK Project
 .. For license information, see LICENSE.TXT
 
 .. -*- coding: utf-8 -*-
@@ -13,40 +13,56 @@ Lesk Algorithm
 
 
 Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using
-a the definitions of the ambiguous word. 
+the definitions of the ambiguous word.
 
-Given an ambiguous word and the context in which the word occurs, Lesk returns 
-a Synset with the highest number of overlapping words between the context 
+Given an ambiguous word and the context in which the word occurs, Lesk returns
+a Synset with the highest number of overlapping words between the context
 sentence and different definitions from each Synset.
 
     >>> from nltk.wsd import lesk
-	>>> from nltk import word_tokenize
-	>>> sent = word_tokenize("I went to the bank to deposit money.")
-	>>> word = "bank"
-	>>> pos = "n"
-	>>> print(lesk(sent, word, pos))
-	Synset('depository_financial_institution.n.01')
+    >>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
+
+    >>> print(lesk(sent, 'bank', 'n'))
+    Synset('savings_bank.n.02')
+
+    >>> print(lesk(sent, 'bank'))
+    Synset('savings_bank.n.02')
 
 The definitions for "bank" are:
 
-	>>> from nltk.corpus import wordnet as wn
-	>>> for ss in wn.synsets('bank'):
-	...     print(ss, ss.definition())
-	Synset('bank.n.01') sloping land (especially the slope beside a body of water)
-	Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
-	Synset('bank.n.03') a long ridge or pile
-	Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
-	Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
-	Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
-	Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
-	Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
-	Synset('bank.n.09') a building in which the business of banking transacted
-	Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
-	Synset('bank.v.01') tip laterally
-	Synset('bank.v.02') enclose with a bank
-	Synset('bank.v.03') do business with a bank or keep an account at a bank
-	Synset('bank.v.04') act as the banker in a game or in gambling
-	Synset('bank.v.05') be in the banking business
-	Synset('deposit.v.02') put into a bank account
-	Synset('bank.v.07') cover with ashes so to control the rate of burning
-	Synset('trust.v.01') have confidence or faith in
+    >>> from nltk.corpus import wordnet as wn
+    >>> for ss in wn.synsets('bank'):
+    ...     print(ss, ss.definition())
+    ...
+    Synset('bank.n.01') sloping land (especially the slope beside a body of water)
+    Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
+    Synset('bank.n.03') a long ridge or pile
+    Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
+    Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
+    Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
+    Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
+    Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
+    Synset('bank.n.09') a building in which the business of banking transacted
+    Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
+    Synset('bank.v.01') tip laterally
+    Synset('bank.v.02') enclose with a bank
+    Synset('bank.v.03') do business with a bank or keep an account at a bank
+    Synset('bank.v.04') act as the banker in a game or in gambling
+    Synset('bank.v.05') be in the banking business
+    Synset('deposit.v.02') put into a bank account
+    Synset('bank.v.07') cover with ashes so to control the rate of burning
+    Synset('trust.v.01') have confidence or faith in
+
+Test disambiguation of POS tagged `able`.
+
+    >>> [(s, s.pos()) for s in wn.synsets('able')]
+    [(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')]
+    >>> sent = 'people should be able to marry a person of their choice'.split()
+    >>> lesk(sent, 'able')
+    Synset('able.s.04')
+    >>> lesk(sent, 'able', pos='a')
+    Synset('able.a.01')
+
+Test behavior if there are no matching senses.
+
+    >>> lesk('John loves Mary'.split(), 'loves', synsets=[])
diff --git a/nltk/text.py b/nltk/text.py
index 37bae1c..88dea81 100644
--- a/nltk/text.py
+++ b/nltk/text.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Texts
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 # URL: <http://nltk.org/>
@@ -438,7 +438,7 @@ class Text(object):
         Requires pylab to be installed.
 
         :param words: The words to be plotted
-        :type word: str
+        :type words: list(str)
         :seealso: nltk.draw.dispersion_plot()
         """
         from nltk.draw import dispersion_plot
diff --git a/nltk/tokenize/__init__.py b/nltk/tokenize/__init__.py
index b360bc4..9200848 100644
--- a/nltk/tokenize/__init__.py
+++ b/nltk/tokenize/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -66,31 +66,39 @@ from nltk.tokenize.regexp   import (RegexpTokenizer, WhitespaceTokenizer,
                                     BlanklineTokenizer, WordPunctTokenizer,
                                     wordpunct_tokenize, regexp_tokenize,
                                     blankline_tokenize)
-from nltk.tokenize.punkt    import PunktSentenceTokenizer, PunktWordTokenizer
+from nltk.tokenize.punkt    import PunktSentenceTokenizer
 from nltk.tokenize.sexpr    import SExprTokenizer, sexpr_tokenize
 from nltk.tokenize.treebank import TreebankWordTokenizer
 from nltk.tokenize.texttiling import TextTilingTokenizer
 
 # Standard sentence tokenizer.
-def sent_tokenize(text):
+def sent_tokenize(text, language='english'):
     """
     Return a sentence-tokenized copy of *text*,
     using NLTK's recommended sentence tokenizer
-    (currently :class:`.PunktSentenceTokenizer`).
+    (currently :class:`.PunktSentenceTokenizer`
+    for the specified language).
+
+    :param text: text to split into sentences
+    :param language: the model name in the Punkt corpus
     """
-    tokenizer = load('tokenizers/punkt/english.pickle')
+    tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
     return tokenizer.tokenize(text)
 
 # Standard word tokenizer.
 _treebank_word_tokenize = TreebankWordTokenizer().tokenize
-def word_tokenize(text):
+def word_tokenize(text, language='english'):
     """
     Return a tokenized copy of *text*,
     using NLTK's recommended word tokenizer
     (currently :class:`.TreebankWordTokenizer`
-    along with :class:`.PunktSentenceTokenizer`).
+    along with :class:`.PunktSentenceTokenizer`
+    for the specified language).
+
+    :param text: text to split into sentences
+    :param language: the model name in the Punkt corpus
     """
-    return [token for sent in sent_tokenize(text)
+    return [token for sent in sent_tokenize(text, language)
             for token in _treebank_word_tokenize(sent)]
 
 if __name__ == "__main__":
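
For reference, a minimal sketch of how the language-aware entry points above are
intended to be used (assuming NLTK 3.0.2 with the Punkt models installed, e.g.
via nltk.download('punkt'); the German text is purely illustrative):

    from nltk.tokenize import sent_tokenize, word_tokenize

    text = "Das ist ein Satz. Hier ist noch einer."

    # sent_tokenize() now loads tokenizers/punkt/<language>.pickle;
    # 'english' stays the default, so existing calls behave as before.
    print(sent_tokenize(text, language='german'))

    # word_tokenize() forwards `language` to sent_tokenize() and then applies
    # the Treebank word tokenizer to each sentence.
    print(word_tokenize(text, language='german'))
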
diff --git a/nltk/tokenize/api.py b/nltk/tokenize/api.py
index 5f4d965..9eee06d 100644
--- a/nltk/tokenize/api.py
+++ b/nltk/tokenize/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizer Interface
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
diff --git a/nltk/tokenize/punkt.py b/nltk/tokenize/punkt.py
index 211a67e..2700661 100644
--- a/nltk/tokenize/punkt.py
+++ b/nltk/tokenize/punkt.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Punkt sentence tokenizer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Algorithm: Kiss & Strunk (2006)
 # Author: Willy <willy at csse.unimelb.edu.au> (original Python port)
 #         Steven Bird <stevenbird1 at gmail.com> (additions)
@@ -94,18 +94,6 @@ parameters from the given text.
 allows for incremental training and modification of the hyper-parameters used
 to decide what is considered an abbreviation, etc.
 
-:class:`.PunktWordTokenizer` uses a regular expression to divide a text into tokens,
-leaving all periods attached to words, but separating off other punctuation:
-
-    >>> from nltk.tokenize.punkt import PunktWordTokenizer
-    >>> s = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\n\nThanks."
-    >>> PunktWordTokenizer().tokenize(s)
-    ['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York.', 'Please',
-    'buy', 'me', 'two', 'of', 'them.', 'Thanks.']
-    >>> PunktWordTokenizer().span_tokenize(s)
-    [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44), 
-    (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
-
 The algorithm for this tokenizer is described in::
 
   Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence
@@ -307,38 +295,6 @@ numeric tokens are changed to ##number## and hence contain alpha.)"""
 ######################################################################
 
 
-######################################################################
-#{ Punkt Word Tokenizer
-######################################################################
-
-class PunktWordTokenizer(TokenizerI):
-    # Retained for backward compatibility
-    def __init__(self, lang_vars=PunktLanguageVars()):
-        self._lang_vars = lang_vars
-
-    def tokenize(self, text):
-        return self._lang_vars.word_tokenize(text)
-
-    def span_tokenize(self, text):
-        """
-        Given a text, returns a list of the (start, end) spans of words
-        in the text.
-        """
-        return [(sl.start, sl.stop) for sl in self._slices_from_text(text)]
-
-    def _slices_from_text(self, text):
-        last_break = 0
-        contains_no_words = True
-        for match in self._lang_vars._word_tokenizer_re().finditer(text):
-            contains_no_words = False
-            context = match.group()
-            yield slice(match.start(), match.end())
-        if contains_no_words:
-            yield slice(0, 0) # matches PunktSentenceTokenizer's functionality
-
-#}
-######################################################################
-
 
 #////////////////////////////////////////////////////////////
 #{ Helper Functions
diff --git a/nltk/tokenize/regexp.py b/nltk/tokenize/regexp.py
index 476b9a2..743c725 100644
--- a/nltk/tokenize/regexp.py
+++ b/nltk/tokenize/regexp.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Trevor Cohn <tacohn at csse.unimelb.edu.au>
@@ -111,17 +111,19 @@ class RegexpTokenizer(TokenizerI):
         self._discard_empty = discard_empty
         self._flags = flags
         self._regexp = None
-
-        # Remove capturing parentheses -- if the regexp contains any
-        # capturing parentheses, then the behavior of re.findall and
-        # re.split will change.
-        try:
-            self._regexp = compile_regexp_to_noncapturing(pattern, flags)
-        except re.error as e:
-            raise ValueError('Error in regular expression %r: %s' %
-                             (pattern, e))
-
+        
+    def _check_regexp(self):
+        if self._regexp is None:
+            try:
+                # Remove capturing parentheses -- if the regexp contains any
+                # capturing parentheses, then the behavior of re.findall and
+                # re.split will change.                 
+                self._regexp = compile_regexp_to_noncapturing(self._pattern, self._flags)
+            except re.error as e:
+                raise ValueError('Error in regular expression %r: %s' % (self._pattern, e))
+        
     def tokenize(self, text):
+        self._check_regexp()
         # If our regexp matches gaps, use re.split:
         if self._gaps:
             if self._discard_empty:
@@ -134,6 +136,8 @@ class RegexpTokenizer(TokenizerI):
             return self._regexp.findall(text)
 
     def span_tokenize(self, text):
+        self._check_regexp()
+
         if self._gaps:
             for left, right in regexp_span_tokenize(text, self._regexp):
                 if not (self._discard_empty and left == right):
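
A small sketch of the behavioural consequence of deferring compilation (a rough
illustration, not part of the patch): the pattern is only compiled on first use,
so an invalid pattern now raises ValueError when the tokenizer is first applied
rather than when it is constructed.

    from nltk.tokenize import RegexpTokenizer

    tok = RegexpTokenizer(r'\w+|\$[\d\.]+|\S+')   # nothing compiled yet
    print(tok.tokenize("Good muffins cost $3.88 in New York."))

    bad = RegexpTokenizer(r'(unbalanced')         # construction still succeeds
    try:
        bad.tokenize("the error surfaces only here")
    except ValueError as err:
        print(err)
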
diff --git a/nltk/tokenize/sexpr.py b/nltk/tokenize/sexpr.py
index 78e733e..6a2713e 100644
--- a/nltk/tokenize/sexpr.py
+++ b/nltk/tokenize/sexpr.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Yoav Goldberg <yoavg at cs.bgu.ac.il>
 #         Steven Bird <stevenbird1 at gmail.com> (minor edits)
 # URL: <http://nltk.sourceforge.net>
diff --git a/nltk/tokenize/simple.py b/nltk/tokenize/simple.py
index b92a7e3..032d2f0 100644
--- a/nltk/tokenize/simple.py
+++ b/nltk/tokenize/simple.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Simple Tokenizers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.sourceforge.net>
diff --git a/nltk/tokenize/stanford.py b/nltk/tokenize/stanford.py
index 39f4ddb..581698e 100644
--- a/nltk/tokenize/stanford.py
+++ b/nltk/tokenize/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Tokenizer
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Xu <xxu at student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
@@ -36,7 +36,7 @@ class StanfordTokenizer(TokenizerI):
 
     _JAR = 'stanford-postagger.jar'
 
-    def __init__(self, path_to_jar=None, encoding='UTF-8', options=None, verbose=False, java_options='-mx1000m'):
+    def __init__(self, path_to_jar=None, encoding='utf8', options=None, verbose=False, java_options='-mx1000m'):
         self._stanford_jar = find_jar(
             self._JAR, path_to_jar,
             env_vars=('STANFORD_POSTAGGER',),
@@ -46,8 +46,9 @@ class StanfordTokenizer(TokenizerI):
 
         self._encoding = encoding
         self.java_options = java_options
+
         options = {} if options is None else options
-        self._options_cmd = ','.join('{0}={1}'.format(key, json.dumps(val)) for key, val in options.items())
+        self._options_cmd = ','.join('{0}={1}'.format(key, val) for key, val in options.items())
 
     @staticmethod
     def _parse_tokenized_output(s):
@@ -104,3 +105,8 @@ def setup_module(module):
         StanfordTokenizer()
     except LookupError:
         raise SkipTest('doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn\'t exist')
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS)
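
A hedged usage sketch for the revised options handling (the jar path below is
hypothetical; a Stanford POS tagger distribution must be available, and
'americanize' is a standard PTBTokenizer option that is now passed through
verbatim rather than JSON-encoded):

    from nltk.tokenize.stanford import StanfordTokenizer

    tok = StanfordTokenizer(
        path_to_jar='/opt/stanford/stanford-postagger.jar',  # hypothetical path
        options={'americanize': 'false'},
    )
    print(tok.tokenize("Good muffins cost $3.88 in New York."))
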
diff --git a/nltk/tokenize/texttiling.py b/nltk/tokenize/texttiling.py
index a5df71c..d8c3e62 100644
--- a/nltk/tokenize/texttiling.py
+++ b/nltk/tokenize/texttiling.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: TextTiling
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: George Boutsioukis
 #
 # URL: <http://nltk.org/>
@@ -414,7 +414,7 @@ def smooth(x,window_len=11,window='flat'):
     if x.size < window_len:
         raise ValueError("Input vector needs to be bigger than window size.")
 
-    if window_len<3:
+    if window_len < 3:
         return x
 
     if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
@@ -424,11 +424,11 @@ def smooth(x,window_len=11,window='flat'):
 
     #print(len(s))
     if window == 'flat': #moving average
-        w=numpy.ones(window_len,'d')
+        w = numpy.ones(window_len,'d')
     else:
-        w=eval('numpy.'+window+'(window_len)')
+        w = eval('numpy.' + window + '(window_len)')
 
-    y=numpy.convolve(w/w.sum(),s,mode='same')
+    y = numpy.convolve(w/w.sum(), s, mode='same')
 
     return y[window_len-1:-window_len+1]
 
@@ -436,15 +436,15 @@ def smooth(x,window_len=11,window='flat'):
 def demo(text=None):
     from nltk.corpus import brown
     import pylab
-    tt=TextTilingTokenizer(demo_mode=True)
-    if text is None: text=brown.raw()[:10000]
-    s,ss,d,b=tt.tokenize(text)
+    tt = TextTilingTokenizer(demo_mode=True)
+    if text is None: text = brown.raw()[:10000]
+    s, ss, d, b = tt.tokenize(text)
     pylab.xlabel("Sentence Gap index")
     pylab.ylabel("Gap Scores")
     pylab.plot(range(len(s)), s, label="Gap Scores")
     pylab.plot(range(len(ss)), ss, label="Smoothed Gap scores")
     pylab.plot(range(len(d)), d, label="Depth scores")
-    pylab.stem(range(len(b)),b)
+    pylab.stem(range(len(b)), b)
     pylab.legend()
     pylab.show()
 
diff --git a/nltk/tokenize/treebank.py b/nltk/tokenize/treebank.py
index aabde29..465f5d9 100644
--- a/nltk/tokenize/treebank.py
+++ b/nltk/tokenize/treebank.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Michael Heilman <mheilman at cmu.edu> (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed)
 #
diff --git a/nltk/tokenize/util.py b/nltk/tokenize/util.py
index d9d1342..7fe7a6f 100644
--- a/nltk/tokenize/util.py
+++ b/nltk/tokenize/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizer Utilities
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
diff --git a/nltk/toolbox.py b/nltk/toolbox.py
index 46bf42b..8b86fc6 100644
--- a/nltk/toolbox.py
+++ b/nltk/toolbox.py
@@ -1,7 +1,7 @@
 # coding: utf-8
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Greg Aumann <greg_aumann at sil.org>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
diff --git a/nltk/tree.py b/nltk/tree.py
index eab736e..2de9cb9 100644
--- a/nltk/tree.py
+++ b/nltk/tree.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Text Trees
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Edward Loper <edloper at gmail.com>
 #         Steven Bird <stevenbird1 at gmail.com>
 #         Peter Ljunglöf <peter.ljunglof at gu.se>
@@ -685,6 +685,16 @@ class Tree(list):
         from nltk.draw.tree import draw_trees
         draw_trees(self)
 
+    def pretty_print(self, sentence=None, highlight=(), stream=None, **kwargs):
+        """
+        Pretty-print this tree as ASCII or Unicode art.
+        For explanation of the arguments, see the documentation for
+        `nltk.treeprettyprinter.TreePrettyPrinter`.
+        """
+        from nltk.treeprettyprinter import TreePrettyPrinter
+        print(TreePrettyPrinter(self, sentence, highlight).text(**kwargs),
+              file=stream)
+        
     def __repr__(self):
         childstr = ", ".join(unicode_repr(c) for c in self)
         return '%s(%s, [%s])' % (type(self).__name__, unicode_repr(self._label), childstr)
@@ -715,8 +725,7 @@ class Tree(list):
             _canvas_frame.destroy_widget(widget)
             subprocess.call([find_binary('gs', binary_names=['gswin32c.exe', 'gswin64c.exe'], env_vars=['PATH'], verbose=False)] +
                             '-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}'
-                            .format(out_path, in_path).split(),
-                            shell=True)
+                            .format(out_path, in_path).split())
             with open(out_path, 'rb') as sr:
                 res = sr.read()
             os.remove(in_path)
@@ -724,9 +733,21 @@ class Tree(list):
             return base64.b64encode(res).decode()
 
     def __str__(self):
-        return self.pprint()
+        return self.pformat()
+
+    def pprint(self, **kwargs):
+        """
+        Print a string representation of this Tree to 'stream'
+        """
+
+        if "stream" in kwargs:
+            stream = kwargs["stream"]
+            del kwargs["stream"]
+        else:
+            stream = None
+        print(self.pformat(**kwargs), file=stream)
 
-    def pprint(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
+    def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
         """
         :return: A pretty-printed string representation of this tree.
         :rtype: str
@@ -742,8 +763,8 @@ class Tree(list):
         """
 
         # Try writing it on one line.
-        s = self._pprint_flat(nodesep, parens, quotes)
-        if len(s)+indent < margin:
+        s = self._pformat_flat(nodesep, parens, quotes)
+        if len(s) + indent < margin:
             return s
 
         # If it doesn't fit on one line, then write it on multi-lines.
@@ -753,7 +774,7 @@ class Tree(list):
             s = '%s%s%s' % (parens[0], unicode_repr(self._label), nodesep)
         for child in self:
             if isinstance(child, Tree):
-                s += '\n'+' '*(indent+2)+child.pprint(margin, indent+2,
+                s += '\n'+' '*(indent+2)+child.pformat(margin, indent+2,
                                                   nodesep, parens, quotes)
             elif isinstance(child, tuple):
                 s += '\n'+' '*(indent+2)+ "/".join(child)
@@ -763,7 +784,7 @@ class Tree(list):
                 s += '\n'+' '*(indent+2)+ unicode_repr(child)
         return s+parens[1]
 
-    def pprint_latex_qtree(self):
+    def pformat_latex_qtree(self):
         r"""
         Returns a representation of the tree compatible with the
         LaTeX qtree package. This consists of the string ``\Tree``
@@ -783,14 +804,14 @@ class Tree(list):
         """
         reserved_chars = re.compile('([#\$%&~_\{\}])')
 
-        pprint = self.pprint(indent=6, nodesep='', parens=('[.', ' ]'))
-        return r'\Tree ' + re.sub(reserved_chars, r'\\\1', pprint)
+        pformat = self.pformat(indent=6, nodesep='', parens=('[.', ' ]'))
+        return r'\Tree ' + re.sub(reserved_chars, r'\\\1', pformat)
 
-    def _pprint_flat(self, nodesep, parens, quotes):
+    def _pformat_flat(self, nodesep, parens, quotes):
         childstrs = []
         for child in self:
             if isinstance(child, Tree):
-                childstrs.append(child._pprint_flat(nodesep, parens, quotes))
+                childstrs.append(child._pformat_flat(nodesep, parens, quotes))
             elif isinstance(child, tuple):
                 childstrs.append("/".join(child))
             elif isinstance(child, string_types) and not quotes:
@@ -1391,7 +1412,7 @@ class ProbabilisticTree(Tree, ProbabilisticMixIn):
     def __repr__(self):
         return '%s (p=%r)' % (Tree.unicode_repr(self), self.prob())
     def __str__(self):
-        return '%s (p=%.6g)' % (self.pprint(margin=60), self.prob())
+        return '%s (p=%.6g)' % (self.pformat(margin=60), self.prob())
     def copy(self, deep=False):
         if not deep: return type(self)(self._label, self, prob=self.prob())
         else: return type(self).convert(self)
@@ -1433,7 +1454,7 @@ class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn):
     def __repr__(self):
         return '%s [%s]' % (Tree.unicode_repr(self), self.prob())
     def __str__(self):
-        return '%s [%s]' % (self.pprint(margin=60), self.prob())
+        return '%s [%s]' % (self.pformat(margin=60), self.prob())
     def copy(self, deep=False):
         if not deep: return type(self)(self._label, self, prob=self.prob())
         else: return type(self).convert(self)
@@ -1556,14 +1577,14 @@ def demo():
     print()
 
     # Demonstrate parsing of treebank output format.
-    t = Tree.fromstring(t.pprint())
+    t = Tree.fromstring(t.pformat())
     print("Convert tree to bracketed string and back again:")
     print(t)
     print()
 
     # Demonstrate LaTeX output
     print("LaTeX output:")
-    print(t.pprint_latex_qtree())
+    print(t.pformat_latex_qtree())
     print()
 
     # Demonstrate Productions
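
A brief sketch of the renamed API (assuming NLTK 3.0.2): pformat() now returns
the bracketed string that pprint() used to return, pprint() prints it
(optionally to a stream), and pretty_print() renders ASCII/Unicode art via the
new TreePrettyPrinter module.

    import sys
    from nltk.tree import Tree

    t = Tree.fromstring('(S (NP (DT the) (NN dog)) (VP (VBD barked)))')

    s = t.pformat(margin=40)            # string, the old pprint() behaviour
    t.pprint(stream=sys.stderr)         # prints the same representation
    print(t.pformat_latex_qtree())      # renamed from pprint_latex_qtree()
    t.pretty_print()                    # ASCII art via TreePrettyPrinter
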
diff --git a/nltk/treeprettyprinter.py b/nltk/treeprettyprinter.py
new file mode 100644
index 0000000..06c1a86
--- /dev/null
+++ b/nltk/treeprettyprinter.py
@@ -0,0 +1,566 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: ASCII visualization of NLTK trees
+#
+# Copyright (C) 2001-2015 NLTK Project
+# Author: Andreas van Cranenburgh <A.W.vanCranenburgh at uva.nl>
+#         Peter Ljunglöf <peter.ljunglof at gu.se>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Pretty-printing of discontinuous trees. 
+Adapted from the disco-dop project, by Andreas van Cranenburgh.
+https://github.com/andreasvc/disco-dop
+
+Interesting reference (not used for this code):
+T. Eschbach et al., Orth. Hypergraph Drawing, Journal of
+Graph Algorithms and Applications, 10(2) 141--157 (2006).
+http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf
+"""
+
+from __future__ import division, print_function, unicode_literals
+
+from nltk.util import slice_bounds, OrderedDict
+from nltk.compat import string_types, python_2_unicode_compatible, unicode_repr
+from nltk.internals import raise_unorderable_types
+from nltk.tree import Tree
+
+import re
+import sys
+import codecs
+from cgi import escape
+from collections import defaultdict
+from operator import itemgetter
+from itertools import chain, islice
+
+
+ANSICOLOR = {
+    'black': 30,
+    'red': 31,
+    'green': 32,
+    'yellow': 33,
+    'blue': 34,
+    'magenta': 35,
+    'cyan': 36,
+    'white': 37,
+}
+
+
+ at python_2_unicode_compatible
+class TreePrettyPrinter(object):
+    """
+    Pretty-print a tree in text format, either as ASCII or Unicode.
+    The tree can be a normal tree, or discontinuous.
+
+    ``TreePrettyPrinter(tree, sentence=None, highlight=())``
+    creates an object from which different visualizations can be created.
+
+    :param tree: a Tree object.
+    :param sentence: a list of words (strings). If `sentence` is given, 
+        `tree` must contain integers as leaves, which are taken as indices 
+        in `sentence`. Using this you can display a discontinuous tree.
+    :param highlight: Optionally, a sequence of Tree objects in `tree` which
+        should be highlighted. Has the effect of only applying colors to nodes
+        in this sequence (nodes should be given as Tree objects, terminals as
+        indices).
+
+    >>> from nltk.tree import Tree
+    >>> tree = Tree.fromstring('(S (NP Mary) (VP walks))')
+    >>> print(TreePrettyPrinter(tree).text())
+    ... # doctest: +NORMALIZE_WHITESPACE
+          S
+      ____|____
+     NP        VP
+     |         |
+    Mary     walks
+    """
+
+    def __init__(self, tree, sentence=None, highlight=()):
+        if sentence is None:
+            leaves = tree.leaves()
+            if (leaves and not any(len(a) == 0 for a in tree.subtrees())
+                    and all(isinstance(a, int) for a in leaves)):
+                sentence = [str(a) for a in leaves]
+            else:
+                # this deals with empty nodes (frontier non-terminals)
+                # and multiple/mixed terminals under non-terminals.
+                tree = tree.copy(True)
+                sentence = []
+                for a in tree.subtrees():
+                    if len(a) == 0:
+                        a.append(len(sentence))
+                        sentence.append(None)
+                    elif any(not isinstance(b, Tree) for b in a):
+                        for n, b in enumerate(a):
+                            if not isinstance(b, Tree):
+                                a[n] = len(sentence)
+                                sentence.append('%s' % b)
+        self.nodes, self.coords, self.edges, self.highlight = self.nodecoords(
+                tree, sentence, highlight)
+
+    def __str__(self):
+        return self.text()
+
+    def __repr__(self):
+        return '<TreePrettyPrinter with %d nodes>' % len(self.nodes)
+
+
+    @staticmethod
+    def nodecoords(tree, sentence, highlight):
+        """
+        Produce coordinates of nodes on a grid.
+
+        Objective:
+
+        - Produce coordinates for a non-overlapping placement of nodes and
+            horizontal lines.
+        - Order edges so that crossing edges cross a minimal number of previous
+            horizontal lines (never vertical lines).
+
+        Approach:
+
+        - bottom up level order traversal (start at terminals)
+        - at each level, identify nodes which cannot be on the same row
+        - identify nodes which cannot be in the same column
+        - place nodes into a grid at (row, column)
+        - order child-parent edges with crossing edges last
+
+        Coordinates are (row, column); the origin (0, 0) is at the top left;
+        the root node is on row 0. Coordinates do not consider the size of a
+        node (which depends on font, &c), so the width of a column of the grid
+        should be automatically determined by the element with the greatest
+        width in that column. Alternatively, the integer coordinates could be
+        converted to coordinates in which the distances between adjacent nodes
+        are non-uniform.
+
+        Produces tuple (nodes, coords, edges, highlighted) where:
+
+        - nodes[id]: Tree object for the node with this integer id
+        - coords[id]: (n, m) coordinate where to draw node with id in the grid
+        - edges[id]: parent id of node with this id (ordered dictionary)
+        - highlighted: set of ids that should be highlighted
+        """
+        def findcell(m, matrix, startoflevel, children):
+            """
+            Find vacant row, column index for node ``m``.
+            Iterate over current rows for this level (try lowest first)
+            and look for cell between first and last child of this node,
+            add new row to level if no free row available.
+            """
+            candidates = [a for _, a in children[m]]
+            minidx, maxidx = min(candidates), max(candidates)
+            leaves = tree[m].leaves()
+            center = scale * sum(leaves) // len(leaves)  # center of gravity
+            if minidx < maxidx and not minidx < center < maxidx:
+                center = sum(candidates) // len(candidates)
+            if max(candidates) - min(candidates) > 2 * scale:
+                center -= center % scale  # round to unscaled coordinate
+                if minidx < maxidx and not minidx < center < maxidx:
+                    center += scale
+            if ids[m] == 0:
+                startoflevel = len(matrix)
+            for rowidx in range(startoflevel, len(matrix) + 1):
+                if rowidx == len(matrix):  # need to add a new row
+                    matrix.append([vertline if a not in (corner, None)
+                            else None for a in matrix[-1]])
+                row = matrix[rowidx]
+                i = j = center
+                if len(children[m]) == 1:  # place unaries directly above child
+                    return rowidx, next(iter(children[m]))[1]
+                elif all(a is None or a == vertline for a
+                        in row[min(candidates):max(candidates) + 1]):
+                    # find free column
+                    for n in range(scale):
+                        i = j = center + n
+                        while j > minidx or i < maxidx:
+                            if i < maxidx and (matrix[rowidx][i] is None
+                                    or i in candidates):
+                                return rowidx, i
+                            elif j > minidx and (matrix[rowidx][j] is None
+                                    or j in candidates):
+                                return rowidx, j
+                            i += scale
+                            j -= scale
+            raise ValueError('could not find a free cell for:\n%s\n'
+                    'min=%d; max=%d\n%s' % (tree[m], minidx, maxidx, dumpmatrix()))
+
+        def dumpmatrix():
+            """Dump matrix contents for debugging purposes."""
+            return '\n'.join(
+                '%2d: %s' % (n, ' '.join(('%2r' % i)[:2] for i in row))
+                for n, row in enumerate(matrix))
+
+        leaves = tree.leaves()
+        if not all(isinstance(n, int) for n in leaves):
+            raise ValueError('All leaves must be integer indices.')
+        if len(leaves) != len(set(leaves)):
+            raise ValueError('Indices must occur at most once.')
+        if not all(0 <= n < len(sentence) for n in leaves):
+            raise ValueError('All leaves must be in the interval 0..n '
+                    'with n=len(sentence)\ntokens: %d indices: '
+                    '%r\nsentence: %s' % (len(sentence), tree.leaves(), sentence))
+        vertline, corner = -1, -2  # constants
+        tree = tree.copy(True)
+        for a in tree.subtrees():
+            a.sort(key=lambda n: min(n.leaves()) if isinstance(n, Tree) else n)
+        scale = 2
+        crossed = set()
+        # internal nodes and lexical nodes (no frontiers)
+        positions = tree.treepositions()
+        maxdepth = max(map(len, positions)) + 1
+        childcols = defaultdict(set)
+        matrix = [[None] * (len(sentence) * scale)]
+        nodes = {}
+        ids = dict((a, n) for n, a in enumerate(positions))
+        highlighted_nodes = set(n for a, n in ids.items()
+                                if not highlight or tree[a] in highlight)
+        levels = dict((n, []) for n in range(maxdepth - 1))
+        terminals = []
+        for a in positions:
+            node = tree[a]
+            if isinstance(node, Tree):
+                levels[maxdepth - node.height()].append(a)
+            else:
+                terminals.append(a)
+
+        for n in levels:
+            levels[n].sort(key=lambda n: max(tree[n].leaves())
+                    - min(tree[n].leaves()))
+        terminals.sort()
+        positions = set(positions)
+
+        for m in terminals:
+            i = int(tree[m]) * scale
+            assert matrix[0][i] is None, (matrix[0][i], m, i)
+            matrix[0][i] = ids[m]
+            nodes[ids[m]] = sentence[tree[m]]
+            if nodes[ids[m]] is None:
+                nodes[ids[m]] = '...'
+                highlighted_nodes.discard(ids[m])
+            positions.remove(m)
+            childcols[m[:-1]].add((0, i))
+
+        # add other nodes centered on their children,
+        # if the center is already taken, back off
+        # to the left and right alternately, until an empty cell is found.
+        for n in sorted(levels, reverse=True):
+            nodesatdepth = levels[n]
+            startoflevel = len(matrix)
+            matrix.append([vertline if a not in (corner, None) else None
+                    for a in matrix[-1]])
+            for m in nodesatdepth:  # [::-1]:
+                if n < maxdepth - 1 and childcols[m]:
+                    _, pivot = min(childcols[m], key=itemgetter(1))
+                    if (set(a[:-1] for row in matrix[:-1] for a in row[:pivot]
+                            if isinstance(a, tuple)) &
+                        set(a[:-1] for row in matrix[:-1] for a in row[pivot:]
+                            if isinstance(a, tuple))):
+                        crossed.add(m)
+
+                rowidx, i = findcell(m, matrix, startoflevel, childcols)
+                positions.remove(m)
+
+                # block positions where children of this node branch out
+                for _, x in childcols[m]:
+                    matrix[rowidx][x] = corner
+                # assert m == () or matrix[rowidx][i] in (None, corner), (
+                #         matrix[rowidx][i], m, str(tree), ' '.join(sentence))
+                # node itself
+                matrix[rowidx][i] = ids[m]
+                nodes[ids[m]] = tree[m]
+                # add column to the set of children for its parent
+                if m != ():
+                    childcols[m[:-1]].add((rowidx, i))
+        assert len(positions) == 0
+
+        # remove unused columns, right to left
+        for m in range(scale * len(sentence) - 1, -1, -1):
+            if not any(isinstance(row[m], (Tree, int))
+                    for row in matrix):
+                for row in matrix:
+                    del row[m]
+
+        # remove unused rows, reverse
+        matrix = [row for row in reversed(matrix)
+                if not all(a is None or a == vertline for a in row)]
+
+        # collect coordinates of nodes
+        coords = {}
+        for n, _ in enumerate(matrix):
+            for m, i in enumerate(matrix[n]):
+                if isinstance(i, int) and i >= 0:
+                    coords[i] = n, m
+
+        # move crossed edges last
+        positions = sorted([a for level in levels.values()
+                for a in level], key=lambda a: a[:-1] in crossed)
+
+        # collect edges from node to node
+        edges = OrderedDict()
+        for i in reversed(positions):
+            for j, _ in enumerate(tree[i]):
+                edges[ids[i + (j, )]] = ids[i]
+
+        return nodes, coords, edges, highlighted_nodes
+
+
+    def text(self, nodedist=1, unicodelines=False, html=False, ansi=False,
+             nodecolor='blue', leafcolor='red', funccolor='green',
+             abbreviate=None, maxwidth=16):
+        """
+        :return: ASCII art for a discontinuous tree.
+
+        :param unicodelines: whether to use Unicode line drawing characters
+            instead of plain (7-bit) ASCII.
+        :param html: whether to wrap output in html code (default plain text).
+        :param ansi: whether to produce colors with ANSI escape sequences
+            (only effective when html==False).
+        :param leafcolor, nodecolor: specify colors of leaves and phrasal
+            nodes; effective when either html or ansi is True.
+        :param abbreviate: if True, abbreviate labels longer than 5 characters.
+            If an integer, abbreviate labels longer than that many characters.
+        :param maxwidth: maximum number of characters before a label starts to
+            wrap; pass None to disable.
+        """
+        if abbreviate is True:
+            abbreviate = 5
+        if unicodelines:
+            horzline = '\u2500'
+            leftcorner = '\u250c'
+            rightcorner = '\u2510'
+            vertline = ' \u2502 '
+            tee = horzline + '\u252C' + horzline
+            bottom = horzline + '\u2534' + horzline
+            cross = horzline + '\u253c' + horzline
+            ellipsis = '\u2026'
+        else:
+            horzline = '_'
+            leftcorner = rightcorner = ' '
+            vertline = ' | '
+            tee = 3 * horzline
+            cross = bottom = '_|_'
+            ellipsis = '.'
+
+        def crosscell(cur, x=vertline):
+            """Overwrite center of this cell with a vertical branch."""
+            splitl = len(cur) - len(cur) // 2 - len(x) // 2 - 1
+            lst = list(cur)
+            lst[splitl:splitl + len(x)] = list(x)
+            return ''.join(lst)
+
+        result = []
+        matrix = defaultdict(dict)
+        maxnodewith = defaultdict(lambda: 3)
+        maxnodeheight = defaultdict(lambda: 1)
+        maxcol = 0
+        minchildcol = {}
+        maxchildcol = {}
+        childcols = defaultdict(set)
+        labels = {}
+        wrapre = re.compile('(.{%d,%d}\\b\\W*|.{%d})' % (
+                maxwidth - 4, maxwidth, maxwidth))
+        # collect labels and coordinates
+        for a in self.nodes:
+            row, column = self.coords[a]
+            matrix[row][column] = a
+            maxcol = max(maxcol, column)
+            label = (self.nodes[a].label() if isinstance(self.nodes[a], Tree)
+                     else self.nodes[a])
+            if abbreviate and len(label) > abbreviate:
+                label = label[:abbreviate] + ellipsis
+            if maxwidth and len(label) > maxwidth:
+                label = wrapre.sub(r'\1\n', label).strip()
+            label = label.split('\n')
+            maxnodeheight[row] = max(maxnodeheight[row], len(label))
+            maxnodewith[column] = max(maxnodewith[column], max(map(len, label)))
+            labels[a] = label
+            if a not in self.edges:
+                continue  # e.g., root
+            parent = self.edges[a]
+            childcols[parent].add((row, column))
+            minchildcol[parent] = min(minchildcol.get(parent, column), column)
+            maxchildcol[parent] = max(maxchildcol.get(parent, column), column)
+        # bottom up level order traversal
+        for row in sorted(matrix, reverse=True):
+            noderows = [[''.center(maxnodewith[col]) for col in range(maxcol + 1)]
+                    for _ in range(maxnodeheight[row])]
+            branchrow = [''.center(maxnodewith[col]) for col in range(maxcol + 1)]
+            for col in matrix[row]:
+                n = matrix[row][col]
+                node = self.nodes[n]
+                text = labels[n]
+                if isinstance(node, Tree):
+                    # draw horizontal branch towards children for this node
+                    if n in minchildcol and minchildcol[n] < maxchildcol[n]:
+                        i, j = minchildcol[n], maxchildcol[n]
+                        a, b = (maxnodewith[i] + 1) // 2 - 1, maxnodewith[j] // 2
+                        branchrow[i] = ((' ' * a) + leftcorner).ljust(
+                                maxnodewith[i], horzline)
+                        branchrow[j] = (rightcorner + (' ' * b)).rjust(
+                                maxnodewith[j], horzline)
+                        for i in range(minchildcol[n] + 1, maxchildcol[n]):
+                            if i == col and any(
+                                    a == i for _, a in childcols[n]):
+                                line = cross
+                            elif i == col:
+                                line = bottom
+                            elif any(a == i for _, a in childcols[n]):
+                                line = tee
+                            else:
+                                line = horzline
+                            branchrow[i] = line.center(maxnodewith[i], horzline)
+                    else:  # if n and n in minchildcol:
+                        branchrow[col] = crosscell(branchrow[col])
+                text = [a.center(maxnodewith[col]) for a in text]
+                color = nodecolor if isinstance(node, Tree) else leafcolor
+                if isinstance(node, Tree) and node.label().startswith('-'):
+                    color = funccolor
+                if html:
+                    text = [escape(a) for a in text]
+                    if n in self.highlight:
+                        text = ['<font color=%s>%s</font>' % (
+                                color, a) for a in text]
+                elif ansi and n in self.highlight:
+                    text = ['\x1b[%d;1m%s\x1b[0m' % (
+                            ANSICOLOR[color], a) for a in text]
+                for x in range(maxnodeheight[row]):
+                    # draw vertical lines in partially filled multiline node
+                    # labels, but only if it's not a frontier node.
+                    noderows[x][col] = (text[x] if x < len(text)
+                            else (vertline if childcols[n] else ' ').center(
+                                maxnodewith[col], ' '))
+            # for each column, if there is a node below us which has a parent
+            # above us, draw a vertical branch in that column.
+            if row != max(matrix):
+                for n, (childrow, col) in self.coords.items():
+                    if (n > 0 and
+                            self.coords[self.edges[n]][0] < row < childrow):
+                        branchrow[col] = crosscell(branchrow[col])
+                        if col not in matrix[row]:
+                            for noderow in noderows:
+                                noderow[col] = crosscell(noderow[col])
+                branchrow = [a + ((a[-1] if a[-1] != ' ' else b[0]) * nodedist)
+                        for a, b in zip(branchrow, branchrow[1:] + [' '])]
+                result.append(''.join(branchrow))
+            result.extend((' ' * nodedist).join(noderow)
+                    for noderow in reversed(noderows))
+        return '\n'.join(reversed(result)) + '\n'
+
+
+    def svg(self, nodecolor='blue', leafcolor='red', funccolor='green'):
+        """
+        :return: SVG representation of a tree.
+        """
+        fontsize = 12
+        hscale = 40
+        vscale = 25
+        hstart = vstart = 20
+        width = max(col for _, col in self.coords.values())
+        height = max(row for row, _ in self.coords.values())
+        result = ['<svg version="1.1" xmlns="http://www.w3.org/2000/svg" '
+                  'width="%dem" height="%dem" viewBox="%d %d %d %d">' % (
+                      width * 3,
+                      height * 2.5,
+                      -hstart, -vstart,
+                      width * hscale + 3 * hstart,
+                      height * vscale + 3 * vstart)
+                      ]
+
+        children = defaultdict(set)
+        for n in self.nodes:
+            if n:
+                children[self.edges[n]].add(n)
+
+        # horizontal branches from nodes to children
+        for node in self.nodes:
+            if not children[node]:
+                continue
+            y, x = self.coords[node]
+            x *= hscale
+            y *= vscale
+            x += hstart
+            y += vstart + fontsize // 2
+            childx = [self.coords[c][1] for c in children[node]]
+            xmin = hstart + hscale * min(childx)
+            xmax = hstart + hscale * max(childx)
+            result.append(
+                '\t<polyline style="stroke:black; stroke-width:1; fill:none;" '
+                'points="%g,%g %g,%g" />' % (xmin, y, xmax, y))
+            result.append(
+                '\t<polyline style="stroke:black; stroke-width:1; fill:none;" '
+                'points="%g,%g %g,%g" />' % (x, y, x, y - fontsize // 3))
+
+        # vertical branches from children to parents
+        for child, parent in self.edges.items():
+            y, _ = self.coords[parent]
+            y *= vscale
+            y += vstart + fontsize // 2
+            childy, childx = self.coords[child]
+            childx *= hscale
+            childy *= vscale
+            childx += hstart
+            childy += vstart - fontsize
+            result += [
+                '\t<polyline style="stroke:white; stroke-width:10; fill:none;"'
+                ' points="%g,%g %g,%g" />' % (childx, childy, childx, y + 5),
+                '\t<polyline style="stroke:black; stroke-width:1; fill:none;"'
+                ' points="%g,%g %g,%g" />' % (childx, childy, childx, y),
+                ]
+
+        # write nodes with coordinates
+        for n, (row, column) in self.coords.items():
+            node = self.nodes[n]
+            x = column * hscale + hstart
+            y = row * vscale + vstart
+            if n in self.highlight:
+                color = nodecolor if isinstance(node, Tree) else leafcolor
+                if isinstance(node, Tree) and node.label().startswith('-'):
+                    color = funccolor
+            else:
+                color = 'black'
+            result += ['\t<text style="text-anchor: middle; fill: %s; '
+                       'font-size: %dpx;" x="%g" y="%g">%s</text>' % (
+                           color, fontsize, x, y,
+                           escape(node.label() if isinstance(node, Tree) 
+                                  else node))]
+
+        result += ['</svg>']
+        return '\n'.join(result)
+
+
+def test():
+    """Do some tree drawing tests."""
+    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
+        print()
+        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
+        print(tree)
+        print()
+        drawtree = TreePrettyPrinter(tree, sentence)
+        try:
+            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
+        except (UnicodeDecodeError, UnicodeEncodeError):
+            print(drawtree.text(unicodelines=False, ansi=False, **xargs))
+
+    from nltk.corpus import treebank
+    for n in [0, 1440, 1591, 2771, 2170]:
+        tree = treebank.parsed_sents()[n]
+        print_tree(n, tree, nodedist=2, maxwidth=8)
+    print()
+    print('ASCII version:')
+    print(TreePrettyPrinter(tree).text(nodedist=2))
+
+    tree = Tree.fromstring(
+        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
+        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
+        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
+    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
+                ' zwemmen of terrassen .'.split())
+    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
+
+
+__all__ = ['TreePrettyPrinter']
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
+    test()
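
A short sketch exercising the new module directly (assuming NLTK 3.0.2; output
omitted here):

    from nltk.tree import Tree
    from nltk.treeprettyprinter import TreePrettyPrinter

    tree = Tree.fromstring('(S (NP Mary) (VP walks))')
    tpp = TreePrettyPrinter(tree)

    print(tpp.text())                               # plain 7-bit ASCII art
    print(tpp.text(unicodelines=True, nodedist=2))  # Unicode box-drawing lines
    svg_markup = tpp.svg()                          # standalone <svg> element
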
diff --git a/nltk/util.py b/nltk/util.py
index daef21b..eec59cd 100644
--- a/nltk/util.py
+++ b/nltk/util.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nltk/wsd.py b/nltk/wsd.py
index f4e5282..7648c91 100644
--- a/nltk/wsd.py
+++ b/nltk/wsd.py
@@ -1,64 +1,54 @@
 # Natural Language Toolkit: Word Sense Disambiguation Algorithms
 #
-# Author: Liling Tan <alvations at gmail.com>
+# Authors: Liling Tan <alvations at gmail.com>,
+#          Dmitrijs Milajevs <dimazest at gmail.com>
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from nltk.corpus import wordnet as wn
+from nltk.corpus import wordnet
 
-############################################################
-# Lesk Algorithm
-############################################################
 
-def _compare_overlaps_greedy(context, synsets_signatures, pos=None):
-    """
-    Calculate overlaps between the context sentence and the synset_signature
-    and returns the synset with the highest overlap.
-    
-    :param context: ``context_sentence`` The context sentence where the ambiguous word occurs.
-    :param synsets_signatures: ``dictionary`` A list of words that 'signifies' the ambiguous word.
-    :param pos: ``pos`` A specified Part-of-Speech (POS).
+def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
+    """Return a synset for an ambiguous word in a context.
+
+    :param iter context_sentence: The context sentence where the ambiguous word
+    occurs, passed as an iterable of words.
+    :param str ambiguous_word: The ambiguous word that requires WSD.
+    :param str pos: A specified Part-of-Speech (POS).
+    :param iter synsets: Possible synsets of the ambiguous word.
     :return: ``lesk_sense`` The Synset() object with the highest signature overlaps.
-    """
-    max_overlaps = 0
-    lesk_sense = None
-    for ss in synsets_signatures:
-        if pos and str(ss.pos()) != pos: # Skips different POS.
-            continue
-        overlaps = set(synsets_signatures[ss]).intersection(context)
-        if len(overlaps) > max_overlaps:
-            lesk_sense = ss
-            max_overlaps = len(overlaps)  
-    return lesk_sense
 
-def lesk(context_sentence, ambiguous_word, pos=None, dictionary=None):
-    """
-    This function is the implementation of the original Lesk algorithm (1986).
-    It requires a dictionary which contains the definition of the different
-    sense of each word. See http://goo.gl/8TB15w
+    This function is an implementation of the original Lesk algorithm (1986) [1].
 
-        >>> from nltk import word_tokenize
-        >>> sent = word_tokenize("I went to the bank to deposit money.")
-        >>> word = "bank"
-        >>> pos = "n"
-        >>> lesk(sent, word, pos)
-        Synset('bank.n.07')
-    
-    :param context_sentence: The context sentence where the ambiguous word occurs.
-    :param ambiguous_word: The ambiguous word that requires WSD.
-    :param pos: A specified Part-of-Speech (POS).
-    :param dictionary: A list of words that 'signifies' the ambiguous word.
-    :return: ``lesk_sense`` The Synset() object with the highest signature overlaps.
+    Usage example::
+
+        >>> lesk(['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.'], 'bank', 'n')
+        Synset('savings_bank.n.02')
+
+    [1] Lesk, Michael. "Automatic sense disambiguation using machine
+    readable dictionaries: how to tell a pine cone from an ice cream
+    cone." Proceedings of the 5th Annual International Conference on
+    Systems Documentation. ACM, 1986.
+    http://dl.acm.org/citation.cfm?id=318728
     """
-    if not dictionary:
-        dictionary = {}
-        for ss in wn.synsets(ambiguous_word):
-            dictionary[ss] = ss.definition().split()
-    best_sense = _compare_overlaps_greedy(context_sentence,
-                                       dictionary, pos)
-    return best_sense
+
+    context = set(context_sentence)
+    if synsets is None:
+        synsets = wordnet.synsets(ambiguous_word)
+
+    if pos:
+        synsets = [ss for ss in synsets if str(ss.pos()) == pos]
+
+    if not synsets:
+        return None
+
+    _, sense = max(
+        (len(context.intersection(ss.definition().split())), ss) for ss in synsets
+    )
+
+    return sense
 
 
 if __name__ == "__main__":
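
A minimal sketch of the reworked lesk() interface (assuming the WordNet data has
been installed via nltk.download('wordnet'); the candidate slice is purely
illustrative):

    from nltk.corpus import wordnet as wn
    from nltk.wsd import lesk

    sent = 'I went to the bank to deposit money .'.split()

    print(lesk(sent, 'bank', 'n'))            # POS-filtered disambiguation

    # The new `synsets` argument restricts the candidate senses; an empty
    # candidate list makes lesk() return None.
    candidates = wn.synsets('bank', pos='n')[:3]
    print(lesk(sent, 'bank', synsets=candidates))
    print(lesk(sent, 'bank', synsets=[]))     # None
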
diff --git a/setup.cfg b/setup.cfg
index 72f9d44..861a9f5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [egg_info]
-tag_svn_revision = 0
-tag_date = 0
 tag_build = 
+tag_date = 0
+tag_svn_revision = 0
 
diff --git a/setup.py b/setup.py
index 079bcf3..a90eca6 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 #
 # Setup script for the Natural Language Toolkit
 #
-# Copyright (C) 2001-2014 NLTK Project
+# Copyright (C) 2001-2015 NLTK Project
 # Author: Steven Bird <stevenbird1 at gmail.com>
 #         Edward Loper <edloper at gmail.com>
 #         Ewan Klein <ewan at inf.ed.ac.uk>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/nltk.git


