[libmpikmeans] 01/29: Imported Upstream version 1.5

Christian Kastner chrisk-guest at moszumanska.debian.org
Mon Aug 25 21:19:59 UTC 2014


This is an automated email from the git hooks/post-receive script.

chrisk-guest pushed a commit to branch master
in repository libmpikmeans.

commit b7e185b36ff9b442f41fb6207f47321e030126e9
Author: Christian Kastner <debian at kvr.at>
Date:   Fri Nov 12 01:05:19 2010 +0100

    Imported Upstream version 1.5
---
 Changelog             |   12 +
 LICENSE-2.0.txt       |  202 +++++
 Makefile              |  104 +++
 README                |   64 ++
 example.txt           |    6 +
 libmpikmeans.so       |  Bin 0 -> 19039 bytes
 mpi_assign            |  Bin 0 -> 1597627 bytes
 mpi_assign64          |  Bin 0 -> 1758705 bytes
 mpi_assign_main.cxx   |  189 +++++
 mpi_assign_mex.cxx    |   47 ++
 mpi_assign_mex.mexa64 |  Bin 0 -> 20977 bytes
 mpi_assign_mex.mexglx |  Bin 0 -> 18428 bytes
 mpi_kmeans            |  Bin 0 -> 1614641 bytes
 mpi_kmeans.cxx        |  630 +++++++++++++++
 mpi_kmeans.h          |   42 +
 mpi_kmeans.o          |  Bin 0 -> 13288 bytes
 mpi_kmeans.py         |   40 +
 mpi_kmeans64          |  Bin 0 -> 1773791 bytes
 mpi_kmeans_main.cxx   |  296 +++++++
 mpi_kmeans_mex.cxx    |  161 ++++
 mpi_kmeans_mex.h      |   16 +
 mpi_kmeans_mex.mexa64 |  Bin 0 -> 23209 bytes
 mpi_kmeans_mex.mexglx |  Bin 0 -> 20612 bytes
 py_kmeans.c           | 2071 +++++++++++++++++++++++++++++++++++++++++++++++++
 py_kmeans.pyx         |   71 ++
 py_kmeans.so          |  Bin 0 -> 48868 bytes
 test_code.m           |  123 +++
 27 files changed, 4074 insertions(+)

diff --git a/Changelog b/Changelog
new file mode 100644
index 0000000..51f8bbb
--- /dev/null
+++ b/Changelog
@@ -0,0 +1,12 @@
+Version 1.5
+	- The algorithm is now available in stand-alone as well
+	- Reordered the source files
+	- A new python wrapper (thanks to Thomas Wiecki)
+
+Version 1.2
+	- Allocating a lot less memory now, thanks to Flaviu Iepure for pointing out an allocation mistake 
+	- Cleaned up the code
+	- Added a simple test case function
+
+Version 1.1 
+	- Added a Python version (thanks to Christoph Lampert)
\ No newline at end of file
diff --git a/LICENSE-2.0.txt b/LICENSE-2.0.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE-2.0.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..69b725b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,104 @@
+#
+# Choose your compiler
+#
+# 
+
+#CC = gcc-4.2
+#CPP = g++-4.2
+#CFLAGS=-O3 -ffast-math -fomit-frame-pointer -fPIC -mtune=k8 -march=k8 -Werror
+CC = gcc-4.1
+CPP = g++-4.1
+CFLAGS=-O3 -ffast-math -fomit-frame-pointer -fPIC -Werror
+#CC=/agbs/share/sw/icc/bin/icc
+#CFLAGS= -fast -DCOMPILE_WITH_ICC -Werror
+
+#
+# MPI KMEANS FLAGS
+#
+VERBOSEFLAG=-DKMEANS_VERBOSE=0 # 0: silent, 1:iteration counter, 2:everything
+#PRECISION=-DINPUT_TYPE=0 # 0: double, 1:float 
+
+#
+# MATLAB
+#
+MATLABDIR=/agbs/share/sw/matlab
+MATLAB_INCLUDE=-I$(MATLABDIR)/extern/include
+
+#
+# BOOST LIBS (for standalone only)
+#
+BOOST_LIB=-L/kyb/agbs/pgehler/lib -lboost_program_options-gcc41-mt -lboost_filesystem-gcc41-mt -lboost_system-gcc41-mt
+BOOST_INCLUDE=-I/kyb/agbs/pgehler/include/boost-1_36/
+
+#
+# PYTHON
+#
+PYTHON_INCLUDE=-I/usr/include/python2.5
+PYTHON_LIB=-lpython2.5
+NUMPY_INCLUDE=-I/usr/lib/python2.5/site-packages/numpy/core/include
+
+#
+# ARCHITECURE
+# 
+
+# 32 bit
+SUFFIX=mexglx
+MATLAB_LIB=-L$(MATLABDIR)/bin/glnx86 -lmex
+
+# 64 bit
+#SUFFIX=mexa64
+#MATLAB_LIB=-L$(MATLABDIR)/bin/glnxa64 -lmex
+
+LIBS=/usr/lib/gcc/i486-linux-gnu/4.1/libstdc++.a /usr/lib/libm.a
+
+all: standalone matlab libmpikmeans python
+matlab: 	mpi_kmeans_mex.$(SUFFIX) mpi_assign_mex.$(SUFFIX)
+standalone:	mpi_kmeans_main mpi_assign_main
+python:	cython_wrapper
+
+mpi_kmeans.o:	mpi_kmeans.cxx mpi_kmeans.h
+	$(CC) $(CFLAGS) $(VERBOSEFLAG) $(PRECISION) -c -o $@ mpi_kmeans.cxx
+
+libmpikmeans:	mpi_kmeans.o
+	ar rc libmpikmeans.a mpi_kmeans.o
+	ranlib libmpikmeans.a
+	$(CC) -shared -Wl,-soname=libmpikmeans.so -fPIC $(CFLAGS) -o libmpikmeans.so $(VERBOSEFLAGS) $(PRECISION) mpi_kmeans.cxx
+
+mpi_kmeans_main.o:	mpi_kmeans_main.cxx
+	$(CC) $(CFLAGS) $(BOOST_INCLUDE) -c -o mpi_kmeans_main.o mpi_kmeans_main.cxx 
+
+mpi_assign_main.o:	mpi_assign_main.cxx
+	$(CC) $(CFLAGS) $(BOOST_INCLUDE) -c -o mpi_assign_main.o mpi_assign_main.cxx 
+
+mpi_kmeans_main:	libmpikmeans mpi_kmeans_main.o
+	$(CC) mpi_kmeans_main.o  $(CFLAGS) -L/usr/lib/ -static -o mpi_kmeans -lm libmpikmeans.a \
+		$(BOOST_LIB) $(LIBS)
+
+mpi_assign_main:	libmpikmeans mpi_assign_main.o
+	$(CC) mpi_assign_main.o  $(CFLAGS) -L/usr/lib/ -static -o mpi_assign -lm libmpikmeans.a \
+		$(BOOST_LIB) $(LIBS)
+
+%_mex.o:	%_mex.cxx
+	$(CC) $(CFLAGS) $(MATLAB_INCLUDE) $(VERBOSEFLAG) $(PRECISION) -c $^ -o $@
+
+mpi_kmeans_mex.$(SUFFIX):	libmpikmeans mpi_kmeans_mex.o 
+	$(CC) mpi_kmeans_mex.o -shared -o mpi_kmeans_mex.$(SUFFIX) libmpikmeans.a $(MATLAB_LIB) 
+
+mpi_assign_mex.$(SUFFIX):	libmpikmeans mpi_assign_mex.o
+	$(CC) mpi_assign_mex.o -shared -o mpi_assign_mex.$(SUFFIX) libmpikmeans.a $(MATLAB_LIB)
+
+cython_wrapper:	py_kmeans.c mpi_kmeans.o
+	$(CPP) $(CFLAGS) $(PYTHON_INCLUDE) $(NUMPY_INCLUDE) -c -o py_kmeans.o py_kmeans.c
+	$(CPP) $(CFLAGS) $(PYTHON_LIB) -lm -pthread -shared py_kmeans.o mpi_kmeans.o  -o py_kmeans.so 
+
+test:	
+	matlab -nojvm -r "test_code;exit"
+
+clean:	
+	rm -f *.o
+	rm -f *.mexglx
+	rm -f *.mexa64
+	rm -f libmpikmeans.so
+	rm -f libmpikmeans.a
+	rm -f mpi_assign mpi_kmeans
+
diff --git a/README b/README
new file mode 100644
index 0000000..4dc2632
--- /dev/null
+++ b/README
@@ -0,0 +1,64 @@
+Readme file for the Kmeans software MPI Kmeans
+
+1. Installation
+===============
+
+  There are precompile binaries for the stand-alone (mpi_assign,mpi_kmeans) and 
+  the matlab version (*.mex*). You should first try if they work on your machine 
+  (32bit and 64bit version). If not you need to adapt the Makefile and type 
+  depending on your target
+
+  make standalone
+  make matlab
+  make python 
+
+  or simply 
+
+  make clean all
+
+  If you are fine with single precision or in need for more mem you can compile with -DINPUT_TYPE=1
+
+
+2. Usage
+========
+
+  a) Stand alone:
+	./mpi_kmeans --help
+    	./mpi_kmeans --k 2 --data example.txt --output clusters.txt
+
+    	./mpi_assign --help
+    	./mpi_assign --data example.txt --cluster clusters.txt --assignment assignment.txt
+
+  b) Matlab:
+    	Try "help mpi_kmeans" in a matlab shell. This will also give an example.
+
+  c) Python:
+	In a python shell type:
+
+	import py_kmeans
+	help(py_kmeans.kmeans)
+	
+	(a second, older python version is ./mpi_kmeans.py)
+
+
+3. References
+=============
+
+This k-means clustering code is a mex implementation of the ICML2003 paper 
+
+ at misc{ elkan03using,
+  author = "C. Elkan",
+  title = "Using the triangle inequality to accelerate kMeans",
+  text = "C. Elkan. Using the triangle inequality to accelerate kMeans. In Proceedings
+    of the Twentieth International Conference on Machine Learning, 2003, pp.
+    147-153.",
+  year = "2003",
+  url = "citeseer.ist.psu.edu/elkan03using.html" 
+}
+
+4. Author
+=========
+
+See LICENSE-2.0.txt for licensing terms.
+
+(c) 2007-2009, Peter Gehler, peter.gehler at tuebingen.mpg.de
diff --git a/example.txt b/example.txt
new file mode 100644
index 0000000..2bd5844
--- /dev/null
+++ b/example.txt
@@ -0,0 +1,6 @@
+1 0 0 
+2 0 0 
+3 0 0 
+0 0 2
+0 0 2
+0 0 2
diff --git a/libmpikmeans.so b/libmpikmeans.so
new file mode 100755
index 0000000..eb1be8f
Binary files /dev/null and b/libmpikmeans.so differ
diff --git a/mpi_assign b/mpi_assign
new file mode 100755
index 0000000..3bc290b
Binary files /dev/null and b/mpi_assign differ
diff --git a/mpi_assign64 b/mpi_assign64
new file mode 100755
index 0000000..2da93be
Binary files /dev/null and b/mpi_assign64 differ
diff --git a/mpi_assign_main.cxx b/mpi_assign_main.cxx
new file mode 100644
index 0000000..60c63c7
--- /dev/null
+++ b/mpi_assign_main.cxx
@@ -0,0 +1,189 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iomanip>
+#include <string>
+#include <algorithm>
+#include <iterator>
+#include <numeric>
+#include <ext/numeric>
+#include <boost/program_options.hpp>
+#include <boost/filesystem.hpp>
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#include "mpi_kmeans.h"
+
+namespace po = boost::program_options;
+
+static void write_assignment(const std::string& output_filename,
+								  const std::vector<int> labels) {
+	
+	std::cout << "Writing cluster centers to \""
+			  << output_filename << "\"" << std::endl;
+
+	std::ofstream wout(output_filename.c_str());
+	if (wout.fail()) {
+		std::cerr << "Failed to open \"" << output_filename
+				  << "\" for writing." << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	wout << std::setprecision(1);
+	for (unsigned int m=0; m < labels.size(); ++m) {
+		wout << labels[m] << std::endl;
+	}
+	wout.close();
+
+	return;
+}
+
+
+static int read_problem_data(const std::string& filename,
+							  std::vector<std::vector<double> >& data) {
+	data.clear();
+
+	std::ifstream in(filename.c_str());
+	if (in.fail()) {
+		std::cerr << "Failed to open file \""
+				  << filename << "\" for reading." << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	std::string line;
+	unsigned int ndims = 0;
+	while (in.eof() == false) {
+		std::getline(in,line);
+		if (line.size() == 0)
+			continue; // skip over empty lines
+	
+		// remove trailing whitespaces
+		line.erase(line.find_last_not_of(" ")+1);
+
+		std::vector<double> current_data;	
+		std::istringstream is(line);
+		while (is.eof() == false) {
+			double value;
+			is >> value;
+			current_data.push_back(value);
+		}
+		
+		// Ensure the same number of dimensions for each point
+		if (ndims == 0)
+			ndims = current_data.size();	
+		assert(ndims == current_data.size());
+		data.push_back(current_data);
+	}
+	in.close();
+	
+	if (data.size() == 0) {
+		std::cerr << "No points read from file \"" << filename
+				  << "\"" << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	return(data.size());
+
+}
+
+
+int main(int argc, char* argv[]) {
+
+	std::string data_filename;
+	std::string cluster_filename;
+	std::string assignment_filename;
+
+	// Set Program options
+	po::options_description generic("Generic Options");
+	generic.add_options()
+		("help","Produce help message")
+		("verbose","Verbose output")
+		;
+
+	po::options_description input_options("Input/Output Options");
+	input_options.add_options()
+		("data",po::value<std::string>
+		 (&data_filename)->default_value("data.txt"),
+		 "Data file, one datum per line")
+		("cluster",po::value<std::string>
+		 (&cluster_filename)->default_value("clustercenter.txt"),
+		 "Output file, one cluster center per line")
+		("assignment",po::value<std::string>
+		 (&assignment_filename)->default_value("assignment.txt"),
+		 "Output file, one cluster center per line")
+		;
+
+	po::options_description all_options;
+	all_options.add(generic).add(input_options);
+	po::variables_map vm;
+	po::store(po::command_line_parser(argc,argv).options(all_options).run(), vm);
+	po::notify(vm);
+
+	bool verbose = vm.count("verbose");
+
+	if (vm.count("help")) {
+		std::cerr << "Assigning points to cluster center" << std::endl;
+		std::cerr << all_options << std::endl;
+		std::cerr << std::endl;
+		std::cerr << "Example:" << std::endl;
+		std::cerr << "  mpi_assign --data example.txt --cluster clusters.txt --assignment assignment.txt" << std::endl;
+		exit(EXIT_SUCCESS);
+	}
+
+	// read in the data
+	std::cout << "Input file: " << data_filename << std::endl;
+	std::vector<std::vector<double> > data_X;	
+	int nof_points = read_problem_data(data_filename,data_X);
+	assert(nof_points>0);
+
+
+	// read in the clusters
+	std::cout << "Clustercenter file: " << cluster_filename << std::endl;
+	std::vector<std::vector<double> > data_CX;	
+	int nof_clusters = read_problem_data(cluster_filename,data_CX);
+	assert(nof_clusters>0);
+
+	unsigned int dims = data_X[0].size();
+	assert(dims>0);
+	if (data_X[0].size() != data_CX[0].size()) {
+		std::cerr << "Dimension mismatch between points and clusters" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	// convert clusters to double*
+	double *CX = (double *)malloc(nof_clusters * dims * sizeof(double));
+	unsigned int cntr = 0;
+	for (unsigned int m=0; m < data_CX.size() ; ++m) {
+		for (unsigned int n=0; n < data_CX[m].size() ; ++n) {
+			CX[cntr] = data_CX[m][n];
+			cntr += 1;
+		}
+	}
+	
+	// start K-Means
+	std::cout << "Starting Assignment ..." << std::endl;
+	std::cout << " ... with " << nof_points << " training points " <<std::endl;
+	std::cout << " ... for " << nof_clusters << " clusters " <<std::endl;
+	std::cout << " ... in " << dims << " dimensions " <<std::endl;
+
+	std::vector<int> labels;
+	double *x = (double *)malloc(dims * sizeof(double));
+	for (unsigned int i=0; i < nof_points ; i++ ) {
+		for (unsigned int n=0; n<data_X[i].size() ; ++n)
+			x[n] = data_X[i][n];
+		labels.push_back(1+assign_point_to_cluster_ordinary(x,CX,dims,nof_clusters));
+	}
+
+	std::cout << "Done!" << std::endl;
+
+	// write the clusters
+	write_assignment(assignment_filename,labels);
+
+	// done
+	exit(EXIT_SUCCESS);
+}
diff --git a/mpi_assign_mex.cxx b/mpi_assign_mex.cxx
new file mode 100644
index 0000000..83da73b
--- /dev/null
+++ b/mpi_assign_mex.cxx
@@ -0,0 +1,47 @@
+#include "mex.h"
+#include <memory.h>
+#include "mpi_kmeans.h"
+
+void mexFunction(const int nlhs, mxArray *plhs[], const int nrhs, const mxArray *prhs[])
+{
+
+#ifdef COMPILE_WITH_ICC
+	unsigned int dims[2];
+#else
+	mwSize dims[2];
+#endif
+
+	unsigned int dim = mxGetM(prhs[0]);
+	unsigned int npts = mxGetN(prhs[0]);
+	unsigned int nclus = mxGetN(prhs[1]);
+
+	if (mxGetM(prhs[1])!=dim)
+		mexErrMsgTxt("Dimension mismatch in input arguments");
+
+	dims[0]= npts;
+	dims[1] = 1;
+	plhs[0] = mxCreateNumericArray(2,dims,mxUINT32_CLASS,mxREAL);
+	unsigned int *c = (unsigned int*)mxGetPr(plhs[0]);
+
+	/* input points */
+	const PREC *X = (PREC*)mxGetPr(prhs[0]);
+
+	/* cluster centers */
+	const PREC *CX = (PREC*)mxGetPr(prhs[1]);
+	
+	const PREC *px=X;
+
+	/* sanity check */
+	for (unsigned int i=0 ; i<npts*dim; i++)
+		if (mxIsNaN(px[i])||mxIsInf(px[i]))
+			mexErrMsgTxt("NaN or Inf in the input data detected... abort");
+
+
+	for (unsigned int i=0 ; i<npts; i++,px+=dim)
+		c[i] = 1+assign_point_to_cluster_ordinary(px, CX, dim, nclus);
+
+}
+
+
+
+
diff --git a/mpi_assign_mex.mexa64 b/mpi_assign_mex.mexa64
new file mode 100755
index 0000000..b68f1fe
Binary files /dev/null and b/mpi_assign_mex.mexa64 differ
diff --git a/mpi_assign_mex.mexglx b/mpi_assign_mex.mexglx
new file mode 100755
index 0000000..0a493c3
Binary files /dev/null and b/mpi_assign_mex.mexglx differ
diff --git a/mpi_kmeans b/mpi_kmeans
new file mode 100755
index 0000000..93a4af1
Binary files /dev/null and b/mpi_kmeans differ
diff --git a/mpi_kmeans.cxx b/mpi_kmeans.cxx
new file mode 100644
index 0000000..2720a66
--- /dev/null
+++ b/mpi_kmeans.cxx
@@ -0,0 +1,630 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <float.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+#include "mpi_kmeans.h"
+
+#if KMEANS_VERBOSE>1
+unsigned int saved_two=0,saved_three_one=0,saved_three_two=0,saved_three_three=0,saved_three_b=0;
+#endif
+
+
+void kmeans_error(char *msg)
+{
+	printf("%s",msg);
+	exit(-1);
+}
+
+int comp_randperm (const void * a, const void * b)
+{
+	return ((int)( *(double*)a - *(double*)b ));
+}
+
+
+void randperm(unsigned int *order, unsigned int npoints)
+{
+	double *r = (double*)malloc(2*npoints*sizeof(double));
+	for (unsigned int i=0; i<2*npoints; i++,i++)
+	{
+		r[i] = rand();
+		r[i+1] = i/2;
+	}
+	qsort (r, npoints, 2*sizeof(double), comp_randperm);
+
+	for (unsigned int i=1; i<2*npoints; i++,i++)
+		order[i/2] = (unsigned int)r[i];
+
+	free(r);
+}
+
+PREC compute_distance(const PREC *vec1, const PREC *vec2, const unsigned int dim)
+{
+	PREC d = 0.0;
+	for ( unsigned int k=0 ; k<dim ; k++ )
+	{
+		PREC df = (vec1[k]-vec2[k]);
+		d += df*df;
+	}
+	assert(d>=0.0);
+	d = sqrt(d);
+
+	return d;
+}
+
+PREC compute_sserror(const PREC *CX, const PREC *X, const unsigned int *c,unsigned int dim, unsigned int npts)
+{
+	PREC sse = 0.0;
+	const PREC *px = X;
+	for ( unsigned int i=0 ; i<npts ; i++,px+=dim)
+	{
+		const PREC *pcx = CX+c[i]*dim;
+		PREC d = compute_distance(px,pcx,dim);
+		sse += d*d;
+	}
+	assert(sse>=0.0);
+	return(sse);
+}
+
+void remove_point_from_cluster(unsigned int cluster_ind, PREC *CX, const PREC *px, unsigned int *nr_points, unsigned int dim)
+{
+	PREC *pcx = CX + cluster_ind*dim;
+
+	/* empty cluster after or before removal */
+	if (nr_points[cluster_ind]<2)
+	{
+		for ( unsigned int k=0 ; k<dim ; k++ )
+			pcx[k] = 0.0;
+		nr_points[cluster_ind]=0;
+	}
+	else
+	{
+		/* pgehler: remove PREC here */
+		PREC nr_old,nr_new; 
+		nr_old = (PREC)nr_points[cluster_ind];
+		(nr_points[cluster_ind])--;
+		nr_new = (PREC)nr_points[cluster_ind];
+
+		for ( unsigned int k=0 ; k<dim ; k++ )
+			pcx[k] = (nr_old*pcx[k] - px[k])/nr_new;
+	}
+}
+
+void add_point_to_cluster(unsigned int cluster_ind, PREC *CX, const PREC *px, unsigned int *nr_points, unsigned int dim)
+{
+
+	PREC *pcx = CX + cluster_ind*dim;
+
+	/* first point in cluster */
+	if (nr_points[cluster_ind]==0)
+	{		
+		(nr_points[cluster_ind])++;
+		for ( unsigned int k=0 ; k<dim ; k++ )
+			pcx[k] = px[k];
+	}
+	else
+	{
+/* remove PREC here */
+		PREC nr_old = (PREC)(nr_points[cluster_ind]);
+		(nr_points[cluster_ind])++;
+		PREC nr_new = (PREC)(nr_points[cluster_ind]);
+		for ( unsigned int k=0 ; k<dim ; k++ )
+			pcx[k] = (nr_old*pcx[k]+px[k])/nr_new;
+	}
+}
+
+
+bool remove_identical_clusters(PREC *CX, BOUND_PREC *cluster_distance, const PREC *X, unsigned int *cluster_count, unsigned int *c, unsigned int dim, unsigned int nclus, unsigned int npts)
+{
+	bool stat = false;
+	for ( unsigned int i=0 ; i<(nclus-1) ; i++ )
+	{
+		for ( unsigned int j=i+1 ; j<nclus ; j++ )
+		{
+			if (cluster_distance[i*nclus+j] <= BOUND_EPS)
+			{
+#if KMEANS_VERBOSE>1
+				printf("found identical cluster : %d\n",j);
+#endif
+				stat = true;
+				/* assign the points from j to i */
+				const PREC *px = X;
+				for ( unsigned int n=0 ; n<npts ; n++,px+=dim )
+				{
+					if (c[n] != j) continue;
+					remove_point_from_cluster(c[n],CX,px,cluster_count,dim);
+					c[n] = i;
+					add_point_to_cluster(c[i],CX,px,cluster_count,dim);
+				}
+			}
+		}
+	}
+	return(stat);
+}
+
+void compute_cluster_distances(BOUND_PREC *dist, BOUND_PREC *s, const PREC *CX, unsigned int dim,unsigned int nclus, const bool *cluster_changed)
+{
+	for ( unsigned int j=0 ; j<nclus ; j++ )
+		s[j] = BOUND_PREC_MAX;
+
+	const PREC *pcx = CX;
+	for ( unsigned int i=0 ; i<nclus-1 ; i++,pcx+=dim)
+	{
+		const PREC *pcxp = CX + (i+1)*dim;
+		unsigned int cnt=i*nclus+i+1;
+		for ( unsigned int j=i+1 ; j<nclus; j++,cnt++,pcxp+=dim )
+		{
+			if (cluster_changed[i] || cluster_changed[j])
+			{
+				dist[cnt] = (BOUND_PREC)(0.5 * compute_distance(pcx,pcxp,dim));
+				dist[j*nclus+i] = dist[cnt];
+
+				if (dist[cnt] < s[i])
+					s[i] = dist[cnt];
+
+				if (dist[cnt] < s[j])
+					s[j] = dist[cnt];
+			}
+		}
+	}
+}
+
+
+unsigned int init_point_to_cluster(unsigned int point_ind, const PREC *px, const PREC *CX, unsigned int dim,unsigned int nclus, PREC *mindist, BOUND_PREC *low_b, const BOUND_PREC *cl_dist)
+{
+	bool use_low_b = true;
+
+	if (low_b==NULL) use_low_b = false;
+	unsigned int bias = point_ind*nclus;
+	
+	const PREC *pcx = CX;
+	PREC mind = compute_distance(px,pcx,dim);
+	if (use_low_b) low_b[bias] = (BOUND_PREC)mind;
+	unsigned int assignment = 0;
+	pcx+=dim;
+	for ( unsigned int j=1 ; j<nclus ; j++,pcx+=dim )
+	{
+		if (mind + BOUND_EPS <= cl_dist[assignment*nclus+j])
+			continue;
+
+		PREC d = compute_distance(px,pcx,dim);
+		if(use_low_b) low_b[j+bias] = (BOUND_PREC)d;
+
+		if (d<mind)
+		{
+			mind = d;
+			assignment = j;
+		}
+	}
+	mindist[point_ind] = mind;
+	return(assignment);
+}
+
+unsigned int assign_point_to_cluster_ordinary(const PREC *px, const PREC *CX, unsigned int dim,unsigned int nclus)
+{
+	unsigned int assignment = nclus;
+	PREC mind = PREC_MAX;
+	const PREC *pcx = CX;
+	for ( unsigned int j=0 ; j<nclus ; j++,pcx+=dim )
+	{
+		PREC d = compute_distance(px,pcx,dim);
+		if (d<mind)
+		{
+			mind = d;
+			assignment = j;
+		}
+	}
+	assert(assignment < nclus);
+	return(assignment);
+}
+
+unsigned int assign_point_to_cluster(unsigned int point_ind, const PREC *px, const PREC *CX, unsigned int dim,unsigned int nclus, unsigned int old_assignment, PREC *mindist, BOUND_PREC *s, BOUND_PREC *cl_dist, BOUND_PREC *low_b)
+{
+	bool up_to_date = false,use_low_b=true;;
+
+	unsigned int bias = point_ind*nclus;
+	if (low_b==NULL)use_low_b=false;
+
+	PREC mind = mindist[point_ind];
+
+	if (mind+BOUND_EPS <= s[old_assignment])
+	{
+#ifdef KMEANS_VEBOSE
+		saved_two++;
+#endif
+		return(old_assignment);
+	}
+
+	unsigned int assignment = old_assignment;
+	unsigned int counter = assignment*nclus;
+	const PREC *pcx = CX;
+	for ( unsigned int j=0 ; j<nclus ; j++,pcx+=dim )
+	{
+		if (j==old_assignment)
+		{
+#if KMEANS_VERBOSE>1
+			saved_three_one++;
+#endif
+			continue;
+		}
+		
+		if (use_low_b && (mind+BOUND_EPS <= low_b[j+bias]))
+		{
+#if KMEANS_VERBOSE>1
+			saved_three_two++;
+#endif
+			continue;
+		}
+
+		if (mind+BOUND_EPS <= cl_dist[counter+j])
+		{
+#if KMEANS_VERBOSE>1
+			saved_three_three++;
+#endif
+			continue;
+		}
+
+		PREC d = 0.0;
+		if (!up_to_date)
+		{
+			d = compute_distance(px,CX+assignment*dim,dim);
+			mind = d;
+			if(use_low_b) low_b[assignment+bias] = (BOUND_PREC)d;
+			up_to_date = true;
+		}
+		
+		if (!use_low_b)
+			d = compute_distance(px,pcx,dim);
+		else if ((mind > BOUND_EPS+low_b[j+bias]) || (mind > BOUND_EPS+cl_dist[counter+j]))
+		{
+			d =compute_distance(px,pcx,dim);
+			low_b[j+bias] = (BOUND_PREC)d;
+		}
+		else
+		{
+#if KMEANS_VERBOSE>1
+			saved_three_b++;
+#endif
+			continue;
+		}
+
+		if (d<mind)
+		{
+			mind = d;
+			assignment = j;
+			counter = assignment*nclus;
+			up_to_date = true;
+		}
+	}
+	mindist[point_ind] = mind;
+
+	return(assignment);
+}
+
+
+PREC kmeans_run(PREC *CX,const PREC *X,unsigned int *c,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter)
+{
+	PREC *tCX = (PREC *)calloc(nclus * dim, sizeof(PREC));
+	if (tCX==NULL)	kmeans_error((char*)"Failed to allocate mem for Cluster points");
+
+	/* number of points per cluster */
+	unsigned int *CN = (unsigned int *) calloc(nclus, sizeof(unsigned int)); 
+	if (CX==NULL)	kmeans_error((char*)"Failed to allocate mem for assignment");
+	
+	/* old assignement of points to cluster */
+	unsigned int *old_c = (unsigned int *) malloc(npts* sizeof(unsigned int));
+	if (old_c==NULL)	kmeans_error((char*)"Failed to allocate mem for temp assignment");
+
+	/* assign to value which is out of range */
+	for ( unsigned int i=0 ; i<npts ; i++)
+		old_c[i] = nclus;
+
+#if KMEANS_VERBOSE>0
+	printf("compile without setting the KMEANS_VERBOSE flag for no output\n");
+#endif
+
+	BOUND_PREC *low_b = (BOUND_PREC *) calloc(npts*nclus,sizeof(BOUND_PREC));
+	bool use_low_b = false;
+	if (low_b == NULL)
+	{
+#if KMEANS_VERBOSE>0
+		printf("not enough memory for lower bound, will compute without\n");
+#endif
+		use_low_b = false;
+	}
+	else
+	  {
+	    use_low_b = true;
+	    assert(low_b);
+	  }
+
+
+	BOUND_PREC *cl_dist = (BOUND_PREC *)calloc(nclus*nclus, sizeof(BOUND_PREC));
+	if (cl_dist==NULL)	kmeans_error((char*)"Failed to allocate mem for cluster-cluster distance");
+
+	BOUND_PREC *s = (BOUND_PREC *) malloc(nclus*sizeof(BOUND_PREC));
+	if (s==NULL)	kmeans_error((char*)"Failed to allocate mem for assignment");
+
+	BOUND_PREC *offset = (BOUND_PREC *) malloc(nclus * sizeof(BOUND_PREC)); /* change in distance of a cluster mean after a iteration */
+	if (offset==NULL)	kmeans_error((char*)"Failed to allocate mem for bound points-nearest cluster");
+
+	PREC *mindist = (PREC *)malloc(npts * sizeof(PREC));
+	if (mindist==NULL)	kmeans_error((char*)"Failed to allocate mem for bound points-clusters");
+
+	for ( unsigned int i=0;i<npts;i++)
+		mindist[i] = PREC_MAX;
+
+	bool *cluster_changed = (bool *) malloc(nclus * sizeof(bool)); /* did the cluster changed? */
+	if (cluster_changed==NULL)	kmeans_error((char*)"Failed to allocate mem for variable cluster_changed");
+	for ( unsigned int j=0 ; j<nclus ; j++ )
+		cluster_changed[j] = true;
+
+
+	unsigned int iteration = 0;
+	unsigned int nchanged = 1;
+	while (iteration < maxiter || maxiter == 0)
+	{
+		
+		/* compute cluster-cluster distances */
+		compute_cluster_distances(cl_dist, s, CX, dim,nclus, cluster_changed);
+		
+		/* assign all points from identical clusters to the first occurence of that cluster */
+		remove_identical_clusters(CX, cl_dist, X, CN, c, dim, nclus, npts);
+			
+		/* find nearest cluster center */
+		if (iteration == 0)
+		{
+		  
+		  const PREC *px = X;
+		  for ( unsigned int i=0 ; i<npts ; i++,px+=dim)
+			{
+				c[i] = init_point_to_cluster(i,px,CX,dim,nclus,mindist,low_b,cl_dist);
+				add_point_to_cluster(c[i],tCX,px,CN,dim);
+			}
+			nchanged = npts;
+		}
+		else
+		{
+			for ( unsigned int j=0 ; j<nclus ; j++)
+				cluster_changed[j] = false;
+
+			nchanged = 0;
+			const PREC *px = X;
+			for ( unsigned int i=0 ; i<npts ; i++,px+=dim)
+			{
+				c[i] = assign_point_to_cluster(i,px,CX,dim,nclus,old_c[i],mindist,s,cl_dist,low_b);
+
+#ifdef KMEANS_DEBUG
+				{
+					/* If the assignments are not the same, there is still the BOUND_EPS difference 
+					   which can be the reason of this*/
+					unsigned int tmp = assign_point_to_cluster_ordinary(px,CX,dim,nclus);
+					if (tmp != c[i])
+					{
+						double d1 = compute_distance(px,CX+(tmp*dim),dim);
+						double d2 = compute_distance(px,CX+(c[i]*dim),dim);
+						assert( (d1>d2)?((d1-d2)<BOUND_EPS):((d2-d1)<BOUND_EPS) );
+					}
+				}
+#endif
+
+				if (old_c[i] == c[i]) continue;
+				
+				nchanged++;
+
+				cluster_changed[c[i]] = true;
+				cluster_changed[old_c[i]] = true;
+
+				remove_point_from_cluster(old_c[i],tCX,px,CN,dim);
+				add_point_to_cluster(c[i],tCX,px,CN,dim);
+			}
+
+		}
+
+
+		/* fill up empty clusters */
+		for ( unsigned int j=0 ; j<nclus ; j++)
+		{
+			if (CN[j]>0) continue;
+			unsigned int *rperm = (unsigned int*)malloc(npts*sizeof(unsigned int));
+			if (cluster_changed==NULL)	kmeans_error((char*)"Failed to allocate mem for permutation");
+
+			randperm(rperm,npts);
+			unsigned int i = 0; 
+			while (rperm[i]<npts && CN[c[rperm[i]]]<2) i++;
+			if (i==npts)continue;
+			i = rperm[i];
+#if KMEANS_VERBOSE>0
+			printf("empty cluster [%d], filling it with point [%d]\n",j,i);
+#endif
+			cluster_changed[c[rperm[i]]] = true;
+			cluster_changed[j] = true;
+			const PREC *px = X + i*dim;
+			remove_point_from_cluster(c[i],tCX,px,CN,dim);
+			c[i] = j;
+			add_point_to_cluster(j,tCX,px,CN,dim);
+			/* void the bounds */
+			s[j] = (BOUND_PREC)0.0;
+			mindist[i] = 0.0;
+			if (use_low_b)
+				for ( unsigned int k=0 ; k<npts ; k++ )
+					low_b[k*nclus+j] = (BOUND_PREC)0.0;
+			
+			nchanged++;
+			free(rperm);
+		}
+
+		/* no assignment changed: done */
+		if (nchanged==0) break; 
+
+		/* compute the offset */
+
+		PREC *pcx = CX;
+		PREC *tpcx = tCX;
+		for ( unsigned int j=0 ; j<nclus ; j++,pcx+=dim,tpcx+=dim )
+		{
+			offset[j] = (BOUND_PREC)0.0;
+			if (cluster_changed[j])
+			{
+				offset[j] = (BOUND_PREC)compute_distance(pcx,tpcx,dim);
+				memcpy(pcx,tpcx,dim*sizeof(PREC));
+			}
+		}
+		
+		/* update the lower bound */
+		if (use_low_b)
+		{
+			for ( unsigned int i=0,cnt=0 ; i<npts ; i++ )
+				for ( unsigned int j=0 ; j<nclus ; j++,cnt++ )
+				{
+					low_b[cnt] -= offset[j];
+					if (low_b[cnt]<(BOUND_PREC)0.0) low_b[cnt] = (BOUND_PREC)0.0;
+				}
+		}
+
+		for ( unsigned int i=0; i<npts; i++)
+			mindist[i] += (PREC)offset[c[i]];
+
+		memcpy(old_c,c,npts*sizeof(unsigned int));
+
+#if KMEANS_VERBOSE>0
+		PREC sse = compute_sserror(CX,X,c,dim,npts);
+		printf("iteration %4d, #(changed points): %4d, sse: %4.2f\n",(int)iteration,(int)nchanged,sse);
+#endif
+
+#if KMEANS_VERBOSE>1
+		printf("saved at 2) %d\n",saved_two);
+		printf("saved at 3i) %d\n",saved_three_one);
+		printf("saved at 3ii) %d\n",saved_three_two);
+		printf("saved at 3iii) %d\n",saved_three_three);
+		printf("saved at 3b) %d\n",saved_three_b);
+		saved_two=0;
+		saved_three_one=0;
+		saved_three_two=0;
+		saved_three_three=0;
+		saved_three_b=0;
+#endif
+
+		iteration++;
+
+	}
+
+#ifdef KMEANS_DEBUG
+	for ( unsigned int j=0;j<nclus;j++)
+		assert(CN[j]!=0); /* Empty cluster after all */
+#endif
+
+
+	/* find nearest cluster center if iteration reached maxiter */
+	if (nchanged>0)
+	{
+	    const PREC *px = X;
+		for ( unsigned int i=0 ; i<npts ; i++,px+=dim)
+			c[i] = assign_point_to_cluster_ordinary(px,CX,dim,nclus);
+	}
+	PREC sse = compute_sserror(CX,X,c,dim,npts);
+
+#if KMEANS_VERBOSE>0
+	printf("iteration %4d, #(changed points): %4d, sse: %4.2f\n",(int)iteration,(int)nchanged,sse);
+#endif
+
+	if(low_b) free(low_b);
+	free(cluster_changed);
+	free(mindist);
+	free(s);
+	free(offset);
+	free(cl_dist);
+	free(tCX);
+	free(CN);
+	free(old_c);
+
+	return(sse);
+}
+
+PREC kmeans(PREC *CX,const PREC *X,unsigned int *assignment,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter, unsigned int restarts)
+{
+
+  if (npts < nclus)
+    {
+      CX = (PREC*)calloc(nclus*dim,sizeof(PREC));
+      memcpy(CX,X,dim*nclus*sizeof(PREC));
+      PREC sse = 0.0;
+      return(sse);
+    }
+  else if (npts == nclus)
+    {
+      memcpy(CX,X,dim*nclus*sizeof(PREC));
+      PREC sse = 0.0;
+      return(sse);
+    }
+  else if (nclus == 0)
+  {
+	  printf("Error: Number of clusters is 0\n");
+	  exit(-1);
+  }
+
+  /*
+   * No starting point is given, generate a new one
+   */
+  if (CX==NULL)
+  {
+      unsigned int *order = (unsigned int*)malloc(npts*sizeof(unsigned int));
+
+      CX = (PREC*)calloc(nclus*dim,sizeof(PREC));
+      /* generate new starting point */
+      randperm(order,npts);
+      for (unsigned int i=0; i<nclus; i++)
+		  for ( unsigned int k=0; k<dim; k++ )
+			  CX[(i*dim)+k] = X[order[i]*dim+k];
+      free(order);
+		
+  }
+  assert(CX != NULL);
+  PREC sse = kmeans_run(CX,X,assignment,dim,npts,nclus,maxiter);
+
+  unsigned int res = restarts;
+  if (res>0)
+  {
+      PREC minsse = sse;
+      unsigned int *order = (unsigned int*)malloc(npts*sizeof(unsigned int));
+      PREC *bestCX = (PREC*) malloc(dim*nclus*sizeof(PREC));
+      unsigned int *bestassignment = (unsigned int*)malloc(npts*sizeof(unsigned int));
+
+      memcpy(bestCX,CX,dim*nclus*sizeof(PREC));
+      memcpy(bestassignment,assignment,npts*sizeof(unsigned int));
+
+      while (res>0)
+	  {
+
+		  /* generate new starting point */
+		  randperm(order,npts);
+		  for (unsigned int i=0; i<nclus; i++)
+			  for (unsigned int k=0; k<dim; k++ )
+				  CX[(i*dim)+k] = X[order[i]*dim+k];
+		
+		  sse = kmeans_run(CX,X,assignment,dim,npts,nclus,maxiter);
+		  if (sse<minsse)
+		  {
+#if KMEANS_VERBOSE>1
+			  printf("found a better clustering with sse = %g\n",sse);
+#endif
+			  minsse = sse;
+			  memcpy(bestCX,CX,dim*nclus*sizeof(PREC));
+			  memcpy(bestassignment,assignment,npts*sizeof(unsigned int));
+		  }
+		  res--;
+
+	  }
+      memcpy(CX,bestCX,dim*nclus*sizeof(PREC));
+      memcpy(assignment,bestassignment,npts*sizeof(unsigned int));
+      sse = minsse;
+      free(bestassignment);
+      free(bestCX);
+      free(order);
+  }
+  assert(CX != NULL);
+
+  return(sse);
+
+}
diff --git a/mpi_kmeans.h b/mpi_kmeans.h
new file mode 100644
index 0000000..d08a6bb
--- /dev/null
+++ b/mpi_kmeans.h
@@ -0,0 +1,42 @@
+#ifndef __MPI_KMEANS_MEX_H__
+#define __MPI_KMEANS_MEX_H__
+
+
+#ifndef KMEANS_VERBOSE 
+#define KMEANS_VERBOSE 0
+#endif
+
+/* Double precision is default*/
+#ifndef INPUT_TYPE
+#define INPUT_TYPE 0
+#endif
+
+#if INPUT_TYPE==0
+#define PREC double
+#define PREC_MAX DBL_MAX
+#elif INPUT_TYPE==1
+#define PREC float
+#define PREC_MAX FLT_MAX
+#endif
+
+
+#ifndef BOUND_PREC
+#define BOUND_PREC float
+#endif
+
+#ifndef BOUND_PREC_MAX
+#define BOUND_PREC_MAX FLT_MAX
+#endif
+
+#define BOUND_EPS 1e-6
+
+extern "C"{
+PREC kmeans(PREC *CXp,const PREC *X,unsigned int *c,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter, unsigned int nr_restarts);
+}
+PREC compute_distance(const PREC *vec1, const PREC *vec2, const unsigned int dim);
+unsigned int assign_point_to_cluster_ordinary(const PREC *px, const PREC *CX, unsigned int dim,unsigned int nclus);
+void randperm(unsigned int *order, unsigned int npoints);
+
+#endif
+
+
diff --git a/mpi_kmeans.o b/mpi_kmeans.o
new file mode 100644
index 0000000..955e41c
Binary files /dev/null and b/mpi_kmeans.o differ
diff --git a/mpi_kmeans.py b/mpi_kmeans.py
new file mode 100755
index 0000000..f152e42
--- /dev/null
+++ b/mpi_kmeans.py
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+# Wrapper for the MPI-Kmeans library by Peter Gehler 
+
+from ctypes import c_int, c_double, c_uint
+from numpy.ctypeslib import ndpointer
+import numpy as N
+from numpy import empty,array,reshape,arange
+
+def kmeans(X, nclst, maxiter=0, numruns=1):
+    """Wrapper for Peter Gehlers accelerated MPI-Kmeans routine."""
+    
+    mpikmeanslib = N.ctypeslib.load_library("libmpikmeans.so", ".")
+    mpikmeanslib.kmeans.restype = c_double
+    mpikmeanslib.kmeans.argtypes = [ndpointer(dtype=c_double, ndim=1, flags='C_CONTIGUOUS'), \
+                                    ndpointer(dtype=c_double, ndim=1, flags='C_CONTIGUOUS'), \
+                                    ndpointer(dtype=c_uint, ndim=1, flags='C_CONTIGUOUS'), \
+                                    c_uint, c_uint, c_uint, c_uint, c_uint ]
+    
+    npts,dim = X.shape
+    assignments=empty( (npts), c_uint )
+    
+    bestSSE=N.Inf
+    bestassignments=empty( (npts), c_uint)
+    Xvec = array( reshape( X, (-1,) ), c_double )
+    permutation = N.random.permutation( range(npts) ) # randomize order of points
+    CX = array(X[permutation[:nclst],:], c_double).flatten()
+    SSE = mpikmeanslib.kmeans( CX, Xvec, assignments, dim, npts, min(nclst, npts), maxiter, numruns)
+    return reshape(CX, (nclst,dim)), SSE, (assignments+1)
+
+
+if __name__ == "__main__":
+    from numpy import array
+    from numpy.random import rand
+    
+    X = array( rand(12), c_double )
+    X.shape = (4,3)
+    clst,dist,labels = kmeans(X, 2)
+    print "cluster centers=\n",clst
+    print "dist=",dist
+    print "cluster labels",labels
diff --git a/mpi_kmeans64 b/mpi_kmeans64
new file mode 100755
index 0000000..a5d472e
Binary files /dev/null and b/mpi_kmeans64 differ
diff --git a/mpi_kmeans_main.cxx b/mpi_kmeans_main.cxx
new file mode 100644
index 0000000..896cadb
--- /dev/null
+++ b/mpi_kmeans_main.cxx
@@ -0,0 +1,296 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iomanip>
+#include <string>
+#include <algorithm>
+#include <iterator>
+#include <numeric>
+#include <ext/numeric>
+#include <boost/program_options.hpp>
+#include <boost/filesystem.hpp>
+
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+
+#include "mpi_kmeans.h"
+
+namespace po = boost::program_options;
+
+static unsigned int count_lines(const std::string& filename) {
+	std::ifstream in(filename.c_str());
+	if (in.fail()) {
+		std::cerr << "count_lines, failed to open \"" << filename
+			<< "\"." << std::endl;
+		exit(EXIT_FAILURE);
+	}
+	unsigned int lines = 0;
+	std::string line;
+	while (in.eof() == false) {
+		std::getline(in, line);
+		if (line.size() == 0)
+			continue;
+		lines += 1;
+	}
+	in.close();
+	return (lines);
+}
+
+
+static void write_cluster_centers(const std::string& output_filename,
+								  const std::vector<std::vector<double> >& data_CX) {
+	
+	std::cout << "Writing cluster centers to \""
+			  << output_filename << "\"" << std::endl;
+
+	std::ofstream wout(output_filename.c_str());
+	if (wout.fail()) {
+		std::cerr << "Failed to open \"" << output_filename
+				  << "\" for writing." << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	wout << std::setprecision(12);
+	for (unsigned int m=0; m < data_CX.size(); ++m) {
+		for (unsigned int n=0; n < data_CX[m].size(); ++n) {
+			wout << (n==0? "":" ") << data_CX[m][n];
+		}
+		wout << std::endl;
+	}
+	wout.close();
+
+	return;
+}
+
+static void write_cluster_centers(const std::string& output_filename,
+								  double *data_CX, 
+								  unsigned int nof_clusters,
+								  unsigned int dims) {
+	
+	std::cout << "Writing cluster centers to \""
+			  << output_filename << "\"" << std::endl;
+
+	std::ofstream wout(output_filename.c_str());
+	if (wout.fail()) {
+		std::cerr << "Failed to open \"" << output_filename
+				  << "\" for writing." << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	wout << std::setprecision(12);
+	unsigned int cntr = 0;
+	for (unsigned int m=0; m < nof_clusters; ++m) {
+		for (unsigned int n=0; n < dims; ++n) {
+			wout << (n==0? "":" ") << data_CX[cntr];
+			cntr += 1;
+		}
+		wout << std::endl;
+	}
+	wout.close();
+
+	return;
+}
+
+
+static int read_problem_data(const std::string& train_filename,
+							  std::vector<std::vector<double> >& data_X) {
+	data_X.clear();
+
+	std::ifstream in(train_filename.c_str());
+	if (in.fail()) {
+		std::cerr << "Failed to open file \""
+				  << train_filename << "\" for reading." << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	std::string line;
+	unsigned int ndims = 0;
+	while (in.eof() == false) {
+		std::getline(in,line);
+		if (line.size() == 0)
+			continue; // skip over empty lines
+	
+		// remove trailing whitespaces 
+		line.erase(line.find_last_not_of(" ")+1);
+
+		std::vector<double> current_data;	
+		std::istringstream is(line);
+		while (is.eof() == false) {
+			double value;
+			is >> value;
+			current_data.push_back(value);
+		}
+		
+		// Ensure the same number of dimensions for each point
+		if (ndims == 0)
+			ndims = current_data.size();	
+		assert(ndims == current_data.size());
+		data_X.push_back(current_data);
+	}
+	in.close();
+	
+	if (data_X.size() == 0) {
+		std::cerr << "No points read from file \"" << train_filename
+				  << "\"" << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	return(data_X.size());
+
+}
+
+static int read_problem_data(const std::string& train_filename,
+							  double *data_X) {
+
+	std::ifstream in(train_filename.c_str());
+	if (in.fail()) {
+		std::cerr << "Failed to open file \""
+				  << train_filename << "\" for reading." << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	unsigned int nof_points = count_lines(train_filename);
+	if (nof_points == 0) {
+		std::cerr << "No points read from file \"" << train_filename
+				  << "\"" << std::endl;
+		std::cerr << "Try mpi_assign --help" << std::endl;
+		exit(EXIT_FAILURE);
+	}
+
+	std::string line;
+	unsigned int ndims = 0;
+	unsigned int cntr = 0;
+	while (in.eof() == false) {
+		std::getline(in,line);
+		if (line.size() == 0)
+			continue; // skip over empty lines
+	
+		// remove trailing whitespaces 
+		line.erase(line.find_last_not_of(" ")+1);
+
+		std::vector<double> current_data;	
+		std::istringstream is(line);
+		while (is.eof() == false) {
+			double value;
+			is >> value;
+			current_data.push_back(value);
+		}
+		
+		// Ensure the same number of dimensions for each point
+		if (ndims == 0)
+			ndims = current_data.size();	
+		assert(ndims == current_data.size());
+		if (data_X==NULL)
+			data_X = (double *)malloc(nof_points * ndims * sizeof(double));
+		
+		for (unsigned int i=0; i<ndims; ++i) {
+			data_X[cntr] = current_data[i];
+			cntr += 1;
+		}
+	}
+	in.close();
+	
+	return(nof_points);
+
+}
+
+
+
+int main(int argc, char* argv[]) {
+
+	std::string train_filename;
+	std::string output_filename;
+	int nof_clusters;
+	int nof_restarts;
+	int maxiter;
+
+	// Set Program options
+	po::options_description generic("Generic Options");
+	generic.add_options()
+		("help","Produce help message")
+		("verbose","Verbose output")
+		;
+
+	po::options_description input_options("Input/Output Options");
+	input_options.add_options()
+		("data",po::value<std::string>
+		 (&train_filename)->default_value("data.txt"),
+		 "Training file, one datum per line")
+		("output",po::value<std::string>
+		 (&output_filename)->default_value("output.txt"),
+		 "Output file, one cluster center per line")
+		;
+
+	po::options_description kmeans_options("K-Means Options");
+	kmeans_options.add_options()
+		("k",po::value<int>(&nof_clusters)->default_value(100),
+		 "Number of clusters to generate")
+		("restarts",po::value<int>(&nof_restarts)->default_value(0),
+		 "Number of K-Means restarts. (0: single run)")
+		("maxiter",po::value<int>(&maxiter)->default_value(0),
+		 "Maximum number of K-Means iterations. (0: infinity)")
+		;
+
+	po::options_description all_options;
+	all_options.add(generic).add(input_options).add(kmeans_options);
+	po::variables_map vm;
+	po::store(po::command_line_parser(argc,argv).options(all_options).run(), vm);
+	po::notify(vm);
+
+	bool verbose = vm.count("verbose");
+
+	if (vm.count("help")) {
+		std::cerr << "K-Means clustering" << std::endl;
+		std::cerr << all_options << std::endl;
+		std::cerr << std::endl;
+		std::cerr << "Example:" << std::endl;
+		std::cerr << "  mpi_kmeans --k 2 --data example.txt --output clusters.txt" << std::endl;
+		exit(EXIT_SUCCESS);
+	}
+
+	// read in the problem
+	std::cout << "Training file: " << train_filename << std::endl;
+	std::vector<std::vector<double> > data_X; // so far kmeans does not support std::<vector>
+	int nof_points = read_problem_data(train_filename,data_X);
+	assert(nof_points>0);
+
+	unsigned int dims = data_X[0].size();
+	assert(dims>0);
+
+	// convert points to double*
+	double *X = (double *)malloc(nof_points * dims * sizeof(double));
+	unsigned int cntr = 0;
+	for (unsigned int m=0; m < data_X.size() ; ++m) {
+		for (unsigned int n=0; n < data_X[m].size() ; ++n) {
+			X[cntr] = data_X[m][n];
+			cntr += 1;
+		}
+		data_X[m].clear();
+	}
+	data_X.clear();
+
+	// start K-Means
+	std::cout << "Starting Kmeans ..." << std::endl;
+	std::cout << " ... with " << nof_points << " training points " <<std::endl;
+	std::cout << " ... for " << nof_clusters << " clusters " <<std::endl;
+
+	unsigned int *assignment = (unsigned int *)malloc(nof_points * sizeof(unsigned int));
+	double *CX = (double *) calloc(nof_clusters * dims, sizeof(double));
+	double sse = kmeans(CX, X, assignment, dims, nof_points, nof_clusters, maxiter, nof_restarts);
+	free(X); 
+	assert(CX);
+
+	std::cout << "Done!" << std::endl;
+	std::cout << "Sum of Squared Error : " << sse << std::endl;
+
+	// write the clusters
+	// write_cluster_centers(output_filename,data_X);
+	write_cluster_centers(output_filename,CX,nof_clusters,dims);
+
+	// done
+	exit(EXIT_SUCCESS);
+}
diff --git a/mpi_kmeans_mex.cxx b/mpi_kmeans_mex.cxx
new file mode 100644
index 0000000..d00278c
--- /dev/null
+++ b/mpi_kmeans_mex.cxx
@@ -0,0 +1,161 @@
+#include <memory.h>
+#include "mex.h"
+#include "mpi_kmeans.h"
+#include <assert.h>
+
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+
+#ifdef COMPILE_WITH_ICC
+	unsigned int dims[2];
+#else
+	mwSize dims[2];
+#endif
+
+	if (nrhs < 2){
+		mexPrintf("at least two input arguments expected.");
+		return;
+	}
+	unsigned int maxiter = 0;
+	if (nrhs > 2)
+		maxiter = (unsigned int) *(mxGetPr(prhs[2]));
+	
+	unsigned int restarts = 0;
+	if (nrhs > 3)
+		restarts = (unsigned int) *(mxGetPr(prhs[3]));
+
+#ifdef KMEANS_VERBOSE
+	if (restarts>0)
+		printf("will do %d restarts\n",restarts);
+#endif
+
+	if ((nlhs > 3) || (nlhs < 1)){
+		mexPrintf("minimal one, maximal three output arguments expected.");
+		return;
+	}
+
+#ifdef INPUT_TYPE
+#if INPUT_TYPE==0
+	if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]) ||
+		mxGetNumberOfDimensions(prhs[0]) != 2)
+	{
+		mexPrintf("input 1 (X) must be a real double matrix");
+		return;
+	}
+#elif INPUT_TYPE==1
+	if (!mxIsSingle(prhs[0]) || mxIsComplex(prhs[0]) ||
+		mxGetNumberOfDimensions(prhs[0]) != 2)
+	{
+		mexPrintf("input 1 (X) must be a real single matrix");
+		return;
+	}
+#endif
+#endif
+
+	unsigned int dim = mxGetM(prhs[0]);
+	unsigned int npts = mxGetN(prhs[0]);
+
+	unsigned int nclus = 0;
+	if (mxGetN(prhs[1])==1 && mxGetM(prhs[1])==1)
+		nclus = (unsigned int)*(mxGetPr(prhs[1]));
+	else
+	{
+		nclus = mxGetN(prhs[1]);
+		if (dim != mxGetM(prhs[1]))
+			mexErrMsgTxt("Dimension mismatch");
+	}
+	if (nclus == 0)
+		mexErrMsgTxt("Number of Clusters is zero");
+
+	if (npts < nclus)
+		mexErrMsgTxt("less training points than clusters to compute");
+
+	if (mxIsSingle(prhs[1]))
+		printf("Is Single!\n");
+
+
+	if (mxIsComplex(prhs[1]) ||
+		mxGetNumberOfDimensions(prhs[1]) != 2)
+	{
+		mexPrintf("input 2 (CX) must be a real double matrix compatible with input 1 (X)");
+		return;
+	}
+
+	dims[0] = dim;
+	dims[1] = nclus;
+
+#ifdef INPUT_TYPE
+#if INPUT_TYPE==0
+	//plhs[0] = mxCreateDoubleMatrix(dim, nclus, mxREAL);
+	plhs[0] = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
+#elif INPUT_TYPE==2
+
+
+	plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxREAL);
+#endif
+#endif
+
+	PREC *CXp = (PREC*)mxGetPr(plhs[0]);
+
+	/* input points */
+	const PREC *X = (PREC*)mxGetPr(prhs[0]);
+
+	/* sanity check */
+	for (unsigned int i=0 ; i<npts*dim; i++)
+		if (mxIsNaN(X[i])||mxIsInf(X[i]))
+			mexErrMsgTxt("NaN or Inf in the input data detected... abort");
+
+
+	/* return also the assignment */
+	unsigned int *assignment = NULL;
+	if (nlhs>2) 
+	{
+		dims[0] = npts;
+		dims[1] = 1;
+		plhs[2] = mxCreateNumericArray(2,dims,mxUINT32_CLASS,mxREAL);
+		assignment = (unsigned int*)mxGetPr(plhs[2]);
+	}
+	else
+		assignment = (unsigned int *) malloc(npts * sizeof(unsigned int)); 	/* assignement of points to cluster */
+
+	assert(assignment);
+
+	if ((mxGetN(prhs[1])==mxGetM(prhs[1]))==1)
+	{
+		/* select nclus points from data at random... */
+		unsigned int *order = (unsigned int*)malloc(npts * sizeof(unsigned int));
+		randperm(order,npts);
+		for ( unsigned int i=0; i<nclus; i++)
+			for (unsigned int k=0; k<dim; k++ )
+				CXp[(i*dim)+k] = (double)X[order[i]*dim+k];
+		free(order);
+	}
+	else
+	{
+		/* ... or copy initial guess to CXp */
+		memcpy(CXp,mxGetPr(prhs[1]),dim*nclus*sizeof(PREC));
+	}
+
+	/* start kmeans */
+	PREC sse = kmeans(CXp,X,assignment,dim,npts,nclus,maxiter,restarts);
+
+	if (nlhs>1)
+	{
+		plhs[1] = mxCreateScalarDouble(0.0);
+		PREC *psse = (PREC*)mxGetPr(plhs[1]);
+		psse[0] = sse;
+	}
+
+	/* return also the assignment */
+	if (nlhs>2)
+	{
+		for ( unsigned int i=0; i<npts; i++)
+			assignment[i]++;
+	}
+	else
+		free(assignment);
+}
+
+
+
+
diff --git a/mpi_kmeans_mex.h b/mpi_kmeans_mex.h
new file mode 100644
index 0000000..2213a88
--- /dev/null
+++ b/mpi_kmeans_mex.h
@@ -0,0 +1,16 @@
+#ifndef __MPI_KMEANS_MEX_H__
+#define __MPI_KMEANS_MEX_H__
+
+double compute_distance(const double *vec1, const double *vec2, const unsigned int dim);
+void compute_cluster_distances(PREC *dist, PREC *s, const double *CX, unsigned int dim,unsigned int nclus, const bool *cluster_changed);
+void add_point_to_cluster(double *pcxp, unsigned int *CN, const double *px, const unsigned int dim);
+void remove_point_from_cluster(double *pcxp, unsigned int *CN, const double *px, const unsigned int dim);
+double compute_sserror(const double *CX, const double *X, const unsigned int *c,unsigned int dim, unsigned int npts);
+double init_point_to_cluster(unsigned int *c, PREC2 *mindist, PREC *low_b,const PREC *dist, const double *CX, const double *px, unsigned int dim, unsigned int nclus);
+void find_nearest_cluster_center(unsigned int *c, const double *CX, const double *px, const PREC *dist, PREC2 *mindist, PREC *low_b, PREC *s, bool *up_to_date, unsigned int dim, unsigned int nclus);
+double kmeans(double *CXp,const double *X,unsigned int *c,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter);
+
+
+#endif
+
+
diff --git a/mpi_kmeans_mex.mexa64 b/mpi_kmeans_mex.mexa64
new file mode 100755
index 0000000..2148b52
Binary files /dev/null and b/mpi_kmeans_mex.mexa64 differ
diff --git a/mpi_kmeans_mex.mexglx b/mpi_kmeans_mex.mexglx
new file mode 100755
index 0000000..bc9f96c
Binary files /dev/null and b/mpi_kmeans_mex.mexglx differ
diff --git a/py_kmeans.c b/py_kmeans.c
new file mode 100755
index 0000000..0c4f624
--- /dev/null
+++ b/py_kmeans.c
@@ -0,0 +1,2071 @@
+/* Generated by Cython 0.9.8.1.1 on Wed Nov 12 12:42:51 2008 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include "structmember.h"
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef DL_EXPORT
+  #define DL_EXPORT(t) t
+#endif
+#if PY_VERSION_HEX < 0x02040000
+  #define METH_COEXIST 0
+#endif
+#if PY_VERSION_HEX < 0x02050000
+  typedef int Py_ssize_t;
+  #define PY_SSIZE_T_MAX INT_MAX
+  #define PY_SSIZE_T_MIN INT_MIN
+  #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+  #define PyInt_AsSsize_t(o)   PyInt_AsLong(o)
+  #define PyNumber_Index(o)    PyNumber_Int(o)
+  #define PyIndex_Check(o)     PyNumber_Check(o)
+#endif
+#if PY_VERSION_HEX < 0x02060000
+  #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
+  #define Py_TYPE(ob)   (((PyObject*)(ob))->ob_type)
+  #define Py_SIZE(ob)   (((PyVarObject*)(ob))->ob_size)
+  #define PyVarObject_HEAD_INIT(type, size) \
+          PyObject_HEAD_INIT(type) size,
+  #define PyType_Modified(t)
+
+  typedef struct {
+       void *buf;
+       Py_ssize_t len;
+       int readonly;
+       const char *format;
+       int ndim;
+       Py_ssize_t *shape;
+       Py_ssize_t *strides;
+       Py_ssize_t *suboffsets;
+       Py_ssize_t itemsize;
+       void *internal;
+  } Py_buffer;
+
+  #define PyBUF_SIMPLE 0
+  #define PyBUF_WRITABLE 0x0001
+  #define PyBUF_LOCK 0x0002
+  #define PyBUF_FORMAT 0x0004
+  #define PyBUF_ND 0x0008
+  #define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+  #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+  #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+  #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+  #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+
+#endif
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+#else
+  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define Py_TPFLAGS_CHECKTYPES 0
+  #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyBaseString_Type            PyUnicode_Type
+  #define PyString_Type                PyBytes_Type
+  #define PyInt_Type                   PyLong_Type
+  #define PyInt_Check(op)              PyLong_Check(op)
+  #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
+  #define PyInt_FromString             PyLong_FromString
+  #define PyInt_FromUnicode            PyLong_FromUnicode
+  #define PyInt_FromLong               PyLong_FromLong
+  #define PyInt_FromSize_t             PyLong_FromSize_t
+  #define PyInt_FromSsize_t            PyLong_FromSsize_t
+  #define PyInt_AsLong                 PyLong_AsLong
+  #define PyInt_AS_LONG                PyLong_AS_LONG
+  #define PyInt_AsSsize_t              PyLong_AsSsize_t
+  #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
+  #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+#else
+  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
+  #define PyBytes_Type                 PyString_Type
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define PyMethod_New(func, self, klass) PyInstanceMethod_New(func)
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+  #ifndef __stdcall
+    #define __stdcall
+  #endif
+  #ifndef __cdecl
+    #define __cdecl
+  #endif
+#else
+  #define _USE_MATH_DEFINES
+#endif
+#ifdef __cplusplus
+#define __PYX_EXTERN_C extern "C"
+#else
+#define __PYX_EXTERN_C extern
+#endif
+#include <math.h>
+#define __PYX_HAVE_API__py_kmeans
+#include "mpi_kmeans.h"
+#include "numpy/arrayobject.h"
+
+
+#ifdef __GNUC__
+#define INLINE __inline__
+#elif _WIN32
+#define INLINE __inline
+#else
+#define INLINE 
+#endif
+
+typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern; char is_identifier;} __Pyx_StringTabEntry; /*proto*/
+
+
+
+static int __pyx_skip_dispatch = 0;
+
+
+/* Type Conversion Predeclarations */
+
+#if PY_MAJOR_VERSION < 3
+#define __Pyx_PyBytes_FromString PyString_FromString
+#define __Pyx_PyBytes_AsString   PyString_AsString
+#else
+#define __Pyx_PyBytes_FromString PyBytes_FromString
+#define __Pyx_PyBytes_AsString   PyBytes_AsString
+#endif
+
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static INLINE int __Pyx_PyObject_IsTrue(PyObject* x);
+static INLINE PY_LONG_LONG __pyx_PyInt_AsLongLong(PyObject* x);
+static INLINE unsigned PY_LONG_LONG __pyx_PyInt_AsUnsignedLongLong(PyObject* x);
+static INLINE Py_ssize_t __pyx_PyIndex_AsSsize_t(PyObject* b);
+
+#define __pyx_PyInt_AsLong(x) (PyInt_CheckExact(x) ? PyInt_AS_LONG(x) : PyInt_AsLong(x))
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+
+static INLINE unsigned char __pyx_PyInt_unsigned_char(PyObject* x);
+static INLINE unsigned short __pyx_PyInt_unsigned_short(PyObject* x);
+static INLINE char __pyx_PyInt_char(PyObject* x);
+static INLINE short __pyx_PyInt_short(PyObject* x);
+static INLINE int __pyx_PyInt_int(PyObject* x);
+static INLINE long __pyx_PyInt_long(PyObject* x);
+static INLINE signed char __pyx_PyInt_signed_char(PyObject* x);
+static INLINE signed short __pyx_PyInt_signed_short(PyObject* x);
+static INLINE signed int __pyx_PyInt_signed_int(PyObject* x);
+static INLINE signed long __pyx_PyInt_signed_long(PyObject* x);
+static INLINE long double __pyx_PyInt_long_double(PyObject* x);
+#ifdef __GNUC__
+/* Test for GCC > 2.95 */
+#if __GNUC__ > 2 ||               (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) 
+#define likely(x)   __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else /* __GNUC__ > 2 ... */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+#define likely(x)   (x)
+#define unlikely(x) (x)
+#endif /* __GNUC__ */
+    
+static PyObject *__pyx_m;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+static const char **__pyx_f;
+static INLINE void __Pyx_SafeReleaseBuffer(PyObject* obj, Py_buffer* info);
+static INLINE void __Pyx_ZeroBuffer(Py_buffer* buf); /*proto*/
+static INLINE const char* __Pyx_ConsumeWhitespace(const char* ts); /*proto*/
+static INLINE const char* __Pyx_BufferTypestringCheckEndian(const char* ts); /*proto*/
+static void __Pyx_BufferNdimError(Py_buffer* buffer, int expected_ndim); /*proto*/
+static const char* __Pyx_BufferTypestringCheck_item_nn___pyx_t_9py_kmeans_DTYPE_t(const char* ts); /*proto*/
+
+static int __Pyx_GetBuffer_nn___pyx_t_9py_kmeans_DTYPE_t(PyObject* obj, Py_buffer* buf, int flags, int nd); /*proto*/
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name, int exact); /*proto*/
+
+static INLINE void __Pyx_RaiseArgtupleTooLong(Py_ssize_t num_expected, Py_ssize_t num_found); /*proto*/
+#if (PY_MAJOR_VERSION < 3) && !(Py_TPFLAGS_DEFAULT & Py_TPFLAGS_HAVE_NEWBUFFER)
+static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
+static void __Pyx_ReleaseBuffer(PyObject *obj, Py_buffer *view);
+#else
+#define __Pyx_GetBuffer PyObject_GetBuffer
+#define __Pyx_ReleaseBuffer PyObject_ReleaseBuffer
+#endif
+
+Py_ssize_t __Pyx_zeros[] = {0, 0};
+Py_ssize_t __Pyx_minusones[] = {-1, -1};
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list); /*proto*/
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
+
+static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); /*proto*/
+
+static int __Pyx_Print(PyObject *, int); /*proto*/
+#if PY_MAJOR_VERSION >= 3
+static PyObject* __pyx_print = 0;
+static PyObject* __pyx_print_kwargs = 0;
+#endif
+
+static PyObject *__Pyx_UnpackItem(PyObject *, Py_ssize_t index); /*proto*/
+static int __Pyx_EndUnpack(PyObject *); /*proto*/
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
+
+static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name, long size);  /*proto*/
+
+static PyObject *__Pyx_ImportModule(char *name); /*proto*/
+
+static void __Pyx_AddTraceback(const char *funcname); /*proto*/
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
+
+/* Type declarations */
+
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+typedef npy_uint64 __pyx_t_5numpy_uint64_t;
+
+typedef npy_float32 __pyx_t_5numpy_float32_t;
+
+typedef npy_float64 __pyx_t_5numpy_float64_t;
+
+typedef npy_long __pyx_t_5numpy_int_t;
+
+typedef npy_longlong __pyx_t_5numpy_long_t;
+
+typedef npy_ulong __pyx_t_5numpy_uint_t;
+
+typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
+
+typedef npy_double __pyx_t_5numpy_float_t;
+
+typedef npy_double __pyx_t_5numpy_double_t;
+
+typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
+
+typedef __pyx_t_5numpy_double_t __pyx_t_9py_kmeans_DTYPE_t;
+/* Module declarations from numpy */
+
+/* Module declarations from numpy */
+
+static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
+/* Module declarations from py_kmeans */
+
+
+
+/* Implementation of py_kmeans */
+static PyObject *__pyx_int_1;
+static PyObject *__pyx_int_4;
+static PyObject *__pyx_int_3;
+static PyObject *__pyx_int_2;
+static char __pyx_k_numpy[] = "numpy";
+static PyObject *__pyx_kp_numpy;
+static char __pyx_k_np[] = "np";
+static PyObject *__pyx_kp_np;
+static char __pyx_k_double[] = "double";
+static PyObject *__pyx_kp_double;
+static char __pyx_k_DTYPE[] = "DTYPE";
+static PyObject *__pyx_kp_DTYPE;
+static char __pyx_k_ctypes[] = "ctypes";
+static PyObject *__pyx_kp_ctypes;
+static char __pyx_k_c_uint[] = "c_uint";
+static PyObject *__pyx_kp_c_uint;
+static char __pyx_k_c_double[] = "c_double";
+static PyObject *__pyx_kp_c_double;
+static char __pyx_k_min[] = "min";
+static PyObject *__pyx_kp_min;
+static char __pyx_k_empty[] = "empty";
+static PyObject *__pyx_kp_empty;
+static char __pyx_k_dtype[] = "dtype";
+static PyObject *__pyx_kp_dtype;
+static char __pyx_k_order[] = "order";
+static PyObject *__pyx_kp_order;
+static char __pyx_k_17[] = "C";
+static PyObject *__pyx_kp_17;
+static char __pyx_k_random[] = "random";
+static PyObject *__pyx_kp_random;
+static char __pyx_k_permutation[] = "permutation";
+static PyObject *__pyx_kp_permutation;
+static char __pyx_k_range[] = "range";
+static PyObject *__pyx_kp_range;
+static char __pyx_k_array[] = "array";
+static PyObject *__pyx_kp_array;
+static char __pyx_k_18[] = "C";
+static PyObject *__pyx_kp_18;
+static char __pyx_k_rand[] = "rand";
+static PyObject *__pyx_kp_rand;
+static char __pyx_k_kmeans[] = "kmeans";
+static PyObject *__pyx_kp_kmeans;
+static PyObject *__pyx_builtin_min;
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_kp_19;
+static PyObject *__pyx_kp_20;
+static PyObject *__pyx_kp_21;
+static char __pyx_k_19[] = "cluster centers=\n";
+static char __pyx_k_20[] = "dist=";
+static char __pyx_k_21[] = "cluster labels";
+static char __pyx_k___getbuffer__[] = "__getbuffer__";
+static PyObject *__pyx_kp___getbuffer__;
+static char __pyx_k_RuntimeError[] = "RuntimeError";
+static PyObject *__pyx_kp_RuntimeError;
+static char __pyx_k_ValueError[] = "ValueError";
+static PyObject *__pyx_kp_ValueError;
+static PyObject *__pyx_kp_1;
+static PyObject *__pyx_kp_16;
+static PyObject *__pyx_builtin_RuntimeError;
+static PyObject *__pyx_builtin_ValueError;
+static char __pyx_k_1[] = "Py_intptr_t and Py_ssize_t differs in size, numpy.pxd does not support this";
+static char __pyx_k_2[] = "b";
+static char __pyx_k_3[] = "B";
+static char __pyx_k_4[] = "h";
+static char __pyx_k_5[] = "H";
+static char __pyx_k_6[] = "i";
+static char __pyx_k_7[] = "I";
+static char __pyx_k_8[] = "l";
+static char __pyx_k_9[] = "L";
+static char __pyx_k_10[] = "q";
+static char __pyx_k_11[] = "Q";
+static char __pyx_k_12[] = "f";
+static char __pyx_k_13[] = "d";
+static char __pyx_k_14[] = "g";
+static char __pyx_k_15[] = "O";
+static char __pyx_k_16[] = "only objects, int and float dtypes supported for ndarray buffer access so far (dtype is %d)";
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":21
+ * from ctypes import c_uint, c_double
+ * 
+ * def kmeans(np.ndarray[DTYPE_t, ndim=2] X, unsigned int num_clusters, unsigned int maxiter=0, unsigned int num_runs=1):             # <<<<<<<<<<<<<<
+ *     """Cython wrapper for Peter Gehlers accelerated MPI-Kmeans routine.
+ *     centroids, dist, assignments = kmeans(X, num_clusters, maxiter=0, num_runs=1)
+ */
+
+static PyObject *__pyx_pf_9py_kmeans_kmeans(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_9py_kmeans_kmeans[] = "Cython wrapper for Peter Gehlers accelerated MPI-Kmeans routine.\n    centroids, dist, assignments = kmeans(X, num_clusters, maxiter=0, num_runs=1)\n\n    --Input--\n    X            : input data (2D numpy array)\n    num_clusters : number of centroids to use (k)\n    [maxiter]    : how many iterations to run (setting this to 0 will run kmeans until it converges) (default is 0).\n    [num_runs}   : how many times to restart the clustering (def [...]
+static PyObject *__pyx_pf_9py_kmeans_kmeans(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+PyArrayObject *__pyx_v_X = 0;
+unsigned int __pyx_v_num_clusters;
+unsigned int __pyx_v_maxiter;
+unsigned int __pyx_v_num_runs;
+unsigned int __pyx_v_num_points;
+unsigned int __pyx_v_dim;
+double __pyx_v_dist;
+PyArrayObject *__pyx_v_assignments = 0;
+PyObject *__pyx_v_permutation;
+PyArrayObject *__pyx_v_centroids = 0;
+Py_buffer __pyx_bstruct_centroids;
+Py_ssize_t __pyx_bstride_0_centroids = 0;
+Py_ssize_t __pyx_bstride_1_centroids = 0;
+Py_ssize_t __pyx_bshape_0_centroids = 0;
+Py_ssize_t __pyx_bshape_1_centroids = 0;
+Py_buffer __pyx_bstruct_X;
+Py_ssize_t __pyx_bstride_0_X = 0;
+Py_ssize_t __pyx_bstride_1_X = 0;
+Py_ssize_t __pyx_bshape_0_X = 0;
+Py_ssize_t __pyx_bshape_1_X = 0;
+PyObject *__pyx_r;
+PyObject *__pyx_1 = 0;
+PyObject *__pyx_2 = 0;
+PyObject *__pyx_3 = 0;
+unsigned int __pyx_4;
+PyObject *__pyx_5 = 0;
+PyArrayObject *__pyx_t_1 = NULL;
+static char *__pyx_argnames[] = {"X","num_clusters","maxiter","num_runs",0};
+__pyx_self = __pyx_self;
+__pyx_v_maxiter = 0;
+__pyx_v_num_runs = 1;
+if (likely(!__pyx_kwds) && likely(2 <= PyTuple_GET_SIZE(__pyx_args)) && likely(PyTuple_GET_SIZE(__pyx_args) <= 4)) {
+  __pyx_v_X = ((PyArrayObject *)PyTuple_GET_ITEM(__pyx_args, 0));
+  __pyx_v_num_clusters = PyInt_AsUnsignedLongMask(PyTuple_GET_ITEM(__pyx_args, 1)); if (unlikely((__pyx_v_num_clusters == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+  if (PyTuple_GET_SIZE(__pyx_args) > 2) {
+    __pyx_v_maxiter = PyInt_AsUnsignedLongMask(PyTuple_GET_ITEM(__pyx_args, 2)); if (unlikely((__pyx_v_maxiter == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    if (PyTuple_GET_SIZE(__pyx_args) > 3) {
+      __pyx_v_num_runs = PyInt_AsUnsignedLongMask(PyTuple_GET_ITEM(__pyx_args, 3)); if (unlikely((__pyx_v_num_runs == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+    }
+  }
+}
+else {
+  if (unlikely(!PyArg_ParseTupleAndKeywords(__pyx_args, __pyx_kwds, "OI|II", __pyx_argnames, &__pyx_v_X, &__pyx_v_num_clusters, &__pyx_v_maxiter, &__pyx_v_num_runs))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+}
+goto __pyx_L4;
+__pyx_L3_error:;
+__Pyx_AddTraceback("py_kmeans.kmeans");
+return NULL;
+__pyx_L4:;
+__pyx_v_permutation = Py_None; Py_INCREF(Py_None);
+__pyx_bstruct_centroids.buf = NULL;
+if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_X), __pyx_ptype_5numpy_ndarray, 1, "X", 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (unlikely(__Pyx_GetBuffer_nn___pyx_t_9py_kmeans_DTYPE_t((PyObject*)__pyx_v_X, &__pyx_bstruct_X, PyBUF_FORMAT| PyBUF_STRIDES, 2) == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_bstride_0_X = __pyx_bstruct_X.strides[0]; __pyx_bstride_1_X = __pyx_bstruct_X.strides[1];
+__pyx_bshape_0_X = __pyx_bstruct_X.shape[0]; __pyx_bshape_1_X = __pyx_bstruct_X.shape[1];
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":43
+ * 
+ *     # Initializing
+ *     cdef unsigned int num_points = X.shape[0]             # <<<<<<<<<<<<<<
+ *     cdef unsigned int dim = X.shape[1]
+ *     cdef double dist
+ */
+__pyx_v_num_points = (__pyx_v_X->dimensions[0]);
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":44
+ *     # Initializing
+ *     cdef unsigned int num_points = X.shape[0]
+ *     cdef unsigned int dim = X.shape[1]             # <<<<<<<<<<<<<<
+ *     cdef double dist
+ *     num_clusters = <unsigned int> min(num_clusters, num_points)
+ */
+__pyx_v_dim = (__pyx_v_X->dimensions[1]);
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":46
+ *     cdef unsigned int dim = X.shape[1]
+ *     cdef double dist
+ *     num_clusters = <unsigned int> min(num_clusters, num_points)             # <<<<<<<<<<<<<<
+ * 
+ *     # Init output array for assignments
+ */
+__pyx_1 = PyLong_FromUnsignedLong(__pyx_v_num_clusters); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyLong_FromUnsignedLong(__pyx_v_num_points); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PyTuple_New(2); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_3, 0, __pyx_1);
+PyTuple_SET_ITEM(__pyx_3, 1, __pyx_2);
+__pyx_1 = 0;
+__pyx_2 = 0;
+__pyx_1 = PyObject_Call(__pyx_builtin_min, ((PyObject *)__pyx_3), NULL); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_3)); __pyx_3 = 0;
+__pyx_4 = PyInt_AsUnsignedLongMask(__pyx_1); if (unlikely((__pyx_4 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+__pyx_v_num_clusters = ((unsigned int)__pyx_4);
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":49
+ * 
+ *     # Init output array for assignments
+ *     cdef np.ndarray assignments=np.empty( (num_points), dtype=c_uint, order='C')             # <<<<<<<<<<<<<<
+ * 
+ *     # Init output array for cluster centroids
+ */
+__pyx_2 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PyObject_GetAttr(__pyx_2, __pyx_kp_empty); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+__pyx_1 = PyLong_FromUnsignedLong(__pyx_v_num_points); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyTuple_New(1); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_2, 0, __pyx_1);
+__pyx_1 = 0;
+__pyx_1 = PyDict_New(); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_5 = __Pyx_GetName(__pyx_m, __pyx_kp_c_uint); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (PyDict_SetItem(__pyx_1, __pyx_kp_dtype, __pyx_5) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_5); __pyx_5 = 0;
+if (PyDict_SetItem(__pyx_1, __pyx_kp_order, __pyx_kp_17) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_5 = PyEval_CallObjectWithKeywords(__pyx_3, ((PyObject *)__pyx_2), ((PyObject *)__pyx_1)); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_3); __pyx_3 = 0;
+Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+Py_DECREF(((PyObject *)__pyx_1)); __pyx_1 = 0;
+if (!(__Pyx_TypeTest(__pyx_5, __pyx_ptype_5numpy_ndarray))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 49; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_v_assignments = ((PyArrayObject *)__pyx_5);
+__pyx_5 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":52
+ * 
+ *     # Init output array for cluster centroids
+ *     permutation = np.random.permutation( range(num_points) )             # <<<<<<<<<<<<<<
+ *     cdef np.ndarray[DTYPE_t, ndim=2] centroids = np.array(X[permutation[:num_clusters],:], order='C')
+ * 
+ */
+__pyx_3 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyObject_GetAttr(__pyx_3, __pyx_kp_random); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_3); __pyx_3 = 0;
+__pyx_1 = PyObject_GetAttr(__pyx_2, __pyx_kp_permutation); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+__pyx_5 = PyLong_FromUnsignedLong(__pyx_v_num_points); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PyTuple_New(1); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_3, 0, __pyx_5);
+__pyx_5 = 0;
+__pyx_2 = PyObject_Call(__pyx_builtin_range, ((PyObject *)__pyx_3), NULL); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_3)); __pyx_3 = 0;
+__pyx_5 = PyTuple_New(1); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_5, 0, __pyx_2);
+__pyx_2 = 0;
+__pyx_3 = PyObject_Call(__pyx_1, ((PyObject *)__pyx_5), NULL); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+Py_DECREF(((PyObject *)__pyx_5)); __pyx_5 = 0;
+Py_DECREF(__pyx_v_permutation);
+__pyx_v_permutation = __pyx_3;
+__pyx_3 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":53
+ *     # Init output array for cluster centroids
+ *     permutation = np.random.permutation( range(num_points) )
+ *     cdef np.ndarray[DTYPE_t, ndim=2] centroids = np.array(X[permutation[:num_clusters],:], order='C')             # <<<<<<<<<<<<<<
+ * 
+ *     # Call mpi_kmeans routine
+ */
+__pyx_2 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_1 = PyObject_GetAttr(__pyx_2, __pyx_kp_array); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+__pyx_5 = PySequence_GetSlice(__pyx_v_permutation, 0, __pyx_v_num_clusters); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyTuple_New(2); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_2, 0, __pyx_5);
+PyTuple_SET_ITEM(__pyx_2, 1, __pyx_3);
+__pyx_5 = 0;
+__pyx_3 = 0;
+__pyx_5 = PyObject_GetItem(((PyObject *)__pyx_v_X), ((PyObject *)__pyx_2)); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+__pyx_3 = PyTuple_New(1); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_3, 0, __pyx_5);
+__pyx_5 = 0;
+__pyx_2 = PyDict_New(); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (PyDict_SetItem(__pyx_2, __pyx_kp_order, __pyx_kp_18) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_5 = PyEval_CallObjectWithKeywords(__pyx_1, ((PyObject *)__pyx_3), ((PyObject *)__pyx_2)); if (unlikely(!__pyx_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+Py_DECREF(((PyObject *)__pyx_3)); __pyx_3 = 0;
+Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+if (!(__Pyx_TypeTest(__pyx_5, __pyx_ptype_5numpy_ndarray))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_t_1 = ((PyArrayObject *)__pyx_5);
+if (unlikely(__Pyx_GetBuffer_nn___pyx_t_9py_kmeans_DTYPE_t((PyObject*)__pyx_t_1, &__pyx_bstruct_centroids, PyBUF_FORMAT| PyBUF_STRIDES, 2) == -1)) {
+  __pyx_v_centroids = ((PyArrayObject *)Py_None); Py_INCREF(Py_None); __pyx_bstruct_centroids.buf = NULL;
+  {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+} else {__pyx_bstride_0_centroids = __pyx_bstruct_centroids.strides[0]; __pyx_bstride_1_centroids = __pyx_bstruct_centroids.strides[1];
+  __pyx_bshape_0_centroids = __pyx_bstruct_centroids.shape[0]; __pyx_bshape_1_centroids = __pyx_bstruct_centroids.shape[1];
+}
+__pyx_t_1 = 0;
+__pyx_v_centroids = ((PyArrayObject *)__pyx_5);
+__pyx_5 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":58
+ *     dist = c_kmeans( <double *> centroids.data, <double *> X.data,
+ * 		  <unsigned int *> assignments.data, dim, num_points,
+ * 		  num_clusters, maxiter, num_runs)             # <<<<<<<<<<<<<<
+ * 
+ *     return centroids, dist, (assignments+1)
+ */
+__pyx_v_dist = kmeans(((double *)__pyx_v_centroids->data), ((double *)__pyx_v_X->data), ((unsigned int *)__pyx_v_assignments->data), __pyx_v_dim, __pyx_v_num_points, __pyx_v_num_clusters, __pyx_v_maxiter, __pyx_v_num_runs);
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":60
+ * 		  num_clusters, maxiter, num_runs)
+ * 
+ *     return centroids, dist, (assignments+1)             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+__pyx_1 = PyFloat_FromDouble(__pyx_v_dist); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PyNumber_Add(((PyObject *)__pyx_v_assignments), __pyx_int_1); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyTuple_New(3); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(((PyObject *)__pyx_v_centroids));
+PyTuple_SET_ITEM(__pyx_2, 0, ((PyObject *)__pyx_v_centroids));
+PyTuple_SET_ITEM(__pyx_2, 1, __pyx_1);
+PyTuple_SET_ITEM(__pyx_2, 2, __pyx_3);
+__pyx_1 = 0;
+__pyx_3 = 0;
+__pyx_r = ((PyObject *)__pyx_2);
+__pyx_2 = 0;
+goto __pyx_L0;
+
+__pyx_r = Py_None; Py_INCREF(Py_None);
+goto __pyx_L0;
+__pyx_L1_error:;
+Py_XDECREF(__pyx_1);
+Py_XDECREF(__pyx_2);
+Py_XDECREF(__pyx_3);
+Py_XDECREF(__pyx_5);
+{ PyObject *__pyx_type, *__pyx_value, *__pyx_tb;
+  PyErr_Fetch(&__pyx_type, &__pyx_value, &__pyx_tb);
+  __Pyx_SafeReleaseBuffer((PyObject*)__pyx_v_centroids, &__pyx_bstruct_centroids);
+  __Pyx_SafeReleaseBuffer((PyObject*)__pyx_v_X, &__pyx_bstruct_X);
+PyErr_Restore(__pyx_type, __pyx_value, __pyx_tb);}
+__Pyx_AddTraceback("py_kmeans.kmeans");
+__pyx_r = NULL;
+goto __pyx_L2;
+__pyx_L0:;
+__Pyx_SafeReleaseBuffer((PyObject*)__pyx_v_centroids, &__pyx_bstruct_centroids);
+__Pyx_SafeReleaseBuffer((PyObject*)__pyx_v_X, &__pyx_bstruct_X);
+__pyx_L2:;
+Py_XDECREF(__pyx_v_assignments);
+Py_DECREF(__pyx_v_permutation);
+Py_XDECREF(__pyx_v_centroids);
+return __pyx_r;
+}
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":63
+ * 
+ * 
+ * def test():             # <<<<<<<<<<<<<<
+ *     #np.random.seed(1)
+ *     X = np.array( np.random.rand(4,3) )
+ */
+
+static PyObject *__pyx_pf_9py_kmeans_test(PyObject *__pyx_self, PyObject *unused); /*proto*/
+static PyObject *__pyx_pf_9py_kmeans_test(PyObject *__pyx_self, PyObject *unused) {
+PyObject *__pyx_v_X;
+PyObject *__pyx_v_clst;
+PyObject *__pyx_v_dist;
+PyObject *__pyx_v_labels;
+PyObject *__pyx_r;
+PyObject *__pyx_1 = 0;
+PyObject *__pyx_2 = 0;
+PyObject *__pyx_3 = 0;
+PyObject *__pyx_4 = 0;
+__pyx_self = __pyx_self;
+__pyx_v_X = Py_None; Py_INCREF(Py_None);
+__pyx_v_clst = Py_None; Py_INCREF(Py_None);
+__pyx_v_dist = Py_None; Py_INCREF(Py_None);
+__pyx_v_labels = Py_None; Py_INCREF(Py_None);
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":65
+ * def test():
+ *     #np.random.seed(1)
+ *     X = np.array( np.random.rand(4,3) )             # <<<<<<<<<<<<<<
+ *     print X
+ *     clst,dist,labels = kmeans(X, 2)
+ */
+__pyx_1 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyObject_GetAttr(__pyx_1, __pyx_kp_array); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+__pyx_1 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_3 = PyObject_GetAttr(__pyx_1, __pyx_kp_random); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+__pyx_1 = PyObject_GetAttr(__pyx_3, __pyx_kp_rand); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_3); __pyx_3 = 0;
+__pyx_3 = PyTuple_New(2); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_int_4);
+PyTuple_SET_ITEM(__pyx_3, 0, __pyx_int_4);
+Py_INCREF(__pyx_int_3);
+PyTuple_SET_ITEM(__pyx_3, 1, __pyx_int_3);
+__pyx_4 = PyObject_Call(__pyx_1, ((PyObject *)__pyx_3), NULL); if (unlikely(!__pyx_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+Py_DECREF(((PyObject *)__pyx_3)); __pyx_3 = 0;
+__pyx_1 = PyTuple_New(1); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+PyTuple_SET_ITEM(__pyx_1, 0, __pyx_4);
+__pyx_4 = 0;
+__pyx_3 = PyObject_Call(__pyx_2, ((PyObject *)__pyx_1), NULL); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+Py_DECREF(((PyObject *)__pyx_1)); __pyx_1 = 0;
+Py_DECREF(__pyx_v_X);
+__pyx_v_X = __pyx_3;
+__pyx_3 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":66
+ *     #np.random.seed(1)
+ *     X = np.array( np.random.rand(4,3) )
+ *     print X             # <<<<<<<<<<<<<<
+ *     clst,dist,labels = kmeans(X, 2)
+ * 
+ */
+__pyx_4 = PyTuple_New(1); if (unlikely(!__pyx_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_v_X);
+PyTuple_SET_ITEM(__pyx_4, 0, __pyx_v_X);
+if (__Pyx_Print(((PyObject *)__pyx_4), 1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_4)); __pyx_4 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":67
+ *     X = np.array( np.random.rand(4,3) )
+ *     print X
+ *     clst,dist,labels = kmeans(X, 2)             # <<<<<<<<<<<<<<
+ * 
+ *     print "cluster centers=\n",clst
+ */
+__pyx_2 = __Pyx_GetName(__pyx_m, __pyx_kp_kmeans); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_1 = PyTuple_New(2); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_v_X);
+PyTuple_SET_ITEM(__pyx_1, 0, __pyx_v_X);
+Py_INCREF(__pyx_int_2);
+PyTuple_SET_ITEM(__pyx_1, 1, __pyx_int_2);
+__pyx_3 = PyObject_Call(__pyx_2, ((PyObject *)__pyx_1), NULL); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+Py_DECREF(((PyObject *)__pyx_1)); __pyx_1 = 0;
+if (PyTuple_CheckExact(__pyx_3) && PyTuple_GET_SIZE(__pyx_3) == 3) {
+  PyObject* tuple = __pyx_3;
+  __pyx_2 = PyTuple_GET_ITEM(tuple, 0);
+  Py_INCREF(__pyx_2);
+  Py_DECREF(__pyx_v_clst);
+  __pyx_v_clst = __pyx_2;
+  __pyx_2 = 0;
+  __pyx_1 = PyTuple_GET_ITEM(tuple, 1);
+  Py_INCREF(__pyx_1);
+  Py_DECREF(__pyx_v_dist);
+  __pyx_v_dist = __pyx_1;
+  __pyx_1 = 0;
+  __pyx_2 = PyTuple_GET_ITEM(tuple, 2);
+  Py_INCREF(__pyx_2);
+  Py_DECREF(__pyx_v_labels);
+  __pyx_v_labels = __pyx_2;
+  __pyx_2 = 0;
+  Py_DECREF(__pyx_3); __pyx_3 = 0;
+}
+else {
+  __pyx_4 = PyObject_GetIter(__pyx_3); if (unlikely(!__pyx_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_3); __pyx_3 = 0;
+  __pyx_2 = __Pyx_UnpackItem(__pyx_4, 0); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_v_clst);
+  __pyx_v_clst = __pyx_2;
+  __pyx_2 = 0;
+  __pyx_1 = __Pyx_UnpackItem(__pyx_4, 1); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_v_dist);
+  __pyx_v_dist = __pyx_1;
+  __pyx_1 = 0;
+  __pyx_2 = __Pyx_UnpackItem(__pyx_4, 2); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_v_labels);
+  __pyx_v_labels = __pyx_2;
+  __pyx_2 = 0;
+  if (__Pyx_EndUnpack(__pyx_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_4); __pyx_4 = 0;
+}
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":69
+ *     clst,dist,labels = kmeans(X, 2)
+ * 
+ *     print "cluster centers=\n",clst             # <<<<<<<<<<<<<<
+ *     print "dist=",dist
+ *     print "cluster labels",labels
+ */
+__pyx_1 = PyTuple_New(2); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_kp_19);
+PyTuple_SET_ITEM(__pyx_1, 0, __pyx_kp_19);
+Py_INCREF(__pyx_v_clst);
+PyTuple_SET_ITEM(__pyx_1, 1, __pyx_v_clst);
+if (__Pyx_Print(((PyObject *)__pyx_1), 1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_1)); __pyx_1 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":70
+ * 
+ *     print "cluster centers=\n",clst
+ *     print "dist=",dist             # <<<<<<<<<<<<<<
+ *     print "cluster labels",labels
+ */
+__pyx_2 = PyTuple_New(2); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_kp_20);
+PyTuple_SET_ITEM(__pyx_2, 0, __pyx_kp_20);
+Py_INCREF(__pyx_v_dist);
+PyTuple_SET_ITEM(__pyx_2, 1, __pyx_v_dist);
+if (__Pyx_Print(((PyObject *)__pyx_2), 1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 70; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":71
+ *     print "cluster centers=\n",clst
+ *     print "dist=",dist
+ *     print "cluster labels",labels             # <<<<<<<<<<<<<<
+ */
+__pyx_3 = PyTuple_New(2); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_kp_21);
+PyTuple_SET_ITEM(__pyx_3, 0, __pyx_kp_21);
+Py_INCREF(__pyx_v_labels);
+PyTuple_SET_ITEM(__pyx_3, 1, __pyx_v_labels);
+if (__Pyx_Print(((PyObject *)__pyx_3), 1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 71; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_3)); __pyx_3 = 0;
+
+__pyx_r = Py_None; Py_INCREF(Py_None);
+goto __pyx_L0;
+__pyx_L1_error:;
+Py_XDECREF(__pyx_1);
+Py_XDECREF(__pyx_2);
+Py_XDECREF(__pyx_3);
+Py_XDECREF(__pyx_4);
+__Pyx_AddTraceback("py_kmeans.test");
+__pyx_r = NULL;
+__pyx_L0:;
+Py_DECREF(__pyx_v_X);
+Py_DECREF(__pyx_v_clst);
+Py_DECREF(__pyx_v_dist);
+Py_DECREF(__pyx_v_labels);
+return __pyx_r;
+}
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":36
+ *         # experimental exception made for __getbuffer__ and __releasebuffer__
+ *         # -- the details of this may change.
+ *         def __getbuffer__(ndarray self, Py_buffer* info, int flags):             # <<<<<<<<<<<<<<
+ *             # This implementation of getbuffer is geared towards Cython
+ *             # requirements, and does not yet fullfill the PEP (specifically,
+ */
+
+static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
+static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
+int __pyx_v_t;
+char *__pyx_v_f;
+int __pyx_r;
+int __pyx_1;
+PyObject *__pyx_2 = 0;
+PyObject *__pyx_3 = 0;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":42
+ *             # so the flags are not even checked).
+ * 
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):             # <<<<<<<<<<<<<<
+ *                 raise RuntimeError("Py_intptr_t and Py_ssize_t differs in size, numpy.pxd does not support this")
+ * 
+ */
+__pyx_1 = ((sizeof(npy_intp)) != (sizeof(Py_ssize_t)));
+if (__pyx_1) {
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":43
+ * 
+ *             if sizeof(npy_intp) != sizeof(Py_ssize_t):
+ *                 raise RuntimeError("Py_intptr_t and Py_ssize_t differs in size, numpy.pxd does not support this")             # <<<<<<<<<<<<<<
+ * 
+ *             info.buf = PyArray_DATA(self)
+ */
+  __pyx_2 = PyTuple_New(1); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_INCREF(__pyx_kp_1);
+  PyTuple_SET_ITEM(__pyx_2, 0, __pyx_kp_1);
+  __pyx_3 = PyObject_Call(__pyx_builtin_RuntimeError, ((PyObject *)__pyx_2), NULL); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+  __Pyx_Raise(__pyx_3, 0, 0);
+  Py_DECREF(__pyx_3); __pyx_3 = 0;
+  {__pyx_filename = __pyx_f[1]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  goto __pyx_L5;
+}
+__pyx_L5:;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":45
+ *                 raise RuntimeError("Py_intptr_t and Py_ssize_t differs in size, numpy.pxd does not support this")
+ * 
+ *             info.buf = PyArray_DATA(self)             # <<<<<<<<<<<<<<
+ *             info.ndim = PyArray_NDIM(self)
+ *             info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ */
+__pyx_v_info->buf = PyArray_DATA(((PyArrayObject *)__pyx_v_self));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":46
+ * 
+ *             info.buf = PyArray_DATA(self)
+ *             info.ndim = PyArray_NDIM(self)             # <<<<<<<<<<<<<<
+ *             info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ *             info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ */
+__pyx_v_info->ndim = PyArray_NDIM(((PyArrayObject *)__pyx_v_self));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":47
+ *             info.buf = PyArray_DATA(self)
+ *             info.ndim = PyArray_NDIM(self)
+ *             info.strides = <Py_ssize_t*>PyArray_STRIDES(self)             # <<<<<<<<<<<<<<
+ *             info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL
+ */
+__pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(((PyArrayObject *)__pyx_v_self)));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":48
+ *             info.ndim = PyArray_NDIM(self)
+ *             info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ *             info.shape = <Py_ssize_t*>PyArray_DIMS(self)             # <<<<<<<<<<<<<<
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ */
+__pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(((PyArrayObject *)__pyx_v_self)));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":49
+ *             info.strides = <Py_ssize_t*>PyArray_STRIDES(self)
+ *             info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL             # <<<<<<<<<<<<<<
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ *             info.readonly = not PyArray_ISWRITEABLE(self)
+ */
+__pyx_v_info->suboffsets = NULL;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":50
+ *             info.shape = <Py_ssize_t*>PyArray_DIMS(self)
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)             # <<<<<<<<<<<<<<
+ *             info.readonly = not PyArray_ISWRITEABLE(self)
+ * 
+ */
+__pyx_v_info->itemsize = PyArray_ITEMSIZE(((PyArrayObject *)__pyx_v_self));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":51
+ *             info.suboffsets = NULL
+ *             info.itemsize = PyArray_ITEMSIZE(self)
+ *             info.readonly = not PyArray_ISWRITEABLE(self)             # <<<<<<<<<<<<<<
+ * 
+ *             # Formats that are not tested and working in Cython are not
+ */
+__pyx_v_info->readonly = (!PyArray_ISWRITEABLE(((PyArrayObject *)__pyx_v_self)));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":55
+ *             # Formats that are not tested and working in Cython are not
+ *             # made available from this pxd file yet.
+ *             cdef int t = PyArray_TYPE(self)             # <<<<<<<<<<<<<<
+ *             cdef char* f = NULL
+ *             if   t == NPY_BYTE:       f = "b"
+ */
+__pyx_v_t = PyArray_TYPE(((PyArrayObject *)__pyx_v_self));
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":56
+ *             # made available from this pxd file yet.
+ *             cdef int t = PyArray_TYPE(self)
+ *             cdef char* f = NULL             # <<<<<<<<<<<<<<
+ *             if   t == NPY_BYTE:       f = "b"
+ *             elif t == NPY_UBYTE:      f = "B"
+ */
+__pyx_v_f = NULL;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":57
+ *             cdef int t = PyArray_TYPE(self)
+ *             cdef char* f = NULL
+ *             if   t == NPY_BYTE:       f = "b"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_UBYTE:      f = "B"
+ *             elif t == NPY_SHORT:      f = "h"
+ */
+switch (__pyx_v_t) {
+  case NPY_BYTE:
+  __pyx_v_f = __pyx_k_2;
+  break;
+  case NPY_UBYTE:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":58
+ *             cdef char* f = NULL
+ *             if   t == NPY_BYTE:       f = "b"
+ *             elif t == NPY_UBYTE:      f = "B"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_SHORT:      f = "h"
+ *             elif t == NPY_USHORT:     f = "H"
+ */
+  __pyx_v_f = __pyx_k_3;
+  break;
+  case NPY_SHORT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":59
+ *             if   t == NPY_BYTE:       f = "b"
+ *             elif t == NPY_UBYTE:      f = "B"
+ *             elif t == NPY_SHORT:      f = "h"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_USHORT:     f = "H"
+ *             elif t == NPY_INT:        f = "i"
+ */
+  __pyx_v_f = __pyx_k_4;
+  break;
+  case NPY_USHORT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":60
+ *             elif t == NPY_UBYTE:      f = "B"
+ *             elif t == NPY_SHORT:      f = "h"
+ *             elif t == NPY_USHORT:     f = "H"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_INT:        f = "i"
+ *             elif t == NPY_UINT:       f = "I"
+ */
+  __pyx_v_f = __pyx_k_5;
+  break;
+  case NPY_INT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":61
+ *             elif t == NPY_SHORT:      f = "h"
+ *             elif t == NPY_USHORT:     f = "H"
+ *             elif t == NPY_INT:        f = "i"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_UINT:       f = "I"
+ *             elif t == NPY_LONG:       f = "l"
+ */
+  __pyx_v_f = __pyx_k_6;
+  break;
+  case NPY_UINT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":62
+ *             elif t == NPY_USHORT:     f = "H"
+ *             elif t == NPY_INT:        f = "i"
+ *             elif t == NPY_UINT:       f = "I"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONG:       f = "l"
+ *             elif t == NPY_ULONG:      f = "L"
+ */
+  __pyx_v_f = __pyx_k_7;
+  break;
+  case NPY_LONG:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":63
+ *             elif t == NPY_INT:        f = "i"
+ *             elif t == NPY_UINT:       f = "I"
+ *             elif t == NPY_LONG:       f = "l"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_ULONG:      f = "L"
+ *             elif t == NPY_LONGLONG:   f = "q"
+ */
+  __pyx_v_f = __pyx_k_8;
+  break;
+  case NPY_ULONG:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":64
+ *             elif t == NPY_UINT:       f = "I"
+ *             elif t == NPY_LONG:       f = "l"
+ *             elif t == NPY_ULONG:      f = "L"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONGLONG:   f = "q"
+ *             elif t == NPY_ULONGLONG:  f = "Q"
+ */
+  __pyx_v_f = __pyx_k_9;
+  break;
+  case NPY_LONGLONG:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":65
+ *             elif t == NPY_LONG:       f = "l"
+ *             elif t == NPY_ULONG:      f = "L"
+ *             elif t == NPY_LONGLONG:   f = "q"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_ULONGLONG:  f = "Q"
+ *             elif t == NPY_FLOAT:      f = "f"
+ */
+  __pyx_v_f = __pyx_k_10;
+  break;
+  case NPY_ULONGLONG:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":66
+ *             elif t == NPY_ULONG:      f = "L"
+ *             elif t == NPY_LONGLONG:   f = "q"
+ *             elif t == NPY_ULONGLONG:  f = "Q"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_FLOAT:      f = "f"
+ *             elif t == NPY_DOUBLE:     f = "d"
+ */
+  __pyx_v_f = __pyx_k_11;
+  break;
+  case NPY_FLOAT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":67
+ *             elif t == NPY_LONGLONG:   f = "q"
+ *             elif t == NPY_ULONGLONG:  f = "Q"
+ *             elif t == NPY_FLOAT:      f = "f"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_DOUBLE:     f = "d"
+ *             elif t == NPY_LONGDOUBLE: f = "g"
+ */
+  __pyx_v_f = __pyx_k_12;
+  break;
+  case NPY_DOUBLE:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":68
+ *             elif t == NPY_ULONGLONG:  f = "Q"
+ *             elif t == NPY_FLOAT:      f = "f"
+ *             elif t == NPY_DOUBLE:     f = "d"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_LONGDOUBLE: f = "g"
+ *             elif t == NPY_OBJECT:     f = "O"
+ */
+  __pyx_v_f = __pyx_k_13;
+  break;
+  case NPY_LONGDOUBLE:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":69
+ *             elif t == NPY_FLOAT:      f = "f"
+ *             elif t == NPY_DOUBLE:     f = "d"
+ *             elif t == NPY_LONGDOUBLE: f = "g"             # <<<<<<<<<<<<<<
+ *             elif t == NPY_OBJECT:     f = "O"
+ * 
+ */
+  __pyx_v_f = __pyx_k_14;
+  break;
+  case NPY_OBJECT:
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":70
+ *             elif t == NPY_DOUBLE:     f = "d"
+ *             elif t == NPY_LONGDOUBLE: f = "g"
+ *             elif t == NPY_OBJECT:     f = "O"             # <<<<<<<<<<<<<<
+ * 
+ *             if f == NULL:
+ */
+  __pyx_v_f = __pyx_k_15;
+  break;
+}
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":72
+ *             elif t == NPY_OBJECT:     f = "O"
+ * 
+ *             if f == NULL:             # <<<<<<<<<<<<<<
+ *                 raise ValueError("only objects, int and float dtypes supported for ndarray buffer access so far (dtype is %d)" % t)
+ *             info.format = f
+ */
+__pyx_1 = (__pyx_v_f == NULL);
+if (__pyx_1) {
+
+  /* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":73
+ * 
+ *             if f == NULL:
+ *                 raise ValueError("only objects, int and float dtypes supported for ndarray buffer access so far (dtype is %d)" % t)             # <<<<<<<<<<<<<<
+ *             info.format = f
+ * 
+ */
+  __pyx_2 = PyInt_FromLong(__pyx_v_t); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_3 = PyNumber_Remainder(__pyx_kp_16, __pyx_2); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(__pyx_2); __pyx_2 = 0;
+  __pyx_2 = PyTuple_New(1); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  PyTuple_SET_ITEM(__pyx_2, 0, __pyx_3);
+  __pyx_3 = 0;
+  __pyx_3 = PyObject_Call(__pyx_builtin_ValueError, ((PyObject *)__pyx_2), NULL); if (unlikely(!__pyx_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  Py_DECREF(((PyObject *)__pyx_2)); __pyx_2 = 0;
+  __Pyx_Raise(__pyx_3, 0, 0);
+  Py_DECREF(__pyx_3); __pyx_3 = 0;
+  {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  goto __pyx_L6;
+}
+__pyx_L6:;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":74
+ *             if f == NULL:
+ *                 raise ValueError("only objects, int and float dtypes supported for ndarray buffer access so far (dtype is %d)" % t)
+ *             info.format = f             # <<<<<<<<<<<<<<
+ * 
+ * 
+ */
+__pyx_v_info->format = __pyx_v_f;
+
+__pyx_r = 0;
+goto __pyx_L0;
+__pyx_L1_error:;
+Py_XDECREF(__pyx_2);
+Py_XDECREF(__pyx_3);
+__Pyx_AddTraceback("numpy.ndarray.__getbuffer__");
+__pyx_r = -1;
+__pyx_L0:;
+return __pyx_r;
+}
+
+static struct PyMethodDef __pyx_methods[] = {
+{"kmeans", (PyCFunction)__pyx_pf_9py_kmeans_kmeans, METH_VARARGS|METH_KEYWORDS, __pyx_doc_9py_kmeans_kmeans},
+{"test", (PyCFunction)__pyx_pf_9py_kmeans_test, METH_NOARGS, 0},
+{0, 0, 0, 0}
+};
+
+static void __pyx_init_filenames(void); /*proto*/
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef __pyx_moduledef = {
+  PyModuleDef_HEAD_INIT,
+  "py_kmeans",
+  0, /* m_doc */
+  -1, /* m_size */
+  __pyx_methods /* m_methods */,
+  NULL, /* m_reload */
+  NULL, /* m_traverse */
+  NULL, /* m_clear */
+  NULL /* m_free */
+};
+#endif
+
+static __Pyx_StringTabEntry __pyx_string_tab[] = {
+  {&__pyx_kp_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 1, 1, 1},
+  {&__pyx_kp_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 1, 1},
+  {&__pyx_kp_double, __pyx_k_double, sizeof(__pyx_k_double), 1, 1, 1},
+  {&__pyx_kp_DTYPE, __pyx_k_DTYPE, sizeof(__pyx_k_DTYPE), 1, 1, 1},
+  {&__pyx_kp_ctypes, __pyx_k_ctypes, sizeof(__pyx_k_ctypes), 1, 1, 1},
+  {&__pyx_kp_c_uint, __pyx_k_c_uint, sizeof(__pyx_k_c_uint), 1, 1, 1},
+  {&__pyx_kp_c_double, __pyx_k_c_double, sizeof(__pyx_k_c_double), 1, 1, 1},
+  {&__pyx_kp_min, __pyx_k_min, sizeof(__pyx_k_min), 1, 1, 1},
+  {&__pyx_kp_empty, __pyx_k_empty, sizeof(__pyx_k_empty), 1, 1, 1},
+  {&__pyx_kp_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 1, 1, 1},
+  {&__pyx_kp_order, __pyx_k_order, sizeof(__pyx_k_order), 1, 1, 1},
+  {&__pyx_kp_17, __pyx_k_17, sizeof(__pyx_k_17), 0, 1, 0},
+  {&__pyx_kp_random, __pyx_k_random, sizeof(__pyx_k_random), 1, 1, 1},
+  {&__pyx_kp_permutation, __pyx_k_permutation, sizeof(__pyx_k_permutation), 1, 1, 1},
+  {&__pyx_kp_range, __pyx_k_range, sizeof(__pyx_k_range), 1, 1, 1},
+  {&__pyx_kp_array, __pyx_k_array, sizeof(__pyx_k_array), 1, 1, 1},
+  {&__pyx_kp_18, __pyx_k_18, sizeof(__pyx_k_18), 0, 1, 0},
+  {&__pyx_kp_rand, __pyx_k_rand, sizeof(__pyx_k_rand), 1, 1, 1},
+  {&__pyx_kp_kmeans, __pyx_k_kmeans, sizeof(__pyx_k_kmeans), 0, 1, 1},
+  {&__pyx_kp_19, __pyx_k_19, sizeof(__pyx_k_19), 0, 0, 0},
+  {&__pyx_kp_20, __pyx_k_20, sizeof(__pyx_k_20), 0, 0, 0},
+  {&__pyx_kp_21, __pyx_k_21, sizeof(__pyx_k_21), 0, 0, 0},
+  {&__pyx_kp___getbuffer__, __pyx_k___getbuffer__, sizeof(__pyx_k___getbuffer__), 0, 1, 1},
+  {&__pyx_kp_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 1, 1, 1},
+  {&__pyx_kp_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 1, 1, 1},
+  {&__pyx_kp_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 0},
+  {&__pyx_kp_16, __pyx_k_16, sizeof(__pyx_k_16), 0, 0, 0},
+  {0, 0, 0, 0, 0, 0}
+};
+static int __Pyx_InitCachedBuiltins(void) {
+  __pyx_builtin_min = __Pyx_GetName(__pyx_b, __pyx_kp_min); if (!__pyx_builtin_min) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_kp_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_RuntimeError = __Pyx_GetName(__pyx_b, __pyx_kp_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_kp_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 73; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+  return 0;
+  __pyx_L1_error:;
+  return -1;
+}
+
+static int __Pyx_InitGlobals(void) {
+  __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  __pyx_int_3 = PyInt_FromLong(3); if (unlikely(!__pyx_int_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+  return 0;
+  __pyx_L1_error:;
+  return -1;
+}
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initpy_kmeans(void); /*proto*/
+PyMODINIT_FUNC initpy_kmeans(void)
+#else
+PyMODINIT_FUNC PyInit_py_kmeans(void); /*proto*/
+PyMODINIT_FUNC PyInit_py_kmeans(void)
+#endif
+{
+PyObject *__pyx_1 = 0;
+PyObject *__pyx_2 = 0;
+__pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+/*--- Libary function declarations ---*/
+__pyx_init_filenames();
+/*--- Initialize various global constants etc. ---*/
+if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+/*--- Module creation code ---*/
+#if PY_MAJOR_VERSION < 3
+__pyx_m = Py_InitModule4("py_kmeans", __pyx_methods, 0, 0, PYTHON_API_VERSION);
+#else
+__pyx_m = PyModule_Create(&__pyx_moduledef);
+#endif
+if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+#if PY_MAJOR_VERSION < 3
+Py_INCREF(__pyx_m);
+#endif
+__pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME);
+if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+/*--- Builtin init code ---*/
+if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_skip_dispatch = 0;
+/*--- Global init code ---*/
+/*--- Function export code ---*/
+/*--- Type init code ---*/
+/*--- Type import code ---*/
+__pyx_ptype_5numpy_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject)); if (unlikely(!__pyx_ptype_5numpy_ndarray)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+/*--- Function import code ---*/
+/*--- Execution code ---*/
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":8
+ * 
+ * from __future__ import division
+ * import numpy as np             # <<<<<<<<<<<<<<
+ * cimport numpy as np
+ * 
+ */
+__pyx_1 = __Pyx_Import(__pyx_kp_numpy, 0); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (PyObject_SetAttr(__pyx_m, __pyx_kp_np, __pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":12
+ * 
+ * # Define data type
+ * DTYPE = np.double             # <<<<<<<<<<<<<<
+ * ctypedef np.double_t DTYPE_t
+ * 
+ */
+__pyx_1 = __Pyx_GetName(__pyx_m, __pyx_kp_np); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+__pyx_2 = PyObject_GetAttr(__pyx_1, __pyx_kp_double); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+if (PyObject_SetAttr(__pyx_m, __pyx_kp_DTYPE, __pyx_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+
+/* "/mnt/mpg/working/projects/mpi_kmeans/py_kmeans.pyx":19
+ *     double c_kmeans "kmeans" (double *CX, double *X,unsigned int *assignment,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter, unsigned int nr_restarts)
+ * 
+ * from ctypes import c_uint, c_double             # <<<<<<<<<<<<<<
+ * 
+ * def kmeans(np.ndarray[DTYPE_t, ndim=2] X, unsigned int num_clusters, unsigned int maxiter=0, unsigned int num_runs=1):
+ */
+__pyx_1 = PyList_New(2); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_INCREF(__pyx_kp_c_uint);
+PyList_SET_ITEM(__pyx_1, 0, __pyx_kp_c_uint);
+Py_INCREF(__pyx_kp_c_double);
+PyList_SET_ITEM(__pyx_1, 1, __pyx_kp_c_double);
+__pyx_2 = __Pyx_Import(__pyx_kp_ctypes, ((PyObject *)__pyx_1)); if (unlikely(!__pyx_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(((PyObject *)__pyx_1)); __pyx_1 = 0;
+__pyx_1 = PyObject_GetAttr(__pyx_2, __pyx_kp_c_uint); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (PyObject_SetAttr(__pyx_m, __pyx_kp_c_uint, __pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+__pyx_1 = PyObject_GetAttr(__pyx_2, __pyx_kp_c_double); if (unlikely(!__pyx_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+if (PyObject_SetAttr(__pyx_m, __pyx_kp_c_double, __pyx_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 19; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+Py_DECREF(__pyx_1); __pyx_1 = 0;
+Py_DECREF(__pyx_2); __pyx_2 = 0;
+
+/* "/usr/lib64/python2.5/site-packages/Cython-0.9.8.1.1-py2.5-linux-x86_64.egg/Cython/Includes/numpy.pxd":36
+ *         # experimental exception made for __getbuffer__ and __releasebuffer__
+ *         # -- the details of this may change.
+ *         def __getbuffer__(ndarray self, Py_buffer* info, int flags):             # <<<<<<<<<<<<<<
+ *             # This implementation of getbuffer is geared towards Cython
+ *             # requirements, and does not yet fullfill the PEP (specifically,
+ */
+#if PY_MAJOR_VERSION < 3
+return;
+#else
+return __pyx_m;
+#endif
+__pyx_L1_error:;
+Py_XDECREF(__pyx_1);
+Py_XDECREF(__pyx_2);
+__Pyx_AddTraceback("py_kmeans");
+#if PY_MAJOR_VERSION >= 3
+return NULL;
+#endif
+}
+
+static const char *__pyx_filenames[] = {
+"py_kmeans.pyx",
+"numpy.pxd",
+};
+
+/* Runtime support code */
+
+static void __pyx_init_filenames(void) {
+__pyx_f = __pyx_filenames;
+}
+
+static INLINE void __Pyx_SafeReleaseBuffer(PyObject* obj, Py_buffer* info) {
+  if (info->buf == NULL) return;
+  if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL;
+  __Pyx_ReleaseBuffer(obj, info);
+}
+
+static INLINE void __Pyx_ZeroBuffer(Py_buffer* buf) {
+  buf->buf = NULL;
+  buf->strides = __Pyx_zeros;
+  buf->shape = __Pyx_zeros;
+  buf->suboffsets = __Pyx_minusones;
+}
+
+static INLINE const char* __Pyx_ConsumeWhitespace(const char* ts) {
+  while (1) {
+    switch (*ts) {
+      case 10:
+      case 13:
+      case ' ':
+        ++ts;
+      default:
+        return ts;
+    }
+  }
+}
+
+static INLINE const char* __Pyx_BufferTypestringCheckEndian(const char* ts) {
+  int num = 1;
+  int little_endian = ((char*)&num)[0];
+  int ok = 1;
+  switch (*ts) {
+    case '@':
+    case '=':
+      ++ts; break;
+    case '<':
+      if (little_endian) ++ts;
+      else ok = 0;
+      break;
+    case '>':
+    case '!':
+      if (!little_endian) ++ts;
+      else ok = 0;
+      break;
+  }
+  if (!ok) {
+    PyErr_Format(PyExc_ValueError, "Buffer has wrong endianness (rejecting on '%s')", ts);
+    return NULL;
+  }
+  return ts;
+}
+
+static void __Pyx_BufferNdimError(Py_buffer* buffer, int expected_ndim) {
+  PyErr_Format(PyExc_ValueError,
+               "Buffer has wrong number of dimensions (expected %d, got %d)",
+               expected_ndim, buffer->ndim);
+}
+
+
+static const char* __Pyx_BufferTypestringCheck_item_nn___pyx_t_9py_kmeans_DTYPE_t(const char* ts) {
+  int ok;
+  if (*ts == '1') ++ts;
+  switch (*ts) {
+    case 'f': ok = (sizeof(__pyx_t_9py_kmeans_DTYPE_t) == sizeof(float) && (__pyx_t_9py_kmeans_DTYPE_t)-1 < 0); break;
+    case 'd': ok = (sizeof(__pyx_t_9py_kmeans_DTYPE_t) == sizeof(double) && (__pyx_t_9py_kmeans_DTYPE_t)-1 < 0); break;
+    case 'g': ok = (sizeof(__pyx_t_9py_kmeans_DTYPE_t) == sizeof(long double) && (__pyx_t_9py_kmeans_DTYPE_t)-1 < 0); break;  default: ok = 0;
+  }
+  if (!ok) {
+      PyErr_Format(PyExc_ValueError, "Buffer datatype mismatch (rejecting on '%s')", ts);
+      return NULL;
+  } else return ts + 1;
+  
+}
+
+static int __Pyx_GetBuffer_nn___pyx_t_9py_kmeans_DTYPE_t(PyObject* obj, Py_buffer* buf, int flags, int nd) {
+  const char* ts;
+  if (obj == Py_None) {
+    __Pyx_ZeroBuffer(buf);
+    return 0;
+  }
+  buf->buf = NULL;
+  if (__Pyx_GetBuffer(obj, buf, flags) == -1) goto fail;
+  if (buf->ndim != nd) {
+    __Pyx_BufferNdimError(buf, nd);
+    goto fail;
+  }
+  ts = buf->format;
+  ts = __Pyx_ConsumeWhitespace(ts);
+  ts = __Pyx_BufferTypestringCheckEndian(ts);
+  if (!ts) goto fail;
+  ts = __Pyx_ConsumeWhitespace(ts);
+  ts = __Pyx_BufferTypestringCheck_item_nn___pyx_t_9py_kmeans_DTYPE_t(ts);
+  if (!ts) goto fail;
+  ts = __Pyx_ConsumeWhitespace(ts);
+  if (*ts != 0) {
+    PyErr_Format(PyExc_ValueError,
+      "Expected non-struct buffer data type (expected end, got '%s')", ts);
+    goto fail;
+  }
+  if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones;
+  return 0;
+fail:;
+  __Pyx_ZeroBuffer(buf);
+  return -1;
+}
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, char *name, int exact) {
+    if (!type) {
+        PyErr_Format(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (none_allowed && obj == Py_None) return 1;
+    else if (exact) {
+        if (Py_TYPE(obj) == type) return 1;
+    }
+    else {
+        if (PyObject_TypeCheck(obj, type)) return 1;
+    }
+    PyErr_Format(PyExc_TypeError,
+        "Argument '%s' has incorrect type (expected %s, got %s)",
+        name, type->tp_name, Py_TYPE(obj)->tp_name);
+    return 0;
+}
+
+static INLINE void __Pyx_RaiseArgtupleTooLong(
+    Py_ssize_t num_expected,
+    Py_ssize_t num_found)
+{
+    const char* error_message =
+    #if PY_VERSION_HEX < 0x02050000
+        "function takes at most %d positional arguments (%d given)";
+    #else
+        "function takes at most %zd positional arguments (%zd given)";
+    #endif
+    PyErr_Format(PyExc_TypeError, error_message, num_expected, num_found);
+}
+
+#if (PY_MAJOR_VERSION < 3) && !(Py_TPFLAGS_DEFAULT & Py_TPFLAGS_HAVE_NEWBUFFER)
+static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) {
+  if (PyObject_TypeCheck(obj, __pyx_ptype_5numpy_ndarray)) return __pyx_pf_5numpy_7ndarray___getbuffer__(obj, view, flags);
+  else {
+  PyErr_Format(PyExc_TypeError, "'%100s' does not have the buffer interface", Py_TYPE(obj)->tp_name);
+  return -1;
+    }
+}
+
+static void __Pyx_ReleaseBuffer(PyObject *obj, Py_buffer *view) {
+
+}
+
+#endif
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list) {
+    PyObject *__import__ = 0;
+    PyObject *empty_list = 0;
+    PyObject *module = 0;
+    PyObject *global_dict = 0;
+    PyObject *empty_dict = 0;
+    PyObject *list;
+    __import__ = PyObject_GetAttrString(__pyx_b, "__import__");
+    if (!__import__)
+        goto bad;
+    if (from_list)
+        list = from_list;
+    else {
+        empty_list = PyList_New(0);
+        if (!empty_list)
+            goto bad;
+        list = empty_list;
+    }
+    global_dict = PyModule_GetDict(__pyx_m);
+    if (!global_dict)
+        goto bad;
+    empty_dict = PyDict_New();
+    if (!empty_dict)
+        goto bad;
+    module = PyObject_CallFunction(__import__, "OOOO",
+        name, global_dict, empty_dict, list);
+bad:
+    Py_XDECREF(empty_list);
+    Py_XDECREF(__import__);
+    Py_XDECREF(empty_dict);
+    return module;
+}
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
+    PyObject *result;
+    result = PyObject_GetAttr(dict, name);
+    if (!result)
+        PyErr_SetObject(PyExc_NameError, name);
+    return result;
+}
+
+static int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) {
+    if (!type) {
+        PyErr_Format(PyExc_SystemError, "Missing type object");
+        return 0;
+    }
+    if (obj == Py_None || PyObject_TypeCheck(obj, type))
+        return 1;
+    PyErr_Format(PyExc_TypeError, "Cannot convert %s to %s",
+        Py_TYPE(obj)->tp_name, type->tp_name);
+    return 0;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject *__Pyx_GetStdout(void) {
+    PyObject *f = PySys_GetObject("stdout");
+    if (!f) {
+        PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
+    }
+    return f;
+}
+
+static int __Pyx_Print(PyObject *arg_tuple, int newline) {
+    PyObject *f;
+    PyObject* v;
+    int i;
+    
+    if (!(f = __Pyx_GetStdout()))
+        return -1;
+    for (i=0; i < PyTuple_GET_SIZE(arg_tuple); i++) {
+        if (PyFile_SoftSpace(f, 1)) {
+            if (PyFile_WriteString(" ", f) < 0)
+                return -1;
+        }
+        v = PyTuple_GET_ITEM(arg_tuple, i);
+        if (PyFile_WriteObject(v, f, Py_PRINT_RAW) < 0)
+            return -1;
+        if (PyString_Check(v)) {
+            char *s = PyString_AsString(v);
+            Py_ssize_t len = PyString_Size(v);
+            if (len > 0 &&
+                isspace(Py_CHARMASK(s[len-1])) &&
+                s[len-1] != ' ')
+                    PyFile_SoftSpace(f, 0);
+        }
+    }
+    if (newline) {
+        if (PyFile_WriteString("\n", f) < 0)
+            return -1;
+        PyFile_SoftSpace(f, 0);
+    }
+    return 0;
+}
+
+#else /* Python 3 has a print function */
+static int __Pyx_Print(PyObject *arg_tuple, int newline) {
+    PyObject* kwargs = 0;
+    PyObject* result = 0;
+    PyObject* end_string;
+    if (!__pyx_print) {
+        __pyx_print = PyObject_GetAttrString(__pyx_b, "print");
+        if (!__pyx_print)
+            return -1;
+    }
+    if (!newline) {
+        if (!__pyx_print_kwargs) {
+            __pyx_print_kwargs = PyDict_New();
+            if (!__pyx_print_kwargs)
+                return -1;
+            end_string = PyUnicode_FromStringAndSize(" ", 1);
+            if (!end_string)
+                return -1;
+            if (PyDict_SetItemString(__pyx_print_kwargs, "end", end_string) < 0) {
+                Py_DECREF(end_string);
+                return -1;
+            }
+            Py_DECREF(end_string);
+        }
+        kwargs = __pyx_print_kwargs;
+    }
+    result = PyObject_Call(__pyx_print, arg_tuple, kwargs);
+    if (!result)
+        return -1;
+    Py_DECREF(result);
+    return 0;
+}
+#endif
+
+static PyObject *__Pyx_UnpackItem(PyObject *iter, Py_ssize_t index) {
+    PyObject *item;
+    if (!(item = PyIter_Next(iter))) {
+        if (!PyErr_Occurred()) {
+            PyErr_Format(PyExc_ValueError,
+                #if PY_VERSION_HEX < 0x02050000
+                    "need more than %d values to unpack", (int)index);
+                #else
+                    "need more than %zd values to unpack", index);
+                #endif
+        }
+    }
+    return item;
+}
+
+static int __Pyx_EndUnpack(PyObject *iter) {
+    PyObject *item;
+    if ((item = PyIter_Next(iter))) {
+        Py_DECREF(item);
+        PyErr_SetString(PyExc_ValueError, "too many values to unpack");
+        return -1;
+    }
+    else if (!PyErr_Occurred())
+        return 0;
+    else
+        return -1;
+}
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb) {
+    Py_XINCREF(type);
+    Py_XINCREF(value);
+    Py_XINCREF(tb);
+    /* First, check the traceback argument, replacing None with NULL. */
+    if (tb == Py_None) {
+        Py_DECREF(tb);
+        tb = 0;
+    }
+    else if (tb != NULL && !PyTraceBack_Check(tb)) {
+        PyErr_SetString(PyExc_TypeError,
+            "raise: arg 3 must be a traceback or None");
+        goto raise_error;
+    }
+    /* Next, replace a missing value with None */
+    if (value == NULL) {
+        value = Py_None;
+        Py_INCREF(value);
+    }
+    #if PY_VERSION_HEX < 0x02050000
+    if (!PyClass_Check(type))
+    #else
+    if (!PyType_Check(type))
+    #endif
+    {
+        /* Raising an instance.  The value should be a dummy. */
+        if (value != Py_None) {
+            PyErr_SetString(PyExc_TypeError,
+                "instance exception may not have a separate value");
+            goto raise_error;
+        }
+        /* Normalize to raise <class>, <instance> */
+        Py_DECREF(value);
+        value = type;
+        #if PY_VERSION_HEX < 0x02050000
+            if (PyInstance_Check(type)) {
+                type = (PyObject*) ((PyInstanceObject*)type)->in_class;
+                Py_INCREF(type);
+            }
+            else {
+                type = 0;
+                PyErr_SetString(PyExc_TypeError,
+                    "raise: exception must be an old-style class or instance");
+                goto raise_error;
+            }
+        #else
+            type = (PyObject*) Py_TYPE(type);
+            Py_INCREF(type);
+            if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+                PyErr_SetString(PyExc_TypeError,
+                    "raise: exception class must be a subclass of BaseException");
+                goto raise_error;
+            }
+        #endif
+    }
+    PyErr_Restore(type, value, tb);
+    return;
+raise_error:
+    Py_XDECREF(value);
+    Py_XDECREF(type);
+    Py_XDECREF(tb);
+    return;
+}
+
+#ifndef __PYX_HAVE_RT_ImportType
+#define __PYX_HAVE_RT_ImportType
+static PyTypeObject *__Pyx_ImportType(char *module_name, char *class_name,
+    long size)
+{
+    PyObject *py_module = 0;
+    PyObject *result = 0;
+    PyObject *py_name = 0;
+
+    #if PY_MAJOR_VERSION < 3
+    py_name = PyString_FromString(module_name);
+    #else
+    py_name = PyUnicode_FromString(module_name);
+    #endif
+    if (!py_name)
+        goto bad;
+
+    py_module = __Pyx_ImportModule(module_name);
+    if (!py_module)
+        goto bad;
+    result = PyObject_GetAttrString(py_module, class_name);
+    if (!result)
+        goto bad;
+    if (!PyType_Check(result)) {
+        PyErr_Format(PyExc_TypeError, 
+            "%s.%s is not a type object",
+            module_name, class_name);
+        goto bad;
+    }
+    if (((PyTypeObject *)result)->tp_basicsize != size) {
+        PyErr_Format(PyExc_ValueError, 
+            "%s.%s does not appear to be the correct type object",
+            module_name, class_name);
+        goto bad;
+    }
+    return (PyTypeObject *)result;
+bad:
+    Py_XDECREF(py_name);
+    Py_XDECREF(result);
+    return 0;
+}
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportModule
+#define __PYX_HAVE_RT_ImportModule
+static PyObject *__Pyx_ImportModule(char *name) {
+    PyObject *py_name = 0;
+    PyObject *py_module = 0;
+
+    #if PY_MAJOR_VERSION < 3
+    py_name = PyString_FromString(name);
+    #else
+    py_name = PyUnicode_FromString(name);
+    #endif
+    if (!py_name)
+        goto bad;
+    py_module = PyImport_Import(py_name);
+    Py_DECREF(py_name);
+    return py_module;
+bad:
+    Py_XDECREF(py_name);
+    return 0;
+}
+#endif
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+
+static void __Pyx_AddTraceback(const char *funcname) {
+    PyObject *py_srcfile = 0;
+    PyObject *py_funcname = 0;
+    PyObject *py_globals = 0;
+    PyObject *empty_string = 0;
+    PyCodeObject *py_code = 0;
+    PyFrameObject *py_frame = 0;
+
+    #if PY_MAJOR_VERSION < 3
+    py_srcfile = PyString_FromString(__pyx_filename);
+    #else
+    py_srcfile = PyUnicode_FromString(__pyx_filename);
+    #endif
+    if (!py_srcfile) goto bad;
+    if (__pyx_clineno) {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, __pyx_clineno);
+        #else
+        py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, __pyx_clineno);
+        #endif
+    }
+    else {
+        #if PY_MAJOR_VERSION < 3
+        py_funcname = PyString_FromString(funcname);
+        #else
+        py_funcname = PyUnicode_FromString(funcname);
+        #endif
+    }
+    if (!py_funcname) goto bad;
+    py_globals = PyModule_GetDict(__pyx_m);
+    if (!py_globals) goto bad;
+    #if PY_MAJOR_VERSION < 3
+    empty_string = PyString_FromStringAndSize("", 0);
+    #else
+    empty_string = PyBytes_FromStringAndSize("", 0);
+    #endif
+    if (!empty_string) goto bad;
+    py_code = PyCode_New(
+        0,            /*int argcount,*/
+        #if PY_MAJOR_VERSION >= 3
+        0,            /*int kwonlyargcount,*/
+        #endif
+        0,            /*int nlocals,*/
+        0,            /*int stacksize,*/
+        0,            /*int flags,*/
+        empty_string, /*PyObject *code,*/
+        __pyx_empty_tuple,  /*PyObject *consts,*/
+        __pyx_empty_tuple,  /*PyObject *names,*/
+        __pyx_empty_tuple,  /*PyObject *varnames,*/
+        __pyx_empty_tuple,  /*PyObject *freevars,*/
+        __pyx_empty_tuple,  /*PyObject *cellvars,*/
+        py_srcfile,   /*PyObject *filename,*/
+        py_funcname,  /*PyObject *name,*/
+        __pyx_lineno,   /*int firstlineno,*/
+        empty_string  /*PyObject *lnotab*/
+    );
+    if (!py_code) goto bad;
+    py_frame = PyFrame_New(
+        PyThreadState_Get(), /*PyThreadState *tstate,*/
+        py_code,             /*PyCodeObject *code,*/
+        py_globals,          /*PyObject *globals,*/
+        0                    /*PyObject *locals*/
+    );
+    if (!py_frame) goto bad;
+    py_frame->f_lineno = __pyx_lineno;
+    PyTraceBack_Here(py_frame);
+bad:
+    Py_XDECREF(py_srcfile);
+    Py_XDECREF(py_funcname);
+    Py_XDECREF(empty_string);
+    Py_XDECREF(py_code);
+    Py_XDECREF(py_frame);
+}
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+    while (t->p) {
+        #if PY_MAJOR_VERSION < 3
+        if (t->is_unicode && (!t->is_identifier)) {
+            *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+        } else if (t->intern) {
+            *t->p = PyString_InternFromString(t->s);
+        } else {
+            *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+        }
+        #else  /* Python 3+ has unicode identifiers */
+        if (t->is_identifier || (t->is_unicode && t->intern)) {
+            *t->p = PyUnicode_InternFromString(t->s);
+        } else if (t->is_unicode) {
+            *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+        } else {
+            *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
+        }
+        #endif
+        if (!*t->p)
+            return -1;
+        ++t;
+    }
+    return 0;
+}
+
+/* Type Conversion Functions */
+
+static INLINE Py_ssize_t __pyx_PyIndex_AsSsize_t(PyObject* b) {
+  Py_ssize_t ival;
+  PyObject* x = PyNumber_Index(b);
+  if (!x) return -1;
+  ival = PyInt_AsSsize_t(x);
+  Py_DECREF(x);
+  return ival;
+}
+
+static INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+   if (x == Py_True) return 1;
+   else if (x == Py_False) return 0;
+   else return PyObject_IsTrue(x);
+}
+
+static INLINE PY_LONG_LONG __pyx_PyInt_AsLongLong(PyObject* x) {
+    if (PyInt_CheckExact(x)) {
+        return PyInt_AS_LONG(x);
+    }
+    else if (PyLong_CheckExact(x)) {
+        return PyLong_AsLongLong(x);
+    }
+    else {
+        PY_LONG_LONG val;
+        PyObject* tmp = PyNumber_Int(x); if (!tmp) return (PY_LONG_LONG)-1;
+        val = __pyx_PyInt_AsLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+static INLINE unsigned PY_LONG_LONG __pyx_PyInt_AsUnsignedLongLong(PyObject* x) {
+    if (PyInt_CheckExact(x)) {
+        long val = PyInt_AS_LONG(x);
+        if (unlikely(val < 0)) {
+            PyErr_SetString(PyExc_TypeError, "Negative assignment to unsigned type.");
+            return (unsigned PY_LONG_LONG)-1;
+        }
+        return val;
+    }
+    else if (PyLong_CheckExact(x)) {
+        return PyLong_AsUnsignedLongLong(x);
+    }
+    else {
+        PY_LONG_LONG val;
+        PyObject* tmp = PyNumber_Int(x); if (!tmp) return (PY_LONG_LONG)-1;
+        val = __pyx_PyInt_AsUnsignedLongLong(tmp);
+        Py_DECREF(tmp);
+        return val;
+    }
+}
+
+
+static INLINE unsigned char __pyx_PyInt_unsigned_char(PyObject* x) {
+    if (sizeof(unsigned char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        unsigned char val = (unsigned char)long_val;
+        if (unlikely((val != long_val)  || (long_val < 0))) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to unsigned char");
+            return (unsigned char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE unsigned short __pyx_PyInt_unsigned_short(PyObject* x) {
+    if (sizeof(unsigned short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        unsigned short val = (unsigned short)long_val;
+        if (unlikely((val != long_val)  || (long_val < 0))) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to unsigned short");
+            return (unsigned short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE char __pyx_PyInt_char(PyObject* x) {
+    if (sizeof(char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        char val = (char)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to char");
+            return (char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE short __pyx_PyInt_short(PyObject* x) {
+    if (sizeof(short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        short val = (short)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to short");
+            return (short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE int __pyx_PyInt_int(PyObject* x) {
+    if (sizeof(int) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        int val = (int)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to int");
+            return (int)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE long __pyx_PyInt_long(PyObject* x) {
+    if (sizeof(long) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        long val = (long)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to long");
+            return (long)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed char __pyx_PyInt_signed_char(PyObject* x) {
+    if (sizeof(signed char) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed char val = (signed char)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed char");
+            return (signed char)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed short __pyx_PyInt_signed_short(PyObject* x) {
+    if (sizeof(signed short) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed short val = (signed short)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed short");
+            return (signed short)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed int __pyx_PyInt_signed_int(PyObject* x) {
+    if (sizeof(signed int) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed int val = (signed int)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed int");
+            return (signed int)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE signed long __pyx_PyInt_signed_long(PyObject* x) {
+    if (sizeof(signed long) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        signed long val = (signed long)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to signed long");
+            return (signed long)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
+static INLINE long double __pyx_PyInt_long_double(PyObject* x) {
+    if (sizeof(long double) < sizeof(long)) {
+        long long_val = __pyx_PyInt_AsLong(x);
+        long double val = (long double)long_val;
+        if (unlikely((val != long_val) )) {
+            PyErr_SetString(PyExc_OverflowError, "value too large to convert to long double");
+            return (long double)-1;
+        }
+        return val;
+    }
+    else {
+        return __pyx_PyInt_AsLong(x);
+    }
+}
+
diff --git a/py_kmeans.pyx b/py_kmeans.pyx
new file mode 100755
index 0000000..c595c2b
--- /dev/null
+++ b/py_kmeans.pyx
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+# Cython wrapper for the MPI-Kmeans library by Peter Gehler
+# (C) by Thomas Wiecki (wiecki at tuebingen.mpg.de), 2008
+# Based on the wrapper code of Christoph Lampert (christoph.lampert at tuebingen.mpg.de).
+# GPLv2 or later.
+
+from __future__ import division
+import numpy as np
+cimport numpy as np
+
+# Define data type
+DTYPE = np.double
+ctypedef np.double_t DTYPE_t
+
+# Define library call
+cdef extern from "mpi_kmeans.h":
+    double c_kmeans "kmeans" (double *CX, double *X,unsigned int *assignment,unsigned int dim,unsigned int npts,unsigned int nclus,unsigned int maxiter, unsigned int nr_restarts)
+
+from ctypes import c_uint, c_double
+
+def kmeans(np.ndarray[DTYPE_t, ndim=2] X, unsigned int num_clusters, unsigned int maxiter=0, unsigned int num_runs=1):
+    """Cython wrapper for Peter Gehlers accelerated MPI-Kmeans routine.
+    centroids, dist, assignments = kmeans(X, num_clusters, maxiter=0, num_runs=1)
+
+    --Input--
+    X            : input data (2D numpy array)
+    num_clusters : number of centroids to use (k)
+    [maxiter]    : how many iterations to run (setting this to 0 will run kmeans until it converges) (default is 0).
+    [num_runs}   : how many times to restart the clustering (default is 1).
+
+    --Output--
+    centroids    : the cluster centers
+    dist         : the sum squared error
+    assignments  : the centroids that were assigned to each data point
+    
+    Example:
+    import py_kmeans
+    import numpy as np
+    X = np.array( np.random.rand(4,3) )
+    clusters, dist, labels = py_kmeans.kmeans(X, 2)"""
+
+    # Initializing
+    cdef unsigned int num_points = X.shape[0]
+    cdef unsigned int dim = X.shape[1]
+    cdef double dist
+    num_clusters = <unsigned int> min(num_clusters, num_points)
+
+    # Init output array for assignments
+    cdef np.ndarray assignments=np.empty( (num_points), dtype=c_uint, order='C')
+
+    # Init output array for cluster centroids
+    permutation = np.random.permutation( range(num_points) )
+    cdef np.ndarray[DTYPE_t, ndim=2] centroids = np.array(X[permutation[:num_clusters],:], order='C')
+
+    # Call mpi_kmeans routine
+    dist = c_kmeans( <double *> centroids.data, <double *> X.data,
+		  <unsigned int *> assignments.data, dim, num_points,
+		  num_clusters, maxiter, num_runs)
+
+    return centroids, dist, (assignments+1)
+
+
+def test():
+    #np.random.seed(1)
+    X = np.array( np.random.rand(4,3) )
+    print X
+    clst,dist,labels = kmeans(X, 2)
+    
+    print "cluster centers=\n",clst
+    print "dist=",dist
+    print "cluster labels",labels
diff --git a/py_kmeans.so b/py_kmeans.so
new file mode 100755
index 0000000..4c4ace6
Binary files /dev/null and b/py_kmeans.so differ
diff --git a/test_code.m b/test_code.m
new file mode 100644
index 0000000..d7b60db
--- /dev/null
+++ b/test_code.m
@@ -0,0 +1,123 @@
+clear all
+
+
+%
+% Try some function calls
+% 
+try
+    mpi_assign_mex(randn(10,100),randn(20,10));
+    error('Dimension mismatch!');
+end
+
+try
+    [CX1,sse,s] = mpi_kmeans_mex(randn(10,100), 0,0);    
+    error('0 clusters');
+end
+
+%
+% a very easy test with known outcome
+%
+X = [2 0 0;
+     1 0 0;
+     0 0 2;
+     0 0 1;
+     0 0 3];
+
+[CX1,sse,s] = mpi_kmeans_mex(X',2,0,1000);
+
+targetCX = [0 1.5;
+	    0 0;
+	    2 0];
+
+d1 = sum(sum(abs(targetCX - CX1)));
+d2 = sum(sum(abs(targetCX(:,2:-1:1) - CX1)));
+
+assert(min(d1,d2)<1e-8);
+
+
+%
+% do 100 random tests
+%
+for i=1:100
+    
+    if i==1
+	dims = 1;
+    else
+	dims = ceil(80 * rand);
+    end
+    
+    nclusts = i;
+    npts = ceil(10000 * rand);
+    X = randn(dims,npts);
+    nr_restarts = round(3*rand);
+    if nclusts == 1
+	CX0 = 1;
+    else
+	CX0 = randn(dims,nclusts);
+    end
+    
+    maxiter = 0;
+
+    XX = X;
+    CC = CX0;
+    
+    t=cputime;
+    [CX1,sse,s] = mpi_kmeans_mex(X, CX0,maxiter,nr_restarts);    
+    fprintf('1: time needed %g\n',cputime-t);
+    
+    assert((sum(XX(:)-X(:)))==0,'Training points X changed');
+    assert((sum(CC(:)-CX0(:)))==0,'CX0 changed');
+    assert(~any(isnan(CX1(:))),'NaN in CX1');
+    
+    s2 = mpi_assign_mex(X,CX1);
+    assert(all(s2==s),'wrong assignment');
+
+    clear centr
+    if dims == 1
+	for i=1:size(X,2)
+	    [ignore,centr(i,1)] = min(( (CX1 - repmat(X(:,i),1,nclusts)).^2));
+	end
+    else
+	for i=1:size(X,2)
+	    [ignore,centr(i,1)] = min(sum( (CX1 - repmat(X(:,i),1,nclusts)).^2));
+	end
+    end
+    
+    assert(sum(abs([centr-double(s)]))==0,'wrong assignment');
+
+    
+    if (0)
+	t=cputime;
+	[CX2,sse2,s2] = mpi_kmeans_ordinary(X, CX0,maxiter);    
+	fprintf('2: time needed %g\n',cputime-t);
+	
+	if (sum(XX(:)-X(:)))~=0
+	    error('X changed\n');
+	end
+	if (sum(CC(:)-CX0(:)))~=0
+	    error('CX changed\n');
+	end
+
+	if any(isnan(CX2(:))) 
+	    error('CX2 isnan');
+	end
+
+	if (sum(sum(abs(CX1-CX2)))) > 1e-9
+	    sum(sum(abs(CX1-CX2)))
+	    error('bug: cluster centers not the same');
+	end
+	
+	if (sum(s-s2))~=0
+	    error('assignment not the same');
+	end
+	
+	if abs(sse-sse2) > 1e-8
+	    error('sse not the same');
+	end
+	
+
+    end
+    
+end
+
+fprintf('Test passed\n');

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/libmpikmeans.git



More information about the debian-science-commits mailing list