[Debian-astro-commits] [python-astropy] 03/04: FIX: improved detection of ascii fast_reader in non-fast parsers
Ole Streicher
olebole at moszumanska.debian.org
Thu Jan 19 11:36:56 UTC 2017
This is an automated email from the git hooks/post-receive script.
olebole pushed a commit to branch experimental
in repository python-astropy.
commit 52109dc398f434dc238688ed905aba4bf6acf68e
Author: Ole Streicher <olebole at debian.org>
Date: Thu Jan 19 09:57:04 2017 +0100
FIX: improved detection of ascii fast_reader in non-fast parsers
---
...ection-of-ascii-fast_reader-in-non-fast-p.patch | 646 +++++++++++++++++++++
debian/patches/series | 1 +
2 files changed, 647 insertions(+)
diff --git a/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch b/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch
new file mode 100644
index 0000000..08cb2f0
--- /dev/null
+++ b/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch
@@ -0,0 +1,646 @@
+From: Derek Homeier <dhomeie at gwdg.de>
+Date: Fri, 9 Dec 2016 01:15:41 +0100
+Subject: FIX: improved detection of ascii fast_reader in non-fast parsers
+
+Pull request: https://github.com/astropy/astropy/pull/5578
+---
+ astropy/io/ascii/core.py | 11 +-
+ astropy/io/ascii/cparser.pyx | 7 +-
+ astropy/io/ascii/tests/test_c_reader.py | 265 +++++++++++++++++++++++---------
+ astropy/io/ascii/tests/test_read.py | 7 +-
+ astropy/io/ascii/ui.py | 28 +++-
+ docs/io/ascii/fast_ascii_io.rst | 25 +--
+ docs/io/ascii/read.rst | 2 +
+ 7 files changed, 257 insertions(+), 88 deletions(-)
+
+diff --git a/astropy/io/ascii/core.py b/astropy/io/ascii/core.py
+index 48a3f58..0bf9f68 100644
+--- a/astropy/io/ascii/core.py
++++ b/astropy/io/ascii/core.py
+@@ -1345,8 +1345,17 @@ def _get_reader(Reader, Inputter=None, Outputter=None, **kwargs):
+ kwargs['Inputter'] = Inputter
+ return Reader(**kwargs)
+
++ # If user explicitly passed a fast reader with 'force' or with non-default
++ # options for the fast reader, raise an error for slow readers
+ if 'fast_reader' in kwargs:
+- del kwargs['fast_reader'] # ignore fast_reader parameter for slow readers
++ if kwargs['fast_reader'] == 'force' or \
++ isinstance(kwargs['fast_reader'] , dict):
++ raise ParameterError('fast_reader required with ' +
++ '{0}, but this is a slow reader: {1}'
++ .format(kwargs['fast_reader'], Reader))
++ else:
++ del kwargs['fast_reader'] # otherwise ignore fast_reader parameter
++
+ reader_kwargs = dict([k, v] for k, v in kwargs.items() if k not in extra_reader_pars)
+ reader = Reader(**reader_kwargs)
+
+diff --git a/astropy/io/ascii/cparser.pyx b/astropy/io/ascii/cparser.pyx
+index 1ac24c0..b5b7b05 100644
+--- a/astropy/io/ascii/cparser.pyx
++++ b/astropy/io/ascii/cparser.pyx
+@@ -386,7 +386,12 @@ cdef class CParser:
+ data_end = max(self.data_end - self.data_start, 0) # read nothing if data_end < 0
+
+ if tokenize(self.tokenizer, data_end, 0, <int>len(self.names)) != 0:
+- self.raise_error("an error occurred while parsing table data")
++ if self.tokenizer.code in (NOT_ENOUGH_COLS, TOO_MANY_COLS):
++ raise core.InconsistentTableError("Number of header columns " +
++ "({0}) inconsistent with data columns in data line {1}"
++ .format(self.tokenizer.num_cols, self.tokenizer.num_rows))
++ else:
++ self.raise_error("an error occurred while parsing table data")
+ elif self.tokenizer.num_rows == 0: # no data
+ return ([np.array([], dtype=np.int_)] * self.width, [])
+ self._set_fill_values()
+diff --git a/astropy/io/ascii/tests/test_c_reader.py b/astropy/io/ascii/tests/test_c_reader.py
+index cc28830..33b9e43 100644
+--- a/astropy/io/ascii/tests/test_c_reader.py
++++ b/astropy/io/ascii/tests/test_c_reader.py
+@@ -8,6 +8,7 @@ except ImportError: # cStringIO doesn't exist in Python 3
+
+ import os
+ import functools
++import copy
+
+ from textwrap import dedent
+
+@@ -16,7 +17,7 @@ from numpy import ma
+
+ from ....table import Table, MaskedColumn, Column
+ from ... import ascii
+-from ...ascii.core import ParameterError, FastOptionsError
++from ...ascii.core import ParameterError, FastOptionsError, InconsistentTableError
+ from ...ascii.cparser import CParserError
+ from ..fastbasic import FastBasic, FastCsv, FastTab, FastCommentedHeader, \
+ FastRdb, FastNoHeader
+@@ -27,7 +28,11 @@ from ....extern.six.moves import range
+
+ TRAVIS = os.environ.get('TRAVIS', False)
+
+-def assert_table_equal(t1, t2, check_meta=False):
++def assert_table_equal(t1, t2, check_meta=False, rtol=1.e-15, atol=1.e-300):
++ """
++ Test equality of all columns in a table, with stricter tolerances for
++ float columns than the np.allclose default.
++ """
+ assert_equal(len(t1), len(t2))
+ assert_equal(t1.colnames, t2.colnames)
+ if check_meta:
+@@ -43,7 +48,7 @@ def assert_table_equal(t1, t2, check_meta=False):
+ elif isinstance(el, six.string_types):
+ assert_equal(el, t2[name][i])
+ else:
+- assert_almost_equal(el, t2[name][i])
++ assert_almost_equal(el, t2[name][i], rtol=rtol, atol=atol)
+ except (TypeError, NotImplementedError):
+ pass # ignore for now
+
+@@ -374,10 +379,10 @@ A B C
+ 7 8 9 10
+ 11 12 13
+ """
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ table = FastBasic().read(text)
+- assert 'CParserError: an error occurred while parsing table data: too many ' \
+- 'columns found in line 3 of data' in str(e)
++ assert 'InconsistentTableError: Number of header columns (3) ' \
++ 'inconsistent with data columns in data line 2' in str(e)
+
+
+ def test_too_many_cols2():
+@@ -386,10 +391,10 @@ aaa,bbb
+ 1,2,
+ 3,4,
+ """
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ table = FastCsv().read(text)
+- assert 'CParserError: an error occurred while parsing table data: too many ' \
+- 'columns found in line 1 of data' in str(e)
++ assert 'InconsistentTableError: Number of header columns (2) ' \
++ 'inconsistent with data columns in data line 0' in str(e)
+
+
+ def test_too_many_cols3():
+@@ -398,10 +403,10 @@ aaa,bbb
+ 1,2,,
+ 3,4,
+ """
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ table = FastCsv().read(text)
+- assert 'CParserError: an error occurred while parsing table data: too many ' \
+- 'columns found in line 1 of data' in str(e)
++ assert 'InconsistentTableError: Number of header columns (2) ' \
++ 'inconsistent with data columns in data line 0' in str(e)
+
+
+ @pytest.mark.parametrize("parallel", [True, False])
+@@ -420,7 +425,7 @@ A,B,C
+ assert table['B'][1] is not ma.masked
+ assert table['C'][1] is ma.masked
+
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ table = FastBasic(delimiter=',').read(text)
+
+
+@@ -755,10 +760,11 @@ A B C
+ expected = Table([[7, 10], [8, 11], [91, 12]], names=('A', 'B', 'C'))
+ assert_table_equal(table, expected)
+
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ # tries to begin in the middle of quoted field
+ read_basic(text, data_start=4, parallel=parallel)
+- assert 'not enough columns found in line 1 of data' in str(e)
++ assert 'header columns (3) inconsistent with data columns in data line 0' \
++ in str(e)
+
+ table = read_basic(text, data_start=5, parallel=parallel)
+ # ignore commented line
+@@ -825,9 +831,10 @@ def test_strip_line_trailing_whitespace(parallel, read_basic):
+ row.
+ """
+ text = 'a b c\n1 2 \n3 4 5'
+- with pytest.raises(CParserError) as e:
++ with pytest.raises(InconsistentTableError) as e:
+ ascii.read(StringIO(text), format='fast_basic', guess=False)
+- assert 'not enough columns found in line 1' in str(e)
++ assert 'header columns (3) inconsistent with data columns in data line 0' \
++ in str(e)
+
+ text = 'a b c\n 1 2 3 \t \n 4 5 6 '
+ table = read_basic(text, parallel=parallel)
+@@ -956,7 +963,8 @@ def test_read_big_table(tmpdir):
+
+
+ # fast_reader configurations: False| 'use_fast_converter'=False|True
+- at pytest.mark.parametrize('reader', [ 0, 1, 2])
++ at pytest.mark.parametrize('reader', [ False, dict(use_fast_converter=False),
++ dict(use_fast_converter=True) ])
+ # catch Windows environment since we cannot use _read() with custom fast_reader
+ @pytest.mark.parametrize("parallel", [ False,
+ pytest.mark.xfail(os.name == 'nt', reason=
+@@ -971,43 +979,45 @@ def test_data_out_of_range(parallel, reader):
+ """
+ # Python reader and strtod() are expected to return precise results
+ rtol = 1.e-30
+- if reader > 1:
+- rtol = 1.e-15
+- # passing fast_reader dict with parametrize does not work!
+- if reader > 0:
+- fast_reader = {'parallel': parallel, 'use_fast_converter': reader > 1}
+- else:
+- fast_reader = False
++
++ # update fast_reader dict; pass only copies to avoid changing during read()!
++ if reader:
++ reader['parallel'] = parallel
++ if reader.get('use_fast_converter'):
++ rtol = 1.e-15
++
+ if parallel:
+- if reader < 1:
++ if not reader:
+ pytest.skip("Multiprocessing only available in fast reader")
+ elif TRAVIS:
+ pytest.xfail("Multiprocessing can sometimes fail on Travis CI")
+
+ fields = [ '10.1E+199', '3.14e+313', '2048e+306', '0.6E-325', '-2.e345' ]
+ values = np.array([ 1.01e200, np.inf, np.inf, 0.0, -np.inf ])
+- t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+- fast_reader=fast_reader)
++ t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++ fast_reader=copy.deepcopy(reader))
+ read_values = np.array([col[0] for col in t.itercols()])
+ assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+
+ # test some additional corner cases
+ fields = [ '.0101E202', '0.000000314E+314', '1777E+305', '-1799E+305', '0.2e-323',
+- '2500e-327', ' 0.0000000000000000000001024E+330' ]
++ '5200e-327', ' 0.0000000000000000000001024E+330' ]
+ values = np.array([ 1.01e200, 3.14e307, 1.777e308, -np.inf, 0.0, 4.94e-324, 1.024e308 ])
+- t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+- fast_reader=fast_reader)
++ t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++ fast_reader=copy.deepcopy(reader))
+ read_values = np.array([col[0] for col in t.itercols()])
+ assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+
+ # test corner cases again with non-standard exponent_style (auto-detection)
+- if reader < 2:
++ if reader and reader.get('use_fast_converter'):
++ reader.update({'exponent_style': 'A'})
++ else:
+ pytest.skip("Fortran exponent style only available in fast converter")
+- fast_reader.update({'exponent_style': 'A'})
++
+ fields = [ '.0101D202', '0.000000314d+314', '1777+305', '-1799E+305', '0.2e-323',
+ '2500-327', ' 0.0000000000000000000001024Q+330' ]
+- t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+- fast_reader=fast_reader)
++ t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++ fast_reader=copy.deepcopy(reader))
+ read_values = np.array([col[0] for col in t.itercols()])
+ assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+
+@@ -1029,14 +1039,14 @@ def test_int_out_of_range(parallel):
+
+ text = 'P M S\n {:d} {:d} {:s}'.format(imax, imin, huge)
+ expected = Table([[imax], [imin], [huge]], names=('P', 'M', 'S'))
+- table = ascii.read(text, format='basic', guess=False,
++ table = ascii.read(text, format='basic',
+ fast_reader={'parallel': parallel})
+ assert_table_equal(table, expected)
+
+ # check with leading zeroes to make sure strtol does not read them as octal
+ text = 'P M S\n000{:d} -0{:d} 00{:s}'.format(imax, -imin, huge)
+ expected = Table([[imax], [imin], ['00'+huge]], names=('P', 'M', 'S'))
+- table = ascii.read(text, format='basic', guess=False,
++ table = ascii.read(text, format='basic',
+ fast_reader={'parallel': parallel})
+ assert_table_equal(table, expected)
+
+@@ -1047,10 +1057,10 @@ def test_int_out_of_range(parallel):
+ expected = Table([[12.3, 10.*imax], [10.*imax, 4.56e8]],
+ names=('A', 'B'))
+
+- table = ascii.read(text, format='basic', guess=False,
++ table = ascii.read(text, format='basic',
+ fast_reader={'parallel': parallel})
+ assert_table_equal(table, expected)
+- table = ascii.read(text, format='basic', guess=False, fast_reader=False)
++ table = ascii.read(text, format='basic', fast_reader=False)
+ assert_table_equal(table, expected)
+
+
+@@ -1064,39 +1074,40 @@ def test_fortran_reader(parallel):
+ Make sure that ascii.read() can read Fortran-style exponential notation
+ using the fast_reader.
+ """
+- text = 'A B C\n100.01{:s}+99 2.0 3\n 4.2{:s}-1 5.0{:s}-1 0.6{:s}4'
+- expected = Table([[1.0001e101, 0.42], [2, 0.5], [3.0, 6000]],
+- names=('A', 'B', 'C'))
++ # check for nominal np.float64 precision
++ rtol = 1.e-15
++ atol = 0.0
++ text = 'A B C D\n100.01{:s}99 2.0 2.0{:s}-103 3\n' + \
++ ' 4.2{:s}-1 5.0{:s}-1 0.6{:s}4 .017{:s}+309'
++ expc = Table([[1.0001e101, 0.42], [2, 0.5], [2.e-103, 6.e3], [3, 1.7e307]],
++ names=('A', 'B', 'C', 'D'))
+
+- expstyles = { 'e': 4*('E'), 'D': ('D', 'd', 'd', 'D'), 'Q': 2*('q', 'Q'),
+- 'fortran': ('D', 'E', 'Q', 'd') }
++ expstyles = { 'e': 6*('E'),
++ 'D': ('D', 'd', 'd', 'D', 'd', 'D'),
++ 'Q': 3*('q', 'Q'),
++ 'Fortran': ('E', '0', 'D', 'Q', 'd', '0') }
+
+ # C strtod (not-fast converter) can't handle Fortran exp
+ with pytest.raises(FastOptionsError) as e:
+- ascii.read(text.format(*(4*('D'))), format='basic', guess=False,
++ ascii.read(text.format(*(6*('D'))), format='basic',
+ fast_reader={'use_fast_converter': False,
+ 'parallel': parallel, 'exponent_style': 'D'})
+ assert 'fast_reader: exponent_style requires use_fast_converter' in str(e)
+
+- # enable multiprocessing and the fast converter
+- # iterate over all style-exponent combinations
++ # enable multiprocessing and the fast converter; iterate over
++ # all style-exponent combinations, with auto-detection
+ for s, c in expstyles.items():
+- table = ascii.read(text.format(*c), format='basic', guess=False,
+- fast_reader={'parallel': parallel,
+- 'exponent_style': s})
+- assert_table_equal(table, expected)
+-
+- # mixes and triple-exponents without any character using autodetect option
+- text = 'A B C\n1.0001+101 2.0E0 3\n.42d0 0.5 6.+003'
+- table = ascii.read(text, format='basic', guess=False,
+- fast_reader={'parallel': parallel, 'exponent_style': 'fortran'})
+- assert_table_equal(table, expected)
++ table = ascii.read(text.format(*c), fast_reader={'parallel': parallel,
++ 'exponent_style': s})
++ assert_table_equal(table, expc, rtol=rtol, atol=atol)
+
+- # additional corner-case checks
+- text = 'A B C\n1.0001+101 2.0+000 3\n0.42+000 0.5 6000.-000'
+- table = ascii.read(text, format='basic', guess=False,
+- fast_reader={'parallel': parallel, 'exponent_style': 'fortran'})
+- assert_table_equal(table, expected)
++ # additional corner-case checks including triple-exponents without
++ # any character and mixed whitespace separators
++ text = 'A B\t\t C D\n1.0001+101 2.0+000\t 0.0002-099 3\n ' + \
++ '0.42-000 \t 0.5 6.+003 0.000000000000000000000017+330'
++ table = ascii.read(text, fast_reader={'parallel': parallel,
++ 'exponent_style': 'A'})
++ assert_table_equal(table, expc, rtol=rtol, atol=atol)
+
+
+ @pytest.mark.parametrize("parallel", [
+@@ -1112,12 +1123,126 @@ def test_fortran_invalid_exp(parallel):
+ if parallel and TRAVIS:
+ pytest.xfail("Multiprocessing can sometimes fail on Travis CI")
+
++ rtol = 1.e-15
++ atol = 0.0
++
++ formats = { 'basic': ' ', 'tab': '\t', 'csv': ',' }
++ header = ['S1', 'F2', 'S2', 'F3', 'S3', 'F4', 'F5', 'S4', 'I1', 'F6', 'F7']
++ # tested entries and expected returns, first for auto-detect,
++ # then for different specified exponents
+ fields = [ '1.0001+1', '.42d1', '2.3+10', '0.5', '3+1001', '3000.',
+- '2', '4.56e-2.3', '8000', '4.2-122' ]
+- values = [ '1.0001+1', 4.2, '2.3+10', 0.5, '3+1001', 3.e3,
+- 2, '4.56e-2.3', 8000, 4.2e-122 ]
+-
+- t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+- fast_reader={'parallel': parallel, 'exponent_style': 'A'})
+- read_values = [col[0] for col in t.itercols()]
+- assert read_values == values
++ '2', '4.56e-2.3', '8000', '4.2-022', '.00000145e314' ]
++ vals_e = [ '1.0001+1', '.42d1', '2.3+10', 0.5, '3+1001', 3.e3,
++ 2, '4.56e-2.3', 8000, '4.2-022', 1.45e308 ]
++ vals_d = [ '1.0001+1', 4.2, '2.3+10', 0.5, '3+1001', 3.e3,
++ 2, '4.56e-2.3', 8000, '4.2-022', '.00000145e314' ]
++ vals_a = [ '1.0001+1', 4.2, '2.3+10', 0.5, '3+1001', 3.e3,
++ 2, '4.56e-2.3', 8000, 4.2e-22, 1.45e308 ]
++ vals_v = [ '1.0001+1', 4.2, '2.3+10', 0.5, '3+1001', 3.e3,
++ 2, '4.56e-2.3', 8000, '4.2-022', 1.45e308 ]
++
++ # iterate over supported format types and separators
++ for f, s in formats.items():
++ t1 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)), format=f,
++ fast_reader={'parallel': parallel, 'exponent_style': 'A'})
++ # assert t1['I1'].dtype.kind == 'i'
++ assert_table_equal(t1, Table([[col] for col in vals_a], names=header))
++
++ # try another separator as well with auto-detection
++ #formats['bar'] = '|'
++
++ for s in formats.values():
++ t2 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++ fast_reader={'parallel': parallel, 'exponent_style': 'a'})
++
++ assert_table_equal(t2, Table([[col] for col in vals_a], names=header))
++
++ # iterate for (default) expchar 'E'
++ for s in formats.values():
++ t3 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++ fast_reader={'parallel': parallel, 'use_fast_converter': True})
++
++ assert_table_equal(t3, Table([[col] for col in vals_e], names=header))
++
++ # iterate for expchar 'D'
++ for s in formats.values():
++ t4 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++ fast_reader={'parallel': parallel, 'exponent_style': 'D'})
++
++ assert_table_equal(t4, Table([[col] for col in vals_d], names=header))
++
++ # iterate for regular converter (strtod)
++ for s in formats.values():
++ t5 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++ fast_reader={'parallel': parallel, 'use_fast_converter': False})
++
++ read_values = [col[0] for col in t5.itercols()]
++ if os.name == 'nt':
++ # apparently C strtod() on (some?) MSVC recognises 'd' exponents!
++ assert read_values == vals_v or read_values == vals_e
++ else:
++ assert read_values == vals_e
++
++
++def test_fortran_reader_notbasic():
++ """
++ Check if readers without a fast option raise a value error when a
++ fast_reader is asked for.
++ """
++
++ tabstr = dedent("""
++ a b
++ 1 1.23D4
++ 2 5.67D-8
++ """)[1:-1]
++
++ t1 = ascii.read(tabstr.split('\n'), fast_reader=dict(exponent_style='D'))
++
++ assert t1['b'].dtype.kind == 'f'
++
++ tabrdb = dedent("""
++ a\tb
++ # A simple RDB table
++ N\tN
++ 1\t 1.23D4
++ 2\t 5.67-008
++ """)[1:-1]
++
++ t2 = ascii.read(tabrdb.split('\n'), format='rdb',
++ fast_reader=dict(exponent_style='fortran'))
++
++ assert t2['b'].dtype.kind == 'f'
++
++ tabrst = dedent("""
++ = =======
++ a b
++ = =======
++ 1 1.23E4
++ 2 5.67E-8
++ = =======
++ """)[1:-1]
++
++ t3 = ascii.read(tabrst.split('\n'), format='rst')
++
++ assert t3['b'].dtype.kind == 'f'
++
++ # in the special case of fast_converter=True (the default),
++ # incompatibility is ignored
++ t4 = ascii.read(tabrst.split('\n'), format='rst', fast_reader=True)
++
++ assert t4['b'].dtype.kind == 'f'
++
++ # pytest.xfail("Readers do not correctly check for incompatible options")
++ with pytest.raises(ParameterError):
++ t5 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++ fast_reader='force')
++
++ with pytest.raises(ParameterError):
++ t6 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++ fast_reader=dict(use_fast_converter=False))
++
++ tabrst = tabrst.replace('E', 'D')
++
++ with pytest.raises(ParameterError):
++ t7 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++ fast_reader=dict(exponent_style='D'))
+diff --git a/astropy/io/ascii/tests/test_read.py b/astropy/io/ascii/tests/test_read.py
+index e1a571b..e878a3d 100644
+--- a/astropy/io/ascii/tests/test_read.py
++++ b/astropy/io/ascii/tests/test_read.py
+@@ -112,7 +112,12 @@ def test_read_with_names_arg(fast_reader):
+ """
+ Test that a bad value of `names` raises an exception.
+ """
+- with pytest.raises(ValueError):
+ # CParser only uses columns in `names` and thus reports mismatch in num_col
++ if fast_reader:
++ e = ascii.InconsistentTableError
++ else:
++ e = ValueError
++ with pytest.raises(e):
+ dat = ascii.read(['c d', 'e f'], names=('a', ), guess=False, fast_reader=fast_reader)
+
+
+diff --git a/astropy/io/ascii/ui.py b/astropy/io/ascii/ui.py
+index 5b5e09a..ae3e625 100644
+--- a/astropy/io/ascii/ui.py
++++ b/astropy/io/ascii/ui.py
+@@ -162,7 +162,10 @@ def get_reader(Reader=None, Inputter=None, Outputter=None, **kwargs):
+ # This function is a light wrapper around core._get_reader to provide a public interface
+ # with a default Reader.
+ if Reader is None:
+- Reader = basic.Basic
++ if kwargs.get('fast_reader', False):
++ Reader = fastbasic.FastBasic
++ else:
++ Reader = basic.Basic
+ reader = core._get_reader(Reader, Inputter=Inputter, Outputter=Outputter, **kwargs)
+ return reader
+
+@@ -324,7 +327,6 @@ def read(table, guess=None, **kwargs):
+ guess = False
+
+ if not guess:
+- reader = get_reader(**new_kwargs)
+ # Try the fast reader version of `format` first if applicable. Note that
+ # if user specified a fast format (e.g. format='fast_basic') this test
+ # will fail and the else-clause below will be used.
+@@ -339,14 +341,16 @@ def read(table, guess=None, **kwargs):
+ 'status': 'Success with fast reader (no guessing)'})
+ except (core.ParameterError, cparser.CParserError) as e:
+ # special testing value to avoid falling back on the slow reader
+- if fast_reader_param == 'force':
++ if fast_reader_param == 'force' or isinstance(fast_reader_param, dict):
+ raise e
+ # If the fast reader doesn't work, try the slow version
++ reader = get_reader(**new_kwargs)
+ dat = reader.read(table)
+ _read_trace.append({'kwargs': new_kwargs,
+ 'status': 'Success with slow reader after failing'
+ ' with fast (no guessing)'})
+ else:
++ reader = get_reader(**new_kwargs)
+ dat = reader.read(table)
+ _read_trace.append({'kwargs': new_kwargs,
+ 'status': 'Success with specified Reader class '
+@@ -407,6 +411,11 @@ def _guess(table, read_kwargs, format, fast_reader):
+ else:
+ fast_kwargs = None
+
++ # dictionary arguments are passed by reference per default and might
++ # (usually will!) be altered by `read()` - especially `cparser` - calls,
++ # backup them here
++ user_kwargs = copy.deepcopy(read_kwargs)
++
+ # Filter the full guess list so that each entry is consistent with user kwarg inputs.
+ # This also removes any duplicates from the list.
+ filtered_guess_kwargs = []
+@@ -417,8 +426,10 @@ def _guess(table, read_kwargs, format, fast_reader):
+ if fast_reader is False and guess_kwargs['Reader'] in core.FAST_CLASSES.values():
+ continue
+
+- # If user required a fast reader with 'force' then skip all non-fast readers
+- if fast_reader == 'force' and guess_kwargs['Reader'] not in core.FAST_CLASSES.values():
++ # If user explicitly required a fast reader with 'force' or as dict of
++ # options then skip all non-fast readers
++ if (fast_reader == 'force' or isinstance(fast_reader, dict)) \
++ and guess_kwargs['Reader'] not in core.FAST_CLASSES.values():
+ continue
+
+ guess_kwargs_ok = True # guess_kwargs are consistent with user_kwargs?
+@@ -460,12 +471,19 @@ def _guess(table, read_kwargs, format, fast_reader):
+ # keep track of the failed guess and move on.
+ for guess_kwargs in filtered_guess_kwargs:
+ t0 = time.time()
++ for key, val in user_kwargs.items():
++ # update guess_kwargs again; need a deep copy to preserve dicts
++ if key not in guess_kwargs:
++ guess_kwargs[key] = val.copy()
++ elif val != guess_kwargs[key] and guess_kwargs != fast_kwargs:
++ guess_kwargs[key] = val.copy()
+ try:
+ # If guessing will try all Readers then use strict req'ts on column names
+ if 'Reader' not in read_kwargs:
+ guess_kwargs['strict_names'] = True
+
+ reader = get_reader(**guess_kwargs)
++
+ reader.guessing = True
+ dat = reader.read(table)
+ _read_trace.append({'kwargs': guess_kwargs, 'status': 'Success (guessing)',
+diff --git a/docs/io/ascii/fast_ascii_io.rst b/docs/io/ascii/fast_ascii_io.rst
+index ac25cba..fe7b0a9 100644
+--- a/docs/io/ascii/fast_ascii_io.rst
++++ b/docs/io/ascii/fast_ascii_io.rst
+@@ -19,7 +19,9 @@ are currently compatible with the fast engine:
+ * ``tab``
+
+ The fast engine can also be enabled through the format parameter by prefixing
+-a compatible format with "fast" and then an underscore. In this case, |read|
++a compatible format with "fast" and then an underscore. In this case, or
++when enforcing the fast engine by either setting ``fast_reader='force'``
++or explicitly setting any of the :ref:`fast_conversion_opts`, |read|
+ will not fall back on an ordinary reader if fast reading fails.
+ For example::
+
+@@ -35,21 +37,24 @@ To disable the fast engine, specify ``fast_reader=False`` or
+
+ .. Note:: Guessing and Fast reading
+
+- By default |read| will try to guess the format of in the input data by successively
+- trying different formats until one succeeds ([reference the guessing section]).
+- For the default ``'ascii'`` format this means that a number of pure Python readers
+- with no fast implementation will be tried before getting to the fast readers.
++ By default |read| will try to guess the format of the input data by
++ successively trying different formats until one succeeds
++ (see the section on :ref:`guess_formats`).
++ For the default ``'ascii'`` format it will try all fast reader formats
++ before testing any pure Python readers with no fast implementation.
+
+- **For optimum performance**, turn off guessing entirely (``guess=False``) or
+- narrow down the format options as much as possible by specifying the format
+- (e.g. ``format='csv'``) and/or other options such as the delimiter.
++ **For optimum performance** however, it is recommended to turn off
++ guessing entirely (``guess=False``) or narrow down the format options
++ as much as possible by specifying the format (e.g. ``format='csv'``)
++ and/or other options such as the delimiter.
+
+ Reading
+ ^^^^^^^
+ Since the fast engine is not part of the ordinary :mod:`astropy.io.ascii`
+ infrastructure, fast readers raise an error when passed certain
+-parameters which are not implemented in the fast reader
+-infrastructure. In this case |read| will fall back on the ordinary reader.
++parameters which are not implemented in the fast reader infrastructure.
++In this case |read| will fall back on the ordinary reader, unless the
++fast reader has been explicitly requested (see above).
+ These parameters are:
+
+ * Negative ``header_start`` (except for commented-header format)
+diff --git a/docs/io/ascii/read.rst b/docs/io/ascii/read.rst
+index eec45d8..a3210dd 100644
+--- a/docs/io/ascii/read.rst
++++ b/docs/io/ascii/read.rst
+@@ -284,6 +284,8 @@ values in with typical placeholders::
+ used ``'nan'`` for the ``<match_string>`` value then integer columns
+ would wind up as float.
+
++.. _guess_formats:
++
+ Guess table format
+ ^^^^^^^^^^^^^^^^^^
+
diff --git a/debian/patches/series b/debian/patches/series
index c47f1df..67910dc 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@ external_python_pkgs.patch
Fix-default-value-for-remote_data-option.patch
Use-parametrize-instead-of-yield.patch
Allow-pytest-3.x-to-use-plugin-for-doctests-in-.rst-files.patch
+FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-astro/packages/python-astropy.git
More information about the Debian-astro-commits
mailing list