[Debian-astro-commits] [python-astropy] 03/04: FIX: improved detection of ascii fast_reader in non-fast parsers

Ole Streicher olebole at moszumanska.debian.org
Thu Jan 19 11:36:56 UTC 2017


This is an automated email from the git hooks/post-receive script.

olebole pushed a commit to branch experimental
in repository python-astropy.

commit 52109dc398f434dc238688ed905aba4bf6acf68e
Author: Ole Streicher <olebole at debian.org>
Date:   Thu Jan 19 09:57:04 2017 +0100

    FIX: improved detection of ascii fast_reader in non-fast parsers
---
 ...ection-of-ascii-fast_reader-in-non-fast-p.patch | 646 +++++++++++++++++++++
 debian/patches/series                              |   1 +
 2 files changed, 647 insertions(+)

diff --git a/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch b/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch
new file mode 100644
index 0000000..08cb2f0
--- /dev/null
+++ b/debian/patches/FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch
@@ -0,0 +1,646 @@
+From: Derek Homeier <dhomeie at gwdg.de>
+Date: Fri, 9 Dec 2016 01:15:41 +0100
+Subject: FIX: improved detection of ascii fast_reader in non-fast parsers
+
+Pull request: https://github.com/astropy/astropy/pull/5578
+---
+ astropy/io/ascii/core.py                |  11 +-
+ astropy/io/ascii/cparser.pyx            |   7 +-
+ astropy/io/ascii/tests/test_c_reader.py | 265 +++++++++++++++++++++++---------
+ astropy/io/ascii/tests/test_read.py     |   7 +-
+ astropy/io/ascii/ui.py                  |  28 +++-
+ docs/io/ascii/fast_ascii_io.rst         |  25 +--
+ docs/io/ascii/read.rst                  |   2 +
+ 7 files changed, 257 insertions(+), 88 deletions(-)
+
+diff --git a/astropy/io/ascii/core.py b/astropy/io/ascii/core.py
+index 48a3f58..0bf9f68 100644
+--- a/astropy/io/ascii/core.py
++++ b/astropy/io/ascii/core.py
+@@ -1345,8 +1345,17 @@ def _get_reader(Reader, Inputter=None, Outputter=None, **kwargs):
+             kwargs['Inputter'] = Inputter
+         return Reader(**kwargs)
+ 
++    # If user explicitly passed a fast reader with 'force' or with non-default
++    # options for the fast reader, raise an error for slow readers
+     if 'fast_reader' in kwargs:
+-        del kwargs['fast_reader'] # ignore fast_reader parameter for slow readers
++        if kwargs['fast_reader'] == 'force' or \
++           isinstance(kwargs['fast_reader'] , dict):
++            raise ParameterError('fast_reader required with ' +
++                                 '{0}, but this is a slow reader: {1}'
++                                 .format(kwargs['fast_reader'], Reader))
++        else:
++            del kwargs['fast_reader'] # otherwise ignore fast_reader parameter
++
+     reader_kwargs = dict([k, v] for k, v in kwargs.items() if k not in extra_reader_pars)
+     reader = Reader(**reader_kwargs)
+ 
+diff --git a/astropy/io/ascii/cparser.pyx b/astropy/io/ascii/cparser.pyx
+index 1ac24c0..b5b7b05 100644
+--- a/astropy/io/ascii/cparser.pyx
++++ b/astropy/io/ascii/cparser.pyx
+@@ -386,7 +386,12 @@ cdef class CParser:
+             data_end = max(self.data_end - self.data_start, 0) # read nothing if data_end < 0
+ 
+         if tokenize(self.tokenizer, data_end, 0, <int>len(self.names)) != 0:
+-            self.raise_error("an error occurred while parsing table data")
++            if self.tokenizer.code in (NOT_ENOUGH_COLS, TOO_MANY_COLS):
++                raise core.InconsistentTableError("Number of header columns " +
++                      "({0}) inconsistent with data columns in data line {1}"
++                      .format(self.tokenizer.num_cols, self.tokenizer.num_rows))
++            else:
++                self.raise_error("an error occurred while parsing table data")
+         elif self.tokenizer.num_rows == 0: # no data
+             return ([np.array([], dtype=np.int_)] * self.width, [])
+         self._set_fill_values()
+diff --git a/astropy/io/ascii/tests/test_c_reader.py b/astropy/io/ascii/tests/test_c_reader.py
+index cc28830..33b9e43 100644
+--- a/astropy/io/ascii/tests/test_c_reader.py
++++ b/astropy/io/ascii/tests/test_c_reader.py
+@@ -8,6 +8,7 @@ except ImportError: # cStringIO doesn't exist in Python 3
+ 
+ import os
+ import functools
++import copy
+ 
+ from textwrap import dedent
+ 
+@@ -16,7 +17,7 @@ from numpy import ma
+ 
+ from ....table import Table, MaskedColumn, Column
+ from ... import ascii
+-from ...ascii.core import ParameterError, FastOptionsError
++from ...ascii.core import ParameterError, FastOptionsError, InconsistentTableError
+ from ...ascii.cparser import CParserError
+ from ..fastbasic import FastBasic, FastCsv, FastTab, FastCommentedHeader, \
+     FastRdb, FastNoHeader
+@@ -27,7 +28,11 @@ from ....extern.six.moves import range
+ 
+ TRAVIS = os.environ.get('TRAVIS', False)
+ 
+-def assert_table_equal(t1, t2, check_meta=False):
++def assert_table_equal(t1, t2, check_meta=False, rtol=1.e-15, atol=1.e-300):
++    """
++    Test equality of all columns in a table, with stricter tolerances for
++    float columns than the np.allclose default.
++    """
+     assert_equal(len(t1), len(t2))
+     assert_equal(t1.colnames, t2.colnames)
+     if check_meta:
+@@ -43,7 +48,7 @@ def assert_table_equal(t1, t2, check_meta=False):
+                     elif isinstance(el, six.string_types):
+                         assert_equal(el, t2[name][i])
+                     else:
+-                        assert_almost_equal(el, t2[name][i])
++                        assert_almost_equal(el, t2[name][i], rtol=rtol, atol=atol)
+                 except (TypeError, NotImplementedError):
+                     pass # ignore for now
+ 
+@@ -374,10 +379,10 @@ A B C
+ 7 8 9 10
+ 11 12 13
+ """
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         table = FastBasic().read(text)
+-    assert 'CParserError: an error occurred while parsing table data: too many ' \
+-        'columns found in line 3 of data' in str(e)
++    assert 'InconsistentTableError: Number of header columns (3) ' \
++           'inconsistent with data columns in data line 2' in str(e)
+ 
+ 
+ def test_too_many_cols2():
+@@ -386,10 +391,10 @@ aaa,bbb
+ 1,2,
+ 3,4,
+ """
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         table = FastCsv().read(text)
+-    assert 'CParserError: an error occurred while parsing table data: too many ' \
+-        'columns found in line 1 of data' in str(e)
++    assert 'InconsistentTableError: Number of header columns (2) ' \
++           'inconsistent with data columns in data line 0' in str(e)
+ 
+ 
+ def test_too_many_cols3():
+@@ -398,10 +403,10 @@ aaa,bbb
+ 1,2,,
+ 3,4,
+ """
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         table = FastCsv().read(text)
+-    assert 'CParserError: an error occurred while parsing table data: too many ' \
+-        'columns found in line 1 of data' in str(e)
++    assert 'InconsistentTableError: Number of header columns (2) ' \
++           'inconsistent with data columns in data line 0' in str(e)
+ 
+ 
+ @pytest.mark.parametrize("parallel", [True, False])
+@@ -420,7 +425,7 @@ A,B,C
+     assert table['B'][1] is not ma.masked
+     assert table['C'][1] is ma.masked
+ 
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         table = FastBasic(delimiter=',').read(text)
+ 
+ 
+@@ -755,10 +760,11 @@ A B C
+     expected = Table([[7, 10], [8, 11], [91, 12]], names=('A', 'B', 'C'))
+     assert_table_equal(table, expected)
+ 
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         # tries to begin in the middle of quoted field
+         read_basic(text, data_start=4, parallel=parallel)
+-    assert 'not enough columns found in line 1 of data' in str(e)
++    assert 'header columns (3) inconsistent with data columns in data line 0' \
++        in str(e)
+ 
+     table = read_basic(text, data_start=5, parallel=parallel)
+     # ignore commented line
+@@ -825,9 +831,10 @@ def test_strip_line_trailing_whitespace(parallel, read_basic):
+     row.
+     """
+     text = 'a b c\n1 2 \n3 4 5'
+-    with pytest.raises(CParserError) as e:
++    with pytest.raises(InconsistentTableError) as e:
+         ascii.read(StringIO(text), format='fast_basic', guess=False)
+-    assert 'not enough columns found in line 1' in str(e)
++    assert 'header columns (3) inconsistent with data columns in data line 0' \
++        in str(e)
+ 
+     text = 'a b c\n 1 2 3   \t \n 4 5 6 '
+     table = read_basic(text, parallel=parallel)
+@@ -956,7 +963,8 @@ def test_read_big_table(tmpdir):
+ 
+ 
+ # fast_reader configurations: False| 'use_fast_converter'=False|True
+-@pytest.mark.parametrize('reader', [ 0, 1, 2])
++@pytest.mark.parametrize('reader', [ False, dict(use_fast_converter=False),
++                                     dict(use_fast_converter=True) ])
+ # catch Windows environment since we cannot use _read() with custom fast_reader
+ @pytest.mark.parametrize("parallel", [ False,
+     pytest.mark.xfail(os.name == 'nt', reason=
+@@ -971,43 +979,45 @@ def test_data_out_of_range(parallel, reader):
+     """
+     # Python reader and strtod() are expected to return precise results
+     rtol = 1.e-30
+-    if reader > 1:
+-        rtol = 1.e-15
+-    # passing fast_reader dict with parametrize does not work!
+-    if reader > 0:
+-        fast_reader = {'parallel': parallel, 'use_fast_converter': reader > 1}
+-    else:
+-        fast_reader = False
++
++    # update fast_reader dict; pass only copies to avoid changing during read()!
++    if reader:
++        reader['parallel'] = parallel
++        if reader.get('use_fast_converter'):
++            rtol = 1.e-15
++
+     if parallel:
+-        if reader < 1:
++        if not reader:
+             pytest.skip("Multiprocessing only available in fast reader")
+         elif TRAVIS:
+             pytest.xfail("Multiprocessing can sometimes fail on Travis CI")
+ 
+     fields = [ '10.1E+199', '3.14e+313', '2048e+306', '0.6E-325', '-2.e345' ]
+     values = np.array([ 1.01e200, np.inf, np.inf, 0.0, -np.inf ])
+-    t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+-                   fast_reader=fast_reader)
++    t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++                   fast_reader=copy.deepcopy(reader))
+     read_values = np.array([col[0] for col in t.itercols()])
+     assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+ 
+     # test some additional corner cases
+     fields = [ '.0101E202', '0.000000314E+314', '1777E+305', '-1799E+305', '0.2e-323',
+-               '2500e-327', ' 0.0000000000000000000001024E+330' ]
++               '5200e-327', ' 0.0000000000000000000001024E+330' ]
+     values = np.array([ 1.01e200, 3.14e307, 1.777e308, -np.inf, 0.0, 4.94e-324, 1.024e308 ])
+-    t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+-                   fast_reader=fast_reader)
++    t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++                   fast_reader=copy.deepcopy(reader))
+     read_values = np.array([col[0] for col in t.itercols()])
+     assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+ 
+     # test corner cases again with non-standard exponent_style (auto-detection)
+-    if  reader < 2:
++    if reader and reader.get('use_fast_converter'):
++        reader.update({'exponent_style': 'A'})
++    else:
+         pytest.skip("Fortran exponent style only available in fast converter")
+-    fast_reader.update({'exponent_style': 'A'})
++
+     fields = [ '.0101D202', '0.000000314d+314', '1777+305', '-1799E+305', '0.2e-323',
+                '2500-327', ' 0.0000000000000000000001024Q+330' ]
+-    t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+-                   fast_reader=fast_reader)
++    t = ascii.read(StringIO(' '.join(fields)), format='no_header',
++                   fast_reader=copy.deepcopy(reader))
+     read_values = np.array([col[0] for col in t.itercols()])
+     assert_almost_equal(read_values, values, rtol=rtol, atol=1.e-324)
+ 
+@@ -1029,14 +1039,14 @@ def test_int_out_of_range(parallel):
+ 
+     text = 'P M S\n {:d} {:d} {:s}'.format(imax, imin, huge)
+     expected = Table([[imax], [imin], [huge]], names=('P', 'M', 'S'))
+-    table = ascii.read(text, format='basic', guess=False,
++    table = ascii.read(text, format='basic',
+                        fast_reader={'parallel': parallel})
+     assert_table_equal(table, expected)
+ 
+     # check with leading zeroes to make sure strtol does not read them as octal
+     text = 'P M S\n000{:d} -0{:d} 00{:s}'.format(imax, -imin, huge)
+     expected = Table([[imax], [imin], ['00'+huge]], names=('P', 'M', 'S'))
+-    table = ascii.read(text, format='basic', guess=False,
++    table = ascii.read(text, format='basic',
+                        fast_reader={'parallel': parallel})
+     assert_table_equal(table, expected)
+ 
+@@ -1047,10 +1057,10 @@ def test_int_out_of_range(parallel):
+     expected = Table([[12.3, 10.*imax], [10.*imax, 4.56e8]],
+                      names=('A', 'B'))
+ 
+-    table = ascii.read(text, format='basic', guess=False,
++    table = ascii.read(text, format='basic',
+                        fast_reader={'parallel': parallel})
+     assert_table_equal(table, expected)
+-    table = ascii.read(text, format='basic', guess=False, fast_reader=False)
++    table = ascii.read(text, format='basic', fast_reader=False)
+     assert_table_equal(table, expected)
+ 
+ 
+@@ -1064,39 +1074,40 @@ def test_fortran_reader(parallel):
+     Make sure that ascii.read() can read Fortran-style exponential notation
+     using the fast_reader.
+     """
+-    text = 'A B C\n100.01{:s}+99 2.0 3\n 4.2{:s}-1 5.0{:s}-1 0.6{:s}4'
+-    expected = Table([[1.0001e101, 0.42], [2, 0.5], [3.0, 6000]],
+-                     names=('A', 'B', 'C'))
++    # check for nominal np.float64 precision
++    rtol = 1.e-15
++    atol = 0.0
++    text = 'A B C D\n100.01{:s}99       2.0  2.0{:s}-103 3\n' + \
++           ' 4.2{:s}-1 5.0{:s}-1     0.6{:s}4 .017{:s}+309'
++    expc = Table([[1.0001e101, 0.42], [2, 0.5], [2.e-103, 6.e3], [3, 1.7e307]],
++                 names=('A', 'B', 'C', 'D'))
+ 
+-    expstyles = { 'e': 4*('E'), 'D': ('D', 'd', 'd', 'D'), 'Q': 2*('q', 'Q'),
+-                  'fortran': ('D', 'E', 'Q', 'd') }
++    expstyles = { 'e': 6*('E'),
++                  'D': ('D', 'd', 'd', 'D', 'd', 'D'),
++                  'Q': 3*('q', 'Q'),
++                  'Fortran': ('E', '0', 'D', 'Q', 'd', '0') }
+ 
+     # C strtod (not-fast converter) can't handle Fortran exp
+     with pytest.raises(FastOptionsError) as e:
+-        ascii.read(text.format(*(4*('D'))), format='basic', guess=False,
++        ascii.read(text.format(*(6*('D'))), format='basic',
+                    fast_reader={'use_fast_converter': False,
+                                 'parallel': parallel, 'exponent_style': 'D'})
+     assert 'fast_reader: exponent_style requires use_fast_converter' in str(e)
+ 
+-    # enable multiprocessing and the fast converter
+-    # iterate over all style-exponent combinations
++    # enable multiprocessing and the fast converter; iterate over
++    # all style-exponent combinations, with auto-detection
+     for s, c in expstyles.items():
+-        table = ascii.read(text.format(*c), format='basic', guess=False,
+-                           fast_reader={'parallel': parallel,
+-                                        'exponent_style': s})
+-        assert_table_equal(table, expected)
+-
+-    # mixes and triple-exponents without any character using autodetect option
+-    text = 'A B C\n1.0001+101 2.0E0 3\n.42d0 0.5 6.+003'
+-    table = ascii.read(text, format='basic', guess=False,
+-                       fast_reader={'parallel': parallel, 'exponent_style': 'fortran'})
+-    assert_table_equal(table, expected)
++        table = ascii.read(text.format(*c), fast_reader={'parallel': parallel,
++                                                         'exponent_style': s})
++        assert_table_equal(table, expc, rtol=rtol, atol=atol)
+ 
+-    # additional corner-case checks
+-    text = 'A B C\n1.0001+101 2.0+000 3\n0.42+000 0.5 6000.-000'
+-    table = ascii.read(text, format='basic', guess=False,
+-                       fast_reader={'parallel': parallel, 'exponent_style': 'fortran'})
+-    assert_table_equal(table, expected)
++    # additional corner-case checks including triple-exponents without
++    # any character and mixed whitespace separators
++    text = 'A B\t\t C D\n1.0001+101 2.0+000\t 0.0002-099 3\n ' + \
++           '0.42-000 \t 0.5 6.+003   0.000000000000000000000017+330'
++    table = ascii.read(text, fast_reader={'parallel': parallel,
++                                          'exponent_style': 'A'})
++    assert_table_equal(table, expc, rtol=rtol, atol=atol)
+ 
+ 
+ @pytest.mark.parametrize("parallel", [
+@@ -1112,12 +1123,126 @@ def test_fortran_invalid_exp(parallel):
+     if parallel and TRAVIS:
+         pytest.xfail("Multiprocessing can sometimes fail on Travis CI")
+ 
++    rtol = 1.e-15
++    atol = 0.0
++
++    formats = { 'basic': ' ', 'tab': '\t', 'csv': ',' }
++    header = ['S1', 'F2', 'S2', 'F3', 'S3', 'F4', 'F5', 'S4', 'I1', 'F6', 'F7']
++    # tested entries and expected returns, first for auto-detect,
++    # then for different specified exponents
+     fields = [ '1.0001+1', '.42d1', '2.3+10', '0.5', '3+1001', '3000.',
+-               '2', '4.56e-2.3', '8000', '4.2-122' ]
+-    values = [ '1.0001+1', 4.2, '2.3+10', 0.5, '3+1001', 3.e3,
+-               2, '4.56e-2.3', 8000, 4.2e-122 ]
+-
+-    t = ascii.read(StringIO(' '.join(fields)), format='no_header', guess=False,
+-                   fast_reader={'parallel': parallel, 'exponent_style': 'A'})
+-    read_values = [col[0] for col in t.itercols()]
+-    assert read_values == values
++               '2', '4.56e-2.3', '8000', '4.2-022', '.00000145e314' ]
++    vals_e = [ '1.0001+1', '.42d1', '2.3+10',   0.5, '3+1001',  3.e3,
++               2, '4.56e-2.3',    8000,  '4.2-022', 1.45e308 ]
++    vals_d = [ '1.0001+1',     4.2, '2.3+10',   0.5, '3+1001',  3.e3,
++               2, '4.56e-2.3',    8000,  '4.2-022', '.00000145e314' ]
++    vals_a = [ '1.0001+1',     4.2, '2.3+10',   0.5, '3+1001',  3.e3,
++               2, '4.56e-2.3',    8000,   4.2e-22,  1.45e308 ]
++    vals_v = [ '1.0001+1', 4.2, '2.3+10',   0.5, '3+1001',  3.e3,
++               2, '4.56e-2.3',    8000,  '4.2-022', 1.45e308 ]
++
++    # iterate over supported format types and separators
++    for f, s in formats.items():
++        t1 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)), format=f,
++                fast_reader={'parallel': parallel, 'exponent_style': 'A'})
++        # assert t1['I1'].dtype.kind == 'i'
++        assert_table_equal(t1, Table([[col] for col in vals_a], names=header))
++
++    # try another separator as well with auto-detection
++    #formats['bar'] = '|'
++
++    for s in formats.values():
++        t2 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++                fast_reader={'parallel': parallel, 'exponent_style': 'a'})
++
++        assert_table_equal(t2, Table([[col] for col in vals_a], names=header))
++
++    # iterate for (default) expchar 'E'
++    for s in formats.values():
++        t3 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++                fast_reader={'parallel': parallel, 'use_fast_converter': True})
++
++        assert_table_equal(t3, Table([[col] for col in vals_e], names=header))
++
++    # iterate for expchar 'D'
++    for s in formats.values():
++        t4 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++                fast_reader={'parallel': parallel, 'exponent_style': 'D'})
++
++        assert_table_equal(t4, Table([[col] for col in vals_d], names=header))
++
++    # iterate for regular converter (strtod)
++    for s in formats.values():
++        t5 = ascii.read(StringIO(s.join(header)+'\n'+s.join(fields)),
++                fast_reader={'parallel': parallel, 'use_fast_converter': False})
++
++        read_values = [col[0] for col in t5.itercols()]
++        if os.name == 'nt':
++            # apparently C strtod() on (some?) MSVC recognises 'd' exponents!
++            assert read_values == vals_v or read_values == vals_e
++        else:
++            assert read_values == vals_e
++
++
++def test_fortran_reader_notbasic():
++    """
++    Check if readers without a fast option raise a ParameterError when a
++    fast_reader is explicitly requested ('force' or a dict of options).
++    """
++
++    tabstr = dedent("""
++    a b
++    1 1.23D4
++    2 5.67D-8
++    """)[1:-1]
++
++    t1 = ascii.read(tabstr.split('\n'), fast_reader=dict(exponent_style='D'))
++
++    assert t1['b'].dtype.kind == 'f'
++
++    tabrdb = dedent("""
++    a\tb
++    # A simple RDB table
++    N\tN
++    1\t 1.23D4
++    2\t 5.67-008
++    """)[1:-1]
++
++    t2 = ascii.read(tabrdb.split('\n'), format='rdb',
++                    fast_reader=dict(exponent_style='fortran'))
++
++    assert t2['b'].dtype.kind == 'f'
++
++    tabrst = dedent("""
++    = =======
++    a b
++    = =======
++    1 1.23E4
++    2 5.67E-8
++    = =======
++    """)[1:-1]
++
++    t3 = ascii.read(tabrst.split('\n'), format='rst')
++
++    assert t3['b'].dtype.kind == 'f'
++
++    # in the special case of fast_reader=True (the default),
++    # incompatibility is ignored
++    t4 = ascii.read(tabrst.split('\n'), format='rst', fast_reader=True)
++
++    assert t4['b'].dtype.kind == 'f'
++
++    # pytest.xfail("Readers do not correctly check for incompatible options")
++    with pytest.raises(ParameterError):
++        t5 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++                        fast_reader='force')
++
++    with pytest.raises(ParameterError):
++        t6 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++                        fast_reader=dict(use_fast_converter=False))
++
++    tabrst = tabrst.replace('E', 'D')
++
++    with pytest.raises(ParameterError):
++        t7 = ascii.read(tabrst.split('\n'), format='rst', guess=False,
++                        fast_reader=dict(exponent_style='D'))
+diff --git a/astropy/io/ascii/tests/test_read.py b/astropy/io/ascii/tests/test_read.py
+index e1a571b..e878a3d 100644
+--- a/astropy/io/ascii/tests/test_read.py
++++ b/astropy/io/ascii/tests/test_read.py
+@@ -112,7 +112,12 @@ def test_read_with_names_arg(fast_reader):
+     """
+     Test that a bad value of `names` raises an exception.
+     """
+-    with pytest.raises(ValueError):
++    # CParser only uses columns in `names` and thus reports mismatch in num_col
++    if fast_reader:
++        e = ascii.InconsistentTableError
++    else:
++        e = ValueError
++    with pytest.raises(e):
+         dat = ascii.read(['c d', 'e f'], names=('a', ), guess=False, fast_reader=fast_reader)
+ 
+ 
+diff --git a/astropy/io/ascii/ui.py b/astropy/io/ascii/ui.py
+index 5b5e09a..ae3e625 100644
+--- a/astropy/io/ascii/ui.py
++++ b/astropy/io/ascii/ui.py
+@@ -162,7 +162,10 @@ def get_reader(Reader=None, Inputter=None, Outputter=None, **kwargs):
+     # This function is a light wrapper around core._get_reader to provide a public interface
+     # with a default Reader.
+     if Reader is None:
+-        Reader = basic.Basic
++        if kwargs.get('fast_reader', False):
++            Reader = fastbasic.FastBasic
++        else:
++            Reader = basic.Basic
+     reader = core._get_reader(Reader, Inputter=Inputter, Outputter=Outputter, **kwargs)
+     return reader
+ 
+@@ -324,7 +327,6 @@ def read(table, guess=None, **kwargs):
+             guess = False
+ 
+     if not guess:
+-        reader = get_reader(**new_kwargs)
+         # Try the fast reader version of `format` first if applicable.  Note that
+         # if user specified a fast format (e.g. format='fast_basic') this test
+         # will fail and the else-clause below will be used.
+@@ -339,14 +341,16 @@ def read(table, guess=None, **kwargs):
+                                     'status': 'Success with fast reader (no guessing)'})
+             except (core.ParameterError, cparser.CParserError) as e:
+                 # special testing value to avoid falling back on the slow reader
+-                if fast_reader_param == 'force':
++                if fast_reader_param == 'force' or isinstance(fast_reader_param, dict):
+                     raise e
+                 # If the fast reader doesn't work, try the slow version
++                reader = get_reader(**new_kwargs)
+                 dat = reader.read(table)
+                 _read_trace.append({'kwargs': new_kwargs,
+                                     'status': 'Success with slow reader after failing'
+                                              ' with fast (no guessing)'})
+         else:
++            reader = get_reader(**new_kwargs)
+             dat = reader.read(table)
+             _read_trace.append({'kwargs': new_kwargs,
+                                 'status': 'Success with specified Reader class '
+@@ -407,6 +411,11 @@ def _guess(table, read_kwargs, format, fast_reader):
+     else:
+         fast_kwargs = None
+ 
++    # dictionary arguments are passed by reference by default and might
++    # (usually will!) be altered by `read()` - especially `cparser` - calls,
++    # so back them up here
++    user_kwargs = copy.deepcopy(read_kwargs)
++
+     # Filter the full guess list so that each entry is consistent with user kwarg inputs.
+     # This also removes any duplicates from the list.
+     filtered_guess_kwargs = []
+@@ -417,8 +426,10 @@ def _guess(table, read_kwargs, format, fast_reader):
+         if fast_reader is False and guess_kwargs['Reader'] in core.FAST_CLASSES.values():
+             continue
+ 
+-        # If user required a fast reader with 'force' then skip all non-fast readers
+-        if fast_reader == 'force' and guess_kwargs['Reader'] not in core.FAST_CLASSES.values():
++        # If user explicitly required a fast reader with 'force' or as dict of
++        # options then skip all non-fast readers
++        if (fast_reader == 'force' or isinstance(fast_reader, dict)) \
++           and guess_kwargs['Reader'] not in core.FAST_CLASSES.values():
+             continue
+ 
+         guess_kwargs_ok = True  # guess_kwargs are consistent with user_kwargs?
+@@ -460,12 +471,19 @@ def _guess(table, read_kwargs, format, fast_reader):
+     # keep track of the failed guess and move on.
+     for guess_kwargs in filtered_guess_kwargs:
+         t0 = time.time()
++        for key, val in user_kwargs.items():
++            # update guess_kwargs again; need a deep copy to preserve dicts
++            if key not in guess_kwargs:
++                guess_kwargs[key] = val.copy()
++            elif val != guess_kwargs[key] and guess_kwargs != fast_kwargs:
++                guess_kwargs[key] = val.copy()
+         try:
+             # If guessing will try all Readers then use strict req'ts on column names
+             if 'Reader' not in read_kwargs:
+                 guess_kwargs['strict_names'] = True
+ 
+             reader = get_reader(**guess_kwargs)
++
+             reader.guessing = True
+             dat = reader.read(table)
+             _read_trace.append({'kwargs': guess_kwargs, 'status': 'Success (guessing)',
+diff --git a/docs/io/ascii/fast_ascii_io.rst b/docs/io/ascii/fast_ascii_io.rst
+index ac25cba..fe7b0a9 100644
+--- a/docs/io/ascii/fast_ascii_io.rst
++++ b/docs/io/ascii/fast_ascii_io.rst
+@@ -19,7 +19,9 @@ are currently compatible with the fast engine:
+  * ``tab``
+ 
+ The fast engine can also be enabled through the format parameter by prefixing
+-a compatible format with "fast" and then an underscore. In this case, |read|
++a compatible format with "fast" and then an underscore. In this case, or
++when enforcing the fast engine by either setting ``fast_reader='force'``
++or explicitly setting any of the :ref:`fast_conversion_opts`, |read|
+ will not fall back on an ordinary reader if fast reading fails.
+ For example::
+ 
+@@ -35,21 +37,24 @@ To disable the fast engine, specify ``fast_reader=False`` or
+ 
+ .. Note:: Guessing and Fast reading
+ 
+-   By default |read| will try to guess the format of in the input data by successively
+-   trying different formats until one succeeds ([reference the guessing section]).
+-   For the default ``'ascii'`` format this means that a number of pure Python readers
+-   with no fast implementation will be tried before getting to the fast readers.
++   By default |read| will try to guess the format of the input data by
++   successively trying different formats until one succeeds
++   (see the section on :ref:`guess_formats`). 
++   For the default ``'ascii'`` format it will try all fast reader formats
++   before testing any pure Python readers with no fast implementation.
+ 
+-   **For optimum performance**, turn off guessing entirely (``guess=False``) or
+-   narrow down the format options as much as possible by specifying the format
+-   (e.g. ``format='csv'``) and/or other options such as the delimiter.
++   **For optimum performance** however, it is recommended to turn off
++   guessing entirely (``guess=False``) or narrow down the format options
++   as much as possible by specifying the format (e.g. ``format='csv'``)
++   and/or other options such as the delimiter.
+ 
+ Reading
+ ^^^^^^^
+ Since the fast engine is not part of the ordinary :mod:`astropy.io.ascii`
+ infrastructure, fast readers raise an error when passed certain
+-parameters which are not implemented in the fast reader
+-infrastructure. In this case |read| will fall back on the ordinary reader.
++parameters which are not implemented in the fast reader infrastructure.
++In this case |read| will fall back on the ordinary reader, unless the
++fast reader has been explicitly requested (see above).
+ These parameters are:
+ 
+  * Negative ``header_start`` (except for commented-header format)
+diff --git a/docs/io/ascii/read.rst b/docs/io/ascii/read.rst
+index eec45d8..a3210dd 100644
+--- a/docs/io/ascii/read.rst
++++ b/docs/io/ascii/read.rst
+@@ -284,6 +284,8 @@ values in with typical placeholders::
+          used ``'nan'`` for the ``<match_string>`` value then integer columns
+          would wind up as float.
+ 
++.. _guess_formats:
++
+ Guess table format
+ ^^^^^^^^^^^^^^^^^^
+ 
diff --git a/debian/patches/series b/debian/patches/series
index c47f1df..67910dc 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@ external_python_pkgs.patch
 Fix-default-value-for-remote_data-option.patch
 Use-parametrize-instead-of-yield.patch
 Allow-pytest-3.x-to-use-plugin-for-doctests-in-.rst-files.patch
+FIX-improved-detection-of-ascii-fast_reader-in-non-fast-p.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-astro/packages/python-astropy.git



More information about the Debian-astro-commits mailing list