I find it necessary to specify an explicit date parser when reading certain CSV files because `read_csv(..., error_bad_lines=True)`
does not recognize errors that fall within the index column; it falls back to string-indexing instead. I've dealt with this by specifying `coerce=True`, but that keyword has been deprecated since #10653 combined the `coerce` and `errors` keywords. Now, neither keyword appears to work for me.
Here is the test snippet:
import pandas as pd
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
fakefile = """\
"TOA5","indoorair(1.2.3)","CR1000","47257","CR1000.Std.28.02","CPU:indoorair.cr1","37538","tsdata"
"TIMESTAMP","RECORD","dc1100_pm_small","dc1100_pm_large","li840a_CO2","li840a_H2O","li840a_cell_T","li840a_cell_P","li840a_dew_T","li840a_pwr_src","typeK_amb_T","logger_panel_T"
"TS","RN","counts/m^3","counts/m^3","ppmv","ppthv","degC","mbar","degC","Vdc","degC","degC"
"","","Smp","Smp","Avg","Avg","Avg","Avg","Avg","Avg","Avg","Avg"
"2016-04-06 13:20:00",5756,1373740,42377,1149.936,10.30014,51.20879,928.455,6.267447,24.1189,21.4,25.93
"2016-04-06 13:21:00",5757,1437306,56503,1147.533,10.30781,51.20538,928.4556,6.278251,24.1182,21.43,25.94
"2016-04-06 13:22:00",5758,1454964,49440,1146.63,10.31073,51.20726,928.449,6.282249,24.11856,21.43,25.95
"2016-04-06 13:23:00",5759,1433775,52972,1145.948,10.30542,51.2077,928.4606,6.274971,24.11839,21.4,25.94
"2016-04-06 13:24:00",5760,1483216,56503,1145.541,10.30182,51.20682,928.451,6.26975,24.11856,21.4,25.95"""
df = pd.read_csv(StringIO(fakefile),
index_col=0,
parse_dates=True,
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
errors='coerce'),
header=1,
skiprows=[2,3],
na_values=['NAN', -7999, 7999],
keep_default_na=False)
Machine 1
Here's the traceback I received on my Linux Mint box (before reducing to MWE):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-dca3f9d3edf1> in <module>()
----> 1 indoor_air = read_TOA5('pullman04/winter16/indoor-rack/indoor-air_tsdata.dat')
<ipython-input-10-406a7964b57c> in read_TOA5(fpath)
12 # https://s.campbellsci.com/documents/us/manuals/cr1000.pdf
13 ## HINT csv parser consumes the dbl-quotes around NANs
---> 14 na_values=['NAN', -7999, 7999], keep_default_na=False)
15 #if len(df.index.get_duplicates()): # if dup rows found
16 # df = df.groupby(level=0).first() # discard copies
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
560 skip_blank_lines=skip_blank_lines)
561
--> 562 return _read(filepath_or_buffer, kwds)
563
564 parser_f.__name__ = name
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
323 return parser
324
--> 325 return parser.read()
326
327 _parser_defaults = {
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in read(self, nrows)
813 raise ValueError('skip_footer not supported for iteration')
814
--> 815 ret = self._engine.read(nrows)
816
817 if self.options.get('as_recarray'):
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in read(self, nrows)
1385
1386 names, data = self._do_date_conversions(names, data)
-> 1387 index, names = self._make_index(data, alldata, names)
1388
1389 # maybe create a mi on the columns
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _make_index(self, data, alldata, columns, indexnamerow)
1028 elif not self._has_complex_date_col:
1029 index = self._get_simple_index(alldata, columns)
-> 1030 index = self._agg_index(index)
1031
1032 elif self._has_complex_date_col:
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in _agg_index(self, index, try_parse_dates)
1109
1110 if (try_parse_dates and self._should_parse_dates(i)):
-> 1111 arr = self._date_conv(arr)
1112
1113 col_na_values = self.na_values
/usr/local/lib/python2.7/dist-packages/pandas/io/parsers.pyc in converter(*date_cols)
2286 errors='ignore')
2287 except Exception:
-> 2288 return generic_parser(date_parser, *date_cols)
2289
2290 return converter
/usr/local/lib/python2.7/dist-packages/pandas/io/date_converters.pyc in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-10-406a7964b57c> in <lambda>(ts)
6 # explicit parser + coerce to prevent fallback to string-indexing
7 date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
----> 8 errors='coerce'),
9 header=1, # 1st line metadata 2nd headers
10 skiprows=[2,3], # 3rd units 4th agg type
/usr/local/lib/python2.7/dist-packages/pandas/util/decorators.pyc in wrapper(*args, **kwargs)
89 else:
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
93 return _deprecate_kwarg
TypeError: to_datetime() got multiple values for keyword argument 'errors'
This was using pandas 0.18.1 (freshly updated) with Python 2.7. (Sorry, I don't have `pd.show_versions()` output for this machine.)
Machine 2
When I run the same snippet on Windows 7 with pandas 0.18.1, Python 3.5.1:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in converter(*date_cols)
2275 result = tools.to_datetime(
-> 2276 date_parser(*date_cols), errors='ignore')
2277 if isinstance(result, datetime.datetime):
<ipython-input-11-5805c9b0f012> in <lambda>(ts)
4 date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
----> 5 errors='coerce'),
6 header=1,
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py in wrapper(*args, **kwargs)
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
TypeError: to_datetime() got multiple values for argument 'errors'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in converter(*date_cols)
2284 parser=date_parser,
-> 2285 dayfirst=dayfirst),
2286 errors='ignore')
pandas\src\inference.pyx in pandas.lib.try_parse_dates (pandas\lib.c:57884)()
pandas\src\inference.pyx in pandas.lib.try_parse_dates (pandas\lib.c:57802)()
<ipython-input-11-5805c9b0f012> in <lambda>(ts)
4 date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
----> 5 errors='coerce'),
6 header=1,
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py in wrapper(*args, **kwargs)
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
TypeError: to_datetime() got multiple values for argument 'errors'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-11-5805c9b0f012> in <module>()
7 skiprows=[2,3],
8 na_values=['NAN', -7999, 7999],
----> 9 keep_default_na=False)
10 df
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
560 skip_blank_lines=skip_blank_lines)
561
--> 562 return _read(filepath_or_buffer, kwds)
563
564 parser_f.__name__ = name
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
323 return parser
324
--> 325 return parser.read()
326
327 _parser_defaults = {
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
813 raise ValueError('skip_footer not supported for iteration')
814
--> 815 ret = self._engine.read(nrows)
816
817 if self.options.get('as_recarray'):
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1385
1386 names, data = self._do_date_conversions(names, data)
-> 1387 index, names = self._make_index(data, alldata, names)
1388
1389 # maybe create a mi on the columns
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in _make_index(self, data, alldata, columns, indexnamerow)
1028 elif not self._has_complex_date_col:
1029 index = self._get_simple_index(alldata, columns)
-> 1030 index = self._agg_index(index)
1031
1032 elif self._has_complex_date_col:
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in _agg_index(self, index, try_parse_dates)
1109
1110 if (try_parse_dates and self._should_parse_dates(i)):
-> 1111 arr = self._date_conv(arr)
1112
1113 col_na_values = self.na_values
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py in converter(*date_cols)
2286 errors='ignore')
2287 except Exception:
-> 2288 return generic_parser(date_parser, *date_cols)
2289
2290 return converter
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\date_converters.py in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-11-5805c9b0f012> in <lambda>(ts)
3 parse_dates=True,
4 date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
----> 5 errors='coerce'),
6 header=1,
7 skiprows=[2,3],
C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py in wrapper(*args, **kwargs)
89 else:
90 kwargs[new_arg_name] = new_arg_value
---> 91 return func(*args, **kwargs)
92 return wrapper
93 return _deprecate_kwarg
TypeError: to_datetime() got multiple values for argument 'errors'
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 23 Stepping 10, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
pandas: 0.18.1
nose: 1.3.7
pip: 8.1.2
setuptools: 23.0.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.17.1
statsmodels: 0.6.1
xarray: None
IPython: 4.2.0
sphinx: 1.3.1
patsy: 0.4.1
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.6.0
matplotlib: 1.5.1
openpyxl: 2.3.2
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.2
lxml: 3.6.0
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.13
pymysql: None
psycopg2: None
jinja2: 2.8
boto: 2.40.0
pandas_datareader: None
Machine 2 (again)
Strangely, I get the same error using a different (older) distribution on the same Win7 machine:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-1-4ac37d43a03f> in <module>()
23 skiprows=[2,3],
24 na_values=['NAN', -7999, 7999],
---> 25 keep_default_na=False)
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
463 skip_blank_lines=skip_blank_lines)
464
--> 465 return _read(filepath_or_buffer, kwds)
466
467 parser_f.__name__ = name
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in _read(filepath_or_buffer, kwds)
249 return parser
250
--> 251 return parser.read()
252
253 _parser_defaults = {
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
708 raise ValueError('skip_footer not supported for iteration')
709
--> 710 ret = self._engine.read(nrows)
711
712 if self.options.get('as_recarray'):
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in read(self, nrows)
1210
1211 names, data = self._do_date_conversions(names, data)
-> 1212 index, names = self._make_index(data, alldata, names)
1213
1214 # maybe create a mi on the columns
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in _make_index(self, data, alldata, columns, indexnamerow)
876 elif not self._has_complex_date_col:
877 index = self._get_simple_index(alldata, columns)
--> 878 index = self._agg_index(index)
879
880 elif self._has_complex_date_col:
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in _agg_index(self, index, try_parse_dates)
957
958 if (try_parse_dates and self._should_parse_dates(i)):
--> 959 arr = self._date_conv(arr)
960
961 col_na_values = self.na_values
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\parsers.pyc in converter(*date_cols)
2048 dayfirst=dayfirst)
2049 except Exception:
-> 2050 return generic_parser(date_parser, *date_cols)
2051
2052 return converter
C:\WinPython\python-2.7.9\lib\site-packages\pandas\io\date_converters.pyc in generic_parser(parse_func, *cols)
36 for i in range(N):
37 args = [c[i] for c in cols]
---> 38 results[i] = parse_func(*args)
39
40 return results
<ipython-input-1-4ac37d43a03f> in <lambda>(ts)
19 parse_dates=True,
20 date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
---> 21 errors='coerce'),
22 header=1,
23 skiprows=[2,3],
TypeError: to_datetime() got multiple values for keyword argument 'errors'
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.9.final.0
python-bits: 32
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 23 Stepping 10, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
pandas: 0.15.2
nose: 1.3.4
Cython: 0.21.1
numpy: 1.8.2
scipy: 0.14.0
statsmodels: 0.6.1
IPython: 2.3.1
sphinx: 1.2.3
patsy: 0.3.0
dateutil: 2.3
pytz: 2014.10
bottleneck: None
tables: 3.1.1
numexpr: 2.4
matplotlib: 1.4.2
openpyxl: None
xlrd: 0.9.3
xlwt: None
xlsxwriter: 0.6.4
lxml: 3.4.1
bs4: 4.3.2
html5lib: None
httplib2: None
apiclient: None
rpy2: 2.5.2
sqlalchemy: 0.9.8
pymysql: None
psycopg2: None
Machine 3
Different physical hardware, same test snippet, and same environment (Anaconda3-4.1.0-Windows-x86_64) produced the same traceback.
The Hang-Up
I would have just ignored the deprecation warning and continued to use `coerce=True`, except now I can't get back into a working state...
Reverting to `coerce=True` does not stop the "multiple values for argument 'errors'" traceback:
import pandas as pd
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
fakefile = """\
"TOA5","indoorair(1.2.3)","CR1000","47257","CR1000.Std.28.02","CPU:indoorair.cr1","37538","tsdata"
"TIMESTAMP","RECORD","dc1100_pm_small","dc1100_pm_large","li840a_CO2","li840a_H2O","li840a_cell_T","li840a_cell_P","li840a_dew_T","li840a_pwr_src","typeK_amb_T","logger_panel_T"
"TS","RN","counts/m^3","counts/m^3","ppmv","ppthv","degC","mbar","degC","Vdc","degC","degC"
"","","Smp","Smp","Avg","Avg","Avg","Avg","Avg","Avg","Avg","Avg"
"2016-04-06 13:20:00",5756,1373740,42377,1149.936,10.30014,51.20879,928.455,6.267447,24.1189,21.4,25.93
"2016-04-06 13:21:00",5757,1437306,56503,1147.533,10.30781,51.20538,928.4556,6.278251,24.1182,21.43,25.94
"2016-04-06 13:22:00",5758,1454964,49440,1146.63,10.31073,51.20726,928.449,6.282249,24.11856,21.43,25.95
"2016-04-06 13:23:00",5759,1433775,52972,1145.948,10.30542,51.2077,928.4606,6.274971,24.11839,21.4,25.94
"2016-04-06 13:24:00",5760,1483216,56503,1145.541,10.30182,51.20682,928.451,6.26975,24.11856,21.4,25.95"""
df = pd.read_csv(StringIO(fakefile),
index_col=0,
parse_dates=True,
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
coerce=True),
header=1,
skiprows=[2,3],
na_values=['NAN', -7999, 7999],
keep_default_na=False)
produces
K:\temp.py:24: FutureWarning: the coerce=True keyword is deprecated, use errors='coerce' instead
coerce=True),
Traceback (most recent call last):
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2276, in converter
date_parser(*date_cols), errors='ignore')
File "K:\temp.py", line 24, in <lambda>
coerce=True),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
TypeError: to_datetime() got multiple values for argument 'errors'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2285, in converter
dayfirst=dayfirst),
File "pandas\src\inference.pyx", line 841, in pandas.lib.try_parse_dates (pandas\lib.c:57884)
File "pandas\src\inference.pyx", line 838, in pandas.lib.try_parse_dates (pandas\lib.c:57802)
File "K:\temp.py", line 24, in <lambda>
coerce=True),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
TypeError: to_datetime() got multiple values for argument 'errors'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "K:\temp.py", line 28, in <module>
keep_default_na=False)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 562, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 325, in _read
return parser.read()
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 815, in read
ret = self._engine.read(nrows)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1387, in read
index, names = self._make_index(data, alldata, names)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1030, in _make_index
index = self._agg_index(index)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1111, in _agg_index
arr = self._date_conv(arr)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2288, in converter
return generic_parser(date_parser, *date_cols)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\date_converters.py", line 38, in generic_parser
results[i] = parse_func(*args)
File "K:\temp.py", line 24, in <lambda>
coerce=True),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
TypeError: to_datetime() got multiple values for argument 'errors'
Neither does removing both the `coerce` and `errors` arguments:
import pandas as pd
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
fakefile = """\
"TOA5","indoorair(1.2.3)","CR1000","47257","CR1000.Std.28.02","CPU:indoorair.cr1","37538","tsdata"
"TIMESTAMP","RECORD","dc1100_pm_small","dc1100_pm_large","li840a_CO2","li840a_H2O","li840a_cell_T","li840a_cell_P","li840a_dew_T","li840a_pwr_src","typeK_amb_T","logger_panel_T"
"TS","RN","counts/m^3","counts/m^3","ppmv","ppthv","degC","mbar","degC","Vdc","degC","degC"
"","","Smp","Smp","Avg","Avg","Avg","Avg","Avg","Avg","Avg","Avg"
"2016-04-06 13:20:00",5756,1373740,42377,1149.936,10.30014,51.20879,928.455,6.267447,24.1189,21.4,25.93
"2016-04-06 13:21:00",5757,1437306,56503,1147.533,10.30781,51.20538,928.4556,6.278251,24.1182,21.43,25.94
"2016-04-06 13:22:00",5758,1454964,49440,1146.63,10.31073,51.20726,928.449,6.282249,24.11856,21.43,25.95
"2016-04-06 13:23:00",5759,1433775,52972,1145.948,10.30542,51.2077,928.4606,6.274971,24.11839,21.4,25.94
"2016-04-06 13:24:00",5760,1483216,56503,1145.541,10.30182,51.20682,928.451,6.26975,24.11856,21.4,25.95"""
df = pd.read_csv(StringIO(fakefile),
index_col=0,
parse_dates=True,
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S'),
#errors='coerce'),
header=1,
skiprows=[2,3],
na_values=['NAN', -7999, 7999],
keep_default_na=False)
produces
Traceback (most recent call last):
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2276, in converter
date_parser(*date_cols), errors='ignore')
File "K:\temp.py", line 23, in <lambda>
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S'),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 291, in to_datetime
unit=unit, infer_datetime_format=infer_datetime_format)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 427, in _to_datetime
return _convert_listlike(arg, box, format)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 398, in _convert_listlike
require_iso8601=require_iso8601
File "pandas\tslib.pyx", line 2134, in pandas.tslib.array_to_datetime (pandas\tslib.c:41972)
File "pandas\tslib.pyx", line 2150, in pandas.tslib.array_to_datetime (pandas\tslib.c:38535)
AssertionError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2285, in converter
dayfirst=dayfirst),
File "pandas\src\inference.pyx", line 841, in pandas.lib.try_parse_dates (pandas\lib.c:57884)
File "pandas\src\inference.pyx", line 838, in pandas.lib.try_parse_dates (pandas\lib.c:57802)
File "K:\temp.py", line 23, in <lambda>
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S'),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 291, in to_datetime
unit=unit, infer_datetime_format=infer_datetime_format)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 429, in _to_datetime
return _convert_listlike(np.array([arg]), box, format)[0]
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 398, in _convert_listlike
require_iso8601=require_iso8601
File "pandas\tslib.pyx", line 2134, in pandas.tslib.array_to_datetime (pandas\tslib.c:41972)
File "pandas\tslib.pyx", line 2150, in pandas.tslib.array_to_datetime (pandas\tslib.c:38535)
AssertionError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "K:\temp.py", line 28, in <module>
keep_default_na=False)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 562, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 325, in _read
return parser.read()
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 815, in read
ret = self._engine.read(nrows)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1387, in read
index, names = self._make_index(data, alldata, names)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1030, in _make_index
index = self._agg_index(index)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1111, in _agg_index
arr = self._date_conv(arr)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 2288, in converter
return generic_parser(date_parser, *date_cols)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\io\date_converters.py", line 38, in generic_parser
results[i] = parse_func(*args)
File "K:\temp.py", line 23, in <lambda>
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S'),
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\util\decorators.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 291, in to_datetime
unit=unit, infer_datetime_format=infer_datetime_format)
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 429, in _to_datetime
return _convert_listlike(np.array([arg]), box, format)[0]
File "C:\Users\pokeeffe\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\tseries\tools.py", line 398, in _convert_listlike
require_iso8601=require_iso8601
File "pandas\tslib.pyx", line 2134, in pandas.tslib.array_to_datetime (pandas\tslib.c:41972)
File "pandas\tslib.pyx", line 2150, in pandas.tslib.array_to_datetime (pandas\tslib.c:38535)
AssertionError
To be clear, I can still use `coerce=True` in older versions of pandas. For example, in 0.15.2, both of these work fine:
...
df = pd.read_csv(StringIO(fakefile),
index_col=0,
parse_dates=True,
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S',
coerce=True),
header=1,
skiprows=[2,3],
na_values=['NAN', -7999, 7999],
keep_default_na=False)
...
df = pd.read_csv(StringIO(fakefile),
index_col=0,
parse_dates=True,
date_parser=lambda ts: pd.to_datetime(ts, '%Y-%m-%d %H:%M:%S'),
# coerce=True),
header=1,
skiprows=[2,3],
na_values=['NAN', -7999, 7999],
keep_default_na=False)
Whew! I know that was a lot of scrolling, sorry.
Comment From: chris-b1
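You need to specify `format` as a keyword argument: `errors` is the first positional parameter after the data argument, so your format string was being passed positionally as `errors` and then collided with the `errors='coerce'` keyword. The error message is a little weird because `to_datetime` has a decorator that handles deprecated arguments (it rewrites `coerce=True` into `errors='coerce'`, which is why the `coerce` variant fails the same way); not sure if anything can be done about that.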
date_parser=lambda ts: pd.to_datetime(ts, format='%Y-%m-%d %H:%M:%S',
errors='coerce')
Comment From: patricktokeeffe
Perfect, thank you! Looks like I've only been explicit with the `format=` keyword about half the time, so I expect I'll be running into this a lot. The tracebacks were sending me in the wrong direction, but I can't think of a much better error message.