I am trying to solve a `pandas.errors.ParserError: Error tokenizing data` problem.
I have two data sets.
I use the same code for both, but it fails on one of them, as shown in the output below. (It works well with the other one.)
(msnoise) [sujan@node01 MSNoise_test2]$ msnoise plot dvv
Traceback (most recent call last):
File "/home/sujan/anaconda3/envs/msnoise/bin/msnoise", line 8, in <module>
sys.exit(run())
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/scripts/msnoise.py", line 1202, in run
cli(obj={})
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/decorators.py", line 21, in new_func
return f(get_current_context(), *args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/scripts/msnoise.py", line 943, in dvv
main(mov_stack, dttname, comp, filterid, pair, all, show, outfile)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/plots/dvv.py", line 89, in main
df = pd.read_csv(day,sep=",", header=0, index_col=0, parse_dates=True)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/io/parsers.py", line 709, in parser_f
return _read(filepath_or_buffer, kwds)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/io/parsers.py", line 455, in _read
data = parser.read(nrows)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/io/parsers.py", line 1069, in read
ret = self._engine.read(nrows)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/io/parsers.py", line 1839, in read
data = self._reader.read(nrows)
File "pandas/_libs/parsers.pyx", line 902, in pandas._libs.parsers.TextReader.read
File "pandas/_libs/parsers.pyx", line 924, in pandas._libs.parsers.TextReader._read_low_memory
File "pandas/_libs/parsers.pyx", line 978, in pandas._libs.parsers.TextReader._read_rows
File "pandas/_libs/parsers.pyx", line 965, in pandas._libs.parsers.TextReader._tokenize_rows
File "pandas/_libs/parsers.pyx", line 2208, in pandas._libs.parsers.raise_parser_error
pandas.errors.ParserError: Error tokenizing data. C error: Expected 8 fields in line 114, saw 15
I added `error_bad_lines=False` to the `pd.read_csv` call,
but it did not help; the command now fails with a different error, shown below.
(msnoise) [sujan@node01 MSNoise_test2]$ msnoise plot dvv
Skipping line 114: expected 8 fields, saw 15
(1, A EA EM EM0 M \
Date
2013-09-29 00:00:00 -0.076348 inf inf 0.000501 -0.002737
2013-09-29 00:00:00 0.014844 0.021573 0.001400 0.001239 0.000257
2013-09-29 00:00:00 -0.071597 0.002802 0.000144 0.001724 -0.000043
2013-09-29 00:00:00 -0.047929 inf inf 0.002285 0.001605
2013-09-29 00:00:00 -0.135391 inf inf 0.002244 0.011393
M0 Pairs
Date
2013-09-29 00:00:00 0.000836 05_TP01_05_TP10
2013-09-29 00:00:00 0.000558 05_TP02_05_TP10
2013-09-29 00:00:00 0.002713 05_TP09_05_TP10
2013-09-29 00:00:00 0.008074 05_TP01_05_TP09
2013-09-29 00:00:00 0.000346 05_TP02_05_TP09 )
Traceback (most recent call last):
File "/home/sujan/anaconda3/envs/msnoise/bin/msnoise", line 8, in <module>
sys.exit(run())
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/scripts/msnoise.py", line 1202, in run
cli(obj={})
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/click/decorators.py", line 21, in new_func
return f(get_current_context(), *args, **kwargs)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/scripts/msnoise.py", line 943, in dvv
main(mov_stack, dttname, comp, filterid, pair, all, show, outfile)
File "/home/sujan/anaconda3/envs/msnoise/lib/python2.7/site-packages/msnoise/plots/dvv.py", line 140, in main
tmp2 = allbut[dttname].resample('D').mean()
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/core/generic.py", line 5522, in resample
base=base, key=on, level=level)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/core/resample.py", line 999, in resample
return tg._get_resampler(obj, kind=kind)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/core/resample.py", line 1096, in _get_resampler
self._set_grouper(obj)
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/core/groupby.py", line 439, in _set_grouper
indexer = self.indexer = ax.argsort(kind='mergesort')
File "/home/sujan/.local/lib/python2.7/site-packages/pandas/core/indexes/base.py", line 2151, in argsort
return result.argsort(*args, **kwargs)
File "pandas/_libs/tslib.pyx", line 1165, in pandas._libs.tslib._Timestamp.__richcmp__
TypeError: Cannot compare type 'Timestamp' with type 'str'
However, the data set that now fails worked well until two weeks ago, and then it suddenly started raising the `ParserError`.
I did not even touch any of the data or results.
Additionally, I think the code that causes the problem is the following:
for i, mov_stack in enumerate(mov_stacks):
current = start
first = True
alldf = []
while current <= end:
for comp in components:
day = os.path.join('DTT', "%02i" % filterid, "%03i_DAYS" % mov_stack, comp, '%s.txt' % current)
if os.path.isfile(day):
df = pd.read_csv(day, header=0, index_col=0, parse_dates=True)
alldf.append(df)
current += datetime.timedelta(days=1)
if len(alldf) == 0:
print("No Data for %s m%i f%i" % (components, mov_stack, filterid))
continue
The line `day = os.path.join('DTT', "%02i" % filterid, "%03i_DAYS" % mov_stack, comp, '%s.txt' % current)`
builds the path of a txt file, which is then read; the file looks like this:
Date,A,EA,EM,EM0,M,M0,Pairs
2014-05-10,0.419549372718,inf,inf,0.000458496085412,-0.0160997929491,0.000732900920237,05_SS08_05_TP01
2014-05-10,-0.0429633365955,inf,inf,0.000525405329004,0.000306985380522,0.00237631297525,05_TP01_05_TP07
2014-05-10,0.067236405269,inf,inf,0.00256763292024,-0.000489522024887,0.000310750516333,05_SS08_05_TP10
2014-05-10,-0.0286482054004,inf,inf,0.00101017717763,-0.00188012718704,-0.00148293566406,05_SS02_05_SS05
But the data set that works has the same txt file format, and it causes no problem, which is strange.
This has brought my work to a complete stop, so if you know what I should do, or if you need more information to solve this, please let me know.
I found the solution. The cause was an environment variable. I had added a Python path to it in order to fix a `no module` problem that occurred before the `ParserError`.
However, that was not the right fix for the `no module` problem; the right fix was to edit bashrc. Anyway, after I deleted the Python path from the environment variable and re-ran all the steps (cc, mwcs, etc.), `msnoise plot dvv`
finally works well.