Pandas version checks
- [x] I have checked that this issue has not already been reported.
- [x] I have confirmed this bug exists on the latest version of pandas.
- [x] I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
foo = pd.DataFrame({'bar': [1,2,3], 'baz': [4,5,6]}, index=['a','b','c'])
foo.to_csv('s3://my-bucket-name/test.csv')
Issue Description
Trying to export a dataframe as a CSV to an S3 bucket as above results in the error trace below.
This problem is not with permissions, as running foo.to_parquet('s3://my-bucket-name/test.parquet')
successfully exports the dataframe to a parquet file. I'm not sure why the CreateBucket operation is being called anyway.
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
~\anaconda3\lib\site-packages\s3fs\core.py in _call_s3(self, method, *akwarglist, **kwargs)
245 try:
--> 246 out = await method(**additional_kwargs)
247 return out
~\anaconda3\lib\site-packages\aiobotocore\client.py in _make_api_call(self, operation_name, api_params)
154 error_class = self.exceptions.from_code(error_code)
--> 155 raise error_class(parsed_response, operation_name)
156 else:
ClientError: An error occurred (AccessDenied) when calling the CreateBucket operation: Access Denied
The above exception was the direct cause of the following exception:
PermissionError Traceback (most recent call last)
c:\Users\...\test.py in <module>
----> 1 foo.to_csv('s3://my-bucket-name/test.csv')
~\anaconda3\lib\site-packages\pandas\core\generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)
3464 )
3465
-> 3466 return DataFrameRenderer(formatter).to_csv(
3467 path_or_buf,
3468 line_terminator=line_terminator,
~\anaconda3\lib\site-packages\pandas\io\formats\format.py in to_csv(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)
1103 formatter=self.fmt,
1104 )
-> 1105 csv_formatter.save()
1106
1107 if created_buffer:
~\anaconda3\lib\site-packages\pandas\io\formats\csvs.py in save(self)
235 """
236 # apply compression and byte/text conversion
--> 237 with get_handle(
238 self.filepath_or_buffer,
239 self.mode,
~\anaconda3\lib\site-packages\pandas\io\common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
606
607 # open URLs
--> 608 ioargs = _get_filepath_or_buffer(
609 path_or_buf,
610 encoding=encoding,
~\anaconda3\lib\site-packages\pandas\io\common.py in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
366 storage_options = dict(storage_options)
367 storage_options["anon"] = True
--> 368 file_obj = fsspec.open(
369 filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
370 ).open()
~\anaconda3\lib\site-packages\fsspec\core.py in open(urlpath, mode, compression, encoding, errors, protocol, newline, **kwargs)
427 ``OpenFile`` object.
428 """
--> 429 return open_files(
430 urlpath=[urlpath],
431 mode=mode,
~\anaconda3\lib\site-packages\fsspec\core.py in open_files(urlpath, mode, compression, encoding, errors, name_function, num, protocol, newline, auto_mkdir, expand, **kwargs)
290 if "r" not in mode and auto_mkdir:
291 parents = {fs._parent(path) for path in paths}
--> 292 [fs.makedirs(parent, exist_ok=True) for parent in parents]
293 return OpenFiles(
294 [
~\anaconda3\lib\site-packages\fsspec\core.py in <listcomp>(.0)
290 if "r" not in mode and auto_mkdir:
291 parents = {fs._parent(path) for path in paths}
--> 292 [fs.makedirs(parent, exist_ok=True) for parent in parents]
293 return OpenFiles(
294 [
~\anaconda3\lib\site-packages\fsspec\asyn.py in wrapper(*args, **kwargs)
86 def wrapper(*args, **kwargs):
87 self = obj or args[0]
---> 88 return sync(self.loop, func, *args, **kwargs)
89
90 return wrapper
~\anaconda3\lib\site-packages\fsspec\asyn.py in sync(loop, func, timeout, *args, **kwargs)
67 raise FSTimeoutError
68 if isinstance(result[0], BaseException):
---> 69 raise result[0]
70 return result[0]
71
~\anaconda3\lib\site-packages\fsspec\asyn.py in _runner(event, coro, result, timeout)
23 coro = asyncio.wait_for(coro, timeout=timeout)
24 try:
---> 25 result[0] = await coro
26 except Exception as ex:
27 result[0] = ex
~\anaconda3\lib\site-packages\s3fs\core.py in _makedirs(self, path, exist_ok)
713 async def _makedirs(self, path, exist_ok=False):
714 try:
--> 715 await self._mkdir(path, create_parents=True)
716 except FileExistsError:
717 if exist_ok:
~\anaconda3\lib\site-packages\s3fs\core.py in _mkdir(self, path, acl, create_parents, **kwargs)
698 "LocationConstraint": region_name
699 }
--> 700 await self._call_s3("create_bucket", **params)
701 self.invalidate_cache("")
702 self.invalidate_cache(bucket)
~\anaconda3\lib\site-packages\s3fs\core.py in _call_s3(self, method, *akwarglist, **kwargs)
263 except Exception as e:
264 err = e
--> 265 raise translate_boto_error(err)
266
267 call_s3 = sync_wrapper(_call_s3)
PermissionError: Access Denied
Expected Behavior
The dataframe should be exported to the S3 bucket as a CSV file, just as the to_parquet function successfully exports the dataframe to the S3 bucket as a parquet file.
Installed Versions
Comment From: mroeschke
to_parquet uses external libraries (pyarrow/fastparquet) that have different permission handling behavior than to_csv.

For to_csv, pandas leverages fsspec, which requires credentials to be specified via storage_options:
https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files
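For reference, a minimal sketch of passing credentials through storage_options (the key/secret values and the bucket name are placeholders; s3fs can also pick up credentials from the standard AWS environment variables or config files):

import pandas as pd

foo = pd.DataFrame({'bar': [1, 2, 3], 'baz': [4, 5, 6]}, index=['a', 'b', 'c'])

# storage_options is forwarded to fsspec/s3fs; the credential values here are placeholders.
foo.to_csv(
    's3://my-bucket-name/test.csv',
    storage_options={'key': 'MY_ACCESS_KEY_ID', 'secret': 'MY_SECRET_ACCESS_KEY'},
)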
Comment From: jgarveyanalytics
Thank you. This helped me also.