This code fails with an exception in pandas 0.18
import pandas
df = pandas.DataFrame({'dates': pandas.date_range('2015', periods=6, tz='utc'), 'nums': range(6)},
index=pandas.MultiIndex.from_product([[1, 2], ['a', 'b', 'c']]))
panel = df.to_panel() # this fails
The error is:
Traceback (most recent call last):
File "/Users/ajenkins/Library/Preferences/PyCharm50/scratches/test.py", line 6, in <module>
panel = df.to_panel()
File "/Users/ajenkins/dev/pandas/pandas/core/frame.py", line 1257, in to_panel
ref_items=selfsorted.columns)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 2994, in reshape_nd
return self.apply('reshape_nd', axes=axes, **kwargs)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 2890, in apply
applied = getattr(b, f)(**kwargs)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 242, in reshape_nd
labels=labels, ref_items=ref_items)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 4386, in _block2d_to_blocknd
pvalues = np.empty(panel_shape, dtype=values.dtype)
TypeError: data type not understood
This is another error where code is passing a DatetimeTZDtype object to numpy.empty, and numpy is complaining because it doesn't recognize that dtype.
I tried fixing that problem with this patch:
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index abfc5c9..108981f 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -4383,7 +4383,10 @@ def _block2d_to_blocknd(values, placement, shape, labels, ref_items):
mask.put(selector, True)
if mask.all():
- pvalues = np.empty(panel_shape, dtype=values.dtype)
+ dtype = values.dtype
+ if is_extension_type(dtype):
+ dtype = dtype.base
+ pvalues = np.empty(panel_shape, dtype=dtype)
else:
dtype, fill_value = _maybe_promote(values.dtype)
pvalues = np.empty(panel_shape, dtype=dtype)
With that patch applied, I get a different error, and it isn't obvious to me how to fix it:
Traceback (most recent call last):
File "/Users/ajenkins/Library/Preferences/PyCharm50/scratches/test.py", line 7, in <module>
panel = df.to_panel() # this fails
File "/Users/ajenkins/dev/pandas/pandas/core/frame.py", line 1257, in to_panel
ref_items=selfsorted.columns)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 2994, in reshape_nd
return self.apply('reshape_nd', axes=axes, **kwargs)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 2890, in apply
applied = getattr(b, f)(**kwargs)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 242, in reshape_nd
labels=labels, ref_items=ref_items)
File "/Users/ajenkins/dev/pandas/pandas/core/internals.py", line 4397, in _block2d_to_blocknd
pvalues[i].flat[mask] = values[:, i]
File "/Users/ajenkins/dev/pandas/pandas/tseries/base.py", line 210, in __getitem__
result = getitem(key)
IndexError: too many indices for array
This error seems to occur because the code assumes that the `values` property of an ND Block object will itself be an ND array. However, for tz-aware datetime blocks, the `values` property is a DatetimeIndex object, so when the code tries to index into it with two indices, it fails.
output of pd.show_versions()
INSTALLED VERSIONS
------------------
commit: 856c3bde62fa7b753a25cfbacc544d1fa415a676
python: 2.7.9.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.18.0+150.g856c3bd.dirty
nose: 1.3.7
pip: 8.1.1
setuptools: 20.9.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.15.1
statsmodels: None
xarray: None
IPython: 3.1.0
sphinx: None
patsy: None
dateutil: 2.5.3
pytz: 2016.4
blosc: None
bottleneck: None
tables: None
numexpr: None
matplotlib: 1.4.2
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: None
boto: None
pandas_datareader: None
Comment From: ajenkins-cargometrics
I was able to get past the "too many indices for array" problem mentioned above and get the example program to work with this patch:
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index abfc5c9..520affc 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -237,7 +237,11 @@ class Block(PandasObject):
return a new block that is transformed to a nd block
"""
- return _block2d_to_blocknd(values=self.get_values().T,
+ values = self.get_values()
+ if is_datetimetz(values) and self.ndim != 1:
+ values = np.array(list(values)).reshape(self.shape)
+
+ return _block2d_to_blocknd(values=values.T,
placement=self.mgr_locs, shape=shape,
labels=labels, ref_items=ref_items)
I'm not sure this is the best fix, though. It might be better to add a method to the Block class, called something like `get_shaped_values`, which is the same as `get_values` except that it guarantees its result will have the same shape as the Block. The DatetimeTZBlock class can then override `get_shaped_values` to perform the reshape if necessary.
Comment From: jreback
I don't know if it's possible to support these 1-d block types (e.g. categorical/datetime with tz) with an inherent 2-d repr under the hood (e.g. blocks of 2-d structures). Further, we are deprecating Panel in the next version anyhow. More interesting would be to have upgraded support for these (already there is some) in xarray.
Comment From: jreback
Closing, as Panel is deprecated.