Code Sample, a copy-pastable example if possible
In [1]: import pandas as pd
In [2]: import io
In [3]: pd.__version__
Out[3]: '0.20.1'
In [4]: csv = io.StringIO(
...: "i1,i2,x,y\n"
...: "A,a,1,2\n"
...: "A,b,2,3\n"
...: "A,c,1,2\n"
...: "A,d,4,5\n"
...: "A,e,6,7\n"
...: "B,a,1,2\n"
...: "B,b,2,3\n"
...: "B,c,1,2\n"
...: "B,d,4,5\n"
...: "B,e,6,7\n")
...:
In [5]: df = pd.read_csv(csv).set_index(['i1', 'i2'])
In [6]: df.groupby(level='i1', group_keys=False).apply(lambda x: x.ewm(halflife=3).corr().loc[:, 'x', 'y'])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-c3f7ff852204> in <module>()
----> 1 df.groupby(level='i1', group_keys=False).apply(lambda x: x.ewm(halflife=3).corr().loc[:, 'x', 'y'])
D:\Anaconda3\lib\site-packages\pandas\core\groupby.py in apply(self, func, *args, **kwargs)
714 # ignore SettingWithCopy here in case the user mutates
715 with option_context('mode.chained_assignment', None):
--> 716 return self._python_apply_general(f)
717
718 def _python_apply_general(self, f):
D:\Anaconda3\lib\site-packages\pandas\core\groupby.py in _python_apply_general(self, f)
718 def _python_apply_general(self, f):
719 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 720 self.axis)
721
722 return self._wrap_applied_output(
D:\Anaconda3\lib\site-packages\pandas\core\groupby.py in apply(self, f, data, axis)
1727 # group might be modified
1728 group_axes = _get_axes(group)
-> 1729 res = f(group)
1730 if not _is_indexed_like(res, group_axes):
1731 mutated = True
<ipython-input-6-c3f7ff852204> in <lambda>(x)
----> 1 df.groupby(level='i1', group_keys=False).apply(lambda x: x.ewm(halflife=3).corr().loc[:, 'x', 'y'])
D:\Anaconda3\lib\site-packages\pandas\core\window.py in corr(self, other, pairwise, **kwargs)
1841
1842 return _flex_binary_moment(self._selected_obj, other._selected_obj,
-> 1843 _get_corr, pairwise=bool(pairwise))
1844
1845 # Helper Funcs
D:\Anaconda3\lib\site-packages\pandas\core\window.py in _flex_binary_moment(arg1, arg2, f, pairwise)
1931 arg2.columns.name)
1932 result.index = result.index.set_names(
-> 1933 [arg1.index.name, arg1.columns.name])
1934
1935 return result
D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in set_names(self, names, level, inplace)
1119 else:
1120 idx = self._shallow_copy()
-> 1121 idx._set_names(names, level=level)
1122 if not inplace:
1123 return idx
D:\Anaconda3\lib\site-packages\pandas\core\indexes\multi.py in _set_names(self, names, level, validate)
514 raise ValueError('Length of names must match length of level.')
515 if validate and level is None and len(names) != self.nlevels:
--> 516 raise ValueError('Length of names must match number of levels in '
517 'MultiIndex.')
518
ValueError: Length of names must match number of levels in MultiIndex.
Problem description
This code works in pandas 0.19.2 but fails in pandas 0.20.1 with `ValueError: Length of names must match number of levels in MultiIndex.`
Expected Output
In [1]: import io
In [2]: import pandas as pd
In [3]: pd.__version__
Out[3]: '0.19.2'
In [4]: csv = io.StringIO(
...: "i1,i2,x,y\n"
...: "A,a,1,2\n"
...: "A,b,2,3\n"
...: "A,c,1,2\n"
...: "A,d,4,5\n"
...: "A,e,6,7\n"
...: "B,a,1,2\n"
...: "B,b,2,3\n"
...: "B,c,1,2\n"
...: "B,d,4,5\n"
...: "B,e,6,7\n")
...:
In [5]: df = pd.read_csv(csv).set_index(['i1', 'i2'])
In [6]: df.groupby(level='i1', group_keys=False).apply(lambda x: x.ewm(halflife=3).corr().loc[:, 'x', 'y'])
Out[6]:
i1 i2
A a NaN
b 1.0
c 1.0
d 1.0
e 1.0
B a NaN
b 1.0
c 1.0
d 1.0
e 1.0
Name: y, dtype: float64
Output of pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.13.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 58 Stepping 9, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.20.1
pytest: None
pip: 9.0.1
setuptools: 27.2.0
Cython: None
numpy: 1.12.1
scipy: 0.19.0
xarray: None
IPython: 5.3.0
sphinx: None
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.0.1
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.999
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
pandas_gbq: None
pandas_datareader: None
Comment From: jreback
This was an API change in 0.20.1.
See the docs: http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#whatsnew-0200-api-breaking-rolling-pairwise
A MultiIndex (MI) DataFrame is now returned by pairwise `corr()`, not a (deprecated) Panel.
In [9]: df.loc['A'].ewm(halflife=3).corr()
Out[9]:
x y
i2
a x NaN NaN
y NaN NaN
b x 1.0 1.0
y 1.0 1.0
c x 1.0 1.0
y 1.0 1.0
d x 1.0 1.0
y 1.0 1.0
e x 1.0 1.0
y 1.0 1.0