A small, complete example of the issue
df = pd.DataFrame({"data": np.arange(0, 10, 0.1)})
v = pd.cut(df.data, [0, 1, 2, 5, 10], include_lowest=True).rename("cuts")
df.join(pd.get_dummies(v))
Expected Output
A data frame similar to the input dataframe with 4 additional variables for the cut categories.
What happens
----> 3 df.join(pd.get_dummies(v))
/home/pwaller/.local/lib/python3.5/site-packages/pandas/core/frame.py in join(self, other, on, how, lsuffix, rsuffix, sort)
4534 # For SparseDataFrame's benefit
4535 return self._join_compat(other, on=on, how=how, lsuffix=lsuffix,
-> 4536 rsuffix=rsuffix, sort=sort)
4537
4538 def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
/home/pwaller/.local/lib/python3.5/site-packages/pandas/core/frame.py in _join_compat(self, other, on, how, lsuffix, rsuffix, sort)
4548 return merge(self, other, left_on=on, how=how,
4549 left_index=on is None, right_index=True,
-> 4550 suffixes=(lsuffix, rsuffix), sort=sort)
4551 else:
4552 if on is not None:
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
57 right_index=right_index, sort=sort, suffixes=suffixes,
58 copy=copy, indicator=indicator)
---> 59 return op.get_result()
60 if __debug__:
61 merge.__doc__ = _merge_doc % '\nleft : DataFrame'
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in get_result(self)
505 result_data = concatenate_block_managers(
506 [(ldata, lindexers), (rdata, rindexers)],
--> 507 axes=[llabels.append(rlabels), join_index],
508 concat_axis=0, copy=self.copy)
509
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/base.py in append(self, other)
1440 # if any of the to_concat is category
1441 from pandas.indexes.category import CategoricalIndex
-> 1442 return CategoricalIndex._append_same_dtype(self, to_concat, name)
1443
1444 if len(typs) == 1:
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/category.py in _append_same_dtype(self, to_concat, name)
578 ValueError if other is not in the categories
579 """
--> 580 to_concat = [self._is_dtype_compat(c) for c in to_concat]
581 codes = np.concatenate([c.codes for c in to_concat])
582 result = self._create_from_codes(codes, name=name)
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/category.py in <listcomp>(.0)
578 ValueError if other is not in the categories
579 """
--> 580 to_concat = [self._is_dtype_compat(c) for c in to_concat]
581 codes = np.concatenate([c.codes for c in to_concat])
582 result = self._create_from_codes(codes, name=name)
AttributeError: 'Index' object has no attribute '_is_dtype_compat'
Output of pd.show_versions()
## INSTALLED VERSIONS
commit: None
python: 3.5.2.final.0
python-bits: 64
OS: Linux
OS-release: 4.4.0-43-generic
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.19.0
nose: None
pip: 8.1.1
setuptools: 28.6.0
Cython: 0.24.1
numpy: 1.11.2
scipy: 0.18.1
statsmodels: None
xarray: None
IPython: 5.1.0
sphinx: None
patsy: None
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: None
tables: None
numexpr: 2.6.1
matplotlib: 1.5.3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.999
httplib2: None
apiclient: None
sqlalchemy: 1.1.2
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None
pandas_datareader: None
Comment From: pwaller
I note that:
- This seems to work in 0.18.
- It affects pd.concat([df, get_dummies(cut(df.v))], axis=1) as well, with the same error.
- It may be related to #14298 (the only other example of this AttributeError
I can find, though I don't understand that bug).
Comment From: pwaller
I accidentally posted the stack trace for concat in the original issue. I've edited the issue. Here is the other stack trace, for pd.concat([df, get_dummies(cut(df.v))], axis=1):
----> 1 pd.concat([vs.first_time.to_frame(), pd.get_dummies(by_stay)], axis=1).head()
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
1323 keys=keys, levels=levels, names=names,
1324 verify_integrity=verify_integrity,
-> 1325 copy=copy)
1326 return op.get_result()
1327
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
1462 self.copy = copy
1463
-> 1464 self.new_axes = self._get_new_axes()
1465
1466 def get_result(self):
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in _get_new_axes(self)
1550 new_axes[i] = ax
1551
-> 1552 new_axes[self.axis] = self._get_concat_axis()
1553 return new_axes
1554
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in _get_concat_axis(self)
1604
1605 if self.keys is None:
-> 1606 concat_axis = _concat_indexes(indexes)
1607 else:
1608 concat_axis = _make_concat_multiindex(indexes, self.keys,
/home/pwaller/.local/lib/python3.5/site-packages/pandas/tools/merge.py in _concat_indexes(indexes)
1622
1623 def _concat_indexes(indexes):
-> 1624 return indexes[0].append(indexes[1:])
1625
1626
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/base.py in append(self, other)
1440 # if any of the to_concat is category
1441 from pandas.indexes.category import CategoricalIndex
-> 1442 return CategoricalIndex._append_same_dtype(self, to_concat, name)
1443
1444 if len(typs) == 1:
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/category.py in _append_same_dtype(self, to_concat, name)
578 ValueError if other is not in the categories
579 """
--> 580 to_concat = [self._is_dtype_compat(c) for c in to_concat]
581 codes = np.concatenate([c.codes for c in to_concat])
582 result = self._create_from_codes(codes, name=name)
/home/pwaller/.local/lib/python3.5/site-packages/pandas/indexes/category.py in <listcomp>(.0)
578 ValueError if other is not in the categories
579 """
--> 580 to_concat = [self._is_dtype_compat(c) for c in to_concat]
581 codes = np.concatenate([c.codes for c in to_concat])
582 result = self._create_from_codes(codes, name=name)
AttributeError: 'Index' object has no attribute '_is_dtype_compat'
Comment From: jreback
@pwaller this looks like a manifestation of #14298, as you indicated: an untested code path. Pull requests are welcome! Thanks.