import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Minimal reproduction: build a DataFrame from the iris data, one-hot encode
# one column with sparse=True, then try to drop a different column by name.
iris = load_iris()
# NOTE(review): `data` is not referenced again in this snippet.
data = iris.data
column_names = iris.feature_names

# Wrap the feature matrix in a DataFrame, labelling columns with the feature names.
df = pd.DataFrame(iris.data, columns = column_names)

print(df.head(n=5))

# One-hot encode the 'petal length (cm)' column, requesting sparse output.
df_dummy = pd.get_dummies(df, sparse = True, columns = ['petal length (cm)'] )

# Show the original column names and the shape of the encoded frame.
print(list(df), df_dummy.shape)

# Dropping a column by name is the call that raises the TypeError shown in the
# traceback below (reported against pandas 0.21.0).
X = df_dummy.drop(['sepal length (cm)'], axis = 1)

Problem description

I can't perform any column-name-based operation on the DataFrame when it was created with `sparse=True` — for example, dropping a column, as reproduced above. The full traceback is included in the details below.

Expected Output

The expected output can be seen by removing `sparse = True` from the line `df_dummy = pd.get_dummies(df, sparse = True, columns = ['petal length (cm)'] )` and then running the subsequent lines.

Output of pd.show_versions()

INSTALLED VERSIONS
------------------
commit: None
pandas: 0.21.0
pytest: 3.2.1
pip: 9.0.1
setuptools: 36.5.0.post20170921
Cython: 0.26.1
numpy: 1.12.1
scipy: 1.0.0
pyarrow: None
xarray: None
IPython: 6.1.0
sphinx: 1.6.3
patsy: 0.4.1
dateutil: 2.6.1
pytz: 2017.2
blosc: None
bottleneck: 1.2.1
tables: 3.4.2
numexpr: 2.6.2
feather: None
matplotlib: 2.1.0
openpyxl: 2.4.8
xlrd: 1.1.0
xlwt: 1.2.0
xlsxwriter: 1.0.2
lxml: 4.1.0
bs4: 4.6.0
html5lib: 0.999999999
sqlalchemy: 1.1.13
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None

In [ ]:

COMPLETE ERROR PRINT
------------------

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
 in ()
     15 print(list(df), df_dummy.shape)
     16
---> 17 X = df_dummy.drop(['sepal length (cm)'], axis = 1)

/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   2528         for axis, labels in axes.items():
   2529             if labels is not None:
-> 2530                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   2531
   2532         if inplace:

/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
   2561             else:
   2562                 new_axis = axis.drop(labels, errors=errors)
-> 2563             dropped = self.reindex(**{axis_name: new_axis})
   2564             try:
   2565                 dropped.axes[axis_].set_names(axis.names, inplace=True)

/anaconda3/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    125         @wraps(func)
    126         def wrapper(*args, **kwargs):
--> 127             return func(*args, **kwargs)
    128
    129         if not PY2:

/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
   2933         kwargs.pop('axis', None)
   2934         kwargs.pop('labels', None)
-> 2935         return super(DataFrame, self).reindex(**kwargs)
   2936
   2937     @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)

/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
   3021         # perform the reindex on the axes
   3022         return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3023                                   fill_value, copy).__finalize__(self)
   3024
   3025     def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,

/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   2863         if columns is not None:
   2864             frame = frame._reindex_columns(columns, method, copy, level,
-> 2865                                            fill_value, limit, tolerance)
   2866
   2867         index = axes['index']

/anaconda3/lib/python3.6/site-packages/pandas/core/sparse/frame.py in _reindex_columns(self, columns, method, copy, level, fill_value, limit, takeable)
    701         return self._constructor(
    702             sdict, index=self.index, columns=columns,
--> 703             default_fill_value=self._default_fill_value).__finalize__(self)
    704
    705     def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,

/anaconda3/lib/python3.6/site-packages/pandas/core/sparse/frame.py in __init__(self, data, index, columns, default_kind, default_fill_value, dtype, copy)
     89                                       fill_value=default_fill_value)
     90         elif isinstance(data, dict):
---> 91             mgr = self._init_dict(data, index, columns, dtype=dtype)
     92         elif isinstance(data, (np.ndarray, list)):
     93             mgr = self._init_matrix(data, index, columns, dtype=dtype)

/anaconda3/lib/python3.6/site-packages/pandas/core/sparse/frame.py in _init_dict(self, data, index, columns, dtype)
    168         sdict.update((c, nan_arr) for c in columns if c not in sdict)
    169
--> 170         return to_manager(sdict, columns, index)
    171
    172     def _init_matrix(self, data, index, columns, dtype=None):

/anaconda3/lib/python3.6/site-packages/pandas/core/sparse/frame.py in to_manager(sdf, columns, index)
    896
    897     return create_block_manager_from_arrays(
--> 898         [sdf[c] for c in columns], columns, axes)
    899
    900

/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in create_block_manager_from_arrays(arrays, names, axes)
   4630
   4631     try:
-> 4632         blocks = form_blocks(arrays, names, axes)
   4633         mgr = BlockManager(blocks, axes)
   4634         mgr._consolidate_inplace()

/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in form_blocks(arrays, names, axes)
   4726
   4727     if len(sparse_items) > 0:
-> 4728         sparse_blocks = _sparse_blockify(sparse_items)
   4729         blocks.extend(sparse_blocks)
   4730

/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in _sparse_blockify(tuples, dtype)
   4788         array = _maybe_to_sparse(array)
   4789         block = make_block(array, klass=SparseBlock, fastpath=True,
-> 4790                            placement=[i])
   4791         new_blocks.append(block)
   4792

/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2950                        placement=placement, dtype=dtype)
   2951
-> 2952     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2953
   2954 # TODO: flexible with index=None and/or items=None

/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim, fastpath, **kwargs)
   1701
   1702         if not isinstance(values, self._holder):
-> 1703             raise TypeError("values must be {0}".format(self._holder.__name__))
   1704
   1705         self.values = values

Comment From: TomAugspurger

I think this is a duplicate of https://github.com/pandas-dev/pandas/issues/18686

Basically, get_dummies(..., sparse=True) converts everything to sparse, not just the newly created columns.