Code Sample, a copy-pastable example if possible
In [2]: from collections import OrderedDict
In [3]: param_index = OrderedDict([((('a', 'b'), ('c', 'd')), 1),
...: ((('a', None), ('c', 'd')), 2),
...: ])
...:
In [4]: pd.Series([1, 2], index=param_index.keys())
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/nobackup/repo/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
634 try:
--> 635 order = uniques.argsort()
636 order2 = order.argsort()
TypeError: unorderable types: NoneType() < str()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/nobackup/repo/pandas/pandas/core/sorting.py in safe_sort(values, labels, na_sentinel, assume_unique)
450 try:
--> 451 sorter = values.argsort()
452 ordered = values.take(sorter)
TypeError: unorderable types: NoneType() < str()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
397 try:
--> 398 codes, categories = factorize(values, sort=True)
399 except TypeError:
~/nobackup/repo/pandas/pandas/util/_decorators.py in wrapper(*args, **kwargs)
177 kwargs[new_arg_name] = new_arg_value
--> 178 return func(*args, **kwargs)
179 return wrapper
~/nobackup/repo/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
642 na_sentinel=na_sentinel,
--> 643 assume_unique=True)
644
~/nobackup/repo/pandas/pandas/core/sorting.py in safe_sort(values, labels, na_sentinel, assume_unique)
454 # try this anyway
--> 455 ordered = sort_mixed(values)
456
~/nobackup/repo/pandas/pandas/core/sorting.py in sort_mixed(values)
440 dtype=bool)
--> 441 nums = np.sort(values[~str_pos])
442 strs = np.sort(values[str_pos])
~/.local/lib/python3.5/site-packages/numpy/core/fromnumeric.py in sort(a, axis, kind, order)
846 a = asanyarray(a).copy(order="K")
--> 847 a.sort(axis=axis, kind=kind, order=order)
848 return a
TypeError: unorderable types: NoneType() < str()
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-4-2fff0a9c0f74> in <module>()
----> 1 pd.Series([1, 2], index=param_index.keys())
~/nobackup/repo/pandas/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
191
192 if index is not None:
--> 193 index = ensure_index(index)
194
195 if data is None:
~/nobackup/repo/pandas/pandas/core/indexes/base.py in ensure_index(index_like, copy)
5006 index_like = copy(index_like)
5007
-> 5008 return Index(index_like)
5009
5010
~/nobackup/repo/pandas/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, fastpath, tupleize_cols, **kwargs)
448 from .multi import MultiIndex
449 return MultiIndex.from_tuples(
--> 450 data, names=name or kwargs.get('names'))
451 # other iterable of some kind
452 subarr = com.asarray_tuplesafe(data, dtype=object)
~/nobackup/repo/pandas/pandas/core/indexes/multi.py in from_tuples(cls, tuples, sortorder, names)
1333 arrays = lzip(*tuples)
1334
-> 1335 return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names)
1336
1337 @classmethod
~/nobackup/repo/pandas/pandas/core/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
1277 from pandas.core.arrays.categorical import _factorize_from_iterables
1278
-> 1279 labels, levels = _factorize_from_iterables(arrays)
1280 if names is None:
1281 names = [getattr(arr, "name", None) for arr in arrays]
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterables(iterables)
2549 # For consistency, it should return a list of 2 lists.
2550 return [[], []]
-> 2551 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in <listcomp>(.0)
2549 # For consistency, it should return a list of 2 lists.
2550 return [[], []]
-> 2551 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterable(values)
2521 codes = values.codes
2522 else:
-> 2523 cat = Categorical(values, ordered=True)
2524 categories = cat.categories
2525 codes = cat.codes
~/nobackup/repo/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
402 # raise, as we don't have a sortable data structure and so
403 # the user should give us one by specifying categories
--> 404 raise TypeError("'values' is not ordered, please "
405 "explicitly specify the categories order "
406 "by passing in a categories argument.")
TypeError: 'values' is not ordered, please explicitly specify the categories order by passing in a categories argument.
Problem description
The above is a simplified version of the example in this comment; both of them used to work (I tested in 0.19.0+git14-ga40e185, @jolespin tested in 0.22). I am creating this separate issue because #15457 itself is not a regression.
Notice that the error changes if you replace `param_index.keys()` with `list(param_index.keys())` (but stays the same if you just replace the `OrderedDict` with an ordinary `dict`).
Expected Output
In 0.19.0+git14-ga40e185:
In [4]: pd.Series([1, 2], index=param_index.keys())
Out[4]:
((a, b), (c, d)) 1
((a, None), (c, d)) 2
dtype: int64
Output of pd.show_versions()
Comment From: evfro
having the same problem
Comment From: mroeschke
The latest result on master looks okay to me (coercing to a MultiIndex instead of having a flat Index). Could use a test.
In [13]: In [2]: from collections import OrderedDict
...: In [3]: param_index = OrderedDict([((('a', 'b'), ('c', 'd')), 1),
...: ...: ((('a', None), ('c', 'd')), 2),
...: ...: ])
...: ...:
...:
...: In [4]: pd.Series([1, 2], index=param_index.keys())
Out[13]:
(a, b) (c, d) 1
(a, None) (c, d) 2
dtype: int64
Comment From: devdattakhoche
@mroeschke, @toobaz can we close this issue? I think it's fixed, and an informative error message seems to be shown when we pass a multi-dimensional index to Series.
ValueError: Index data must be 1-dimensional
https://github.com/pandas-dev/pandas/issues/15457
Comment From: jreback
would take a PR with a test like the OP
Comment From: devdattakhoche
> would take a PR with a test like the OP

@jreback Can you elaborate? I didn't understand you. What do we need a test for here? I also didn't understand what 'OP' means here.
Comment From: devdattakhoche
I am willing to contribute here. Can I know what is required?