Pandas version checks
-
[X] I have checked that this issue has not already been reported.
-
[X] I have confirmed this bug exists on the latest version of pandas.
-
[x] I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
import pandas as pd
ts = pd.Series([0, 1, 2],
index=pd.Index(
["2018-04-09", None, "2018-04-10"], dtype="datetime64[ns]")
)
# this works and prints just the first row
print(ts.first('1D'))
# raises KeyError
print(ts.first('2D'))
Issue Description
When `first` should select rows spanning a null in the index, it throws a `KeyError` like `KeyError: Timestamp('2018-04-11 00:00:00')`.
Stack trace
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:548, in pandas._libs.index.DatetimeEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:2263, in pandas._libs.hashtable.Int64HashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:2273, in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1523404800000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:3803, in Index.get_loc(self, key, method, tolerance)
3802 try:
-> 3803 return self._engine.get_loc(casted_key)
3804 except KeyError as err:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:516, in pandas._libs.index.DatetimeEngine.get_loc()
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/_libs/index.pyx:550, in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2018-04-11 00:00:00')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:736, in DatetimeIndex.get_loc(self, key, method, tolerance)
735 try:
--> 736 return Index.get_loc(self, key, method, tolerance)
737 except KeyError as err:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805, in Index.get_loc(self, key, method, tolerance)
3804 except KeyError as err:
-> 3805 raise KeyError(key) from err
3806 except TypeError:
3807 # If we have a listlike key, _check_indexing_error will raise
3808 # InvalidIndexError. Otherwise we fall through and re-raise
3809 # the TypeError.
KeyError: Timestamp('2018-04-11 00:00:00')
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In [1], line 10
8 print(ts.first('1D'))
9 # raises KeyError
---> 10 print(ts.first('2D'))
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/generic.py:8946, in NDFrame.first(self, offset)
8943 end = self.index.searchsorted(end_date, side="left")
8944 return self.iloc[:end]
-> 8946 return self.loc[:end]
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1073, in _LocationIndexer.__getitem__(self, key)
1070 axis = self.axis or 0
1072 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073 return self._getitem_axis(maybe_callable, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1290, in _LocIndexer._getitem_axis(self, key, axis)
1288 if isinstance(key, slice):
1289 self._validate_key(key, axis)
-> 1290 return self._get_slice_axis(key, axis=axis)
1291 elif com.is_bool_indexer(key):
1292 return self._getbool_axis(key, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexing.py:1324, in _LocIndexer._get_slice_axis(self, slice_obj, axis)
1321 return obj.copy(deep=False)
1323 labels = obj._get_axis(axis)
-> 1324 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
1326 if isinstance(indexer, slice):
1327 return self.obj._slice(indexer, axis=axis)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:809, in DatetimeIndex.slice_indexer(self, start, end, step, kind)
801 # GH#33146 if start and end are combinations of str and None and Index is not
802 # monotonic, we can not use Index.slice_indexer because it does not honor the
803 # actual elements, is only searching for start and end
804 if (
805 check_str_or_none(start)
806 or check_str_or_none(end)
807 or self.is_monotonic_increasing
808 ):
--> 809 return Index.slice_indexer(self, start, end, step, kind=kind)
811 mask = np.array(True)
812 deprecation_mask = np.array(True)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6602, in Index.slice_indexer(self, start, end, step, kind)
6559 """
6560 Compute the slice indexer for input labels and step.
6561
(...)
6598 slice(1, 3, None)
6599 """
6600 self._deprecated_arg(kind, "kind", "slice_indexer")
-> 6602 start_slice, end_slice = self.slice_locs(start, end, step=step)
6604 # return a slice
6605 if not is_scalar(start_slice):
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6816, in Index.slice_locs(self, start, end, step, kind)
6814 end_slice = None
6815 if end is not None:
-> 6816 end_slice = self.get_slice_bound(end, "right")
6817 if end_slice is None:
6818 end_slice = len(self)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6729, in Index.get_slice_bound(self, label, side, kind)
6726 return self._searchsorted_monotonic(label, side)
6727 except ValueError:
6728 # raise the original KeyError
-> 6729 raise err
6731 if isinstance(slc, np.ndarray):
6732 # get_loc may return a boolean array, which
6733 # is OK as long as they are representable by a slice.
6734 assert is_bool_dtype(slc.dtype)
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/base.py:6723, in Index.get_slice_bound(self, label, side, kind)
6721 # we need to look up the label
6722 try:
-> 6723 slc = self.get_loc(label)
6724 except KeyError as err:
6725 try:
File ~/opt/anaconda3/envs/pandas-dev/lib/python3.10/site-packages/pandas/core/indexes/datetimes.py:738, in DatetimeIndex.get_loc(self, key, method, tolerance)
736 return Index.get_loc(self, key, method, tolerance)
737 except KeyError as err:
--> 738 raise KeyError(orig_key) from err
KeyError: Timestamp('2018-04-11 00:00:00')
Expected Behavior
I should get all the matching times, excluding nulls.
Installed Versions
Comment From: topper-123
`ts.first(offset)` is shorthand for `ts.loc[:first_data + offset]`. Slicing requires a monotonic index: https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#non-monotonic-indexes-require-exact-matches.
Your index is not monotonic. Can you check whether this problem persists with a monotonic index?
IMO the error message here is not nice; this could at least fail earlier with a better error message.
Comment From: topper-123
Closing. If the problem also exists with monotonic indexes, we can reopen.