Pandas version checks
- [X] I have checked that this issue has not already been reported.
- [X] I have confirmed this bug exists on the latest version of pandas.
- [ ] I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
============================= test session starts ==============================
platform linux -- Python 3.13.1, pytest-8.3.4, pluggy-1.5.0 -- /usr/bin/python3
cachedir: /builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/pytest-cache
hypothesis profile 'ci' -> deadline=None, suppress_health_check=[HealthCheck.too_slow, HealthCheck.differing_executors], database=DirectoryBasedExampleDatabase(PosixPath('/builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/.hypothesis/examples'))
rootdir: /builddir/build/BUILD/python-pandas-2.2.3-build/BUILDROOT/usr/lib64/python3.13/site-packages/pandas
configfile: pyproject.toml
plugins: asyncio-0.24.0, xdist-3.6.1, hypothesis-6.104.2
asyncio: mode=Mode.STRICT, default_loop_scope=None
created: 1/1 worker
=================================== FAILURES ===================================
____________________ test_array_inference[data7-expected7] _____________________
[gw0] linux -- Python 3.13.1 /usr/bin/python3
data = [datetime.datetime(2000, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>), datetime.datetime(2001, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>)]
expected = <DatetimeArray>
['2000-01-01 00:00:00+01:00', '2001-01-01 00:00:00+01:00']
Length: 2, dtype: datetime64[ns, CET]
@pytest.mark.parametrize(
"data, expected",
[
# period
(
[pd.Period("2000", "D"), pd.Period("2001", "D")],
period_array(["2000", "2001"], freq="D"),
),
# interval
([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
# datetime
(
[pd.Timestamp("2000"), pd.Timestamp("2001")],
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
),
(
[datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
),
(
np.array([1, 2], dtype="M8[ns]"),
DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
),
(
np.array([1, 2], dtype="M8[us]"),
DatetimeArray._simple_new(
np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
),
),
# datetimetz
(
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
),
),
(
[
datetime.datetime(2000, 1, 1, tzinfo=cet),
datetime.datetime(2001, 1, 1, tzinfo=cet),
],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
),
),
# timedelta
(
[pd.Timedelta("1h"), pd.Timedelta("2h")],
TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
),
(
np.array([1, 2], dtype="m8[ns]"),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
),
(
np.array([1, 2], dtype="m8[us]"),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
),
# integer
([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
# float
([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
# integer-like float
([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
# mixed-integer-float
([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
(
[1, np.nan, 2.0],
FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
),
# string
(
["a", "b"],
pd.StringDtype()
.construct_array_type()
._from_sequence(["a", "b"], dtype=pd.StringDtype()),
),
(
["a", None],
pd.StringDtype()
.construct_array_type()
._from_sequence(["a", None], dtype=pd.StringDtype()),
),
# Boolean
([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
],
)
def test_array_inference(data, expected):
result = pd.array(data)
> tm.assert_equal(result, expected)
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/tests/arrays/test_array.py:377:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
left = array(['1999-12-31T23:42:00.000000000', '2000-12-31T23:42:00.000000000'],
dtype='datetime64[ns]')
right = array(['1999-12-31T23:00:00.000000000', '2000-12-31T23:00:00.000000000'],
dtype='datetime64[ns]')
err_msg = None
def _raise(left, right, err_msg) -> NoReturn:
if err_msg is None:
if left.shape != right.shape:
raise_assert_detail(
obj, f"{obj} shapes are different", left.shape, right.shape
)
diff = 0
for left_arr, right_arr in zip(left, right):
# count up differences
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
diff += 1
diff = diff * 100.0 / left.size
msg = f"{obj} values are different ({np.round(diff, 5)} %)"
> raise_assert_detail(obj, msg, left, right, index_values=index_values)
E AssertionError: DatetimeArray._ndarray are different
E
E DatetimeArray._ndarray values are different (100.0 %)
E [left]: [1999-12-31T23:42:00.000000000, 2000-12-31T23:42:00.000000000]
E [right]: [1999-12-31T23:00:00.000000000, 2000-12-31T23:00:00.000000000]
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/_testing/asserters.py:684: AssertionError
Full build log with more test failures is here: https://kojipkgs.fedoraproject.org//work/tasks/9043/126999043/build.log
### Issue Description
We are updating Fedora to pandas 2.2.3 and numpy 2.2.0, but we are getting test failures.
### Expected Behavior
No test failures
### Installed Versions
<details>
2.2.3
</details>
**Comment From: opoplawski**
Also:
__ TestDataFrameToXArray.test_to_xarray_index_types[string] ___
[gw0] linux -- Python 3.13.1 /usr/bin/python3
self =
from xarray import Dataset
df.index = index[:4]
df.index.name = "foo"
df.columns.name = "bar"
result = df.to_xarray()
assert result.sizes["foo"] == 4
assert len(result.coords) == 1
assert len(result.data_vars) == 8
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
assert isinstance(result, Dataset)
# idempotency
# datetimes w/tz are preserved
# column names are lost
expected = df.copy()
expected["f"] = expected["f"].astype(
object if not using_infer_string else "string[pyarrow_numpy]"
)
expected.columns.name = None
tm.assert_frame_equal(result.to_dataframe(), expected)
E AssertionError: Attributes of DataFrame.iloc[:, 5] (column name="f") are different
E
E Attribute "dtype" are different
E [left]: CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=False, categories_dtype=object)
E [right]: object
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/tests/generic/test_to_xarray.py:58: AssertionError
**Comment From: rhshadrach**
Thanks for the report. Can you post the output of `pd.show_versions()`.
**Comment From: opoplawski**
```
INSTALLED VERSIONS
commit : 0691c5cf90477d3503834d983f69350f250a6ff7 python : 3.13.1 python-bits : 64 OS : Linux OS-release : 6.13.0-0.rc3.20241217gitf44d154d6e3d.30.fc42.x86_64 Version : #1 SMP PREEMPT_DYNAMIC Tue Dec 17 22:37:32 UTC 2024 machine : x86_64 processor : byteorder : little LC_ALL : None LANG : C.UTF-8 LOCALE : C.UTF-8
pandas : 2.2.3 numpy : 2.2.0 pytz : 2024.2 dateutil : 2.8.2 pip : 24.3.1 Cython : 3.0.11 sphinx : 7.3.7 IPython : 8.30.0 adbc-driver-postgresql: None adbc-driver-sqlite : None bs4 : 4.12.3 blosc : None bottleneck : 1.4.2 dataframe-api-compat : None fastparquet : None fsspec : 2024.12.0 html5lib : 1.1 hypothesis : 6.104.2 gcsfs : 2024.9.0 jinja2 : 3.1.4 lxml.etree : 5.3.0 matplotlib : 3.9.4 numba : None numexpr : 2.10.2 odfpy : None openpyxl : 3.1.2 pandas_gbq : None psycopg2 : 2.9.9 pymysql : 1.4.6 pyarrow : 18.0.0 pyreadstat : None pytest : 8.3.4 python-calamine : None pyxlsb : None s3fs : None scipy : 1.14.1 sqlalchemy : 2.0.36 tables : 3.10.1 tabulate : 0.9.0 xarray : 2024.10.0 xlrd : 2.0.1 xlsxwriter : 3.2.0 zstandard : 0.23.0 tzdata : None qtpy : 2.4.2 pyqt5 : None
```
**Comment From: rhshadrach**
> tzdata : None

pandas 2.2.3 requires `tzdata >= 2022.7`. It appears you have an invalid environment. Can you try installing tzdata and see if this resolves the issue?
**Comment From: keszybz**
The build environment has tzdata-2024b-1.fc42 installed. So there's something wrong with how pandas detects the presence of tzdata.
An earlier build for pandas-2.2.1 with tzdata-2024a-1.fc2 passed.
Note that we run `sed -i '/tzdata>=2022.7/d' pyproject.toml` because we provide tzdata in the system and don't want to use the Python package.
**Comment From: opoplawski**
We seem to be stalled here. Any more information that would be useful?
**Comment From: rhshadrach**
It seems like the use of tzdata is non-standard and the tests that are failing have timezones. I do not see any indication that there is something wrong in pandas.