- [x] I have checked that this issue has not already been reported.
- [x] I have confirmed this bug exists on the latest version of pandas.
- [ ] (optional) I have confirmed this bug exists on the master branch of pandas.
Code Sample, a copy-pastable example
import pandas as pd
import numpy as np
from datetime import datetime
>>> time_index = pd.date_range(datetime(2021, 1, 1, 1), datetime(2021, 1, 1, 10), freq='H', tz='Europe/Oslo')
>>> s1 = pd.Series(np.random.random(10), index=time_index)
>>> s2 = pd.Series()
>>> s1.combine_first(s2)
Traceback
AttributeError Traceback (most recent call last)
<ipython-input-14-9af79f353bf5> in <module>
----> 1 s1.combine_first(pd.Series())
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/series.py in combine_first(self, other)
2983 other = to_datetime(other)
2984
-> 2985 return this.where(notna(this), other)
2986
2987 def update(self, other) -> None:
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/generic.py in where(self, cond, other, inplace, axis, level, errors, try_cast)
9285 other = com.apply_if_callable(other, self)
9286 return self._where(
-> 9287 cond, other, inplace, axis, level, errors=errors, try_cast=try_cast
9288 )
9289
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/generic.py in _where(self, cond, other, inplace, axis, level, errors, try_cast)
9019 cond = com.apply_if_callable(cond, self)
9020 if isinstance(cond, NDFrame):
-> 9021 cond, _ = cond.align(self, join="right", broadcast_axis=1)
9022 else:
9023 if not hasattr(cond, "shape"):
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/series.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
4228 limit=limit,
4229 fill_axis=fill_axis,
-> 4230 broadcast_axis=broadcast_axis,
4231 )
4232
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/generic.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
8832 method=method,
8833 limit=limit,
-> 8834 fill_axis=fill_axis,
8835 )
8836 else: # pragma: no cover
~/anaconda3/envs/powerml_dev/lib/python3.7/site-packages/pandas/core/generic.py in _align_series(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
8985 if is_series or (not is_series and axis == 0):
8986 if is_datetime64tz_dtype(left.index.dtype):
-> 8987 if left.index.tz != right.index.tz:
8988 if join_index is not None:
8989 # GH#33671 ensure we don't change the index on
AttributeError: 'Index' object has no attribute 'tz'
Problem description
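`Series.combine_first` raises `AttributeError: 'Index' object has no attribute 'tz'` when one Series has a timezone-aware datetime index and the other Series is empty. The error does not occur if `s1` has a timezone-naive datetime index.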
Expected output
`s1.combine_first(s2)` should return a Series equal to `s1`.
Output of `pd.show_versions()`
INSTALLED VERSIONS
------------------
commit : 2cb96529396d93b46abab7bbc73a208e708c642e
python : 3.7.7.final.0
python-bits : 64
OS : Linux
OS-release : 4.15.0-142-generic
Version : #146-Ubuntu SMP Tue Apr 13 01:11:19 UTC 2021
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 1.2.4
numpy : 1.19.0
pytz : 2020.5
dateutil : 2.8.1
pip : 20.1.1
setuptools : 49.2.0.post20200714
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 1.3.7
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : 2.8.3 (dt dec pq3 ext lo64)
jinja2 : 2.11.2
IPython : 7.19.0
pandas_datareader: None
bs4 : None
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.4.1
numexpr : None
odfpy : None
openpyxl : 3.0.5
pandas_gbq : None
pyarrow : 2.0.0
pyxlsb : None
s3fs : None
scipy : 1.2.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : None
numba : None
Comment From: MarcoGorelli
This works now:
In [2]: time_index = pd.date_range(datetime(2021, 1, 1, 1), datetime(2021, 1, 1, 10), freq='H', tz='Europe/Oslo')
In [3]: s1 = pd.Series(np.random.random(10), index=time_index)
In [4]: s2 = pd.Series()
In [5]: s1.combine_first(s2)
Out[5]:
2021-01-01 01:00:00+01:00 0.859067
2021-01-01 02:00:00+01:00 0.553345
2021-01-01 03:00:00+01:00 0.836079
2021-01-01 04:00:00+01:00 0.695629
2021-01-01 05:00:00+01:00 0.824679
2021-01-01 06:00:00+01:00 0.429457
2021-01-01 07:00:00+01:00 0.917812
2021-01-01 08:00:00+01:00 0.234022
2021-01-01 09:00:00+01:00 0.100995
2021-01-01 10:00:00+01:00 0.239752
dtype: float64
A PR to add a test would be welcome.
Comment From: MarcoGorelli
This was fixed in #49169
https://www.kaggle.com/code/marcogorelli/pandas-regression-example/notebook?scriptVersionId=114076167
Comment From: MarcoGorelli
closed in https://github.com/pandas-dev/pandas/pull/50677