Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REGR: fix period dtype <-> arrow roundtrip for pyarrow < 4 #45524

Merged
merged 1 commit into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pandas/core/internals/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas.core.internals.blocks import (
Block,
DatetimeTZBlock,
ExtensionBlock,
check_ndim,
ensure_block_shape,
extract_pandas_array,
Expand Down Expand Up @@ -51,6 +52,12 @@ def make_block(

values, dtype = extract_pandas_array(values, dtype, ndim)

if klass is ExtensionBlock and is_period_dtype(values.dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defer to @jbrockmendel but not sure this is the right place for this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is fine.

issue a deprecation warning?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue a deprecation warning?

For the case we know about (pyarrow), this is already fixed in the latest release. More generally, we might want to raise a warning any time that klass is specified? That way it will always be inferred from the values (which pyarrow already does by now), and we won't have this issue again when changing Block classes.

Given the timing of the release, I think we can discuss that in a follow-up.

# GH-44681 changed PeriodArray to be stored in the 2D
# NDArrayBackedExtensionBlock instead of ExtensionBlock
# -> still allow ExtensionBlock to be passed in this case for back compat
klass = None

if klass is None:
dtype = dtype or values.dtype
klass = get_block_type(dtype)
Expand Down
11 changes: 0 additions & 11 deletions pandas/tests/arrays/period/test_arrow_compat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import pytest

from pandas.compat import pa_version_under4p0

from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
Expand Down Expand Up @@ -71,9 +69,6 @@ def test_arrow_array_missing():
assert result.storage.equals(expected)


@pytest.mark.xfail(
pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_table_roundtrip():
from pandas.core.arrays._arrow_utils import ArrowPeriodType

Expand All @@ -93,9 +88,6 @@ def test_arrow_table_roundtrip():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(
pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_load_from_zero_chunks():
# GH-41040

Expand All @@ -114,9 +106,6 @@ def test_arrow_load_from_zero_chunks():
tm.assert_frame_equal(result, df)


@pytest.mark.xfail(
pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_table_roundtrip_without_metadata():
arr = PeriodArray([1, 2, 3], freq="H")
arr[1] = pd.NaT
Expand Down
8 changes: 1 addition & 7 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import numpy as np
import pytest

from pandas.compat.pyarrow import pa_version_under4p0

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -87,11 +85,7 @@ def test_basic(self):
),
}
)
if not pa_version_under4p0:
# older pyarrow incorrectly uses pandas internal API, so
# constructs invalid Block
df["periods"] = pd.period_range("2013", freq="M", periods=3)

df["periods"] = pd.period_range("2013", freq="M", periods=3)
df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
df["intervals"] = pd.interval_range(0, 3, 3)

Expand Down
12 changes: 1 addition & 11 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

from pandas.compat.pyarrow import (
pa_version_under2p0,
pa_version_under4p0,
pa_version_under5p0,
pa_version_under6p0,
)
Expand Down Expand Up @@ -652,13 +651,7 @@ def test_use_nullable_dtypes(self, engine, request):
"object",
"datetime64[ns, UTC]",
"float",
pytest.param(
"period[D]",
marks=pytest.mark.xfail(
pa_version_under4p0,
reason="pyarrow uses pandas internal API incorrectly",
),
),
"period[D]",
"Float64",
"string",
],
Expand Down Expand Up @@ -897,9 +890,6 @@ def test_pyarrow_backed_string_array(self, pa, string_storage):
check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))

@td.skip_if_no("pyarrow")
@pytest.mark.xfail(
pa_version_under4p0, reason="pyarrow uses pandas internal API incorrectly"
)
def test_additional_extension_types(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol + by defining a custom ExtensionType
Expand Down