Skip to content

Commit

Permalink
fix: Fix Spark offline store type conversion to arrow (#3071)
Browse files Browse the repository at this point in the history
* Fix unit tests related to empty list types

Signed-off-by: niklasvm <niklasvm@gmail.com>

* formatting

Signed-off-by: niklasvm <niklasvm@gmail.com>

Signed-off-by: niklasvm <niklasvm@gmail.com>
  • Loading branch information
niklasvm authored Aug 11, 2022
1 parent d3253c3 commit b26566d
Showing 1 changed file with 7 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tempfile
import warnings
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Union
Expand All @@ -6,6 +7,7 @@
import pandas
import pandas as pd
import pyarrow
import pyarrow.parquet as pq
import pyspark
from pydantic import StrictStr
from pyspark import SparkConf
Expand Down Expand Up @@ -267,8 +269,11 @@ def _to_df_internal(self) -> pd.DataFrame:

def _to_arrow_internal(self) -> pyarrow.Table:
"""Return dataset as pyarrow Table synchronously"""
# NOTE(review): this span is a rendered diff whose +/- markers and
# indentation were lost. Per the hunk header (-267,8 +269,11) and the
# "7 additions and 2 deletions" summary, the next two statements appear to
# be the lines this commit removed (the old pandas-based conversion), so
# they are presumably not part of the resulting method — confirm against
# the repository.
df = self.to_df()
return pyarrow.Table.from_pandas(df) # noqa

# NOTE(review): the remaining lines appear to be the ones this commit added,
# presumably replacing the two statements above.
# write to temp parquet and then load it as pyarrow table from disk
with tempfile.TemporaryDirectory() as temp_dir:
# The Spark DataFrame is written as parquet into the temporary directory.
# NOTE(review): temp_dir is created by the local `tempfile` module;
# presumably Spark must be able to write to that same filesystem —
# confirm for non-local cluster deployments.
self.to_spark_df().write.parquet(temp_dir, mode="overwrite")
# The table is read back inside the `with` block, i.e. before the
# temporary directory is cleaned up when the block exits.
return pq.read_table(temp_dir)

def persist(self, storage: SavedDatasetStorage):
"""
Expand Down

0 comments on commit b26566d

Please sign in to comment.