Skip to content

Commit 192290d

Browse files
authored
Merge pull request #120 from awslabs/pandas-read-redshift-empty
Handling empty results for Pandas.read_sql_redshift()
2 parents 037433b + 7890d75 commit 192290d

File tree

4 files changed

+43 additions, -12 deletions

awswrangler/pandas.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1408,6 +1408,8 @@ def read_parquet(self,
14081408
:param wait_objects_timeout: Wait objects Timeout (seconds)
14091409
:return: Pandas DataFrame
14101410
"""
1411+
if type(path) == list and not path: # Empty list
1412+
return pd.DataFrame()
14111413
procs_cpu_bound = procs_cpu_bound if procs_cpu_bound is not None else self._session.procs_cpu_bound if self._session.procs_cpu_bound is not None else 1
14121414
logger.debug(f"procs_cpu_bound: {procs_cpu_bound}")
14131415
dfs: List[pd.DataFrame] = []
@@ -1640,7 +1642,10 @@ def read_sql_redshift(self,
16401642
iam_role=iam_role,
16411643
connection=connection)
16421644
logger.debug(f"paths: {paths}")
1643-
df: pd.DataFrame = self.read_parquet(path=paths, procs_cpu_bound=procs_cpu_bound) # type: ignore
1645+
if paths:
1646+
df: pd.DataFrame = self.read_parquet(path=paths, procs_cpu_bound=procs_cpu_bound) # type: ignore
1647+
else:
1648+
df = pd.DataFrame()
16441649
except Exception as ex:
16451650
connection.rollback()
16461651
if paths is not None:

awswrangler/redshift.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -455,8 +455,9 @@ def to_parquet(self,
455455
logger.debug(f"paths: {paths}")
456456
connection.commit()
457457
cursor.close()
458-
if manifest_str != "":
459-
self._session.s3.wait_object_exists(path=f"{path}manifest", timeout=30.0)
460-
for p in paths:
461-
self._session.s3.wait_object_exists(path=p, timeout=30.0)
458+
if paths:
459+
if manifest_str != "":
460+
self._session.s3.wait_object_exists(path=f"{path}manifest", timeout=30.0)
461+
for p in paths:
462+
self._session.s3.wait_object_exists(path=p, timeout=30.0)
462463
return paths

testing/test_awswrangler/test_pandas.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2353,13 +2353,15 @@ def test_s3_overall_nan(bucket, database):
23532353
df["c_int"] = df["c_int"].astype("Int64")
23542354
print(df)
23552355
wr.pandas.to_parquet(dataframe=df,
2356-
database=database,
2357-
path=path,
2358-
mode="overwrite",
2359-
preserve_index=False,
2360-
procs_cpu_bound=1,
2361-
inplace=False)
2356+
database=database,
2357+
path=path,
2358+
mode="overwrite",
2359+
preserve_index=False,
2360+
procs_cpu_bound=1,
2361+
inplace=False)
23622362
sleep(15)
2363-
df2 = wr.pandas.read_sql_athena(database=database, sql="SELECT * FROM test_s3_overall_nan ORDER BY id", ctas_approach=True)
2363+
df2 = wr.pandas.read_sql_athena(database=database,
2364+
sql="SELECT * FROM test_s3_overall_nan ORDER BY id",
2365+
ctas_approach=True)
23642366
wr.s3.delete_objects(path=path)
23652367
assert df.equals(df2)

testing/test_awswrangler/test_redshift.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -752,3 +752,26 @@ def test_read_sql_redshift_pandas_glue_conn(session, bucket, redshift_parameters
752752
temp_s3_path=path2)
753753
assert len(df.index) == len(df2.index)
754754
assert len(df.columns) + 1 == len(df2.columns)
755+
756+
757+
def test_read_sql_redshift_pandas_empty(session, bucket, redshift_parameters):
758+
path = f"s3://{bucket}/test_read_sql_redshift_pandas_empty/"
759+
path2 = f"s3://{bucket}/test_read_sql_redshift_pandas_empty2/"
760+
wr.s3.delete_objects(path=path)
761+
wr.s3.delete_objects(path=path2)
762+
df = pd.DataFrame({"id": [1, 2, 3, 4, 5]})
763+
session.pandas.to_redshift(
764+
dataframe=df,
765+
path=path,
766+
schema="public",
767+
table="test",
768+
connection="aws-data-wrangler-redshift",
769+
iam_role=redshift_parameters.get("RedshiftRole"),
770+
mode="overwrite",
771+
preserve_index=True,
772+
)
773+
df2 = session.pandas.read_sql_redshift(sql="SELECT * FROM public.test WHERE id = 6",
774+
iam_role=redshift_parameters.get("RedshiftRole"),
775+
connection="aws-data-wrangler-redshift",
776+
temp_s3_path=path2)
777+
assert df2.equals(pd.DataFrame())

0 commit comments

Comments
 (0)