From 41025d8f2f75c01f26b7414c8a2fa26cf29a28ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=98=E6=9D=B0?= Date: Thu, 8 Sep 2022 15:16:13 +0000 Subject: [PATCH 1/8] Modify ignore_empty output --- janitor/functions/concatenate_columns.py | 23 ++++++++++++--------- tests/functions/test_concatenate_columns.py | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/janitor/functions/concatenate_columns.py b/janitor/functions/concatenate_columns.py index 757b7a715..e7226e6ad 100644 --- a/janitor/functions/concatenate_columns.py +++ b/janitor/functions/concatenate_columns.py @@ -25,19 +25,19 @@ def concatenate_columns( >>> import pandas as pd >>> import janitor - >>> df = pd.DataFrame({"a": [1, 3, 5], "b": list("xyz")}) + >>> df = pd.DataFrame({"a": [1, pd.NA, 5], "b": list("xyz")}) >>> df - a b - 0 1 x - 1 3 y - 2 5 z + a b + 0 1 x + 1 y + 2 5 z >>> df.concatenate_columns( ... column_names=["a", "b"], new_column_name="m", ... ) - a b m - 0 1 x 1-x - 1 3 y 3-y - 2 5 z 5-z + a b m + 0 x x + 1 3 y 3-y + 2 5 z 5-z :param df: A pandas DataFrame. :param column_names: A list of columns to concatenate together. @@ -52,7 +52,10 @@ def concatenate_columns( raise JanitorError("At least two columns must be specified") df[new_column_name] = ( - df[column_names].astype(str).fillna("").agg(sep.join, axis=1) + df[column_names] + .astype(str) + .replace(["NaT", "nan", ""], "") + .agg(sep.join, axis=1) ) if ignore_empty: diff --git a/tests/functions/test_concatenate_columns.py b/tests/functions/test_concatenate_columns.py index 25a7d7bee..341fb344d 100644 --- a/tests/functions/test_concatenate_columns.py +++ b/tests/functions/test_concatenate_columns.py @@ -28,7 +28,7 @@ def test_concatenate_columns_null_values(missingdata_df): new_column_name="index", ignore_empty=True, ) - expected_values = ["1.0-1", "2.0-2", "nan-3"] * 3 + expected_values = ["1.0-1", "2.0-2", "3"] * 3 assert expected_values == df["index"].tolist() From d74a829e8e6b1c57b51761a98cfc3dcd5f9ef97e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=98=E6=9D=B0?= Date: Thu, 8 Sep 2022 15:36:29 +0000 Subject: [PATCH 2/8] Modify ignore_empty output --- AUTHORS.md | 1 + CHANGELOG.md | 1 + 2 files changed, 2 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index d457f72ad..b52a7af77 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -109,3 +109,4 @@ Contributors - [@ethompsy](https://github.com/ethompsy) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Aethompsy) - [@apatao](https://github.com/apatao) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Aapatao) - [@OdinTech3](https://github.com/OdinTech3) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/pull/1094) +- [@Fu-Jie](https://github.com/Fu-Jie) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues/1164) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca51b7fa4..09e3d7f24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - [ENH] Enable `encode_categorical` handle 2 (or more ) dimensions array. PR #1153 @Zeroto521 - [ENH] Faster computation for a single non-equi join, with a numba engine. Issue #1102 @samukweku - [INF] Cancel old workflow runs via Github Action `concurrency`. PR #1161 @Zeroto521 +- [BUG] Modify ignore_empty output in `concatenate_columns`. PR #1164 @Fu-Jie ## [v0.23.1] - 2022-05-03 From f012b9d6c76fc58ff241df0754e4b4da20bd5c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BB=98=E6=9D=B0?= Date: Thu, 8 Sep 2022 16:03:07 +0000 Subject: [PATCH 3/8] solve doc format --- janitor/functions/concatenate_columns.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/janitor/functions/concatenate_columns.py b/janitor/functions/concatenate_columns.py index e7226e6ad..3e806068f 100644 --- a/janitor/functions/concatenate_columns.py +++ b/janitor/functions/concatenate_columns.py @@ -25,19 +25,19 @@ def concatenate_columns( >>> import pandas as pd >>> import janitor - >>> df = pd.DataFrame({"a": [1, pd.NA, 5], "b": list("xyz")}) + >>> df = pd.DataFrame({"a": [1, 3, 5], "b": list("xyz")}) >>> df - a b - 0 1 x - 1 y - 2 5 z + a b + 0 1 x + 1 3 y + 2 5 z >>> df.concatenate_columns( ... column_names=["a", "b"], new_column_name="m", ... ) - a b m - 0 x x - 1 3 y 3-y - 2 5 z 5-z + a b m + 0 1 x 1-x + 1 3 y 3-y + 2 5 z 5-z :param df: A pandas DataFrame. :param column_names: A list of columns to concatenate together. From adaaf36f595a93fd3b2f4b9f561888e148a911cd Mon Sep 17 00:00:00 2001 From: Jeff <33599649+Fu-Jie@users.noreply.github.com> Date: Tue, 13 Sep 2022 10:21:26 +0800 Subject: [PATCH 4/8] Update AUTHORS.md Co-authored-by: 40% --- AUTHORS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index b52a7af77..3dc684b78 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -109,4 +109,4 @@ Contributors - [@ethompsy](https://github.com/ethompsy) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Aethompsy) - [@apatao](https://github.com/apatao) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%3Aapatao) - [@OdinTech3](https://github.com/OdinTech3) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/pull/1094) -- [@Fu-Jie](https://github.com/Fu-Jie) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues/1164) +- [@Fu-Jie](https://github.com/Fu-Jie) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/pulls?q=is%3Aclosed+mentions%3AFu-Jie) From 30c1f3c8dc9def92bb6a6293b6c9f7f1640f1e6e Mon Sep 17 00:00:00 2001 From: Jeff <33599649+Fu-Jie@users.noreply.github.com> Date: Tue, 13 Sep 2022 10:23:09 +0800 Subject: [PATCH 5/8] Update janitor/functions/concatenate_columns.py Co-authored-by: 40% --- janitor/functions/concatenate_columns.py | 1 + 1 file changed, 1 insertion(+) diff --git a/janitor/functions/concatenate_columns.py b/janitor/functions/concatenate_columns.py index 3e806068f..10795d737 100644 --- a/janitor/functions/concatenate_columns.py +++ b/janitor/functions/concatenate_columns.py @@ -51,6 +51,7 @@ def concatenate_columns( if len(column_names) < 2: raise JanitorError("At least two columns must be specified") + df = df.copy() # avoid mutating original data df[new_column_name] = ( df[column_names] .astype(str) From a25d983ab236dd9741a065d02779f9fb38d7f342 Mon Sep 17 00:00:00 2001 From: Jeff <33599649+Fu-Jie@users.noreply.github.com> Date: Wed, 14 Sep 2022 16:00:44 +0800 Subject: [PATCH 6/8] Update concatenate_columns.py use astype("string") --- janitor/functions/concatenate_columns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/janitor/functions/concatenate_columns.py b/janitor/functions/concatenate_columns.py index 10795d737..c4377cf60 100644 --- a/janitor/functions/concatenate_columns.py +++ b/janitor/functions/concatenate_columns.py @@ -54,8 +54,8 @@ def concatenate_columns( df = df.copy() # avoid mutating original data df[new_column_name] = ( df[column_names] - .astype(str) - .replace(["NaT", "nan", ""], "") + .astype("string") + .fillna("") .agg(sep.join, axis=1) ) From 08fe78c7242ad7b118fd96f207d3aa21daa090b9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Nov 2022 13:16:41 +0000 Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- janitor/functions/concatenate_columns.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/janitor/functions/concatenate_columns.py b/janitor/functions/concatenate_columns.py index c4377cf60..5772658b8 100644 --- a/janitor/functions/concatenate_columns.py +++ b/janitor/functions/concatenate_columns.py @@ -53,10 +53,7 @@ def concatenate_columns( df = df.copy() # avoid mutating original data df[new_column_name] = ( - df[column_names] - .astype("string") - .fillna("") - .agg(sep.join, axis=1) + df[column_names].astype("string").fillna("").agg(sep.join, axis=1) ) if ignore_empty: From 66ea4c04f46707d3581928dd47bfba2617268734 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Mon, 28 Nov 2022 21:49:04 +1100 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fab656847..788282a46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - [INF] Replace `pytest.ini` file with `pyproject.toml` file. PR #1204 @Zeroto521 - [INF] Extract docstrings tests from all tests. PR #1205 @Zeroto521 - [BUG] address the `TypeError` when importing v0.24.0 (issue #1201 @xujiboy and @joranbeasley) +- [BUG] Modify ignore_empty output in `concatenate_columns`. Issue #1164 @Fu-Jie ## [v0.24.0] - 2022-11-12 @@ -28,7 +29,6 @@ - [ENH] Enable `encode_categorical` handle 2 (or more ) dimensions array. PR #1153 @Zeroto521 - [TST] Fix testcases failing on Window. Issue #1160 @Zeroto521, and @samukweku - [INF] Cancel old workflow runs via Github Action `concurrency`. PR #1161 @Zeroto521 -- [BUG] Modify ignore_empty output in `concatenate_columns`. PR #1164 @Fu-Jie - [ENH] Faster computation for non-equi join, with a numba engine. Speed improvement for left/right joins when `sort_by_appearance` is False. Issue #1102 @samukweku - [BUG] Avoid `change_type` mutating original `DataFrame`. PR #1162 @Zeroto521 - [ENH] The parameter `column_name` of `change_type` totally supports inputing multi-column now. #1163 @Zeroto521