From 148f87d93a5fb17c58f000e4784016309d21a732 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Wed, 16 Jul 2025 22:11:17 -0500
Subject: [PATCH 1/7] Initial testcase for read_csv

---
 .../io/parser/usecols/test_usecols_basic.py   | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 82b42beb38ae0..1e4b7dbaa0167 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -545,3 +545,27 @@ def test_usecols_dtype(all_parsers):
         {"col1": array(["a", "b"]), "col2": np.array([1, 2], dtype="uint8")}
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("usecols", [(2, 0), ("c", "a")])
+def test_usecols_order(all_parsers, usecols, request):
+    # TODO add future flag
+    parser = all_parsers
+    data = """\
+a,b,c,d
+1,2,3,0
+4,5,6,
+7,8,9,0
+10,11,12,13"""
+    # print(usecols)
+    # print(data)
+
+    if parser.engine == "pyarrow" and isinstance(usecols[0], int):
+        with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
+            parser.read_csv(StringIO(data), usecols=usecols)
+        return
+
+    result = parser.read_csv(StringIO(data), usecols=usecols)
+
+    expected = DataFrame([[3, 1], [6, 4], [9, 7], [12, 10]], columns=["c", "a"])
+    tm.assert_frame_equal(result, expected)

From b94edd5c1e8ea18d35acdb1d7f2eaba9d048e0bb Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Fri, 18 Jul 2025 17:36:10 -0500
Subject: [PATCH 2/7] Added missing 0 in testcase

---
 pandas/tests/io/parser/usecols/test_usecols_basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 1e4b7dbaa0167..01dfa526044b4 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -554,7 +554,7 @@ def test_usecols_order(all_parsers, usecols, request):
     data = """\
 a,b,c,d
 1,2,3,0
-4,5,6,
+4,5,6,0
 7,8,9,0
 10,11,12,13"""
     # print(usecols)

From 7971351dfc21007fc6d76997a03248886a267941 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Mon, 21 Jul 2025 15:45:20 -0500
Subject: [PATCH 3/7] Added simple implementation of usecols order for read_csv

---
 pandas/io/parsers/readers.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 4fbd71ed03662..5641e7948ce50 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -11,6 +11,7 @@
     defaultdict,
 )
 import csv
+from inspect import isfunction
 import sys
 from textwrap import fill
 from typing import (
@@ -1516,8 +1517,10 @@ def read(self, nrows: int | None = None) -> DataFrame:
 
             if hasattr(self, "orig_options"):
                 dtype_arg = self.orig_options.get("dtype", None)
+                usecols = self.orig_options["usecols"]
             else:
                 dtype_arg = None
+                usecols = None
 
             if isinstance(dtype_arg, dict):
                 dtype = defaultdict(lambda: None)  # type: ignore[var-annotated]
@@ -1530,6 +1533,18 @@ def read(self, nrows: int | None = None) -> DataFrame:
             else:
                 dtype = None
 
+            if dtype is None:
+                if usecols is None or isfunction(usecols):
+                    # Doesn't change anything if function or None gets passed
+                    pass
+                elif len(usecols) == len(columns):
+                    # uses size of number in usecols to determine corresponding columns
+                    usecols_sorted = sorted(
+                        range(len(usecols)), key=lambda i: usecols[i]
+                    )
+                    columns = [columns[i] for i in usecols_sorted]
+                    col_dict = {k: col_dict[k] for k in columns}
+
             if dtype is not None:
                 new_col_dict = {}
                 for k, v in col_dict.items():
@@ -1548,7 +1563,6 @@ def read(self, nrows: int | None = None) -> DataFrame:
                 index=index,
                 copy=False,
             )
-
             self._currow += new_rows
         return df
 

From e394592b8ed72f4c1cd084782b185861ae6d5b5f Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Fri, 25 Jul 2025 17:06:11 -0500
Subject: [PATCH 4/7] Added future flag for usecols_use_order

---
 pandas/core/config_init.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 20fe8cbab1c9f..4e55aedbb2845 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -897,3 +897,13 @@ def register_converter_cb(key: str) -> None:
         "(at which point this option will be deprecated).",
         validator=is_one_of_factory([True, False]),
     )
+
+    cf.register_option(
+        "usecols_use_order",
+        False,
+        ": bool\n "
+        "Whether usecols parameter will use order of input when "
+        "making a DataFrame. \n This feature will be default in pandas 3.0 "
+        "(at which point this option will be deprecated).",
+        validator=is_one_of_factory([True, False]),
+    )

From 8c0d2d47e4604bbd2a49455a244dbe717ac96184 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Fri, 25 Jul 2025 17:38:38 -0500
Subject: [PATCH 5/7] Added check for future flag, improved testcase to check
 flag and more complicated usecols order

---
 pandas/io/parsers/readers.py                  |  4 ++-
 .../io/parser/usecols/test_usecols_basic.py   | 27 +++++++++++++------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 5641e7948ce50..29d3d0a9838b7 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -27,6 +27,8 @@
 
 import numpy as np
 
+from pandas._config import get_option
+
 from pandas._libs import lib
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas.errors import (
@@ -1533,7 +1535,7 @@ def read(self, nrows: int | None = None) -> DataFrame:
             else:
                 dtype = None
 
-            if dtype is None:
+            if dtype is None and get_option("future.usecols_use_order"):
                 if usecols is None or isfunction(usecols):
                     # Doesn't change anything if function or None gets passed
                     pass
diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 01dfa526044b4..00a52e1f3a18c 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+from pandas._config.config import option_context
+
 from pandas.errors import ParserError
 
 from pandas import (
@@ -547,9 +549,10 @@ def test_usecols_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("usecols", [(2, 0), ("c", "a")])
-def test_usecols_order(all_parsers, usecols, request):
-    # TODO add future flag
+@pytest.mark.parametrize("usecols", [(3, 0, 2), ("d", "a", "c")])
+@pytest.mark.parametrize("usecols_use_order", (True, False))
+def test_usecols_order(all_parsers, usecols, usecols_use_order):
+    # TODOE add portion in doc for 3.0 transition
     parser = all_parsers
     data = """\
 a,b,c,d
@@ -557,15 +560,23 @@ def test_usecols_order(all_parsers, usecols, request):
 4,5,6,0
 7,8,9,0
 10,11,12,13"""
-    # print(usecols)
-    # print(data)
 
+    msg = "The pyarrow engine does not allow 'usecols' to be integer column positions"
     if parser.engine == "pyarrow" and isinstance(usecols[0], int):
-        with pytest.raises(ValueError, match=_msg_pyarrow_requires_names):
+        with pytest.raises(ValueError, match=msg):
             parser.read_csv(StringIO(data), usecols=usecols)
         return
 
     result = parser.read_csv(StringIO(data), usecols=usecols)
 
-    expected = DataFrame([[3, 1], [6, 4], [9, 7], [12, 10]], columns=["c", "a"])
-    tm.assert_frame_equal(result, expected)
+    if usecols_use_order:
+        expected = DataFrame(
+            {"d": [0, 0, 0, 13], "a": [1, 4, 7, 10], "c": [3, 6, 9, 12]}
+        )
+    else:
+        expected = DataFrame(
+            {"a": [1, 4, 7, 10], "c": [3, 6, 9, 12], "d": [0, 0, 0, 13]}
+        )
+
+    with option_context("future.usecols_use_order", usecols_use_order):
+        tm.assert_frame_equal(result, expected)

From 7ec20a24faa509589b5b5666f416b62d84f0f033 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Fri, 25 Jul 2025 18:25:19 -0500
Subject: [PATCH 6/7] Fixed issue with reading out of order lists, added
 exception for pyarrow.

---
 pandas/io/parsers/readers.py                  |  9 ++++----
 .../io/parser/usecols/test_usecols_basic.py   | 21 ++++++++++++-------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 29d3d0a9838b7..847259403c72c 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1535,16 +1535,15 @@ def read(self, nrows: int | None = None) -> DataFrame:
             else:
                 dtype = None
 
-            if dtype is None and get_option("future.usecols_use_order"):
+            if get_option("future.usecols_use_order"):
                 if usecols is None or isfunction(usecols):
                     # Doesn't change anything if function or None gets passed
                     pass
                 elif len(usecols) == len(columns):
                     # uses size of number in usecols to determine corresponding columns
-                    usecols_sorted = sorted(
-                        range(len(usecols)), key=lambda i: usecols[i]
-                    )
-                    columns = [columns[i] for i in usecols_sorted]
+                    rank = dict(zip(sorted(usecols), columns))  # rank -> column
+                    named = all(isinstance(v, str) and v in col_dict for v in usecols)
+                    columns = [v if named else rank[v] for v in usecols]  # keep names
                     col_dict = {k: col_dict[k] for k in columns}
 
             if dtype is not None:
diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 00a52e1f3a18c..e09f88ba3f113 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -554,6 +554,7 @@ def test_usecols_dtype(all_parsers):
 def test_usecols_order(all_parsers, usecols, usecols_use_order):
     # TODOE add portion in doc for 3.0 transition
     parser = all_parsers
+    pyarrow_flag = False
     data = """\
 a,b,c,d
 1,2,3,0
@@ -561,15 +562,18 @@ def test_usecols_order(all_parsers, usecols, usecols_use_order):
 7,8,9,0
 10,11,12,13"""
 
-    msg = "The pyarrow engine does not allow 'usecols' to be integer column positions"
-    if parser.engine == "pyarrow" and isinstance(usecols[0], int):
-        with pytest.raises(ValueError, match=msg):
-            parser.read_csv(StringIO(data), usecols=usecols)
-        return
-
-    result = parser.read_csv(StringIO(data), usecols=usecols)
+    if parser.engine == "pyarrow":
+        if isinstance(usecols[0], int):
+            msg = "The pyarrow engine does not allow 'usecols' to be integer column"
+            with pytest.raises(ValueError, match=msg):
+                parser.read_csv(StringIO(data), usecols=usecols)
+            return
+        else:
+            # The pyarrow engine already appears to return columns in usecols
+            # order, so expect the reordered frame whatever the option value is.
+            pyarrow_flag = True
 
-    if usecols_use_order:
+    if usecols_use_order or pyarrow_flag:
         expected = DataFrame(
             {"d": [0, 0, 0, 13], "a": [1, 4, 7, 10], "c": [3, 6, 9, 12]}
         )
@@ -579,4 +583,5 @@ def test_usecols_order(all_parsers, usecols, usecols_use_order):
         )
 
     with option_context("future.usecols_use_order", usecols_use_order):
+        result = parser.read_csv(StringIO(data), usecols=usecols)
         tm.assert_frame_equal(result, expected)

From df37372acf871a54978886407b1141014bb43c19 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Fri, 25 Jul 2025 20:31:59 -0500
Subject: [PATCH 7/7] fixed issues where usecols became a required input

---
 pandas/io/parsers/readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 847259403c72c..7d345791b5a7d 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1519,7 +1519,7 @@ def read(self, nrows: int | None = None) -> DataFrame:
 
             if hasattr(self, "orig_options"):
                 dtype_arg = self.orig_options.get("dtype", None)
-                usecols = self.orig_options["usecols"]
+                usecols = self.orig_options.get("usecols", None)
             else:
                 dtype_arg = None
                 usecols = None