From a3a85da88fb7f618355950cc87e498227d3245ed Mon Sep 17 00:00:00 2001
From: Rob D'Aveta
Date: Tue, 5 Dec 2023 10:44:15 -0500
Subject: [PATCH 1/2] changed to batch processing

---
 splunk_core/splunk_full.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/splunk_core/splunk_full.py b/splunk_core/splunk_full.py
index b32f99a..b06ce68 100644
--- a/splunk_core/splunk_full.py
+++ b/splunk_core/splunk_full.py
@@ -15,6 +15,7 @@
 from splunk_utils.splunk_api import SplunkAPI
 from splunk_utils.helper_functions import splunk_time, parse_times
 from splunk_utils.user_input_parser import UserInputParser
+import splunklib.results as results
 
 @magics_class
 class Splunk(Integration):
@@ -26,7 +27,7 @@ class Splunk(Integration):
     # These are the variables in the opts dict that allowed to be set by the user.
     # These are specific to this custom integration and are joined with the
     # base_allowed_set_opts from the integration base
-    custom_allowed_set_opts = ["splunk_conn_default", "splunk_default_earliest_time", "splunk_default_latest_time", "splunk_parse_times", "splunk_autologin"]
+    custom_allowed_set_opts = ["splunk_conn_default", "splunk_default_earliest_time", "splunk_default_latest_time", "splunk_parse_times", "splunk_autologin", "splunk_results_count_size"]
 
     myopts = {}
     myopts["splunk_conn_default"] = ["default", "Default instance to connect with"]
@@ -34,6 +35,7 @@ class Splunk(Integration):
     myopts["splunk_default_latest_time"] = ["now", "The default latest time sent to the Splunk server"]
     myopts["splunk_parse_times"] = [1, "If this is 1, it will parse your query for earliest or latest and get the value. It will not alter the query, but update the default earliest/latest for subqueries"]
     myopts["splunk_autologin"] = [True, "Works with the the autologin setting on connect"]
+    myopts["splunk_results_count_size"] = [0, "Changing this value from its default - which is not recommended - will limit the number of results that the results reader displays. It does **NOT** limit the number of results in your query (you must set that limit in your Splunk query)"]
 
     # Class Init function - Obtain a reference to the get_ipython()
     def __init__(self, shell, debug=False, *args, **kwargs):
@@ -211,8 +213,24 @@ def customQuery(self, query, instance, reconnect=True):
             sleep(1)
 
         if search_job.results is not None:
-            dataframe = pd.read_csv(search_job.results(output_mode="csv", count=0))
+            # dataframe = pd.read_csv(search_job.results(output_mode="csv", count=self.opts["splunk_results_count_size"][0]))
+            resultCount = search_job["resultCount"]
+            offset = 0
+            count = 100
+            accumulated_results = []
+
+            while (offset < int(resultCount)):
+                kwargs_paginate = {"count": count, "offset": offset, "output_mode": "json"}
+
+                search_results = search_job.results(**kwargs_paginate)
+                for result in results.JSONResultsReader(search_results):
+                    if isinstance(result, dict):
+                        accumulated_results.append(result)
+
+                offset += count
+
             str_err = "Success"
+            dataframe = pd.json_normalize(accumulated_results)
         else:
             dataframe = None
             str_err = "Success - No Results"

From d764223d497b0815f9ea5e373b13bc3639ef3222 Mon Sep 17 00:00:00 2001
From: Rob D'Aveta
Date: Tue, 5 Dec 2023 11:47:45 -0500
Subject: [PATCH 2/2] added option for result count size

---
 splunk_core/splunk_full.py | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/splunk_core/splunk_full.py b/splunk_core/splunk_full.py
index b06ce68..6c29ba0 100644
--- a/splunk_core/splunk_full.py
+++ b/splunk_core/splunk_full.py
@@ -15,7 +15,6 @@
 from splunk_utils.splunk_api import SplunkAPI
 from splunk_utils.helper_functions import splunk_time, parse_times
 from splunk_utils.user_input_parser import UserInputParser
-import splunklib.results as results
 
 @magics_class
 class Splunk(Integration):
@@ -213,24 +212,8 @@ def customQuery(self, query, instance, reconnect=True):
             sleep(1)
 
         if search_job.results is not None:
-            # dataframe = pd.read_csv(search_job.results(output_mode="csv", count=self.opts["splunk_results_count_size"][0]))
-            resultCount = search_job["resultCount"]
-            offset = 0
-            count = 100
-            accumulated_results = []
-
-            while (offset < int(resultCount)):
-                kwargs_paginate = {"count": count, "offset": offset, "output_mode": "json"}
-
-                search_results = search_job.results(**kwargs_paginate)
-                for result in results.JSONResultsReader(search_results):
-                    if isinstance(result, dict):
-                        accumulated_results.append(result)
-
-                offset += count
-
+            dataframe = pd.read_csv(search_job.results(output_mode="csv", count=self.opts["splunk_results_count_size"][0]))
             str_err = "Success"
-            dataframe = pd.json_normalize(accumulated_results)
         else:
             dataframe = None
             str_err = "Success - No Results"
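
Note on the series above: PATCH 1/2 replaced the single CSV fetch in customQuery with offset/count pagination through splunklib's JSONResultsReader; PATCH 2/2 then backed that out in favor of one pd.read_csv call whose count comes from the new splunk_results_count_size option. For reference, here is a minimal, standalone sketch of the pagination approach that PATCH 1/2 used. It assumes you already hold a finished splunklib search job; fetch_all_results and page_size are illustrative names, not identifiers from this repo.

```python
import pandas as pd
import splunklib.results as results


def fetch_all_results(job, page_size=100):
    """Read every row from a finished splunklib search job in fixed-size pages."""
    total = int(job["resultCount"])  # total rows produced by the finished job
    rows = []
    offset = 0
    while offset < total:
        # Request one page of rows from the job's results endpoint as JSON.
        stream = job.results(output_mode="json", count=page_size, offset=offset)
        for item in results.JSONResultsReader(stream):
            # The reader also yields diagnostic Message objects; keep only result rows.
            if isinstance(item, dict):
                rows.append(item)
        offset += page_size
    return pd.json_normalize(rows)
```

In the final state of the series, the count passed to search_job.results() only caps how many rows are read back for display; as the option's help text notes, it does not limit the search itself, and the default of 0 is treated by the Splunk results endpoint as "return everything the job produced".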