Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
08e5566
feat: improve traceback feature
PCain02 Oct 31, 2024
9d79a5f
fix: bug in import strategy picking up pytest
PCain02 Oct 31, 2024
b365e91
feat: add the ability to see tested functions
PCain02 Oct 31, 2024
e79c7aa
feat: correctly identify which function fails each test
PCain02 Oct 31, 2024
d00048d
fix: remove hard coded test paths and variables
PCain02 Oct 31, 2024
5486a42
feat: add extract function and give it to llm as list of lists
PCain02 Oct 31, 2024
74f61ba
fix: remove other strategies and strategy statement
PCain02 Oct 31, 2024
c09f5c9
fix: move extract function to make more sense
PCain02 Oct 31, 2024
369efef
feat: add print
PCain02 Nov 6, 2024
dec6f19
feat: clean up comments
PCain02 Nov 8, 2024
c3ecc35
feat: add test cases for the new extract functions
PCain02 Nov 10, 2024
eb938f2
fix: remove test files and add auto removal in tests
PCain02 Nov 10, 2024
7643e68
feat: delete debug print statements
PCain02 Nov 10, 2024
cece26e
feat: improve LLM prompting
PCain02 Nov 10, 2024
46a4152
lint: lint files
PCain02 Nov 10, 2024
f783e58
lint: organize imports
PCain02 Nov 10, 2024
f573a5a
lint: fix imports
PCain02 Nov 10, 2024
218faf0
fix: too many branches in main fix
PCain02 Nov 10, 2024
8507ee7
lint: ruff lint
PCain02 Nov 10, 2024
5421388
feat: add debug statements for extract
PCain02 Nov 11, 2024
fafbb8c
Merge branch 'GatorEducator:main' into traceback_to_llm
PCain02 Nov 11, 2024
c3bbd2b
fix: make 2 helper functions for extract tracebacks to help with bran…
PCain02 Nov 11, 2024
d35dc9a
lint: remove debug print statements
PCain02 Nov 11, 2024
89eebd9
lint: ruff format
PCain02 Nov 11, 2024
9908827
lint: ruff lint format
PCain02 Nov 11, 2024
4174cf9
fix: ruff fix imports
PCain02 Nov 11, 2024
cc7892d
fix: fix longrepr dict source file gathering
PCain02 Nov 11, 2024
6c0add5
fix: fix line error with finding functions
PCain02 Nov 12, 2024
7f2684a
lint: ruff format
PCain02 Nov 12, 2024
09f8d2f
fix: add docstring
PCain02 Nov 12, 2024
03a76a5
fix: tests function extract
PCain02 Nov 12, 2024
7328fb9
fix: fix failure test extract for ubuntu
PCain02 Nov 12, 2024
4323ef8
Merge branch 'main' into traceback_to_llm
PCain02 Nov 13, 2024
67ebecb
lint: ruff format advise
PCain02 Nov 13, 2024
be5b174
chore: update coverage report
PCain02 Nov 13, 2024
7d548ac
chore: update toml file
PCain02 Nov 13, 2024
dd45226
feat: Update pyproject.toml to v0.3.4
PCain02 Nov 14, 2024
2458332
chore: Update pyproject.toml
PCain02 Nov 14, 2024
874ec81
fix: Delete coverage.json
PCain02 Nov 16, 2024
e475565
Merge branch 'main' into traceback_to_llm
PCain02 Nov 21, 2024
d8b764f
Fix: Update pyproject.toml alphabetize authors
PCain02 Nov 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions execexam/advise.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ def fix_failures( # noqa: PLR0913
filtered_test_output: str,
exec_exam_test_assertion_details: str,
test_overview: str,
traceback: List[str],
functions: List,
failing_test_details: str,
failing_test_code: str,
advice_method: enumerations.AdviceMethod,
Expand All @@ -148,18 +150,30 @@ def fix_failures( # noqa: PLR0913
test_overview = (
filtered_test_output + exec_exam_test_assertion_details
)
# create an LLM debugging request that contains all of the
# information that is needed to provide advice about how
# to fix the bug(s) in the program that are part of an
# executable examination; note that, essentially, an
# examination consists of Python functions that a student
# must complete and then test cases that confirm the correctness
# of the functions that are implemented; note also that
# ExecExam has a Pytest plugin that collects additional details
llm_debugging_request = (
"I am an undergraduate student completing a programming examination."
+ " You may never make suggestions to change the source code of the test cases."
+ " Always make suggestions about how to improve the Python source code of the program under test."
+ " Always give Python code in a Markdown fenced code block with your suggested program."
+ " Always start your response with a friendly greeting and overview of what you will provide."
+ " Always conclude by saying that you are making a helpful suggestion but could be wrong."
+ " Always be helpful, upbeat, friendly, encouraging, and concise when making a response."
+ " Your task is to suggest, in a step-by-step fashion, how to fix the bug(s) in the program?"
+ f" Here is the test overview with test output and details about test assertions: {test_overview}"
+ f" Here is a brief overview of the test failure information: {failing_test_details}"
+ f" Here is the source code for the one or more failing test(s): {failing_test_code}"
+ "You may never make suggestions to change the source code of the test cases."
+ "Always make suggestions about how to improve the Python source code of the program under test."
+ "Always give Python code in a Markdown fenced code block with your suggested program."
+ "Always start your response with a friendly greeting and overview of what you will provide."
+ "Always conclude by saying that you are making a helpful suggestion but could be wrong."
+ "Always be helpful, upbeat, friendly, encouraging, and concise when making a response."
+ "Your task is to suggest, in a step-by-step fashion, how to fix the bug(s) in the program?"
+ "What follows is all of the information you need to complete the debugging task."
+ f"Here is the error traceback, which will guide you in identifying which functions to fix: {traceback}"
+ f"Below is the source code for all functions that have failed; focus your suggestions on these functions: {functions}"
+ f"Here is an overview of the test details and output, which will help you understand the issue: {test_overview}"
+ f"A brief summary of the test failure information is provided here: {failing_test_details}"
+ f"Finally, here is the source code for the failing test(s):: {failing_test_code}"
+ "Based on this, suggest what changes need to be made to fix the failing functions."
)

if advice_method == enumerations.AdviceMethod.api_key:
Expand Down
272 changes: 271 additions & 1 deletion execexam/extract.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
"""Extract contents from data structures."""

import ast
import importlib
import inspect
import re
from pathlib import Path
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple

from . import convert

Expand Down Expand Up @@ -178,3 +182,269 @@ def extract_test_output_multiple_labels(
filtered_output += line + "\n"
# return the filtered output
return filtered_output


def extract_tested_functions(failing_test_code: str) -> Any:
"""Extract all functions being tested from the failing test code."""
# Find all function calls in the code
function_calls = re.findall(r"(\w+)\(", failing_test_code)
# List of prefixes for functions we want to ignore
ignore_prefixes = ["assert", "test_"]
# Initialize a list to store valid function names
tested_functions = set()
# Check each function call
for func_name in function_calls:
# If the function name doesn't start with any ignore prefix, add it to the list
if not any(func_name.startswith(prefix) for prefix in ignore_prefixes):
tested_functions.add(func_name)
# If no matching functions are found, return the full failing_test_code
return tested_functions if tested_functions else failing_test_code


def get_called_functions_from_test(test_path: str) -> list[str]:
    """Get the functions called in a test from the test path.

    Args:
        test_path: A pytest node id of the form
            ``"path/to/test_file.py::test_name"``.

    Returns:
        The names of all callables syntactically invoked in the test
        function's source (the test's own ``def`` name is included by the
        regex match as well).

    Raises:
        ModuleNotFoundError: If the test module cannot be imported.
        AttributeError: If the named function is not in the module.
    """
    # Extract the module path and function name from the node id
    module_path, func_name = test_path.split("::")
    # Strip only a trailing ".py" suffix; the previous blanket
    # replace(".py", "") also mangled paths that merely contained
    # ".py" elsewhere (e.g. "my.pyutils/test.py")
    if module_path.endswith(".py"):
        module_path = module_path[: -len(".py")]
    # Import the test module using dotted-module notation
    test_module = importlib.import_module(module_path.replace("/", "."))
    # Resolve the test function object and read its source
    test_function = getattr(test_module, func_name)
    source_code = inspect.getsource(test_function)
    # Any identifier directly followed by "(" is treated as a call site
    return re.findall(r"\b(\w+)\s*\(", source_code)


def function_exists_in_file(file_path: str, function_name: str) -> bool:
    """Check if a function with the given name is defined in the source file.

    Args:
        file_path: Path to the Python source file to inspect.
        function_name: Name of the function definition to look for.

    Returns:
        True when a matching ``def`` (or ``async def``) exists; False
        otherwise, including when the file is missing or unparsable.
    """
    try:
        # read with an explicit encoding so the result does not depend
        # on the platform's locale
        file_contents = Path(file_path).read_text(encoding="utf-8")
        # parse the file contents into an AST
        tree = ast.parse(file_contents)
    except (OSError, SyntaxError, ValueError):
        # missing/unreadable file or invalid source: treat as "not found"
        # (narrower than the previous bare `except Exception`, which
        # silently hid programming errors too)
        return False
    # search every node for a matching function definition; include
    # async defs, which ast.FunctionDef alone would never match
    for node in ast.walk(tree):
        if (
            isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            and node.name == function_name
        ):
            return True
    return False


def find_source_file(test_path: str, function: str) -> str:
    """Find the source file under test by scanning the test file's imports.

    Each import line of the test file is converted into a candidate
    ``.py`` path, and the first candidate that actually defines
    ``function`` is returned.

    Returns:
        The matching file path, "" when no candidate matches, or an
        error-message string when the test file cannot be read.
    """
    test_file = test_path.split("::")[0]
    try:
        with open(test_file, "r") as infile:
            for raw_line in infile:
                if "import" not in raw_line:
                    continue
                # plain "import pkg.mod" -> keep only the last component
                candidate = raw_line.split("import")[-1].strip()
                if "." in candidate:
                    candidate = candidate.split(".")[-1]
                # "from pkg.mod import name" -> use the dotted module path
                if "from" in raw_line:
                    candidate = (
                        raw_line.split("from")[-1].split("import")[0].strip()
                    )
                # pytest itself is never the module under test
                if candidate == "pytest":
                    continue
                # turn the module name into a relative file path
                candidate_path = f"{candidate.replace('.', '/')}.py"
                if candidate_path != "pytest.py" and function_exists_in_file(
                    candidate_path, function
                ):
                    return candidate_path
    except Exception as error:
        return f"Error reading file {test_file}: {error}"
    return ""


def extract_tracebacks(json_report: Optional[dict], failing_code: str) -> list:
"""Extract comprehensive test failure information from pytest JSON report including test details, assertions, variables, and complete stack traces. Handles if JSON report returns string or dictionary"""
# Handle the case where there is no json_report
if not json_report:
return ["No Traceback Found"]
traceback_info_list = []
tests = json_report.get("tests", [])
# Go through all the tests and pull out which ones failed
for test in tests:
if test.get("outcome") in ("failed", "error"):
test_path = test.get("nodeid", "")
call = test.get("call", {})
traceback_info = {
"test_path": test_path,
"source_file": "",
"tested_function": "",
"full_traceback": "",
"error_type": "",
"error_message": "",
"stack_trace": [],
"variables": {},
"assertion_detail": "",
"expected_value": None,
"actual_value": None,
}
longrepr = call.get("longrepr", {})
# Handle string longrepr
if isinstance(longrepr, str):
process_string_longrepr(
longrepr, traceback_info, test_path, failing_code
)
# Handle dictionary of longrepr
elif isinstance(longrepr, dict):
process_dict_longrepr(
longrepr, traceback_info, test_path, failing_code
)
# Ensure we have a full traceback
if not traceback_info["full_traceback"] and "log" in call:
traceback_info["full_traceback"] = call["log"]
# Append if there is information
if (
traceback_info["full_traceback"]
or traceback_info["error_message"]
or traceback_info["stack_trace"]
):
traceback_info_list.append(traceback_info)
return traceback_info_list


def process_string_longrepr(
    longrepr: str, traceback_info: dict, test_path: str, failing_code: str
) -> None:
    """Process traceback when longrepr is a string.

    Populates ``traceback_info`` in place: full traceback text, the
    tested function (when identifiable), its source file, stack-trace
    locations, error type/message, and assertion details.
    """
    traceback_info["full_traceback"] = longrepr
    lines = longrepr.split("\n")
    # Importing the test module can fail (e.g. path not importable);
    # degrade gracefully instead of aborting the whole report extraction
    try:
        called_functions = get_called_functions_from_test(test_path)
    except Exception:
        called_functions = []
    tested_funcs = extract_tested_functions(failing_code)
    # extract_tested_functions falls back to returning the raw test code
    # string when nothing matched; iterating that string would compare
    # single characters, so treat it as "no candidate functions"
    if isinstance(tested_funcs, str):
        tested_funcs = set()
    func = ""
    for func in tested_funcs:
        if func in called_functions:
            traceback_info["tested_function"] = func
            break
    # Find the source file of the tested function from the imports
    source_file = find_source_file(test_path, func)
    if source_file:
        traceback_info["source_file"] = source_file
    for line in lines:
        # Look for file locations in the traceback text
        if "File " in line and ", line " in line:
            traceback_info["stack_trace"].append(line.strip())
        # Extract error type and message from pytest "E ..." lines
        elif line.startswith("E "):
            if not traceback_info["error_message"]:
                error_parts = line[4:].split(": ", 1)
                if len(error_parts) > 1:
                    traceback_info["error_type"] = error_parts[0]
                    traceback_info["error_message"] = error_parts[1]
                else:
                    traceback_info["error_message"] = error_parts[0]
        # Look for assertion details and try to recover the operands
        if "assert" in line:
            traceback_info["assertion_detail"] = line.strip()
            try:
                if "==" in line:
                    expr = line.split("assert")[-1].strip()
                    actual, expected = expr.split("==", 1)
                    # literal_eval only parses constants, so arbitrary
                    # code from the traceback is never executed (the
                    # previous eval() call was an injection risk)
                    traceback_info["actual_value"] = ast.literal_eval(
                        actual.strip("() ")
                    )
                    traceback_info["expected_value"] = ast.literal_eval(
                        expected.strip("() ")
                    )
            except Exception:
                # non-literal operands (names, calls) simply leave the
                # actual/expected fields unset, as before
                pass


def process_dict_longrepr(
    longrepr: dict, traceback_info: dict, test_path: str, failing_code: str
) -> None:
    """Process traceback when longrepr is a dictionary.

    Populates ``traceback_info`` in place with the tested function,
    source file, error type/message, and stack-trace entries taken from
    pytest's structured ``reprcrash``/``reprtraceback`` data.
    """
    crash = longrepr.get("reprcrash", {})
    entries = longrepr.get("reprtraceback", {}).get("reprentries", [])
    # Initialize stack_trace if it doesn't exist
    traceback_info.setdefault("stack_trace", [])
    # Get the name of the actual function being tested
    tested_funcs = extract_tested_functions(failing_code)
    # extract_tested_functions may return the raw code string as a
    # fallback; iterating it would compare single characters, so treat
    # that case as "no candidate functions"
    if isinstance(tested_funcs, str):
        tested_funcs = set()
    # Importing the test module can fail; degrade gracefully instead of
    # letting the exception abort the whole report extraction
    try:
        called_functions = get_called_functions_from_test(test_path)
    except Exception:
        called_functions = []
    func = ""
    for func in tested_funcs:
        if func in called_functions:
            traceback_info["tested_function"] = func
            break
    # Try to find the source file from the test file's imports
    source_file = ""
    try:
        source_file = find_source_file(test_path, func)
    except Exception:
        pass
    # If no source file is found, record a default marker value
    traceback_info["source_file"] = source_file or "File not found"
    # Get error type and message (split on the first ": " occurrence)
    message = crash.get("message", "")
    if ": " in message:
        error_type, error_msg = message.split(": ", 1)
        traceback_info["error_type"] = error_type
        traceback_info["error_message"] = error_msg
    else:
        traceback_info["error_message"] = message
    # Build the stack trace from the structured traceback entries
    for entry in entries:
        if isinstance(entry, dict):
            loc = entry.get("reprfileloc", {})
            if loc:
                file_path = loc.get("path", "")
                line_no = loc.get("lineno", "")
                if file_path and line_no:
                    traceback_info["stack_trace"].append(
                        f"File {file_path}, line {line_no}"
                    )


def extract_function_code_from_traceback(
    traceback_info_list: list,
) -> List[List[str]]:
    """Extract the source of each tested function from traceback info.

    Args:
        traceback_info_list: Per-test dictionaries as produced by
            ``extract_tracebacks`` (uses the "source_file" and
            "tested_function" keys).

    Returns:
        One list of stripped source lines per located function; error
        marker lists for unreadable files, and
        ``[["No Functions Found"]]`` when the input is empty.
    """
    # Nothing to extract from an empty traceback list
    if not traceback_info_list:
        return [["No Functions Found"]]
    functions: List[List[str]] = []
    for test_info in traceback_info_list:
        source_file = test_info.get("source_file", "")
        tested_function = test_info.get("tested_function", "")
        # Proceed only when both the file and the function name are known
        if source_file and tested_function:
            try:
                # Read with an explicit encoding so the result does not
                # depend on the platform's locale
                with open(source_file, "r", encoding="utf-8") as file:
                    file_contents = file.read()
                # Parse the file and look for the function definition;
                # include async defs, which ast.FunctionDef alone misses
                tree = ast.parse(file_contents)
                for node in ast.walk(tree):
                    if (
                        isinstance(
                            node, (ast.FunctionDef, ast.AsyncFunctionDef)
                        )
                        and node.name == tested_function
                    ):
                        # end_lineno is set by ast.parse on Python 3.8+;
                        # guard anyway to avoid AttributeError
                        if hasattr(node, "end_lineno"):
                            function_lines = [
                                line.strip()
                                for line in file_contents.splitlines()[
                                    node.lineno - 1 : node.end_lineno
                                ]
                            ]
                            functions.append(function_lines)
                        break
            except FileNotFoundError:
                functions.append([f"File not found: {source_file}"])
            except Exception as e:
                functions.append([f"Error: {e}"])
    return functions
8 changes: 7 additions & 1 deletion execexam/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def run( # noqa: PLR0913, PLR0915
# build the command for running symbex; this tool can
# perform static analysis of Python source code and
# extract the code of a function inside of a file
command = f"symbex {test_name} -f {failing_test_path}"
command = f'symbex "{test_name}" -f "{failing_test_path}"'
# run the symbex command and collect its output
process = subprocess.run(
command,
Expand Down Expand Up @@ -335,6 +335,10 @@ def run( # noqa: PLR0913, PLR0915
# litellm module has been loaded in a separate thread
litellm_thread.join()
debugger.debug(debug, debugger.Debug.stopped_litellm_thread.value)
tracebacks = extract.extract_tracebacks(
json_report_plugin.report, failing_test_code_overall
)
functions = extract.extract_function_code_from_traceback(tracebacks)
# provide advice about how to fix the failing tests
# because the non-zero return code indicates that
# there was a test failure and that overall there
Expand All @@ -346,6 +350,8 @@ def run( # noqa: PLR0913, PLR0915
filtered_test_output,
exec_exam_test_assertion_details,
filtered_test_output + exec_exam_test_assertion_details,
tracebacks,
functions,
failing_test_details,
failing_test_code_overall,
advice_method,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[tool.poetry]
name = "execexam"
version = "0.3.2"
version = "0.3.3"
description = "ExecExam runs executable examinations, providing feedback and assistance!"
authors = ["Hemani Alaparthi <alaparthi01@allegheny.edu>","Gregory M. Kapfhammer <gkapfham@allegheny.edu>"]
authors = ["Hemani Alaparthi <alaparthi01@allegheny.edu>","Pallas-Athena Cain <cain01@allegheny.edu>","Gregory M. Kapfhammer <gkapfham@allegheny.edu>"]
readme = "README.md"

[tool.poetry.scripts]
Expand Down
Loading
Loading