Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
08e5566
feat: improve traceback feature
PCain02 Oct 31, 2024
9d79a5f
fix: bug in import strategy picking up pytest
PCain02 Oct 31, 2024
b365e91
feat: add the ability to see tested functions
PCain02 Oct 31, 2024
e79c7aa
feat: correctly identify which function fails each test
PCain02 Oct 31, 2024
d00048d
fix: remove hard coded test paths and variables
PCain02 Oct 31, 2024
5486a42
feat: add extract function and give it to llm as list of lists
PCain02 Oct 31, 2024
74f61ba
fix: remove other strategies and strategy statement
PCain02 Oct 31, 2024
c09f5c9
fix: move extract function to make more sense
PCain02 Oct 31, 2024
369efef
feat: add print
PCain02 Nov 6, 2024
dec6f19
feat: clean up comments
PCain02 Nov 8, 2024
c3ecc35
feat: add test cases for the new extract functions
PCain02 Nov 10, 2024
eb938f2
fix: remove test files and add auto removal in tests
PCain02 Nov 10, 2024
7643e68
feat: delete debug print statements
PCain02 Nov 10, 2024
cece26e
feat: improve LLM prompting
PCain02 Nov 10, 2024
46a4152
lint: lint files
PCain02 Nov 10, 2024
f783e58
lint: organize imports
PCain02 Nov 10, 2024
f573a5a
lint: fix imports
PCain02 Nov 10, 2024
218faf0
fix: too many branches in main fix
PCain02 Nov 10, 2024
8507ee7
lint: ruff lint
PCain02 Nov 10, 2024
5421388
feat: add debug statements for extract
PCain02 Nov 11, 2024
fafbb8c
Merge branch 'GatorEducator:main' into traceback_to_llm
PCain02 Nov 11, 2024
c3bbd2b
fix: make 2 helper functions for extract tracebacks to help with bran…
PCain02 Nov 11, 2024
d35dc9a
lint: remove debug print statements
PCain02 Nov 11, 2024
89eebd9
lint: ruff format
PCain02 Nov 11, 2024
9908827
lint: ruff lint format
PCain02 Nov 11, 2024
4174cf9
fix: ruff fix imports
PCain02 Nov 11, 2024
cc7892d
fix: fix longrepr dict source file gathering
PCain02 Nov 11, 2024
6c0add5
fix: fix line error with finding functions
PCain02 Nov 12, 2024
7f2684a
lint: ruff format
PCain02 Nov 12, 2024
09f8d2f
fix: add docstring
PCain02 Nov 12, 2024
03a76a5
fix: tests function extract
PCain02 Nov 12, 2024
7328fb9
fix: fix failure test extract for ubuntu
PCain02 Nov 12, 2024
4323ef8
Merge branch 'main' into traceback_to_llm
PCain02 Nov 13, 2024
67ebecb
lint: ruff format advise
PCain02 Nov 13, 2024
be5b174
chore: update coverage report
PCain02 Nov 13, 2024
7d548ac
chore: update toml file
PCain02 Nov 13, 2024
dd45226
feat: Update pyproject.toml to v0.3.4
PCain02 Nov 14, 2024
2458332
chore: Update pyproject.toml
PCain02 Nov 14, 2024
874ec81
fix: Delete coverage.json
PCain02 Nov 16, 2024
e475565
Merge branch 'main' into traceback_to_llm
PCain02 Nov 21, 2024
d8b764f
Fix: Update pyproject.toml alphabetize authors
PCain02 Nov 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions execexam/advise.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ def fix_failures( # noqa: PLR0913
filtered_test_output: str,
exec_exam_test_assertion_details: str,
test_overview: str,
traceback: List[str],
functions: List,
failing_test_details: str,
failing_test_code: str,
advice_method: enumerations.AdviceMethod,
Expand All @@ -148,18 +150,30 @@ def fix_failures( # noqa: PLR0913
test_overview = (
filtered_test_output + exec_exam_test_assertion_details
)
# create an LLM debugging request that contains all of the
# information that is needed to provide advice about how
# to fix the bug(s) in the program that are part of an
# executable examination; note that, essentially, an
# examination consists of Python functions that a student
# must complete and then test cases that confirm the correctness
# of the functions that are implemented; note also that
# ExecExam has a Pytest plugin that collects additional details
llm_debugging_request = (
"I am an undergraduate student completing a programming examination."
+ " You may never make suggestions to change the source code of the test cases."
+ " Always make suggestions about how to improve the Python source code of the program under test."
+ " Always give Python code in a Markdown fenced code block with your suggested program."
+ " Always start your response with a friendly greeting and overview of what you will provide."
+ " Always conclude by saying that you are making a helpful suggestion but could be wrong."
+ " Always be helpful, upbeat, friendly, encouraging, and concise when making a response."
+ " Your task is to suggest, in a step-by-step fashion, how to fix the bug(s) in the program?"
+ f" Here is the test overview with test output and details about test assertions: {test_overview}"
+ f" Here is a brief overview of the test failure information: {failing_test_details}"
+ f" Here is the source code for the one or more failing test(s): {failing_test_code}"
+ "You may never make suggestions to change the source code of the test cases."
+ "Always make suggestions about how to improve the Python source code of the program under test."
+ "Always give Python code in a Markdown fenced code block with your suggested program."
+ "Always start your response with a friendly greeting and overview of what you will provide."
+ "Always conclude by saying that you are making a helpful suggestion but could be wrong."
+ "Always be helpful, upbeat, friendly, encouraging, and concise when making a response."
+ "Your task is to suggest, in a step-by-step fashion, how to fix the bug(s) in the program?"
+ "What follows is all of the information you need to complete the debugging task."
+ f"Here is the error traceback, which will guide you in identifying which functions to fix: {traceback}"
+ f"Below is the source code for all functions that have failed; focus your suggestions on these functions: {functions}"
+ f"Here is an overview of the test details and output, which will help you understand the issue: {test_overview}"
+ f"A brief summary of the test failure information is provided here: {failing_test_details}"
+ f"Finally, here is the source code for the failing test(s):: {failing_test_code}"
+ "Based on this, suggest what changes need to be made to fix the failing functions."
)

if advice_method == enumerations.AdviceMethod.api_key:
Expand Down
272 changes: 271 additions & 1 deletion execexam/extract.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
"""Extract contents from data structures."""

import ast
import importlib
import inspect
import re
from pathlib import Path
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple

from . import convert

Expand Down Expand Up @@ -178,3 +182,269 @@ def extract_test_output_multiple_labels(
filtered_output += line + "\n"
# return the filtered output
return filtered_output


def extract_tested_functions(failing_test_code: str) -> Any:
"""Extract all functions being tested from the failing test code."""
# Find all function calls in the code
function_calls = re.findall(r"(\w+)\(", failing_test_code)
# List of prefixes for functions we want to ignore
ignore_prefixes = ["assert", "test_"]
# Initialize a list to store valid function names
tested_functions = set()
# Check each function call
for func_name in function_calls:
# If the function name doesn't start with any ignore prefix, add it to the list
if not any(func_name.startswith(prefix) for prefix in ignore_prefixes):
tested_functions.add(func_name)
# If no matching functions are found, return the full failing_test_code
return tested_functions if tested_functions else failing_test_code


def get_called_functions_from_test(test_path: str) -> list[str]:
    """Get the functions called in a test from the test path.

    Args:
        test_path: A pytest node id of the form
            ``"path/to/test_file.py::test_name"``.

    Returns:
        The names of all callables syntactically invoked in the test
        function's source (the test's own ``def`` name is included by the
        regex match as well).

    Raises:
        ModuleNotFoundError: If the test module cannot be imported.
        AttributeError: If the named function is not in the module.
    """
    # Extract the module path and function name from the node id
    module_path, func_name = test_path.split("::")
    # Strip only a trailing ".py" suffix; the previous blanket
    # replace(".py", "") also mangled paths that merely contained
    # ".py" elsewhere (e.g. "my.pyutils/test.py")
    if module_path.endswith(".py"):
        module_path = module_path[: -len(".py")]
    # Import the test module using dotted-module notation
    test_module = importlib.import_module(module_path.replace("/", "."))
    # Resolve the test function object and read its source
    test_function = getattr(test_module, func_name)
    source_code = inspect.getsource(test_function)
    # Any identifier directly followed by "(" is treated as a call site
    return re.findall(r"\b(\w+)\s*\(", source_code)


def function_exists_in_file(file_path: str, function_name: str) -> bool:
    """Check if a function with the given name is defined in the source file.

    Args:
        file_path: Path to the Python source file to inspect.
        function_name: Name of the function definition to look for.

    Returns:
        True when a matching ``def`` (or ``async def``) exists; False
        otherwise, including when the file is missing or unparsable.
    """
    try:
        # read with an explicit encoding so the result does not depend
        # on the platform's locale
        file_contents = Path(file_path).read_text(encoding="utf-8")
        # parse the file contents into an AST
        tree = ast.parse(file_contents)
    except (OSError, SyntaxError, ValueError):
        # missing/unreadable file or invalid source: treat as "not found"
        # (narrower than the previous bare `except Exception`, which
        # silently hid programming errors too)
        return False
    # search every node for a matching function definition; include
    # async defs, which ast.FunctionDef alone would never match
    for node in ast.walk(tree):
        if (
            isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            and node.name == function_name
        ):
            return True
    return False


def find_source_file(test_path: str, function: str) -> str:
    """Find the source file under test by scanning the test file's imports.

    Each import line of the test file is converted into a candidate
    ``.py`` path, and the first candidate that actually defines
    ``function`` is returned.

    Returns:
        The matching file path, "" when no candidate matches, or an
        error-message string when the test file cannot be read.
    """
    test_file = test_path.split("::")[0]
    try:
        with open(test_file, "r") as infile:
            for raw_line in infile:
                if "import" not in raw_line:
                    continue
                # plain "import pkg.mod" -> keep only the last component
                candidate = raw_line.split("import")[-1].strip()
                if "." in candidate:
                    candidate = candidate.split(".")[-1]
                # "from pkg.mod import name" -> use the dotted module path
                if "from" in raw_line:
                    candidate = (
                        raw_line.split("from")[-1].split("import")[0].strip()
                    )
                # pytest itself is never the module under test
                if candidate == "pytest":
                    continue
                # turn the module name into a relative file path
                candidate_path = f"{candidate.replace('.', '/')}.py"
                if candidate_path != "pytest.py" and function_exists_in_file(
                    candidate_path, function
                ):
                    return candidate_path
    except Exception as error:
        return f"Error reading file {test_file}: {error}"
    return ""


def extract_tracebacks(json_report: Optional[dict], failing_code: str) -> list:
"""Extract comprehensive test failure information from pytest JSON report including test details, assertions, variables, and complete stack traces. Handles if JSON report returns string or dictionary"""
# Handle the case where there is no json_report
if not json_report:
return ["No Traceback Found"]
traceback_info_list = []
tests = json_report.get("tests", [])
# Go through all the tests and pull out which ones failed
for test in tests:
if test.get("outcome") in ("failed", "error"):
test_path = test.get("nodeid", "")
call = test.get("call", {})
traceback_info = {
"test_path": test_path,
"source_file": "",
"tested_function": "",
"full_traceback": "",
"error_type": "",
"error_message": "",
"stack_trace": [],
"variables": {},
"assertion_detail": "",
"expected_value": None,
"actual_value": None,
}
longrepr = call.get("longrepr", {})
# Handle string longrepr
if isinstance(longrepr, str):
process_string_longrepr(
longrepr, traceback_info, test_path, failing_code
)
# Handle dictionary of longrepr
elif isinstance(longrepr, dict):
process_dict_longrepr(
longrepr, traceback_info, test_path, failing_code
)
# Ensure we have a full traceback
if not traceback_info["full_traceback"] and "log" in call:
traceback_info["full_traceback"] = call["log"]
# Append if there is information
if (
traceback_info["full_traceback"]
or traceback_info["error_message"]
or traceback_info["stack_trace"]
):
traceback_info_list.append(traceback_info)
return traceback_info_list


def process_string_longrepr(
    longrepr: str, traceback_info: dict, test_path: str, failing_code: str
) -> None:
    """Process traceback when longrepr is a string.

    Populates ``traceback_info`` in place: full traceback text, the
    tested function (when identifiable), its source file, stack-trace
    locations, error type/message, and assertion details.
    """
    traceback_info["full_traceback"] = longrepr
    lines = longrepr.split("\n")
    # Importing the test module can fail (e.g. path not importable);
    # degrade gracefully instead of aborting the whole report extraction
    try:
        called_functions = get_called_functions_from_test(test_path)
    except Exception:
        called_functions = []
    tested_funcs = extract_tested_functions(failing_code)
    # extract_tested_functions falls back to returning the raw test code
    # string when nothing matched; iterating that string would compare
    # single characters, so treat it as "no candidate functions"
    if isinstance(tested_funcs, str):
        tested_funcs = set()
    func = ""
    for func in tested_funcs:
        if func in called_functions:
            traceback_info["tested_function"] = func
            break
    # Find the source file of the tested function from the imports
    source_file = find_source_file(test_path, func)
    if source_file:
        traceback_info["source_file"] = source_file
    for line in lines:
        # Look for file locations in the traceback text
        if "File " in line and ", line " in line:
            traceback_info["stack_trace"].append(line.strip())
        # Extract error type and message from pytest "E ..." lines
        elif line.startswith("E "):
            if not traceback_info["error_message"]:
                error_parts = line[4:].split(": ", 1)
                if len(error_parts) > 1:
                    traceback_info["error_type"] = error_parts[0]
                    traceback_info["error_message"] = error_parts[1]
                else:
                    traceback_info["error_message"] = error_parts[0]
        # Look for assertion details and try to recover the operands
        if "assert" in line:
            traceback_info["assertion_detail"] = line.strip()
            try:
                if "==" in line:
                    expr = line.split("assert")[-1].strip()
                    actual, expected = expr.split("==", 1)
                    # literal_eval only parses constants, so arbitrary
                    # code from the traceback is never executed (the
                    # previous eval() call was an injection risk)
                    traceback_info["actual_value"] = ast.literal_eval(
                        actual.strip("() ")
                    )
                    traceback_info["expected_value"] = ast.literal_eval(
                        expected.strip("() ")
                    )
            except Exception:
                # non-literal operands (names, calls) simply leave the
                # actual/expected fields unset, as before
                pass


def process_dict_longrepr(
    longrepr: dict, traceback_info: dict, test_path: str, failing_code: str
) -> None:
    """Process traceback when longrepr is a dictionary.

    Populates ``traceback_info`` in place with the tested function,
    source file, error type/message, and stack-trace entries taken from
    pytest's structured ``reprcrash``/``reprtraceback`` data.
    """
    crash = longrepr.get("reprcrash", {})
    entries = longrepr.get("reprtraceback", {}).get("reprentries", [])
    # Initialize stack_trace if it doesn't exist
    traceback_info.setdefault("stack_trace", [])
    # Get the name of the actual function being tested
    tested_funcs = extract_tested_functions(failing_code)
    # extract_tested_functions may return the raw code string as a
    # fallback; iterating it would compare single characters, so treat
    # that case as "no candidate functions"
    if isinstance(tested_funcs, str):
        tested_funcs = set()
    # Importing the test module can fail; degrade gracefully instead of
    # letting the exception abort the whole report extraction
    try:
        called_functions = get_called_functions_from_test(test_path)
    except Exception:
        called_functions = []
    func = ""
    for func in tested_funcs:
        if func in called_functions:
            traceback_info["tested_function"] = func
            break
    # Try to find the source file from the test file's imports
    source_file = ""
    try:
        source_file = find_source_file(test_path, func)
    except Exception:
        pass
    # If no source file is found, record a default marker value
    traceback_info["source_file"] = source_file or "File not found"
    # Get error type and message (split on the first ": " occurrence)
    message = crash.get("message", "")
    if ": " in message:
        error_type, error_msg = message.split(": ", 1)
        traceback_info["error_type"] = error_type
        traceback_info["error_message"] = error_msg
    else:
        traceback_info["error_message"] = message
    # Build the stack trace from the structured traceback entries
    for entry in entries:
        if isinstance(entry, dict):
            loc = entry.get("reprfileloc", {})
            if loc:
                file_path = loc.get("path", "")
                line_no = loc.get("lineno", "")
                if file_path and line_no:
                    traceback_info["stack_trace"].append(
                        f"File {file_path}, line {line_no}"
                    )


def extract_function_code_from_traceback(
    traceback_info_list: list,
) -> List[List[str]]:
    """Extract the source of each tested function from traceback info.

    Args:
        traceback_info_list: Per-test dictionaries as produced by
            ``extract_tracebacks`` (uses the "source_file" and
            "tested_function" keys).

    Returns:
        One list of stripped source lines per located function; error
        marker lists for unreadable files, and
        ``[["No Functions Found"]]`` when the input is empty.
    """
    # Nothing to extract from an empty traceback list
    if not traceback_info_list:
        return [["No Functions Found"]]
    functions: List[List[str]] = []
    for test_info in traceback_info_list:
        source_file = test_info.get("source_file", "")
        tested_function = test_info.get("tested_function", "")
        # Proceed only when both the file and the function name are known
        if source_file and tested_function:
            try:
                # Read with an explicit encoding so the result does not
                # depend on the platform's locale
                with open(source_file, "r", encoding="utf-8") as file:
                    file_contents = file.read()
                # Parse the file and look for the function definition;
                # include async defs, which ast.FunctionDef alone misses
                tree = ast.parse(file_contents)
                for node in ast.walk(tree):
                    if (
                        isinstance(
                            node, (ast.FunctionDef, ast.AsyncFunctionDef)
                        )
                        and node.name == tested_function
                    ):
                        # end_lineno is set by ast.parse on Python 3.8+;
                        # guard anyway to avoid AttributeError
                        if hasattr(node, "end_lineno"):
                            function_lines = [
                                line.strip()
                                for line in file_contents.splitlines()[
                                    node.lineno - 1 : node.end_lineno
                                ]
                            ]
                            functions.append(function_lines)
                        break
            except FileNotFoundError:
                functions.append([f"File not found: {source_file}"])
            except Exception as e:
                functions.append([f"Error: {e}"])
    return functions
8 changes: 7 additions & 1 deletion execexam/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def run( # noqa: PLR0913, PLR0915
# build the command for running symbex; this tool can
# perform static analysis of Python source code and
# extract the code of a function inside of a file
command = f"symbex {test_name} -f {failing_test_path}"
command = f'symbex "{test_name}" -f "{failing_test_path}"'
# run the symbex command and collect its output
process = subprocess.run(
command,
Expand Down Expand Up @@ -335,6 +335,10 @@ def run( # noqa: PLR0913, PLR0915
# litellm module has been loaded in a separate thread
litellm_thread.join()
debugger.debug(debug, debugger.Debug.stopped_litellm_thread.value)
tracebacks = extract.extract_tracebacks(
json_report_plugin.report, failing_test_code_overall
)
functions = extract.extract_function_code_from_traceback(tracebacks)
# provide advice about how to fix the failing tests
# because the non-zero return code indicates that
# there was a test failure and that overall there
Expand All @@ -346,6 +350,8 @@ def run( # noqa: PLR0913, PLR0915
filtered_test_output,
exec_exam_test_assertion_details,
filtered_test_output + exec_exam_test_assertion_details,
tracebacks,
functions,
failing_test_details,
failing_test_code_overall,
advice_method,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[tool.poetry]
name = "execexam"
version = "0.3.2"
version = "0.3.3"
description = "ExecExam runs executable examinations, providing feedback and assistance!"
authors = ["Hemani Alaparthi <alaparthi01@allegheny.edu>","Gregory M. Kapfhammer <gkapfham@allegheny.edu>"]
authors = ["Hemani Alaparthi <alaparthi01@allegheny.edu>","Pallas-Athena Cain <cain01@allegheny.edu>","Gregory M. Kapfhammer <gkapfham@allegheny.edu>"]
readme = "README.md"

[tool.poetry.scripts]
Expand Down
Loading
Loading