diff --git a/.env b/.env deleted file mode 100644 index e578db1..0000000 --- a/.env +++ /dev/null @@ -1,5 +0,0 @@ -# LlamaCloud API configuration -LLAMA_CLOUD_API_KEY=llx-your-api-key-here - -# OpenAI API configuration -OPENAI_API_KEY=sk-your-openai-api-key-here diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..eb7fa0a --- /dev/null +++ b/.env.template @@ -0,0 +1,2 @@ +# Copy this to .env and set any necessary secrets +OPENAI_API_KEY=sk-your-openai-api-key-here diff --git a/.github/workflows/check-regeneration.yml b/.github/workflows/check-regeneration.yml new file mode 100644 index 0000000..cb15cea --- /dev/null +++ b/.github/workflows/check-regeneration.yml @@ -0,0 +1,69 @@ +name: Check Template Regeneration + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + check-template: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Install uv + uses: astral-sh/setup-uv@v3 + + - name: Run regeneration check + run: uv run copier/copy_utils.py check-regeneration + + check-python: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Install uv + uses: astral-sh/setup-uv@v3 + + - name: Run Python checks + run: uv run hatch run all-check + working-directory: test-proj + + check-ui: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '24' + + - name: Enable Corepack + run: corepack enable + + - name: Activate pnpm version + working-directory: test-proj/ui + run: corepack prepare --activate + + + - name: Run UI checks + run: pnpm run all-check + working-directory: test-proj/ui \ No newline at end of file diff --git a/.gitignore.jinja b/.gitignore.jinja new file mode 100644 index 0000000..06b8b0d --- /dev/null +++ b/.gitignore.jinja @@ -0,0 +1,4 @@ +.env +__pycache__ +workflows.db +.venv diff --git a/copier.yaml b/copier.yaml index 8439d41..eedba2f 100644 --- a/copier.yaml +++ b/copier.yaml @@ -8,17 +8,10 @@ project_name: Project name must contain only letters, numbers, and dashes {% endif %} -llama_project_id: - type: str - help: What is your Llama Cloud project ID? - default: "" - required: true - -llama_org_id: +project_title: type: str - help: What is your Llama Cloud organization ID? - default: "" - required: true + help: What is the title of your project? This will be used in the UI Title Bar. 
+ default: "{{ project_name.replace('-', ' ').title() }}" # computed variables project_name_snake: @@ -26,15 +19,10 @@ project_name_snake: default: "{{ project_name.replace('-', '_') }}" when: false -project_title: - type: str - default: "{{ (project_name.replace('-', ' '))[:1] | upper ~ (project_name.replace('-', ' '))[1:] }}" - when: false - _exclude: - "test-proj" - ".git" - ".github" - "copier" - "CONTRIBUTING.md" - - "copier.yaml" \ No newline at end of file + - "copier.yaml" diff --git a/copier/copy_utils.py b/copier/copy_utils.py index 2c37c30..6555d29 100755 --- a/copier/copy_utils.py +++ b/copier/copy_utils.py @@ -44,6 +44,7 @@ def run_copier_quietly(src_path: str, dst_path: str, data: Dict[str, str]) -> No data=data, unsafe=True, quiet=True, + vcs_ref="HEAD", ) diff --git a/llama_deploy.yaml b/llama_deploy.yaml deleted file mode 100644 index 1654876..0000000 --- a/llama_deploy.yaml +++ /dev/null @@ -1,8 +0,0 @@ -env_files: - - ".env" -llama_cloud: true -workflows: - upload: "document-qa.qa_workflows:DocumentUploadWorkflow" - chat: "document-qa.qa_workflows:ChatWorkflow" -ui: - directory: ui diff --git a/llama_deploy.yaml.jinja b/llama_deploy.yaml.jinja deleted file mode 100644 index be93469..0000000 --- a/llama_deploy.yaml.jinja +++ /dev/null @@ -1,8 +0,0 @@ -env_files: - - ".env" -llama_cloud: true -workflows: - upload: "{{project_name_snake}}.qa_workflows:DocumentUploadWorkflow" - chat: "{{project_name_snake}}.qa_workflows:ChatWorkflow" -ui: - directory: ui diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 5fbb656..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,25 +0,0 @@ -[project] -name = "document-qa" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -authors = [ - { name = "Terry Zhao", email = "terry@runllama.ai" } -] -requires-python = ">=3.12" -dependencies = [ - "llama-index-workflows>=2.2.0", - "python-cowsay>=1.2.1", - "llama-cloud-services>=0.6.0", - "llama-index-core>=0.12.0", - "llama-index-llms-openai>=0.3.0", - "llama-index-embeddings-openai>=0.3.0", - "python-dotenv>=1.0.1", -] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[dependency-groups] -dev = [] diff --git a/pyproject.toml.jinja b/pyproject.toml.jinja index e4a2fc3..08d2618 100644 --- a/pyproject.toml.jinja +++ b/pyproject.toml.jinja @@ -1,20 +1,17 @@ [project] -name = "{{project_name_snake}}" +name = "{{ project_name_snake }}" version = "0.1.0" description = "Add your description here" readme = "README.md" -authors = [ - { name = "Terry Zhao", email = "terry@runllama.ai" } -] +authors = [] requires-python = ">=3.12" dependencies = [ - "llama-index-workflows>=2.2.0", - "python-cowsay>=1.2.1", - "llama-cloud-services>=0.6.0", - "llama-index-core>=0.12.0", - "llama-index-llms-openai>=0.3.0", - "llama-index-embeddings-openai>=0.3.0", - "python-dotenv>=1.0.1", + "llama-index-workflows>=2.2.0,<3.0.0", + "llama-cloud-services>=0.6.68", + "llama-index-core>=0.14.0", + "llama-index-llms-openai>=0.5.6", + "llama-index-embeddings-openai>=0.5.1", + "python-dotenv>=1.1.1", ] [build-system] @@ -22,5 +19,30 @@ requires = ["hatchling"] build-backend = "hatchling.build" [dependency-groups] -dev = [] +dev = [ + "hatch>=1.14.1", + "pytest>=8.4.2", + "ruff>=0.13.0", + "ty>=0.0.1a20", +] + +[tool.hatch.envs.default.scripts] +"format" = "ruff format ." +"format-check" = "ruff format --check ." +"lint" = "ruff check --fix ." 
+"lint-check" = ["ruff check ."] +typecheck = "ty check src" +test = "pytest" +"all-check" = ["format-check", "lint-check", "test"] +"all-fix" = ["format", "lint", "test"] + +[tool.llamadeploy] +env-files = [".env"] +llama_cloud = true + +[tool.llamadeploy.ui] +directory = "./ui" +[tool.llamadeploy.workflows] +upload = "test_proj.qa_workflows:upload" +chat = "test_proj.qa_workflows:chat" diff --git a/src/{{project_name_snake}}/__init__.py b/src/{{ project_name_snake }}/__init__.py similarity index 100% rename from src/{{project_name_snake}}/__init__.py rename to src/{{ project_name_snake }}/__init__.py diff --git a/src/{{ project_name_snake }}/clients.py b/src/{{ project_name_snake }}/clients.py new file mode 100644 index 0000000..9f8d0d8 --- /dev/null +++ b/src/{{ project_name_snake }}/clients.py @@ -0,0 +1,52 @@ +import functools +import os +import httpx + +from llama_cloud.client import AsyncLlamaCloud +from llama_cloud_services import LlamaParse + +# deployed agents may infer their name from the deployment name +# Note: Make sure that an agent deployment with this name actually exists +# otherwise calls to get or set data will fail. You may need to adjust the `or ` +# name for development +DEPLOYMENT_NAME = os.getenv("LLAMA_DEPLOY_DEPLOYMENT_NAME") +# required for all llama cloud calls +LLAMA_CLOUD_API_KEY = os.environ["LLAMA_CLOUD_API_KEY"] +# get this in case running against a different environment than production +LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL") +LLAMA_CLOUD_PROJECT_ID = os.getenv("LLAMA_DEPLOY_PROJECT_ID") +INDEX_NAME = "document_qa_index" + + +def get_custom_client() -> httpx.AsyncClient: + return httpx.AsyncClient( + timeout=60, + headers={"Project-Id": LLAMA_CLOUD_PROJECT_ID} + if LLAMA_CLOUD_PROJECT_ID + else None, + ) + + +@functools.cache +def get_llama_cloud_client() -> AsyncLlamaCloud: + return AsyncLlamaCloud( + base_url=LLAMA_CLOUD_BASE_URL, + token=LLAMA_CLOUD_API_KEY, + httpx_client=get_custom_client(), + ) + + +@functools.cache +def get_llama_parse_client() -> LlamaParse: + return LlamaParse( + parse_mode="parse_page_with_agent", + model="openai-gpt-4-1-mini", + high_res_ocr=True, + adaptive_long_table=True, + outlined_table_extraction=True, + output_tables_as_HTML=True, + result_type="markdown", + api_key=LLAMA_CLOUD_API_KEY, + project_id=LLAMA_CLOUD_PROJECT_ID, + custom_client=get_custom_client(), + ) diff --git a/src/{{project_name_snake}}/qa_workflows.py b/src/{{ project_name_snake }}/qa_workflows.py similarity index 59% rename from src/{{project_name_snake}}/qa_workflows.py rename to src/{{ project_name_snake }}/qa_workflows.py index 96feecb..5363255 100644 --- a/src/{{project_name_snake}}/qa_workflows.py +++ b/src/{{ project_name_snake }}/qa_workflows.py @@ -1,27 +1,35 @@ import logging import os -import uuid import httpx from llama_cloud.types import RetrievalMode import tempfile from llama_index.core.chat_engine.types import BaseChatEngine, ChatMode from workflows import Workflow, step, Context -from workflows.events import StartEvent, StopEvent, Event, InputRequiredEvent, HumanResponseEvent +from workflows.events import ( + StartEvent, + StopEvent, + Event, + InputRequiredEvent, + HumanResponseEvent, +) from workflows.retry_policy import ConstantDelayRetryPolicy -from workflows.server import WorkflowServer -from llama_cloud_services import LlamaParse, LlamaCloudIndex +from llama_cloud_services import LlamaCloudIndex from llama_index.core import Settings from llama_index.llms.openai import OpenAI from llama_index.embeddings.openai 
import OpenAIEmbedding from llama_index.core.memory import ChatMemoryBuffer -from dotenv import load_dotenv -from .clients import get_custom_client, get_llama_cloud_client -from .config import PROJECT_ID, ORGANIZATION_ID -# Load environment variables -load_dotenv() +from .clients import ( + LLAMA_CLOUD_API_KEY, + LLAMA_CLOUD_BASE_URL, + get_custom_client, + get_llama_cloud_client, + get_llama_parse_client, + LLAMA_CLOUD_PROJECT_ID, +) + logger = logging.getLogger(__name__) @@ -30,57 +38,45 @@ class FileEvent(StartEvent): file_id: str index_name: str + class DownloadFileEvent(Event): file_id: str + class FileDownloadedEvent(Event): file_id: str file_path: str filename: str + class ChatEvent(StartEvent): index_name: str session_id: str + # Configure LLM and embedding model Settings.llm = OpenAI(model="gpt-4", temperature=0.1) Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small") custom_client = get_custom_client() + class DocumentUploadWorkflow(Workflow): """Workflow to upload and index documents using LlamaParse and LlamaCloud Index""" - + def __init__(self, **kwargs): super().__init__(**kwargs) # Get API key with validation - api_key = os.getenv("LLAMA_CLOUD_API_KEY") - if not api_key: - logger.warning("Warning: LLAMA_CLOUD_API_KEY not found in environment. Document upload will not work.") - self.parser = None - else: - # Initialize LlamaParse with recommended settings - logger.info(f"Initializing LlamaParse with API key: {api_key}") - self.parser = LlamaParse( - parse_mode="parse_page_with_agent", - model="openai-gpt-4-1-mini", - high_res_ocr=True, - adaptive_long_table=True, - outlined_table_extraction=True, - output_tables_as_HTML=True, - result_type="markdown", - api_key=api_key, - project_id=PROJECT_ID, - organization_id=ORGANIZATION_ID, - custom_client=custom_client - ) + + # Initialize LlamaParse with recommended settings + self.parser = get_llama_parse_client() @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10)) async def run_file(self, event: FileEvent, ctx: Context) -> DownloadFileEvent: logger.info(f"Running file {event.file_id}") await ctx.store.set("index_name", event.index_name) return DownloadFileEvent(file_id=event.file_id) - + @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10)) async def download_file( self, event: DownloadFileEvent, ctx: Context @@ -114,78 +110,61 @@ async def download_file( logger.error(f"Error downloading file {event.file_id}: {e}", exc_info=True) raise e - @step async def parse_document(self, ev: FileDownloadedEvent, ctx: Context) -> StopEvent: """Parse document and index it to LlamaCloud""" try: logger.info(f"Parsing document {ev.file_id}") - # Check if parser is initialized - if not self.parser: - return StopEvent(result={ - "success": False, - "error": "LLAMA_CLOUD_API_KEY not configured. Please set it in your .env file." 
- }) - # Get file path or content from event file_path = ev.file_path file_name = file_path.split("/")[-1] index_name = await ctx.store.get("index_name") - + # Parse the document if file_path: # Parse from file path result = await self.parser.aparse(file_path) - + # Get parsed documents documents = result.get_text_documents() - + # Create or connect to LlamaCloud Index - try: - logger.info(f"Connecting to existing index {index_name}") - # Try to connect to existing index - index = LlamaCloudIndex( - name=index_name, - project_id=PROJECT_ID, - organization_id=ORGANIZATION_ID, - api_key=os.getenv("LLAMA_CLOUD_API_KEY"), - custom_client=custom_client - ) - for document in documents: - index.insert(document) - except Exception: - # Create new index if doesn't exist - logger.info(f"Creating new index {index_name}") - index = LlamaCloudIndex.from_documents( - documents=documents, - name=index_name, - project_id=PROJECT_ID, - organization_id=ORGANIZATION_ID, - api_key=os.getenv("LLAMA_CLOUD_API_KEY"), - show_progress=True, - custom_client=custom_client - ) - - return StopEvent(result={ - "success": True, - "index_name": index_name, - "index_url": f"https://cloud.llamaindex.ai/projects/{PROJECT_ID}/indexes/{index.id}", - "document_count": len(documents), - "file_name": file_name, - "message": f"Successfully indexed {len(documents)} documents to '{index_name}'" - }) - + index = LlamaCloudIndex.create_index( + documents=documents, + name=index_name, + project_id=LLAMA_CLOUD_PROJECT_ID, + api_key=LLAMA_CLOUD_API_KEY, + base_url=LLAMA_CLOUD_BASE_URL, + show_progress=True, + custom_client=custom_client, + ) + + # Insert documents to index + logger.info(f"Inserting {len(documents)} documents to {index_name}") + for document in documents: + index.insert(document) + + return StopEvent( + result={ + "success": True, + "index_name": index_name, + "document_count": len(documents), + "index_url": f"https://cloud.llamaindex.ai/projects/{LLAMA_CLOUD_PROJECT_ID}/indexes/{index.id}", + "file_name": file_name, + "message": f"Successfully indexed {len(documents)} documents to '{index_name}'", + } + ) + except Exception as e: logger.error(e.stack_trace) - return StopEvent(result={ - "success": False, - "error": str(e), - "stack_trace": e.stack_trace - }) + return StopEvent( + result={"success": False, "error": str(e), "stack_trace": e.stack_trace} + ) class ChatResponseEvent(Event): """Event emitted when chat engine generates a response""" + response: str sources: list query: str @@ -193,6 +172,7 @@ class ChatResponseEvent(Event): class ChatDeltaEvent(Event): """Streaming delta for incremental response output""" + delta: str @@ -201,7 +181,9 @@ class ChatWorkflow(Workflow): def __init__(self, **kwargs): super().__init__(**kwargs) - self.chat_engines: dict[str, BaseChatEngine] = {} # Cache chat engines per index + self.chat_engines: dict[ + str, BaseChatEngine + ] = {} # Cache chat engines per index @step async def initialize_chat(self, ev: ChatEvent, ctx: Context) -> InputRequiredEvent: @@ -225,10 +207,10 @@ async def initialize_chat(self, ev: ChatEvent, ctx: Context) -> InputRequiredEve # Connect to LlamaCloud Index index = LlamaCloudIndex( name=index_name, - project_id=PROJECT_ID, - organization_id=ORGANIZATION_ID, - api_key=os.getenv("LLAMA_CLOUD_API_KEY"), - custom_client=custom_client + project_id=LLAMA_CLOUD_PROJECT_ID, + api_key=LLAMA_CLOUD_API_KEY, + base_url=LLAMA_CLOUD_BASE_URL, + async_httpx_client=custom_client, ) # Create chat engine with memory @@ -252,13 +234,17 @@ async def initialize_chat(self, ev: 
ChatEvent, ctx: Context) -> InputRequiredEve ) except Exception as e: - return StopEvent(result={ - "success": False, - "error": f"Failed to initialize chat: {str(e)}" - }) + return StopEvent( + result={ + "success": False, + "error": f"Failed to initialize chat: {str(e)}", + } + ) @step - async def process_user_response(self, ev: HumanResponseEvent, ctx: Context) -> InputRequiredEvent | HumanResponseEvent | StopEvent | None: + async def process_user_response( + self, ev: HumanResponseEvent, ctx: Context + ) -> InputRequiredEvent | HumanResponseEvent | StopEvent | None: """Process user input and generate response""" try: logger.info(f"Processing user response {ev.response}") @@ -268,13 +254,17 @@ async def process_user_response(self, ev: HumanResponseEvent, ctx: Context) -> I # Check for exit command if user_input.lower() == "exit": - logger.info(f"User input is exit") - conversation_history = await ctx.store.get("conversation_history", default=[]) - return StopEvent(result={ - "success": True, - "message": "Chat session ended.", - "conversation_history": conversation_history - }) + logger.info("User input is exit") + conversation_history = await ctx.store.get( + "conversation_history", default=[] + ) + return StopEvent( + result={ + "success": True, + "message": "Chat session ended.", + "conversation_history": conversation_history, + } + ) # Get session info from context index_name = await ctx.store.get("index_name") @@ -295,29 +285,43 @@ async def process_user_response(self, ev: HumanResponseEvent, ctx: Context) -> I # Extract source nodes for citations sources = [] - if hasattr(stream_response, 'source_nodes'): + if hasattr(stream_response, "source_nodes"): for node in stream_response.source_nodes: - sources.append({ - "text": node.text[:200] + "..." if len(node.text) > 200 else node.text, - "score": node.score if hasattr(node, 'score') else None, - "metadata": node.metadata if hasattr(node, 'metadata') else {} - }) + sources.append( + { + "text": node.text[:200] + "..." 
+ if len(node.text) > 200 + else node.text, + "score": node.score if hasattr(node, "score") else None, + "metadata": node.metadata + if hasattr(node, "metadata") + else {}, + } + ) # Update conversation history - conversation_history = await ctx.store.get("conversation_history", default=[]) - conversation_history.append({ - "query": user_input, - "response": full_text.strip() if full_text else str(stream_response), - "sources": sources - }) + conversation_history = await ctx.store.get( + "conversation_history", default=[] + ) + conversation_history.append( + { + "query": user_input, + "response": full_text.strip() + if full_text + else str(stream_response), + "sources": sources, + } + ) await ctx.store.set("conversation_history", conversation_history) # After streaming completes, emit a summary response event to stream for frontend/main printing - ctx.write_event_to_stream(ChatResponseEvent( - response=full_text.strip() if full_text else str(stream_response), - sources=sources, - query=user_input, - )) + ctx.write_event_to_stream( + ChatResponseEvent( + response=full_text.strip() if full_text else str(stream_response), + sources=sources, + query=user_input, + ) + ) # Prompt for next input return InputRequiredEvent( @@ -325,14 +329,10 @@ async def process_user_response(self, ev: HumanResponseEvent, ctx: Context) -> I ) except Exception as e: - return StopEvent(result={ - "success": False, - "error": f"Error processing query: {str(e)}" - }) - + return StopEvent( + result={"success": False, "error": f"Error processing query: {str(e)}"} + ) -# Create workflow server -app = WorkflowServer() -app.add_workflow("upload", DocumentUploadWorkflow(timeout=300)) -app.add_workflow("chat", ChatWorkflow(timeout=None)) +upload = DocumentUploadWorkflow(timeout=None) +chat = ChatWorkflow(timeout=None) diff --git a/src/{{project_name_snake}}/__pycache__/__init__.cpython-312.pyc b/src/{{project_name_snake}}/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 5f8b8bb..0000000 Binary files a/src/{{project_name_snake}}/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/src/{{project_name_snake}}/__pycache__/clients.cpython-312.pyc b/src/{{project_name_snake}}/__pycache__/clients.cpython-312.pyc deleted file mode 100644 index d006654..0000000 Binary files a/src/{{project_name_snake}}/__pycache__/clients.cpython-312.pyc and /dev/null differ diff --git a/src/{{project_name_snake}}/__pycache__/qa_workflows.cpython-312.pyc b/src/{{project_name_snake}}/__pycache__/qa_workflows.cpython-312.pyc deleted file mode 100644 index d8de40b..0000000 Binary files a/src/{{project_name_snake}}/__pycache__/qa_workflows.cpython-312.pyc and /dev/null differ diff --git a/src/{{project_name_snake}}/__pycache__/workflows.cpython-312.pyc b/src/{{project_name_snake}}/__pycache__/workflows.cpython-312.pyc deleted file mode 100644 index 5ec673f..0000000 Binary files a/src/{{project_name_snake}}/__pycache__/workflows.cpython-312.pyc and /dev/null differ diff --git a/src/{{project_name_snake}}/clients.py b/src/{{project_name_snake}}/clients.py deleted file mode 100644 index ea324cd..0000000 --- a/src/{{project_name_snake}}/clients.py +++ /dev/null @@ -1,34 +0,0 @@ -import functools -import os -import httpx - -import dotenv -from llama_cloud.client import AsyncLlamaCloud - -dotenv.load_dotenv() - -# deployed agents may infer their name from the deployment name -# Note: Make sure that an agent deployment with this name actually exists -# otherwise calls to get or set data will fail. 
You may need to adjust the `or ` -# name for development -agent_name = os.getenv("LLAMA_DEPLOY_DEPLOYMENT_NAME") -agent_name_or_default = agent_name or "test-proj" -# required for all llama cloud calls -api_key = os.environ["LLAMA_CLOUD_API_KEY"] -# get this in case running against a different environment than production -base_url = os.getenv("LLAMA_CLOUD_BASE_URL") -project_id = os.getenv("LLAMA_DEPLOY_PROJECT_ID") - - -def get_custom_client(): - return httpx.AsyncClient( - timeout=60, headers={"Project-Id": project_id} if project_id else None - ) - -@functools.lru_cache(maxsize=None) -def get_llama_cloud_client(): - return AsyncLlamaCloud( - base_url=base_url, - token=api_key, - httpx_client=get_custom_client(), - ) diff --git a/src/{{project_name_snake}}/config.py.jinja b/src/{{project_name_snake}}/config.py.jinja deleted file mode 100644 index 1d7f57a..0000000 --- a/src/{{project_name_snake}}/config.py.jinja +++ /dev/null @@ -1,2 +0,0 @@ -PROJECT_ID = "{{ llama_project_id }}" -ORGANIZATION_ID = "{{ llama_org_id }}" \ No newline at end of file diff --git a/test-proj/.copier-answers.yml b/test-proj/.copier-answers.yml new file mode 100644 index 0000000..8463373 --- /dev/null +++ b/test-proj/.copier-answers.yml @@ -0,0 +1,6 @@ +# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY +_commit: '2405947' +_src_path: . +llama_org_id: asdf +llama_project_id: asdf +project_name: test-proj diff --git a/test-proj/.env.template b/test-proj/.env.template new file mode 100644 index 0000000..eb7fa0a --- /dev/null +++ b/test-proj/.env.template @@ -0,0 +1,2 @@ +# Copy this to .env and set any necessary secrets +OPENAI_API_KEY=sk-your-openai-api-key-here diff --git a/test-proj/.gitignore b/test-proj/.gitignore new file mode 100644 index 0000000..06b8b0d --- /dev/null +++ b/test-proj/.gitignore @@ -0,0 +1,4 @@ +.env +__pycache__ +workflows.db +.venv diff --git a/test-proj/README.md b/test-proj/README.md new file mode 100644 index 0000000..92d1b72 --- /dev/null +++ b/test-proj/README.md @@ -0,0 +1,17 @@ +# Document Q&A Application + +A document question-answering application built with LlamaIndex workflows and LlamaCloud services. + + +This application uses LlamaDeploy. For more information see [the docs](https://developers.llamaindex.ai/python/cloud/llamadeploy/getting-started) + +# Getting Started + +1. install `uv` if you haven't `brew install uv` +2. run `uvx llamactl serve` +3. Visit http://localhost:4501/docs and see workflow APIs + + +# Organization + +- `src` contains python workflow sources. The name of the deployment here is defined as `document-qa`requests. See http://localhost:4501/docs for openAPI docs diff --git a/test-proj/pyproject.toml b/test-proj/pyproject.toml new file mode 100644 index 0000000..7206b7e --- /dev/null +++ b/test-proj/pyproject.toml @@ -0,0 +1,48 @@ +[project] +name = "test_proj" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +authors = [] +requires-python = ">=3.12" +dependencies = [ + "llama-index-workflows>=2.2.0,<3.0.0", + "llama-cloud-services>=0.6.68", + "llama-index-core>=0.14.0", + "llama-index-llms-openai>=0.5.6", + "llama-index-embeddings-openai>=0.5.1", + "python-dotenv>=1.1.1", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "hatch>=1.14.1", + "pytest>=8.4.2", + "ruff>=0.13.0", + "ty>=0.0.1a20", +] + +[tool.hatch.envs.default.scripts] +"format" = "ruff format ." +"format-check" = "ruff format --check ." +"lint" = "ruff check --fix ." 
+"lint-check" = ["ruff check ."] +typecheck = "ty check src" +test = "pytest" +"all-check" = ["format-check", "lint-check", "test"] +"all-fix" = ["format", "lint", "test"] + +[tool.llamadeploy] +env-files = [".env"] +llama_cloud = true + +[tool.llamadeploy.ui] +directory = "./ui" + +[tool.llamadeploy.workflows] +upload = "test_proj.qa_workflows:upload" +chat = "test_proj.qa_workflows:chat" diff --git a/test-proj/src/test_proj/__init__.py b/test-proj/src/test_proj/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test-proj/src/test_proj/clients.py b/test-proj/src/test_proj/clients.py new file mode 100644 index 0000000..9f8d0d8 --- /dev/null +++ b/test-proj/src/test_proj/clients.py @@ -0,0 +1,52 @@ +import functools +import os +import httpx + +from llama_cloud.client import AsyncLlamaCloud +from llama_cloud_services import LlamaParse + +# deployed agents may infer their name from the deployment name +# Note: Make sure that an agent deployment with this name actually exists +# otherwise calls to get or set data will fail. You may need to adjust the `or ` +# name for development +DEPLOYMENT_NAME = os.getenv("LLAMA_DEPLOY_DEPLOYMENT_NAME") +# required for all llama cloud calls +LLAMA_CLOUD_API_KEY = os.environ["LLAMA_CLOUD_API_KEY"] +# get this in case running against a different environment than production +LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL") +LLAMA_CLOUD_PROJECT_ID = os.getenv("LLAMA_DEPLOY_PROJECT_ID") +INDEX_NAME = "document_qa_index" + + +def get_custom_client() -> httpx.AsyncClient: + return httpx.AsyncClient( + timeout=60, + headers={"Project-Id": LLAMA_CLOUD_PROJECT_ID} + if LLAMA_CLOUD_PROJECT_ID + else None, + ) + + +@functools.cache +def get_llama_cloud_client() -> AsyncLlamaCloud: + return AsyncLlamaCloud( + base_url=LLAMA_CLOUD_BASE_URL, + token=LLAMA_CLOUD_API_KEY, + httpx_client=get_custom_client(), + ) + + +@functools.cache +def get_llama_parse_client() -> LlamaParse: + return LlamaParse( + parse_mode="parse_page_with_agent", + model="openai-gpt-4-1-mini", + high_res_ocr=True, + adaptive_long_table=True, + outlined_table_extraction=True, + output_tables_as_HTML=True, + result_type="markdown", + api_key=LLAMA_CLOUD_API_KEY, + project_id=LLAMA_CLOUD_PROJECT_ID, + custom_client=get_custom_client(), + ) diff --git a/test-proj/src/test_proj/qa_workflows.py b/test-proj/src/test_proj/qa_workflows.py new file mode 100644 index 0000000..5363255 --- /dev/null +++ b/test-proj/src/test_proj/qa_workflows.py @@ -0,0 +1,338 @@ +import logging +import os + +import httpx +from llama_cloud.types import RetrievalMode +import tempfile +from llama_index.core.chat_engine.types import BaseChatEngine, ChatMode +from workflows import Workflow, step, Context +from workflows.events import ( + StartEvent, + StopEvent, + Event, + InputRequiredEvent, + HumanResponseEvent, +) +from workflows.retry_policy import ConstantDelayRetryPolicy + +from llama_cloud_services import LlamaCloudIndex +from llama_index.core import Settings +from llama_index.llms.openai import OpenAI +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.core.memory import ChatMemoryBuffer + +from .clients import ( + LLAMA_CLOUD_API_KEY, + LLAMA_CLOUD_BASE_URL, + get_custom_client, + get_llama_cloud_client, + get_llama_parse_client, + LLAMA_CLOUD_PROJECT_ID, +) + + +logger = logging.getLogger(__name__) + + +class FileEvent(StartEvent): + file_id: str + index_name: str + + +class DownloadFileEvent(Event): + file_id: str + + +class FileDownloadedEvent(Event): + file_id: str + 
file_path: str + filename: str + + +class ChatEvent(StartEvent): + index_name: str + session_id: str + + +# Configure LLM and embedding model +Settings.llm = OpenAI(model="gpt-4", temperature=0.1) +Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small") + +custom_client = get_custom_client() + + +class DocumentUploadWorkflow(Workflow): + """Workflow to upload and index documents using LlamaParse and LlamaCloud Index""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # Get API key with validation + + # Initialize LlamaParse with recommended settings + self.parser = get_llama_parse_client() + + @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10)) + async def run_file(self, event: FileEvent, ctx: Context) -> DownloadFileEvent: + logger.info(f"Running file {event.file_id}") + await ctx.store.set("index_name", event.index_name) + return DownloadFileEvent(file_id=event.file_id) + + @step(retry_policy=ConstantDelayRetryPolicy(maximum_attempts=3, delay=10)) + async def download_file( + self, event: DownloadFileEvent, ctx: Context + ) -> FileDownloadedEvent: + """Download the file reference from the cloud storage""" + logger.info(f"Downloading file {event.file_id}") + try: + file_metadata = await get_llama_cloud_client().files.get_file( + id=event.file_id + ) + file_url = await get_llama_cloud_client().files.read_file_content( + event.file_id + ) + + temp_dir = tempfile.gettempdir() + filename = file_metadata.name + file_path = os.path.join(temp_dir, filename) + client = httpx.AsyncClient() + # Report progress to the UI + logger.info(f"Downloading file {file_url.url} to {file_path}") + + async with client.stream("GET", file_url.url) as response: + with open(file_path, "wb") as f: + async for chunk in response.aiter_bytes(): + f.write(chunk) + logger.info(f"Downloaded file {file_url.url} to {file_path}") + return FileDownloadedEvent( + file_id=event.file_id, file_path=file_path, filename=filename + ) + except Exception as e: + logger.error(f"Error downloading file {event.file_id}: {e}", exc_info=True) + raise e + + @step + async def parse_document(self, ev: FileDownloadedEvent, ctx: Context) -> StopEvent: + """Parse document and index it to LlamaCloud""" + try: + logger.info(f"Parsing document {ev.file_id}") + # Get file path or content from event + file_path = ev.file_path + file_name = file_path.split("/")[-1] + index_name = await ctx.store.get("index_name") + + # Parse the document + if file_path: + # Parse from file path + result = await self.parser.aparse(file_path) + + # Get parsed documents + documents = result.get_text_documents() + + # Create or connect to LlamaCloud Index + index = LlamaCloudIndex.create_index( + documents=documents, + name=index_name, + project_id=LLAMA_CLOUD_PROJECT_ID, + api_key=LLAMA_CLOUD_API_KEY, + base_url=LLAMA_CLOUD_BASE_URL, + show_progress=True, + custom_client=custom_client, + ) + + # Insert documents to index + logger.info(f"Inserting {len(documents)} documents to {index_name}") + for document in documents: + index.insert(document) + + return StopEvent( + result={ + "success": True, + "index_name": index_name, + "document_count": len(documents), + "index_url": f"https://cloud.llamaindex.ai/projects/{LLAMA_CLOUD_PROJECT_ID}/indexes/{index.id}", + "file_name": file_name, + "message": f"Successfully indexed {len(documents)} documents to '{index_name}'", + } + ) + + except Exception as e: + logger.error(e.stack_trace) + return StopEvent( + result={"success": False, "error": str(e), "stack_trace": e.stack_trace} 
+ ) + + +class ChatResponseEvent(Event): + """Event emitted when chat engine generates a response""" + + response: str + sources: list + query: str + + +class ChatDeltaEvent(Event): + """Streaming delta for incremental response output""" + + delta: str + + +class ChatWorkflow(Workflow): + """Workflow to handle continuous chat queries against indexed documents""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.chat_engines: dict[ + str, BaseChatEngine + ] = {} # Cache chat engines per index + + @step + async def initialize_chat(self, ev: ChatEvent, ctx: Context) -> InputRequiredEvent: + """Initialize the chat session and request first input""" + try: + logger.info(f"Initializing chat {ev.index_name}") + index_name = ev.index_name + session_id = ev.session_id + + # Store session info in context + await ctx.store.set("index_name", index_name) + await ctx.store.set("session_id", session_id) + await ctx.store.set("conversation_history", []) + + # Create cache key for chat engine + cache_key = f"{index_name}_{session_id}" + + # Initialize chat engine if not exists + if cache_key not in self.chat_engines: + logger.info(f"Initializing chat engine {cache_key}") + # Connect to LlamaCloud Index + index = LlamaCloudIndex( + name=index_name, + project_id=LLAMA_CLOUD_PROJECT_ID, + api_key=LLAMA_CLOUD_API_KEY, + base_url=LLAMA_CLOUD_BASE_URL, + async_httpx_client=custom_client, + ) + + # Create chat engine with memory + memory = ChatMemoryBuffer.from_defaults(token_limit=3900) + self.chat_engines[cache_key] = index.as_chat_engine( + chat_mode=ChatMode.CONTEXT, + memory=memory, + llm=Settings.llm, + context_prompt=( + "You are a helpful assistant that answers questions based on the provided documents. " + "Always cite specific information from the documents when answering. " + "If you cannot find the answer in the documents, say so clearly." + ), + verbose=False, + retriever_mode=RetrievalMode.CHUNKS, + ) + + # Request first user input + return InputRequiredEvent( + prefix="Chat initialized. 
Ask a question (or type 'exit' to quit): " + ) + + except Exception as e: + return StopEvent( + result={ + "success": False, + "error": f"Failed to initialize chat: {str(e)}", + } + ) + + @step + async def process_user_response( + self, ev: HumanResponseEvent, ctx: Context + ) -> InputRequiredEvent | HumanResponseEvent | StopEvent | None: + """Process user input and generate response""" + try: + logger.info(f"Processing user response {ev.response}") + user_input = ev.response.strip() + + logger.info(f"User input: {user_input}") + + # Check for exit command + if user_input.lower() == "exit": + logger.info("User input is exit") + conversation_history = await ctx.store.get( + "conversation_history", default=[] + ) + return StopEvent( + result={ + "success": True, + "message": "Chat session ended.", + "conversation_history": conversation_history, + } + ) + + # Get session info from context + index_name = await ctx.store.get("index_name") + session_id = await ctx.store.get("session_id") + cache_key = f"{index_name}_{session_id}" + + # Get chat engine + chat_engine = self.chat_engines[cache_key] + + # Process query with chat engine (streaming) + stream_response = await chat_engine.astream_chat(user_input) + full_text = "" + + # Emit streaming deltas to the event stream + async for token in stream_response.async_response_gen(): + full_text += token + ctx.write_event_to_stream(ChatDeltaEvent(delta=token)) + + # Extract source nodes for citations + sources = [] + if hasattr(stream_response, "source_nodes"): + for node in stream_response.source_nodes: + sources.append( + { + "text": node.text[:200] + "..." + if len(node.text) > 200 + else node.text, + "score": node.score if hasattr(node, "score") else None, + "metadata": node.metadata + if hasattr(node, "metadata") + else {}, + } + ) + + # Update conversation history + conversation_history = await ctx.store.get( + "conversation_history", default=[] + ) + conversation_history.append( + { + "query": user_input, + "response": full_text.strip() + if full_text + else str(stream_response), + "sources": sources, + } + ) + await ctx.store.set("conversation_history", conversation_history) + + # After streaming completes, emit a summary response event to stream for frontend/main printing + ctx.write_event_to_stream( + ChatResponseEvent( + response=full_text.strip() if full_text else str(stream_response), + sources=sources, + query=user_input, + ) + ) + + # Prompt for next input + return InputRequiredEvent( + prefix="\nAsk another question (or type 'exit' to quit): " + ) + + except Exception as e: + return StopEvent( + result={"success": False, "error": f"Error processing query: {str(e)}"} + ) + + +upload = DocumentUploadWorkflow(timeout=None) +chat = ChatWorkflow(timeout=None) diff --git a/test-proj/tests/test_placeholder.py b/test-proj/tests/test_placeholder.py new file mode 100644 index 0000000..201975f --- /dev/null +++ b/test-proj/tests/test_placeholder.py @@ -0,0 +1,2 @@ +def test_placeholder(): + pass diff --git a/test-proj/ui/.gitignore b/test-proj/ui/.gitignore new file mode 100644 index 0000000..31ee023 --- /dev/null +++ b/test-proj/ui/.gitignore @@ -0,0 +1,4 @@ +node_modules +dist +# uses pnpm +pnpm-lock.yaml diff --git a/test-proj/ui/index.html b/test-proj/ui/index.html new file mode 100644 index 0000000..37e7c42 --- /dev/null +++ b/test-proj/ui/index.html @@ -0,0 +1,14 @@ + + +
[The added UI sources' markup was stripped during extraction and is not recoverable here. The new test-proj/ui files (index.html plus React components) render a chat interface with an empty state ("No messages yet", "Start a conversation!"), per-message content and timestamp display, the input hint "Press Enter to send • Shift+Enter for new line", and the page subtitle "Upload documents and ask questions about them".]