Skip to content

Commit f54e17c

Browse files
author
TaimoorKhan10
committed
Fix code formatting with Black
1 parent 9109868 commit f54e17c

30 files changed

+3584
-2886
lines changed

docs/assets/generate_charts.py

Lines changed: 353 additions & 182 deletions
Large diffs are not rendered by default.

examples/basic_query_example.py

Lines changed: 19 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -14,57 +14,53 @@
1414

1515
from src.enterprise_rag import RAGSystem
1616

17+
1718
def main():
1819
"""Run a basic query example."""
19-
20+
2021
# Initialize the RAG system with a basic configuration
2122
rag_system = RAGSystem(
2223
vector_store_config={
2324
"type": "faiss",
2425
"index_path": "data/index",
25-
"embedding_model": "sentence-transformers/all-mpnet-base-v2"
26-
},
27-
retrieval_config={
28-
"type": "hybrid",
29-
"top_k": 5,
30-
"use_reranker": True
26+
"embedding_model": "sentence-transformers/all-mpnet-base-v2",
3127
},
28+
retrieval_config={"type": "hybrid", "top_k": 5, "use_reranker": True},
3229
generation_config={
3330
"model": "gpt-3.5-turbo",
3431
"temperature": 0.7,
35-
"max_tokens": 500
36-
}
32+
"max_tokens": 500,
33+
},
3734
)
38-
35+
3936
# Define your query
40-
query = "What are the key benefits of using a RAG system for enterprise applications?"
41-
37+
query = (
38+
"What are the key benefits of using a RAG system for enterprise applications?"
39+
)
40+
4241
# Optional query parameters
4342
query_options = {
4443
"filters": {
4544
"metadata.doc_type": "pdf", # Optional filter by document type
46-
"metadata.date": {"$gt": "2022-01-01"} # Filter by date
45+
"metadata.date": {"$gt": "2022-01-01"}, # Filter by date
4746
},
48-
"retrieval_options": {
49-
"use_semantic": True,
50-
"use_keyword": True
51-
}
47+
"retrieval_options": {"use_semantic": True, "use_keyword": True},
5248
}
53-
49+
5450
# Execute the query
5551
print(f"Querying: '{query}'")
56-
52+
5753
response = rag_system.query(query, options=query_options)
58-
54+
5955
# Print the results
6056
print("\n----- ANSWER -----")
6157
print(response["answer"])
62-
58+
6359
print("\n----- SOURCES -----")
6460
for i, source in enumerate(response["sources"], 1):
6561
print(f"{i}. {source['title']} (score: {source['score']:.3f})")
6662
print(f" Snippet: {source['text'][:150]}...")
67-
63+
6864
# Print performance metrics
6965
print("\n----- PERFORMANCE METRICS -----")
7066
for key, value in response["metrics"].items():
@@ -80,5 +76,5 @@ def main():
8076
print("WARNING: OPENAI_API_KEY environment variable is not set.")
8177
print("Set it with: export OPENAI_API_KEY=your_api_key_here")
8278
print("For this example, using a mock response instead.\n")
83-
79+
8480
main()

examples/document_indexing_example.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,28 @@
2323
from src.enterprise_rag import RAGSystem
2424
from src.document_processing.processor import DocumentProcessor
2525

26+
2627
def main():
2728
"""Run a document indexing example."""
28-
29+
2930
# Initialize the RAG system
3031
rag_system = RAGSystem(
3132
vector_store_config={
3233
"type": "faiss",
3334
"index_path": "data/index",
34-
"embedding_model": "sentence-transformers/all-mpnet-base-v2"
35+
"embedding_model": "sentence-transformers/all-mpnet-base-v2",
3536
}
3637
)
37-
38+
3839
# Define document paths (replace with your actual documents)
3940
documents_dir = Path("examples/sample_documents")
4041
if not documents_dir.exists():
4142
logger.warning(f"Sample documents directory not found: {documents_dir}")
4243
logger.info("Creating sample documents directory and a sample text file")
43-
44+
4445
# Create directory
4546
documents_dir.mkdir(parents=True, exist_ok=True)
46-
47+
4748
# Create a sample text file
4849
sample_text = """
4950
# Enterprise-Ready RAG System
@@ -68,43 +69,41 @@ def main():
6869
6970
5. **Cost Efficiency**: More efficient than fine-tuning large models on domain-specific data.
7071
"""
71-
72+
7273
with open(documents_dir / "rag_overview.txt", "w", encoding="utf-8") as f:
7374
f.write(sample_text)
74-
75+
7576
# Process documents
7677
document_processor = DocumentProcessor(
77-
chunking_strategy="recursive",
78-
chunk_size=1000,
79-
chunk_overlap=200
78+
chunking_strategy="recursive", chunk_size=1000, chunk_overlap=200
8079
)
81-
80+
8281
# Find documents
8382
document_paths = list(documents_dir.glob("**/*.*"))
8483
logger.info(f"Found {len(document_paths)} documents to process")
85-
84+
8685
# Process and index documents
8786
for doc_path in document_paths:
8887
try:
8988
logger.info(f"Processing document: {doc_path}")
90-
89+
9190
# Process document into chunks
9291
doc_chunks = document_processor.process_document(str(doc_path))
93-
92+
9493
logger.info(f"Created {len(doc_chunks)} chunks from {doc_path.name}")
95-
94+
9695
# Add document chunks to RAG system
9796
doc_ids = rag_system.add_documents(doc_chunks)
98-
97+
9998
logger.info(f"Indexed document with {len(doc_ids)} chunks")
100-
99+
101100
except Exception as e:
102101
logger.error(f"Error processing document {doc_path}: {str(e)}")
103-
102+
104103
# Save the index
105104
rag_system.save_index()
106105
logger.info(f"Index saved to {rag_system.vector_store.index_path}")
107-
106+
108107
# Print statistics
109108
stats = rag_system.get_stats()
110109
print("\n----- INDEX STATISTICS -----")

0 commit comments

Comments
 (0)