diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt new file mode 100644 index 00000000..78204cb6 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_QA-team_wednesday.txt @@ -0,0 +1,38 @@ +MEETING TRANSCRIPT - QA TEAM +Date: Wednesday, September 18, 2025 +Time: 10:00 AM - 11:30 AM +Participants: Maria (QA Lead), Tom (Senior QA Engineer), Lisa (QA Automation Engineer), Roberto (Manual Testing Specialist) + +[10:02] Maria: Let's review CRM migration testing progress. Tom, report on data import tests? + +[10:03] Tom: Found critical issues. Import failures with special characters in addresses and names. + +[10:06] Tom: UTF-8 parsing problems with accents, currency symbols, and Asian characters. + +[10:08] Tom: 12% of records affected - about 15,000 out of 125,000 total records. + +[10:09] Roberto: Confirmed. Also, failed imports corrupt entire batches. + +[10:12] Lisa: No atomic transactions for batches? + +[10:13] Tom: Correct. Each record processed independently without rollback. + +[10:15] Roberto: Found referential integrity issues - orphaned references between contacts and companies. + +[10:19] Maria: Need three validation types: pre-import, during import, and post-import. + +[10:25] Tom: Recommend smaller migration batches to reduce risk? + +[10:26] Maria: Excellent. Batches of 5,000 records with validation between each. + +[10:30] Maria: Four recommendations: UTF-8 parser fix, atomic transactions, handle orphaned references, small batch migration. + +[10:33] Roberto: Also need concurrency testing during migration. + +[10:40] Maria: Complete additional testing in one week. Feasible? + +[10:42] Tom: Will share test cases today. + +[10:44] Maria: Friday 2 PM meeting before management review. + +[10:45] Lisa: Will prepare testing metrics dashboard. \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt new file mode 100644 index 00000000..7435951a --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_development-team_monday.txt @@ -0,0 +1,32 @@ +MEETING TRANSCRIPT - DEVELOPMENT TEAM +Date: Monday, September 16, 2025 +Time: 09:00 AM - 10:15 AM +Participants: Alice (Tech Lead), John (Senior Developer), Sarah (Backend Developer), Mike (DevOps Engineer) + +[09:02] Alice: Let's review the search API deployed last week. Any issues? + +[09:03] Sarah: API works but performance degrades with 1,000+ queries per minute. Response times jump from 200ms to 3 seconds. + +[09:05] John: Database queries and no caching layer? + +[09:06] Sarah: Exactly. Complex queries are slow, and we need a caching layer. + +[09:07] Mike: Also hitting CPU limits during spikes. Need auto-scaling. + +[09:08] Alice: Three priorities: query optimization, cache implementation, and infrastructure scaling. + +[09:11] Sarah: Propose 15-minute TTL cache with event-based invalidation. + +[09:13] John: I'll optimize bool queries and add calculated index fields. + +[09:17] Mike: Can set up auto-scaling by tomorrow - scale to 6 instances at 70% CPU. + +[09:18] Sarah: Starting cache implementation today, basic version by Wednesday. 
+ +[09:19] John: New indexes and query optimization ready for testing Wednesday. + +[09:24] Alice: Clear plan. Mike handles scaling, Sarah implements cache, John optimizes queries. + +[09:26] Alice: I'll coordinate with product team on deployment impacts and QA for load testing. + +[09:30] Alice: Meeting Wednesday 3 PM to review progress. Thanks team! \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt new file mode 100644 index 00000000..fea4865e --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/meeting_management-sync_friday.txt @@ -0,0 +1,36 @@ +MEETING TRANSCRIPT - MANAGEMENT SYNC +Date: Friday, September 20, 2025 +Time: 02:00 PM - 03:00 PM +Participants: David (Project Manager), Alice (Tech Lead), Maria (QA Lead), Emma (Product Manager), Carlos (DevOps Manager) + +[14:03] Emma: Good progress. Users report 40% search speed improvement, but support tickets show peak hour performance issues. + +[14:05] Alice: We've identified bottlenecks. Working on cache layer implementation and search engine query optimization. + +[14:06] David: Can we resolve issues without impacting October migration date? + +[14:09] Alice: Recommend two-week extension for complete migration due to performance issues. + +[14:10] Maria: QA agrees. Found data import blockers with special characters and integrity issues. + +[14:12] Maria: Need one week to fix issues, another for complete re-testing. + +[14:14] Carlos: Infrastructure supports extension for proper rollback and disaster recovery testing. + +[14:15] Emma: Could we do partial migration on original date? + +[14:17] Alice: Yes. Contact management module first, reports and analytics in phase two. + +[14:21] Maria: Phased migration ideal for QA - validate each module independently. + +[14:22] David: Proposal: Phase 1 - Contact management October 15th. Phase 2 - Complete migration October 30th. + +[14:23] Alice: Reasonable timeline for performance fixes. + +[14:24] Emma: Works from product perspective. Will update stakeholder communications. + +[14:25] Maria: QA commits to these timelines. + +[14:26] Carlos: Will prepare deployment strategies for both phases. + +[14:32] David: Carlos, send deployment calendar by Monday. Thanks team! \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt new file mode 100644 index 00000000..c5730a84 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_QA-team.txt @@ -0,0 +1,31 @@ +WEEKLY REPORT - QA TEAM +Week of September 16-20, 2025 +Prepared by: Maria Gonzalez, QA Lead + +=== EXECUTIVE SUMMARY === +QA team identified critical issues in CRM migration testing. Significant problems in legacy data import and referential integrity require immediate attention. 
+
+=== TESTING COMPLETED ===
+- Functional: Contact management (100%), Authentication (100%), Search (75%), Analytics (60%)
+- Data import: 125,000 legacy records tested, 12 critical issues found
+- Performance: Core modules complete, identified issues with 500+ concurrent users
+
+=== CRITICAL ISSUES ===
+**QA-2025-001 - Data Import Failures**
+- UTF-8 parsing problems with special characters
+- 15,000 records affected (12% of total)
+- Escalated to development
+
+**QA-2025-002 - Transaction Integrity**
+- Failed imports leave batches in inconsistent state
+- No atomic transactions for batches
+- Requires architecture redesign
+
+**QA-2025-003 - Orphaned References**
+- 2,300 records with invalid company/contact references
+- Pending business logic decision
+
+=== METRICS ===
+- Test cases executed: 847 of 1,200 (70.6%)
+- Pass rate: 79.3%, Automation coverage: 36%
+- Bugs: 28 total (4 critical, 8 high, 12 medium, 4 low)
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt
new file mode 100644
index 00000000..c4760150
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/Dataset/report_development-team.txt
@@ -0,0 +1,30 @@
+WEEKLY REPORT - DEVELOPMENT TEAM
+Week of September 16-20, 2025
+Prepared by: Alice Thompson, Tech Lead
+
+=== EXECUTIVE SUMMARY ===
+Development team completed critical infrastructure components but identified performance bottlenecks requiring attention before production deployment.
+
+=== KEY ACCOMPLISHMENTS ===
+- Database schema and indexes completed for CRM
+- 12 of 18 API endpoints integrated with authentication
+- Contact management: 95% complete, Search: 80%, Analytics: 70%
+
+=== TECHNICAL CHALLENGES ===
+- Critical: Search API degrades at 1,000+ queries/minute (200ms to 3+ seconds)
+- Root cause: Complex database queries without caching layer
+- Multi-filter searches average 1.2 seconds execution time
+
+=== ACTION PLAN NEXT WEEK ===
+1. Cache system implementation (Sarah) - Basic by Wednesday, complete by Friday
+2. Search engine query optimization (John) - Testing ready Wednesday
+3. Auto-scaling setup (Mike) - Scale to 6 instances at 70% CPU
+
+=== METRICS ===
+- Story points: 43 of 50 completed (86%)
+- Bugs: 7 reported, 12 resolved
+- Code coverage: 78% (target: 80%)
+
+=== TIMELINE ===
+- October 15 Contact Management: 85% confidence, 2 sprints remaining
+- October 30 Complete Migration: 90% confidence, 4 sprints remaining
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/README.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/README.md
new file mode 100644
index 00000000..2a9143ec
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/README.md
@@ -0,0 +1,48 @@
+# Build a Local lightweight RAG System with Elasticsearch
+
+A simple RAG (Retrieval-Augmented Generation) system that uses Elasticsearch for semantic search and Local AI as the model provider. This application serves as supporting content for the blog post [Build a Local lightweight RAG System with Elasticsearch](https://www.elastic.co/search-labs/blog/local-rag-with-lightweight-elasticsearch).
+
+## Prerequisites
+
+- Docker
+- Python 3.11+
+
+The steps below assume Elasticsearch and Local AI are already running locally (for example, via Docker, as described in the blog post).
+
+## Quick Start
+
+### 1. Create and Activate a Virtual Environment
+
+```bash
+python -m venv venv
+source venv/bin/activate
+```
+
+### 2. Install Python Dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+### 3. Configure Environment Variables
+
+Create a `.env` file and add your settings:
+
+```bash
+# Elasticsearch Configuration
+ES_URL=http://localhost:9200
+ES_API_KEY="your_elasticsearch_api_key_here"
+INDEX_NAME=team-data
+
+# Local AI Configuration
+LOCAL_AI_URL=http://localhost:8080/v1
+
+# Dataset Configuration
+DATASET_FOLDER=./Dataset
+```
+
+### 4. Run the Script
+
+```bash
+python script.py
+```
+
+
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md
new file mode 100644
index 00000000..ceeff2d2
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/llama-smoltalk-3.2-1b-instruct_results.md
@@ -0,0 +1,26 @@
+šŸ” Search: 'Can you summarize the performance issues in the API?'
+
+šŸ¤– Asking to model: llama-smoltalk-3.2-1b-instruct
+
+## šŸ’” Question:
+Can you summarize the performance issues in the API?
+
+## šŸ“ Answer:
+Based on the documents, it appears that the main performance issue with the API is related to the search query optimization. The API degrades to around 1,000+ queries per minute (QP/min) when there are 12 of 18 API endpoints integrated with authentication. This issue is caused by complex queries without a caching layer, leading to performance degrades and slow response times.
+
+However, there is also a smaller issue with the "Search" API, where it degrades to around 3+ seconds after 1.2 seconds execution time. This is likely due to multi-filter searches and the need for a caching layer to improve performance.
+
+To address these issues, the team is working on implementing a caching layer (Sarah) and optimizing bool queries and adding calculated index fields (John) to improve query efficiency. They are also working on setting up auto-scaling for the database (Mike) to ensure that it can handle increased traffic.
+
+A meeting was held to discuss these issues and a plan for improvement was agreed upon. The team will work together to implement a caching layer and optimize the queries, and the team will work with product team to ensure that the migration is completed on time and does not impact the October migration date.
+
+šŸ“š Citations:
+ [1] report_development-team.txt
+ [2] meeting_development-team_monday.txt
+ [3] meeting_management-sync_friday.txt
+
+
+## Stats
+šŸ” Search Latency: 12ms
+
+šŸ¤– AI Latency: 21019ms | 5.8 tokens/s
\ No newline at end of file
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md
new file mode 100644
index 00000000..d3cd7dc3
--- /dev/null
+++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/results.md
@@ -0,0 +1,27 @@
+
+šŸ” Search: 'Can you summarize the performance issues in the API?'
+
+šŸ¤– Asking to model: dolphin3.0-qwen2.5-0.5b
+
+## šŸ’” Question:
+Can you summarize the performance issues in the API?
+## šŸ“ Answer:
+
+The performance issues in the API can be summarized as follows:
+
+1. **Search API degrades with 1,000+ queries per minute**: The search API has degraded, with performance degrades to a time of 3 seconds compared to the target of 200ms.
+
+2.
**Complex queries are slow and no caching layer**: Complex queries take longer to execute as expected. + +3. **CPU limits during spikes**: The CPU usage is at 100% at the peak hour and limits to 70% at the peak hour. + +šŸ“š Citations: + [1] report_development-team.txt + [2] meeting_development-team_monday.txt + [3] meeting_management-sync_friday.txt + +## Stats + +šŸ” Search Latency: 14ms + +šŸ¤– AI Latency: 16044ms | 9.5 tokens/s \ No newline at end of file diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md new file mode 100644 index 00000000..455b5c69 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/smollm2-1.7b-instruct_results.md @@ -0,0 +1,24 @@ +šŸ” Search: 'Can you summarize the performance issues in the API?' + +šŸ¤– Asking to model: smollm2-1.7b-instruct + +## šŸ’” Question: + +Can you summarize the performance issues in the API? +## šŸ“ Answer: + +The development team identified two key technical challenges for the API: + +1. The search API degrades at 1,000+ queries per minute, causing average execution times to jump from 200ms to 3 seconds. +2. The root cause is complex database queries without a caching layer, leading to poor query performance. + +šŸ“š Citations: + [1] report_development-team.txt + [2] meeting_development-team_monday.txt + [3] meeting_management-sync_friday.txt + +## Stats + +šŸ” Search Latency: 16ms + +šŸ¤– AI Latency: 47561ms | 4.8 tokens/s diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-is-the-sky-blue.md b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-is-the-sky-blue.md new file mode 100644 index 00000000..53929146 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/app-logs/why-is-the-sky-blue.md @@ -0,0 +1,18 @@ +>>> Why is the sky blue? + +## Raw Response + +```json +{"created":1763994585,"object":"chat.completion","id":"d96f7e7a-003f-4167-bb07-501aa41ef8e7","model":"dolphin3.0-qwen2.5-0.5b","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"The sky being blue is primarily due to the scattering of sunlight by the Earth's atmosphere. Here are a few key factors contributing to the blue color of the sky:\n\n1. **Scattering of Light**: Blue light is scattered more than other colors by the Earth's atmosphere. This scattering occurs because blue light has a shorter wavelength, which is more likely to travel through the Earth's atmosphere than other wavelengths. The scattering occurs as the light bounces off particles in the atmosphere, such as dust and water vapor, which have a higher concentration in the upper atmosphere.\n\n2. **Atmospheric Composition**: The Earth's atmosphere is composed of gases like nitrogen, oxygen, and water vapor. These gases have different properties, and their interactions with sunlight can affect the colors they transmit. Nitrogen and oxygen gases have a scattering effect that is more intense at shorter wavelengths than water vapor, which results in the blue color observed in the sky.\n\n3. **Atmospheric Reflection**: The blue color of the sky also depends on the amount of light reflecting off the Earth's surface. Clouds, for instance, can reflect a lot of sunlight, particularly blue and green light, which creates a blue tint to the sky. 
Additionally, the reflection of sunlight from the Earth's surface can contribute to the blue color observed in the sky.\n\n4. **Clouds and Precipitation**: The presence of clouds and precipitation can also affect the color of the sky. Clouds can scatter blue light, but they can also trap it, making the sky appear a bit less blue. Precipitation, particularly heavy rain or snow, can also scatter light, making the sky appear a bit darker and more gray.\n\nIn summary, the blue color of the sky is a result of the scattering of sunlight by the Earth's atmosphere, with the primary contributor being the scattering of blue light and the absorption of other wavelengths by particles in the atmosphere."}}],"usage":{"prompt_tokens":14,"completion_tokens":384,"total_tokens":398}} +``` + +## Answer + +The sky being blue is primarily due to the scattering of sunlight by the Earth's atmosphere. Here are a few key factors contributing to the blue color of the sky: + +1. **Scattering of Light**: Blue light is scattered more than other colors by the Earth's atmosphere. This scattering occurs because blue light has a shorter wavelength, which is more likely to travel through the Earth's atmosphere than other wavelengths. The scattering occurs as the light bounces off particles in the atmosphere, such as dust and water vapor, which have a higher concentration in the upper atmosphere. +2. **Atmospheric Composition**: The Earth's atmosphere is composed of gases like nitrogen, oxygen, and water vapor. These gases have different properties, and their interactions with sunlight can affect the colors they transmit. Nitrogen and oxygen gases have a scattering effect that is more intense at shorter wavelengths than water vapor, which results in the blue color observed in the sky. +3. **Atmospheric Reflection**: The blue color of the sky also depends on the amount of light reflecting off the Earth's surface. Clouds, for instance, can reflect a lot of sunlight, particularly blue and green light, which creates a blue tint to the sky. Additionally, the reflection of sunlight from the Earth's surface can contribute to the blue color observed in the sky. +4. **Clouds and Precipitation**: The presence of clouds and precipitation can also affect the color of the sky. Clouds can scatter blue light, but they can also trap it, making the sky appear a bit less blue. Precipitation, particularly heavy rain or snow, can also scatter light, making the sky appear a bit darker and more gray. + +In summary, the blue color of the sky is a result of the scattering of sunlight by the Earth's atmosphere, with the primary contributor being the scattering of blue light and the absorption of other wavelengths by particles in the atmosphere. 
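+
+## Reproducing the Request
+
+For reference, here is a minimal sketch of how a raw response like the one above can be produced with the same OpenAI-compatible client that `script.py` uses. The base URL, placeholder API key, and model name below are the defaults assumed by this repo; adjust them to your Local AI setup.
+
+```python
+from openai import OpenAI
+
+# Local AI exposes an OpenAI-compatible API; the key is a placeholder, not a real OpenAI key.
+client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-x")
+
+response = client.chat.completions.create(
+    model="dolphin3.0-qwen2.5-0.5b",
+    messages=[{"role": "user", "content": "Why is the sky blue?"}],
+)
+
+# Serialize the full response object to JSON, as captured in "Raw Response" above.
+print(response.model_dump_json())
+```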
diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt new file mode 100644 index 00000000..6ad807a9 --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/requirements.txt @@ -0,0 +1,230 @@ +absl-py==2.3.1 +aiohappyeyeballs==2.4.6 +aiohttp==3.11.13 +aiosignal==1.3.2 +alembic==1.14.1 +annotated-types==0.7.0 +anyio==4.10.0 +appdirs==1.4.4 +appnope==0.1.4 +asgiref==3.8.1 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==25.1.0 +auth0-python==4.8.1 +backoff==2.2.1 +bcrypt==4.3.0 +beautifulsoup4==4.13.3 +blinker==1.9.0 +build==1.2.2.post1 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +chroma-hnswlib==0.7.6 +chromadb==0.5.23 +click==8.1.8 +cohere==5.14.0 +coloredlogs==15.0.1 +comm==0.2.2 +crewai==0.102.0 +crewai-tools==0.36.0 +cryptography==44.0.2 +dataclasses-json==0.6.7 +debugpy==1.8.12 +decorator==5.2.1 +Deprecated==1.2.18 +deprecation==2.1.0 +distro==1.9.0 +docker==7.1.0 +docstring_parser==0.16 +durationpy==0.9 +elastic-transport==8.17.0 +elasticsearch==8.17.0 +embedchain==0.1.127 +et_xmlfile==2.0.0 +exceptiongroup==1.3.0 +executing==2.2.0 +fastapi==0.104.1 +fastavro==1.10.0 +filelock==3.17.0 +flatbuffers==25.2.10 +frozenlist==1.5.0 +fsspec==2025.2.0 +google-api-core==2.24.1 +google-auth==2.38.0 +google-cloud-aiplatform==1.82.0 +google-cloud-bigquery==3.30.0 +google-cloud-core==2.4.2 +google-cloud-resource-manager==1.14.1 +google-cloud-storage==2.19.0 +google-crc32c==1.6.0 +google-genai==1.30.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.68.0 +gptcache==0.1.44 +grpc-google-iam-v1==0.14.0 +grpcio==1.70.0 +grpcio-status==1.70.0 +grpcio-tools==1.70.0 +h11==0.14.0 +h2==4.2.0 +hpack==4.1.0 +httpcore==1.0.7 +httptools==0.6.4 +httpx==0.28.1 +httpx-sse==0.4.0 +huggingface-hub==0.29.1 +humanfriendly==10.0 +hyperframe==6.1.0 +idna==3.10 +importlib_metadata==8.5.0 +importlib_resources==6.5.2 +instructor==1.7.2 +ipykernel==6.29.5 +ipython==9.0.1 +ipython_pygments_lexers==1.1.1 +jedi==0.19.2 +Jinja2==3.1.5 +jiter==0.8.2 +json5==0.10.0 +json_repair==0.39.1 +jsonpatch==1.33 +jsonpickle==4.0.2 +jsonpointer==3.0.0 +jsonref==1.1.0 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +kubernetes==32.0.1 +lancedb==0.20.0 +langchain==0.3.19 +langchain-cohere==0.3.5 +langchain-community==0.3.18 +langchain-core==0.3.40 +langchain-experimental==0.3.4 +langchain-openai==0.2.14 +langchain-text-splitters==0.3.6 +langextract==1.0.8 +langsmith==0.1.147 +litellm==1.60.2 +Mako==1.3.9 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mem0ai==0.1.60 +ml_collections==1.1.0 +mmh3==5.1.0 +monotonic==1.6 +more-itertools==10.7.0 +mpmath==1.3.0 +multidict==6.1.0 +mypy-extensions==1.0.0 +nest-asyncio==1.6.0 +networkx==3.4.2 +nodeenv==1.9.1 +numpy==1.26.4 +oauthlib==3.2.2 +onnxruntime==1.20.1 +openai==1.65.2 +openpyxl==3.1.5 +opentelemetry-api==1.30.0 +opentelemetry-exporter-otlp-proto-common==1.30.0 +opentelemetry-exporter-otlp-proto-grpc==1.30.0 +opentelemetry-exporter-otlp-proto-http==1.30.0 +opentelemetry-instrumentation==0.51b0 +opentelemetry-instrumentation-asgi==0.51b0 +opentelemetry-instrumentation-fastapi==0.51b0 +opentelemetry-proto==1.30.0 +opentelemetry-sdk==1.30.0 +opentelemetry-semantic-conventions==0.51b0 +opentelemetry-util-http==0.51b0 +orjson==3.10.15 +overrides==7.7.0 +packaging==24.2 +pandas==2.2.3 +parso==0.8.4 
+pdfminer.six==20231228 +pdfplumber==0.11.5 +pexpect==4.9.0 +pillow==11.1.0 +platformdirs==4.3.6 +portalocker==2.10.1 +posthog==3.18.0 +prompt_toolkit==3.0.50 +propcache==0.3.0 +proto-plus==1.26.0 +protobuf==5.29.3 +psutil==7.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pyarrow==19.0.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.1 +pycparser==2.22 +pydantic==2.5.0 +pydantic-settings==2.8.1 +pydantic_core==2.14.1 +Pygments==2.19.1 +PyJWT==2.10.1 +pylance==0.23.2 +pypdf==5.3.1 +pypdfium2==4.30.1 +PyPika==0.48.9 +pyproject_hooks==1.2.0 +pyright==1.1.396 +pysbd==0.3.4 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytube==15.0.0 +pytz==2024.2 +pyvis==0.3.2 +PyYAML==6.0.2 +pyzmq==26.2.1 +qdrant-client==1.13.2 +referencing==0.36.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +rich==13.9.4 +rpds-py==0.23.1 +rsa==4.9 +schema==0.7.7 +shapely==2.0.7 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.38 +stack-data==0.6.3 +starlette==0.27.0 +sympy==1.13.3 +tabulate==0.9.0 +tenacity==9.0.0 +tiktoken==0.7.0 +tokenizers==0.20.3 +tomli==2.2.1 +tomli_w==1.2.0 +tornado==6.4.2 +tqdm==4.67.1 +traitlets==5.14.3 +typer==0.15.2 +types-requests==2.32.0.20250301 +typing-inspect==0.9.0 +typing_extensions==4.12.2 +tzdata==2025.1 +urllib3==2.3.0 +uv==0.6.3 +uvicorn==0.24.0 +uvloop==0.21.0 +watchfiles==1.0.4 +wcwidth==0.2.13 +websocket-client==1.8.0 +websockets==15.0.1 +wrapt==1.17.2 +yarl==1.18.3 +zipp==3.21.0 diff --git a/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py new file mode 100644 index 00000000..adcc24ae --- /dev/null +++ b/supporting-blog-content/local-rag-with-lightweight-elasticsearch/script.py @@ -0,0 +1,214 @@ +import os +import time + +from dotenv import load_dotenv +from elasticsearch import Elasticsearch, helpers +from openai import OpenAI + +load_dotenv() + +ES_URL = os.getenv("ES_URL", "http://localhost:9200") +ES_API_KEY = os.getenv("ES_API_KEY") +INDEX_NAME = os.getenv("INDEX_NAME", "team-data") +LOCAL_AI_URL = os.getenv("LOCAL_AI_URL", "http://localhost:8080/v1") +DATASET_FOLDER = os.getenv("DATASET_FOLDER", "./Dataset") + + +es_client = Elasticsearch(ES_URL, api_key=ES_API_KEY) +ai_client = OpenAI( + base_url=LOCAL_AI_URL, api_key="sk-x" +) # You don't need a real OpenAI key for Local AI but we need to pass something, if you leave it blank it throws an error + + +def setup_inference_endpoint(): + """Create the e5-small-model inference endpoint for text embeddings if it doesn't exist.""" + + inference_id = "e5-small-model" + + try: + es_client.inference.get(inference_id=inference_id) + print(f"āœ… Inference endpoint '{inference_id}' already exists") + except Exception: + print(f"šŸ“¦ Creating inference endpoint '{inference_id}'...") + + try: + es_client.inference.put( + inference_id=inference_id, + task_type="text_embedding", + body={ + "service": "elasticsearch", + "service_settings": { + "num_allocations": 1, + "num_threads": 1, + "model_id": ".multilingual-e5-small", + }, + }, + ) + print(f"āœ… Inference endpoint '{inference_id}' created successfully") + except Exception as e: + print(f"āŒ Error creating inference endpoint: {str(e)}") + + +def setup_index(): + """Create the Elasticsearch index with semantic_text field mappings if it doesn't exist.""" + + try: + if es_client.indices.exists(index=INDEX_NAME): + print(f"āœ… Index '{INDEX_NAME}' already exists") + return False + + print(f"šŸ“¦ Creating index 
'{INDEX_NAME}'...") + es_client.indices.create( + index=INDEX_NAME, + body={ + "mappings": { + "properties": { + "file_title": {"type": "text", "copy_to": "semantic_field"}, + "file_content": {"type": "text", "copy_to": "semantic_field"}, + "semantic_field": { + "type": "semantic_text", + "inference_id": "e5-small-model", + }, + } + } + }, + ) + print(f"āœ… Index '{INDEX_NAME}' created successfully") + return True + except Exception as e: + print(f"āŒ Error creating index: {str(e)}") + exit(1) + + +def load_documents(dataset_folder, index_name): + """Generator that yields documents from .txt files in the dataset folder for bulk indexing.""" + + for filename in os.listdir(dataset_folder): + if filename.endswith(".txt"): + filepath = os.path.join(dataset_folder, filename) + + with open( + filepath, "r", encoding="utf-8" + ) as file: # UTF-8 encoding ensures proper handling of special characters and international text + content = file.read() + + yield { + "_index": index_name, + "_source": {"file_title": filename, "file_content": content}, + } + + +def index_documents(): + """Bulk index all documents from the dataset folder into Elasticsearch and return success count and latency.""" + + try: + if es_client.indices.exists(index=INDEX_NAME) is False: + print(f"āŒ Error: Index '{INDEX_NAME}' does not exist. ") + exit(1) + + success, _ = helpers.bulk(es_client, load_documents(DATASET_FOLDER, INDEX_NAME)) + + print(f"āœ… Indexed {success} documents successfully") + return success + except Exception as e: + print(f"āŒ Error indexing documents: {str(e)}") + exit(1) + + +def semantic_search(query, size=3): + """Perform semantic search and return top results with latency.""" + + start_time = time.time() + search_body = { + "query": {"semantic": {"field": "semantic_field", "query": query}}, + "size": size, + } + + response = es_client.search(index=INDEX_NAME, body=search_body) + search_latency = (time.time() - start_time) * 1000 # ms + + return response["hits"]["hits"], search_latency + + +def query_local_ai(prompt, model): + """Send a prompt to Local AI model and return the response, latency, and tokens per second.""" + + start_time = time.time() + + try: + # Using simple completions without streaming. + response = ai_client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + ) + + ai_latency = (time.time() - start_time) * 1000 # ms + + # Extract response text + response_text = response.choices[0].message.content + + # Calculate tokens per second if usage info is available + tokens_per_second = 0 + if hasattr(response, "usage") and response.usage: + total_tokens = response.usage.completion_tokens + if ai_latency > 0: + tokens_per_second = (total_tokens / ai_latency) * 1000 # tokens/second + + return response_text, ai_latency, tokens_per_second + except Exception as e: + ai_latency = (time.time() - start_time) * 1000 + + return f"Error: {str(e)}", ai_latency, 0 + + +if __name__ == "__main__": + print("šŸš€ Setting up infrastructure...") + + # Setup inference endpoint and index + setup_inference_endpoint() + is_created = setup_index() + + if is_created: # Index was just created, need to index documents + print("\nšŸ“„ Indexing documents...") + success = index_documents() + + if success == 0: # if indexing failed, exit + print("āŒ Documents indexing failed. Exiting.") + exit(1) + + time.sleep(1) # Wait for indexing to complete + + query = "Can you summarize the performance issues in the API?" 
+
+    print(f"šŸ” Search: '{query}'")
+    search_results, search_latency = semantic_search(query)
+
+    context = ""
+    citations = []
+    for idx, hit in enumerate(search_results, 1):
+        source = hit["_source"]
+        context += f"[{idx}] File: {source['file_title']}\n"
+        context += f"Content: {source['file_content']}\n\n"
+        citations.append(f"[{idx}] {source['file_title']}")
+
+    prompt = f"""Based on the following documents, answer the user's question.
+    You MUST cite your sources using the format [1], [2], etc. when referencing information from the documents.
+
+    Documents:
+    {context}
+
+    User Question: {query}
+    """
+
+    ai_model = "dolphin3.0-qwen2.5-0.5b"
+
+    print(f"šŸ¤– Asking model: {ai_model}")
+    response, ai_latency, tokens_per_second = query_local_ai(prompt, ai_model)
+
+    print(f"\nšŸ’” Question: {query}\nšŸ“ Answer: {response}")
+    print("\nšŸ“š Citations:")
+    for citation in citations:
+        print(f" {citation}")
+
+    print(f"\nšŸ” Search Latency: {search_latency:.0f}ms")
+    print(f"šŸ¤– AI Latency: {ai_latency:.0f}ms | {tokens_per_second:.1f} tokens/s")