Skip to content

Commit 75b1590

Browse files
authored
feat: refactored tracker db folder structure. added alembic migrations (#219)
* feat: refactored tracker db folder structure. added alembic migrations * added tracker logs integration test. added alembic check of migration consistency to actions * fix: fixed the test actions for alembic * fix: fixed path for script on the test actions for alembic * fix: added tracker dependencies to integration test action * fix: integration test action installation of tracker * fix: integration test action cache poetry of tracker * chore: updated llmstudio lib poetry lock * fix: integration test action changed working dir for alembic script * fix: integration test action path for alembic.ini script * chore: changed the migration bash to python * chore: changed the migration bash to python and corrected path * fix: action * test: adding columns to check for integration tests for migrations * fix: alembic was not recognizing changes * fix: alembic * chore: removed testing columns for logs * chore: added readme.md * feat: added extras to logs schema. alembic upgrades on tracker start server if needed * chore: added .env.template; changed llmstudio alembic default name * chore: moved server alembic upgrade to utils; reverted poetry.lock of llmstudio lib
1 parent 5fbd461 commit 75b1590

40 files changed

+1452
-800
lines changed

.env.template

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
OPENAI_API_KEY="sk-proj-XXXXX"
2+
ANTHROPIC_API_KEY="sk-XXXXX"
3+
COHERE_API_KEY="XXXX"
4+
GOOGLE_API_KEY="XXXX"
5+
DECART_API_KEY="XXXX"
6+
AI71_API_KEY="XXXX"
7+
AI21_API_KEY="XXXX"
8+
BEDROCK_ACCESS_KEY="XXXX"
9+
BEDROCK_SECRET_KEY="r+XXXX"
10+
BEDROCK_REGION="us-west-2"
11+
HUGGING_FACE_API_KEY="hf_XXXX"
12+
AZURE_API_KEY="XXXX"
13+
AZURE_API_ENDPOINT="https://XXXXX.openai.azure.com/"
14+
AZURE_API_VERSION="2023-07-01-preview"
15+
ENGINE_HOST="localhost"
16+
ENGINE_PORT=8000
17+
UI_HOST="localhost"
18+
UI_PORT=3000
19+
LOG_LEVEL="info"
20+
21+
#LLMSTUDIO_TRACKING_URI="postgresql://postgres:postgres@localhost:5433/tracker_db"
22+
LLMSTUDIO_TRACKING_URI="sqlite:///./llmstudio_mgmt.db"
23+
LLMSTUDIO_TRACKING_HOST="127.0.0.1"
24+
LLMSTUDIO_TRACKING_PORT="50002"
25+
LLMSTUDIO_ALEMBIC_TABLE_NAME="llmstudio_alembic_version"

.github/workflows/tests.yml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,19 +82,32 @@ jobs:
8282
uses: actions/cache@v3
8383
with:
8484
path: ~/.cache/pypoetry
85-
key: poetry-integration-${{ runner.os }}-${{ hashFiles('libs/llmstudio/poetry.lock') }}
85+
key: poetry-integration-${{ runner.os }}-${{ hashFiles('libs/llmstudio/poetry.lock', 'libs/llmstudio/pyproject.toml') }}
8686
restore-keys: |
8787
poetry-integration-${{ runner.os }}-
8888
8989
# Install llmstudio
9090
- name: Install llmstudio
9191
working-directory: ./libs/llmstudio
9292
run: |
93-
poetry install
93+
poetry install --extras tracker
9494
INTEGRATION_ENV=$(poetry env info --path)
9595
echo $INTEGRATION_ENV
9696
echo "INTEGRATION_ENV=$INTEGRATION_ENV" >> $GITHUB_ENV
9797
98+
# Set Env vars for sqlite db
99+
- name: Set hardcoded DB URI, HOST and PORT
100+
run: |
101+
echo "LLMSTUDIO_TRACKING_URI=sqlite:///./test_tracker.db" >> $GITHUB_ENV
102+
echo "LLMSTUDIO_TRACKING_HOST=127.0.0.1" >> $GITHUB_ENV
103+
echo "LLMSTUDIO_TRACKING_PORT=50002" >> $GITHUB_ENV
104+
105+
# Run Alembic migrations
106+
- name: Run Alembic migrations
107+
run: |
108+
source ${{ env.INTEGRATION_ENV }}/bin/activate
109+
poetry run alembic upgrade head
110+
98111
# Run Integration Tests
99112
- name: Run Integration Tests
100113
run: |

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,4 @@ bun.lockb
7878
llmstudio/llm_engine/logs/execution_logs.jsonl
7979
*.db
8080
.prettierignore
81-
db
81+

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ repos:
3030
hooks:
3131
- id: autoflake
3232
files: libs/
33-
exclude: 'libs/core/llmstudio_core/providers/__init__.py|libs/llmstudio/llmstudio/providers/__init__.py'
33+
exclude: 'libs/core/llmstudio_core/providers/__init__.py|libs/llmstudio/llmstudio/providers/__init__.py|libs/tracker/llmstudio_tracker/db/migrations/env.py|libs/tracker/llmstudio_tracker/base.py'
3434
args:
3535
- --remove-all-unused-imports
3636
- --recursive

alembic.ini

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# A generic, single database configuration.
2+
3+
[alembic]
4+
# path to migration scripts
5+
# Use forward slashes (/) also on windows to provide an os agnostic path
6+
script_location = libs/tracker/llmstudio_tracker/db/migrations
7+
8+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
9+
# Uncomment the line below if you want the files to be prepended with date and time
10+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
11+
# for all available tokens
12+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
13+
14+
# sys.path path, will be prepended to sys.path if present.
15+
# defaults to the current working directory.
16+
prepend_sys_path = .
17+
18+
# timezone to use when rendering the date within the migration file
19+
# as well as the filename.
20+
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
21+
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
22+
# string value is passed to ZoneInfo()
23+
# leave blank for localtime
24+
# timezone =
25+
26+
# max length of characters to apply to the "slug" field
27+
# truncate_slug_length = 40
28+
29+
# set to 'true' to run the environment during
30+
# the 'revision' command, regardless of autogenerate
31+
# revision_environment = false
32+
33+
# set to 'true' to allow .pyc and .pyo files without
34+
# a source .py file to be detected as revisions in the
35+
# versions/ directory
36+
# sourceless = false
37+
38+
# version location specification; This defaults
39+
# to alembic/versions. When using multiple version
40+
# directories, initial revisions must be specified with --version-path.
41+
# The path separator used here should be the separator specified by "version_path_separator" below.
42+
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43+
44+
# version path separator; As mentioned above, this is the character used to split
45+
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46+
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47+
# Valid values for version_path_separator are:
48+
#
49+
# version_path_separator = :
50+
# version_path_separator = ;
51+
# version_path_separator = space
52+
# version_path_separator = newline
53+
#
54+
# Use os.pathsep. Default configuration used for new projects.
55+
version_path_separator = os
56+
57+
# set to 'true' to search source files recursively
58+
# in each "version_locations" directory
59+
# new in Alembic version 1.10
60+
# recursive_version_locations = false
61+
62+
# the output encoding used when revision files
63+
# are written from script.py.mako
64+
# output_encoding = utf-8
65+
66+
sqlalchemy.url = placeholder
67+
68+
[post_write_hooks]
69+
# post_write_hooks defines scripts or Python functions that are run
70+
# on newly generated revision scripts. See the documentation for further
71+
# detail and examples
72+
73+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
74+
# hooks = black
75+
# black.type = console_scripts
76+
# black.entrypoint = black
77+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
78+
79+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
80+
# hooks = ruff
81+
# ruff.type = exec
82+
# ruff.executable = %(here)s/.venv/bin/ruff
83+
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
84+
85+
# Logging configuration
86+
[loggers]
87+
keys = root,sqlalchemy,alembic
88+
89+
[handlers]
90+
keys = console
91+
92+
[formatters]
93+
keys = generic
94+
95+
[logger_root]
96+
level = WARNING
97+
handlers = console
98+
qualname =
99+
100+
[logger_sqlalchemy]
101+
level = WARNING
102+
handlers =
103+
qualname = sqlalchemy.engine
104+
105+
[logger_alembic]
106+
level = INFO
107+
handlers =
108+
qualname = alembic
109+
110+
[handler_console]
111+
class = StreamHandler
112+
args = (sys.stderr,)
113+
level = NOTSET
114+
formatter = generic
115+
116+
[formatter_generic]
117+
format = %(levelname)-5.5s [%(name)s] %(message)s
118+
datefmt = %H:%M:%S

examples/core.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,35 @@
11

22
from llmstudio_core.providers import LLMCore
3-
3+
from llmstudio.providers import LLM
4+
from llmstudio_tracker.tracker import TrackingConfig
5+
from llmstudio.server import start_servers
46

57
from pprint import pprint
68
import os
79
import asyncio
810
from dotenv import load_dotenv
11+
import uuid
912
load_dotenv()
1013

14+
start_servers(proxy=False, tracker=True)
15+
16+
tracking_config = TrackingConfig(
17+
host=os.environ["LLMSTUDIO_TRACKING_HOST"],
18+
port=os.environ["LLMSTUDIO_TRACKING_PORT"]
19+
)
20+
21+
session_id = str(uuid.uuid4())
22+
23+
use_logging = True
24+
25+
1126
def run_provider(provider, model, api_key=None, **kwargs):
1227
print(f"\n\n###RUNNING for <{provider}>, <{model}> ###")
13-
llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
28+
29+
if use_logging:
30+
llm = LLM(provider=provider, api_key=api_key, session_id=session_id, tracking_config=tracking_config, **kwargs)
31+
else:
32+
llm = LLMCore(provider=provider, api_key=api_key, **kwargs)
1433

1534
latencies = {}
1635
print("\nAsync Non-Stream")
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import os
2+
import uuid
3+
4+
import pytest
5+
6+
# Load .env
7+
from dotenv import load_dotenv
8+
from llmstudio.providers import LLM
9+
from llmstudio.server import start_servers
10+
from llmstudio_tracker.db.models.logs import LogDefault
11+
from llmstudio_tracker.tracker import TrackingConfig
12+
from sqlalchemy import create_engine, select
13+
from sqlalchemy.orm import sessionmaker
14+
15+
load_dotenv()
16+
17+
18+
DATABASE_URL = os.environ["LLMSTUDIO_TRACKING_URI"]
19+
LLMSTUDIO_TRACKING_HOST = os.environ["LLMSTUDIO_TRACKING_HOST"]
20+
LLMSTUDIO_TRACKING_PORT = os.environ["LLMSTUDIO_TRACKING_PORT"]
21+
22+
engine = create_engine(DATABASE_URL)
23+
Session = sessionmaker(bind=engine)
24+
25+
26+
@pytest.mark.parametrize(
    "provider, model, api_key_name",
    [
        ("openai", "gpt-4o-mini", "OPENAI_API_KEY"),
    ],
)
def test_llm_tracking_logs(provider, model, api_key_name):
    """Check that a tracked chat call is persisted as exactly one log row.

    Starts the tracker server, sends one non-streaming chat request through
    ``LLM`` tagged with a fresh session id, then queries the tracking
    database for ``LogDefault`` rows carrying that session id.

    Args:
        provider: Provider name passed to ``LLM``.
        model: Model name sent in the chat request and expected in the log.
        api_key_name: Name of the environment variable holding the API key.
    """
    session_id = str(uuid.uuid4())
    # Built once so the request and the DB assertion cannot drift apart.
    chat_input = f"Hello, my name is Alice - session {session_id}"

    start_servers(proxy=False, tracker=True)

    tracking_config = TrackingConfig(
        host=LLMSTUDIO_TRACKING_HOST, port=LLMSTUDIO_TRACKING_PORT
    )

    llm = LLM(
        provider=provider,
        api_key=os.environ[api_key_name],
        session_id=session_id,
        tracking_config=tracking_config,
    )

    chat_request = {
        "chat_input": chat_input,
        "model": model,
        "is_stream": False,
        "retries": 0,
        "parameters": {"temperature": 0, "max_tokens": 1000},
    }

    response = llm.chat(**chat_request)
    print(response)

    assert hasattr(response, "chat_output"), "Missing 'chat_output'"
    assert response.chat_output is not None, "'chat_output' is None"

    # DB: check that the row was logged. The context manager closes the
    # session even when one of the assertions below fails.
    with Session() as db:
        logs = (
            db.execute(select(LogDefault).where(LogDefault.session_id == session_id))
            .scalars()
            .all()
        )

        assert (
            len(logs) == 1
        ), f"Expected exactly 1 log entry for session, found {len(logs)}"
        log = logs[0]

        assert log.chat_input == chat_input
        # Compare against the parametrized model rather than a hard-coded name.
        assert log.model == model
        assert log.session_id == session_id
        assert log.chat_output is not None
        assert isinstance(log.parameters, dict)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from llmstudio_tracker.db.models.logs import LogDefault
2+
from llmstudio_tracker.db.models.prompt_manager import PromptDefault
3+
from llmstudio_tracker.db.models.session import SessionDefault
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from sqlalchemy.orm import DeclarativeBase
2+
3+
4+
class Base(DeclarativeBase):
    """Declarative base class with no customisation of its own.

    NOTE(review): presumably the tracker's ORM models subclass this so they
    share one metadata registry -- confirm against the model modules.
    """

libs/tracker/llmstudio_tracker/database.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from llmstudio_tracker.config import DB_TYPE, TRACKING_URI
22
from sqlalchemy import create_engine
3-
from sqlalchemy.orm import declarative_base, sessionmaker
3+
from sqlalchemy.orm import sessionmaker
44

55

66
def create_tracking_engine(uri: str):
@@ -13,8 +13,6 @@ def create_tracking_engine(uri: str):
1313

1414
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
1515

16-
Base = declarative_base()
17-
1816

1917
def get_db():
2018
db = SessionLocal()

0 commit comments

Comments
 (0)