From b3ec1c0af6c66802535192820b0c1da882ee7b8f Mon Sep 17 00:00:00 2001
From: David Fridrich
Date: Tue, 15 Jul 2025 17:55:25 +0200
Subject: [PATCH] simple-inference

---
 python/llamacpp/README.md            |  91 ++++++++++++++++++++
 python/llamacpp/function/__init__.py |   1 +
 python/llamacpp/function/func.py     | 127 +++++++++++++++++++++++++++
 python/llamacpp/manifest.yaml        |   2 +
 python/llamacpp/pyproject.toml       |  26 ++++++
 python/llamacpp/tests/test_func.py   |  38 ++++++++
 6 files changed, 285 insertions(+)
 create mode 100644 python/llamacpp/README.md
 create mode 100644 python/llamacpp/function/__init__.py
 create mode 100644 python/llamacpp/function/func.py
 create mode 100644 python/llamacpp/manifest.yaml
 create mode 100644 python/llamacpp/pyproject.toml
 create mode 100644 python/llamacpp/tests/test_func.py

diff --git a/python/llamacpp/README.md b/python/llamacpp/README.md
new file mode 100644
index 0000000..a5f8958
--- /dev/null
+++ b/python/llamacpp/README.md
@@ -0,0 +1,91 @@
+# Python Llama-cpp Function (HTTP)
+
+Welcome to your Llama-cpp Function, which provides a basic client-side
+integration of the [llama-cpp-python library](https://github.com/abetlen/llama-cpp-python).
+The Function accepts JSON input, processes it through a local LLM, and returns
+the generated response.
+
+The Function itself uses the ASGI protocol.
+
+## Deployment
+
+> [!NOTE]
+> We recommend using the host builder.
+
+```bash
+# Run the function locally
+func run --builder=host
+
+# Deploy to the cluster
+func deploy --builder=host
+```
+
+## How to use the API
+
+The Function accepts POST requests with JSON data. You can create a request like
+this:
+```bash
+curl localhost:8080 -d '{"input":"The largest mountain in the world is"}'
+```
+
+GET requests return the string 'OK' for a quick health check.
+
+## Customization
+
+- The Function uses the ASGI protocol and is compatible with the
+`handle(scope, receive, send)` signature.
+- You can use a local model (e.g. one provided via a base image built from a
+Dockerfile) by swapping the `Llama.from_pretrained()` call in `handle()` for
+the commented-out code. You will need to provide a path to the model via the
+`model_path` argument instead of `repo_id` and `filename`.
+- As usual, the Function implements readiness and liveness checks as well as
+start and stop hooks via methods matching those names. These can be found at
+the bottom of the Function class, with more detailed information in the
+comments.
+
+## Tests
+
+Tests use the `pytest` framework with asyncio.
+
+The Function tests can be found in the `tests` directory. It contains a simple
+HTTP request test. This is where you can create your own tests for desired
+functionality.
+
+```bash
+# Install dependencies (if not done already)
+pip install -e .
+
+# Run the tests
+pytest
+
+# Run verbosely
+pytest -v
+```
+
+## Dependencies
+
+All dependencies can be found in the `pyproject.toml` file. Any additional
+dependencies (e.g. a model when running locally) can also be provided via the
+aforementioned base image. You can create a Dockerfile like so:
+
+```Dockerfile
+FROM python:3.13-slim
+## RUN any shell commands for pip install etc.
+COPY /path/to/model/on/host/machine /path/to/model/in/container
+```
+
+You can build this image, for example with podman, and then pass it to the
+Function when building it via the `--base-image` flag:
+
+```bash
+# build my base image
+podman build -f Dockerfile -t my-base-image
+
+# use the base image when building my Function image
+func build --base-image=localhost/my-base-image --builder=host
+
+# or deploy immediately (builds internally)
+func deploy --base-image=localhost/my-base-image --builder=host
+```
+
+This makes the model accessible to the Function.
+
+For more, see [the complete documentation](https://github.com/knative/func/tree/main/docs).
diff --git a/python/llamacpp/function/__init__.py b/python/llamacpp/function/__init__.py
new file mode 100644
index 0000000..c16dbac
--- /dev/null
+++ b/python/llamacpp/function/__init__.py
@@ -0,0 +1 @@
+from .func import new
diff --git a/python/llamacpp/function/func.py b/python/llamacpp/function/func.py
new file mode 100644
index 0000000..9db76ca
--- /dev/null
+++ b/python/llamacpp/function/func.py
@@ -0,0 +1,127 @@
+# Function
+import logging
+from llama_cpp import Llama
+import json
+
+
+def new():
+    """ New is the only method that must be implemented by a Function.
+    The instance returned can be of any name.
+    """
+    return Function()
+
+
+class Function:
+    def __init__(self):
+        """ The init method is an optional method where initialization can be
+        performed. See the start method for a startup hook which includes
+        configuration.
+        """
+
+    async def sender(self, send, obj):
+        # echo the obj to the calling client
+        await send({
+            'type': 'http.response.start',
+            'status': 200,
+            'headers': [
+                [b'content-type', b'text/plain'],
+            ],
+        })
+        await send({
+            'type': 'http.response.body',
+            'body': obj.encode(),
+        })
+
+    async def handle(self, scope, receive, send):
+        """
+        Accepts data in the form of JSON with the key "input", which should
+        contain the input string for the LLM:
+        {
+            "input": "this is passed to the LLM"
+        }
+        ex: curl localhost:8080 -d '{"input":"The largest mountain in the world is"}'
+        """
+        if scope["method"] == "GET":
+            await self.sender(send, "OK")
+            return
+
+        input = ""
+
+        # fetch the whole body of the request
+        body = b''
+        more_body = True
+        while more_body:
+            message = await receive()
+            body += message.get('body', b'')
+            more_body = message.get('more_body', False)
+
+        # decode json
+        try:
+            data = json.loads(body.decode('utf-8'))
+            input = data['input']
+        except json.JSONDecodeError:
+            await self.sender(send, "Invalid JSON")
+            return
+        except KeyError:
+            await self.sender(send, "Invalid key, expected 'input'")
+            return
+
+        if input == "":
+            await self.sender(send, "OK")
+            return
+
+        # Pull the model from the Hugging Face Hub
+        llm = Llama.from_pretrained(
+            repo_id="ibm-granite/granite-3b-code-base-2k-GGUF",
+            filename="granite-3b-code-base.Q4_K_M.gguf",
+            n_ctx=1024,
+        )
+
+        ## Use a local model instead
+        #llm = Llama(
+        #    model_path="/granite-7b-lab-Q4_K_M.gguf/snapshots/sha256-6adeaad8c048b35ea54562c55e454cc32c63118a32c7b8152cf706b290611487/granite-7b-lab-Q4_K_M.gguf",
+        #    n_ctx=1024,
+        #)
+
+        output = llm(
+            input,
+            max_tokens=32,
+            ## Stop generating just before "Q:"; doesn't work well with small models.
+            ## Some models are more tuned to the Q: ... A: ... "chat" style.
+            ## You would literally type that in your input as: f' Q: {input}. A:'
+            #stop=["Q:", "\n"],
+            echo=False,
+        )
+        #logging.info("------------")
+        #logging.info(output['choices'][0]['text'])
+        await self.sender(send, output['choices'][0]['text'])
+
+    def start(self, cfg):
+        """ start is an optional method which is called when a new Function
+        instance is started, such as when scaling up or during an update.
+        Provided is a dictionary containing all environmental configuration.
+
+        Args:
+          cfg (Dict[str, str]): A dictionary containing environmental config.
+            In most cases this will be a copy of os.environ, but it is
+            best practice to use this cfg dict instead of os.environ.
+        """
+        logging.info("Function starting")
+
+    def stop(self):
+        """ stop is an optional method which is called when a function is
+        stopped, such as when scaled down, updated, or manually canceled. Stop
+        can block while performing function shutdown/cleanup operations. The
+        process will eventually be killed if this method blocks beyond the
+        platform's configured maximum shutdown timeout.
+        """
+        logging.info("Function stopping")
+
+    def alive(self):
+        """ alive is an optional method for performing a deep check on your
+        Function's liveness. If removed, the system will assume the function
+        is alive if the process is running. This is exposed by default at the
+        path /health/liveness. The optional string return is a message.
+        """
+        return True, "Alive"
+
+    def ready(self):
+        """ ready is an optional method for performing a deep check on your
+        Function's readiness. If removed, the system will assume the function
+        is ready if the process is running. This is exposed by default at the
+        path /health/readiness.
+        """
+        return True, "Ready"
diff --git a/python/llamacpp/manifest.yaml b/python/llamacpp/manifest.yaml
new file mode 100644
index 0000000..51a22d8
--- /dev/null
+++ b/python/llamacpp/manifest.yaml
@@ -0,0 +1,2 @@
+build:
+  base-image: quay.io/dfridric/custom_llamacpp_base
diff --git a/python/llamacpp/pyproject.toml b/python/llamacpp/pyproject.toml
new file mode 100644
index 0000000..8d58c1e
--- /dev/null
+++ b/python/llamacpp/pyproject.toml
@@ -0,0 +1,26 @@
+[project]
+name = "function"
+description = ""
+version = "0.1.0"
+requires-python = ">=3.9"
+readme = "README.md"
+license = "MIT"
+dependencies = [
+    "httpx",
+    "pytest",
+    "pytest-asyncio",
+    "llama-cpp-python",
+    "huggingface-hub"
+]
+authors = [
+    { name="Your Name", email="you@example.com"},
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.pytest.ini_options]
+asyncio_mode = "strict"
+asyncio_default_fixture_loop_scope = "function"
+
diff --git a/python/llamacpp/tests/test_func.py b/python/llamacpp/tests/test_func.py
new file mode 100644
index 0000000..5b37a73
--- /dev/null
+++ b/python/llamacpp/tests/test_func.py
@@ -0,0 +1,38 @@
+"""
+An example set of unit tests which confirm that the main handler (the
+callable function) returns 200 OK for a simple HTTP GET.
+"""
+import pytest
+from function import new
+
+
+@pytest.mark.asyncio
+async def test_function_handle():
+    f = new()  # Instantiate the Function to test
+
+    sent_ok = False
+    sent_headers = False
+    sent_body = False
+
+    # Mock send: record what the Function responds with
+    async def send(message):
+        nonlocal sent_ok
+        nonlocal sent_headers
+        nonlocal sent_body
+
+        if message.get('status') == 200:
+            sent_ok = True
+
+        if message.get('type') == 'http.response.start':
+            sent_headers = True
+
+        if message.get('type') == 'http.response.body':
+            sent_body = True
+
+    # Invoke the Function with a minimal GET scope; receive is unused for GET
+    await f.handle({"type": "http", "method": "GET"}, None, send)
+
+    # Assert send was called
+    assert sent_ok, "Function did not send a 200 OK"
+    assert sent_headers, "Function did not send headers"
+    assert sent_body, "Function did not send a body"
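
The `tests` directory could also exercise the POST path without downloading a model. The following is a minimal sketch, not part of the patch: it assumes the package is installed with `pip install -e .` so that `function.func` is importable, and it replaces `function.func.Llama` with a hypothetical `_FakeLlama` stub via pytest's `monkeypatch`, so only the handler's JSON parsing and response path are tested.

```python
"""
Sketch of an additional POST-path test (not part of the patch). The LLM is
stubbed out so no model is downloaded or run during the test.
"""
import json

import pytest

import function.func as func_module
from function import new


class _FakeLlama:
    """Hypothetical stand-in for llama_cpp.Llama, used only in this test."""

    @staticmethod
    def from_pretrained(**kwargs):
        # Return a callable mimicking the completion dict shape the handler reads.
        return lambda prompt, **kw: {"choices": [{"text": "Mount Everest"}]}


@pytest.mark.asyncio
async def test_function_handle_post(monkeypatch):
    # Swap the real Llama class for the stub inside the function module.
    monkeypatch.setattr(func_module, "Llama", _FakeLlama)

    f = new()
    body_sent = b""

    # Mock receive: deliver the whole JSON body in a single ASGI message.
    async def receive():
        return {
            "type": "http.request",
            "body": json.dumps({"input": "The largest mountain in the world is"}).encode(),
            "more_body": False,
        }

    # Mock send: capture the response body bytes.
    async def send(message):
        nonlocal body_sent
        if message.get("type") == "http.response.body":
            body_sent += message.get("body", b"")

    await f.handle({"type": "http", "method": "POST"}, receive, send)

    assert body_sent, "Function did not send a generated response body"
```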