Skip to content
Merged
6 changes: 6 additions & 0 deletions ads/llm/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,19 @@


class ChainDeployment(GenericModel):
"""Represents a model deployment with LangChain.
"""
def __init__(self, chain, **kwargs):
    """Create a deployment wrapper around a LangChain chain.

    Parameters
    ----------
    chain:
        The LangChain chain object to be deployed.
    **kwargs:
        Additional keyword arguments forwarded to ``GenericModel``.
        If ``model_input_serializer`` is not supplied, the JSON
        serializer is used by default.
    """
    self.chain = chain
    serializer_key = "model_input_serializer"
    # Default to JSON serialization unless the caller chose one explicitly.
    if serializer_key not in kwargs:
        kwargs[serializer_key] = self.model_input_serializer_type.JSON
    super().__init__(**kwargs)

def prepare(self, **kwargs) -> GenericModel:
    """Prepares the model artifact.

    Serializes the LangChain chain to ``chain.yaml`` inside
    ``self.artifact_dir`` (the directory is created if needed).
    """
    chain_yaml_uri = os.path.join(self.artifact_dir, "chain.yaml")
    # exist_ok=True avoids the TOCTOU race of a separate
    # os.path.exists() check followed by os.makedirs().
    os.makedirs(self.artifact_dir, exist_ok=True)
    with open(chain_yaml_uri, "w", encoding="utf-8") as f:
        f.write(yaml.safe_dump(dump(self.chain)))

Expand Down
1 change: 0 additions & 1 deletion ads/llm/guardrails/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ class Guardrail(BaseTool):

class Config:
arbitrary_types_allowed = True
underscore_attrs_are_private = True

name: str = ""
description: str = "Guardrail"
Expand Down
159 changes: 118 additions & 41 deletions ads/llm/langchain/plugins/chat_models/oci_data_science.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""Chat model for OCI data science model deployment endpoint."""


import importlib
import json
import logging
from operator import itemgetter
from typing import (
Any,
AsyncIterator,
Callable,
Dict,
Iterator,
List,
Literal,
Optional,
Sequence,
Type,
Union,
Sequence,
Callable,
)

from langchain_core.callbacks import (
Expand All @@ -33,21 +34,16 @@
generate_from_stream,
)
from langchain_core.messages import AIMessageChunk, BaseMessage, BaseMessageChunk
from langchain_core.tools import BaseTool
from langchain_core.output_parsers import (
JsonOutputParser,
PydanticOutputParser,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_openai.chat_models.base import (
_convert_delta_to_message_chunk,
_convert_message_to_dict,
_convert_dict_to_message,
)
from pydantic import BaseModel, Field, model_validator

from pydantic import BaseModel, Field
from ads.llm.langchain.plugins.llms.oci_data_science_model_deployment_endpoint import (
DEFAULT_MODEL_NAME,
BaseOCIModelDeployment,
Expand All @@ -63,23 +59,48 @@ def _is_pydantic_class(obj: Any) -> bool:
class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):
"""OCI Data Science Model Deployment chat model integration.

To use, you must provide the model HTTP endpoint from your deployed
chat model, e.g. https://modeldeployment.<region>.oci.customer-oci.com/<md_ocid>/predict.
Setup:
Install ``oracle-ads`` and ``langchain-openai``.

To authenticate, `oracle-ads` has been used to automatically load
credentials: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html
.. code-block:: bash

Make sure to have the required policies to access the OCI Data
Science Model Deployment endpoint. See:
https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm#model_dep_policies_auth__predict-endpoint
pip install -U oracle-ads langchain-openai

Use `ads.set_auth()` to configure authentication.
For example, to use OCI resource_principal for authentication:

.. code-block:: python

import ads
ads.set_auth("resource_principal")

For more details on authentication, see:
https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html

Make sure to have the required policies to access the OCI Data
Science Model Deployment endpoint. See:
https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm


Key init args - completion params:
endpoint: str
The OCI model deployment endpoint.
temperature: float
Sampling temperature.
max_tokens: Optional[int]
Max number of tokens to generate.

Key init args — client params:
auth: dict
ADS auth dictionary for OCI authentication.

Instantiate:
.. code-block:: python

from langchain_community.chat_models import ChatOCIModelDeployment

chat = ChatOCIModelDeployment(
endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<ocid>/predict",
endpoint="https://modeldeployment.<region>.oci.customer-oci.com/<ocid>/predict",
model="odsc-llm",
streaming=True,
max_retries=3,
Expand All @@ -94,15 +115,27 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):
.. code-block:: python

messages = [
("system", "You are a helpful translator. Translate the user sentence to French."),
("system", "Translate the user sentence to French."),
("human", "Hello World!"),
]
chat.invoke(messages)

.. code-block:: python

AIMessage(
content='Bonjour le monde!',response_metadata={'token_usage': {'prompt_tokens': 40, 'total_tokens': 50, 'completion_tokens': 10},'model_name': 'odsc-llm','system_fingerprint': '','finish_reason': 'stop'},id='run-cbed62da-e1b3-4abd-9df3-ec89d69ca012-0')
content='Bonjour le monde!',
response_metadata={
'token_usage': {
'prompt_tokens': 40,
'total_tokens': 50,
'completion_tokens': 10
},
'model_name': 'odsc-llm',
'system_fingerprint': '',
'finish_reason': 'stop'
},
id='run-cbed62da-e1b3-4abd-9df3-ec89d69ca012-0'
)

Streaming:
.. code-block:: python
Expand All @@ -112,18 +145,18 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):

.. code-block:: python

content='' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='\n' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='B' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='on' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='j' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='our' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content=' le' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content=' monde' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='!' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'
content='' response_metadata={'finish_reason': 'stop'} id='run-23df02c6-c43f-42de-87c6-8ad382e125c3'

Asyc:
content='' id='run-02c6-c43f-42de'
content='\n' id='run-02c6-c43f-42de'
content='B' id='run-02c6-c43f-42de'
content='on' id='run-02c6-c43f-42de'
content='j' id='run-02c6-c43f-42de'
content='our' id='run-02c6-c43f-42de'
content=' le' id='run-02c6-c43f-42de'
content=' monde' id='run-02c6-c43f-42de'
content='!' id='run-02c6-c43f-42de'
content='' response_metadata={'finish_reason': 'stop'} id='run-02c6-c43f-42de'

Async:
.. code-block:: python

await chat.ainvoke(messages)
Expand All @@ -133,7 +166,11 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment):

.. code-block:: python

AIMessage(content='Bonjour le monde!', response_metadata={'finish_reason': 'stop'}, id='run-8657a105-96b7-4bb6-b98e-b69ca420e5d1-0')
AIMessage(
content='Bonjour le monde!',
response_metadata={'finish_reason': 'stop'},
id='run-8657a105-96b7-4bb6-b98e-b69ca420e5d1-0'
)

Structured output:
.. code-block:: python
Expand All @@ -147,19 +184,22 @@ class Joke(BaseModel):

structured_llm = chat.with_structured_output(Joke, method="json_mode")
structured_llm.invoke(
"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
"Tell me a joke about cats, "
"respond in JSON with `setup` and `punchline` keys"
)

.. code-block:: python

Joke(setup='Why did the cat get stuck in the tree?',punchline='Because it was chasing its tail!')
Joke(
setup='Why did the cat get stuck in the tree?',
punchline='Because it was chasing its tail!'
)

See ``ChatOCIModelDeployment.with_structured_output()`` for more.

Customized Usage:

You can inherit from base class and overwrite the `_process_response`, `_process_stream_response`,
`_construct_json_body` for satisfying customized needed.
You can inherit from base class and overwrite the `_process_response`,
`_process_stream_response`, `_construct_json_body` for customized usage.

.. code-block:: python

Expand All @@ -180,12 +220,31 @@ def _construct_json_body(self, messages: list, params: dict) -> dict:
}

chat = MyChatModel(
endpoint=f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{ocid}/predict",
endpoint=f"https://modeldeployment.<region>.oci.customer-oci.com/{ocid}/predict",
model="odsc-llm",
}

chat.invoke("tell me a joke")

Response metadata
.. code-block:: python

ai_msg = chat.invoke(messages)
ai_msg.response_metadata

.. code-block:: python

{
'token_usage': {
'prompt_tokens': 40,
'total_tokens': 50,
'completion_tokens': 10
},
'model_name': 'odsc-llm',
'system_fingerprint': '',
'finish_reason': 'stop'
}

""" # noqa: E501

model_kwargs: Dict[str, Any] = Field(default_factory=dict)
Expand All @@ -198,6 +257,17 @@ def _construct_json_body(self, messages: list, params: dict) -> dict:
"""Stop words to use when generating. Model output is cut off
at the first occurrence of any of these substrings."""

@model_validator(mode="before")
@classmethod
def validate_openai(cls, values: Any) -> Any:
    """Checks if langchain_openai is installed."""
    # find_spec returns None when the package cannot be located.
    if importlib.util.find_spec("langchain_openai") is None:
        raise ImportError(
            "Could not import langchain_openai package. "
            "Please install it with `pip install langchain_openai`."
        )
    return values

@property
def _llm_type(self) -> str:
"""Return type of llm."""
Expand Down Expand Up @@ -552,6 +622,8 @@ def _construct_json_body(self, messages: list, params: dict) -> dict:
converted messages and additional parameters.

"""
from langchain_openai.chat_models.base import _convert_message_to_dict

return {
"messages": [_convert_message_to_dict(m) for m in messages],
**params,
Expand All @@ -578,6 +650,8 @@ def _process_stream_response(
ValueError: If the response JSON is not well-formed or does not
contain the expected structure.
"""
from langchain_openai.chat_models.base import _convert_delta_to_message_chunk

try:
choice = response_json["choices"][0]
if not isinstance(choice, dict):
Expand Down Expand Up @@ -616,6 +690,8 @@ def _process_response(self, response_json: dict) -> ChatResult:
contain the expected structure.

"""
from langchain_openai.chat_models.base import _convert_dict_to_message

generations = []
try:
choices = response_json["choices"]
Expand Down Expand Up @@ -760,8 +836,9 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment):
tool_choice: Optional[str] = None
"""Whether to use tool calling.
Defaults to None, tool calling is disabled.
Tool calling requires model support and vLLM to be configured with `--tool-call-parser`.
Set this to `auto` for the model to determine whether to make tool calls automatically.
Tool calling requires model support and the vLLM to be configured
with `--tool-call-parser`.
Set this to `auto` for the model to make tool calls automatically.
Set this to `required` to force the model to always call one or more tools.
"""

Expand Down
Loading