Skip to content

Commit 6612524

Browse files
authored
[ENH]: Support Key object in schema (#5721)
## Description of changes

_Summarize the changes made by this PR._

- Improvements & Bug fixes
  - Support the Key type (introduced by the search API) in schema. Users can specify this type for schema keys as well as for source_key.
  - Adds validation of the allowed values for key and source_key in schema on both the client and server side.
  - Adds a small helper to pretty-print schema JSON.
- New functionality
  - ...

## Test plan

Added plenty of unit tests and end-to-end tests.

- [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Migration plan

None

## Observability plan

Staging

## Documentation Changes

None
1 parent b686e8d commit 6612524

File tree

5 files changed

+699
-13
lines changed

5 files changed

+699
-13
lines changed

chromadb/api/types.py

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
)
4040

4141
if TYPE_CHECKING:
42-
pass
42+
from chromadb.execution.expression.operator import Key
4343

4444
try:
4545
from chromadb.is_thin_client import is_thin_client
@@ -1530,10 +1530,34 @@ class VectorIndexConfig(BaseModel):
15301530
model_config = {"arbitrary_types_allowed": True}
15311531
space: Optional[Space] = None
15321532
embedding_function: Optional[Any] = DefaultEmbeddingFunction()
1533-
source_key: Optional[str] = None # key to source the vector from
1533+
source_key: Optional[str] = None # key to source the vector from (accepts str or Key)
15341534
hnsw: Optional[HnswIndexConfig] = None
15351535
spann: Optional[SpannIndexConfig] = None
15361536

1537+
@field_validator("source_key", mode="before")
@classmethod
def validate_source_key_field(cls, v: Any) -> Optional[str]:
    """Normalize ``source_key`` to a plain string before field validation.

    Args:
        v: The raw value supplied for ``source_key``: ``None``, a ``str``,
            or a ``Key`` object.

    Returns:
        ``None`` when no key was supplied; otherwise the key as a string
        (for a ``Key`` object, its ``name`` attribute).

    Raises:
        ValueError: If ``v`` is neither ``str`` nor ``Key``, or if the
            resulting string starts with ``'#'`` and is not ``'#document'``.
    """
    if v is None:
        return None

    # Imported lazily: importing Key at module level would be circular.
    from chromadb.execution.expression.operator import Key as KeyType

    if isinstance(v, KeyType):
        source = v.name  # a Key carries its string form in ``name``
    elif isinstance(v, str):
        source = v
    else:
        raise ValueError(f"source_key must be str or Key, got {type(v).__name__}")

    # Among '#'-prefixed names, only '#document' is accepted as a source.
    if source.startswith("#") and source != "#document":
        raise ValueError(
            "source_key cannot begin with '#'. "
            "The only valid key starting with '#' is Key.DOCUMENT or '#document'."
        )

    return source  # type: ignore[no-any-return]
1560+
15371561
@field_validator("embedding_function", mode="before")
15381562
@classmethod
15391563
def validate_embedding_function_field(cls, v: Any) -> Any:
@@ -1553,9 +1577,33 @@ class SparseVectorIndexConfig(BaseModel):
15531577
model_config = {"arbitrary_types_allowed": True}
15541578
# TODO(Sanket): Change this to the appropriate sparse ef and use a default here.
15551579
embedding_function: Optional[Any] = None
1556-
source_key: Optional[str] = None # key to source the sparse vector from
1580+
source_key: Optional[str] = None # key to source the sparse vector from (accepts str or Key)
15571581
bm25: Optional[bool] = None
15581582

1583+
@field_validator("source_key", mode="before")
@classmethod
def validate_source_key_field(cls, v: Any) -> Optional[str]:
    """Normalize the sparse-vector ``source_key`` to a plain string.

    Args:
        v: The raw value supplied for ``source_key``: ``None``, a ``str``,
            or a ``Key`` object.

    Returns:
        ``None`` when no key was supplied; otherwise the key as a string
        (for a ``Key`` object, its ``name`` attribute).

    Raises:
        ValueError: If ``v`` is neither ``str`` nor ``Key``, or if the
            resulting string starts with ``'#'`` and is not ``'#document'``.
    """
    if v is None:
        return None

    # Imported lazily: importing Key at module level would be circular.
    from chromadb.execution.expression.operator import Key as KeyType

    if isinstance(v, KeyType):
        source = v.name  # a Key carries its string form in ``name``
    elif isinstance(v, str):
        source = v
    else:
        raise ValueError(f"source_key must be str or Key, got {type(v).__name__}")

    # Among '#'-prefixed names, only '#document' is accepted as a source.
    if source.startswith("#") and source != "#document":
        raise ValueError(
            "source_key cannot begin with '#'. "
            "The only valid key starting with '#' is Key.DOCUMENT or '#document'."
        )

    return source  # type: ignore[no-any-return]
1606+
15591607
@field_validator("embedding_function", mode="before")
15601608
@classmethod
15611609
def validate_embedding_function_field(cls, v: Any) -> Any:
@@ -1739,9 +1787,14 @@ def __init__(self) -> None:
17391787
self._initialize_keys()
17401788

17411789
def create_index(
1742-
self, config: Optional[IndexConfig] = None, key: Optional[str] = None
1790+
self, config: Optional[IndexConfig] = None, key: Optional[Union[str, "Key"]] = None
17431791
) -> "Schema":
17441792
"""Create an index configuration."""
1793+
# Convert Key to string if provided
1794+
from chromadb.execution.expression.operator import Key as KeyType
1795+
if key is not None and isinstance(key, KeyType):
1796+
key = key.name
1797+
17451798
# Disallow config=None and key=None - too dangerous
17461799
if config is None and key is None:
17471800
raise ValueError(
@@ -1754,6 +1807,13 @@ def create_index(
17541807
f"Cannot create index on special key '{key}'. These keys are managed automatically by the system. Invoke create_index(VectorIndexConfig(...)) without specifying a key to configure the vector index globally."
17551808
)
17561809

1810+
# Disallow any key starting with #
1811+
if key is not None and key.startswith("#"):
1812+
raise ValueError(
1813+
"key cannot begin with '#'. "
1814+
"Keys starting with '#' are reserved for system use."
1815+
)
1816+
17571817
# Special handling for vector index
17581818
if isinstance(config, VectorIndexConfig):
17591819
if key is None:
@@ -1809,9 +1869,14 @@ def create_index(
18091869
return self
18101870

18111871
def delete_index(
1812-
self, config: Optional[IndexConfig] = None, key: Optional[str] = None
1872+
self, config: Optional[IndexConfig] = None, key: Optional[Union[str, "Key"]] = None
18131873
) -> "Schema":
18141874
"""Disable an index configuration (set enabled=False)."""
1875+
# Convert Key to string if provided
1876+
from chromadb.execution.expression.operator import Key as KeyType
1877+
if key is not None and isinstance(key, KeyType):
1878+
key = key.name
1879+
18151880
# Case 1: Both config and key are None - fail the request
18161881
if config is None and key is None:
18171882
raise ValueError(
@@ -1824,6 +1889,13 @@ def delete_index(
18241889
f"Cannot delete index on special key '{key}'. These keys are managed automatically by the system."
18251890
)
18261891

1892+
# Disallow any key starting with #
1893+
if key is not None and key.startswith("#"):
1894+
raise ValueError(
1895+
"key cannot begin with '#'. "
1896+
"Keys starting with '#' are reserved for system use."
1897+
)
1898+
18271899
# TODO: Consider removing these checks in the future to allow disabling vector, FTS, and sparse vector indexes
18281900
# Temporarily disallow deleting vector index (both globally and per-key)
18291901
if isinstance(config, VectorIndexConfig):

0 commit comments

Comments
 (0)