Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
b1b11de
add array data type for milvus vector store collection create
Jun 20, 2024
275ea24
add array data type for milvus vector store collection create
Jun 20, 2024
a8e2d08
add array data type for milvus vector store collection create
Jun 20, 2024
c7fe30c
Merge branch 'master' into array_data_type
rgupta2508 Jul 2, 2024
3ab4b6c
add array data type for milvus vector store collection create
Jul 2, 2024
91635fa
add array data type for milvus vector store collection create
Jul 2, 2024
d3012fd
add array data type for milvus vector store collection create
Jul 2, 2024
bc33253
add array data type for milvus vector store collection create
Jul 2, 2024
c77b03a
Merge branch 'master' into array_data_type
rgupta2508 Jul 2, 2024
a4ce1a9
add array data type for milvus vector store collection create
Jul 2, 2024
9f70ca7
add array data type for milvus vector store collection create
Jul 2, 2024
b8b94bd
add array data type for milvus vector store collection create
Jul 2, 2024
2b5d0cf
add array data type for milvus vector store collection create
Jul 2, 2024
cecd707
add array data type for milvus vector store collection create
Jul 2, 2024
d6b90f7
add array data type for milvus vector store collection create
Jul 2, 2024
8ea4186
Merge branch 'master' into array_data_type
rgupta2508 Jul 3, 2024
fb3ed0f
add array data type for milvus vector store collection create
Jul 3, 2024
7719a0d
add array data type for milvus vector store collection create
Jul 3, 2024
0e22cff
add array data type for milvus vector store collection create
Jul 3, 2024
afd0140
Merge branch 'master' into array_data_type
rgupta2508 Jul 3, 2024
3601c2d
add array data type for milvus vector store collection create
Jul 3, 2024
ae234cb
update
Jul 4, 2024
8fa2512
Merge branch 'master' into array_data_type
rgupta2508 Jul 4, 2024
2b9d310
lint error
Jul 5, 2024
c25cd52
lint error
Jul 5, 2024
99f8db8
lint error
Jul 5, 2024
8585008
lint error
Jul 5, 2024
410d616
lint error
Jul 5, 2024
f94a687
Merge branch 'master' into array_data_type
rgupta2508 Jul 16, 2024
9ae4191
Merge branch 'master' into array_data_type
rgupta2508 Jul 16, 2024
3195bca
resolve comments
Jul 16, 2024
ce58e66
incorporate review comments
Jul 17, 2024
66101e7
incorporate review comments
Jul 17, 2024
d14cd87
incorporate review comments
Jul 17, 2024
4f8f1c4
incorporate review comments
Jul 17, 2024
e31a606
incorporate review comments
Jul 17, 2024
6ec9b86
incorporate review comments
Jul 17, 2024
c171cd5
Merge branch 'master' into array_data_type
rgupta2508 Aug 13, 2024
63387b1
partners[milvus]: refine milvus array dtype
zc277584121 Aug 27, 2024
798dcb8
Merge branch 'master' into array_data_type
zc277584121 Aug 28, 2024
f82668f
Merge branch 'master' into array_data_type
Aug 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 40 additions & 15 deletions libs/partners/milvus/langchain_milvus/vectorstores/milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def __init__(
replica_number: int = 1,
timeout: Optional[float] = None,
num_shards: Optional[int] = None,
metadata_schema: Optional[dict[str, Any]] = None,
):
"""Initialize the Milvus vector store."""
try:
Expand Down Expand Up @@ -310,6 +311,7 @@ def __init__(
self.replica_number = replica_number
self.timeout = timeout
self.num_shards = num_shards
self.metadata_schema = metadata_schema

# Create the connection to the server
if connection_args is None:
Expand Down Expand Up @@ -472,24 +474,47 @@ def _create_collection(
)
raise ValueError(f"Metadata key {key} is reserved.")
# Infer the corresponding datatype of the metadata
dtype = infer_dtype_bydata(value)
# Datatype isn't compatible
if dtype == DataType.UNKNOWN or dtype == DataType.NONE:
logger.error(
(
"Failure to create collection, "
"unrecognized dtype for key: %s"
),
key,
)
raise ValueError(f"Unrecognized datatype for {key}.")
# Datatype is a string/varchar equivalent
elif dtype == DataType.VARCHAR:
if (
key in self.metadata_schema # type: ignore
and "dtype" in self.metadata_schema[key] # type: ignore
):
kwargs = self.metadata_schema[key].get("kwargs", {}) # type: ignore
fields.append(
FieldSchema(key, DataType.VARCHAR, max_length=65_535)
FieldSchema(
name=key,
dtype=self.metadata_schema[key]["dtype"], # type: ignore
**kwargs,
)
)
else:
fields.append(FieldSchema(key, dtype))
dtype = infer_dtype_bydata(value)
# Datatype isn't compatible
if dtype == DataType.UNKNOWN or dtype == DataType.NONE:
logger.error(
(
"Failure to create collection, "
"unrecognized dtype for key: %s"
),
key,
)
raise ValueError(f"Unrecognized datatype for {key}.")
# Datatype is a string/varchar equivalent
elif dtype == DataType.VARCHAR:
fields.append(
FieldSchema(key, DataType.VARCHAR, max_length=65_535)
)
# infer_dtype_bydata currently can't recognize array type,
# so this line can not be accessed.
# This line may need to be modified in the future when
# infer_dtype_bydata can recognize array type.
# https://github.com/milvus-io/pymilvus/issues/2165
elif dtype == DataType.ARRAY:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still seems to depend on a fix on the pymilvus side. pymilvus needs to be able to return DataType.ARRAY (along with the other required information) before this line of code can take effect; see milvus-io/pymilvus#2144

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition, could you please add a corresponding unit test to safeguard this behavior against regressions from future PRs?

kwargs = self.metadata_schema[key]["kwargs"] # type: ignore
fields.append(
FieldSchema(name=key, dtype=DataType.ARRAY, **kwargs)
)
else:
fields.append(FieldSchema(key, dtype))

# Create the text field
fields.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def _milvus_from_texts(
# connection_args={"uri": "http://127.0.0.1:19530"},
connection_args={"uri": "./milvus_demo.db"},
drop_old=drop,
consistency_level="Strong",
**kwargs,
)

Expand Down Expand Up @@ -303,6 +304,51 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
}


def test_milvus_array_field() -> None:
    """Filter on an array metadata field with an explicit or a dynamic schema.

    The same two array filter expressions are run against a store created
    with a hand-written ``metadata_schema`` and against a store created with
    ``enable_dynamic_field=True``; each expression must match two documents.

    Background on the array data type and its filter syntax:
    https://milvus.io/docs/array_data_type.md
    """
    from pymilvus import DataType

    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"id": idx, "array_field": [idx, idx + 1, idx + 2]}
        for idx in range(len(texts))
    ]

    # Only the array field is declared by hand. "id" is deliberately left
    # out: fields that are not specified get their schema inferred
    # automatically.
    array_schema = {
        "array_field": {
            "dtype": DataType.ARRAY,
            "kwargs": {"element_type": DataType.INT64, "max_capacity": 50},
        },
    }

    # Both expressions are expected to select exactly two of the documents.
    filter_exprs = ("array_field[0] < 2", "ARRAY_CONTAINS(array_field, 3)")

    # Case 1: the metadata schema (including the array field) is given explicitly.
    store = _milvus_from_texts(metadatas=metadatas, metadata_schema=array_schema)
    for expr in filter_exprs:
        hits = store.similarity_search("foo", k=10, expr=expr)
        assert len(hits) == 2

    # Case 2: with enable_dynamic_field there is no need to specify
    # the metadata schema manually.
    store = _milvus_from_texts(enable_dynamic_field=True, metadatas=metadatas)
    for expr in filter_exprs:
        hits = store.similarity_search("foo", k=10, expr=expr)
        assert len(hits) == 2


# if __name__ == "__main__":
# test_milvus()
# test_milvus_vector_search()
Expand All @@ -319,3 +365,4 @@ def test_milvus_enable_dynamic_field_with_partition_key() -> None:
# test_milvus_enable_dynamic_field()
# test_milvus_disable_dynamic_field()
# test_milvus_metadata_field()
# test_milvus_array_field()