Skip to content

Commit 0cd4a11

Browse files
feat(oss-opensearch): Add KNN derived source configuration option
1 parent 64fecb1 commit 0cd4a11

File tree

4 files changed

+26
-0
lines changed

4 files changed

+26
-0
lines changed

vectordb_bench/backend/clients/oss_opensearch/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
7777
oversample_factor: float = 1.0
7878
quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32
7979
replication_type: str | None = "DOCUMENT"
80+
knn_derived_source_enabled: str | None = None
8081

8182
@root_validator
8283
def validate_engine_name(cls, values: dict):
@@ -103,6 +104,7 @@ def __eq__(self, obj: any):
103104
and self.use_routing == obj.use_routing
104105
and self.quantization_type == obj.quantization_type
105106
and self.replication_type == obj.replication_type
107+
and self.knn_derived_source_enabled == obj.knn_derived_source_enabled
106108
)
107109

108110
def __hash__(self) -> int:
@@ -117,6 +119,7 @@ def __hash__(self) -> int:
117119
self.use_routing,
118120
self.quantization_type,
119121
self.replication_type,
122+
self.knn_derived_source_enabled,
120123
)
121124
)
122125

vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def _create_index(self, client: OpenSearch) -> None:
229229
log.info(f"Creating index with ef_search: {ef_search_value}")
230230
log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}")
231231
log.info(f"Creating index with replication_type: {self.case_config.replication_type}")
232+
log.info(f"Creating index with knn_derived_source_enabled: {self.case_config.knn_derived_source_enabled}")
232233
log.info(f"Creating index with engine: {self.case_config.engine}")
233234
log.info(f"Creating index with metric type: {self.case_config.metric_type_name}")
234235
log.info(f"All case_config parameters: {self.case_config.__dict__}")
@@ -252,6 +253,12 @@ def _create_index(self, client: OpenSearch) -> None:
252253
},
253254
"refresh_interval": self.case_config.refresh_interval,
254255
}
256+
# Only add knn.derived_source.enabled if explicitly set (None = skip the setting, for compatibility with versions < 3.x)
257+
if self.case_config.knn_derived_source_enabled and self.case_config.knn_derived_source_enabled != "None":
258+
is_knn_derived_source_enabled = self.case_config.knn_derived_source_enabled == "True"
259+
log.info(f"Adding knn.derived_source.enabled={is_knn_derived_source_enabled} to index settings")
260+
settings["index"]["knn.derived_source.enabled"] = is_knn_derived_source_enabled
261+
255262
settings["index"]["knn.algo_param.ef_search"] = ef_search_value
256263
# Build properties mapping, excluding _id which is automatically handled by OpenSearch
257264
properties = {}

vectordb_bench/frontend/config/dbCaseConfigs.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,17 @@ class CaseConfigInput(BaseModel):
17161716
},
17171717
)
17181718

1719+
CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch = CaseConfigInput(
1720+
label=CaseConfigParamType.knn_derived_source_enabled,
1721+
displayLabel="KNN Derived Source Enabled",
1722+
inputHelp="OpenSearch 3.x+ set this to 'True' by default. Use 'False' to disable it. Use 'None' for <3.x versions.",
1723+
inputType=InputType.Option,
1724+
inputConfig={
1725+
"options": ["None", "True", "False"],
1726+
"default": "None",
1727+
},
1728+
)
1729+
17191730
MilvusLoadConfig = [
17201731
CaseConfigParamInput_IndexType,
17211732
CaseConfigParamInput_M,
@@ -1797,12 +1808,14 @@ class CaseConfigInput(BaseModel):
17971808
CaseConfigParamInput_EFConstruction_AWSOpensearch,
17981809
CaseConfigParamInput_M_AWSOpensearch,
17991810
CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,
1811+
CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch,
18001812
]
18011813
AWSOpenSearchPerformanceConfig = [
18021814
CaseConfigParamInput_EFConstruction_AWSOpensearch,
18031815
CaseConfigParamInput_M_AWSOpensearch,
18041816
CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
18051817
CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,
1818+
CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch,
18061819
]
18071820

18081821
AliyunOpensearchLoadingConfig = []
@@ -2106,6 +2119,7 @@ class CaseConfigInput(BaseModel):
21062119
CaseConfigParamInput_EFConstruction_AWSOpensearch,
21072120
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
21082121
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2122+
CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch,
21092123
CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
21102124
CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
21112125
CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,
@@ -2121,6 +2135,7 @@ class CaseConfigInput(BaseModel):
21212135
CaseConfigParamInput_EFConstruction_AWSOpensearch,
21222136
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
21232137
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2138+
CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch,
21242139
CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
21252140
CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
21262141
CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch,

vectordb_bench/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class CaseConfigParamType(Enum):
126126
oversample_ratio = "oversample_ratio"
127127
use_routing = "use_routing"
128128
replication_type = "replication_type"
129+
knn_derived_source_enabled = "knn_derived_source_enabled"
129130

130131
dataset_with_size_type = "dataset_with_size_type"
131132
filter_rate = "filter_rate"

0 commit comments

Comments
 (0)