Skip to content

Commit 848ccdc

Browse files
authored
[ENH] Validate schema in server (#5612)
## Description of changes

_Summarize the changes made by this PR._

- Improvements & Bug fixes
  - N/A
- New functionality
  - Schema validation in server

## Test plan

_How are these changes tested?_

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Migration plan

_Are there any migrations, or any forwards/backwards compatibility changes needed in order to make sure this change deploys reliably?_

## Observability plan

_What is the plan to instrument and monitor this change?_

## Documentation Changes

_Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_
1 parent 951c262 commit 848ccdc

File tree

2 files changed

+80
-2
lines changed

2 files changed

+80
-2
lines changed

rust/types/src/api_types.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::plan::PlanToProtoError;
1111
use crate::plan::SearchPayload;
1212
use crate::validators::{
1313
validate_metadata_vec, validate_name, validate_non_empty_collection_update_metadata,
14-
validate_optional_metadata, validate_update_metadata_vec,
14+
validate_optional_metadata, validate_schema, validate_update_metadata_vec,
1515
};
1616
use crate::Collection;
1717
use crate::CollectionConfigurationToInternalConfigurationError;
@@ -668,6 +668,7 @@ pub struct CreateCollectionRequest {
668668
#[validate(custom(function = "validate_optional_metadata"))]
669669
pub metadata: Option<Metadata>,
670670
pub configuration: Option<InternalCollectionConfiguration>,
671+
#[validate(custom(function = "validate_schema"))]
671672
pub schema: Option<InternalSchema>,
672673
pub get_or_create: bool,
673674
}

rust/types/src/validators.rs

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::{
22
operator::{Rank, RankExpr},
3-
CollectionMetadataUpdate, Metadata, MetadataValue, UpdateMetadata, UpdateMetadataValue,
3+
CollectionMetadataUpdate, InternalSchema, Metadata, MetadataValue, UpdateMetadata,
4+
UpdateMetadataValue,
45
};
56
use regex::Regex;
67
use std::collections::HashMap;
@@ -178,6 +179,82 @@ fn validate_rank_expr(expr: &RankExpr) -> Result<(), ValidationError> {
178179
Ok(())
179180
}
180181

182+
/// Validate schema
183+
pub fn validate_schema(schema: &InternalSchema) -> Result<(), ValidationError> {
184+
let mut sparse_index_keys = Vec::new();
185+
if schema
186+
.defaults
187+
.float_list
188+
.as_ref()
189+
.is_some_and(|vt| vt.vector_index.as_ref().is_some_and(|it| it.enabled))
190+
{
191+
return Err(ValidationError::new("schema").with_message("Vector index cannot be enabled by default. It can only be enabled on #embedding field.".into()));
192+
}
193+
if schema
194+
.defaults
195+
.sparse_vector
196+
.as_ref()
197+
.is_some_and(|vt| vt.sparse_vector_index.as_ref().is_some_and(|it| it.enabled))
198+
{
199+
return Err(ValidationError::new("schema").with_message("Sparse vector index cannot be enabled by default. Please enable sparse vector index on specific keys. At most one sparse vector index is allowed for the collection.".into()));
200+
}
201+
if schema
202+
.defaults
203+
.string
204+
.as_ref()
205+
.is_some_and(|vt| vt.fts_index.as_ref().is_some_and(|it| it.enabled))
206+
{
207+
return Err(ValidationError::new("schema").with_message("Full text search / regular expression index cannot be enabled by default. It can only be enabled on #document field.".into()));
208+
}
209+
for (key, config) in &schema.key_overrides {
210+
if let Some(vit) = config
211+
.float_list
212+
.as_ref()
213+
.and_then(|vt| vt.vector_index.as_ref())
214+
{
215+
// TODO(Sicheng): Schema currently use `$embedding`. This should be updated once schema updates naming
216+
if vit.enabled && key != "$embedding" {
217+
return Err(ValidationError::new("schema").with_message(
218+
format!("Vector index can only be enabled on $embedding field: {key}").into(),
219+
));
220+
}
221+
// TODO(Sicheng): Schema currently use `$document`. This should be updated once schema updates naming
222+
if vit
223+
.config
224+
.source_key
225+
.as_ref()
226+
.is_some_and(|key| key != "$document")
227+
{
228+
return Err(ValidationError::new("schema")
229+
.with_message("Vector index can only source from $document".into()));
230+
}
231+
}
232+
if config
233+
.sparse_vector
234+
.as_ref()
235+
.is_some_and(|vt| vt.sparse_vector_index.as_ref().is_some_and(|it| it.enabled))
236+
{
237+
sparse_index_keys.push(key);
238+
if sparse_index_keys.len() > 1 {
239+
return Err(ValidationError::new("schema").with_message(
240+
format!("At most one sparse vector index is allowed for the collection: {sparse_index_keys:?}")
241+
.into(),
242+
));
243+
}
244+
}
245+
// TODO(Sicheng): Schema currently use `$document`. This should be updated once schema updates naming
246+
if config
247+
.string
248+
.as_ref()
249+
.is_some_and(|vt| vt.fts_index.as_ref().is_some_and(|it| it.enabled))
250+
&& key != "$document"
251+
{
252+
return Err(ValidationError::new("schema").with_message(format!("Full text search / regular expression index can only be enabled on $document field: {key}").into()));
253+
}
254+
}
255+
Ok(())
256+
}
257+
181258
#[cfg(test)]
182259
mod tests {
183260
use super::*;

0 commit comments

Comments
 (0)