|
1 | 1 | use crate::{ |
2 | 2 | operator::{Rank, RankExpr}, |
3 | | - CollectionMetadataUpdate, Metadata, MetadataValue, UpdateMetadata, UpdateMetadataValue, |
| 3 | + CollectionMetadataUpdate, InternalSchema, Metadata, MetadataValue, UpdateMetadata, |
| 4 | + UpdateMetadataValue, |
4 | 5 | }; |
5 | 6 | use regex::Regex; |
6 | 7 | use std::collections::HashMap; |
@@ -178,6 +179,82 @@ fn validate_rank_expr(expr: &RankExpr) -> Result<(), ValidationError> { |
178 | 179 | Ok(()) |
179 | 180 | } |
180 | 181 |
|
| 182 | +/// Validate schema |
| 183 | +pub fn validate_schema(schema: &InternalSchema) -> Result<(), ValidationError> { |
| 184 | + let mut sparse_index_keys = Vec::new(); |
| 185 | + if schema |
| 186 | + .defaults |
| 187 | + .float_list |
| 188 | + .as_ref() |
| 189 | + .is_some_and(|vt| vt.vector_index.as_ref().is_some_and(|it| it.enabled)) |
| 190 | + { |
| 191 | + return Err(ValidationError::new("schema").with_message("Vector index cannot be enabled by default. It can only be enabled on #embedding field.".into())); |
| 192 | + } |
| 193 | + if schema |
| 194 | + .defaults |
| 195 | + .sparse_vector |
| 196 | + .as_ref() |
| 197 | + .is_some_and(|vt| vt.sparse_vector_index.as_ref().is_some_and(|it| it.enabled)) |
| 198 | + { |
| 199 | + return Err(ValidationError::new("schema").with_message("Sparse vector index cannot be enabled by default. Please enable sparse vector index on specific keys. At most one sparse vector index is allowed for the collection.".into())); |
| 200 | + } |
| 201 | + if schema |
| 202 | + .defaults |
| 203 | + .string |
| 204 | + .as_ref() |
| 205 | + .is_some_and(|vt| vt.fts_index.as_ref().is_some_and(|it| it.enabled)) |
| 206 | + { |
| 207 | + return Err(ValidationError::new("schema").with_message("Full text search / regular expression index cannot be enabled by default. It can only be enabled on #document field.".into())); |
| 208 | + } |
| 209 | + for (key, config) in &schema.key_overrides { |
| 210 | + if let Some(vit) = config |
| 211 | + .float_list |
| 212 | + .as_ref() |
| 213 | + .and_then(|vt| vt.vector_index.as_ref()) |
| 214 | + { |
| 215 | + // TODO(Sicheng): Schema currently use `$embedding`. This should be updated once schema updates naming |
| 216 | + if vit.enabled && key != "$embedding" { |
| 217 | + return Err(ValidationError::new("schema").with_message( |
| 218 | + format!("Vector index can only be enabled on $embedding field: {key}").into(), |
| 219 | + )); |
| 220 | + } |
| 221 | + // TODO(Sicheng): Schema currently use `$document`. This should be updated once schema updates naming |
| 222 | + if vit |
| 223 | + .config |
| 224 | + .source_key |
| 225 | + .as_ref() |
| 226 | + .is_some_and(|key| key != "$document") |
| 227 | + { |
| 228 | + return Err(ValidationError::new("schema") |
| 229 | + .with_message("Vector index can only source from $document".into())); |
| 230 | + } |
| 231 | + } |
| 232 | + if config |
| 233 | + .sparse_vector |
| 234 | + .as_ref() |
| 235 | + .is_some_and(|vt| vt.sparse_vector_index.as_ref().is_some_and(|it| it.enabled)) |
| 236 | + { |
| 237 | + sparse_index_keys.push(key); |
| 238 | + if sparse_index_keys.len() > 1 { |
| 239 | + return Err(ValidationError::new("schema").with_message( |
| 240 | + format!("At most one sparse vector index is allowed for the collection: {sparse_index_keys:?}") |
| 241 | + .into(), |
| 242 | + )); |
| 243 | + } |
| 244 | + } |
| 245 | + // TODO(Sicheng): Schema currently use `$document`. This should be updated once schema updates naming |
| 246 | + if config |
| 247 | + .string |
| 248 | + .as_ref() |
| 249 | + .is_some_and(|vt| vt.fts_index.as_ref().is_some_and(|it| it.enabled)) |
| 250 | + && key != "$document" |
| 251 | + { |
| 252 | + return Err(ValidationError::new("schema").with_message(format!("Full text search / regular expression index can only be enabled on $document field: {key}").into())); |
| 253 | + } |
| 254 | + } |
| 255 | + Ok(()) |
| 256 | +} |
| 257 | + |
181 | 258 | #[cfg(test)] |
182 | 259 | mod tests { |
183 | 260 | use super::*; |
|
0 commit comments