diff --git a/src/main/java/com/techfork/domain/search/controller/SearchController.java b/src/main/java/com/techfork/domain/search/controller/SearchController.java index f721d1f..1af098b 100644 --- a/src/main/java/com/techfork/domain/search/controller/SearchController.java +++ b/src/main/java/com/techfork/domain/search/controller/SearchController.java @@ -23,6 +23,25 @@ public class SearchController { private final SearchService searchService; + @Operation(summary = "테스트용 : 1단계 검색 - BM25") + @GetMapping("/bm25") + public BaseResponse> searchBm25( + @RequestParam @Parameter(description = "검색어", required = true) String query + ) { + List results = searchService.searchOnlyBm25(query); + return BaseResponse.of(SuccessCode.OK, results).getBody(); + } + + @Operation(summary = "테스트용 : 1단계 검색 - semantic") + @GetMapping("/semantic") + public BaseResponse> searchSemantic( + @RequestParam @Parameter(description = "검색어", required = true) String query + ) { + List results = searchService.searchOnlySemantic(query); + return BaseResponse.of(SuccessCode.OK, results).getBody(); + } + + @Operation(summary = "1단계 검색(BM25 + 시맨틱)", description = "검색어를 기반으로 BM25 + k-NN 하이브리드 검색을 수행하고 합산하여 상위 결과를 반환합니다. (개인화 미적용)") @GetMapping("/general") public BaseResponse> searchGeneral( diff --git a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java index 6bb9340..d622032 100644 --- a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java +++ b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java @@ -21,14 +21,16 @@ public class GeneralSearchProperties { private Integer searchSize = 20; // BM25 가중치 + private Float exactBoost = 2.0f; private Float titleBoost = 3.0f; - private Float summaryBoost = 1.0f; - private Float chunkBoost = 0.5f; + private Float summaryBoost = 1.5f; + private Float fuzzyBoost = 1.0f; + private Float chunkBoost = 1.0f; // --- [Vector & KNN 설정] --- - private Integer knnK = 60; - private Integer knnNumCandidates = 100; + private Integer knnK = 40; + private Integer knnNumCandidates = 50; private Float vectorTitleBoost = 3.0f; private Float vectorSummaryBoost = 1.5f; private Float vectorContentChunkBoost = 0.8f; @@ -37,8 +39,8 @@ public class GeneralSearchProperties { private double hybridScoreWeight = 50.0; private double personalScoreWeight = 1.0; - private int RRF_K = 60; - private int RRF_WINDOW_SIZE = 60; + private int RRF_K = 40; + private int RRF_WINDOW_SIZE = 40; // --- [rerank 가중치 설정] --- private double rerankDocumentTitleWeight = 0.6; diff --git a/src/main/java/com/techfork/domain/search/service/SearchConstants.java b/src/main/java/com/techfork/domain/search/service/SearchConstants.java index 08c059c..80d45db 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchConstants.java +++ b/src/main/java/com/techfork/domain/search/service/SearchConstants.java @@ -6,5 +6,5 @@ public class SearchConstants { static final String SUMMARY_FIELD_FORMAT = "summary^%.1f"; static final String CONTENT_CHUNKS_PATH = "contentChunks"; static final String CHUNK_TEXT_FIELD = "contentChunks.chunkText"; - static final String MINIMUM_SHOULD_MATCH = "0"; + static final String MINIMUM_SHOULD_MATCH = "1"; } \ No newline at end of file diff --git a/src/main/java/com/techfork/domain/search/service/SearchService.java b/src/main/java/com/techfork/domain/search/service/SearchService.java index 98bd273..37d6b6f 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchService.java +++ b/src/main/java/com/techfork/domain/search/service/SearchService.java @@ -5,21 +5,11 @@ public interface SearchService { - /** - * 1단계 일반 검색 (Retrieval) - * - 목적: 검색 품질 평가(Recall) 및 비로그인 사용자 검색 - * - 동작: RRF(BM25 + k-NN) 하이브리드 검색만 수행하여 상위 K개 결과를 반환합니다. - * - 개인화(Re-ranking) 로직이 적용되지 않은 순수 연관도 순입니다. - */ + List searchOnlyBm25(String query); + + List searchOnlySemantic(String query); + List searchGeneral(String query); - /** - * 2단계 개인화 검색 (Re-ranking) - * - 목적: 실제 서비스 메인 검색 (로그인 사용자용) - * - 동작: - * 1. 1단계 검색으로 후보군(Top 100) 확보 - * 2. 사용자의 프로필 벡터와 문서 간 유사도 계산 (Cosine Similarity) - * 3. 1단계 점수와 2단계 점수를 가중합하여 재정렬 - */ List searchPersonalized(String query, Long userId); } \ No newline at end of file diff --git a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java index fb977af..de156b8 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java +++ b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java @@ -9,9 +9,7 @@ import com.techfork.domain.post.document.PostDocument; import com.techfork.domain.search.dto.SearchResult; import com.techfork.domain.user.document.UserProfileDocument; -import com.techfork.domain.user.entity.User; import com.techfork.domain.user.repository.UserProfileDocumentRepository; -import com.techfork.domain.user.repository.UserRepository; import com.techfork.global.llm.EmbeddingClient; import com.techfork.global.util.VectorUtil; import java.io.IOException; @@ -33,7 +31,7 @@ @Slf4j @Service -@Transactional +@Transactional(readOnly = true) @RequiredArgsConstructor public class SearchServiceImpl implements SearchService { @@ -41,45 +39,82 @@ public class SearchServiceImpl implements SearchService { private final EmbeddingClient embeddingClient; private final GeneralSearchProperties generalSearchProperties; private final UserProfileDocumentRepository userProfileDocumentRepository; - private final UserRepository userRepository; private final Executor searchAsyncExecutor; + @Override + public List searchOnlyBm25(String query) { + List searchResults = searchOnlyBM25(query); + return stripVectors(searchResults); + } + + @Override + public List searchOnlySemantic(String query) { + List searchResults = searchOnlySemantic(queryEmbedding(query)); + return stripVectors(searchResults); + } + @Override public List searchGeneral(String query) { - log.info("general search started: with query: '{}'", query); + log.debug("general search started: with query: '{}'", query); + long startTime = System.currentTimeMillis(); + List queryVector = queryEmbedding(query); List searchResults = performHybridSearch(query, queryVector); - log.info("Found {} results from hybrid search.", searchResults.size()); - return searchResults.stream() - .map(result -> result.toBuilder().summaryVector(null).build()) - .collect(Collectors.toList()); + + long duration = System.currentTimeMillis() - startTime; + log.info("Search completed. Query='{}', Results={}, Time={}ms", query, searchResults.size(), duration); + + return stripVectors(searchResults); } @Override public List searchPersonalized(String query, Long userId) { - log.info("Personalized search started for userId: {} with query: '{}'", userId, query); + log.debug("Personalized search started for userId: {} with query: '{}'", userId, query); + long startTime = System.currentTimeMillis(); + List queryVector = queryEmbedding(query); - List initialResults = performHybridSearch(query, queryVector); - log.info("Found {} initial results from hybrid search.", initialResults.size()); - User user = userRepository.getReferenceById(userId); + List initialResults = performHybridSearch(query, queryVector); + log.debug("Initial hybrid search found {} documents.", initialResults.size()); - log.info("Attempting to find user profile for userId: {}", user.getId()); - Optional userProfileOpt = userProfileDocumentRepository.findByUserId(user.getId()); - log.info("Successfully fetched user profile optional. isPresent: {}", userProfileOpt.isPresent()); + Optional userProfileOpt = userProfileDocumentRepository.findByUserId(userId); + boolean hasProfile = userProfileOpt.isPresent() && userProfileOpt.get().getProfileVector() != null; - if (userProfileOpt.map(UserProfileDocument::getProfileVector).isEmpty()) { - log.warn("User profile or vector not found for userId: {}. Returning non-personalized results.", user.getId()); - return initialResults.stream() - .map(result -> result.toBuilder().summaryVector(null).build()) - .collect(Collectors.toList()); + if (!hasProfile) { + long duration = System.currentTimeMillis() - startTime; + log.info("Personalized Search [FALLBACK]. UserID={}, Query='{}', Results={}, Time={}ms (Reason: No Profile)", + userId, query, initialResults.size(), duration); + return stripVectors(initialResults); } - log.info("User profile and vector found. Proceeding to personal reranking."); float[] userProfileVector = userProfileOpt.get().getProfileVector(); List rerankedResults = personalReranking(initialResults, userProfileVector); - log.info("Personal reranking complete. Returning {} results.", rerankedResults.size()); - return rerankedResults; + + long duration = System.currentTimeMillis() - startTime; + log.info("Personalized Search [RERANKED]. UserID={}, Query='{}', Results={}, Time={}ms", + userId, query, rerankedResults.size(), duration); + + return stripVectors(rerankedResults); + } + + private List searchOnlyBM25(String query) { + log.info("DEBUG MODE: Performing Lexical Search Only (BM25)"); + + List> lexicalHits = performLexicalSearch(query); + + return lexicalHits.stream() + .map(this::mapToSearchResult) + .collect(Collectors.toList()); + } + + private List searchOnlySemantic(List queryVector) { + log.info("DEBUG MODE: Performing Semantic Search Only"); + + List> semanticHits = performSemanticSearch(queryVector); + + return semanticHits.stream() + .map(this::mapToSearchResult) + .toList(); } private List queryEmbedding(String query) { @@ -87,21 +122,119 @@ private List queryEmbedding(String query) { } private List performHybridSearch(String query, List queryVector) { - CompletableFuture>> lexicalFuture = - CompletableFuture.supplyAsync(() -> performLexicalSearch(query), searchAsyncExecutor); - CompletableFuture>> semanticFuture = - CompletableFuture.supplyAsync(() -> performSemanticSearch(queryVector), searchAsyncExecutor); + CompletableFuture>> lexicalFuture = CompletableFuture.supplyAsync(() -> { + long t = System.currentTimeMillis(); + var result = performLexicalSearch(query); + log.debug("Lexical search done. Hits={}, Time={}ms", result.size(), System.currentTimeMillis() - t); + return result; + }, searchAsyncExecutor); + + CompletableFuture>> semanticFuture = CompletableFuture.supplyAsync(() -> { + long t = System.currentTimeMillis(); + var result = performSemanticSearch(queryVector); + log.debug("Semantic search done. Hits={}, Time={}ms", result.size(), System.currentTimeMillis() - t); + return result; + }, searchAsyncExecutor); CompletableFuture> hybridResultFuture = lexicalFuture - .thenCombine(semanticFuture, this::calculateRRF) + .thenCombine(semanticFuture, (lexicalHits, semanticHits) -> { + log.debug("Merging results: Lexical Hits={}, Semantic Hits={}", lexicalHits.size(), semanticHits.size()); + return calculateRRF(lexicalHits, semanticHits); + }) .exceptionally(ex -> { - log.error("Hybrid search failed", ex); + log.error("Hybrid search failed for query: '{}'", query, ex); throw new RuntimeException("통합 검색 중 오류 발생", ex); }); return hybridResultFuture.join(); } + private List> performLexicalSearch(String query) { + String titleField = String.format(SearchConstants.TITLE_FIELD_FORMAT, generalSearchProperties.getTitleBoost()); + String summaryField = String.format(SearchConstants.SUMMARY_FIELD_FORMAT, generalSearchProperties.getSummaryBoost()); + + Query lexicalQuery = Query.of(q -> q + .bool(b -> b + .should(sh -> sh + .multiMatch(m -> m + .query(query) + .type(TextQueryType.MostFields) + .fields(titleField, summaryField) + .boost(generalSearchProperties.getExactBoost()) + ) + ) + .should(sh -> sh + .multiMatch(m -> m + .query(query) + .fields(titleField, summaryField) + .type(TextQueryType.MostFields) + .fuzziness("AUTO") + .prefixLength(1) + .boost(generalSearchProperties.getFuzzyBoost()) + ) + ) + .should(sh -> sh + .nested(n -> n + .path(SearchConstants.CONTENT_CHUNKS_PATH) + .query(nq -> nq + .match(m -> m + .field(SearchConstants.CHUNK_TEXT_FIELD) + .query(query) + ) + ) + .boost(generalSearchProperties.getChunkBoost()) + ) + ) + .minimumShouldMatch(SearchConstants.MINIMUM_SHOULD_MATCH) + ) + ); + + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(SearchConstants.POSTS_INDEX) + .size(generalSearchProperties.getRRF_WINDOW_SIZE()) + .query(lexicalQuery), + PostDocument.class + ); + return response.hits().hits(); + } catch (IOException e) { + throw new RuntimeException("Lexical search failed", e); + } + } + + private List> performSemanticSearch(List queryVector) { + int k = generalSearchProperties.getKnnK(); + int numCandidates = generalSearchProperties.getKnnNumCandidates(); + + List knnSearches = new ArrayList<>(); + knnSearches.add(createKnnSearch("titleEmbedding", queryVector, k, numCandidates, generalSearchProperties.getVectorTitleBoost())); + knnSearches.add(createKnnSearch("summaryEmbedding", queryVector, k, numCandidates, generalSearchProperties.getVectorSummaryBoost())); + knnSearches.add(createKnnSearch("contentChunks.embedding", queryVector, k, numCandidates, generalSearchProperties.getVectorContentChunkBoost())); + + try { + SearchResponse response = elasticsearchClient.search(s -> s + .index(SearchConstants.POSTS_INDEX) + .size(generalSearchProperties.getRRF_WINDOW_SIZE()) + .knn(knnSearches), + PostDocument.class + ); + + return response.hits().hits(); + } catch (IOException e) { + throw new RuntimeException("Semantic search failed", e); + } + } + + private KnnSearch createKnnSearch(String field, List vector, int k, int numCandidates, float boost) { + return KnnSearch.of(ks -> ks + .field(field) + .queryVector(vector) + .k(k) + .numCandidates(numCandidates) + .boost(boost) + ); + } + private List calculateRRF(List> lexicalHits, List> semanticHits) { Map lexicalRankMap = new HashMap<>(); AtomicInteger rank = new AtomicInteger(1); @@ -165,92 +298,6 @@ private void processHitsForRRF(List> hits, }); } - private List> performLexicalSearch(String query) { - String titleField = String.format(SearchConstants.TITLE_FIELD_FORMAT, generalSearchProperties.getTitleBoost()); - String summaryField = String.format(SearchConstants.SUMMARY_FIELD_FORMAT, generalSearchProperties.getSummaryBoost()); - - Query lexicalQuery = Query.of(q -> q - .bool(b -> b - .should(sh -> sh - .multiMatch(m -> m - .query(query) - .type(TextQueryType.MostFields) - .fields(titleField, summaryField) - ) - ) - .should(sh -> sh - .nested(n -> n - .path(SearchConstants.CONTENT_CHUNKS_PATH) - .query(nq -> nq - .match(m -> m - .field(SearchConstants.CHUNK_TEXT_FIELD) - .query(query) - ) - ) - .boost(generalSearchProperties.getChunkBoost()) - ) - ) - .minimumShouldMatch(SearchConstants.MINIMUM_SHOULD_MATCH) - ) - ); - - try { - SearchResponse response = elasticsearchClient.search(s -> s - .index(SearchConstants.POSTS_INDEX) - .size(generalSearchProperties.getRRF_WINDOW_SIZE()) - .query(lexicalQuery), - PostDocument.class - ); - return response.hits().hits(); - } catch (IOException e) { - throw new RuntimeException("Lexical search failed", e); - } - } - - private List> performSemanticSearch(List queryVector) { - int k = generalSearchProperties.getKnnK(); - int numCandidates = generalSearchProperties.getKnnNumCandidates(); - - List knnSearches = new ArrayList<>(); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("titleEmbedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorTitleBoost()) - )); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("summaryEmbedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorSummaryBoost()) - )); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("contentChunks.embedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorContentChunkBoost()) - )); - - try { - SearchResponse response = elasticsearchClient.search(s -> s - .index(SearchConstants.POSTS_INDEX) - .size(generalSearchProperties.getRRF_WINDOW_SIZE()) - .knn(knnSearches), - PostDocument.class - ); - - return response.hits().hits(); - } catch (IOException e) { - throw new RuntimeException("Semantic search failed", e); - } - } - private SearchResult mapToSearchResult(Hit hit) { PostDocument doc = hit.source(); double score = Objects.requireNonNullElse(hit.score(), 0.0); @@ -299,4 +346,13 @@ private List personalReranking(List initialResults, .sorted(Comparator.comparing(SearchResult::getFinalScore).reversed()) .collect(Collectors.toList()); } + + private List stripVectors(List results) { + return results.stream() + .map(result -> result.toBuilder() + .titleVector(null) + .summaryVector(null) + .build()) + .collect(Collectors.toList()); + } } \ No newline at end of file diff --git a/src/main/resources/ground-truth.json b/src/main/resources/ground-truth.json index 650e139..97b580d 100644 --- a/src/main/resources/ground-truth.json +++ b/src/main/resources/ground-truth.json @@ -1,66 +1,273 @@ [ { - "query": "스프링", + "query": "Java", "idealResultsMap": { - "305": 5, - "113": 3, - "50": 1 + "103": 5, + "147": 4, + "50": 3, + "294": 2, + "494": 1 } }, { - "query": "Java", + "query": "TypeScript", + "idealResultsMap": { + "327": 5, + "518": 4, + "997": 3, + "452": 2, + "25": 1 + } + }, + { + "query": "Kotlin", + "idealResultsMap": { + "147": 5, + "494": 4, + "50": 3, + "441": 2, + "366": 1 + } + }, + { + "query": "Go", + "idealResultsMap": { + "232": 5 + } + }, + { + "query": "Rust", + "idealResultsMap": { + "442": 5, + "98": 4, + "978": 3, + "75": 2, + "70": 1 + } + }, + { + "query": "C++", + "idealResultsMap": { + "431": 5, + "88": 4, + "89": 3, + "429": 2, + "438": 1 + } + }, + { + "query": "Swift", + "idealResultsMap": { + "437": 5, + "135": 4, + "510": 3, + "192": 2, + "495": 1 + } + }, + { + "query": "Scala", + "idealResultsMap": { + "442": 5, + "408": 4, + "435": 3, + "411": 2, + "438": 1 + } + }, + { + "query": "Spring", "idealResultsMap": { - "475": 5, - "103": 3, - "335": 1 + "83": 5, + "105": 4, + "509": 3, + "234": 2, + "529": 1 } }, { "query": "Spring Boot", "idealResultsMap": { - "50": 5, - "143": 3, - "156": 1 + "105": 5, + "83": 4, + "509": 3, + "28": 2, + "234": 1 } }, { - "query": "JPA", + "query": "Django", + "idealResultsMap": {} + }, + { + "query": "Flask", "idealResultsMap": { - "501": 5, - "50": 3, - "494": 1 + "507": 5 } }, { - "query": "Docker", + "query": "React", "idealResultsMap": { - "395": 5, - "66": 3, - "463": 1 + "79": 5, + "62": 4, + "85": 3, + "349": 2, + "41": 1 } }, { - "query": "MSA", + "query": "Vue.js", "idealResultsMap": { - "965": 5, - "205": 3, - "305": 1 + "529": 5, + "523": 4, + "520": 3, + "527": 2, + "168": 1 + } + }, + { + "query": "Angular", + "idealResultsMap": { + "349": 5, + "101": 4, + "112": 3, + "121": 2 + } + }, + { + "query": "Next.js", + "idealResultsMap": { + "168": 5, + "219": 4, + "170": 3, + "511": 2, + "375": 1 } }, { - "query": "배치", + "query": "Nest.js", "idealResultsMap": { - "367": 5, - "154": 3, - "81": 1 + "55": 5, + "520": 4, + "168": 3, + "466": 2, + "64": 1 } }, { - "query": "클라우드", + "query": "FastAPI", + "idealResultsMap": { + "995": 5, + "398": 4, + "989": 3 + } + }, + { + "query": "MySQL", + "idealResultsMap": { + "971": 5, + "348": 4, + "8": 3, + "153": 2, + "479": 1 + } + }, + { + "query": "PostgreSQL", + "idealResultsMap": { + "985": 5, + "433": 4, + "983": 3, + "367": 2, + "1004": 1 + } + }, + { + "query": "MongoDB", + "idealResultsMap": {} + }, + { + "query": "Redis", + "idealResultsMap": { + "474": 5, + "967": 4, + "211": 3, + "152": 2, + "479": 1 + } + }, + { + "query": "Elasticsearch", + "idealResultsMap": { + "137": 5, + "28": 4, + "413": 3, + "174": 2, + "491": 1 + } + }, + { + "query": "DynamoDB", + "idealResultsMap": { + "519": 5, + "376": 4, + "433": 3, + "979": 2, + "981": 1 + } + }, + { + "query": "Cassandra", + "idealResultsMap": { + "947": 5, + "179": 4, + "319": 3 + } + }, + { + "query": "Oracle", + "idealResultsMap": { + "179": 5, + "142": 4, + "183": 3, + "207": 2, + "200": 1 + } + }, + { + "query": "MariaDB", + "idealResultsMap": { + "1004": 5 + } + }, + { + "query": "RDS", + "idealResultsMap": { + "969": 5, + "46": 4, + "98": 3, + "985": 2, + "952": 1 + } + }, + { + "query": "AWS", + "idealResultsMap": { + "194": 5, + "222": 4, + "205": 3, + "67": 2, + "378": 1 + } + }, + { + "query": "Docker", "idealResultsMap": { - "376": 5, - "347": 3, - "124": 1 + "463": 5, + "395": 4, + "243": 3, + "54": 2, + "66": 1 } } ] \ No newline at end of file diff --git a/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java b/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java index cfcc2ad..b9ddcc8 100644 --- a/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java +++ b/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java @@ -34,9 +34,6 @@ class GroundTruthGeneratorTest { @Autowired private UserProfileDocumentRepository userProfileDocumentRepository; - @Autowired - private UserRepository userRepository; - @Autowired private Executor searchAsyncExecutor; @@ -46,7 +43,7 @@ void generateGroundTruthTemplate() { List keywords = List.of("스프링", "Java", "Spring Boot", "JPA", "Docker", "MSA", "배치", "클라우드"); SearchServiceImpl searchService = new SearchServiceImpl( - elasticsearchClient, embeddingClient, generalSearchProperties, userProfileDocumentRepository, userRepository, searchAsyncExecutor); + elasticsearchClient, embeddingClient, generalSearchProperties, userProfileDocumentRepository, searchAsyncExecutor); System.out.println("========== [Copy Below JSON] =========="); System.out.println("["); diff --git a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java index 34f8ee5..4daf91c 100644 --- a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java +++ b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java @@ -40,9 +40,6 @@ class SearchQualityEvaluationTest { @Autowired private UserProfileDocumentRepository userProfileDocumentRepository; - @Autowired - private UserRepository userRepository; - @Autowired @Qualifier("searchAsyncExecutor") private Executor searchAsyncExecutor; @@ -77,7 +74,6 @@ void evaluateSearchQualityAcrossScenarios() throws IOException { embeddingClient, props, userProfileDocumentRepository, - userRepository, searchAsyncExecutor ); @@ -133,9 +129,25 @@ private Map createScenarios() { scenarios.put("4. Title Focus (제목 키워드 & 벡터 최우선)", createProperties(10.0f, 0.5f, 0.1f, 10.0f, 1.0f, 0.5f, 30.0, 60, 200)); - // 5. Balanced High Recall (넓게 찾기) - scenarios.put("5. High Recall (탐색 범위 확장)", - createProperties(3.0f, 2.0f, 1.0f, 3.0f, 2.0f, 1.0f, 20.0, 150, 400)); + // 6. Content Deep Dive (본문 내용 집중) + // 목적: 제목이나 요약에는 없지만, 본문 구석에 있는 구체적인 해결책이나 에러 로그 등을 찾을 때 유리한지 확인 + scenarios.put("5. Content Deep Dive (본문 청크 집중)", + createProperties(1.0f, 1.0f, 5.0f, 1.0f, 1.0f, 4.0f, 30.0, 60, 200)); + + // 7. Summary Oriented (요약문 집중) + // 목적: 제목이 너무 함축적(예: "나의 회고")일 때, 요약문에 포함된 핵심 의도를 잘 파악하는지 확인 + scenarios.put("6. Summary Oriented (요약문 집중)", + createProperties(1.0f, 4.0f, 1.0f, 1.0f, 4.0f, 1.0f, 30.0, 60, 200)); + + // 8. High Precision / Low Latency (속도 최적화 & 상위 매칭) + // 목적: 검색 후보군(Candidates)을 줄였을 때, 정확도(nDCG) 손실은 적으면서 응답 속도(Latency)가 얼마나 개선되는지 확인 + scenarios.put("7. High Precision (속도 중시, 후보군 축소)", + createProperties(3.0f, 1.0f, 0.5f, 3.0f, 1.5f, 0.8f, 30.0, 20, 40)); + + // 10. Equal Balance (모든 필드 동등 가중치) + // 목적: 특정 필드에 가중치를 주지 않고 기계적으로 동등하게 설정했을 때의 베이스라인 품질 확인 + scenarios.put("8. Equal Balance (모든 가중치 동일)", + createProperties(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 30.0, 60, 200)); return scenarios; }