From 65ce793bf4c3ffa54d7744612910e4a4c8227f0d Mon Sep 17 00:00:00 2001 From: Joon9750 Date: Wed, 26 Nov 2025 02:38:58 +0900 Subject: [PATCH 1/5] =?UTF-8?q?improve:=20=ED=8C=90=EB=8B=A8=20=EB=AA=A9?= =?UTF-8?q?=EB=A1=9D=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/ground-truth.json | 269 ++++++++++++++++-- .../search/SearchQualityEvaluationTest.java | 22 +- 2 files changed, 257 insertions(+), 34 deletions(-) diff --git a/src/main/resources/ground-truth.json b/src/main/resources/ground-truth.json index 650e139..97b580d 100644 --- a/src/main/resources/ground-truth.json +++ b/src/main/resources/ground-truth.json @@ -1,66 +1,273 @@ [ { - "query": "스프링", + "query": "Java", "idealResultsMap": { - "305": 5, - "113": 3, - "50": 1 + "103": 5, + "147": 4, + "50": 3, + "294": 2, + "494": 1 } }, { - "query": "Java", + "query": "TypeScript", + "idealResultsMap": { + "327": 5, + "518": 4, + "997": 3, + "452": 2, + "25": 1 + } + }, + { + "query": "Kotlin", + "idealResultsMap": { + "147": 5, + "494": 4, + "50": 3, + "441": 2, + "366": 1 + } + }, + { + "query": "Go", + "idealResultsMap": { + "232": 5 + } + }, + { + "query": "Rust", + "idealResultsMap": { + "442": 5, + "98": 4, + "978": 3, + "75": 2, + "70": 1 + } + }, + { + "query": "C++", + "idealResultsMap": { + "431": 5, + "88": 4, + "89": 3, + "429": 2, + "438": 1 + } + }, + { + "query": "Swift", + "idealResultsMap": { + "437": 5, + "135": 4, + "510": 3, + "192": 2, + "495": 1 + } + }, + { + "query": "Scala", + "idealResultsMap": { + "442": 5, + "408": 4, + "435": 3, + "411": 2, + "438": 1 + } + }, + { + "query": "Spring", "idealResultsMap": { - "475": 5, - "103": 3, - "335": 1 + "83": 5, + "105": 4, + "509": 3, + "234": 2, + "529": 1 } }, { "query": "Spring Boot", "idealResultsMap": { - "50": 5, - "143": 3, - "156": 1 + "105": 5, + "83": 4, + "509": 3, + "28": 2, + "234": 1 } }, { - "query": "JPA", + "query": "Django", + "idealResultsMap": {} + }, + { + "query": "Flask", "idealResultsMap": { - "501": 5, - "50": 3, - "494": 1 + "507": 5 } }, { - "query": "Docker", + "query": "React", "idealResultsMap": { - "395": 5, - "66": 3, - "463": 1 + "79": 5, + "62": 4, + "85": 3, + "349": 2, + "41": 1 } }, { - "query": "MSA", + "query": "Vue.js", "idealResultsMap": { - "965": 5, - "205": 3, - "305": 1 + "529": 5, + "523": 4, + "520": 3, + "527": 2, + "168": 1 + } + }, + { + "query": "Angular", + "idealResultsMap": { + "349": 5, + "101": 4, + "112": 3, + "121": 2 + } + }, + { + "query": "Next.js", + "idealResultsMap": { + "168": 5, + "219": 4, + "170": 3, + "511": 2, + "375": 1 } }, { - "query": "배치", + "query": "Nest.js", "idealResultsMap": { - "367": 5, - "154": 3, - "81": 1 + "55": 5, + "520": 4, + "168": 3, + "466": 2, + "64": 1 } }, { - "query": "클라우드", + "query": "FastAPI", + "idealResultsMap": { + "995": 5, + "398": 4, + "989": 3 + } + }, + { + "query": "MySQL", + "idealResultsMap": { + "971": 5, + "348": 4, + "8": 3, + "153": 2, + "479": 1 + } + }, + { + "query": "PostgreSQL", + "idealResultsMap": { + "985": 5, + "433": 4, + "983": 3, + "367": 2, + "1004": 1 + } + }, + { + "query": "MongoDB", + "idealResultsMap": {} + }, + { + "query": "Redis", + "idealResultsMap": { + "474": 5, + "967": 4, + "211": 3, + "152": 2, + "479": 1 + } + }, + { + "query": "Elasticsearch", + "idealResultsMap": { + "137": 5, + "28": 4, + "413": 3, + "174": 2, + "491": 1 + } + }, + { + "query": "DynamoDB", + "idealResultsMap": { + "519": 5, + "376": 4, + "433": 3, + "979": 2, + "981": 1 + } + }, + { + "query": "Cassandra", + "idealResultsMap": { + "947": 5, + "179": 4, + "319": 3 + } + }, + { + "query": "Oracle", + "idealResultsMap": { + "179": 5, + "142": 4, + "183": 3, + "207": 2, + "200": 1 + } + }, + { + "query": "MariaDB", + "idealResultsMap": { + "1004": 5 + } + }, + { + "query": "RDS", + "idealResultsMap": { + "969": 5, + "46": 4, + "98": 3, + "985": 2, + "952": 1 + } + }, + { + "query": "AWS", + "idealResultsMap": { + "194": 5, + "222": 4, + "205": 3, + "67": 2, + "378": 1 + } + }, + { + "query": "Docker", "idealResultsMap": { - "376": 5, - "347": 3, - "124": 1 + "463": 5, + "395": 4, + "243": 3, + "54": 2, + "66": 1 } } ] \ No newline at end of file diff --git a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java index 34f8ee5..a3c0db4 100644 --- a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java +++ b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java @@ -133,9 +133,25 @@ private Map createScenarios() { scenarios.put("4. Title Focus (제목 키워드 & 벡터 최우선)", createProperties(10.0f, 0.5f, 0.1f, 10.0f, 1.0f, 0.5f, 30.0, 60, 200)); - // 5. Balanced High Recall (넓게 찾기) - scenarios.put("5. High Recall (탐색 범위 확장)", - createProperties(3.0f, 2.0f, 1.0f, 3.0f, 2.0f, 1.0f, 20.0, 150, 400)); + // 6. Content Deep Dive (본문 내용 집중) + // 목적: 제목이나 요약에는 없지만, 본문 구석에 있는 구체적인 해결책이나 에러 로그 등을 찾을 때 유리한지 확인 + scenarios.put("5. Content Deep Dive (본문 청크 집중)", + createProperties(1.0f, 1.0f, 5.0f, 1.0f, 1.0f, 4.0f, 30.0, 60, 200)); + + // 7. Summary Oriented (요약문 집중) + // 목적: 제목이 너무 함축적(예: "나의 회고")일 때, 요약문에 포함된 핵심 의도를 잘 파악하는지 확인 + scenarios.put("6. Summary Oriented (요약문 집중)", + createProperties(1.0f, 4.0f, 1.0f, 1.0f, 4.0f, 1.0f, 30.0, 60, 200)); + + // 8. High Precision / Low Latency (속도 최적화 & 상위 매칭) + // 목적: 검색 후보군(Candidates)을 줄였을 때, 정확도(nDCG) 손실은 적으면서 응답 속도(Latency)가 얼마나 개선되는지 확인 + scenarios.put("7. High Precision (속도 중시, 후보군 축소)", + createProperties(3.0f, 1.0f, 0.5f, 3.0f, 1.5f, 0.8f, 30.0, 20, 40)); + + // 10. Equal Balance (모든 필드 동등 가중치) + // 목적: 특정 필드에 가중치를 주지 않고 기계적으로 동등하게 설정했을 때의 베이스라인 품질 확인 + scenarios.put("8. Equal Balance (모든 가중치 동일)", + createProperties(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 30.0, 60, 200)); return scenarios; } From 56f83c6dc04a7a73441fd8b5237f326b772c9072 Mon Sep 17 00:00:00 2001 From: Joon9750 Date: Thu, 27 Nov 2025 02:03:49 +0900 Subject: [PATCH 2/5] =?UTF-8?q?add:=20=ED=85=8C=EC=8A=A4=ED=8A=B8=EC=9A=A9?= =?UTF-8?q?=20API=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../search/controller/SearchController.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/com/techfork/domain/search/controller/SearchController.java b/src/main/java/com/techfork/domain/search/controller/SearchController.java index f721d1f..1af098b 100644 --- a/src/main/java/com/techfork/domain/search/controller/SearchController.java +++ b/src/main/java/com/techfork/domain/search/controller/SearchController.java @@ -23,6 +23,25 @@ public class SearchController { private final SearchService searchService; + @Operation(summary = "테스트용 : 1단계 검색 - BM25") + @GetMapping("/bm25") + public BaseResponse> searchBm25( + @RequestParam @Parameter(description = "검색어", required = true) String query + ) { + List results = searchService.searchOnlyBm25(query); + return BaseResponse.of(SuccessCode.OK, results).getBody(); + } + + @Operation(summary = "테스트용 : 1단계 검색 - semantic") + @GetMapping("/semantic") + public BaseResponse> searchSemantic( + @RequestParam @Parameter(description = "검색어", required = true) String query + ) { + List results = searchService.searchOnlySemantic(query); + return BaseResponse.of(SuccessCode.OK, results).getBody(); + } + + @Operation(summary = "1단계 검색(BM25 + 시맨틱)", description = "검색어를 기반으로 BM25 + k-NN 하이브리드 검색을 수행하고 합산하여 상위 결과를 반환합니다. (개인화 미적용)") @GetMapping("/general") public BaseResponse> searchGeneral( From 6af9e0fa44c725f9d8bafd4e233068262203ccd3 Mon Sep 17 00:00:00 2001 From: Joon9750 Date: Thu, 27 Nov 2025 02:04:08 +0900 Subject: [PATCH 3/5] =?UTF-8?q?add:=20fuzzy=20search=20=EC=A0=81=EC=9A=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/GeneralSearchProperties.java | 10 ++-- .../search/service/SearchConstants.java | 2 +- .../domain/search/service/SearchService.java | 18 ++---- .../search/service/SearchServiceImpl.java | 56 ++++++++++++++++++- 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java index 6bb9340..83d9bc1 100644 --- a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java +++ b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java @@ -21,14 +21,16 @@ public class GeneralSearchProperties { private Integer searchSize = 20; // BM25 가중치 + private Float exactBoost = 2.0f; private Float titleBoost = 3.0f; private Float summaryBoost = 1.0f; + private Float fuzzyBoost = 1.0f; private Float chunkBoost = 0.5f; // --- [Vector & KNN 설정] --- - private Integer knnK = 60; - private Integer knnNumCandidates = 100; + private Integer knnK = 40; + private Integer knnNumCandidates = 50; private Float vectorTitleBoost = 3.0f; private Float vectorSummaryBoost = 1.5f; private Float vectorContentChunkBoost = 0.8f; @@ -37,8 +39,8 @@ public class GeneralSearchProperties { private double hybridScoreWeight = 50.0; private double personalScoreWeight = 1.0; - private int RRF_K = 60; - private int RRF_WINDOW_SIZE = 60; + private int RRF_K = 40; + private int RRF_WINDOW_SIZE = 40; // --- [rerank 가중치 설정] --- private double rerankDocumentTitleWeight = 0.6; diff --git a/src/main/java/com/techfork/domain/search/service/SearchConstants.java b/src/main/java/com/techfork/domain/search/service/SearchConstants.java index 08c059c..80d45db 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchConstants.java +++ b/src/main/java/com/techfork/domain/search/service/SearchConstants.java @@ -6,5 +6,5 @@ public class SearchConstants { static final String SUMMARY_FIELD_FORMAT = "summary^%.1f"; static final String CONTENT_CHUNKS_PATH = "contentChunks"; static final String CHUNK_TEXT_FIELD = "contentChunks.chunkText"; - static final String MINIMUM_SHOULD_MATCH = "0"; + static final String MINIMUM_SHOULD_MATCH = "1"; } \ No newline at end of file diff --git a/src/main/java/com/techfork/domain/search/service/SearchService.java b/src/main/java/com/techfork/domain/search/service/SearchService.java index 98bd273..37d6b6f 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchService.java +++ b/src/main/java/com/techfork/domain/search/service/SearchService.java @@ -5,21 +5,11 @@ public interface SearchService { - /** - * 1단계 일반 검색 (Retrieval) - * - 목적: 검색 품질 평가(Recall) 및 비로그인 사용자 검색 - * - 동작: RRF(BM25 + k-NN) 하이브리드 검색만 수행하여 상위 K개 결과를 반환합니다. - * - 개인화(Re-ranking) 로직이 적용되지 않은 순수 연관도 순입니다. - */ + List searchOnlyBm25(String query); + + List searchOnlySemantic(String query); + List searchGeneral(String query); - /** - * 2단계 개인화 검색 (Re-ranking) - * - 목적: 실제 서비스 메인 검색 (로그인 사용자용) - * - 동작: - * 1. 1단계 검색으로 후보군(Top 100) 확보 - * 2. 사용자의 프로필 벡터와 문서 간 유사도 계산 (Cosine Similarity) - * 3. 1단계 점수와 2단계 점수를 가중합하여 재정렬 - */ List searchPersonalized(String query, Long userId); } \ No newline at end of file diff --git a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java index fb977af..93ea344 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java +++ b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java @@ -44,14 +44,33 @@ public class SearchServiceImpl implements SearchService { private final UserRepository userRepository; private final Executor searchAsyncExecutor; + @Override + public List searchOnlyBm25(String query) { + log.info("DEBUG MODE: Performing Lexical Search Only (BM25)"); + List searchResults = searchOnlyBM25(query); + log.info("Found {} results from lexical search.", searchResults.size()); + return searchResults.stream() + .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) + .collect(Collectors.toList()); + } + + @Override + public List searchOnlySemantic(String query) { + log.info("DEBUG MODE: Performing Semantic Search Only"); + List searchResults = searchOnlySemantic(queryEmbedding(query)); + log.info("Found {} results from semantic search.", searchResults.size()); + return searchResults.stream() + .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) + .collect(Collectors.toList()); + } + @Override public List searchGeneral(String query) { log.info("general search started: with query: '{}'", query); - List queryVector = queryEmbedding(query); - List searchResults = performHybridSearch(query, queryVector); + List searchResults = performHybridSearch(query, queryEmbedding(query)); log.info("Found {} results from hybrid search.", searchResults.size()); return searchResults.stream() - .map(result -> result.toBuilder().summaryVector(null).build()) + .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) .collect(Collectors.toList()); } @@ -102,6 +121,26 @@ private List performHybridSearch(String query, List queryVe return hybridResultFuture.join(); } + private List searchOnlyBM25(String query) { + log.info("DEBUG MODE: Performing Lexical Search Only (BM25)"); + + List> lexicalHits = performLexicalSearch(query); + + return lexicalHits.stream() + .map(this::mapToSearchResult) + .collect(Collectors.toList()); + } + + private List searchOnlySemantic(List queryVector) { + log.info("DEBUG MODE: Performing Semantic Search Only"); + + List> semanticHits = performSemanticSearch(queryVector); + + return semanticHits.stream() + .map(this::mapToSearchResult) + .toList(); + } + private List calculateRRF(List> lexicalHits, List> semanticHits) { Map lexicalRankMap = new HashMap<>(); AtomicInteger rank = new AtomicInteger(1); @@ -176,6 +215,17 @@ private List> performLexicalSearch(String query) { .query(query) .type(TextQueryType.MostFields) .fields(titleField, summaryField) + .boost(generalSearchProperties.getExactBoost()) + ) + ) + .should(sh -> sh + .multiMatch(m -> m + .query(query) + .fields(titleField, summaryField) + .type(TextQueryType.MostFields) + .fuzziness("AUTO") + .prefixLength(1) + .boost(generalSearchProperties.getFuzzyBoost()) ) ) .should(sh -> sh From 4caca7da24ba06bf0f88d22aecaf8c637a777f89 Mon Sep 17 00:00:00 2001 From: Joon9750 Date: Thu, 27 Nov 2025 02:18:07 +0900 Subject: [PATCH 4/5] =?UTF-8?q?fix:=20searchPersonalized=20=EB=B6=88?= =?UTF-8?q?=ED=95=84=EC=9A=94=20DB=20SELECT=20=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/GeneralSearchProperties.java | 4 +- .../search/service/SearchServiceImpl.java | 162 +++++++++--------- 2 files changed, 82 insertions(+), 84 deletions(-) diff --git a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java index 83d9bc1..d622032 100644 --- a/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java +++ b/src/main/java/com/techfork/domain/search/service/GeneralSearchProperties.java @@ -23,9 +23,9 @@ public class GeneralSearchProperties { // BM25 가중치 private Float exactBoost = 2.0f; private Float titleBoost = 3.0f; - private Float summaryBoost = 1.0f; + private Float summaryBoost = 1.5f; private Float fuzzyBoost = 1.0f; - private Float chunkBoost = 0.5f; + private Float chunkBoost = 1.0f; // --- [Vector & KNN 설정] --- diff --git a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java index 93ea344..f962893 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java +++ b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java @@ -81,14 +81,12 @@ public List searchPersonalized(String query, Long userId) { List initialResults = performHybridSearch(query, queryVector); log.info("Found {} initial results from hybrid search.", initialResults.size()); - User user = userRepository.getReferenceById(userId); - - log.info("Attempting to find user profile for userId: {}", user.getId()); - Optional userProfileOpt = userProfileDocumentRepository.findByUserId(user.getId()); + log.info("Attempting to find user profile for userId: {}", userId); + Optional userProfileOpt = userProfileDocumentRepository.findByUserId(userId); log.info("Successfully fetched user profile optional. isPresent: {}", userProfileOpt.isPresent()); if (userProfileOpt.map(UserProfileDocument::getProfileVector).isEmpty()) { - log.warn("User profile or vector not found for userId: {}. Returning non-personalized results.", user.getId()); + log.warn("User profile or vector not found for userId: {}. Returning non-personalized results.", userId); return initialResults.stream() .map(result -> result.toBuilder().summaryVector(null).build()) .collect(Collectors.toList()); @@ -101,26 +99,6 @@ public List searchPersonalized(String query, Long userId) { return rerankedResults; } - private List queryEmbedding(String query) { - return embeddingClient.embed(query); - } - - private List performHybridSearch(String query, List queryVector) { - CompletableFuture>> lexicalFuture = - CompletableFuture.supplyAsync(() -> performLexicalSearch(query), searchAsyncExecutor); - CompletableFuture>> semanticFuture = - CompletableFuture.supplyAsync(() -> performSemanticSearch(queryVector), searchAsyncExecutor); - - CompletableFuture> hybridResultFuture = lexicalFuture - .thenCombine(semanticFuture, this::calculateRRF) - .exceptionally(ex -> { - log.error("Hybrid search failed", ex); - throw new RuntimeException("통합 검색 중 오류 발생", ex); - }); - - return hybridResultFuture.join(); - } - private List searchOnlyBM25(String query) { log.info("DEBUG MODE: Performing Lexical Search Only (BM25)"); @@ -141,67 +119,24 @@ private List searchOnlySemantic(List queryVector) { .toList(); } - private List calculateRRF(List> lexicalHits, List> semanticHits) { - Map lexicalRankMap = new HashMap<>(); - AtomicInteger rank = new AtomicInteger(1); - lexicalHits.forEach(hit -> lexicalRankMap.put(hit.id(), rank.getAndIncrement())); - - Map semanticRankMap = new HashMap<>(); - rank.set(1); - semanticHits.forEach(hit -> semanticRankMap.put(hit.id(), rank.getAndIncrement())); - - Map combinedResults = new HashMap<>(); - Map rrfScores = new HashMap<>(); - - processHitsForRRF(lexicalHits, lexicalRankMap, rrfScores, combinedResults); - processHitsForRRF(semanticHits, semanticRankMap, rrfScores, combinedResults); - - return combinedResults.values().stream() - .map(searchResult -> { - double finalScore = rrfScores.get(searchResult.getPostId().toString()); - return searchResult.toBuilder() - .hybridScore(finalScore) - .finalScore(finalScore) - .build(); - }) - .sorted(Comparator.comparing(SearchResult::getFinalScore).reversed()) - .limit(generalSearchProperties.getSearchSize()) - .collect(Collectors.toList()); + private List queryEmbedding(String query) { + return embeddingClient.embed(query); } - private void processHitsForRRF(List> hits, - Map rankMap, - Map rrfScores, - Map combinedResults) { - hits.forEach(hit -> { - String docId = hit.id(); - double score = 1.0 / (generalSearchProperties.getRRF_K() + rankMap.get(docId)); - rrfScores.merge(docId, score, Double::sum); - - SearchResult newResult = mapToSearchResult(hit); - - if (!combinedResults.containsKey(docId)) { - combinedResults.put(docId, newResult); - } else { - SearchResult existing = combinedResults.get(docId); - boolean needUpdate = false; - SearchResult.SearchResultBuilder builder = existing.toBuilder(); - - if (existing.getTitleVector() == null && newResult.getTitleVector() != null) { - builder.titleVector(newResult.getTitleVector()); - needUpdate = true; - } + private List performHybridSearch(String query, List queryVector) { + CompletableFuture>> lexicalFuture = + CompletableFuture.supplyAsync(() -> performLexicalSearch(query), searchAsyncExecutor); + CompletableFuture>> semanticFuture = + CompletableFuture.supplyAsync(() -> performSemanticSearch(queryVector), searchAsyncExecutor); - if (existing.getSummaryVector() == null && newResult.getSummaryVector() != null) { - builder.summaryVector(newResult.getSummaryVector()); - needUpdate = true; - } + CompletableFuture> hybridResultFuture = lexicalFuture + .thenCombine(semanticFuture, this::calculateRRF) + .exceptionally(ex -> { + log.error("Hybrid search failed", ex); + throw new RuntimeException("통합 검색 중 오류 발생", ex); + }); - if (needUpdate) { - combinedResults.put(docId, builder.build()); - } - } - }); + return hybridResultFuture.join(); } private List> performLexicalSearch(String query) { @@ -301,6 +236,69 @@ private List> performSemanticSearch(List queryVector) { } } + private List calculateRRF(List> lexicalHits, List> semanticHits) { + Map lexicalRankMap = new HashMap<>(); + AtomicInteger rank = new AtomicInteger(1); + lexicalHits.forEach(hit -> lexicalRankMap.put(hit.id(), rank.getAndIncrement())); + + Map semanticRankMap = new HashMap<>(); + rank.set(1); + semanticHits.forEach(hit -> semanticRankMap.put(hit.id(), rank.getAndIncrement())); + + Map combinedResults = new HashMap<>(); + Map rrfScores = new HashMap<>(); + + processHitsForRRF(lexicalHits, lexicalRankMap, rrfScores, combinedResults); + processHitsForRRF(semanticHits, semanticRankMap, rrfScores, combinedResults); + + return combinedResults.values().stream() + .map(searchResult -> { + double finalScore = rrfScores.get(searchResult.getPostId().toString()); + return searchResult.toBuilder() + .hybridScore(finalScore) + .finalScore(finalScore) + .build(); + }) + .sorted(Comparator.comparing(SearchResult::getFinalScore).reversed()) + .limit(generalSearchProperties.getSearchSize()) + .collect(Collectors.toList()); + } + + private void processHitsForRRF(List> hits, + Map rankMap, + Map rrfScores, + Map combinedResults) { + hits.forEach(hit -> { + String docId = hit.id(); + double score = 1.0 / (generalSearchProperties.getRRF_K() + rankMap.get(docId)); + rrfScores.merge(docId, score, Double::sum); + + SearchResult newResult = mapToSearchResult(hit); + + if (!combinedResults.containsKey(docId)) { + combinedResults.put(docId, newResult); + } else { + SearchResult existing = combinedResults.get(docId); + boolean needUpdate = false; + SearchResult.SearchResultBuilder builder = existing.toBuilder(); + + if (existing.getTitleVector() == null && newResult.getTitleVector() != null) { + builder.titleVector(newResult.getTitleVector()); + needUpdate = true; + } + + if (existing.getSummaryVector() == null && newResult.getSummaryVector() != null) { + builder.summaryVector(newResult.getSummaryVector()); + needUpdate = true; + } + + if (needUpdate) { + combinedResults.put(docId, builder.build()); + } + } + }); + } + private SearchResult mapToSearchResult(Hit hit) { PostDocument doc = hit.source(); double score = Objects.requireNonNullElse(hit.score(), 0.0); From 0bb667afa282e888a8e69c883aa32c1ff429971f Mon Sep 17 00:00:00 2001 From: Joon9750 Date: Thu, 27 Nov 2025 02:37:25 +0900 Subject: [PATCH 5/5] =?UTF-8?q?chore:=20log=20=EC=88=98=EC=A0=95=20?= =?UTF-8?q?=EB=B0=8F=20=EB=A9=94=EC=86=8C=EB=93=9C=20=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../search/service/SearchServiceImpl.java | 132 ++++++++++-------- .../search/GroundTruthGeneratorTest.java | 5 +- .../search/SearchQualityEvaluationTest.java | 4 - 3 files changed, 71 insertions(+), 70 deletions(-) diff --git a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java index f962893..de156b8 100644 --- a/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java +++ b/src/main/java/com/techfork/domain/search/service/SearchServiceImpl.java @@ -9,9 +9,7 @@ import com.techfork.domain.post.document.PostDocument; import com.techfork.domain.search.dto.SearchResult; import com.techfork.domain.user.document.UserProfileDocument; -import com.techfork.domain.user.entity.User; import com.techfork.domain.user.repository.UserProfileDocumentRepository; -import com.techfork.domain.user.repository.UserRepository; import com.techfork.global.llm.EmbeddingClient; import com.techfork.global.util.VectorUtil; import java.io.IOException; @@ -33,7 +31,7 @@ @Slf4j @Service -@Transactional +@Transactional(readOnly = true) @RequiredArgsConstructor public class SearchServiceImpl implements SearchService { @@ -41,62 +39,62 @@ public class SearchServiceImpl implements SearchService { private final EmbeddingClient embeddingClient; private final GeneralSearchProperties generalSearchProperties; private final UserProfileDocumentRepository userProfileDocumentRepository; - private final UserRepository userRepository; private final Executor searchAsyncExecutor; @Override public List searchOnlyBm25(String query) { - log.info("DEBUG MODE: Performing Lexical Search Only (BM25)"); List searchResults = searchOnlyBM25(query); - log.info("Found {} results from lexical search.", searchResults.size()); - return searchResults.stream() - .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) - .collect(Collectors.toList()); + return stripVectors(searchResults); } @Override public List searchOnlySemantic(String query) { - log.info("DEBUG MODE: Performing Semantic Search Only"); List searchResults = searchOnlySemantic(queryEmbedding(query)); - log.info("Found {} results from semantic search.", searchResults.size()); - return searchResults.stream() - .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) - .collect(Collectors.toList()); + return stripVectors(searchResults); } @Override public List searchGeneral(String query) { - log.info("general search started: with query: '{}'", query); - List searchResults = performHybridSearch(query, queryEmbedding(query)); - log.info("Found {} results from hybrid search.", searchResults.size()); - return searchResults.stream() - .map(result -> result.toBuilder().titleVector(null).summaryVector(null).build()) - .collect(Collectors.toList()); + log.debug("general search started: with query: '{}'", query); + long startTime = System.currentTimeMillis(); + + List queryVector = queryEmbedding(query); + List searchResults = performHybridSearch(query, queryVector); + + long duration = System.currentTimeMillis() - startTime; + log.info("Search completed. Query='{}', Results={}, Time={}ms", query, searchResults.size(), duration); + + return stripVectors(searchResults); } @Override public List searchPersonalized(String query, Long userId) { - log.info("Personalized search started for userId: {} with query: '{}'", userId, query); + log.debug("Personalized search started for userId: {} with query: '{}'", userId, query); + long startTime = System.currentTimeMillis(); + List queryVector = queryEmbedding(query); + List initialResults = performHybridSearch(query, queryVector); - log.info("Found {} initial results from hybrid search.", initialResults.size()); + log.debug("Initial hybrid search found {} documents.", initialResults.size()); - log.info("Attempting to find user profile for userId: {}", userId); Optional userProfileOpt = userProfileDocumentRepository.findByUserId(userId); - log.info("Successfully fetched user profile optional. isPresent: {}", userProfileOpt.isPresent()); + boolean hasProfile = userProfileOpt.isPresent() && userProfileOpt.get().getProfileVector() != null; - if (userProfileOpt.map(UserProfileDocument::getProfileVector).isEmpty()) { - log.warn("User profile or vector not found for userId: {}. Returning non-personalized results.", userId); - return initialResults.stream() - .map(result -> result.toBuilder().summaryVector(null).build()) - .collect(Collectors.toList()); + if (!hasProfile) { + long duration = System.currentTimeMillis() - startTime; + log.info("Personalized Search [FALLBACK]. UserID={}, Query='{}', Results={}, Time={}ms (Reason: No Profile)", + userId, query, initialResults.size(), duration); + return stripVectors(initialResults); } - log.info("User profile and vector found. Proceeding to personal reranking."); float[] userProfileVector = userProfileOpt.get().getProfileVector(); List rerankedResults = personalReranking(initialResults, userProfileVector); - log.info("Personal reranking complete. Returning {} results.", rerankedResults.size()); - return rerankedResults; + + long duration = System.currentTimeMillis() - startTime; + log.info("Personalized Search [RERANKED]. UserID={}, Query='{}', Results={}, Time={}ms", + userId, query, rerankedResults.size(), duration); + + return stripVectors(rerankedResults); } private List searchOnlyBM25(String query) { @@ -124,15 +122,27 @@ private List queryEmbedding(String query) { } private List performHybridSearch(String query, List queryVector) { - CompletableFuture>> lexicalFuture = - CompletableFuture.supplyAsync(() -> performLexicalSearch(query), searchAsyncExecutor); - CompletableFuture>> semanticFuture = - CompletableFuture.supplyAsync(() -> performSemanticSearch(queryVector), searchAsyncExecutor); + CompletableFuture>> lexicalFuture = CompletableFuture.supplyAsync(() -> { + long t = System.currentTimeMillis(); + var result = performLexicalSearch(query); + log.debug("Lexical search done. Hits={}, Time={}ms", result.size(), System.currentTimeMillis() - t); + return result; + }, searchAsyncExecutor); + + CompletableFuture>> semanticFuture = CompletableFuture.supplyAsync(() -> { + long t = System.currentTimeMillis(); + var result = performSemanticSearch(queryVector); + log.debug("Semantic search done. Hits={}, Time={}ms", result.size(), System.currentTimeMillis() - t); + return result; + }, searchAsyncExecutor); CompletableFuture> hybridResultFuture = lexicalFuture - .thenCombine(semanticFuture, this::calculateRRF) + .thenCombine(semanticFuture, (lexicalHits, semanticHits) -> { + log.debug("Merging results: Lexical Hits={}, Semantic Hits={}", lexicalHits.size(), semanticHits.size()); + return calculateRRF(lexicalHits, semanticHits); + }) .exceptionally(ex -> { - log.error("Hybrid search failed", ex); + log.error("Hybrid search failed for query: '{}'", query, ex); throw new RuntimeException("통합 검색 중 오류 발생", ex); }); @@ -197,30 +207,9 @@ private List> performSemanticSearch(List queryVector) { int numCandidates = generalSearchProperties.getKnnNumCandidates(); List knnSearches = new ArrayList<>(); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("titleEmbedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorTitleBoost()) - )); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("summaryEmbedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorSummaryBoost()) - )); - - knnSearches.add(KnnSearch.of(ks -> ks - .field("contentChunks.embedding") - .queryVector(queryVector) - .k(k) - .numCandidates(numCandidates) - .boost(generalSearchProperties.getVectorContentChunkBoost()) - )); + knnSearches.add(createKnnSearch("titleEmbedding", queryVector, k, numCandidates, generalSearchProperties.getVectorTitleBoost())); + knnSearches.add(createKnnSearch("summaryEmbedding", queryVector, k, numCandidates, generalSearchProperties.getVectorSummaryBoost())); + knnSearches.add(createKnnSearch("contentChunks.embedding", queryVector, k, numCandidates, generalSearchProperties.getVectorContentChunkBoost())); try { SearchResponse response = elasticsearchClient.search(s -> s @@ -236,6 +225,16 @@ private List> performSemanticSearch(List queryVector) { } } + private KnnSearch createKnnSearch(String field, List vector, int k, int numCandidates, float boost) { + return KnnSearch.of(ks -> ks + .field(field) + .queryVector(vector) + .k(k) + .numCandidates(numCandidates) + .boost(boost) + ); + } + private List calculateRRF(List> lexicalHits, List> semanticHits) { Map lexicalRankMap = new HashMap<>(); AtomicInteger rank = new AtomicInteger(1); @@ -347,4 +346,13 @@ private List personalReranking(List initialResults, .sorted(Comparator.comparing(SearchResult::getFinalScore).reversed()) .collect(Collectors.toList()); } + + private List stripVectors(List results) { + return results.stream() + .map(result -> result.toBuilder() + .titleVector(null) + .summaryVector(null) + .build()) + .collect(Collectors.toList()); + } } \ No newline at end of file diff --git a/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java b/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java index cfcc2ad..b9ddcc8 100644 --- a/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java +++ b/src/test/java/com/techfork/domain/search/GroundTruthGeneratorTest.java @@ -34,9 +34,6 @@ class GroundTruthGeneratorTest { @Autowired private UserProfileDocumentRepository userProfileDocumentRepository; - @Autowired - private UserRepository userRepository; - @Autowired private Executor searchAsyncExecutor; @@ -46,7 +43,7 @@ void generateGroundTruthTemplate() { List keywords = List.of("스프링", "Java", "Spring Boot", "JPA", "Docker", "MSA", "배치", "클라우드"); SearchServiceImpl searchService = new SearchServiceImpl( - elasticsearchClient, embeddingClient, generalSearchProperties, userProfileDocumentRepository, userRepository, searchAsyncExecutor); + elasticsearchClient, embeddingClient, generalSearchProperties, userProfileDocumentRepository, searchAsyncExecutor); System.out.println("========== [Copy Below JSON] =========="); System.out.println("["); diff --git a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java index a3c0db4..4daf91c 100644 --- a/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java +++ b/src/test/java/com/techfork/domain/search/SearchQualityEvaluationTest.java @@ -40,9 +40,6 @@ class SearchQualityEvaluationTest { @Autowired private UserProfileDocumentRepository userProfileDocumentRepository; - @Autowired - private UserRepository userRepository; - @Autowired @Qualifier("searchAsyncExecutor") private Executor searchAsyncExecutor; @@ -77,7 +74,6 @@ void evaluateSearchQualityAcrossScenarios() throws IOException { embeddingClient, props, userProfileDocumentRepository, - userRepository, searchAsyncExecutor );