diff --git a/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs b/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs index 02de238379..cc60aa6a3c 100644 --- a/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs +++ b/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs @@ -3663,37 +3663,6 @@ public void TestLiftImportChangingAffixToStem() Assert.That(entry.AlternateFormsOS.First().LiftResidue, Does.Contain("look for this")); } - private static readonly string[] s_LiftPronunciations = { - "", - "", - "
", - "", - "", - "
", - "", - "", - "
test
", - "
", - "", - "", - "
pronunciation
", - "
", - "", - "
pronunciation
", - "
", - "
", - "", - "", - "
testb
", - "
", - "", - "", - "
pronunciation
", - "
", - "
", - "
" - }; - private string[] _minimalLiftData = { "", "", @@ -3711,41 +3680,712 @@ public void TestLiftImportChangingAffixToStem() "" }; - ///-------------------------------------------------------------------------------------- /// - /// Test LIFT merger for problems merging pronunciations. - /// To produce the problem that led to this test, an entry with one or formless pronunciation - /// gets merged with a LIFT file that has the same entry with other pronunciations. (LT-14725) + /// Test merging pronunciations when entry has formless pronunciation and LIFT has pronunciations + /// with forms. + /// Verifies that formless pronunciations don't interfere with form-based matching. (LT-14725) + /// + [Test] + public void MergePronunciations_EntryHasFormlessPronunciation_MergesCorrectly() + { + SetWritingSystems("fr es"); + + var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es"); + + // Setup: Create entry with pronunciations including a formless one + var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test"); + AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); // 'fr' pronunciation + AddPronunciation(entry, "", -1); // blank pronunciation, no form + + var liftXml = @" + +
+ + +
+ + +
test
+
+ + +
pronunciation
+
+ +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should have 3 pronunciations total + // - 'pronunciation' in 'fr' (merged) + // - 'pronunciation' in 'es' (added) + // - blank pronunciation with no form (unchanged) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(3), + "Should have merged 'fr', added 'es', and kept formless pronunciation"); + var frPronuns = entry.PronunciationsOS.Count(p => + p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation"); + Assert.That(frPronuns, Is.EqualTo(1), "Should have one 'fr' pronunciation"); + var esPronuns = entry.PronunciationsOS.Count(p => + p.Form.get_String(wsEs).Text == "pronunciation"); + Assert.That(esPronuns, Is.EqualTo(1), "Should have one 'es' pronunciation"); + var formlessPronuns = entry.PronunciationsOS.Count(p => + p.Form.StringCount == 0); + Assert.That(formlessPronuns, Is.EqualTo(1), "Should have one formless pronunciation"); + } + + /// + /// Test merging pronunciations with media files when entry already has matching media. + /// Verifies media files are merged correctly when pronunciation forms match. 
(LT-14725) /// - ///-------------------------------------------------------------------------------------- [Test] - public void TestLiftMergeOfPronunciations() + public void MergePronunciations_MatchingMediaFiles_MergesIntoExisting() { SetWritingSystems("fr es"); + var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es"); + + // Setup: Create entry with pronunciation that has one media file + var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test"); + var pronunciation = AddPronunciation(entry, "pronunciation", wsEs); + + // Add existing media file + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "test_audio3.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronunciation.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + var liftXml = @" + +
+ + +
+ + +
test
+
+ + +
pronunciation
+ + +
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should merge into existing pronunciation due to matching form and media + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1), + "Should merge into existing pronunciation with matching form and media"); + Assert.That(pronunciation.MediaFilesOS, Has.Count.EqualTo(2), + "Should have 2 media files after merge (original + new)"); + } + + /// + /// Test merging multiple duplicate pronunciations with same form but different media. + /// Verifies that duplicates in the entry don't interfere with proper matching. (LT-14725) + /// + [Test] + public void MergePronunciations_DuplicateFormsWithDifferentMedia_MergesBestMatch() + { + SetWritingSystems("fr es"); + + var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es"); + var repoEntry = Cache.ServiceLocator.GetInstance(); + + // Setup: Create entry with duplicate pronunciations, one with media, one without + var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test"); + var pronunWithMedia = AddPronunciation(entry, "pronunciation", wsEs); + var pronunWithoutMedia = AddPronunciation(entry, "pronunciation", wsEs); + + // Add media file to first pronunciation + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "test_audio3.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronunWithMedia.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + var liftXml = @" + +
+ + +
+ + +
test
+
+ + +
pronunciation
+ + +
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should still have 2 pronunciations (merged into best match) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), + "Should keep both duplicate pronunciations"); + // The pronunciation with matching media should get the merge + Assert.That(pronunWithMedia.MediaFilesOS, Has.Count.EqualTo(2), + "Pronunciation with matching media should receive merged media files"); + Assert.That(pronunWithoutMedia.MediaFilesOS, Has.Count.EqualTo(0), + "Pronunciation without media should remain unchanged"); + } + + /// + /// Test comprehensive merge scenario with multiple pronunciations in different languages. + /// Verifies the complete merging logic with forms in French and Spanish. (LT-14725) + /// + [Test] + public void MergePronunciations_MultipleLanguagesAndForms_MergesAllCorrectly() + { + SetWritingSystems("fr es"); + + var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es"); var repoEntry = Cache.ServiceLocator.GetInstance(); var repoSense = Cache.ServiceLocator.GetInstance(); Assert.AreEqual(0, repoEntry.Count); Assert.AreEqual(0, repoSense.Count); - // The entries should already be present. 
+ // Setup: Create first entry with multiple pronunciations var entry1 = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test"); - AddPronunciation(entry1, "pronunciation", Cache.DefaultVernWs); // add 'fr' pronunciation - AddPronunciation(entry1, "", -1); // add blank pronunciation, no form + AddPronunciation(entry1, "pronunciation", Cache.DefaultVernWs); // 'fr' + AddPronunciation(entry1, "", -1); // blank pronunciation + AddPronunciation(entry1, "mispronunciation", wsEs); + var misPronun = AddPronunciation(entry1, "mispronunciation", wsEs); + + // Add media file to one mispronunciation + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "test_audio3.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + misPronun.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + // Setup: Create second entry with single pronunciation var entry2 = CreateSimpleStemEntry("8d735e34-c555-4390-a0af-21a12e1dd6ff", "testb"); - AddPronunciation(entry2, "pronunciation", Cache.DefaultVernWs); // add 'fr' pronunciation + AddPronunciation(entry2, "pronunciation", Cache.DefaultVernWs); // 'fr' + + var liftXml = @" + +
+ + +
+ + +
test
+
+ + +
pronunciation
+
+ +
pronunciation
+
+ +
mispronunciation
+ + +
+
+ + +
testb
+
+ + +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 2); + File.Delete(sOrigFile); - var sOrigFile = CreateInputFile(s_LiftPronunciations); + // Verify overall counts + Assert.AreEqual(2, repoEntry.Count, "Should have exactly 2 entries"); + Assert.AreEqual(0, repoSense.Count, "Should not create any senses"); + + var repoPronunciation = + Cache.ServiceLocator.GetInstance(); + Assert.AreEqual(7, repoPronunciation.Count, "Should have 7 total pronunciations"); + + // Verify entry1: Should have 5 pronunciations after merge + // - 'pronunciation' in 'fr' (merged) + // - 'pronunciation' in 'es' (added from LIFT) + // - 'mispronunciation' in 'es' with 2 media files (merged) + // - 'mispronunciation' in 'es' with no media files (unchanged) + // - blank pronunciation with no form (unchanged) + Assert.That(entry1.PronunciationsOS, Has.Count.EqualTo(5), + "Entry 'test' should have 5 pronunciations"); + Assert.That(misPronun.MediaFilesOS, Has.Count.EqualTo(2), + "Mispronunciation should have 2 media files after merge"); + + // Verify entry2: Should have 2 pronunciations after merge + // - 'pronunciation' in 'fr' (original) + // - 'pronunciation' in 'es' (added from LIFT) + Assert.That(entry2.PronunciationsOS, Has.Count.EqualTo(2), + "Entry 'testb' should have 2 pronunciations"); + } - // Try to merge in two LIFT file entries that match our two existing entries - TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 2); + /// + /// Test merging when entry has pronunciation with media but LIFT has same form without media. + /// Verifies that form-only matches work correctly. 
(LT-14725) + /// + [Test] + public void MergePronunciations_EntryHasMediaLiftDoesNot_MergesOnForm() + { + SetWritingSystems("fr"); + + // Setup: Create entry with pronunciation that has media + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // Add media file to entry pronunciation + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "existing_audio.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronun.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + var liftXml = @" + +
+ +
test
+ + +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); File.Delete(sOrigFile); - // Verification - Assert.AreEqual(2, repoEntry.Count, "Created some unnecessary entries."); - Assert.AreEqual(0, repoSense.Count, "Created some unnecessary senses."); - var repoPronunciation = Cache.ServiceLocator.GetInstance(); - Assert.AreEqual(5, repoPronunciation.Count, "Wrong number of remaining LexPronunciation objects"); + // Verify: Should merge into existing pronunciation based on form match + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1), + "Should merge based on form match even though LIFT has no media"); + Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(1), + "Original media file should be preserved"); + } + + /// + /// Test media file matching is case-insensitive + /// + [Test] + public void MergePronunciations_MediaFileMatching_CaseInsensitive() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // Add media file with uppercase extension + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "Test_Audio.MP3"; // uppercase + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronun.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + var liftData = new[] + { + "", + "", + "
", + "", + "
test
", + "", + "", + "
pronunciation
", + "", // lowercase + "
", + "
", + "
" + }; + + var sOrigFile = CreateInputFile(liftData); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Should merge into existing pronunciation (not create new one) despite case difference + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1), + "Media file matching should be case-insensitive"); + } + + /// + /// Test that blank pronunciations in both LIFT and entry match and merge + /// + [Test] + public void MergePronunciations_BothBlankForms_ShouldMatch() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + AddPronunciation(entry, "", -1); // blank pronunciation + + var liftData = new[] + { + "", + "", + "
", + "", + "
test
", + "", + "", // blank pronunciation in LIFT + "
", + "
" + }; + + var sOrigFile = CreateInputFile(liftData); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Should still have only 1 pronunciation (merged blank with blank) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1), + "Blank pronunciation should merge with blank pronunciation"); + } + + /// + /// Test pronunciation with media but no matching form creates new entry + /// + [Test] + public void MergePronunciations_MediaWithoutFormMatch_CreatesNew() + { + SetWritingSystems("fr es"); + + var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es"); + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); // fr only + + var liftData = new[] + { + "", + "", + "
", + "", + "
test
", + "", + "", + "
differentform
", // different form + "", + "
", + "
", + "
" + }; + + var sOrigFile = CreateInputFile(liftData); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Should create new pronunciation since form doesn't match (score = 0) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), + "Non-matching form with media should create new pronunciation"); + Assert.That(entry.PronunciationsOS.Any(p => + p.Form.get_String(wsEs).Text == "differentform"), + "Spanish pronunciation should be added"); + } + + /// + /// Test best match selection when multiple entry pronunciations match + /// + [Test] + public void MergePronunciations_MultipleMatches_SelectsBestScore() + { + SetWritingSystems("fr es"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + + // Create entry with two matching pronunciations, one with media + var pronun1 = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + var pronun2 = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // Add media to pronun2 + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "matching_audio.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronun2.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + var liftData = new[] + { + "", + "", + "
", + "", + "
test
", + "", + "", + "
pronunciation
", + "", + "
", + "
", + "
" + }; + + var sOrigFile = CreateInputFile(liftData); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Should still have 2 pronunciations (merged into best match) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), + "Should merge into best matching pronunciation"); + // pronun2 should have been selected due to higher score (form + media match) + Assert.That(pronun2.MediaFilesOS, Has.Count.EqualTo(1), + "Pronunciation with media should be selected as best match"); + Assert.That(pronun1.MediaFilesOS, Has.Count.EqualTo(0), + "Pronunciation without media should not be selected"); + } + + /// + /// Test partial media file matches contribute to score + /// + [Test] + public void MergePronunciations_PartialMediaMatch_CorrectScore() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // Add two media files, only one will match + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + + var mediaFile1 = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile1); + mediaFile1.InternalPath = "audio1.mp3"; + var audioLink1 = Cache.ServiceLocator.GetInstance().Create(); + pronun.MediaFilesOS.Add(audioLink1); + audioLink1.MediaFileRA = mediaFile1; + + var mediaFile2 = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile2); + mediaFile2.InternalPath = "audio2.mp3"; + var audioLink2 = Cache.ServiceLocator.GetInstance().Create(); + pronun.MediaFilesOS.Add(audioLink2); + audioLink2.MediaFileRA = mediaFile2; + + var liftData = new[] + { + "", + "", + "
", + "", + "
test
", + "", + "", + "
pronunciation
", + "", // only this one matches + "", // this doesn't exist in entry + "
", + "
", + "
" + }; + + var sOrigFile = CreateInputFile(liftData); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Should merge since there's a partial match (form + at least one media file) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1), + "Should merge when at least one media file matches"); + // After merge, should have 3 media files (original 2 + 1 new from LIFT) + Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(3), + "Should retain existing media files after merge"); + } + + /// + /// Test when two LIFT pronunciations compete for the same best match. + /// The LIFT pronunciation with higher score should win the merge, the other should create new. + /// + [Test] + public void MergePronunciations_TwoLiftPronunciationsShareBestMatch_HigherScoreWinsMerge() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // Setup: Add media file to entry pronunciation + var mediaFolder = Cache.ServiceLocator.GetInstance().Create(); + Cache.LangProject.MediaOC.Add(mediaFolder); + var mediaFile = Cache.ServiceLocator.GetInstance().Create(); + mediaFolder.FilesOC.Add(mediaFile); + mediaFile.InternalPath = "matching_audio.mp3"; + var audioLink = Cache.ServiceLocator.GetInstance().Create(); + pronun.MediaFilesOS.Add(audioLink); + audioLink.MediaFileRA = mediaFile; + + // LIFT has 2 pronunciations with same form: + // 1. First has matching media (higher score: form + media match) + // 2. Second has no media (lower score: form match only) + // Both would select the same entry pronunciation as best match, + // but only the first should merge, second should create new + var liftXml = @" + +
+ +
test
+ + +
pronunciation
+ +
+ +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should have 2 pronunciations total + // - Original pronunciation merged with first LIFT pronunciation (has matching media) + // - New pronunciation created for second LIFT pronunciation (no media match) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), + "Second LIFT pronunciation should create new entry since best match was already claimed"); + + // The original pronunciation should have the matching media + Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(1), + "Original pronunciation should retain/merge with matching media"); + Assert.That(pronun.MediaFilesOS[0].MediaFileRA.InternalPath, Is.EqualTo("matching_audio.mp3"), + "Original pronunciation should have the matching media file"); + + // Verify both pronunciations have the same form + var frPronuns = entry.PronunciationsOS.Where(p => + p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList(); + Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form"); + } + + /// + /// Test that two identical LIFT pronunciations merge one-to-one into two identical pronunciations already in the cache. + /// + [Test] + public void MergePronunciations_DuplicateLiftForms_AssignToDistinctCachePronunciations() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); + + // LIFT has 2 pronunciations with same form and no media: + // Both would select the first entry pronunciation as best match + // The first LIFT pronunciation should merge with the first pronunciation in the cache + // and the second LIFT pronunciation should merge with the second pronunciation in the cache (since the first is already claimed) + var liftXml = @" + +
+ +
test
+ + +
pronunciation
+
+ +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should have 2 pronunciations total + // - First LIFT pronunciation merged with first entry pronunciation (even though both match exactly) + // - Second LIFT pronunciation merged with second entry pronunciation (because first match was already claimed) + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), + "Both LIFT pronunciations should merge into the existing pronunciations without creating a new one"); + + // Verify both pronunciations have the same form + var frPronuns = entry.PronunciationsOS.Where(p => + p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList(); + Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form"); + } + + [Test] + public void MergePronunciations_EntryMatchedWithNoPronun_PronunsAdded() + { + SetWritingSystems("fr"); + + var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test"); + + // LIFT has 2 pronunciations with same form: + // 1. One with a media file + // 2. Second with no media file + var liftXml = @" + +
+ +
test
+ + +
pronunciation
+ +
+ +
pronunciation
+
+
+
"; + + var sOrigFile = CreateInputFile(liftXml.Split('\n')); + TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1); + File.Delete(sOrigFile); + + // Verify: Should have 2 pronunciations total, both from the LIFT file + Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), "Both pronunciations should be added to the matched entry."); + + // The first pronunciation should have the matching media + Assert.That(entry.PronunciationsOS[0].MediaFilesOS, Has.Count.EqualTo(1), + "First pronunciation should have the matching media"); + Assert.That(entry.PronunciationsOS[0].MediaFilesOS[0].MediaFileRA.InternalPath.EndsWith("matching_audio.mp3"), + Is.True, "First pronunciation should have the matching media file"); + Assert.That(entry.PronunciationsOS[1].MediaFilesOS, Has.Count.EqualTo(0)); + + // Verify both pronunciations have the same form + var frPronuns = entry.PronunciationsOS.Where(p => + p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList(); + Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form"); } [Test] @@ -3966,12 +4606,13 @@ private ILexEntry CreateSimpleStemEntry(string entryGuid, string form) return entry; } - private void AddPronunciation(ILexEntry entry, string pronunciation, int ws) + private ILexPronunciation AddPronunciation(ILexEntry entry, string pronunciation, int ws) { var lexPronunciation = Cache.ServiceLocator.GetInstance().Create(); entry.PronunciationsOS.Add(lexPronunciation); if (ws > 0) lexPronunciation.Form.set_String(ws, TsStringUtils.MakeString(pronunciation, ws)); + return lexPronunciation; } [Test] diff --git a/Src/LexText/LexTextControls/LiftMerger.cs b/Src/LexText/LexTextControls/LiftMerger.cs index 2ea98c8896..982abae06f 100644 --- a/Src/LexText/LexTextControls/LiftMerger.cs +++ b/Src/LexText/LexTextControls/LiftMerger.cs @@ -10,17 +10,17 @@ using System.Linq; using System.Text.RegularExpressions; using System.Xml; -using SIL.Lift; -using SIL.Lift.Parsing; 
-using SIL.LCModel.Core.Cellar; -using SIL.LCModel.Core.Text; -using SIL.LCModel.Core.WritingSystems; -using SIL.LCModel.Core.KernelInterfaces; using SIL.FieldWorks.Common.FwUtils; using SIL.LCModel; using SIL.LCModel.Application; +using SIL.LCModel.Core.Cellar; +using SIL.LCModel.Core.KernelInterfaces; +using SIL.LCModel.Core.Text; +using SIL.LCModel.Core.WritingSystems; using SIL.LCModel.DomainServices; using SIL.LCModel.Utils; +using SIL.Lift; +using SIL.Lift.Parsing; using SIL.Utils; namespace SIL.FieldWorks.LexText.Controls @@ -3325,22 +3325,23 @@ private void CreateEntryPronunciations(ILexEntry le, CmLiftEntry entry) private void MergeEntryPronunciations(ILexEntry le, CmLiftEntry entry) { - Dictionary dictHvoPhon = new Dictionary(); - foreach (CmLiftPhonetic phon in entry.Pronunciations) + var matchedEntries = FindBestPronunciationMatches(entry.Pronunciations, le.PronunciationsOS); + foreach (var matchedEntry in matchedEntries) { + var entryPronunciation = matchedEntry.Value; + var liftPronunciation = matchedEntry.Key; IgnoreNewWs(); - ILexPronunciation pron = FindMatchingPronunciation(le, dictHvoPhon, phon); - if (pron == null) + if (entryPronunciation == null) { - pron = CreateNewLexPronunciation(); - le.PronunciationsOS.Add(pron); - dictHvoPhon.Add(pron.Hvo, phon); + entryPronunciation = CreateNewLexPronunciation(); + le.PronunciationsOS.Add(entryPronunciation); } - MergeInMultiUnicode(pron.Form, LexPronunciationTags.kflidForm, phon.Form, pron.Guid); - MergePronunciationMedia(pron, phon); - ProcessPronunciationFieldsAndTraits(pron, phon); - StoreAnnotationsAndDatesInResidue(pron, phon); - SavePronunciationWss(phon.Form.Keys); + MergeInMultiUnicode(entryPronunciation.Form, LexPronunciationTags.kflidForm, + liftPronunciation.Form, entryPronunciation.Guid); + MergePronunciationMedia(entryPronunciation, liftPronunciation); + ProcessPronunciationFieldsAndTraits(entryPronunciation, liftPronunciation); + StoreAnnotationsAndDatesInResidue(entryPronunciation, 
liftPronunciation); + SavePronunciationWss(liftPronunciation.Form.Keys); } } @@ -3522,87 +3523,113 @@ private ICmMedia FindMatchingMedia(ILcmOwningSequence rgmedia, string } /// - /// Find the best matching pronunciation in the lex entry (if one exists) for the imported LiftPhonetic phon. + /// Find the best matching pronunciation in the lex entry (if one exists) for each imported LiftPhonetic. /// If neither has any form, then only the media filenames are compared. If both have forms, then both forms - /// and media filenames are compared. At least one form must match if any forms exist on either side. - /// If either has a media file, both must have the same number of media files, and at least one filename - /// must match. - /// As a side-effect, dictHvoPhon has the matching hvo keyed to the imported data (if one exists). + /// and media filenames are compared. An entry pronunciation whose media filenames match scores higher than a form-only match. If the imported form + /// has no media files, an entry pronunciation that also has none is preferred; equal scores go to the earliest match. If multiple imported forms match the same + /// entry form, the one with the highest score claims it and the others fall back to their next-best unused match. /// - /// best match, or null - private ILexPronunciation FindMatchingPronunciation(ILexEntry le, Dictionary dictHvoPhon, - CmLiftPhonetic phon) + /// Dictionary with the best match for each lift pronunciation. The match is null when no unused entry pronunciation matches.
+ private Dictionary FindBestPronunciationMatches( + List liftPronunciations, IList entryPronunciations) { - ILexPronunciation lexpron = null; - ILexPronunciation lexpronNoMedia = null; - int cMatches = 0; - foreach (ILexPronunciation pron in le.PronunciationsOS) + // Gather the match score for every combination of lift and entry pronunciations + var matchScores = new List>(); + foreach (var liftPron in liftPronunciations) + { + foreach (var entryPron in entryPronunciations) + { + var score = GetPronunciationMatchScore(liftPron, entryPron); + if(score > 0) + matchScores.Add(Tuple.Create(liftPron, entryPron, score)); + } + } + // sort by best score descending + var sortedMatches = matchScores.OrderByDescending(t => t.Item3).ToList(); + // Each entry pronunciation can only be used once, so store the used pronunciations. + var usedEntryPronunciations = new HashSet(); + // The result will be the best match for each lift pronunciation, or null if no matches are good enough + var results = new Dictionary(); + foreach (var match in sortedMatches) { - if (dictHvoPhon.ContainsKey(pron.Hvo)) + var liftPron = match.Item1; + var entryPron = match.Item2; + + // Skip if this liftPron already has a result + if (results.ContainsKey(liftPron)) + { continue; - bool fFormMatches = false; - int cCurrent = 0; - IgnoreNewWs(); - if (phon.Form.Count == 0) + } + // If this entryPron is already used, keep looking for next match + if (entryPron != null && usedEntryPronunciations.Contains(entryPron)) { - Dictionary forms = GetAllUnicodeAlternatives(pron.Form); - fFormMatches = (forms.Count == 0); + continue; } - else + // Found a valid match + if (entryPron != null) { - cCurrent = MultiUnicodeStringMatches(pron.Form, phon.Form, false, Guid.Empty, 0); - fFormMatches = (cCurrent > cMatches); + usedEntryPronunciations.Add(entryPron); } - if (fFormMatches) + results.Add(liftPron, entryPron); + } + + // Any liftPron we saw but didn't match gets null + foreach (var liftPron in liftPronunciations) 
+ { + if (!results.ContainsKey(liftPron)) { - cMatches = cCurrent; - if (phon.Media.Count == pron.MediaFilesOS.Count) + results.Add(liftPron, null); + } + } + + return results; + } + + private int GetPronunciationMatchScore(CmLiftPhonetic liftPronunciation, ILexPronunciation entryPronunciation) + { + var formMatches = 0; + if (liftPronunciation.Form.Count == 0) + { + Dictionary forms = GetAllUnicodeAlternatives(entryPronunciation.Form); + formMatches = forms.Count == 0 ? 1 : 0; + } + else + { + formMatches = MultiUnicodeStringMatches(entryPronunciation.Form, liftPronunciation.Form, false, Guid.Empty, 0); + } + if (formMatches > 0) + { + int mediaMatches = 0; + if (liftPronunciation.Media.Count == 0) + { + // If the imported form has no media files set the score based on if the entry form has media files. + if (entryPronunciation.MediaFilesOS.Count == 0) + mediaMatches = 1; // both have no media files + } + else if (entryPronunciation.MediaFilesOS.Count > 0) + { + // Check if at least one media file matches + foreach (var file in entryPronunciation.MediaFilesOS) { - int cFilesMatch = 0; - for (int i = 0; i < phon.Media.Count; ++i) + var cf = file.MediaFileRA; + if (cf != null) { - string sURL = phon.Media[i].Url; - if (sURL == null) + var path = cf.InternalPath; + if (path == null) continue; - string sFile = Path.GetFileName(sURL); - for (int j = 0; j < pron.MediaFilesOS.Count; ++j) + path = Path.GetFileName(path).ToLowerInvariant(); + if (liftPronunciation.Media.Any(m => m.Url != null + && Path.GetFileName(m.Url).ToLowerInvariant() == path)) { - ICmFile cf = pron.MediaFilesOS[i].MediaFileRA; - if (cf != null) - { - string sPath = cf.InternalPath; - if (sPath == null) - continue; - if (sFile.ToLowerInvariant() == Path.GetFileName(sPath).ToLowerInvariant()) - ++cFilesMatch; - } + mediaMatches++; } } - if (phon.Media.Count == 0 || cFilesMatch > 0) - lexpron = pron; - else - lexpronNoMedia = pron; - } - else - { - lexpronNoMedia = pron; } } + // score will be the 
combined matches for forms and media files + return mediaMatches + formMatches; } - if (lexpron != null) - { - dictHvoPhon.Add(lexpron.Hvo, phon); - return lexpron; - } - else if (lexpronNoMedia != null) - { - dictHvoPhon.Add(lexpronNoMedia.Hvo, phon); - return lexpronNoMedia; - } - else - { - return null; - } + return 0; // no form match, no score } private Dictionary GetAllUnicodeAlternatives(ITsMultiString tsm)