diff --git a/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs b/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs
index 02de238379..cc60aa6a3c 100644
--- a/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs
+++ b/Src/LexText/LexTextControls/LexTextControlsTests/LiftMergerTests.cs
@@ -3663,37 +3663,6 @@ public void TestLiftImportChangingAffixToStem()
Assert.That(entry.AlternateFormsOS.First().LiftResidue, Does.Contain("look for this"));
}
- private static readonly string[] s_LiftPronunciations = {
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- ""
- };
-
private string[] _minimalLiftData = {
"",
"",
@@ -3711,41 +3680,712 @@ public void TestLiftImportChangingAffixToStem()
""
};
- ///--------------------------------------------------------------------------------------
///
- /// Test LIFT merger for problems merging pronunciations.
- /// To produce the problem that led to this test, an entry with one or formless pronunciation
- /// gets merged with a LIFT file that has the same entry with other pronunciations. (LT-14725)
+ /// Test merging pronunciations when entry has formless pronunciation and LIFT has pronunciations
+ /// with forms.
+ /// Verifies that formless pronunciations don't interfere with form-based matching. (LT-14725)
+ ///
+ [Test]
+ public void MergePronunciations_EntryHasFormlessPronunciation_MergesCorrectly()
+ {
+ SetWritingSystems("fr es");
+
+ var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es");
+
+ // Setup: Create entry with pronunciations including a formless one
+ var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test");
+ AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); // 'fr' pronunciation
+ AddPronunciation(entry, "", -1); // blank pronunciation, no form
+
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should have 3 pronunciations total
+ // - 'pronunciation' in 'fr' (merged)
+ // - 'pronunciation' in 'es' (added)
+ // - blank pronunciation with no form (unchanged)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(3),
+ "Should have merged 'fr', added 'es', and kept formless pronunciation");
+ var frPronuns = entry.PronunciationsOS.Count(p =>
+ p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation");
+ Assert.That(frPronuns, Is.EqualTo(1), "Should have one 'fr' pronunciation");
+ var esPronuns = entry.PronunciationsOS.Count(p =>
+ p.Form.get_String(wsEs).Text == "pronunciation");
+ Assert.That(esPronuns, Is.EqualTo(1), "Should have one 'es' pronunciation");
+ var formlessPronuns = entry.PronunciationsOS.Count(p =>
+ p.Form.StringCount == 0);
+ Assert.That(formlessPronuns, Is.EqualTo(1), "Should have one formless pronunciation");
+ }
+
+ ///
+ /// Test merging pronunciations with media files when entry already has matching media.
+ /// Verifies media files are merged correctly when pronunciation forms match. (LT-14725)
///
- ///--------------------------------------------------------------------------------------
[Test]
- public void TestLiftMergeOfPronunciations()
+ public void MergePronunciations_MatchingMediaFiles_MergesIntoExisting()
{
SetWritingSystems("fr es");
+ var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es");
+
+ // Setup: Create entry with pronunciation that has one media file
+ var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test");
+ var pronunciation = AddPronunciation(entry, "pronunciation", wsEs);
+
+ // Add existing media file
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "test_audio3.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronunciation.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should merge into existing pronunciation due to matching form and media
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1),
+ "Should merge into existing pronunciation with matching form and media");
+ Assert.That(pronunciation.MediaFilesOS, Has.Count.EqualTo(2),
+ "Should have 2 media files after merge (original + new)");
+ }
+
+ ///
+ /// Test merging multiple duplicate pronunciations with same form but different media.
+ /// Verifies that duplicates in the entry don't interfere with proper matching. (LT-14725)
+ ///
+ [Test]
+ public void MergePronunciations_DuplicateFormsWithDifferentMedia_MergesBestMatch()
+ {
+ SetWritingSystems("fr es");
+
+ var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es");
+ var repoEntry = Cache.ServiceLocator.GetInstance();
+
+ // Setup: Create entry with duplicate pronunciations, one with media, one without
+ var entry = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test");
+ var pronunWithMedia = AddPronunciation(entry, "pronunciation", wsEs);
+ var pronunWithoutMedia = AddPronunciation(entry, "pronunciation", wsEs);
+
+ // Add media file to first pronunciation
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "test_audio3.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronunWithMedia.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should still have 2 pronunciations (merged into best match)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2),
+ "Should keep both duplicate pronunciations");
+ // The pronunciation with matching media should get the merge
+ Assert.That(pronunWithMedia.MediaFilesOS, Has.Count.EqualTo(2),
+ "Pronunciation with matching media should receive merged media files");
+ Assert.That(pronunWithoutMedia.MediaFilesOS, Has.Count.EqualTo(0),
+ "Pronunciation without media should remain unchanged");
+ }
+
+ ///
+ /// Test comprehensive merge scenario with multiple pronunciations in different languages.
+ /// Verifies the complete merging logic with forms in French and Spanish. (LT-14725)
+ ///
+ [Test]
+ public void MergePronunciations_MultipleLanguagesAndForms_MergesAllCorrectly()
+ {
+ SetWritingSystems("fr es");
+
+ var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es");
var repoEntry = Cache.ServiceLocator.GetInstance();
var repoSense = Cache.ServiceLocator.GetInstance();
Assert.AreEqual(0, repoEntry.Count);
Assert.AreEqual(0, repoSense.Count);
- // The entries should already be present.
+ // Setup: Create first entry with multiple pronunciations
var entry1 = CreateSimpleStemEntry("503d3478-3545-4213-9f6b-1f087464e140", "test");
- AddPronunciation(entry1, "pronunciation", Cache.DefaultVernWs); // add 'fr' pronunciation
- AddPronunciation(entry1, "", -1); // add blank pronunciation, no form
+ AddPronunciation(entry1, "pronunciation", Cache.DefaultVernWs); // 'fr'
+ AddPronunciation(entry1, "", -1); // blank pronunciation
+ AddPronunciation(entry1, "mispronunciation", wsEs);
+ var misPronun = AddPronunciation(entry1, "mispronunciation", wsEs);
+
+ // Add media file to one mispronunciation
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "test_audio3.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ misPronun.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ // Setup: Create second entry with single pronunciation
var entry2 = CreateSimpleStemEntry("8d735e34-c555-4390-a0af-21a12e1dd6ff", "testb");
- AddPronunciation(entry2, "pronunciation", Cache.DefaultVernWs); // add 'fr' pronunciation
+ AddPronunciation(entry2, "pronunciation", Cache.DefaultVernWs); // 'fr'
+
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 2);
+ File.Delete(sOrigFile);
- var sOrigFile = CreateInputFile(s_LiftPronunciations);
+ // Verify overall counts
+ Assert.AreEqual(2, repoEntry.Count, "Should have exactly 2 entries");
+ Assert.AreEqual(0, repoSense.Count, "Should not create any senses");
+
+ var repoPronunciation =
+ Cache.ServiceLocator.GetInstance();
+ Assert.AreEqual(7, repoPronunciation.Count, "Should have 7 total pronunciations");
+
+ // Verify entry1: Should have 5 pronunciations after merge
+ // - 'pronunciation' in 'fr' (merged)
+ // - 'pronunciation' in 'es' (added from LIFT)
+ // - 'mispronunciation' in 'es' with 2 media files (merged)
+ // - 'mispronunciation' in 'es' with no media files (unchanged)
+ // - blank pronunciation with no form (unchanged)
+ Assert.That(entry1.PronunciationsOS, Has.Count.EqualTo(5),
+ "Entry 'test' should have 5 pronunciations");
+ Assert.That(misPronun.MediaFilesOS, Has.Count.EqualTo(2),
+ "Mispronunciation should have 2 media files after merge");
+
+ // Verify entry2: Should have 2 pronunciations after merge
+ // - 'pronunciation' in 'fr' (original)
+ // - 'pronunciation' in 'es' (added from LIFT)
+ Assert.That(entry2.PronunciationsOS, Has.Count.EqualTo(2),
+ "Entry 'testb' should have 2 pronunciations");
+ }
- // Try to merge in two LIFT file entries that match our two existing entries
- TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 2);
+ ///
+ /// Test merging when entry has pronunciation with media but LIFT has same form without media.
+ /// Verifies that form-only matches work correctly. (LT-14725)
+ ///
+ [Test]
+ public void MergePronunciations_EntryHasMediaLiftDoesNot_MergesOnForm()
+ {
+ SetWritingSystems("fr");
+
+ // Setup: Create entry with pronunciation that has media
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // Add media file to entry pronunciation
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "existing_audio.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronun.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
File.Delete(sOrigFile);
- // Verification
- Assert.AreEqual(2, repoEntry.Count, "Created some unnecessary entries.");
- Assert.AreEqual(0, repoSense.Count, "Created some unnecessary senses.");
- var repoPronunciation = Cache.ServiceLocator.GetInstance();
- Assert.AreEqual(5, repoPronunciation.Count, "Wrong number of remaining LexPronunciation objects");
+ // Verify: Should merge into existing pronunciation based on form match
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1),
+ "Should merge based on form match even though LIFT has no media");
+ Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(1),
+ "Original media file should be preserved");
+ }
+
+ ///
+ /// Test media file matching is case-insensitive
+ ///
+ [Test]
+ public void MergePronunciations_MediaFileMatching_CaseInsensitive()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // Add media file with uppercase extension
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "Test_Audio.MP3"; // uppercase
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronun.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ var liftData = new[]
+ {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // lowercase
+ "",
+ "",
+ ""
+ };
+
+ var sOrigFile = CreateInputFile(liftData);
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Should merge into existing pronunciation (not create new one) despite case difference
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1),
+ "Media file matching should be case-insensitive");
+ }
+
+ ///
+ /// Test that blank pronunciations in both LIFT and entry match and merge
+ ///
+ [Test]
+ public void MergePronunciations_BothBlankForms_ShouldMatch()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ AddPronunciation(entry, "", -1); // blank pronunciation
+
+ var liftData = new[]
+ {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // blank pronunciation in LIFT
+ "",
+ ""
+ };
+
+ var sOrigFile = CreateInputFile(liftData);
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Should still have only 1 pronunciation (merged blank with blank)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1),
+ "Blank pronunciation should merge with blank pronunciation");
+ }
+
+ ///
+ /// Test that a LIFT pronunciation with media but no matching form creates a new pronunciation
+ ///
+ [Test]
+ public void MergePronunciations_MediaWithoutFormMatch_CreatesNew()
+ {
+ SetWritingSystems("fr es");
+
+ var wsEs = Cache.WritingSystemFactory.GetWsFromStr("es");
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs); // fr only
+
+ var liftData = new[]
+ {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // different form
+ "",
+ "",
+ "",
+ ""
+ };
+
+ var sOrigFile = CreateInputFile(liftData);
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Should create new pronunciation since form doesn't match (score = 0)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2),
+ "Non-matching form with media should create new pronunciation");
+ Assert.That(entry.PronunciationsOS.Any(p =>
+ p.Form.get_String(wsEs).Text == "differentform"),
+ "Spanish pronunciation should be added");
+ }
+
+ ///
+ /// Test best match selection when multiple entry pronunciations match
+ ///
+ [Test]
+ public void MergePronunciations_MultipleMatches_SelectsBestScore()
+ {
+ SetWritingSystems("fr es");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+
+ // Create entry with two matching pronunciations, one with media
+ var pronun1 = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+ var pronun2 = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // Add media to pronun2
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "matching_audio.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronun2.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ var liftData = new[]
+ {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ""
+ };
+
+ var sOrigFile = CreateInputFile(liftData);
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Should still have 2 pronunciations (merged into best match)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2),
+ "Should merge into best matching pronunciation");
+ // pronun2 should have been selected due to higher score (form + media match)
+ Assert.That(pronun2.MediaFilesOS, Has.Count.EqualTo(1),
+ "Pronunciation with media should be selected as best match");
+ Assert.That(pronun1.MediaFilesOS, Has.Count.EqualTo(0),
+ "Pronunciation without media should not be selected");
+ }
+
+ ///
+ /// Test partial media file matches contribute to score
+ ///
+ [Test]
+ public void MergePronunciations_PartialMediaMatch_CorrectScore()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // Add two media files, only one will match
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+
+ var mediaFile1 = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile1);
+ mediaFile1.InternalPath = "audio1.mp3";
+ var audioLink1 = Cache.ServiceLocator.GetInstance().Create();
+ pronun.MediaFilesOS.Add(audioLink1);
+ audioLink1.MediaFileRA = mediaFile1;
+
+ var mediaFile2 = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile2);
+ mediaFile2.InternalPath = "audio2.mp3";
+ var audioLink2 = Cache.ServiceLocator.GetInstance().Create();
+ pronun.MediaFilesOS.Add(audioLink2);
+ audioLink2.MediaFileRA = mediaFile2;
+
+ var liftData = new[]
+ {
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "", // only this one matches
+ "", // this doesn't exist in entry
+ "",
+ "",
+ ""
+ };
+
+ var sOrigFile = CreateInputFile(liftData);
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Should merge since there's a partial match (form + at least one media file)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(1),
+ "Should merge when at least one media file matches");
+ // After merge, should have 3 media files (original 2 + 1 new from LIFT)
+ Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(3),
+ "Should retain existing media files after merge");
+ }
+
+ ///
+ /// Test when two LIFT pronunciations compete for the same best match.
+ /// The LIFT pronunciation with higher score should win the merge, the other should create new.
+ ///
+ [Test]
+ public void MergePronunciations_TwoLiftPronunciationsShareBestMatch_HigherScoreWinsMerge()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ var pronun = AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // Setup: Add media file to entry pronunciation
+ var mediaFolder = Cache.ServiceLocator.GetInstance().Create();
+ Cache.LangProject.MediaOC.Add(mediaFolder);
+ var mediaFile = Cache.ServiceLocator.GetInstance().Create();
+ mediaFolder.FilesOC.Add(mediaFile);
+ mediaFile.InternalPath = "matching_audio.mp3";
+ var audioLink = Cache.ServiceLocator.GetInstance().Create();
+ pronun.MediaFilesOS.Add(audioLink);
+ audioLink.MediaFileRA = mediaFile;
+
+ // LIFT has 2 pronunciations with same form:
+ // 1. First has matching media (higher score: form + media match)
+ // 2. Second has no media (lower score: form match only)
+ // Both would select the same entry pronunciation as best match,
+ // but only the first should merge, second should create new
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should have 2 pronunciations total
+ // - Original pronunciation merged with first LIFT pronunciation (has matching media)
+ // - New pronunciation created for second LIFT pronunciation (no media match)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2),
+ "Second LIFT pronunciation should create new entry since best match was already claimed");
+
+ // The original pronunciation should have the matching media
+ Assert.That(pronun.MediaFilesOS, Has.Count.EqualTo(1),
+ "Original pronunciation should retain/merge with matching media");
+ Assert.That(pronun.MediaFilesOS[0].MediaFileRA.InternalPath, Is.EqualTo("matching_audio.mp3"),
+ "Original pronunciation should have the matching media file");
+
+ // Verify both pronunciations have the same form
+ var frPronuns = entry.PronunciationsOS.Where(p =>
+ p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList();
+ Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form");
+ }
+
+ ///
+ /// Test when there are two identical pronunciations in the cache and two identical pronunciations in LIFT: each LIFT pronunciation should merge into a different cache pronunciation, so none are added.
+ ///
+ [Test]
+ public void MergePronunciations_DuplicateLiftForms_AssignToDistinctCachePronunciations()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+ AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+ AddPronunciation(entry, "pronunciation", Cache.DefaultVernWs);
+
+ // LIFT has 2 pronunciations with same form and no media:
+ // Both would select the first entry pronunciation as best match.
+ // The first LIFT pronunciation should merge with the first pronunciation in the cache
+ // and the second LIFT pronunciation with the second one in the cache (since the first is already claimed)
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should have 2 pronunciations total
+ // - First cache pronunciation merged with first LIFT pronunciation (even though both match exactly)
+ // - Second cache pronunciation merged with second LIFT pronunciation (because first match was already claimed)
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2),
+ "Each LIFT pronunciation should merge into a distinct existing pronunciation; none should be added");
+
+ // Verify both pronunciations have the same form
+ var frPronuns = entry.PronunciationsOS.Where(p =>
+ p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList();
+ Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form");
+ }
+
+ [Test]
+ public void MergePronunciations_EntryMatchedWithNoPronun_PronunsAdded()
+ {
+ SetWritingSystems("fr");
+
+ var entry = CreateSimpleStemEntry("66EE6430-D40E-4BBF-8E17-0793E1176CF0", "test");
+
+ // LIFT has 2 pronunciations with same form:
+ // 1. One with a media file
+ // 2. Second with no media file
+ var liftXml = @"
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ";
+
+ var sOrigFile = CreateInputFile(liftXml.Split('\n'));
+ TryImport(sOrigFile, null, FlexLiftMerger.MergeStyle.MsKeepBoth, 1);
+ File.Delete(sOrigFile);
+
+ // Verify: Should have 2 pronunciations total, both from the LIFT file
+ Assert.That(entry.PronunciationsOS, Has.Count.EqualTo(2), "Both pronunciations should be added to the matched entry.");
+
+ // The first pronunciation should have the matching media
+ Assert.That(entry.PronunciationsOS[0].MediaFilesOS, Has.Count.EqualTo(1),
+ "First pronunciation should have the matching media");
+ Assert.That(entry.PronunciationsOS[0].MediaFilesOS[0].MediaFileRA.InternalPath.EndsWith("matching_audio.mp3"),
+ Is.True, "First pronunciation should have the matching media file");
+ Assert.That(entry.PronunciationsOS[1].MediaFilesOS, Has.Count.EqualTo(0));
+
+ // Verify both pronunciations have the same form
+ var frPronuns = entry.PronunciationsOS.Where(p =>
+ p.Form.get_String(Cache.DefaultVernWs).Text == "pronunciation").ToList();
+ Assert.That(frPronuns, Has.Count.EqualTo(2), "Should have two pronunciations with the same form");
}
[Test]
@@ -3966,12 +4606,13 @@ private ILexEntry CreateSimpleStemEntry(string entryGuid, string form)
return entry;
}
- private void AddPronunciation(ILexEntry entry, string pronunciation, int ws)
+ private ILexPronunciation AddPronunciation(ILexEntry entry, string pronunciation, int ws)
{
var lexPronunciation = Cache.ServiceLocator.GetInstance().Create();
entry.PronunciationsOS.Add(lexPronunciation);
if (ws > 0)
lexPronunciation.Form.set_String(ws, TsStringUtils.MakeString(pronunciation, ws));
+ return lexPronunciation;
}
[Test]
diff --git a/Src/LexText/LexTextControls/LiftMerger.cs b/Src/LexText/LexTextControls/LiftMerger.cs
index 2ea98c8896..982abae06f 100644
--- a/Src/LexText/LexTextControls/LiftMerger.cs
+++ b/Src/LexText/LexTextControls/LiftMerger.cs
@@ -10,17 +10,17 @@
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;
-using SIL.Lift;
-using SIL.Lift.Parsing;
-using SIL.LCModel.Core.Cellar;
-using SIL.LCModel.Core.Text;
-using SIL.LCModel.Core.WritingSystems;
-using SIL.LCModel.Core.KernelInterfaces;
using SIL.FieldWorks.Common.FwUtils;
using SIL.LCModel;
using SIL.LCModel.Application;
+using SIL.LCModel.Core.Cellar;
+using SIL.LCModel.Core.KernelInterfaces;
+using SIL.LCModel.Core.Text;
+using SIL.LCModel.Core.WritingSystems;
using SIL.LCModel.DomainServices;
using SIL.LCModel.Utils;
+using SIL.Lift;
+using SIL.Lift.Parsing;
using SIL.Utils;
namespace SIL.FieldWorks.LexText.Controls
@@ -3325,22 +3325,23 @@ private void CreateEntryPronunciations(ILexEntry le, CmLiftEntry entry)
+ /// <summary>
+ /// Merge the pronunciations of the imported LIFT entry into the given lexical entry. Each LIFT pronunciation is
+ /// merged into the existing pronunciation chosen for it by FindBestPronunciationMatches; a LIFT pronunciation
+ /// with no match gets a newly created pronunciation appended to the entry.
+ /// </summary>
+ /// <param name="le">the lexical entry whose pronunciations are updated</param>
+ /// <param name="entry">the imported LIFT entry supplying the pronunciations</param>
private void MergeEntryPronunciations(ILexEntry le, CmLiftEntry entry)
{
- Dictionary dictHvoPhon = new Dictionary();
- foreach (CmLiftPhonetic phon in entry.Pronunciations)
+ // The code this replaces called IgnoreNewWs() before every form comparison, so call it before scoring as well.
+ // NOTE(review): presumably this keeps writing systems met while scoring from being added to the project lists - confirm.
+ IgnoreNewWs();
+ var matchedEntries = FindBestPronunciationMatches(entry.Pronunciations, le.PronunciationsOS);
+ // Walk the LIFT pronunciations in file order instead of enumerating the dictionary: Dictionary enumeration
+ // order is unspecified, and new pronunciations should be appended in the order the LIFT file lists them.
+ foreach (var liftPronunciation in entry.Pronunciations)
{
+ ILexPronunciation entryPronunciation;
+ matchedEntries.TryGetValue(liftPronunciation, out entryPronunciation);
IgnoreNewWs();
- ILexPronunciation pron = FindMatchingPronunciation(le, dictHvoPhon, phon);
- if (pron == null)
+ if (entryPronunciation == null)
{
- pron = CreateNewLexPronunciation();
- le.PronunciationsOS.Add(pron);
- dictHvoPhon.Add(pron.Hvo, phon);
+ entryPronunciation = CreateNewLexPronunciation();
+ le.PronunciationsOS.Add(entryPronunciation);
}
- MergeInMultiUnicode(pron.Form, LexPronunciationTags.kflidForm, phon.Form, pron.Guid);
- MergePronunciationMedia(pron, phon);
- ProcessPronunciationFieldsAndTraits(pron, phon);
- StoreAnnotationsAndDatesInResidue(pron, phon);
- SavePronunciationWss(phon.Form.Keys);
+ MergeInMultiUnicode(entryPronunciation.Form, LexPronunciationTags.kflidForm,
+ liftPronunciation.Form, entryPronunciation.Guid);
+ MergePronunciationMedia(entryPronunciation, liftPronunciation);
+ ProcessPronunciationFieldsAndTraits(entryPronunciation, liftPronunciation);
+ StoreAnnotationsAndDatesInResidue(entryPronunciation, liftPronunciation);
+ SavePronunciationWss(liftPronunciation.Form.Keys);
}
}
@@ -3522,87 +3523,113 @@ private ICmMedia FindMatchingMedia(ILcmOwningSequence rgmedia, string
}
///
- /// Find the best matching pronunciation in the lex entry (if one exists) for the imported LiftPhonetic phon.
+ /// Find the best matching pronunciation in the lex entry (if one exists) for each imported LiftPhonetic pronunciation.
/// If neither has any form, then only the media filenames are compared. If both have forms, then both forms
- /// and media filenames are compared. At least one form must match if any forms exist on either side.
- /// If either has a media file, both must have the same number of media files, and at least one filename
- /// must match.
- /// As a side-effect, dictHvoPhon has the matching hvo keyed to the imported data (if one exists).
+ /// and media filenames are compared. A candidate must match on form to be considered at all; each matching media filename raises
+ /// its score, and when the imported form has no media files an entry form that also has none scores higher than one that has media.
+ /// Each entry form is used at most once: if multiple imported forms match the same entry form, the one with the highest score will be selected.
///
- /// best match, or null
- private ILexPronunciation FindMatchingPronunciation(ILexEntry le, Dictionary dictHvoPhon,
- CmLiftPhonetic phon)
+ /// Dictionary with the best matches for each lift pronunciation. Best match can be null.
+ private Dictionary FindBestPronunciationMatches(
+ List liftPronunciations, IList entryPronunciations)
{
- ILexPronunciation lexpron = null;
- ILexPronunciation lexpronNoMedia = null;
- int cMatches = 0;
- foreach (ILexPronunciation pron in le.PronunciationsOS)
+ // Gather the match score for every combination of lift and entry pronunciations
+ var matchScores = new List>();
+ foreach (var liftPron in liftPronunciations)
+ {
+ foreach (var entryPron in entryPronunciations)
+ {
+ var score = GetPronunciationMatchScore(liftPron, entryPron);
+ if(score > 0)
+ matchScores.Add(Tuple.Create(liftPron, entryPron, score));
+ }
+ }
+ // sort by best score descending
+ var sortedMatches = matchScores.OrderByDescending(t => t.Item3).ToList();
+ // Each entry pronunciation can only be used once, so store the used pronunciations.
+ var usedEntryPronunciations = new HashSet();
+ // The result will be the best match for each lift pronunciation, or null if no matches are good enough
+ var results = new Dictionary();
+ foreach (var match in sortedMatches)
{
- if (dictHvoPhon.ContainsKey(pron.Hvo))
+ var liftPron = match.Item1;
+ var entryPron = match.Item2;
+
+ // Skip if this liftPron already has a result
+ if (results.ContainsKey(liftPron))
+ {
continue;
- bool fFormMatches = false;
- int cCurrent = 0;
- IgnoreNewWs();
- if (phon.Form.Count == 0)
+ }
+ // If this entryPron is already used, keep looking for next match
+ if (entryPron != null && usedEntryPronunciations.Contains(entryPron))
{
- Dictionary forms = GetAllUnicodeAlternatives(pron.Form);
- fFormMatches = (forms.Count == 0);
+ continue;
}
- else
+ // Found a valid match
+ if (entryPron != null)
{
- cCurrent = MultiUnicodeStringMatches(pron.Form, phon.Form, false, Guid.Empty, 0);
- fFormMatches = (cCurrent > cMatches);
+ usedEntryPronunciations.Add(entryPron);
}
- if (fFormMatches)
+ results.Add(liftPron, entryPron);
+ }
+
+ // Any liftPron we saw but didn't match gets null
+ foreach (var liftPron in liftPronunciations)
+ {
+ if (!results.ContainsKey(liftPron))
{
- cMatches = cCurrent;
- if (phon.Media.Count == pron.MediaFilesOS.Count)
+ results.Add(liftPron, null);
+ }
+ }
+
+ return results;
+ }
+
+ private int GetPronunciationMatchScore(CmLiftPhonetic liftPronunciation, ILexPronunciation entryPronunciation)
+ {
+ var formMatches = 0;
+ if (liftPronunciation.Form.Count == 0)
+ {
+ Dictionary forms = GetAllUnicodeAlternatives(entryPronunciation.Form);
+ formMatches = forms.Count == 0 ? 1 : 0;
+ }
+ else
+ {
+ formMatches = MultiUnicodeStringMatches(entryPronunciation.Form, liftPronunciation.Form, false, Guid.Empty, 0);
+ }
+ if (formMatches > 0)
+ {
+ int mediaMatches = 0;
+ if (liftPronunciation.Media.Count == 0)
+ {
+ // If the imported form has no media files set the score based on if the entry form has media files.
+ if (entryPronunciation.MediaFilesOS.Count == 0)
+ mediaMatches = 1; // both have no media files
+ }
+ else if (entryPronunciation.MediaFilesOS.Count > 0)
+ {
+ // Check if at least one media file matches
+ foreach (var file in entryPronunciation.MediaFilesOS)
{
- int cFilesMatch = 0;
- for (int i = 0; i < phon.Media.Count; ++i)
+ var cf = file.MediaFileRA;
+ if (cf != null)
{
- string sURL = phon.Media[i].Url;
- if (sURL == null)
+ var path = cf.InternalPath;
+ if (path == null)
continue;
- string sFile = Path.GetFileName(sURL);
- for (int j = 0; j < pron.MediaFilesOS.Count; ++j)
+ path = Path.GetFileName(path).ToLowerInvariant();
+ if (liftPronunciation.Media.Any(m => m.Url != null
+ && Path.GetFileName(m.Url).ToLowerInvariant() == path))
{
- ICmFile cf = pron.MediaFilesOS[i].MediaFileRA;
- if (cf != null)
- {
- string sPath = cf.InternalPath;
- if (sPath == null)
- continue;
- if (sFile.ToLowerInvariant() == Path.GetFileName(sPath).ToLowerInvariant())
- ++cFilesMatch;
- }
+ mediaMatches++;
}
}
- if (phon.Media.Count == 0 || cFilesMatch > 0)
- lexpron = pron;
- else
- lexpronNoMedia = pron;
- }
- else
- {
- lexpronNoMedia = pron;
}
}
+ // score will be the combined matches for forms and media files
+ return mediaMatches + formMatches;
}
- if (lexpron != null)
- {
- dictHvoPhon.Add(lexpron.Hvo, phon);
- return lexpron;
- }
- else if (lexpronNoMedia != null)
- {
- dictHvoPhon.Add(lexpronNoMedia.Hvo, phon);
- return lexpronNoMedia;
- }
- else
- {
- return null;
- }
+ return 0; // no form match, no score
}
private Dictionary GetAllUnicodeAlternatives(ITsMultiString tsm)