@@ -769,48 +769,164 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word
769769
770770 if ( itemDict . ContainsKey ( "cf" ) ) // Lex. Entries
771771 {
772+ // NB: "cf" records the lexeme, not the headword/citation form (in spite of the name).
772773 int ws_cf = GetWsEngine ( wsFact , itemDict [ "cf" ] . Item1 ) . Handle ;
773774 ILexEntry entry = null ;
774775 var entries = lex_entry_repo . AllInstances ( ) . Where (
775- m => StringServices . CitationFormWithAffixTypeStaticForWs ( m , ws_cf , string . Empty ) == itemDict [ "cf" ] . Item2 ) ;
776- if ( entries . Count ( ) == 1 )
776+ m => DecorateFormWithAffixMarkers ( m . LexemeFormOA ? . MorphTypeRA , m . LexemeFormOA ? . Form ? . get_String ( ws_cf ) ? . Text ) == itemDict [ "cf" ] . Item2 ) ;
777+
778+ // Filter entries by homograph number.
779+ // If the lexeme and the headword are different,
780+ // then there may be more than one entry with the given homograph number.
781+ // This is because homograph numbers distinguish headwords rather than lexemes.
782+ // If there is no "hn" entry, then the hn is 0.
783+ string hn = "0" ;
784+ if ( itemDict . ContainsKey ( "hn" ) ) // Homograph Number
777785 {
778- entry = entries . First ( ) ;
786+ hn = itemDict [ "hn" ] . Item2 ;
779787 }
780- else if ( itemDict . ContainsKey ( "hn" ) ) // Homograph Number
788+ var hnEntries = entries . Where ( m => m . HomographNumber . ToString ( ) == hn ) ;
789+ if ( hnEntries . Count ( ) > 0 )
781790 {
782- entry = entries . FirstOrDefault ( m => m . HomographNumber . ToString ( ) == itemDict [ "hn" ] . Item2 ) ;
791+ entries = hnEntries ;
783792 }
784- if ( entry != null )
785- {
786- bundle . MorphRA = entry . LexemeFormOA ;
787793
788- if ( itemDict . ContainsKey ( "gls" ) ) // Lex. Gloss
794+ if ( itemDict . ContainsKey ( "gls" ) ) // Lex. Gloss
795+ {
796+ // Filter senses by gloss.
797+ int ws_gls = GetWsEngine ( wsFact , itemDict [ "gls" ] . Item1 ) . Handle ;
798+ IList < ILexSense > senses = new List < ILexSense > ( ) ;
799+ foreach ( var e in entries )
800+ {
801+ senses . AddRange ( e . SensesOS . Where ( s => s . Gloss . get_String ( ws_gls ) . Text == itemDict [ "gls" ] . Item2 ) ) ;
802+ }
803+ if ( senses . Count ( ) > 1 && itemDict . ContainsKey ( "msa" ) )
789804 {
790- int ws_gls = GetWsEngine ( wsFact , itemDict [ "gls" ] . Item1 ) . Handle ;
791- ILexSense sense = entry . SensesOS . FirstOrDefault ( s => s . Gloss . get_String ( ws_gls ) . Text == itemDict [ "gls " ] . Item2 ) ;
792- if ( sense != null )
805+ // Filter senses by MSA.
806+ IList < ILexSense > msaSenses = senses . Where ( s => s . MorphoSyntaxAnalysisRA ? . InterlinearAbbr == itemDict [ "msa " ] . Item2 ) . ToList ( ) ;
807+ if ( msaSenses . Count ( ) > 0 )
793808 {
794- bundle . SenseRA = sense ;
809+ senses = msaSenses ;
795810 }
796811 }
812+ // Record sense.
813+ if ( senses . Count ( ) > 0 )
814+ {
815+ bundle . SenseRA = senses . FirstOrDefault ( ) ;
816+ entry = bundle . SenseRA . Entry ;
817+ }
818+ }
819+
820+ if ( entry == null && entries . Count ( ) > 0 )
821+ {
822+ entry = entries . First ( ) ;
823+ }
824+
825+ // Record morpheme.
826+ if ( entry != null )
827+ {
828+ if ( itemDict . ContainsKey ( "txt" ) )
829+ {
830+ // Try allomorph first.
831+ var ws_txt = GetWsEngine ( wsFact , itemDict [ "txt" ] . Item1 ) . Handle ;
832+ bundle . MorphRA = entry . AllAllomorphs . Where (
833+ m => DecorateFormWithAffixMarkers ( m . MorphTypeRA , m . Form . get_String ( ws_txt ) . Text ) == itemDict [ "txt" ] . Item2 ) . FirstOrDefault ( ) ;
834+ }
835+ if ( bundle . MorphRA == null )
836+ {
837+ bundle . MorphRA = entry . LexemeFormOA ;
838+ }
797839 }
798840 }
799841
800842 if ( itemDict . ContainsKey ( "msa" ) ) // Lex. Gram. Info
801843 {
802- IMoMorphSynAnalysis match = msa_repo . AllInstances ( ) . FirstOrDefault ( m => m . InterlinearAbbr == itemDict [ "msa" ] . Item2 ) ;
803- if ( match != null )
844+ if ( bundle . SenseRA != null && bundle . SenseRA . MorphoSyntaxAnalysisRA ? . InterlinearAbbr == itemDict [ "msa" ] . Item2 )
845+ {
846+ bundle . MsaRA = bundle . SenseRA . MorphoSyntaxAnalysisRA ;
847+ }
848+ else
849+ {
850+ IMoMorphSynAnalysis match = msa_repo . AllInstances ( ) . FirstOrDefault ( m => m . InterlinearAbbr == itemDict [ "msa" ] . Item2 ) ;
851+ if ( match != null )
852+ {
853+ bundle . MsaRA = match ;
854+ }
855+ }
856+ }
857+ }
858+ }
859+
860+ // Try to fill in category.
861+ if ( word . Items != null && wordForm . Analysis != null )
862+ {
863+ // Look for an existing category that matches a "pos".
864+ bool hasPOS = false ;
865+ foreach ( var item in word . Items )
866+ {
867+ if ( wordForm . Analysis . CategoryRA != null )
868+ {
869+ // Category filled in.
870+ break ;
871+ }
872+ if ( item . type == "pos" )
873+ {
874+ hasPOS = true ;
875+ ILgWritingSystem writingSystem = GetWsEngine ( cache . WritingSystemFactory , item . lang ) ;
876+ if ( writingSystem != null )
877+ {
878+ foreach ( var cat in cache . LanguageProject . AllPartsOfSpeech )
879+ {
880+ if ( MatchesCatNameOrAbbreviation ( writingSystem . Handle , item . Value , cat ) )
881+ {
882+ wordForm . Analysis . CategoryRA = cat ;
883+ break ;
884+ }
885+ }
886+ }
887+ }
888+ }
889+ if ( hasPOS && wordForm . Analysis . CategoryRA == null )
890+ {
891+ // Create a new category.
892+ IPartOfSpeech cat = cache . ServiceLocator . GetInstance < IPartOfSpeechFactory > ( ) . Create ( ) ;
893+ cache . LanguageProject . PartsOfSpeechOA . PossibilitiesOS . Add ( cat ) ;
894+ foreach ( var item in word . Items )
895+ {
896+ if ( item . type == "pos" )
804897 {
805- bundle . MsaRA = match ;
898+ ILgWritingSystem writingSystem = GetWsEngine ( cache . WritingSystemFactory , item . lang ) ;
899+ if ( writingSystem != null )
900+ {
901+ cat . Name . set_String ( writingSystem . Handle , item . Value ) ;
902+ cat . Abbreviation . set_String ( writingSystem . Handle , item . Value ) ;
903+ }
806904 }
807905 }
906+ wordForm . Analysis . CategoryRA = cat ;
808907 }
809908 }
810909
811910 return wordForm ;
812911 }
813912
/// <summary>
/// Wraps a form in the prefix and postfix markers of its morph type.
/// Based on StringServices.DecorateFormWithAffixMarkers.
/// </summary>
/// <param name="mmt">Morph type supplying the markers; may be null.</param>
/// <param name="form">The undecorated form; may be null.</param>
/// <returns>
/// <paramref name="form"/> unchanged when either argument is null; otherwise the form
/// with any non-empty Prefix prepended and any non-empty Postfix appended.
/// </returns>
private static string DecorateFormWithAffixMarkers(IMoMorphType mmt, string form)
{
    // Without both a morph type and a form there is nothing to decorate.
    if (mmt == null || form == null)
    {
        return form;
    }

    // Leading marker first, so the trailing marker is appended to the already-prefixed text.
    var leadingMarker = mmt.Prefix;
    var decorated = String.IsNullOrEmpty(leadingMarker) ? form : leadingMarker + form;

    var trailingMarker = mmt.Postfix;
    return String.IsNullOrEmpty(trailingMarker) ? decorated : decorated + trailingMarker;
}
929+
814930 private static bool FindOrCreateWfiAnalysis ( LcmCache cache , Word word ,
815931 int mainWritingSystem ,
816932 out IAnalysis analysis )
@@ -820,6 +936,7 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
820936 // First, collect all expected forms and glosses from the Word
821937 var expectedForms = new Dictionary < int , string > ( ) ; // wsHandle -> expected value
822938 var expectedGlosses = new Dictionary < int , string > ( ) ; // wsHandle -> expected gloss
939+ var expectedCats = new Dictionary < int , string > ( ) ; // wsHandle -> expected cat
823940 IAnalysis candidateForm = null ;
824941 ITsString wordForm = null ;
825942 ITsString punctForm = null ;
@@ -871,6 +988,10 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
871988
872989 expectedGlosses [ ws . Handle ] = wordItem . Value ;
873990 break ;
991+
992+ case "pos" :
993+ expectedCats [ ws . Handle ] = wordItem . Value ;
994+ break ;
874995 }
875996 }
876997
@@ -896,23 +1017,57 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
8961017 return true ;
8971018 }
8981019
1020+ analysis = FindMatchingAnalysis ( cache , candidateWordform , word , expectedGlosses , expectedCats ) ;
1021+ if ( analysis != null )
1022+ {
1023+ return true ;
1024+ }
1025+
1026+ if ( wordForm . Text . ToLower ( ) != wordForm . Text )
1027+ {
1028+ // Try lowercase.
1029+ var lcCandidateForm = cache . ServiceLocator
1030+ . GetInstance < IWfiWordformRepository > ( )
1031+ . GetMatchingWordform ( wordForm . get_WritingSystemAt ( 0 ) , wordForm . Text . ToLower ( ) ) ;
1032+ if ( lcCandidateForm is IWfiWordform lcCandidateWordform )
1033+ {
1034+ analysis = FindMatchingAnalysis ( cache , lcCandidateWordform , word , expectedGlosses , expectedCats ) ;
1035+ if ( analysis != null )
1036+ {
1037+ return true ;
1038+ }
1039+ }
1040+ }
1041+
1042+ // No matching analysis found with all expected gloss and morpheme data
1043+ analysis = AddEmptyAnalysisToWordform ( cache , candidateWordform ) ;
1044+ return false ;
1045+ }
1046+
1047+ private static IAnalysis FindMatchingAnalysis ( LcmCache cache , IWfiWordform candidateWordform , Word word ,
1048+ Dictionary < int , string > expectedGlosses , Dictionary < int , string > expectedCats )
1049+ {
1050+ IAnalysis analysis = null ;
1051+ var wsFact = cache . WritingSystemFactory ;
8991052 // Look for an analysis that has the correct morphemes and a matching gloss
9001053 foreach ( var wfiAnalysis in candidateWordform . AnalysesOC )
9011054 {
9021055 var morphemeMatch = true ;
9031056 // verify that the analysis has a Morph Bundle with the expected morphemes from the import
904- if ( word . morphemes != null && wfiAnalysis . MorphBundlesOS . Count == word . morphemes ? . morphs . Length )
1057+ if ( word . morphemes != null && wfiAnalysis . MorphBundlesOS . Count == word . morphemes ? . morphs . Length &&
1058+ word . morphemes . analysisStatus == analysisStatusTypes . humanApproved )
9051059 {
9061060 analysis = GetMostSpecificAnalysisForWordForm ( wfiAnalysis ) ;
907- for ( var i = 0 ; i < wfiAnalysis . MorphBundlesOS . Count ; ++ i )
1061+ for ( var i = 0 ; i < wfiAnalysis . MorphBundlesOS . Count ; ++ i )
9081062 {
909- var extantMorphForm = wfiAnalysis . MorphBundlesOS [ i ] . Form ;
1063+ var morphBundle = wfiAnalysis . MorphBundlesOS [ i ] ;
1064+ var extantMorphForm = morphBundle . Form ;
9101065 var importMorphForm = word . morphemes . morphs [ i ] . items . FirstOrDefault ( item => item . type == "txt" ) ;
9111066 var importFormWs = GetWsEngine ( wsFact , importMorphForm ? . lang ) ;
9121067 // compare the import item to the extant morph form
9131068 if ( importMorphForm == null || extantMorphForm == null ||
9141069 TsStringUtils . IsNullOrEmpty ( extantMorphForm . get_String ( importFormWs . Handle ) ) ||
915- ! extantMorphForm . get_String ( importFormWs . Handle ) . Text . Normalize ( )
1070+ ! DecorateFormWithAffixMarkers ( morphBundle . MorphRA ? . MorphTypeRA , extantMorphForm . get_String ( importFormWs . Handle ) . Text ) . Normalize ( )
9161071 . Equals ( importMorphForm . Value ? . Normalize ( ) ) )
9171072 {
9181073 morphemeMatch = false ;
@@ -923,18 +1078,14 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
9231078
9241079 if ( morphemeMatch )
9251080 {
926- var matchingGloss = wfiAnalysis . MeaningsOC . FirstOrDefault ( g => VerifyGlossesMatch ( g , expectedGlosses ) ) ;
1081+ var matchingGloss = wfiAnalysis . MeaningsOC . FirstOrDefault ( g => VerifyGlossesMatch ( g , expectedGlosses , expectedCats ) ) ;
9271082 if ( matchingGloss != null )
9281083 {
929- analysis = matchingGloss ;
930- return true ;
1084+ return matchingGloss ;
9311085 }
9321086 }
9331087 }
934-
935- // No matching analysis found with all expected gloss and morpheme data
936- analysis = AddEmptyAnalysisToWordform ( cache , candidateWordform ) ;
937- return false ;
1088+ return null ;
9381089 }
9391090
9401091 private static IAnalysis GetMostSpecificAnalysisForWordForm ( IAnalysis candidateWordform )
@@ -1031,7 +1182,8 @@ private static bool MatchPrimaryFormAndAddMissingAlternatives(IAnalysis wordForm
10311182
10321183 // Helper method to verify that all expected glosses match the stored glosses
10331184 private static bool VerifyGlossesMatch ( IWfiGloss wfiGloss ,
1034- Dictionary < int , string > expectedGlosses )
1185+ Dictionary < int , string > expectedGlosses ,
1186+ Dictionary < int , string > expectedCats )
10351187 {
10361188 foreach ( var expectedGloss in expectedGlosses )
10371189 {
@@ -1042,10 +1194,28 @@ private static bool VerifyGlossesMatch(IWfiGloss wfiGloss,
10421194 if ( storedGloss == null || storedGloss . Text != expectedValue )
10431195 return false ; // Mismatch found
10441196 }
1197+ foreach ( var expectedCat in expectedCats )
1198+ {
1199+ if ( ! MatchesCatNameOrAbbreviation ( expectedCat . Key , expectedCat . Value , wfiGloss . Analysis ? . CategoryRA ) )
1200+ return false ;
1201+ }
10451202
10461203 return true ;
10471204 }
10481205
/// <summary>
/// Tests whether a category's name or abbreviation in one writing system equals the given text.
/// </summary>
/// <param name="ws">Writing system handle used to select the name/abbreviation alternative.</param>
/// <param name="text">The text to compare against (exact, case-sensitive string equality).</param>
/// <param name="cat">The category to inspect; a null category never matches.</param>
/// <returns>
/// True when the name alternative, or failing that the abbreviation alternative,
/// has Text equal to <paramref name="text"/>; false otherwise.
/// </returns>
private static bool MatchesCatNameOrAbbreviation(int ws, string text, IPartOfSpeech cat)
{
    if (cat == null)
    {
        return false;
    }

    // The name is checked first; the abbreviation is only fetched when the name does not match.
    ITsString catName = cat.Name.get_String(ws);
    bool nameMatches = catName != null && catName.Text == text;
    if (nameMatches)
    {
        return true;
    }

    ITsString catAbbr = cat.Abbreviation.get_String(ws);
    return catAbbr != null && catAbbr.Text == text;
}
1218+
10491219 /// <summary>
10501220 /// </summary>
10511221 /// <param name="wordForm">The word Gloss. If multiple glosses, returns the last one created.</param>
0 commit comments