[ibus-typing-booster] multilingual: WIP: multilingual, better approach (59a60ef)

31 Mar 2016

Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch  : multilingual
...

commit 59a60ef9d86d1cde324d69b070614e1665fd63b7
Author: Mike FABIAN <mfabian@redhat.com>
Date:   Fri Mar 25 13:49:10 2016 +0100
WIP: multilingual, better approach
...

ibus-typing-booster/engine/hunspell_suggest.py |    3 +
 ibus-typing-booster/engine/hunspell_table.py   |  292 ++++++++++++------------
 ibus-typing-booster/engine/tabsqlitedb.py      |    7 +-
 ibus-typing-booster/hunspell-tables/mr_IN.conf |    2 +-
 4 files changed, 157 insertions(+), 147 deletions(-)

diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py
index 8e3a85c..89bdf23 100755
--- a/ibus-typing-booster/engine/hunspell_suggest.py
+++ b/ibus-typing-booster/engine/hunspell_suggest.py
@@ -109,6 +109,9 @@ class Dictionary:
                 traceback.print_exc()
                 return
         if self.buffer:
+            print("load_dictionary(): "
+                  + "Successfully loaded %(dic)s using %(enc)s encoding."
+                  %{'dic': dic_path, 'enc': self.encoding})
             self.buffer = unicodedata.normalize(
                 normalization_form_internal, self.buffer)
             if import_enchant_successful:
diff --git a/ibus-typing-booster/engine/hunspell_table.py b/ibus-typing-booster/engine/hunspell_table.py
index 483684e..f923d10 100644
--- a/ibus-typing-booster/engine/hunspell_table.py
+++ b/ibus-typing-booster/engine/hunspell_table.py
@@ -125,7 +125,6 @@ class editor(object):
         self._typed_string = []
         self._typed_string_cursor = 0
         self._typed_string_when_update_candidates_was_last_called = []
-        self._transliterated_string = u''
         self._p_phrase = u''
         self._pp_phrase = u''
         # self._candidates: hold candidates selected from database and hunspell
@@ -138,41 +137,57 @@ class editor(object):
         self._lookup_table.clear()
         self._lookup_table.set_cursor_visible(False)
-        self.trans = None
-
         self._supported_imes = []
         imes = self.db.ime_properties.get('imes').split(',')
         for item in imes:
             mim_name = item.split(':')[1]
             if not mim_name in self._supported_imes:
                 self._supported_imes.append(mim_name)
-        if not self._supported_imes:
+        if self._supported_imes == []:
             self._supported_imes = ['NoIme']
-        # Try to get the selected input method from dconf:
-        self._current_ime = variant_to_value(self._config.get_value(
+        self._current_imes = []
+        # Try to get the selected input methods from dconf:
+        inputmethod = variant_to_value(self._config.get_value(
                 self._config_section,
                 'inputmethod'))
-        if (self._current_ime == None
-            or not self._current_ime in self._supported_imes):
-            # There is no ime set in dconf or an unsupported ime, fall
-            # back to the first of the supported imes:
-            self._current_ime = self._supported_imes[0]
-        if self._current_ime == None or self._current_ime == 'NoIme':
-            # Not using m17n transliteration:
-            self.trans_m17n_mode = False
-        else:
-            # using m17n transliteration
-            self.trans_m17n_mode = True
-            try:
+        if inputmethod:
+            inputmethods = [x.strip() for x in inputmethod.split(',')]
+            for ime in inputmethods:
+                if ime in self._supported_imes or ime == 'NoIme':
+                    self._current_imes.append(ime)
+        if self._current_imes  == []:
+            # There is no ime set in dconf, fall
+            # back to the first of the supported imes
+            # and add direct input by default:
+            self._current_imes = self._supported_imes[0]
+            if 'NoIme' not in self._current_imes:
+                self._current_imes.append('NoIme')
+        if 'NoIme' not in self._current_imes:
+            self._current_imes.append('NoIme')
+        self.init_transliterators()
+
+    def init_transliterators(self):
+        self._transliterated_strings = {}
+        self.trans = {}
+        for ime in self._current_imes:
+            self._transliterated_strings[ime] = u''
+            if ime == 'NoIme':
+                # Not using m17n transliteration:
                 if debug_level > 1:
-                    sys.stderr.write(
-                        "instantiating Transliterator(%(cur)s)\n"
-                        %{'cur': self._current_ime})
-                self.trans = Transliterator(self._current_ime)
-            except ValueError as e:
-                sys.stderr.write('Error initializing Transliterator: %s' %e)
-                import traceback
-                traceback.print_exc()
+                    sys.stderr.write("Adding dummy Transliterator 'None'\n")
+                self.trans['NoIme'] = None
+            else:
+                # using m17n transliteration
+                try:
+                    if debug_level > 1:
+                        sys.stderr.write(
+                            "instantiating Transliterator(%(ime)s)\n"
+                            %{'ime': ime})
+                    self.trans[ime] = Transliterator(ime)
+                except ValueError as e:
+                    sys.stderr.write('Error initializing Transliterator: %s' %e)
+                    import traceback
+                    traceback.print_exc()
def is_empty(self):
         return len(self._typed_string) == 0
@@ -185,28 +200,32 @@ class editor(object):
         self._typed_string = []
         self._typed_string_cursor = 0
         self._typed_string_when_update_candidates_was_last_called = []
-        self._transliterated_string = u''
-
-    def update_transliterated_string(self):
-        if self.trans_m17n_mode:
-            self._transliterated_string = self.trans.transliterate(
-                self._typed_string)
-            if self._current_ime in ['ko-romaja', 'ko-han2']:
-                self._transliterated_string = unicodedata.normalize(
-                    'NFKD', self._transliterated_string)
-        else:
-            self._transliterated_string = u''.join(self._typed_string)
+        for ime in self._current_imes:
+            self._transliterated_strings[ime] = u''
+
+    def update_transliterated_strings(self):
+        self._transliterated_strings = {}
+        for ime in self._current_imes:
+            if ime == 'NoIme':
+                self._transliterated_strings['NoIme'] = u''.join(
+                    self._typed_string)
+            else:
+                self._transliterated_strings[ime] = (
+                    self.trans[ime].transliterate(self._typed_string))
+                if ime in ['ko-romaja', 'ko-han2']:
+                    self._transliterated_strings[ime] = unicodedata.normalize(
+                        'NFKD', self._transliterated_strings[ime])
         if debug_level > 1:
             sys.stderr.write(
-                "update_transliterated_string() self._typed_string=%s\n"
+                "update_transliterated_strings() self._typed_string=%s\n"
                 %self._typed_string)
             sys.stderr.write(
-                "update_transliterated_string() "
-                + "self._transliterated_string=%s\n"
-                %self._transliterated_string)
+                "update_transliterated_strings() "
+                + "self._transliterated_strings=%s\n"
+                %self._transliterated_strings)
-    def get_transliterated_string(self):
-        return self._transliterated_string
+    def get_transliterated_strings(self):
+        return self._transliterated_strings
def insert_string_at_cursor(self, string_to_insert):
         '''Insert typed string at cursor position'''
@@ -223,22 +242,22 @@ class editor(object):
                              +string_to_insert \
                              +self._typed_string[self._typed_string_cursor:]
         self._typed_string_cursor += len(string_to_insert)
-        self.update_transliterated_string()
-        self.update_candidates ()
+        self.update_transliterated_strings()
+        self.update_candidates()
def remove_string_before_cursor(self):
         '''Remove typed string before cursor'''
         if self._typed_string_cursor > 0:
             self._typed_string = self._typed_string[self._typed_string_cursor:]
             self._typed_string_cursor = 0
-            self.update_transliterated_string()
+            self.update_transliterated_strings()
             self.update_candidates()
def remove_string_after_cursor(self):
         '''Remove typed string after cursor'''
         if self._typed_string_cursor < len(self._typed_string):
             self._typed_string = self._typed_string[:self._typed_string_cursor]
-            self.update_transliterated_string()
+            self.update_transliterated_strings()
             self.update_candidates()
def remove_character_before_cursor(self):
@@ -248,7 +267,7 @@ class editor(object):
                 self._typed_string[:self._typed_string_cursor-1]
                 +self._typed_string[self._typed_string_cursor:])
             self._typed_string_cursor -= 1
-            self.update_transliterated_string()
+            self.update_transliterated_strings()
             self.update_candidates()
def remove_character_after_cursor(self):
@@ -257,7 +276,7 @@ class editor(object):
             self._typed_string = (
                 self._typed_string[:self._typed_string_cursor]
                 +self._typed_string[self._typed_string_cursor+1:])
-            self.update_transliterated_string()
+            self.update_transliterated_strings()
             self.update_candidates()
def get_caret (self):
@@ -301,13 +320,15 @@ class editor(object):
         no transliteration is used and works better than nothing
         even if transliteration is used.
         '''
-        if self.trans_m17n_mode:
-            transliterated_string_up_to_cursor = self.trans.transliterate(
-                self._typed_string[:self._typed_string_cursor])
-        else:
+        preedit_ime = self._current_imes[0]
+        if preedit_ime == 'NoIme':
             transliterated_string_up_to_cursor = (
                 u''.join(self._typed_string[:self._typed_string_cursor]))
-        if self._current_ime in ['ko-romaja', 'ko-han2']:
+        else:
+            transliterated_string_up_to_cursor = (
+                self.trans[preedit_ime].transliterate(
+                    self._typed_string[:self._typed_string_cursor]))
+        if preedit_ime in ['ko-romaja', 'ko-han2']:
             transliterated_string_up_to_cursor = unicodedata.normalize(
                 'NFKD', transliterated_string_up_to_cursor)
         transliterated_string_up_to_cursor = unicodedata.normalize(
@@ -319,14 +340,17 @@ class editor(object):
         if not phrase:
             return
         phrase = unicodedata.normalize('NFC', phrase)
-        transliterated_string = unicodedata.normalize(
-            'NFC', self._transliterated_string)
         attrs = IBus.AttrList ()
-        if not (phrase.startswith(transliterated_string)
-                or phrase.startswith(u''.join(self._typed_string))):
-            # this is a candidate which does not start exactly
-            # as the transliterated user input, i.e. it is a suggestion
-            # for a spelling correction:
+        is_spelling_correction = True
+        for ime in self._current_imes:
+            if phrase.startswith(
+                    unicodedata.normalize(
+                        'NFC', self._transliterated_strings[ime])):
+                is_spelling_correction = False
+        if is_spelling_correction:
+            # this is a candidate which does not start exactly as any
+            # of the transliterations of the user input, i.e. it must
+            # be a spelling correction suggestion:
             if debug_level > 0:
                 phrase = phrase + u' ✓'
             attrs.append(IBus.attr_foreground_new(
@@ -372,65 +396,41 @@ class editor(object):
             self._typed_string[:])
         self._lookup_table.clear()
         self._lookup_table.set_cursor_visible(False)
-        self._candidates = []
-        transliterated_candidates = []
-        if self._transliterated_string:
-            prefix_length = 0
-            prefix = u''
-            stripped_transliterated_string = (
-                itb_util.lstrip_token(self._transliterated_string))
-            if len(stripped_transliterated_string) >= self._min_char_complete:
-                prefix_length = (
-                    len(self._transliterated_string)
-                    - len(stripped_transliterated_string))
-                if prefix_length:
-                    prefix = self._transliterated_string[0:prefix_length]
-                try:
-                    transliterated_candidates = self.db.select_words(
-                        stripped_transliterated_string,
-                        p_phrase=self._p_phrase,
-                        pp_phrase=self._pp_phrase)
-                except:
-                    import traceback
-                    traceback.print_exc()
-            if transliterated_candidates and prefix:
-                transliterated_candidates = (
-                    [(prefix+x[0], x[1]) for x in transliterated_candidates])
-        untransliterated_candidates = []
-        if True and self._typed_string:
-            prefix_length = 0
-            prefix = u''
-            untransliterated_string = u''.join(self._typed_string)
-            stripped_untransliterated_string = (
-                itb_util.lstrip_token(untransliterated_string))
-            if len(stripped_untransliterated_string) >= self._min_char_complete:
-                prefix_length = (
-                    len(untransliterated_string)
-                    - len(stripped_untransliterated_string))
-                if prefix_length:
-                    prefix = untransliterated_string[0:prefix_length]
-                try:
-                    untransliterated_candidates = self.db.select_words(
-                        stripped_untransliterated_string,
-                        p_phrase=self._p_phrase,
-                        pp_phrase=self._pp_phrase)
-                except:
-                    import traceback
-                    traceback.print_exc()
-            if untransliterated_candidates and prefix:
-                untransliterated_candidates = (
-                    [(prefix+x[0], x[1]) for x in untransliterated_candidates])
-        self._candidates = sorted(
-            transliterated_candidates + untransliterated_candidates,
-            key = lambda x: (
-                -1*x[1],   # user_freq descending
-                len(x[0]), # len(phrase) ascending
-                x[0]       # phrase alphabetical
-            ))[:20]
-        if self._candidates:
-            for x in self._candidates:
-                self.append_candidate_to_lookup_table(
-                    phrase=x[0], user_freq=x[1])
+        phrase_frequencies = {}
+        for ime in self._current_imes:
+            if self._transliterated_strings[ime]:
+                candidates = []
+                prefix_length = 0
+                prefix = u''
+                stripped_transliterated_string = (
+                    itb_util.lstrip_token(self._transliterated_strings[ime]))
+                if (len(stripped_transliterated_string)
+                    >= self._min_char_complete):
+                    prefix_length = (
+                        len(self._transliterated_strings[ime])
+                        - len(stripped_transliterated_string))
+                    if prefix_length:
+                        prefix = self._transliterated_string[0:prefix_length]
+                    try:
+                        candidates = self.db.select_words(
+                            stripped_transliterated_string,
+                            p_phrase=self._p_phrase,
+                            pp_phrase=self._pp_phrase)
+                    except:
+                        import traceback
+                        traceback.print_exc()
+                if candidates and prefix:
+                    candidates = [(prefix+x[0], x[1]) for x in candidates]
+                for x in candidates:
+                    if x[0] in phrase_frequencies:
+                        phrase_frequencies[x[0]] = max(
+                            phrase_frequencies[x[0]], x[1])
+                    else:
+                        phrase_frequencies[x[0]] = x[1]
+        self._candidates = self.db.best_candidates(phrase_frequencies)
+        for x in self._candidates:
+            self.append_candidate_to_lookup_table(
+                phrase=x[0], user_freq=x[1])
         return True
def arrow_down(self):
@@ -576,13 +576,13 @@ class editor(object):
         '''Get list of supported input methods'''
         return self._supported_imes
-    def get_current_ime(self):
-        '''Get current imput method'''
-        return self._current_ime
+    def get_current_imes(self):
+        '''Get current list of input methods'''
+        return self._current_imes
-    def set_current_ime(self, ime):
-        '''Get current imput method'''
-        self._current_ime = ime
+    def set_current_imes(self, imes):
+        '''Set current list of input methods'''
+        self._current_imes = imes
def push_context(self, phrase):
         self._pp_phrase = self._p_phrase
@@ -723,7 +723,8 @@ class tabengine (IBus.Engine):
         '''Update Preedit String in UI'''
         # editor.get_caret() should also use NFC!
         _str = unicodedata.normalize(
-            'NFC', self._editor.get_transliterated_string())
+            'NFC', self._editor.get_transliterated_strings()[
+                self._editor.get_current_imes()[0]])
         if _str == u'':
             super(tabengine, self).update_preedit_text(
                 IBus.Text.new_from_string(u''), 0, False)
@@ -815,7 +816,8 @@ class tabengine (IBus.Engine):
def commit_string (self, commit_phrase, input_phrase=u''):
         if not input_phrase:
-            input_phrase = self._editor.get_transliterated_string()
+            input_phrase = self._editor.get_transliterated_strings()[
+                self._editor.get_current_imes()[0]]
         # commit always in NFC:
         commit_phrase = unicodedata.normalize('NFC', commit_phrase)
         super(tabengine, self).commit_text(
@@ -981,7 +983,7 @@ class tabengine (IBus.Engine):
                 if (len(key.msymbol) == 1
                     and unicodedata.category(key.msymbol)
                     in itb_util.categories_to_trigger_immediate_commit):
-                    if not self._editor.trans_m17n_mode:
+                    if self._editor.get_current_imes()[0] == 'NoIme':
                         # Do not just pass the character through,
                         # commit it properly.  For example if it is a
                         # “.” we might want to remove whitespace
@@ -1010,7 +1012,7 @@ class tabengine (IBus.Engine):
                     # type digits here where the preëdit is still empty.
                     # If digits are not used to select candidates, they
                     # can be treated just like any other input keys.
-                    if not self._editor.trans_m17n_mode:
+                    if self._editor.get_current_imes()[0] == 'NoIme':
                         # If a digit has been typed and no transliteration
                         # is used, we can pass it through
                         return False
@@ -1019,8 +1021,9 @@ class tabengine (IBus.Engine):
                     # native digits. For example, with mr-inscript we
                     # want “3” to be converted to “३”. So we try
                     # to transliterate and commit the result:
-                    transliterated_digit = self._editor.trans.transliterate(
-                        [key.msymbol])
+                    transliterated_digit = self._editor.trans[
+                        self._editor.get_current_imes()[0]
+                    ].transliterate([key.msymbol])
                     self.commit_string(
                         transliterated_digit, input_phrase=transliterated_digit)
                     return True
@@ -1140,7 +1143,9 @@ class tabengine (IBus.Engine):
                         self.commit_string(phrase + u' ')
                     return True
                 else:
-                    input_phrase = self._editor.get_transliterated_string()
+                    input_phrase = (
+                        self._editor.get_transliterated_strings()[
+                            self._editor.get_current_imes()[0]])
                     if input_phrase:
                         self.commit_string(
                             input_phrase + u' ', input_phrase = input_phrase)
@@ -1173,7 +1178,9 @@ class tabengine (IBus.Engine):
                     self._editor._typed_string_cursor -= 1
                 self._update_ui()
                 return True
-            input_phrase = self._editor.get_transliterated_string()
+            input_phrase = (
+                self._editor.get_transliterated_strings()[
+                    self._editor.get_current_imes()[0]])
             if not input_phrase:
                 return False
             if not self._editor.get_candidates():
@@ -1230,10 +1237,12 @@ class tabengine (IBus.Engine):
             if (len(key.msymbol) == 1
                 and unicodedata.category(key.msymbol)
                 in itb_util.categories_to_trigger_immediate_commit):
-                input_phrase = self._editor.get_transliterated_string()
+                input_phrase = (
+                    self._editor.get_transliterated_strings()[
+                        self._editor.get_current_imes()[0]])
                 if (input_phrase
                     and input_phrase[-1] == key.msymbol
-                    and not self._editor.trans_m17n_mode):
+                    and self._editor.get_current_imes()[0] == 'NoIme'):
                     self.commit_string(
                         input_phrase + u' ', input_phrase = input_phrase)
             self._update_ui()
@@ -1350,14 +1359,11 @@ class tabengine (IBus.Engine):
             return
         if name == "inputmethod":
             if value in self._editor.get_supported_imes():
-                self._editor.set_current_ime(value)
-                if value != 'NoIme':
-                    print("Switching to transliteration using  ime=%s" %value)
-                    self._editor.trans_m17n_mode = True
-                    self._editor.trans = Transliterator(value)
+                if value == 'NoIme':
+                    self._editor.set_current_imes(['NoIme'])
                 else:
-                    print("Switching off transliteration.")
-                    self._editor.trans_m17n_mode = False
+                    self._editor.set_current_imes([value, 'NoIme'])
+                self._editor.init_transliterators()
             else:
                 print("error: trying to set unsupported ime: %s" %value)
             self.reset()
diff --git a/ibus-typing-booster/engine/tabsqlitedb.py b/ibus-typing-booster/engine/tabsqlitedb.py
index 5adae22..caadd5f 100755
--- a/ibus-typing-booster/engine/tabsqlitedb.py
+++ b/ibus-typing-booster/engine/tabsqlitedb.py
@@ -109,9 +109,10 @@ class tabsqlitedb:
         self._language = self.ime_properties.get('language')
         self._normalization_form_internal = 'NFD'
-        self.hunspell_obj = hunspell_suggest.Hunspell(
-            dictionary_names=self.ime_properties.get(
-                "hunspell_dict").replace('.dic', '').split(', '))
+        dictionary_names = [
+            x.replace('.dic', '').strip()
+            for x in self.ime_properties.get("hunspell_dict").split(',')]
+        self.hunspell_obj = hunspell_suggest.Hunspell(dictionary_names)
user_db = 'user.db'
         # user database:
diff --git a/ibus-typing-booster/hunspell-tables/mr_IN.conf b/ibus-typing-booster/hunspell-tables/mr_IN.conf
index ef009cd..ae16568 100644
--- a/ibus-typing-booster/hunspell-tables/mr_IN.conf
+++ b/ibus-typing-booster/hunspell-tables/mr_IN.conf
@@ -7,7 +7,7 @@ language = mr
 author = Anish Patil apatil@redhat.com 
 status_prompt = mr
 layout = default
-hunspell_dict = mr_IN.dic, en_GB.dic
+hunspell_dict = mr_IN.dic,en_GB.dic
 hunspell_dict_package = hunspell-mr
 ime_name = Marathi - IN (Hunspell)
 symbol = mr-IN

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

[ibus-typing-booster] multilingual: WIP: multilingual, better approach (59a60ef)