Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : multilingual
commit 59a60ef9d86d1cde324d69b070614e1665fd63b7 Author: Mike FABIAN mfabian@redhat.com Date: Fri Mar 25 13:49:10 2016 +0100
WIP: multilingual, better approach
ibus-typing-booster/engine/hunspell_suggest.py | 3 + ibus-typing-booster/engine/hunspell_table.py | 292 ++++++++++++------------ ibus-typing-booster/engine/tabsqlitedb.py | 7 +- ibus-typing-booster/hunspell-tables/mr_IN.conf | 2 +- 4 files changed, 157 insertions(+), 147 deletions(-)
diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py index 8e3a85c..89bdf23 100755 --- a/ibus-typing-booster/engine/hunspell_suggest.py +++ b/ibus-typing-booster/engine/hunspell_suggest.py @@ -109,6 +109,9 @@ class Dictionary: traceback.print_exc() return if self.buffer: + print("load_dictionary(): " + + "Successfully loaded %(dic)s using %(enc)s encoding." + %{'dic': dic_path, 'enc': self.encoding}) self.buffer = unicodedata.normalize( normalization_form_internal, self.buffer) if import_enchant_successful: diff --git a/ibus-typing-booster/engine/hunspell_table.py b/ibus-typing-booster/engine/hunspell_table.py index 483684e..f923d10 100644 --- a/ibus-typing-booster/engine/hunspell_table.py +++ b/ibus-typing-booster/engine/hunspell_table.py @@ -125,7 +125,6 @@ class editor(object): self._typed_string = [] self._typed_string_cursor = 0 self._typed_string_when_update_candidates_was_last_called = [] - self._transliterated_string = u'' self._p_phrase = u'' self._pp_phrase = u'' # self._candidates: hold candidates selected from database and hunspell @@ -138,41 +137,57 @@ class editor(object): self._lookup_table.clear() self._lookup_table.set_cursor_visible(False)
- self.trans = None - self._supported_imes = [] imes = self.db.ime_properties.get('imes').split(',') for item in imes: mim_name = item.split(':')[1] if not mim_name in self._supported_imes: self._supported_imes.append(mim_name) - if not self._supported_imes: + if self._supported_imes == []: self._supported_imes = ['NoIme'] - # Try to get the selected input method from dconf: - self._current_ime = variant_to_value(self._config.get_value( + self._current_imes = [] + # Try to get the selected input methods from dconf: + inputmethod = variant_to_value(self._config.get_value( self._config_section, 'inputmethod')) - if (self._current_ime == None - or not self._current_ime in self._supported_imes): - # There is no ime set in dconf or an unsupported ime, fall - # back to the first of the supported imes: - self._current_ime = self._supported_imes[0] - if self._current_ime == None or self._current_ime == 'NoIme': - # Not using m17n transliteration: - self.trans_m17n_mode = False - else: - # using m17n transliteration - self.trans_m17n_mode = True - try: + if inputmethod: + inputmethods = [x.strip() for x in inputmethod.split(',')] + for ime in inputmethods: + if ime in self._supported_imes or ime == 'NoIme': + self._current_imes.append(ime) + if self._current_imes == []: + # There is no ime set in dconf, fall + # back to the first of the supported imes + # and add direct input by default: + self._current_imes = self._supported_imes[0] + if 'NoIme' not in self._current_imes: + self._current_imes.append('NoIme') + if 'NoIme' not in self._current_imes: + self._current_imes.append('NoIme') + self.init_transliterators() + + def init_transliterators(self): + self._transliterated_strings = {} + self.trans = {} + for ime in self._current_imes: + self._transliterated_strings[ime] = u'' + if ime == 'NoIme': + # Not using m17n transliteration: if debug_level > 1: - sys.stderr.write( - "instantiating Transliterator(%(cur)s)\n" - %{'cur': self._current_ime}) - self.trans = 
Transliterator(self._current_ime) - except ValueError as e: - sys.stderr.write('Error initializing Transliterator: %s' %e) - import traceback - traceback.print_exc() + sys.stderr.write("Adding dummy Transliterator 'None'\n") + self.trans['NoIme'] = None + else: + # using m17n transliteration + try: + if debug_level > 1: + sys.stderr.write( + "instantiating Transliterator(%(ime)s)\n" + %{'ime': ime}) + self.trans[ime] = Transliterator(ime) + except ValueError as e: + sys.stderr.write('Error initializing Transliterator: %s' %e) + import traceback + traceback.print_exc()
def is_empty(self): return len(self._typed_string) == 0 @@ -185,28 +200,32 @@ class editor(object): self._typed_string = [] self._typed_string_cursor = 0 self._typed_string_when_update_candidates_was_last_called = [] - self._transliterated_string = u'' - - def update_transliterated_string(self): - if self.trans_m17n_mode: - self._transliterated_string = self.trans.transliterate( - self._typed_string) - if self._current_ime in ['ko-romaja', 'ko-han2']: - self._transliterated_string = unicodedata.normalize( - 'NFKD', self._transliterated_string) - else: - self._transliterated_string = u''.join(self._typed_string) + for ime in self._current_imes: + self._transliterated_strings[ime] = u'' + + def update_transliterated_strings(self): + self._transliterated_strings = {} + for ime in self._current_imes: + if ime == 'NoIme': + self._transliterated_strings['NoIme'] = u''.join( + self._typed_string) + else: + self._transliterated_strings[ime] = ( + self.trans[ime].transliterate(self._typed_string)) + if ime in ['ko-romaja', 'ko-han2']: + self._transliterated_strings[ime] = unicodedata.normalize( + 'NFKD', self._transliterated_strings[ime]) if debug_level > 1: sys.stderr.write( - "update_transliterated_string() self._typed_string=%s\n" + "update_transliterated_strings() self._typed_string=%s\n" %self._typed_string) sys.stderr.write( - "update_transliterated_string() " - + "self._transliterated_string=%s\n" - %self._transliterated_string) + "update_transliterated_strings() " + + "self._transliterated_strings=%s\n" + %self._transliterated_strings)
- def get_transliterated_string(self): - return self._transliterated_string + def get_transliterated_strings(self): + return self._transliterated_strings
def insert_string_at_cursor(self, string_to_insert): '''Insert typed string at cursor position''' @@ -223,22 +242,22 @@ class editor(object): +string_to_insert \ +self._typed_string[self._typed_string_cursor:] self._typed_string_cursor += len(string_to_insert) - self.update_transliterated_string() - self.update_candidates () + self.update_transliterated_strings() + self.update_candidates()
def remove_string_before_cursor(self): '''Remove typed string before cursor''' if self._typed_string_cursor > 0: self._typed_string = self._typed_string[self._typed_string_cursor:] self._typed_string_cursor = 0 - self.update_transliterated_string() + self.update_transliterated_strings() self.update_candidates()
def remove_string_after_cursor(self): '''Remove typed string after cursor''' if self._typed_string_cursor < len(self._typed_string): self._typed_string = self._typed_string[:self._typed_string_cursor] - self.update_transliterated_string() + self.update_transliterated_strings() self.update_candidates()
def remove_character_before_cursor(self): @@ -248,7 +267,7 @@ class editor(object): self._typed_string[:self._typed_string_cursor-1] +self._typed_string[self._typed_string_cursor:]) self._typed_string_cursor -= 1 - self.update_transliterated_string() + self.update_transliterated_strings() self.update_candidates()
def remove_character_after_cursor(self): @@ -257,7 +276,7 @@ class editor(object): self._typed_string = ( self._typed_string[:self._typed_string_cursor] +self._typed_string[self._typed_string_cursor+1:]) - self.update_transliterated_string() + self.update_transliterated_strings() self.update_candidates()
def get_caret (self): @@ -301,13 +320,15 @@ class editor(object): no transliteration is used and works better than nothing even if transliteration is used. ''' - if self.trans_m17n_mode: - transliterated_string_up_to_cursor = self.trans.transliterate( - self._typed_string[:self._typed_string_cursor]) - else: + preedit_ime = self._current_imes[0] + if preedit_ime == 'NoIme': transliterated_string_up_to_cursor = ( u''.join(self._typed_string[:self._typed_string_cursor])) - if self._current_ime in ['ko-romaja', 'ko-han2']: + else: + transliterated_string_up_to_cursor = ( + self.trans[preedit_ime].transliterate( + self._typed_string[:self._typed_string_cursor])) + if preedit_ime in ['ko-romaja', 'ko-han2']: transliterated_string_up_to_cursor = unicodedata.normalize( 'NFKD', transliterated_string_up_to_cursor) transliterated_string_up_to_cursor = unicodedata.normalize( @@ -319,14 +340,17 @@ class editor(object): if not phrase: return phrase = unicodedata.normalize('NFC', phrase) - transliterated_string = unicodedata.normalize( - 'NFC', self._transliterated_string) attrs = IBus.AttrList () - if not (phrase.startswith(transliterated_string) - or phrase.startswith(u''.join(self._typed_string))): - # this is a candidate which does not start exactly - # as the transliterated user input, i.e. it is a suggestion - # for a spelling correction: + is_spelling_correction = True + for ime in self._current_imes: + if phrase.startswith( + unicodedata.normalize( + 'NFC', self._transliterated_strings[ime])): + is_spelling_correction = False + if is_spelling_correction: + # this is a candidate which does not start exactly as any + # of the transliterations of the user input, i.e. 
it must + # be a spelling correction suggestion: if debug_level > 0: phrase = phrase + u' ✓' attrs.append(IBus.attr_foreground_new( @@ -372,65 +396,41 @@ class editor(object): self._typed_string[:]) self._lookup_table.clear() self._lookup_table.set_cursor_visible(False) - self._candidates = [] - transliterated_candidates = [] - if self._transliterated_string: - prefix_length = 0 - prefix = u'' - stripped_transliterated_string = ( - itb_util.lstrip_token(self._transliterated_string)) - if len(stripped_transliterated_string) >= self._min_char_complete: - prefix_length = ( - len(self._transliterated_string) - - len(stripped_transliterated_string)) - if prefix_length: - prefix = self._transliterated_string[0:prefix_length] - try: - transliterated_candidates = self.db.select_words( - stripped_transliterated_string, - p_phrase=self._p_phrase, - pp_phrase=self._pp_phrase) - except: - import traceback - traceback.print_exc() - if transliterated_candidates and prefix: - transliterated_candidates = ( - [(prefix+x[0], x[1]) for x in transliterated_candidates]) - untransliterated_candidates = [] - if True and self._typed_string: - prefix_length = 0 - prefix = u'' - untransliterated_string = u''.join(self._typed_string) - stripped_untransliterated_string = ( - itb_util.lstrip_token(untransliterated_string)) - if len(stripped_untransliterated_string) >= self._min_char_complete: - prefix_length = ( - len(untransliterated_string) - - len(stripped_untransliterated_string)) - if prefix_length: - prefix = untransliterated_string[0:prefix_length] - try: - untransliterated_candidates = self.db.select_words( - stripped_untransliterated_string, - p_phrase=self._p_phrase, - pp_phrase=self._pp_phrase) - except: - import traceback - traceback.print_exc() - if untransliterated_candidates and prefix: - untransliterated_candidates = ( - [(prefix+x[0], x[1]) for x in untransliterated_candidates]) - self._candidates = sorted( - transliterated_candidates + untransliterated_candidates, - key = 
lambda x: ( - -1*x[1], # user_freq descending - len(x[0]), # len(phrase) ascending - x[0] # phrase alphabetical - ))[:20] - if self._candidates: - for x in self._candidates: - self.append_candidate_to_lookup_table( - phrase=x[0], user_freq=x[1]) + phrase_frequencies = {} + for ime in self._current_imes: + if self._transliterated_strings[ime]: + candidates = [] + prefix_length = 0 + prefix = u'' + stripped_transliterated_string = ( + itb_util.lstrip_token(self._transliterated_strings[ime])) + if (len(stripped_transliterated_string) + >= self._min_char_complete): + prefix_length = ( + len(self._transliterated_strings[ime]) + - len(stripped_transliterated_string)) + if prefix_length: + prefix = self._transliterated_string[0:prefix_length] + try: + candidates = self.db.select_words( + stripped_transliterated_string, + p_phrase=self._p_phrase, + pp_phrase=self._pp_phrase) + except: + import traceback + traceback.print_exc() + if candidates and prefix: + candidates = [(prefix+x[0], x[1]) for x in candidates] + for x in candidates: + if x[0] in phrase_frequencies: + phrase_frequencies[x[0]] = max( + phrase_frequencies[x[0]], x[1]) + else: + phrase_frequencies[x[0]] = x[1] + self._candidates = self.db.best_candidates(phrase_frequencies) + for x in self._candidates: + self.append_candidate_to_lookup_table( + phrase=x[0], user_freq=x[1]) return True
def arrow_down(self): @@ -576,13 +576,13 @@ class editor(object): '''Get list of supported input methods''' return self._supported_imes
- def get_current_ime(self): - '''Get current imput method''' - return self._current_ime + def get_current_imes(self): + '''Get current list of input methods''' + return self._current_imes
- def set_current_ime(self, ime): - '''Get current imput method''' - self._current_ime = ime + def set_current_imes(self, imes): + '''Set current list of input methods''' + self._current_imes = imes
def push_context(self, phrase): self._pp_phrase = self._p_phrase @@ -723,7 +723,8 @@ class tabengine (IBus.Engine): '''Update Preedit String in UI''' # editor.get_caret() should also use NFC! _str = unicodedata.normalize( - 'NFC', self._editor.get_transliterated_string()) + 'NFC', self._editor.get_transliterated_strings()[ + self._editor.get_current_imes()[0]]) if _str == u'': super(tabengine, self).update_preedit_text( IBus.Text.new_from_string(u''), 0, False) @@ -815,7 +816,8 @@ class tabengine (IBus.Engine):
def commit_string (self, commit_phrase, input_phrase=u''): if not input_phrase: - input_phrase = self._editor.get_transliterated_string() + input_phrase = self._editor.get_transliterated_strings()[ + self._editor.get_current_imes()[0]] # commit always in NFC: commit_phrase = unicodedata.normalize('NFC', commit_phrase) super(tabengine, self).commit_text( @@ -981,7 +983,7 @@ class tabengine (IBus.Engine): if (len(key.msymbol) == 1 and unicodedata.category(key.msymbol) in itb_util.categories_to_trigger_immediate_commit): - if not self._editor.trans_m17n_mode: + if self._editor.get_current_imes()[0] == 'NoIme': # Do not just pass the character through, # commit it properly. For example if it is a # “.” we might want to remove whitespace @@ -1010,7 +1012,7 @@ class tabengine (IBus.Engine): # type digits here where the preëdit is still empty. # If digits are not used to select candidates, they # can be treated just like any other input keys. - if not self._editor.trans_m17n_mode: + if self._editor.get_current_imes()[0] == 'NoIme': # If a digit has been typed and no transliteration # is used, we can pass it through return False @@ -1019,8 +1021,9 @@ class tabengine (IBus.Engine): # native digits. For example, with mr-inscript we # want “3” to be converted to “३”. 
So we try # to transliterate and commit the result: - transliterated_digit = self._editor.trans.transliterate( - [key.msymbol]) + transliterated_digit = self._editor.trans[ + self._editor.get_current_imes()[0] + ].transliterate([key.msymbol]) self.commit_string( transliterated_digit, input_phrase=transliterated_digit) return True @@ -1140,7 +1143,9 @@ class tabengine (IBus.Engine): self.commit_string(phrase + u' ') return True else: - input_phrase = self._editor.get_transliterated_string() + input_phrase = ( + self._editor.get_transliterated_strings()[ + self._editor.get_current_imes()[0]]) if input_phrase: self.commit_string( input_phrase + u' ', input_phrase = input_phrase) @@ -1173,7 +1178,9 @@ class tabengine (IBus.Engine): self._editor._typed_string_cursor -= 1 self._update_ui() return True - input_phrase = self._editor.get_transliterated_string() + input_phrase = ( + self._editor.get_transliterated_strings()[ + self._editor.get_current_imes()[0]]) if not input_phrase: return False if not self._editor.get_candidates(): @@ -1230,10 +1237,12 @@ class tabengine (IBus.Engine): if (len(key.msymbol) == 1 and unicodedata.category(key.msymbol) in itb_util.categories_to_trigger_immediate_commit): - input_phrase = self._editor.get_transliterated_string() + input_phrase = ( + self._editor.get_transliterated_strings()[ + self._editor.get_current_imes()[0]]) if (input_phrase and input_phrase[-1] == key.msymbol - and not self._editor.trans_m17n_mode): + and self._editor.get_current_imes()[0] == 'NoIme'): self.commit_string( input_phrase + u' ', input_phrase = input_phrase) self._update_ui() @@ -1350,14 +1359,11 @@ class tabengine (IBus.Engine): return if name == "inputmethod": if value in self._editor.get_supported_imes(): - self._editor.set_current_ime(value) - if value != 'NoIme': - print("Switching to transliteration using ime=%s" %value) - self._editor.trans_m17n_mode = True - self._editor.trans = Transliterator(value) + if value == 'NoIme': + 
self._editor.set_current_imes(['NoIme']) else: - print("Switching off transliteration.") - self._editor.trans_m17n_mode = False + self._editor.set_current_imes([value, 'NoIme']) + self._editor.init_transliterators() else: print("error: trying to set unsupported ime: %s" %value) self.reset() diff --git a/ibus-typing-booster/engine/tabsqlitedb.py b/ibus-typing-booster/engine/tabsqlitedb.py index 5adae22..caadd5f 100755 --- a/ibus-typing-booster/engine/tabsqlitedb.py +++ b/ibus-typing-booster/engine/tabsqlitedb.py @@ -109,9 +109,10 @@ class tabsqlitedb: self._language = self.ime_properties.get('language') self._normalization_form_internal = 'NFD'
- self.hunspell_obj = hunspell_suggest.Hunspell( - dictionary_names=self.ime_properties.get( - "hunspell_dict").replace('.dic', '').split(', ')) + dictionary_names = [ + x.replace('.dic', '').strip() + for x in self.ime_properties.get("hunspell_dict").split(',')] + self.hunspell_obj = hunspell_suggest.Hunspell(dictionary_names)
user_db = 'user.db' # user database: diff --git a/ibus-typing-booster/hunspell-tables/mr_IN.conf b/ibus-typing-booster/hunspell-tables/mr_IN.conf index ef009cd..ae16568 100644 --- a/ibus-typing-booster/hunspell-tables/mr_IN.conf +++ b/ibus-typing-booster/hunspell-tables/mr_IN.conf @@ -7,7 +7,7 @@ language = mr author = Anish Patil apatil@redhat.com status_prompt = mr layout = default -hunspell_dict = mr_IN.dic, en_GB.dic +hunspell_dict = mr_IN.dic,en_GB.dic hunspell_dict_package = hunspell-mr ime_name = Marathi - IN (Hunspell) symbol = mr-IN