[ibus-typing-booster] miketmp-debug: MOD5 experiment (8899d0a)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : miketmp-debug
>---------------------------------------------------------------
commit 8899d0a3bbffee03567a6eb6ae3ff679b6c1a332
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Thu Feb 20 08:17:03 2014 +0100
MOD5 experiment
>---------------------------------------------------------------
ibus-typing-booster/engine/hunspell_table.py | 3 ++-
ibus-typing-booster/hunspell-tables/mr_IN.conf | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/ibus-typing-booster/engine/hunspell_table.py b/ibus-typing-booster/engine/hunspell_table.py
index d0cf723..3ef28ac 100644
--- a/ibus-typing-booster/engine/hunspell_table.py
+++ b/ibus-typing-booster/engine/hunspell_table.py
@@ -818,7 +818,7 @@ class tabengine (IBus.Engine):
self._editor.insert_string_at_cursor(token)
self._update_ui()
return True
- if key.code >= 32 and (not (key.mask & (IBus.ModifierType.MOD1_MASK | IBus.ModifierType.CONTROL_MASK))):
+ if key.code >= 32 and (not (key.mask & (IBus.ModifierType.MOD1_MASK | IBus.ModifierType.CONTROL_MASK | IBus.ModifierType.MOD5_MASK))):
typed_character = IBus.keyval_to_unicode(key.code)
if type(typed_character) != type(u''):
typed_character = typed_character.decode('UTF-8')
@@ -1026,6 +1026,7 @@ class tabengine (IBus.Engine):
typed_character = IBus.keyval_to_unicode(key.code)
if type(typed_character) != type(u''):
typed_character = typed_character.decode('UTF-8')
+ sys.stderr.write("mike: MOD5_MASK=%(bit)s\n" %{'bit': key.mask & IBus.ModifierType.MOD5_MASK})
self._editor.insert_string_at_cursor(typed_character)
if typed_character and unicodedata.category(typed_character) in itb_util.categories_to_trigger_immediate_commit:
input_phrase = self._editor.get_transliterated_string()
diff --git a/ibus-typing-booster/hunspell-tables/mr_IN.conf b/ibus-typing-booster/hunspell-tables/mr_IN.conf
index 9f644dd..3c2b675 100644
--- a/ibus-typing-booster/hunspell-tables/mr_IN.conf
+++ b/ibus-typing-booster/hunspell-tables/mr_IN.conf
@@ -6,7 +6,7 @@ description = This is a Marathi typing booster engine table for Marathi language
language = mr
author = Anish Patil <apatil(a)redhat.com>
status_prompt = mr
-layout = default
+layout = default[lv3:ralt_switch]
hunspell_dict = mr_IN.dic
hunspell_dict_package = hunspell-mr
ime_name = Marathi - IN (Hunspell)
10 years, 4 months
[ibus-typing-booster] miketmp-debug: WIP: usage experiment: de_DE & en_GB dictionary (750bf4c)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : miketmp-debug
>---------------------------------------------------------------
commit 750bf4cb673efc3653ad33e9fad8069460f1941a
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Fri Jan 17 14:52:04 2014 +0100
WIP: usage experiment: de_DE & en_GB dictionary
>---------------------------------------------------------------
ibus-typing-booster/hunspell-tables/de_DE.conf | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/ibus-typing-booster/hunspell-tables/de_DE.conf b/ibus-typing-booster/hunspell-tables/de_DE.conf
index 0f8251d..b7adbb2 100644
--- a/ibus-typing-booster/hunspell-tables/de_DE.conf
+++ b/ibus-typing-booster/hunspell-tables/de_DE.conf
@@ -7,7 +7,7 @@ language = de
author = Mike FABIAN <mfabian(a)redhat.com>
status_prompt = de
layout = default
-hunspell_dict = de_DE.dic
+hunspell_dict = de_DE.dic, en_GB.dic
hunspell_dict_package = hunspell-de
ime_name = German - DE (Hunspell)
symbol = de-DE
10 years, 4 months
[ibus-typing-booster] miketmp-debug: add sys.stderr.write("mike ..." debug messages) (5c59e8a)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : miketmp-debug
>---------------------------------------------------------------
commit 5c59e8a68ff5a043d441c1319a1877112a075129
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Wed Jun 12 15:55:07 2013 +0200
add sys.stderr.write("mike ..." debug messages)
>---------------------------------------------------------------
ibus-typing-booster/engine/factory.py | 1 +
ibus-typing-booster/engine/hunspell_suggest.py | 2 ++
ibus-typing-booster/engine/hunspell_table.py | 18 ++++++++++++++++++
ibus-typing-booster/engine/tabsqlitedb.py | 19 +++++++++++++++++++
4 files changed, 40 insertions(+), 0 deletions(-)
diff --git a/ibus-typing-booster/engine/factory.py b/ibus-typing-booster/engine/factory.py
index 84e64d6..3b61ebf 100755
--- a/ibus-typing-booster/engine/factory.py
+++ b/ibus-typing-booster/engine/factory.py
@@ -23,6 +23,7 @@ import hunspell_table
import tabsqlitedb
import os
import re
+import sys
from gettext import dgettext
_ = lambda a : dgettext ("ibus-typing-booster", a)
diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py
index d04895d..9ddf9b0 100755
--- a/ibus-typing-booster/engine/hunspell_suggest.py
+++ b/ibus-typing-booster/engine/hunspell_suggest.py
@@ -110,6 +110,7 @@ class Hunspell:
def load_dictionaries(self):
self.dictionaries = []
+ print("mike dictionary_names=%s\n" %self.dictionary_names)
for dictionary_name in self.dictionary_names:
self.dictionaries.append(Dictionary(name=dictionary_name))
@@ -125,6 +126,7 @@ class Hunspell:
return []
if type(input_phrase) != type(u''):
input_phrase = input_phrase.decode('utf8')
+ sys.stderr.write("mike in suggest input_phrase=%(ip)s\n" %{'ip': input_phrase.encode('UTF-8')})
# http://pwet.fr/man/linux/fichiers_speciaux/hunspell says:
#
# > A dictionary file (*.dic) contains a list of words, one per
diff --git a/ibus-typing-booster/engine/hunspell_table.py b/ibus-typing-booster/engine/hunspell_table.py
index d09ea34..d0cf723 100644
--- a/ibus-typing-booster/engine/hunspell_table.py
+++ b/ibus-typing-booster/engine/hunspell_table.py
@@ -84,6 +84,7 @@ class editor(object):
'''Hold user inputs chars and preedit string'''
def __init__ (self, config, database):
+ sys.stderr.write("mike editor __init__\n")
self.db = database
self._config = config
self._name = self.db.ime_properties.get('name')
@@ -140,6 +141,7 @@ class editor(object):
self.trans_m17n_mode = True
try:
#self.trans = Translit.Transliterator.get(self._m17ndb, self._current_ime)
+ sys.stderr.write("mike calling Transliterator.get(%(m17n)s, %(cur)s)\n" %{'m17n': self._m17ndb, 'cur': self._current_ime})
self.trans = Transliterator.get(self._m17ndb, self._current_ime)
except:
import traceback
@@ -169,12 +171,19 @@ class editor(object):
'NFKD', self._transliterated_string)
else:
self._transliterated_string = self._typed_string
+ sys.stderr.write("mike in update_transliterated_string() self._typed_string=%s\n" %self._typed_string.encode('UTF-8'))
+ sys.stderr.write("mike in update_transliterated_string() repr(self._typed_string)=%s\n" %repr(self._typed_string))
+ sys.stderr.write("mike in update_transliterated_string() self._transliterated_string=%s\n" %self._transliterated_string.encode('UTF-8'))
+ sys.stderr.write("mike in update_transliterated_string() repr(self._transliterated_string)=%s\n" %repr(self._transliterated_string))
def get_transliterated_string(self):
return self._transliterated_string
def insert_string_at_cursor(self, string_to_insert):
'''Insert typed string at cursor position'''
+ sys.stderr.write("mike in insert_string_at_cursor() string_to_insert=%s\n" %string_to_insert.encode('UTF-8'))
+ sys.stderr.write("mike in insert_string_at_cursor() self._typed_string=%s\n" %self._typed_string.encode('UTF-8'))
+ sys.stderr.write("mike in insert_string_at_cursor() self._typed_string_cursor=%s\n" %self._typed_string_cursor)
self._typed_string = self._typed_string[:self._typed_string_cursor] \
+string_to_insert \
+self._typed_string[self._typed_string_cursor:]
@@ -333,6 +342,7 @@ class editor(object):
def update_candidates (self):
'''Update lookuptable'''
+ sys.stderr.write("mike in update_candidates() self._typed_string=%s\n" %self._typed_string.encode('UTF-8'))
if self._typed_string == self._typed_string_when_update_candidates_was_last_called:
# The input did not change since we came here last, do nothing and leave
# candidates and lookup table unchanged:
@@ -501,6 +511,7 @@ class tabengine (IBus.Engine):
'''The IM Engine for Tables'''
def __init__ (self, bus, obj_path, db ):
+ sys.stderr.write("mike in tabengine __init__() obj_path=%s\n" %obj_path)
super(tabengine,self).__init__ (connection=bus.get_connection(),object_path=obj_path)
global debug_level
try:
@@ -564,6 +575,7 @@ class tabengine (IBus.Engine):
def _change_mode (self):
'''Shift input mode, TAB -> EN -> TAB
'''
+ sys.stderr.write("mike in hunspell_table.py _change_mode()\n")
self.reset ()
self._update_ui ()
@@ -731,7 +743,10 @@ class tabengine (IBus.Engine):
if self._has_input_purpose and self._input_purpose in [IBus.InputPurpose.PASSWORD, IBus.InputPurpose.PIN]:
return False
+ sys.stderr.write("mike in process_key_event keyval=%(kv)s keycode=%(kc)s state=%(st)s\n" %{'kv': keyval, 'kc': keycode, 'st': state})
key = KeyEvent(keyval, state)
+ sys.stderr.write("mike after KeyEvent() in process_key_event key.code=%(kc)s\n" %{'kc': key.code})
+ sys.stderr.write("mike after KeyEvent() in process_key_event key.code=%(kc)s IBus.keyval_to_unicode(key.code)=%(uc)s\n" %{'kc': key.code, 'uc': IBus.keyval_to_unicode(key.code)})
# ignore NumLock mask
key.mask &= ~IBus.ModifierType.MOD2_MASK
@@ -752,6 +767,9 @@ class tabengine (IBus.Engine):
return True
if self._editor.is_empty ():
+ sys.stderr.write("mike self._editor.is_empty ():\n")
+ sys.stderr.write("mike key.code=%(key.code)s IBus.keyval_to_unicode(key.code)=%(keychar)s\n" %{'key.code': key.code, 'keychar': IBus.keyval_to_unicode(key.code)})
+ sys.stderr.write("mike IBus.keyval_name(key.code)=%s\n" %IBus.keyval_name(key.code))
# This is the first character typed since the last commit
# there is nothing in the preëdit yet.
if key.code < 32:
diff --git a/ibus-typing-booster/engine/tabsqlitedb.py b/ibus-typing-booster/engine/tabsqlitedb.py
index 78a4b26..2815503 100755
--- a/ibus-typing-booster/engine/tabsqlitedb.py
+++ b/ibus-typing-booster/engine/tabsqlitedb.py
@@ -241,6 +241,8 @@ class tabsqlitedb:
'input_phrase': input_phrase,
'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase,
'timestamp': time.time()}
+ sys.stderr.write("mike update_phrase() sqlstr=%s\n" %sqlstr)
+ sys.stderr.write("mike update_phrase() sqlargs=%s\n" %sqlargs)
try:
self.db.execute(sqlstr, sqlargs)
if commit:
@@ -253,8 +255,10 @@ class tabsqlitedb:
'''
Trigger a checkpoint operation.
'''
+ sys.stderr.write("mike sync_userdb() commit and execute checkpoint ...\n")
self.db.commit()
self.db.execute('PRAGMA wal_checkpoint;')
+ sys.stderr.write("mike sync_userdb() commit and execute checkpoint done.\n")
def create_tables (self, database):
'''Create table for the phrases.'''
@@ -269,6 +273,7 @@ class tabsqlitedb:
'''
Add phrase to database
'''
+ sys.stderr.write("mike in add_phrase() input_phrase=%(ip)s phrase=%(p)s user_freq=%(uf)s database=%(db)s\n" %{'ip': input_phrase.encode('UTF-8'), 'p': phrase.encode('UTF-8'), 'uf': user_freq, 'db': database})
if not input_phrase or not phrase:
return
input_phrase = unicodedata.normalize(
@@ -299,6 +304,8 @@ class tabsqlitedb:
insert_sqlargs = {'input_phrase': input_phrase,
'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase,
'user_freq': user_freq, 'timestamp': time.time()}
+ sys.stderr.write("mike add_phrase() insert_sqlstr=%s\n" %insert_sqlstr)
+ sys.stderr.write("mike add_phrase() insert_sqlargs=%s\n" %insert_sqlargs)
try:
self.db.execute (insert_sqlstr, insert_sqlargs)
if commit:
@@ -368,9 +375,11 @@ class tabsqlitedb:
self._normalization_form_internal, p_phrase)
pp_phrase = unicodedata.normalize(
self._normalization_form_internal, pp_phrase)
+ sys.stderr.write("mike in select_words() input_phrase=%(ip)s p_phrase=%(p)s pp_phrase=%(pp)s\n" %{'ip': input_phrase.encode('UTF-8'), 'p': p_phrase.encode('UTF-8'), 'pp': pp_phrase.encode('UTF-8')})
phrase_frequencies = {}
for x in self.hunspell_obj.suggest(input_phrase):
phrase_frequencies.update([(x, 0)])
+ sys.stderr.write("mike in select_words() hunspell: best_candidates=%s\n" %self.best_candidates(phrase_frequencies))
# Now phrase_frequencies might contain something like this:
#
# {u'code': 0, u'communicability': 0, u'cold': 0, u'colour': 0}
@@ -437,6 +446,7 @@ class tabsqlitedb:
# {u'conspiracy': 6/11, u'code': 0, u'communicability': 0, u'cold': 1/11, u'colour': 4/11}
for x in results_uni:
phrase_frequencies.update([(x[0], x[1]/float(count))])
+ sys.stderr.write("mike in select_words() Unigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies))
if not p_phrase:
# If no context for bigram matching is available, return what we have so far:
return self.best_candidates(phrase_frequencies)
@@ -461,6 +471,7 @@ class tabsqlitedb:
# both the weight of 0.5:
for x in results_bi:
phrase_frequencies.update([(x[0], 0.5*x[1]/float(count_p_phrase)+0.5*phrase_frequencies[x[0]])])
+ sys.stderr.write("mike in select_words() Bigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies))
if not pp_phrase:
# If no context for trigram matching is available, return what we have so far:
return self.best_candidates(phrase_frequencies)
@@ -487,6 +498,7 @@ class tabsqlitedb:
# get higher weight):
for x in results_tri:
phrase_frequencies.update([(x[0], 0.5*x[1]/float(count_pp_phrase_p_phrase)+0.5*phrase_frequencies[x[0]])])
+ sys.stderr.write("mike in select_words() Trigram best_candidates=%s\n" %self.best_candidates(phrase_frequencies))
return self.best_candidates(phrase_frequencies)
def generate_userdb_desc (self):
@@ -592,6 +604,8 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_
input_phrase = unicodedata.normalize(
self._normalization_form_internal, input_phrase)
+ sys.stderr.write("mike check_phrase() phrase=%(p)s, input_phrase=%(t)s, database=%(d)s\n" %{'p': phrase.encode('UTF-8'), 't': input_phrase.encode('UTF-8'), 'd': database})
+
# There should never be more than 1 database row for the same
# input_phrase *and* phrase. So the following query on
# the database should match at most one database
@@ -608,7 +622,10 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_
;'''
sqlargs = {'input_phrase': input_phrase,
'phrase': phrase, 'p_phrase': p_phrase, 'pp_phrase': pp_phrase}
+ sys.stderr.write("mike sqlstr=%s\n" %sqlstr)
+ sys.stderr.write("mike sqlargs=%s\n" %sqlargs)
result = self.db.execute(sqlstr, sqlargs).fetchall()
+ sys.stderr.write("mike result=%s\n" %result)
if len(result) > 0:
# A match was found in user_db, increase user frequency by 1
self.update_phrase(input_phrase = input_phrase,
@@ -630,6 +647,8 @@ CREATE TABLE phrases (id INTEGER PRIMARY KEY, input_phrase TEXT, phrase TEXT, p_
Or, if “input_phrase” is “None”, remove all rows matching “phrase”
no matter for what input phrase from the database.
'''
+ sys.stderr.write("mike remove_phrase() phrase=%(p)s\n" %{'p': phrase.encode('UTF-8')})
+ sys.stderr.write("mike remove_phrase() database=%s\n" %database)
if not phrase:
return
phrase = unicodedata.normalize(
10 years, 4 months
[ibus-typing-booster] master: Release 1.2.10 (f581795)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : master
>---------------------------------------------------------------
commit f581795a4dfa6df61d86e9d97f0c404538894978
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Thu Feb 27 15:26:06 2014 +0100
Release 1.2.10
>---------------------------------------------------------------
ibus-typing-booster/ChangeLog | 9 +++++++++
ibus-typing-booster/configure.ac | 2 +-
ibus-typing-booster/ibus-typing-booster.pc.in | 2 +-
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/ibus-typing-booster/ChangeLog b/ibus-typing-booster/ChangeLog
index 5274b44..da68d3e 100644
--- a/ibus-typing-booster/ChangeLog
+++ b/ibus-typing-booster/ChangeLog
@@ -1,4 +1,13 @@
* Author: Mike FABIAN <mfabian(a)redhat.com>
+* 27 Feb 2014 Released 1.2.10 version
+* make profiling work again and make it easier to use
+* tiny performance improvement
+* some code simplification
+* port from Python2 to Python3
+* minor improvements in user_transliteration.py
+* add python-enchant support
+
+* Author: Mike FABIAN <mfabian(a)redhat.com>
* 17 Jan 2014 Released 1.2.9 version
* Fix behaviour of arrow right keys in preëdit (Resolves: rhbz#1049324)
* Add timestamps to entries in the user database
diff --git a/ibus-typing-booster/configure.ac b/ibus-typing-booster/configure.ac
index b8c6c3c..b28eb7a 100755
--- a/ibus-typing-booster/configure.ac
+++ b/ibus-typing-booster/configure.ac
@@ -24,7 +24,7 @@ m4_define([package_name], [ibus-typing-booster])
m4_define([ibus_released], [1])
m4_define([ibus_major_version], [1])
m4_define([ibus_minor_version], [2])
-m4_define([ibus_micro_version], [9])
+m4_define([ibus_micro_version], [10])
m4_define(ibus_maybe_datestamp,
m4_esyscmd([if test x]ibus_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi]))
diff --git a/ibus-typing-booster/ibus-typing-booster.pc.in b/ibus-typing-booster/ibus-typing-booster.pc.in
index 015a88b..9a89c5d 100755
--- a/ibus-typing-booster/ibus-typing-booster.pc.in
+++ b/ibus-typing-booster/ibus-typing-booster.pc.in
@@ -11,7 +11,7 @@ enginedir=/usr/share/ibus/engine
Name: IBus-Typing-Booster
Description: Table Based Input Method Framework for Intelligent Input Bus for Linux / Unix OS
-Version: 1.2.9
+Version: 1.2.10
Requires:
Libs:
Cflags:
\ No newline at end of file
10 years, 4 months
[ibus-typing-booster] master: Print usage message if user_transliteration.py is called without options (c75d248)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : master
>---------------------------------------------------------------
commit c75d248481ab12218570942054ef62890b478999
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Thu Feb 27 10:50:02 2014 +0100
Print usage message if user_transliteration.py is called without options
>---------------------------------------------------------------
ibus-typing-booster/setup/user_transliteration.py | 34 ++++++++++----------
1 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/ibus-typing-booster/setup/user_transliteration.py b/ibus-typing-booster/setup/user_transliteration.py
index 2e991fd..687266d 100755
--- a/ibus-typing-booster/setup/user_transliteration.py
+++ b/ibus-typing-booster/setup/user_transliteration.py
@@ -180,25 +180,25 @@ class LatinConvert:
import traceback
traceback.print_exc()
-def parse_args():
- import argparse
- parser = argparse.ArgumentParser(
- description='translit')
- parser.add_argument('-u', '--userdictionary',
- nargs='?',
- type=str,
- default='',
- help='user dictionary')
- parser.add_argument('-d', '--hunspelldict',
- nargs='?',
- type=str,
- default='',
- help='hunspell file path')
- return parser.parse_args()
-
+import argparse
+parser = argparse.ArgumentParser(
+ description='translit')
+parser.add_argument('-u', '--userdictionary',
+ nargs='?',
+ type=str,
+ default='',
+ help='user dictionary')
+parser.add_argument('-d', '--hunspelldict',
+ nargs='?',
+ type=str,
+ default='',
+ help='hunspell file path')
+args = parser.parse_args()
def main():
- args = parse_args()
+ if not args.userdictionary or not args.hunspelldict:
+ parser.print_help()
+ sys.exit(1)
user_dict = args.userdictionary
hunspell_dict = args.hunspelldict
dict_name = args.hunspelldict
10 years, 4 months
[ibus-typing-booster] master: Add python-enchant support (a777d34)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : master
>---------------------------------------------------------------
commit a777d34304808156897ff781d74995c547899f82
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Fri Feb 28 09:15:27 2014 +0100
Add python-enchant support
pyhunspell is not available for Python3. python-enchant is available
both for Python2 and for Python3.
So we need python-enchant because of the Python3 port anyway.
And python-enchant actually also seems to work slightly better than
pyhunspell: it takes Unicode as input and returns Unicode, whereas
pyhunspell needs its input in the encoding of the hunspell dictionary.
So in case of pyhunspell, ibus-typing-booster has to do some extra
encoding conversion.
>---------------------------------------------------------------
ibus-typing-booster/engine/hunspell_suggest.py | 45 +++++++++++++++++++-----
1 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/ibus-typing-booster/engine/hunspell_suggest.py b/ibus-typing-booster/engine/hunspell_suggest.py
index 7a9f2e2..d04895d 100755
--- a/ibus-typing-booster/engine/hunspell_suggest.py
+++ b/ibus-typing-booster/engine/hunspell_suggest.py
@@ -22,11 +22,18 @@ import sys
import unicodedata
import re
import codecs
+
+import_enchant_successful = False
+import_hunspell_successful = False
try:
- import hunspell
- import_hunspell_successful = True
+ import enchant
+ import_enchant_successful = True
except:
- import_hunspell_successful = False
+ try:
+ import hunspell # only available for Python2
+ import_hunspell_successful = True
+ except:
+ pass
# Maximum words that should be returned.
# This should a rather big number in order not
@@ -45,6 +52,7 @@ class Dictionary:
self.name = name
self.encoding = 'UTF-8'
self.buffer = None
+ self.enchant_dict = None
self.pyhunspell_object = None
self.load_dictionary()
@@ -90,11 +98,10 @@ class Dictionary:
if self.buffer:
self.buffer = unicodedata.normalize(
normalization_form_internal, self.buffer)
- if import_hunspell_successful:
- self.pyhunspell_object = hunspell.HunSpell(
- dic_path, aff_path)
- else:
- self.pyhunspell_object = None
+ if import_enchant_successful:
+ self.enchant_dict = enchant.Dict(self.name)
+ elif import_hunspell_successful:
+ self.pyhunspell_object = hunspell.HunSpell(dic_path, aff_path)
class Hunspell:
def __init__(self, dictionary_names=['en_US']):
@@ -144,7 +151,27 @@ class Hunspell:
for dictionary in self.dictionaries:
if dictionary.buffer:
suggested_words += patt_start.findall(dictionary.buffer)
- if dictionary.pyhunspell_object:
+ if dictionary.enchant_dict:
+ if len(input_phrase) >= 4:
+ # Always pass NFC to enchant and convert the
+ # result back to the internal normalization form (NFD)
+ # (enchant does the right thing for Korean if the input is NFC).
+ # enchant takes unicode strings and returns unicode strings,
+ # no encoding and decoding to and from the hunspell dictionary
+ # encoding is necessary (neither for Python2 nor Python3).
+ # (pyhunspell (which works only for Python2) needs to get
+ # its input passed in dictionary encoding and also returns it in
+ # dictionary encoding).
+ input_phrase = unicodedata.normalize('NFC', input_phrase)
+ extra_suggestions = [
+ unicodedata.normalize(normalization_form_internal, x)
+ for x in
+ dictionary.enchant_dict.suggest(input_phrase)
+ ]
+ for suggestion in extra_suggestions:
+ if suggestion not in suggested_words:
+ suggested_words.append(suggestion)
+ elif dictionary.pyhunspell_object:
if len(input_phrase) >= 4:
# Always pass NFC to pyhunspell and convert the
# result back to the internal normalization form (NFD)
10 years, 4 months
[ibus-typing-booster] master: Make it possible to use full path names or only the basenames as arguments for user_transliteration.py (2022f74)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : master
>---------------------------------------------------------------
commit 2022f74dc579a204574f0f7e4ff9234c756e4183
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Thu Feb 27 11:47:26 2014 +0100
Make it possible to use full path names or only the basenames as arguments for user_transliteration.py
>---------------------------------------------------------------
ibus-typing-booster/setup/user_transliteration.py | 26 +++++++++++++--------
1 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/ibus-typing-booster/setup/user_transliteration.py b/ibus-typing-booster/setup/user_transliteration.py
index 687266d..137e617 100755
--- a/ibus-typing-booster/setup/user_transliteration.py
+++ b/ibus-typing-booster/setup/user_transliteration.py
@@ -3,7 +3,7 @@
#
# ibus-typing-booster - The Tables engine for IBus
#
-# Copyright (c) 2012-2013 Anish Patil <apatil(a)redhat.com>
+# Copyright (c) 2012-2014 Anish Patil <apatil(a)redhat.com>
# Copyright (c) 2014 Mike FABIAN <mfabian(a)redhat.com>
#
# This program is free software: you can redistribute it and/or modify
@@ -182,17 +182,17 @@ class LatinConvert:
import argparse
parser = argparse.ArgumentParser(
- description='translit')
+ description='Transliterate a hunspell dictionary to Latin script and insert it into the user database. Currently works only for mr_IN.dic.')
parser.add_argument('-u', '--userdictionary',
nargs='?',
type=str,
default='',
- help='user dictionary')
+ help='user dictionary. For example ~/.local/share/ibus-typing-booster/user.db. A full path can be given or only the basename. When only the basename is given, ~/.local/share/ibus-typing-booster/ is prepended automatically.')
parser.add_argument('-d', '--hunspelldict',
nargs='?',
type=str,
default='',
- help='hunspell file path')
+ help='hunspell file path. For example /usr/share/myspell/mr_IN.dic. A full path can be given or only the basename. When only the basename is given, /usr/share/myspell/ is prepended automatically.')
args = parser.parse_args()
def main():
@@ -201,12 +201,15 @@ def main():
sys.exit(1)
user_dict = args.userdictionary
hunspell_dict = args.hunspelldict
- dict_name = args.hunspelldict
if user_dict:
#check whether user dict exists in the path
home_path = os.getenv ("HOME")
tables_path = path.join (home_path, ".local/share/ibus-typing-booster")
- user_dict = path.join (tables_path, user_dict)
+ if '/' not in user_dict:
+ # if user_dict already contains a '/' full path was given
+ # on the command line. If there is no '/', it is only the file
+ # name, add the default path:
+ user_dict = path.join (tables_path, user_dict)
if not path.exists(user_dict):
sys.stderr.write(
"The user database %(udb)s does not exist .\n" %{'udb': user_dict})
@@ -214,16 +217,19 @@ def main():
if hunspell_dict:
# Not sure how to get hunspell dict path from env
hunspell_path = "/usr/share/myspell/"
- hunspell_dict = path.join(hunspell_path,hunspell_dict)
+ if '/' not in hunspell_dict:
+ # if hunspell_dict already contains a '/' full path was given
+ # on the command line. If there is no '/', it is only the file
+ # name, add the default path:
+ hunspell_dict = path.join(hunspell_path,hunspell_dict)
if not path.exists(hunspell_dict):
sys.stderr.write(
"The hunspell dictionary %(hud)s does not exists .\n" %{'hud': hunspell_dict})
sys.exit(1)
- aff_name = hunspell_dict.replace('.dic','.aff')
lt = LatinConvert(user_dict,
hunspell_dict,
- aff_name,
- dict_name)
+ hunspell_dict.replace('.dic', '.aff'),
+ os.path.basename(hunspell_dict))
lt.insert_into_db()
if __name__ == '__main__':
10 years, 4 months
[ibus-typing-booster] master: Port user_transliteration.py to Python3 (40d2db0)
by mfabian@fedoraproject.org
Repository : http://git.fedorahosted.org/git/?p=ibus-typing-booster.git
On branch : master
>---------------------------------------------------------------
commit 40d2db0d322c533efd92388a16527ea9f11a596a
Author: Mike FABIAN <mfabian(a)redhat.com>
Date: Thu Feb 27 10:27:33 2014 +0100
Port user_transliteration.py to Python3
Should work with both Python2 and Python3 at the moment
>---------------------------------------------------------------
ibus-typing-booster/setup/user_transliteration.py | 41 ++++++++++++---------
1 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/ibus-typing-booster/setup/user_transliteration.py b/ibus-typing-booster/setup/user_transliteration.py
old mode 100644
new mode 100755
index 03eef03..2e991fd
--- a/ibus-typing-booster/setup/user_transliteration.py
+++ b/ibus-typing-booster/setup/user_transliteration.py
@@ -1,9 +1,10 @@
-# -*- coding: utf-8 -*-
+#!/usr/bin/python3
# vim:et sts=4 sw=4
#
# ibus-typing-booster - The Tables engine for IBus
#
# Copyright (c) 2012-2013 Anish Patil <apatil(a)redhat.com>
+# Copyright (c) 2014 Mike FABIAN <mfabian(a)redhat.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -92,8 +93,8 @@ class LatinConvert:
encoding = None
dict_buffer = None
try:
- aff_buffer = open(
- self.aff_file).read().replace('\r\n', '\n')
+ aff_buffer = codecs.open(
+ self.aff_file, mode='r', encoding='ISO-8859-1').read().replace('\r\n', '\n')
except:
import traceback
traceback.print_exc()
@@ -104,21 +105,21 @@ class LatinConvert:
match = encoding_pattern.search(aff_buffer)
if match:
encoding = match.group('encoding')
- print "load_dictionary(): encoding=%(enc)s found in %(aff)s" %{
- 'enc': encoding, 'aff': self.aff_file}
+ print("load_dictionary(): encoding=%(enc)s found in %(aff)s" %{
+ 'enc': encoding, 'aff': self.aff_file})
try:
dict_buffer = codecs.open(
self.hunspell_dict).read().decode(encoding).replace('\r\n', '\n')
except:
- print "load_dictionary(): loading %(dic)s as %(enc)s encoding failed, fall back to ISO-8859-1." %{
- 'dic': self.hunspell_dict, 'enc': encoding}
+ print("load_dictionary(): loading %(dic)s as %(enc)s encoding failed, fall back to ISO-8859-1." %{
+ 'dic': self.hunspell_dict, 'enc': encoding})
encoding = 'ISO-8859-1'
try:
dict_buffer = codecs.open(
self.hunspell_dict).read().decode(encoding).replace('\r\n', '\n')
except:
- print "load_dictionary(): loading %(dic)s as %(enc)s encoding failed, giving up." %{
- 'dic': self.hunspell_dict, 'enc': encoding}
+ print("load_dictionary(): loading %(dic)s as %(enc)s encoding failed, giving up." %{
+ 'dic': self.hunspell_dict, 'enc': encoding})
if dict_buffer[0] == u'\ufeff':
dict_buffer = dict_buffer[1:]
return dict_buffer
@@ -135,14 +136,15 @@ class LatinConvert:
try:
return self.trans.transliterate(word)[0]
except:
- print "Error while transliteration"
+ print("Error while transliteration")
def remove_accent(self,word):
- word = word.decode('utf-8')
+ if type(word) != type(u''):
+ word = word.decode('utf-8')
new_word = []
# To- Do use list compression
for char in word:
- if self.lang_table.has_key(char):
+ if char in self.lang_table:
new_word.append(self.lang_table[char])
elif char in[ u'\u0325', u'\u0310',u'\u0304', u'\u0315',u'\u0314']:
pass
@@ -152,23 +154,26 @@ class LatinConvert:
def get_converted_words(self):
words = self.get_words()
- icu_words = map(self.trans_word,words)
- ascii_words = map(self.remove_accent,icu_words)
+ icu_words = list(map(self.trans_word,words))
+ ascii_words = list(map(self.remove_accent,icu_words))
return ascii_words
def insert_into_db(self):
words = self.get_converted_words()
+ for w in words:
+ if type(w) != type(u''):
+ w = w.decode('UTF-8')
sql_table_name = "phrases"
try:
conn = sqlite3.connect(self.user_db)
sql = "INSERT INTO %s (input_phrase, phrase, user_freq, timestamp) values(:input_phrase, :phrase, :user_freq, :timestamp);" % (sql_table_name)
sqlargs = []
- map(lambda x: sqlargs.append(
- {'input_phrase': x.decode('utf-8'),
- 'phrase': x.decode('utf-8'),
+ list(map(lambda x: sqlargs.append(
+ {'input_phrase': x,
+ 'phrase': x,
'user_freq': 0,
'timestamp': time.time()}),
- words)
+ words))
conn.executemany(sql,sqlargs)
conn.commit()
except:
10 years, 4 months