Bundle multiple tesseract versions

Load them via the C API and allow the user to select which one to use.
This commit is contained in:
Gres 2020-07-18 12:26:59 +03:00
parent bd99d04416
commit 0920ed1f40
16 changed files with 279 additions and 138 deletions

View File

@ -33,18 +33,13 @@ jobs:
runs-on: ${{ matrix.config.os }} runs-on: ${{ matrix.config.os }}
env: env:
OS: ${{ matrix.config.name }} OS: ${{ matrix.config.name }}
MARCH: ${{ matrix.config.march }}
TAG: ${{ matrix.config.tag }}
MSVC_VERSION: 2019/Enterprise MSVC_VERSION: 2019/Enterprise
strategy: strategy:
matrix: matrix:
config: config:
- { name: "win64", os: windows-latest, tag: "", march: "sandy-bridge" } - { name: "win64", os: windows-latest }
- { name: "win32", os: windows-latest, tag: "", march: "sandy-bridge" } - { name: "win32", os: windows-latest }
- { name: "linux", os: ubuntu-16.04, tag: "", march: "sandy-bridge" } - { name: "linux", os: ubuntu-16.04 }
- { name: "win64", os: windows-latest, tag: "-compatible", march: "nehalem" }
- { name: "win32", os: windows-latest, tag: "-compatible", march: "nehalem" }
- { name: "linux", os: ubuntu-16.04, tag: "-compatible", march: "nehalem" }
# - { name: "macos", os: macos-latest } # - { name: "macos", os: macos-latest }
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
@ -66,7 +61,7 @@ jobs:
uses: actions/cache@v1 uses: actions/cache@v1
with: with:
path: deps path: deps
key: ${{ env.OS }}-${{ env.TAG }}-deps key: ${{ env.OS }}-deps
- name: Get Qt - name: Get Qt
run: python ./share/ci/get_qt.py run: python ./share/ci/get_qt.py
@ -77,7 +72,16 @@ jobs:
- name: Get leptonica - name: Get leptonica
run: python ./share/ci/get_leptonica.py run: python ./share/ci/get_leptonica.py
- name: Get tesseract - name: Get tesseract optimized
env:
MARCH: sandy-bridge
TAG: optimized
run: python ./share/ci/get_tesseract.py
- name: Get tesseract compatible
env:
MARCH: nehalem
TAG: compatible
run: python ./share/ci/get_tesseract.py run: python ./share/ci/get_tesseract.py
- name: Get hunspell - name: Get hunspell

View File

@ -8,7 +8,7 @@ DEPS_DIR=$$(ST_DEPS_DIR)
isEmpty(DEPS_DIR):DEPS_DIR=$$PWD/../deps isEmpty(DEPS_DIR):DEPS_DIR=$$PWD/../deps
INCLUDEPATH += $$DEPS_DIR/include INCLUDEPATH += $$DEPS_DIR/include
LIBS += -L$$DEPS_DIR/lib LIBS += -L$$DEPS_DIR/lib
LIBS += -ltesseract -lleptonica -lhunspell LIBS += -lhunspell -lleptonica
win32{ win32{
LIBS += -lUser32 LIBS += -lUser32

View File

@ -47,7 +47,8 @@ os.environ['VERSION'] = app_version
flags = '' if os.getenv("DEBUG") is None else '-unsupported-allow-new-glibc' flags = '' if os.getenv("DEBUG") is None else '-unsupported-allow-new-glibc'
additional_files = glob(ssl_dir + '/lib/lib*.so.*') + \ additional_files = glob(ssl_dir + '/lib/lib*.so.*') + \
glob('/usr/lib/x86_64-linux-gnu/nss/*') glob('/usr/lib/x86_64-linux-gnu/nss/*') + \
glob(dependencies_dir + '/lib/libtesseract-*.so')
out_lib_dir = install_dir + '/usr/lib' out_lib_dir = install_dir + '/usr/lib'
os.makedirs(out_lib_dir, exist_ok=True) os.makedirs(out_lib_dir, exist_ok=True)
for f in additional_files: for f in additional_files:

View File

@ -33,39 +33,33 @@ if os.environ.get('NO_OPT', '0') == '1':
if len(os.environ.get('MARCH', '')) > 0: if len(os.environ.get('MARCH', '')) > 0:
compat_flags += ' -D TARGET_ARCHITECTURE={} '.format(os.environ['MARCH']) compat_flags += ' -D TARGET_ARCHITECTURE={} '.format(os.environ['MARCH'])
cache_file = install_dir + '/tesseract.cache' lib_suffix = os.environ.get('TAG', '')
cache_file_data = required_version + build_type_flag + compat_flags if len(lib_suffix) > 0:
lib_suffix = '-' + lib_suffix
def check_existing(): def check_existing():
if not os.path.exists(cache_file):
return False
with open(cache_file, 'r') as f:
cached = f.read()
if cached != cache_file_data:
return False
if platform.system() == "Windows":
dll = install_dir + '/bin/tesseract41.dll'
lib = install_dir + '/lib/tesseract41.lib'
if not os.path.exists(dll) or not os.path.exists(lib):
return False
c.symlink(dll, install_dir + '/bin/tesseract.dll')
c.symlink(lib, install_dir + '/lib/tesseract.lib')
elif platform.system() == "Darwin":
lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
if not os.path.exists(lib):
return False
c.symlink(lib, install_dir + '/lib/libtesseract.dylib')
else:
if not os.path.exists(install_dir + '/lib/libtesseract.so'):
return False
includes_path = install_dir + '/include/tesseract' includes_path = install_dir + '/include/tesseract'
if len(c.get_folder_files(includes_path)) == 0: if len(c.get_folder_files(includes_path)) == 0:
return False return False
return True if platform.system() == "Windows":
lib = install_dir + '/bin/tesseract{}.dll'.format(lib_suffix)
orig_lib = install_dir + '/bin/tesseract41.dll'
elif platform.system() == "Darwin":
lib = install_dir + '/lib/libtesseract{}.dylib'.format(lib_suffix)
orig_lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
else:
lib = install_dir + '/lib/libtesseract{}.so'.format(lib_suffix)
orig_lib = install_dir + '/lib/libtesseract.so.4.1.1'
if os.path.exists(lib):
return True
if os.path.exists(orig_lib):
os.rename(orig_lib, lib)
return True
return False
if check_existing() and not 'FORCE' in os.environ: if check_existing() and not 'FORCE' in os.environ:
@ -102,9 +96,6 @@ if len(compat_flags) > 0:
c.run('cmake --build . --config {}'.format(build_type_flag)) c.run('cmake --build . --config {}'.format(build_type_flag))
c.run('cmake --build . --target install --config {}'.format(build_type_flag)) c.run('cmake --build . --target install --config {}'.format(build_type_flag))
with open(cache_file, 'w') as f: if not check_existing(): # add suffix
f.write(cache_file_data)
if not check_existing(): # create links
c.print('>> Build failed') c.print('>> Build failed')
exit(1) exit(1)

View File

@ -34,7 +34,9 @@ for file in os.scandir(libs_dir):
c.print('>> Copying {} to {}'.format(full_name, install_dir)) c.print('>> Copying {} to {}'.format(full_name, install_dir))
shutil.copy(full_name, install_dir) shutil.copy(full_name, install_dir)
for f in glob(ssl_dir + '/bin/*.dll'): additional_libs = glob(ssl_dir + '/bin/*.dll') + \
glob(dependencies_dir + '/bin/tesseract-*.dll')
for f in additional_libs:
c.print('>> Copying {} to {}'.format(f, install_dir)) c.print('>> Copying {} to {}'.format(f, install_dir))
shutil.copy(f, install_dir) shutil.copy(f, install_dir)

View File

@ -66,7 +66,7 @@ Ctrl - продолжить выделять</translation>
<context> <context>
<name>QObject</name> <name>QObject</name>
<message> <message>
<location filename="../../src/main.cpp" line="28"/> <location filename="../../src/main.cpp" line="30"/>
<source>OCR and translation tool</source> <source>OCR and translation tool</source>
<translation>Инструмент распознавания и перевода</translation> <translation>Инструмент распознавания и перевода</translation>
</message> </message>
@ -668,7 +668,7 @@ Check for updates to silence this warning</source>
<translation>Начата запись в лог-файл: %1</translation> <translation>Начата запись в лог-файл: %1</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.cpp" line="104"/> <location filename="../../src/settingseditor.cpp" line="112"/>
<source>&lt;p&gt;Optical character recognition (OCR) and translation tool&lt;/p&gt; <source>&lt;p&gt;Optical character recognition (OCR) and translation tool&lt;/p&gt;
&lt;p&gt;Version: %1&lt;/p&gt; &lt;p&gt;Version: %1&lt;/p&gt;
&lt;p&gt;Author: Gres (&lt;a href=&quot;mailto:%2&quot;&gt;%2&lt;/a&gt;)&lt;/p&gt; &lt;p&gt;Author: Gres (&lt;a href=&quot;mailto:%2&quot;&gt;%2&lt;/a&gt;)&lt;/p&gt;
@ -684,12 +684,12 @@ Check for updates to silence this warning</source>
<translation>неизвестные языки для перевода: %1 или %2</translation> <translation>неизвестные языки для перевода: %1 или %2</translation>
</message> </message>
<message> <message>
<location filename="../../src/ocr/tesseract.cpp" line="153"/> <location filename="../../src/ocr/tesseract.cpp" line="238"/>
<source>init failed</source> <source>init failed</source>
<translation>ошибка инициализации</translation> <translation>ошибка инициализации</translation>
</message> </message>
<message> <message>
<location filename="../../src/ocr/tesseract.cpp" line="211"/> <location filename="../../src/ocr/tesseract.cpp" line="289"/>
<source>Failed to recognize text or no text selected</source> <source>Failed to recognize text or no text selected</source>
<translation>Ошибка распознавания текста или нет текста в выделенной зоне</translation> <translation>Ошибка распознавания текста или нет текста в выделенной зоне</translation>
</message> </message>
@ -734,7 +734,7 @@ in %1</source>
<context> <context>
<name>Recognizer</name> <name>Recognizer</name>
<message> <message>
<location filename="../../src/ocr/recognizer.cpp" line="36"/> <location filename="../../src/ocr/recognizer.cpp" line="37"/>
<source>No source language set. Check settings</source> <source>No source language set. Check settings</source>
<translation>Не задан исходный язык. Проверьте настройки</translation> <translation>Не задан исходный язык. Проверьте настройки</translation>
</message> </message>
@ -870,37 +870,42 @@ in %1</source>
<translation>сохранять пароль (небезопасно)</translation> <translation>сохранять пароль (небезопасно)</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="295"/> <location filename="../../src/settingseditor.ui" line="291"/>
<source>Library version</source>
<translation>Версия</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="305"/>
<source>User substitutions</source> <source>User substitutions</source>
<translation>Пользовательская коррекция</translation> <translation>Пользовательская коррекция</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="318"/> <location filename="../../src/settingseditor.ui" line="328"/>
<source>Use auto corrections (hunspell)</source> <source>Use auto corrections (hunspell)</source>
<translation>Использовать автокоррекцию (hunspell)</translation> <translation>Использовать автокоррекцию (hunspell)</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="325"/> <location filename="../../src/settingseditor.ui" line="335"/>
<source>Use user substitutions</source> <source>Use user substitutions</source>
<translation>Использовать пользовательскую коррекцию</translation> <translation>Использовать пользовательскую коррекцию</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="332"/> <location filename="../../src/settingseditor.ui" line="342"/>
<source>Hunspell dictionaries path:</source> <source>Hunspell dictionaries path:</source>
<translation>Путь к словарям Hunspell:</translation> <translation>Путь к словарям Hunspell:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="394"/> <location filename="../../src/settingseditor.ui" line="404"/>
<source>Language:</source> <source>Language:</source>
<translation>Язык:</translation> <translation>Язык:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="430"/> <location filename="../../src/settingseditor.ui" line="440"/>
<source> secs</source> <source> secs</source>
<translation> сек</translation> <translation> сек</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="370"/> <location filename="../../src/settingseditor.ui" line="380"/>
<source>Ignore SSL errors</source> <source>Ignore SSL errors</source>
<translation>Игнорировать ошибки SSL</translation> <translation>Игнорировать ошибки SSL</translation>
</message> </message>
@ -930,107 +935,107 @@ in %1</source>
<translation>Писать логи в файл (отладка)</translation> <translation>Писать логи в файл (отладка)</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="239"/> <location filename="../../src/settingseditor.ui" line="278"/>
<source>Default language:</source> <source>Default language:</source>
<translation>Язык по умолчанию:</translation> <translation>Язык по умолчанию:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="255"/> <location filename="../../src/settingseditor.ui" line="265"/>
<source>Tessdata path:</source> <source>Tessdata path:</source>
<translation>Путь к языкам (tessdata):</translation> <translation>Путь к языкам (tessdata):</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="305"/> <location filename="../../src/settingseditor.ui" line="315"/>
<source>\\ for \ symbol, \n for newline</source> <source>\\ for \ symbol, \n for newline</source>
<translation>\\ для символа \ , \n для символа новой строки</translation> <translation>\\ для символа \ , \n для символа новой строки</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="353"/> <location filename="../../src/settingseditor.ui" line="363"/>
<source>Translators path:</source> <source>Translators path:</source>
<translation>Путь к переводчикам:</translation> <translation>Путь к переводчикам:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="360"/> <location filename="../../src/settingseditor.ui" line="370"/>
<source>Translators</source> <source>Translators</source>
<translation>Переводчики</translation> <translation>Переводчики</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="495"/> <location filename="../../src/settingseditor.ui" line="505"/>
<source>Result window</source> <source>Result window</source>
<translation>Окно результата</translation> <translation>Окно результата</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="501"/> <location filename="../../src/settingseditor.ui" line="511"/>
<source>Font:</source> <source>Font:</source>
<translation>Шрифт:</translation> <translation>Шрифт:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="511"/> <location filename="../../src/settingseditor.ui" line="521"/>
<source>Font size:</source> <source>Font size:</source>
<translation>Размер шрифта:</translation> <translation>Размер шрифта:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="528"/> <location filename="../../src/settingseditor.ui" line="538"/>
<source>Font color:</source> <source>Font color:</source>
<translation>Цвет шрифта:</translation> <translation>Цвет шрифта:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="545"/> <location filename="../../src/settingseditor.ui" line="555"/>
<source>Background:</source> <source>Background:</source>
<translation>Фон:</translation> <translation>Фон:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="562"/> <location filename="../../src/settingseditor.ui" line="572"/>
<source>Show image</source> <source>Show image</source>
<translation>Показывать изображение</translation> <translation>Показывать изображение</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="569"/> <location filename="../../src/settingseditor.ui" line="579"/>
<source>Show recognized</source> <source>Show recognized</source>
<translation>Показывать распознанное</translation> <translation>Показывать распознанное</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="638"/> <location filename="../../src/settingseditor.ui" line="648"/>
<source>Update check interval (days):</source> <source>Update check interval (days):</source>
<translation>Интервал проверки обновления (дней):</translation> <translation>Интервал проверки обновления (дней):</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="645"/> <location filename="../../src/settingseditor.ui" line="655"/>
<source>0 - disabled</source> <source>0 - disabled</source>
<translation>0 - отключено</translation> <translation>0 - отключено</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="678"/> <location filename="../../src/settingseditor.ui" line="688"/>
<source>Apply updates</source> <source>Apply updates</source>
<translation>Применить изменения</translation> <translation>Применить изменения</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="380"/> <location filename="../../src/settingseditor.ui" line="390"/>
<source>Translate text</source> <source>Translate text</source>
<translation>Переводить текст</translation> <translation>Переводить текст</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="387"/> <location filename="../../src/settingseditor.ui" line="397"/>
<source>Single translator timeout:</source> <source>Single translator timeout:</source>
<translation>Переходить к следующему переводчику после:</translation> <translation>Переходить к следующему переводчику после:</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="457"/> <location filename="../../src/settingseditor.ui" line="467"/>
<source>Result type</source> <source>Result type</source>
<translation>Тип результата</translation> <translation>Тип результата</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="469"/> <location filename="../../src/settingseditor.ui" line="479"/>
<source>Tray</source> <source>Tray</source>
<translation>Трей</translation> <translation>Трей</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="482"/> <location filename="../../src/settingseditor.ui" line="492"/>
<source>Window</source> <source>Window</source>
<translation>Окно</translation> <translation>Окно</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.ui" line="661"/> <location filename="../../src/settingseditor.ui" line="671"/>
<source>Check now</source> <source>Check now</source>
<translation>Проверить сейчас</translation> <translation>Проверить сейчас</translation>
</message> </message>
@ -1090,17 +1095,32 @@ in %1</source>
<translation>HTTP</translation> <translation>HTTP</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.cpp" line="66"/> <location filename="../../src/settingseditor.cpp" line="57"/>
<source>Optimized</source>
<translation>Оптимизированная</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="58"/>
<source>Compatible</source>
<translation>Совместимая</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="62"/>
<source>Use compatible version if you are experiencing crashes during recognition</source>
<translation>Используйте совместимую версию если программа неожиданно завершается во время распознавания</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="74"/>
<source>&lt;b&gt;NOTE! Some translators might require the translation window to be visible. You can make it using the &quot;Show translator&quot; entry in the tray icon&apos;s context menu&lt;/b&gt;</source> <source>&lt;b&gt;NOTE! Some translators might require the translation window to be visible. You can make it using the &quot;Show translator&quot; entry in the tray icon&apos;s context menu&lt;/b&gt;</source>
<translation>&lt;b&gt;ПРИМЕЧАНИЕ! Для работы некоторых переводчиков может потребоваться активное окно перевода. Его можно отобразить при помощи пункта &quot;Показать окно перевода&quot; контекстного меню иконки в трее&lt;/b&gt;</translation> <translation>&lt;b&gt;ПРИМЕЧАНИЕ! Для работы некоторых переводчиков может потребоваться активное окно перевода. Его можно отобразить при помощи пункта &quot;Показать окно перевода&quot; контекстного меню иконки в трее&lt;/b&gt;</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.cpp" line="74"/> <location filename="../../src/settingseditor.cpp" line="82"/>
<source>Sample text</source> <source>Sample text</source>
<translation>Текст для проверки</translation> <translation>Текст для проверки</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.cpp" line="115"/> <location filename="../../src/settingseditor.cpp" line="123"/>
<source>The program workflow consists of the following steps: <source>The program workflow consists of the following steps:
1. Selection on the screen area 1. Selection on the screen area
2. Recognition of the selected area 2. Recognition of the selected area
@ -1123,7 +1143,7 @@ Then set default recognition and translation languages, enable some (or all) tra
Далее установите языки распознавания и перевода по умолчанию, активируйте некоторые (или все) переводчики и настройку &quot;переводить текст&quot;, если нужно.</translation> Далее установите языки распознавания и перевода по умолчанию, активируйте некоторые (или все) переводчики и настройку &quot;переводить текст&quot;, если нужно.</translation>
</message> </message>
<message> <message>
<location filename="../../src/settingseditor.cpp" line="341"/> <location filename="../../src/settingseditor.cpp" line="352"/>
<source>Portable changed. Apply settings first</source> <source>Portable changed. Apply settings first</source>
<translation>Режим Portable изменён. Сначала примените настройки</translation> <translation>Режим Portable изменён. Сначала примените настройки</translation>
</message> </message>

View File

@ -79,5 +79,9 @@ void Recognizer::updateSettings()
SOFT_ASSERT(!settings_.tessdataPath.isEmpty(), return ); SOFT_ASSERT(!settings_.tessdataPath.isEmpty(), return );
queue_.clear(); queue_.clear();
emit reset(settings_.tessdataPath); const auto libName =
(settings_.tesseractVersion == TesseractVersion::Optimized
? "tesseract-optimized"
: "tesseract-compatible");
emit reset(settings_.tessdataPath, libName);
} }

View File

@ -18,7 +18,7 @@ public:
signals: signals:
void recognizeImpl(const TaskPtr &task); void recognizeImpl(const TaskPtr &task);
void reset(const QString &tessdataPath); void reset(const QString &tessdataPath, const QString &tesseractLibrary);
private: private:
void recognized(const TaskPtr &task); void recognized(const TaskPtr &task);

View File

@ -17,8 +17,8 @@ void RecognizeWorker::handle(const TaskPtr &task)
if (!engines_.count(task->sourceLanguage)) { if (!engines_.count(task->sourceLanguage)) {
LTRACE() << "Create OCR engine" << task->sourceLanguage; LTRACE() << "Create OCR engine" << task->sourceLanguage;
auto engine = auto engine = std::make_unique<Tesseract>(task->sourceLanguage,
std::make_unique<Tesseract>(task->sourceLanguage, tessdataPath_); tessdataPath_, tesseractLibrary_);
if (!engine->isValid()) { if (!engine->isValid()) {
result->error = tr("Failed to init OCR engine: %1").arg(engine->error()); result->error = tr("Failed to init OCR engine: %1").arg(engine->error());
@ -43,12 +43,14 @@ void RecognizeWorker::handle(const TaskPtr &task)
emit finished(result); emit finished(result);
} }
void RecognizeWorker::reset(const QString &tessdataPath) void RecognizeWorker::reset(const QString &tessdataPath,
const QString &tesseractLibrary)
{ {
if (tessdataPath_ == tessdataPath) if (tessdataPath_ == tessdataPath && tesseractLibrary_ == tesseractLibrary)
return; return;
tessdataPath_ = tessdataPath; tessdataPath_ = tessdataPath;
tesseractLibrary_ = tesseractLibrary;
engines_.clear(); engines_.clear();
LTRACE() << "Cleared OCR engines"; LTRACE() << "Cleared OCR engines";
} }

View File

@ -13,7 +13,7 @@ public:
~RecognizeWorker(); ~RecognizeWorker();
void handle(const TaskPtr &task); void handle(const TaskPtr &task);
void reset(const QString &tessdataPath); void reset(const QString &tessdataPath, const QString &tesseractLibrary);
signals: signals:
void finished(const TaskPtr &task); void finished(const TaskPtr &task);
@ -24,4 +24,5 @@ private:
std::map<QString, std::unique_ptr<Tesseract>> engines_; std::map<QString, std::unique_ptr<Tesseract>> engines_;
std::map<QString, Generation> lastGenerations_; std::map<QString, Generation> lastGenerations_;
QString tessdataPath_; QString tessdataPath_;
QString tesseractLibrary_;
}; };

View File

@ -4,10 +4,10 @@
#include "task.h" #include "task.h"
#include <leptonica/allheaders.h> #include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <QBuffer> #include <QBuffer>
#include <QDir> #include <QDir>
#include <QLibrary>
#if defined(Q_OS_LINUX) #if defined(Q_OS_LINUX)
#include <fstream> #include <fstream>
@ -125,7 +125,103 @@ static void cleanupImage(Pix **image)
pixDestroy(image); pixDestroy(image);
} }
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath) // do not include capi.h from tesseract because it defines BOOL, which breaks MSVC
struct TessBaseAPI;
// Runtime binding to a tesseract shared library through its C API.
//
// The constructor loads the library named by `libraryName` with QLibrary,
// resolves the C entry points listed below and creates one TessBaseAPI
// handle. When loading or any symbol lookup fails, the failure is logged and
// no handle is created; Init() and GetText() then stop at their SOFT_ASSERT
// guards instead of calling into the library.
class Tesseract::Wrapper
{
  // Signatures of the tesseract C API functions resolved at runtime.
  using CreateApi = TessBaseAPI *(*)();
  using DeleteApi = void (*)(TessBaseAPI *);
  using InitApi = int (*)(TessBaseAPI *, const char *, const char *, int);
  using SetImage = void (*)(TessBaseAPI *, struct Pix *);
  using GetUtf8 = char *(*)(TessBaseAPI *);
  using ClearApi = void (*)(TessBaseAPI *);
  using DeleteUtf8 = void (*)(const char *);

public:
  // Loads `libraryName`, resolves the required symbols and creates the
  // engine handle. On any failure the object is left without a handle.
  explicit Wrapper(const QString &libraryName)
    : lib_(libraryName)
  {
    if (!lib_.load()) {
      // Include the loader's own message so a missing or broken library
      // can be diagnosed from the log alone.
      LERROR() << "Failed to load tesseract library" << libraryName
               << lib_.errorString();
      return;
    }
    LTRACE() << "Loaded tesseract library" << lib_.fileName();

    // Resolve every entry point; `ok` stays true only if all are found.
    auto ok = true;
    ok &= bool(createApi_ = (CreateApi)lib_.resolve("TessBaseAPICreate"));
    ok &= bool(deleteApi_ = (DeleteApi)lib_.resolve("TessBaseAPIDelete"));
    ok &= bool(initApi_ = (InitApi)lib_.resolve("TessBaseAPIInit2"));
    ok &= bool(setImage_ = (SetImage)lib_.resolve("TessBaseAPISetImage2"));
    ok &= bool(getUtf8_ = (GetUtf8)lib_.resolve("TessBaseAPIGetUTF8Text"));
    ok &= bool(clearApi_ = (ClearApi)lib_.resolve("TessBaseAPIClear"));
    ok &= bool(deleteUtf8_ = (DeleteUtf8)lib_.resolve("TessDeleteText"));
    if (!ok) {
      LERROR() << "Failed to resolve tesseract functions from" << libraryName;
      return;
    }

    handle_ = createApi_();
  }

  // Destroys the engine handle (if one was created) and unloads the library.
  ~Wrapper()
  {
    if (handle_ && deleteApi_) {
      deleteApi_(handle_);
    }
    lib_.unload();
  }

  // Initializes the engine for `language` using data found in `datapath`.
  // Returns the value reported by TessBaseAPIInit2; the SOFT_ASSERT guards
  // return -1 when no handle or init function is available.
  int Init(const char *datapath, const char *language)
  {
    SOFT_ASSERT(handle_, return -1);
    SOFT_ASSERT(initApi_, return -1);

    const auto mode = 3;  // TessOcrEngineMode::OEM_DEFAULT
    return initApi_(handle_, datapath, language, mode);
  }

  // Runs recognition on `pix` and returns the trimmed UTF-8 text.
  // The guards return an empty string if the wrapper is not usable.
  QString GetText(Pix *pix)
  {
    // Validate everything before the first call: once getUtf8_ has handed
    // out a buffer, an early return would leak it.
    SOFT_ASSERT(handle_, return {});
    SOFT_ASSERT(setImage_, return {});
    SOFT_ASSERT(getUtf8_, return {});
    SOFT_ASSERT(clearApi_, return {});
    SOFT_ASSERT(deleteUtf8_, return {});

    setImage_(handle_, pix);
    LTRACE() << "Set Pix to engine";

    char *outText = getUtf8_(handle_);
    LTRACE() << "Received recognized text";

    clearApi_(handle_);
    LTRACE() << "Cleared engine";

    // Copy into a QString first, then hand the buffer back to the library
    // that allocated it.
    const auto result = QString(outText).trimmed();
    deleteUtf8_(outText);
    LTRACE() << "Cleared recognized text buffer";
    return result;
  }

private:
  QLibrary lib_;
  CreateApi createApi_{nullptr};
  DeleteApi deleteApi_{nullptr};
  InitApi initApi_{nullptr};
  SetImage setImage_{nullptr};
  GetUtf8 getUtf8_{nullptr};
  ClearApi clearApi_{nullptr};
  DeleteUtf8 deleteUtf8_{nullptr};
  TessBaseAPI *handle_{nullptr};
};
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath,
const QString &tesseractLibrary)
: tesseractLibrary_(tesseractLibrary)
{ {
SOFT_ASSERT(!tessdataPath.isEmpty(), return ); SOFT_ASSERT(!tessdataPath.isEmpty(), return );
SOFT_ASSERT(!language.isEmpty(), return ); SOFT_ASSERT(!language.isEmpty(), return );
@ -139,13 +235,12 @@ void Tesseract::init(const LanguageId &language, const QString &tessdataPath)
{ {
SOFT_ASSERT(!engine_, return ); SOFT_ASSERT(!engine_, return );
engine_ = std::make_unique<tesseract::TessBaseAPI>(); engine_ = std::make_unique<Wrapper>(tesseractLibrary_);
LTRACE() << "Created Tesseract api" << engine_.get(); LTRACE() << "Created Tesseract api" << engine_.get();
const auto tesseractName = LanguageCodes::tesseract(language); const auto tesseractName = LanguageCodes::tesseract(language);
auto result = auto result =
engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName), engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName));
tesseract::OEM_DEFAULT);
LTRACE() << "Inited Tesseract api" << result; LTRACE() << "Inited Tesseract api" << result;
if (result == 0) if (result == 0)
return; return;
@ -194,19 +289,12 @@ QString Tesseract::recognize(const QPixmap &source)
Pix *image = prepareImage(source.toImage()); Pix *image = prepareImage(source.toImage());
SOFT_ASSERT(image, return {}); SOFT_ASSERT(image, return {});
LTRACE() << "Preprocessed Pix for OCR" << image; LTRACE() << "Preprocessed Pix for OCR" << image;
engine_->SetImage(image);
LTRACE() << "Set Pix to engine"; auto result = engine_->GetText(image);
char *outText = engine_->GetUTF8Text();
LTRACE() << "Received recognized text";
engine_->Clear();
LTRACE() << "Cleared engine";
cleanupImage(&image); cleanupImage(&image);
LTRACE() << "Cleared preprocessed Pix"; LTRACE() << "Cleared preprocessed Pix";
QString result = QString(outText).trimmed();
delete[] outText;
LTRACE() << "Cleared recognized text buffer";
if (result.isEmpty()) if (result.isEmpty())
error_ = QObject::tr("Failed to recognize text or no text selected"); error_ = QObject::tr("Failed to recognize text or no text selected");
return result; return result;

View File

@ -7,16 +7,13 @@
#include <memory> #include <memory>
class QPixmap; class QPixmap;
namespace tesseract
{
class TessBaseAPI;
}
class Task; class Task;
class Tesseract class Tesseract
{ {
public: public:
Tesseract(const LanguageId& language, const QString& tessdataPath); Tesseract(const LanguageId& language, const QString& tessdataPath,
const QString& tesseractLibrary);
~Tesseract(); ~Tesseract();
QString recognize(const QPixmap& source); QString recognize(const QPixmap& source);
@ -26,8 +23,10 @@ public:
static QStringList availableLanguageNames(const QString& path); static QStringList availableLanguageNames(const QString& path);
private: private:
class Wrapper;
void init(const LanguageId& language, const QString& tessdataPath); void init(const LanguageId& language, const QString& tessdataPath);
std::unique_ptr<tesseract::TessBaseAPI> engine_; const QString tesseractLibrary_;
std::unique_ptr<Wrapper> engine_;
QString error_; QString error_;
}; };

View File

@ -30,6 +30,7 @@ const QString qs_showMessageOnStart = "showMessageOnStart";
const QString qs_recogntionGroup = "Recognition"; const QString qs_recogntionGroup = "Recognition";
const QString qs_ocrLanguage = "language"; const QString qs_ocrLanguage = "language";
const QString qs_tesseractVersion = "tesseractVersion";
const QString qs_correctionGroup = "Correction"; const QString qs_correctionGroup = "Correction";
const QString qs_userSubstitutions = "userSubstitutions"; const QString qs_userSubstitutions = "userSubstitutions";
@ -171,6 +172,7 @@ void Settings::save() const
settings.beginGroup(qs_recogntionGroup); settings.beginGroup(qs_recogntionGroup);
settings.setValue(qs_ocrLanguage, sourceLanguage); settings.setValue(qs_ocrLanguage, sourceLanguage);
settings.setValue(qs_tesseractVersion, int(tesseractVersion));
settings.endGroup(); settings.endGroup();
settings.beginGroup(qs_correctionGroup); settings.beginGroup(qs_correctionGroup);
@ -257,6 +259,9 @@ void Settings::load()
settings.beginGroup(qs_recogntionGroup); settings.beginGroup(qs_recogntionGroup);
sourceLanguage = settings.value(qs_ocrLanguage, sourceLanguage).toString(); sourceLanguage = settings.value(qs_ocrLanguage, sourceLanguage).toString();
tesseractVersion = TesseractVersion(std::clamp(
settings.value(qs_tesseractVersion, int(tesseractVersion)).toInt(),
int(TesseractVersion::Optimized), int(TesseractVersion::Compatible)));
settings.endGroup(); settings.endGroup();
settings.beginGroup(qs_correctionGroup); settings.beginGroup(qs_correctionGroup);

View File

@ -18,6 +18,8 @@ using Substitutions = std::unordered_multimap<LanguageId, Substitution>;
enum class ProxyType { Disabled, System, Socks5, Http }; enum class ProxyType { Disabled, System, Socks5, Http };
enum class TesseractVersion { Optimized, Compatible };
class Settings class Settings
{ {
public: public:
@ -57,6 +59,7 @@ public:
QString tessdataPath; QString tessdataPath;
QString sourceLanguage{"eng"}; QString sourceLanguage{"eng"};
TesseractVersion tesseractVersion{TesseractVersion::Optimized};
bool doTranslation{true}; bool doTranslation{true};
bool ignoreSslErrors{false}; bool ignoreSslErrors{false};

View File

@ -51,8 +51,16 @@ SettingsEditor::SettingsEditor(Manager &manager, update::Loader &updater)
ui->proxyPassEdit->setEchoMode(QLineEdit::PasswordEchoOnEdit); ui->proxyPassEdit->setEchoMode(QLineEdit::PasswordEchoOnEdit);
} }
// translation // recognition
ui->tesseractLangCombo->setModel(models_.sourceLanguageModel()); ui->tesseractLangCombo->setModel(models_.sourceLanguageModel());
const QMap<TesseractVersion, QString> tesseractVersions{
{TesseractVersion::Optimized, tr("Optimized")},
{TesseractVersion::Compatible, tr("Compatible")},
};
ui->tesseractVersion->addItems(tesseractVersions.values());
ui->tesseractVersion->setToolTip(
tr("Use compatible version if you are experiencing crashes during "
"recognition"));
// correction // correction
ui->userSubstitutionsTable->setEnabled(ui->useUserSubstitutions->isChecked()); ui->userSubstitutionsTable->setEnabled(ui->useUserSubstitutions->isChecked());
@ -164,6 +172,8 @@ Settings SettingsEditor::settings() const
settings.sourceLanguage = settings.sourceLanguage =
LanguageCodes::idForName(ui->tesseractLangCombo->currentText()); LanguageCodes::idForName(ui->tesseractLangCombo->currentText());
settings.tesseractVersion =
TesseractVersion(ui->tesseractVersion->currentIndex());
settings.useHunspell = ui->useHunspell->isChecked(); settings.useHunspell = ui->useHunspell->isChecked();
settings.useUserSubstitutions = ui->useUserSubstitutions->isChecked(); settings.useUserSubstitutions = ui->useUserSubstitutions->isChecked();
@ -227,6 +237,7 @@ void SettingsEditor::setSettings(const Settings &settings)
ui->tessdataPath->setText(settings.tessdataPath); ui->tessdataPath->setText(settings.tessdataPath);
ui->tesseractLangCombo->setCurrentText( ui->tesseractLangCombo->setCurrentText(
LanguageCodes::name(settings.sourceLanguage)); LanguageCodes::name(settings.sourceLanguage));
ui->tesseractVersion->setCurrentIndex(int(settings.tesseractVersion));
ui->useHunspell->setChecked(settings.useHunspell); ui->useHunspell->setChecked(settings.useHunspell);
ui->hunspellDir->setText(settings.hunspellDir); ui->hunspellDir->setText(settings.hunspellDir);

View File

@ -227,19 +227,29 @@
</widget> </widget>
<widget class="QWidget" name="pageRecognize"> <widget class="QWidget" name="pageRecognize">
<layout class="QGridLayout" name="gridLayout_2"> <layout class="QGridLayout" name="gridLayout_2">
<item row="1" column="0"> <item row="3" column="2">
<widget class="QLabel" name="label_4"> <spacer name="verticalSpacer_2">
<property name="sizePolicy"> <property name="orientation">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred"> <enum>Qt::Vertical</enum>
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property> </property>
<property name="sizeHint" stdset="0">
<size>
<width>17</width>
<height>410</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="2">
<widget class="QLabel" name="tessdataPath">
<property name="text"> <property name="text">
<string>Default language:</string> <string/>
</property> </property>
<property name="buddy"> <property name="wordWrap">
<cstring>tesseractLangCombo</cstring> <bool>true</bool>
</property>
<property name="textInteractionFlags">
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
</property> </property>
</widget> </widget>
</item> </item>
@ -256,35 +266,35 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="2"> <item row="1" column="0">
<spacer name="verticalSpacer_2"> <widget class="QLabel" name="label_4">
<property name="orientation"> <property name="sizePolicy">
<enum>Qt::Vertical</enum> <sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property> </property>
<property name="sizeHint" stdset="0"> <property name="text">
<size> <string>Default language:</string>
<width>17</width>
<height>410</height>
</size>
</property> </property>
</spacer> <property name="buddy">
<cstring>tesseractLangCombo</cstring>
</property>
</widget>
</item> </item>
<item row="1" column="2"> <item row="1" column="2">
<widget class="QComboBox" name="tesseractLangCombo"/> <widget class="QComboBox" name="tesseractLangCombo"/>
</item> </item>
<item row="0" column="2"> <item row="2" column="0">
<widget class="QLabel" name="tessdataPath"> <widget class="QLabel" name="label_24">
<property name="text"> <property name="text">
<string/> <string>Library version</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="textInteractionFlags">
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="2">
<widget class="QComboBox" name="tesseractVersion"/>
</item>
</layout> </layout>
</widget> </widget>
<widget class="QWidget" name="pageCorrect"> <widget class="QWidget" name="pageCorrect">