Bundle multiple tesseract versions
Load them via the C API and allow the user to select which one to use.
This commit is contained in:
parent
bd99d04416
commit
0920ed1f40
24
.github/workflows/build.yml
vendored
24
.github/workflows/build.yml
vendored
@ -33,18 +33,13 @@ jobs:
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
env:
|
||||
OS: ${{ matrix.config.name }}
|
||||
MARCH: ${{ matrix.config.march }}
|
||||
TAG: ${{ matrix.config.tag }}
|
||||
MSVC_VERSION: 2019/Enterprise
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- { name: "win64", os: windows-latest, tag: "", march: "sandy-bridge" }
|
||||
- { name: "win32", os: windows-latest, tag: "", march: "sandy-bridge" }
|
||||
- { name: "linux", os: ubuntu-16.04, tag: "", march: "sandy-bridge" }
|
||||
- { name: "win64", os: windows-latest, tag: "-compatible", march: "nehalem" }
|
||||
- { name: "win32", os: windows-latest, tag: "-compatible", march: "nehalem" }
|
||||
- { name: "linux", os: ubuntu-16.04, tag: "-compatible", march: "nehalem" }
|
||||
- { name: "win64", os: windows-latest }
|
||||
- { name: "win32", os: windows-latest }
|
||||
- { name: "linux", os: ubuntu-16.04 }
|
||||
# - { name: "macos", os: macos-latest }
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@ -66,7 +61,7 @@ jobs:
|
||||
uses: actions/cache@v1
|
||||
with:
|
||||
path: deps
|
||||
key: ${{ env.OS }}-${{ env.TAG }}-deps
|
||||
key: ${{ env.OS }}-deps
|
||||
|
||||
- name: Get Qt
|
||||
run: python ./share/ci/get_qt.py
|
||||
@ -77,7 +72,16 @@ jobs:
|
||||
- name: Get leptonica
|
||||
run: python ./share/ci/get_leptonica.py
|
||||
|
||||
- name: Get tesseract
|
||||
- name: Get tesseract optimized
|
||||
env:
|
||||
MARCH: sandy-bridge
|
||||
TAG: optimized
|
||||
run: python ./share/ci/get_tesseract.py
|
||||
|
||||
- name: Get tesseract compatible
|
||||
env:
|
||||
MARCH: nehalem
|
||||
TAG: compatible
|
||||
run: python ./share/ci/get_tesseract.py
|
||||
|
||||
- name: Get hunspell
|
||||
|
@ -8,7 +8,7 @@ DEPS_DIR=$$(ST_DEPS_DIR)
|
||||
isEmpty(DEPS_DIR):DEPS_DIR=$$PWD/../deps
|
||||
INCLUDEPATH += $$DEPS_DIR/include
|
||||
LIBS += -L$$DEPS_DIR/lib
|
||||
LIBS += -ltesseract -lleptonica -lhunspell
|
||||
LIBS += -lhunspell -lleptonica
|
||||
|
||||
win32{
|
||||
LIBS += -lUser32
|
||||
|
@ -47,7 +47,8 @@ os.environ['VERSION'] = app_version
|
||||
flags = '' if os.getenv("DEBUG") is None else '-unsupported-allow-new-glibc'
|
||||
|
||||
additional_files = glob(ssl_dir + '/lib/lib*.so.*') + \
|
||||
glob('/usr/lib/x86_64-linux-gnu/nss/*')
|
||||
glob('/usr/lib/x86_64-linux-gnu/nss/*') + \
|
||||
glob(dependencies_dir + '/lib/libtesseract-*.so')
|
||||
out_lib_dir = install_dir + '/usr/lib'
|
||||
os.makedirs(out_lib_dir, exist_ok=True)
|
||||
for f in additional_files:
|
||||
|
@ -33,39 +33,33 @@ if os.environ.get('NO_OPT', '0') == '1':
|
||||
if len(os.environ.get('MARCH', '')) > 0:
|
||||
compat_flags += ' -D TARGET_ARCHITECTURE={} '.format(os.environ['MARCH'])
|
||||
|
||||
cache_file = install_dir + '/tesseract.cache'
|
||||
cache_file_data = required_version + build_type_flag + compat_flags
|
||||
lib_suffix = os.environ.get('TAG', '')
|
||||
if len(lib_suffix) > 0:
|
||||
lib_suffix = '-' + lib_suffix
|
||||
|
||||
|
||||
def check_existing():
|
||||
if not os.path.exists(cache_file):
|
||||
return False
|
||||
with open(cache_file, 'r') as f:
|
||||
cached = f.read()
|
||||
if cached != cache_file_data:
|
||||
return False
|
||||
|
||||
if platform.system() == "Windows":
|
||||
dll = install_dir + '/bin/tesseract41.dll'
|
||||
lib = install_dir + '/lib/tesseract41.lib'
|
||||
if not os.path.exists(dll) or not os.path.exists(lib):
|
||||
return False
|
||||
c.symlink(dll, install_dir + '/bin/tesseract.dll')
|
||||
c.symlink(lib, install_dir + '/lib/tesseract.lib')
|
||||
elif platform.system() == "Darwin":
|
||||
lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
|
||||
if not os.path.exists(lib):
|
||||
return False
|
||||
c.symlink(lib, install_dir + '/lib/libtesseract.dylib')
|
||||
else:
|
||||
if not os.path.exists(install_dir + '/lib/libtesseract.so'):
|
||||
return False
|
||||
|
||||
includes_path = install_dir + '/include/tesseract'
|
||||
if len(c.get_folder_files(includes_path)) == 0:
|
||||
return False
|
||||
|
||||
return True
|
||||
if platform.system() == "Windows":
|
||||
lib = install_dir + '/bin/tesseract{}.dll'.format(lib_suffix)
|
||||
orig_lib = install_dir + '/bin/tesseract41.dll'
|
||||
elif platform.system() == "Darwin":
|
||||
lib = install_dir + '/lib/libtesseract{}.dylib'.format(lib_suffix)
|
||||
orig_lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
|
||||
else:
|
||||
lib = install_dir + '/lib/libtesseract{}.so'.format(lib_suffix)
|
||||
orig_lib = install_dir + '/lib/libtesseract.so.4.1.1'
|
||||
|
||||
if os.path.exists(lib):
|
||||
return True
|
||||
if os.path.exists(orig_lib):
|
||||
os.rename(orig_lib, lib)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
if check_existing() and not 'FORCE' in os.environ:
|
||||
@ -102,9 +96,6 @@ if len(compat_flags) > 0:
|
||||
c.run('cmake --build . --config {}'.format(build_type_flag))
|
||||
c.run('cmake --build . --target install --config {}'.format(build_type_flag))
|
||||
|
||||
with open(cache_file, 'w') as f:
|
||||
f.write(cache_file_data)
|
||||
|
||||
if not check_existing(): # create links
|
||||
if not check_existing(): # add suffix
|
||||
c.print('>> Build failed')
|
||||
exit(1)
|
||||
|
@ -34,7 +34,9 @@ for file in os.scandir(libs_dir):
|
||||
c.print('>> Copying {} to {}'.format(full_name, install_dir))
|
||||
shutil.copy(full_name, install_dir)
|
||||
|
||||
for f in glob(ssl_dir + '/bin/*.dll'):
|
||||
additional_libs = glob(ssl_dir + '/bin/*.dll') + \
|
||||
glob(dependencies_dir + '/bin/tesseract-*.dll')
|
||||
for f in additional_libs:
|
||||
c.print('>> Copying {} to {}'.format(f, install_dir))
|
||||
shutil.copy(f, install_dir)
|
||||
|
||||
|
@ -66,7 +66,7 @@ Ctrl - продолжить выделять</translation>
|
||||
<context>
|
||||
<name>QObject</name>
|
||||
<message>
|
||||
<location filename="../../src/main.cpp" line="28"/>
|
||||
<location filename="../../src/main.cpp" line="30"/>
|
||||
<source>OCR and translation tool</source>
|
||||
<translation>Инструмент распознавания и перевода</translation>
|
||||
</message>
|
||||
@ -668,7 +668,7 @@ Check for updates to silence this warning</source>
|
||||
<translation>Начата запись в лог-файл: %1</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="104"/>
|
||||
<location filename="../../src/settingseditor.cpp" line="112"/>
|
||||
<source><p>Optical character recognition (OCR) and translation tool</p>
|
||||
<p>Version: %1</p>
|
||||
<p>Author: Gres (<a href="mailto:%2">%2</a>)</p>
|
||||
@ -684,12 +684,12 @@ Check for updates to silence this warning</source>
|
||||
<translation>неизвестные языки для перевода: %1 или %2</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/ocr/tesseract.cpp" line="153"/>
|
||||
<location filename="../../src/ocr/tesseract.cpp" line="238"/>
|
||||
<source>init failed</source>
|
||||
<translation>ошибка инициализации</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/ocr/tesseract.cpp" line="211"/>
|
||||
<location filename="../../src/ocr/tesseract.cpp" line="289"/>
|
||||
<source>Failed to recognize text or no text selected</source>
|
||||
<translation>Ошибка распознавания текста или нет текста в выделенной зоне</translation>
|
||||
</message>
|
||||
@ -734,7 +734,7 @@ in %1</source>
|
||||
<context>
|
||||
<name>Recognizer</name>
|
||||
<message>
|
||||
<location filename="../../src/ocr/recognizer.cpp" line="36"/>
|
||||
<location filename="../../src/ocr/recognizer.cpp" line="37"/>
|
||||
<source>No source language set. Check settings</source>
|
||||
<translation>Не задан исходный язык. Проверьте настройки</translation>
|
||||
</message>
|
||||
@ -870,37 +870,42 @@ in %1</source>
|
||||
<translation>сохранять пароль (небезопасно)</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="295"/>
|
||||
<location filename="../../src/settingseditor.ui" line="291"/>
|
||||
<source>Library version</source>
|
||||
<translation>Версия</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="305"/>
|
||||
<source>User substitutions</source>
|
||||
<translation>Пользовательская коррекция</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="318"/>
|
||||
<location filename="../../src/settingseditor.ui" line="328"/>
|
||||
<source>Use auto corrections (hunspell)</source>
|
||||
<translation>Использовать автокоррекцию (hunspell)</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="325"/>
|
||||
<location filename="../../src/settingseditor.ui" line="335"/>
|
||||
<source>Use user substitutions</source>
|
||||
<translation>Использовать пользовательскую коррекцию</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="332"/>
|
||||
<location filename="../../src/settingseditor.ui" line="342"/>
|
||||
<source>Hunspell dictionaries path:</source>
|
||||
<translation>Путь к словарям Hunspell:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="394"/>
|
||||
<location filename="../../src/settingseditor.ui" line="404"/>
|
||||
<source>Language:</source>
|
||||
<translation>Язык:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="430"/>
|
||||
<location filename="../../src/settingseditor.ui" line="440"/>
|
||||
<source> secs</source>
|
||||
<translation> сек</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="370"/>
|
||||
<location filename="../../src/settingseditor.ui" line="380"/>
|
||||
<source>Ignore SSL errors</source>
|
||||
<translation>Игнорировать ошибки SSL</translation>
|
||||
</message>
|
||||
@ -930,107 +935,107 @@ in %1</source>
|
||||
<translation>Писать логи в файл (отладка)</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="239"/>
|
||||
<location filename="../../src/settingseditor.ui" line="278"/>
|
||||
<source>Default language:</source>
|
||||
<translation>Язык по умолчанию:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="255"/>
|
||||
<location filename="../../src/settingseditor.ui" line="265"/>
|
||||
<source>Tessdata path:</source>
|
||||
<translation>Путь к языкам (tessdata):</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="305"/>
|
||||
<location filename="../../src/settingseditor.ui" line="315"/>
|
||||
<source>\\ for \ symbol, \n for newline</source>
|
||||
<translation>\\ для символа \ , \n для символа новой строки</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="353"/>
|
||||
<location filename="../../src/settingseditor.ui" line="363"/>
|
||||
<source>Translators path:</source>
|
||||
<translation>Путь к переводчикам:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="360"/>
|
||||
<location filename="../../src/settingseditor.ui" line="370"/>
|
||||
<source>Translators</source>
|
||||
<translation>Переводчики</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="495"/>
|
||||
<location filename="../../src/settingseditor.ui" line="505"/>
|
||||
<source>Result window</source>
|
||||
<translation>Окно результата</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="501"/>
|
||||
<location filename="../../src/settingseditor.ui" line="511"/>
|
||||
<source>Font:</source>
|
||||
<translation>Шрифт:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="511"/>
|
||||
<location filename="../../src/settingseditor.ui" line="521"/>
|
||||
<source>Font size:</source>
|
||||
<translation>Размер шрифта:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="528"/>
|
||||
<location filename="../../src/settingseditor.ui" line="538"/>
|
||||
<source>Font color:</source>
|
||||
<translation>Цвет шрифта:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="545"/>
|
||||
<location filename="../../src/settingseditor.ui" line="555"/>
|
||||
<source>Background:</source>
|
||||
<translation>Фон:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="562"/>
|
||||
<location filename="../../src/settingseditor.ui" line="572"/>
|
||||
<source>Show image</source>
|
||||
<translation>Показывать изображение</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="569"/>
|
||||
<location filename="../../src/settingseditor.ui" line="579"/>
|
||||
<source>Show recognized</source>
|
||||
<translation>Показывать распознанное</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="638"/>
|
||||
<location filename="../../src/settingseditor.ui" line="648"/>
|
||||
<source>Update check interval (days):</source>
|
||||
<translation>Интервал проверки обновления (дней):</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="645"/>
|
||||
<location filename="../../src/settingseditor.ui" line="655"/>
|
||||
<source>0 - disabled</source>
|
||||
<translation>0 - отключено</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="678"/>
|
||||
<location filename="../../src/settingseditor.ui" line="688"/>
|
||||
<source>Apply updates</source>
|
||||
<translation>Применить изменения</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="380"/>
|
||||
<location filename="../../src/settingseditor.ui" line="390"/>
|
||||
<source>Translate text</source>
|
||||
<translation>Переводить текст</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="387"/>
|
||||
<location filename="../../src/settingseditor.ui" line="397"/>
|
||||
<source>Single translator timeout:</source>
|
||||
<translation>Переходить к следующему переводчику после:</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="457"/>
|
||||
<location filename="../../src/settingseditor.ui" line="467"/>
|
||||
<source>Result type</source>
|
||||
<translation>Тип результата</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="469"/>
|
||||
<location filename="../../src/settingseditor.ui" line="479"/>
|
||||
<source>Tray</source>
|
||||
<translation>Трей</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="482"/>
|
||||
<location filename="../../src/settingseditor.ui" line="492"/>
|
||||
<source>Window</source>
|
||||
<translation>Окно</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.ui" line="661"/>
|
||||
<location filename="../../src/settingseditor.ui" line="671"/>
|
||||
<source>Check now</source>
|
||||
<translation>Проверить сейчас</translation>
|
||||
</message>
|
||||
@ -1090,17 +1095,32 @@ in %1</source>
|
||||
<translation>HTTP</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="66"/>
|
||||
<location filename="../../src/settingseditor.cpp" line="57"/>
|
||||
<source>Optimized</source>
|
||||
<translation>Оптимизированная</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="58"/>
|
||||
<source>Compatible</source>
|
||||
<translation>Совместимая</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="62"/>
|
||||
<source>Use compatible version if you are experiencing crashes during recognition</source>
|
||||
<translation>Используйте совместимую версию если программа неожиданно завершается во время распознавания</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="74"/>
|
||||
<source><b>NOTE! Some translators might require the translation window to be visible. You can make it using the "Show translator" entry in the tray icon's context menu</b></source>
|
||||
<translation><b>ПРИМЕЧАНИЕ! Для работы некоторых переводчиков может потребоваться активное окно перевода. Его можно отобразить при помощи пункта "Показать окно перевода" контекстного меню иконки в трее</b></translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="74"/>
|
||||
<location filename="../../src/settingseditor.cpp" line="82"/>
|
||||
<source>Sample text</source>
|
||||
<translation>Текст для проверки</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="115"/>
|
||||
<location filename="../../src/settingseditor.cpp" line="123"/>
|
||||
<source>The program workflow consists of the following steps:
|
||||
1. Selection on the screen area
|
||||
2. Recognition of the selected area
|
||||
@ -1123,7 +1143,7 @@ Then set default recognition and translation languages, enable some (or all) tra
|
||||
Далее установите языки распознавания и перевода по умолчанию, активируйте некоторые (или все) переводчики и настройку "переводить текст", если нужно.</translation>
|
||||
</message>
|
||||
<message>
|
||||
<location filename="../../src/settingseditor.cpp" line="341"/>
|
||||
<location filename="../../src/settingseditor.cpp" line="352"/>
|
||||
<source>Portable changed. Apply settings first</source>
|
||||
<translation>Режим Portable изменён. Сначала примените настройки</translation>
|
||||
</message>
|
||||
|
@ -79,5 +79,9 @@ void Recognizer::updateSettings()
|
||||
SOFT_ASSERT(!settings_.tessdataPath.isEmpty(), return );
|
||||
|
||||
queue_.clear();
|
||||
emit reset(settings_.tessdataPath);
|
||||
const auto libName =
|
||||
(settings_.tesseractVersion == TesseractVersion::Optimized
|
||||
? "tesseract-optimized"
|
||||
: "tesseract-compatible");
|
||||
emit reset(settings_.tessdataPath, libName);
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ public:
|
||||
|
||||
signals:
|
||||
void recognizeImpl(const TaskPtr &task);
|
||||
void reset(const QString &tessdataPath);
|
||||
void reset(const QString &tessdataPath, const QString &tesseractLibrary);
|
||||
|
||||
private:
|
||||
void recognized(const TaskPtr &task);
|
||||
|
@ -17,8 +17,8 @@ void RecognizeWorker::handle(const TaskPtr &task)
|
||||
if (!engines_.count(task->sourceLanguage)) {
|
||||
LTRACE() << "Create OCR engine" << task->sourceLanguage;
|
||||
|
||||
auto engine =
|
||||
std::make_unique<Tesseract>(task->sourceLanguage, tessdataPath_);
|
||||
auto engine = std::make_unique<Tesseract>(task->sourceLanguage,
|
||||
tessdataPath_, tesseractLibrary_);
|
||||
|
||||
if (!engine->isValid()) {
|
||||
result->error = tr("Failed to init OCR engine: %1").arg(engine->error());
|
||||
@ -43,12 +43,14 @@ void RecognizeWorker::handle(const TaskPtr &task)
|
||||
emit finished(result);
|
||||
}
|
||||
|
||||
void RecognizeWorker::reset(const QString &tessdataPath)
|
||||
void RecognizeWorker::reset(const QString &tessdataPath,
|
||||
const QString &tesseractLibrary)
|
||||
{
|
||||
if (tessdataPath_ == tessdataPath)
|
||||
if (tessdataPath_ == tessdataPath && tesseractLibrary_ == tesseractLibrary)
|
||||
return;
|
||||
|
||||
tessdataPath_ = tessdataPath;
|
||||
tesseractLibrary_ = tesseractLibrary;
|
||||
engines_.clear();
|
||||
LTRACE() << "Cleared OCR engines";
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ public:
|
||||
~RecognizeWorker();
|
||||
|
||||
void handle(const TaskPtr &task);
|
||||
void reset(const QString &tessdataPath);
|
||||
void reset(const QString &tessdataPath, const QString &tesseractLibrary);
|
||||
|
||||
signals:
|
||||
void finished(const TaskPtr &task);
|
||||
@ -24,4 +24,5 @@ private:
|
||||
std::map<QString, std::unique_ptr<Tesseract>> engines_;
|
||||
std::map<QString, Generation> lastGenerations_;
|
||||
QString tessdataPath_;
|
||||
QString tesseractLibrary_;
|
||||
};
|
||||
|
@ -4,10 +4,10 @@
|
||||
#include "task.h"
|
||||
|
||||
#include <leptonica/allheaders.h>
|
||||
#include <tesseract/baseapi.h>
|
||||
|
||||
#include <QBuffer>
|
||||
#include <QDir>
|
||||
#include <QLibrary>
|
||||
|
||||
#if defined(Q_OS_LINUX)
|
||||
#include <fstream>
|
||||
@ -125,7 +125,103 @@ static void cleanupImage(Pix **image)
|
||||
pixDestroy(image);
|
||||
}
|
||||
|
||||
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath)
|
||||
// Do not include capi.h from tesseract: it defines BOOL, which breaks MSVC builds
|
||||
struct TessBaseAPI;
|
||||
|
||||
class Tesseract::Wrapper
|
||||
{
|
||||
using CreateApi = TessBaseAPI *(*)();
|
||||
using DeleteApi = void (*)(TessBaseAPI *);
|
||||
using InitApi = int (*)(TessBaseAPI *, const char *, const char *, int);
|
||||
using SetImage = void (*)(TessBaseAPI *, struct Pix *);
|
||||
using GetUtf8 = char *(*)(TessBaseAPI *);
|
||||
using ClearApi = void (*)(TessBaseAPI *);
|
||||
using DeleteUtf8 = void (*)(const char *);
|
||||
|
||||
public:
|
||||
explicit Wrapper(const QString &libraryName)
|
||||
: lib(libraryName)
|
||||
{
|
||||
if (!lib.load()) {
|
||||
LERROR() << "Failed to load tesseract library" << libraryName;
|
||||
return;
|
||||
}
|
||||
|
||||
LTRACE() << "Loaded tesseract library" << lib.fileName();
|
||||
auto ok = true;
|
||||
ok &= bool(createApi_ = (CreateApi)lib.resolve("TessBaseAPICreate"));
|
||||
ok &= bool(deleteApi_ = (DeleteApi)lib.resolve("TessBaseAPIDelete"));
|
||||
ok &= bool(initApi_ = (InitApi)lib.resolve("TessBaseAPIInit2"));
|
||||
ok &= bool(setImage_ = (SetImage)lib.resolve("TessBaseAPISetImage2"));
|
||||
ok &= bool(getUtf8_ = (GetUtf8)lib.resolve("TessBaseAPIGetUTF8Text"));
|
||||
ok &= bool(clearApi_ = (ClearApi)lib.resolve("TessBaseAPIClear"));
|
||||
ok &= bool(deleteUtf8_ = (DeleteUtf8)lib.resolve("TessDeleteText"));
|
||||
if (!ok) {
|
||||
LERROR() << "Failed to resolve tesseract functions from" << libraryName;
|
||||
return;
|
||||
}
|
||||
handle_ = createApi_();
|
||||
}
|
||||
|
||||
~Wrapper()
|
||||
{
|
||||
if (handle_ && deleteApi_) {
|
||||
deleteApi_(handle_);
|
||||
}
|
||||
lib.unload();
|
||||
}
|
||||
|
||||
int Init(const char *datapath, const char *language)
|
||||
{
|
||||
SOFT_ASSERT(handle_, return -1);
|
||||
SOFT_ASSERT(initApi_, return -1);
|
||||
|
||||
const auto mode = 3; // TessOcrEngineMode::OEM_DEFAULT
|
||||
return initApi_(handle_, datapath, language, mode);
|
||||
}
|
||||
|
||||
QString GetText(Pix *pix)
|
||||
{
|
||||
SOFT_ASSERT(handle_, return {});
|
||||
|
||||
SOFT_ASSERT(setImage_, return {});
|
||||
setImage_(handle_, pix);
|
||||
LTRACE() << "Set Pix to engine";
|
||||
|
||||
char *outText = nullptr;
|
||||
|
||||
SOFT_ASSERT(getUtf8_, return {});
|
||||
outText = getUtf8_(handle_);
|
||||
LTRACE() << "Received recognized text";
|
||||
|
||||
SOFT_ASSERT(clearApi_, return {});
|
||||
clearApi_(handle_);
|
||||
LTRACE() << "Cleared engine";
|
||||
|
||||
const auto result = QString(outText).trimmed();
|
||||
|
||||
SOFT_ASSERT(deleteUtf8_, return {});
|
||||
deleteUtf8_(outText);
|
||||
LTRACE() << "Cleared recognized text buffer";
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
QLibrary lib;
|
||||
CreateApi createApi_{nullptr};
|
||||
DeleteApi deleteApi_{nullptr};
|
||||
InitApi initApi_{nullptr};
|
||||
SetImage setImage_{nullptr};
|
||||
GetUtf8 getUtf8_{nullptr};
|
||||
ClearApi clearApi_{nullptr};
|
||||
DeleteUtf8 deleteUtf8_{nullptr};
|
||||
TessBaseAPI *handle_{nullptr};
|
||||
};
|
||||
|
||||
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath,
|
||||
const QString &tesseractLibrary)
|
||||
: tesseractLibrary_(tesseractLibrary)
|
||||
{
|
||||
SOFT_ASSERT(!tessdataPath.isEmpty(), return );
|
||||
SOFT_ASSERT(!language.isEmpty(), return );
|
||||
@ -139,13 +235,12 @@ void Tesseract::init(const LanguageId &language, const QString &tessdataPath)
|
||||
{
|
||||
SOFT_ASSERT(!engine_, return );
|
||||
|
||||
engine_ = std::make_unique<tesseract::TessBaseAPI>();
|
||||
engine_ = std::make_unique<Wrapper>(tesseractLibrary_);
|
||||
LTRACE() << "Created Tesseract api" << engine_.get();
|
||||
|
||||
const auto tesseractName = LanguageCodes::tesseract(language);
|
||||
auto result =
|
||||
engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName),
|
||||
tesseract::OEM_DEFAULT);
|
||||
engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName));
|
||||
LTRACE() << "Inited Tesseract api" << result;
|
||||
if (result == 0)
|
||||
return;
|
||||
@ -194,19 +289,12 @@ QString Tesseract::recognize(const QPixmap &source)
|
||||
Pix *image = prepareImage(source.toImage());
|
||||
SOFT_ASSERT(image, return {});
|
||||
LTRACE() << "Preprocessed Pix for OCR" << image;
|
||||
engine_->SetImage(image);
|
||||
LTRACE() << "Set Pix to engine";
|
||||
char *outText = engine_->GetUTF8Text();
|
||||
LTRACE() << "Received recognized text";
|
||||
engine_->Clear();
|
||||
LTRACE() << "Cleared engine";
|
||||
|
||||
auto result = engine_->GetText(image);
|
||||
|
||||
cleanupImage(&image);
|
||||
LTRACE() << "Cleared preprocessed Pix";
|
||||
|
||||
QString result = QString(outText).trimmed();
|
||||
delete[] outText;
|
||||
LTRACE() << "Cleared recognized text buffer";
|
||||
|
||||
if (result.isEmpty())
|
||||
error_ = QObject::tr("Failed to recognize text or no text selected");
|
||||
return result;
|
||||
|
@ -7,16 +7,13 @@
|
||||
#include <memory>
|
||||
|
||||
class QPixmap;
|
||||
namespace tesseract
|
||||
{
|
||||
class TessBaseAPI;
|
||||
}
|
||||
class Task;
|
||||
|
||||
class Tesseract
|
||||
{
|
||||
public:
|
||||
Tesseract(const LanguageId& language, const QString& tessdataPath);
|
||||
Tesseract(const LanguageId& language, const QString& tessdataPath,
|
||||
const QString& tesseractLibrary);
|
||||
~Tesseract();
|
||||
|
||||
QString recognize(const QPixmap& source);
|
||||
@ -26,8 +23,10 @@ public:
|
||||
static QStringList availableLanguageNames(const QString& path);
|
||||
|
||||
private:
|
||||
class Wrapper;
|
||||
void init(const LanguageId& language, const QString& tessdataPath);
|
||||
|
||||
std::unique_ptr<tesseract::TessBaseAPI> engine_;
|
||||
const QString tesseractLibrary_;
|
||||
std::unique_ptr<Wrapper> engine_;
|
||||
QString error_;
|
||||
};
|
||||
|
@ -30,6 +30,7 @@ const QString qs_showMessageOnStart = "showMessageOnStart";
|
||||
|
||||
const QString qs_recogntionGroup = "Recognition";
|
||||
const QString qs_ocrLanguage = "language";
|
||||
const QString qs_tesseractVersion = "tesseractVersion";
|
||||
|
||||
const QString qs_correctionGroup = "Correction";
|
||||
const QString qs_userSubstitutions = "userSubstitutions";
|
||||
@ -171,6 +172,7 @@ void Settings::save() const
|
||||
|
||||
settings.beginGroup(qs_recogntionGroup);
|
||||
settings.setValue(qs_ocrLanguage, sourceLanguage);
|
||||
settings.setValue(qs_tesseractVersion, int(tesseractVersion));
|
||||
settings.endGroup();
|
||||
|
||||
settings.beginGroup(qs_correctionGroup);
|
||||
@ -257,6 +259,9 @@ void Settings::load()
|
||||
|
||||
settings.beginGroup(qs_recogntionGroup);
|
||||
sourceLanguage = settings.value(qs_ocrLanguage, sourceLanguage).toString();
|
||||
tesseractVersion = TesseractVersion(std::clamp(
|
||||
settings.value(qs_tesseractVersion, int(tesseractVersion)).toInt(),
|
||||
int(TesseractVersion::Optimized), int(TesseractVersion::Compatible)));
|
||||
settings.endGroup();
|
||||
|
||||
settings.beginGroup(qs_correctionGroup);
|
||||
|
@ -18,6 +18,8 @@ using Substitutions = std::unordered_multimap<LanguageId, Substitution>;
|
||||
|
||||
enum class ProxyType { Disabled, System, Socks5, Http };
|
||||
|
||||
enum class TesseractVersion { Optimized, Compatible };
|
||||
|
||||
class Settings
|
||||
{
|
||||
public:
|
||||
@ -57,6 +59,7 @@ public:
|
||||
|
||||
QString tessdataPath;
|
||||
QString sourceLanguage{"eng"};
|
||||
TesseractVersion tesseractVersion{TesseractVersion::Optimized};
|
||||
|
||||
bool doTranslation{true};
|
||||
bool ignoreSslErrors{false};
|
||||
|
@ -51,8 +51,16 @@ SettingsEditor::SettingsEditor(Manager &manager, update::Loader &updater)
|
||||
ui->proxyPassEdit->setEchoMode(QLineEdit::PasswordEchoOnEdit);
|
||||
}
|
||||
|
||||
// translation
|
||||
// recognition
|
||||
ui->tesseractLangCombo->setModel(models_.sourceLanguageModel());
|
||||
const QMap<TesseractVersion, QString> tesseractVersions{
|
||||
{TesseractVersion::Optimized, tr("Optimized")},
|
||||
{TesseractVersion::Compatible, tr("Compatible")},
|
||||
};
|
||||
ui->tesseractVersion->addItems(tesseractVersions.values());
|
||||
ui->tesseractVersion->setToolTip(
|
||||
tr("Use compatible version if you are experiencing crashes during "
|
||||
"recognition"));
|
||||
|
||||
// correction
|
||||
ui->userSubstitutionsTable->setEnabled(ui->useUserSubstitutions->isChecked());
|
||||
@ -164,6 +172,8 @@ Settings SettingsEditor::settings() const
|
||||
|
||||
settings.sourceLanguage =
|
||||
LanguageCodes::idForName(ui->tesseractLangCombo->currentText());
|
||||
settings.tesseractVersion =
|
||||
TesseractVersion(ui->tesseractVersion->currentIndex());
|
||||
|
||||
settings.useHunspell = ui->useHunspell->isChecked();
|
||||
settings.useUserSubstitutions = ui->useUserSubstitutions->isChecked();
|
||||
@ -227,6 +237,7 @@ void SettingsEditor::setSettings(const Settings &settings)
|
||||
ui->tessdataPath->setText(settings.tessdataPath);
|
||||
ui->tesseractLangCombo->setCurrentText(
|
||||
LanguageCodes::name(settings.sourceLanguage));
|
||||
ui->tesseractVersion->setCurrentIndex(int(settings.tesseractVersion));
|
||||
|
||||
ui->useHunspell->setChecked(settings.useHunspell);
|
||||
ui->hunspellDir->setText(settings.hunspellDir);
|
||||
|
@ -227,19 +227,29 @@
|
||||
</widget>
|
||||
<widget class="QWidget" name="pageRecognize">
|
||||
<layout class="QGridLayout" name="gridLayout_2">
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
<item row="3" column="2">
|
||||
<spacer name="verticalSpacer_2">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>17</width>
|
||||
<height>410</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
<item row="0" column="2">
|
||||
<widget class="QLabel" name="tessdataPath">
|
||||
<property name="text">
|
||||
<string>Default language:</string>
|
||||
<string/>
|
||||
</property>
|
||||
<property name="buddy">
|
||||
<cstring>tesseractLangCombo</cstring>
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="textInteractionFlags">
|
||||
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -256,35 +266,35 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="2">
|
||||
<spacer name="verticalSpacer_2">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>17</width>
|
||||
<height>410</height>
|
||||
</size>
|
||||
<property name="text">
|
||||
<string>Default language:</string>
|
||||
</property>
|
||||
</spacer>
|
||||
<property name="buddy">
|
||||
<cstring>tesseractLangCombo</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="2">
|
||||
<widget class="QComboBox" name="tesseractLangCombo"/>
|
||||
</item>
|
||||
<item row="0" column="2">
|
||||
<widget class="QLabel" name="tessdataPath">
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_24">
|
||||
<property name="text">
|
||||
<string/>
|
||||
</property>
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="textInteractionFlags">
|
||||
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
|
||||
<string>Library version</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="2">
|
||||
<widget class="QComboBox" name="tesseractVersion"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="pageCorrect">
|
||||
|
Loading…
Reference in New Issue
Block a user