Bundle multiple tesseract versions

Load them via the C API and allow the user to select which one to use.
This commit is contained in:
Gres 2020-07-18 12:26:59 +03:00
parent bd99d04416
commit 0920ed1f40
16 changed files with 279 additions and 138 deletions

View File

@ -33,18 +33,13 @@ jobs:
runs-on: ${{ matrix.config.os }}
env:
OS: ${{ matrix.config.name }}
MARCH: ${{ matrix.config.march }}
TAG: ${{ matrix.config.tag }}
MSVC_VERSION: 2019/Enterprise
strategy:
matrix:
config:
- { name: "win64", os: windows-latest, tag: "", march: "sandy-bridge" }
- { name: "win32", os: windows-latest, tag: "", march: "sandy-bridge" }
- { name: "linux", os: ubuntu-16.04, tag: "", march: "sandy-bridge" }
- { name: "win64", os: windows-latest, tag: "-compatible", march: "nehalem" }
- { name: "win32", os: windows-latest, tag: "-compatible", march: "nehalem" }
- { name: "linux", os: ubuntu-16.04, tag: "-compatible", march: "nehalem" }
- { name: "win64", os: windows-latest }
- { name: "win32", os: windows-latest }
- { name: "linux", os: ubuntu-16.04 }
# - { name: "macos", os: macos-latest }
steps:
- uses: actions/checkout@v2
@ -66,7 +61,7 @@ jobs:
uses: actions/cache@v1
with:
path: deps
key: ${{ env.OS }}-${{ env.TAG }}-deps
key: ${{ env.OS }}-deps
- name: Get Qt
run: python ./share/ci/get_qt.py
@ -77,7 +72,16 @@ jobs:
- name: Get leptonica
run: python ./share/ci/get_leptonica.py
- name: Get tesseract
- name: Get tesseract optimized
env:
MARCH: sandy-bridge
TAG: optimized
run: python ./share/ci/get_tesseract.py
- name: Get tesseract compatible
env:
MARCH: nehalem
TAG: compatible
run: python ./share/ci/get_tesseract.py
- name: Get hunspell

View File

@ -8,7 +8,7 @@ DEPS_DIR=$$(ST_DEPS_DIR)
isEmpty(DEPS_DIR):DEPS_DIR=$$PWD/../deps
INCLUDEPATH += $$DEPS_DIR/include
LIBS += -L$$DEPS_DIR/lib
LIBS += -ltesseract -lleptonica -lhunspell
LIBS += -lhunspell -lleptonica
win32{
LIBS += -lUser32

View File

@ -47,7 +47,8 @@ os.environ['VERSION'] = app_version
flags = '' if os.getenv("DEBUG") is None else '-unsupported-allow-new-glibc'
additional_files = glob(ssl_dir + '/lib/lib*.so.*') + \
glob('/usr/lib/x86_64-linux-gnu/nss/*')
glob('/usr/lib/x86_64-linux-gnu/nss/*') + \
glob(dependencies_dir + '/lib/libtesseract-*.so')
out_lib_dir = install_dir + '/usr/lib'
os.makedirs(out_lib_dir, exist_ok=True)
for f in additional_files:

View File

@ -33,39 +33,33 @@ if os.environ.get('NO_OPT', '0') == '1':
if len(os.environ.get('MARCH', '')) > 0:
compat_flags += ' -D TARGET_ARCHITECTURE={} '.format(os.environ['MARCH'])
cache_file = install_dir + '/tesseract.cache'
cache_file_data = required_version + build_type_flag + compat_flags
lib_suffix = os.environ.get('TAG', '')
if len(lib_suffix) > 0:
lib_suffix = '-' + lib_suffix
def check_existing():
if not os.path.exists(cache_file):
return False
with open(cache_file, 'r') as f:
cached = f.read()
if cached != cache_file_data:
return False
if platform.system() == "Windows":
dll = install_dir + '/bin/tesseract41.dll'
lib = install_dir + '/lib/tesseract41.lib'
if not os.path.exists(dll) or not os.path.exists(lib):
return False
c.symlink(dll, install_dir + '/bin/tesseract.dll')
c.symlink(lib, install_dir + '/lib/tesseract.lib')
elif platform.system() == "Darwin":
lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
if not os.path.exists(lib):
return False
c.symlink(lib, install_dir + '/lib/libtesseract.dylib')
else:
if not os.path.exists(install_dir + '/lib/libtesseract.so'):
return False
includes_path = install_dir + '/include/tesseract'
if len(c.get_folder_files(includes_path)) == 0:
return False
return True
if platform.system() == "Windows":
lib = install_dir + '/bin/tesseract{}.dll'.format(lib_suffix)
orig_lib = install_dir + '/bin/tesseract41.dll'
elif platform.system() == "Darwin":
lib = install_dir + '/lib/libtesseract{}.dylib'.format(lib_suffix)
orig_lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
else:
lib = install_dir + '/lib/libtesseract{}.so'.format(lib_suffix)
orig_lib = install_dir + '/lib/libtesseract.so.4.1.1'
if os.path.exists(lib):
return True
if os.path.exists(orig_lib):
os.rename(orig_lib, lib)
return True
return False
if check_existing() and not 'FORCE' in os.environ:
@ -102,9 +96,6 @@ if len(compat_flags) > 0:
c.run('cmake --build . --config {}'.format(build_type_flag))
c.run('cmake --build . --target install --config {}'.format(build_type_flag))
with open(cache_file, 'w') as f:
f.write(cache_file_data)
if not check_existing(): # create links
if not check_existing(): # add suffix
c.print('>> Build failed')
exit(1)

View File

@ -34,7 +34,9 @@ for file in os.scandir(libs_dir):
c.print('>> Copying {} to {}'.format(full_name, install_dir))
shutil.copy(full_name, install_dir)
for f in glob(ssl_dir + '/bin/*.dll'):
additional_libs = glob(ssl_dir + '/bin/*.dll') + \
glob(dependencies_dir + '/bin/tesseract-*.dll')
for f in additional_libs:
c.print('>> Copying {} to {}'.format(f, install_dir))
shutil.copy(f, install_dir)

View File

@ -66,7 +66,7 @@ Ctrl - продолжить выделять</translation>
<context>
<name>QObject</name>
<message>
<location filename="../../src/main.cpp" line="28"/>
<location filename="../../src/main.cpp" line="30"/>
<source>OCR and translation tool</source>
<translation>Инструмент распознавания и перевода</translation>
</message>
@ -668,7 +668,7 @@ Check for updates to silence this warning</source>
<translation>Начата запись в лог-файл: %1</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="104"/>
<location filename="../../src/settingseditor.cpp" line="112"/>
<source>&lt;p&gt;Optical character recognition (OCR) and translation tool&lt;/p&gt;
&lt;p&gt;Version: %1&lt;/p&gt;
&lt;p&gt;Author: Gres (&lt;a href=&quot;mailto:%2&quot;&gt;%2&lt;/a&gt;)&lt;/p&gt;
@ -684,12 +684,12 @@ Check for updates to silence this warning</source>
<translation>неизвестные языки для перевода: %1 или %2</translation>
</message>
<message>
<location filename="../../src/ocr/tesseract.cpp" line="153"/>
<location filename="../../src/ocr/tesseract.cpp" line="238"/>
<source>init failed</source>
<translation>ошибка инициализации</translation>
</message>
<message>
<location filename="../../src/ocr/tesseract.cpp" line="211"/>
<location filename="../../src/ocr/tesseract.cpp" line="289"/>
<source>Failed to recognize text or no text selected</source>
<translation>Ошибка распознавания текста или нет текста в выделенной зоне</translation>
</message>
@ -734,7 +734,7 @@ in %1</source>
<context>
<name>Recognizer</name>
<message>
<location filename="../../src/ocr/recognizer.cpp" line="36"/>
<location filename="../../src/ocr/recognizer.cpp" line="37"/>
<source>No source language set. Check settings</source>
<translation>Не задан исходный язык. Проверьте настройки</translation>
</message>
@ -870,37 +870,42 @@ in %1</source>
<translation>сохранять пароль (небезопасно)</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="295"/>
<location filename="../../src/settingseditor.ui" line="291"/>
<source>Library version</source>
<translation>Версия</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="305"/>
<source>User substitutions</source>
<translation>Пользовательская коррекция</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="318"/>
<location filename="../../src/settingseditor.ui" line="328"/>
<source>Use auto corrections (hunspell)</source>
<translation>Использовать автокоррекцию (hunspell)</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="325"/>
<location filename="../../src/settingseditor.ui" line="335"/>
<source>Use user substitutions</source>
<translation>Использовать пользовательскую коррекцию</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="332"/>
<location filename="../../src/settingseditor.ui" line="342"/>
<source>Hunspell dictionaries path:</source>
<translation>Путь к словарям Hunspell:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="394"/>
<location filename="../../src/settingseditor.ui" line="404"/>
<source>Language:</source>
<translation>Язык:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="430"/>
<location filename="../../src/settingseditor.ui" line="440"/>
<source> secs</source>
<translation> сек</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="370"/>
<location filename="../../src/settingseditor.ui" line="380"/>
<source>Ignore SSL errors</source>
<translation>Игнорировать ошибки SSL</translation>
</message>
@ -930,107 +935,107 @@ in %1</source>
<translation>Писать логи в файл (отладка)</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="239"/>
<location filename="../../src/settingseditor.ui" line="278"/>
<source>Default language:</source>
<translation>Язык по умолчанию:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="255"/>
<location filename="../../src/settingseditor.ui" line="265"/>
<source>Tessdata path:</source>
<translation>Путь к языкам (tessdata):</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="305"/>
<location filename="../../src/settingseditor.ui" line="315"/>
<source>\\ for \ symbol, \n for newline</source>
<translation>\\ для символа \ , \n для символа новой строки</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="353"/>
<location filename="../../src/settingseditor.ui" line="363"/>
<source>Translators path:</source>
<translation>Путь к переводчикам:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="360"/>
<location filename="../../src/settingseditor.ui" line="370"/>
<source>Translators</source>
<translation>Переводчики</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="495"/>
<location filename="../../src/settingseditor.ui" line="505"/>
<source>Result window</source>
<translation>Окно результата</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="501"/>
<location filename="../../src/settingseditor.ui" line="511"/>
<source>Font:</source>
<translation>Шрифт:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="511"/>
<location filename="../../src/settingseditor.ui" line="521"/>
<source>Font size:</source>
<translation>Размер шрифта:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="528"/>
<location filename="../../src/settingseditor.ui" line="538"/>
<source>Font color:</source>
<translation>Цвет шрифта:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="545"/>
<location filename="../../src/settingseditor.ui" line="555"/>
<source>Background:</source>
<translation>Фон:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="562"/>
<location filename="../../src/settingseditor.ui" line="572"/>
<source>Show image</source>
<translation>Показывать изображение</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="569"/>
<location filename="../../src/settingseditor.ui" line="579"/>
<source>Show recognized</source>
<translation>Показывать распознанное</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="638"/>
<location filename="../../src/settingseditor.ui" line="648"/>
<source>Update check interval (days):</source>
<translation>Интервал проверки обновления (дней):</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="645"/>
<location filename="../../src/settingseditor.ui" line="655"/>
<source>0 - disabled</source>
<translation>0 - отключено</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="678"/>
<location filename="../../src/settingseditor.ui" line="688"/>
<source>Apply updates</source>
<translation>Применить изменения</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="380"/>
<location filename="../../src/settingseditor.ui" line="390"/>
<source>Translate text</source>
<translation>Переводить текст</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="387"/>
<location filename="../../src/settingseditor.ui" line="397"/>
<source>Single translator timeout:</source>
<translation>Переходить к следующему переводчику после:</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="457"/>
<location filename="../../src/settingseditor.ui" line="467"/>
<source>Result type</source>
<translation>Тип результата</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="469"/>
<location filename="../../src/settingseditor.ui" line="479"/>
<source>Tray</source>
<translation>Трей</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="482"/>
<location filename="../../src/settingseditor.ui" line="492"/>
<source>Window</source>
<translation>Окно</translation>
</message>
<message>
<location filename="../../src/settingseditor.ui" line="661"/>
<location filename="../../src/settingseditor.ui" line="671"/>
<source>Check now</source>
<translation>Проверить сейчас</translation>
</message>
@ -1090,17 +1095,32 @@ in %1</source>
<translation>HTTP</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="66"/>
<location filename="../../src/settingseditor.cpp" line="57"/>
<source>Optimized</source>
<translation>Оптимизированная</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="58"/>
<source>Compatible</source>
<translation>Совместимая</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="62"/>
<source>Use compatible version if you are experiencing crashes during recognition</source>
<translation>Используйте совместимую версию если программа неожиданно завершается во время распознавания</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="74"/>
<source>&lt;b&gt;NOTE! Some translators might require the translation window to be visible. You can make it using the &quot;Show translator&quot; entry in the tray icon&apos;s context menu&lt;/b&gt;</source>
<translation>&lt;b&gt;ПРИМЕЧАНИЕ! Для работы некоторых переводчиков может потребоваться активное окно перевода. Его можно отобразить при помощи пункта &quot;Показать окно перевода&quot; контекстного меню иконки в трее&lt;/b&gt;</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="74"/>
<location filename="../../src/settingseditor.cpp" line="82"/>
<source>Sample text</source>
<translation>Текст для проверки</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="115"/>
<location filename="../../src/settingseditor.cpp" line="123"/>
<source>The program workflow consists of the following steps:
1. Selection on the screen area
2. Recognition of the selected area
@ -1123,7 +1143,7 @@ Then set default recognition and translation languages, enable some (or all) tra
Далее установите языки распознавания и перевода по умолчанию, активируйте некоторые (или все) переводчики и настройку &quot;переводить текст&quot;, если нужно.</translation>
</message>
<message>
<location filename="../../src/settingseditor.cpp" line="341"/>
<location filename="../../src/settingseditor.cpp" line="352"/>
<source>Portable changed. Apply settings first</source>
<translation>Режим Portable изменён. Сначала примените настройки</translation>
</message>

View File

@ -79,5 +79,9 @@ void Recognizer::updateSettings()
SOFT_ASSERT(!settings_.tessdataPath.isEmpty(), return );
queue_.clear();
emit reset(settings_.tessdataPath);
const auto libName =
(settings_.tesseractVersion == TesseractVersion::Optimized
? "tesseract-optimized"
: "tesseract-compatible");
emit reset(settings_.tessdataPath, libName);
}

View File

@ -18,7 +18,7 @@ public:
signals:
void recognizeImpl(const TaskPtr &task);
void reset(const QString &tessdataPath);
void reset(const QString &tessdataPath, const QString &tesseractLibrary);
private:
void recognized(const TaskPtr &task);

View File

@ -17,8 +17,8 @@ void RecognizeWorker::handle(const TaskPtr &task)
if (!engines_.count(task->sourceLanguage)) {
LTRACE() << "Create OCR engine" << task->sourceLanguage;
auto engine =
std::make_unique<Tesseract>(task->sourceLanguage, tessdataPath_);
auto engine = std::make_unique<Tesseract>(task->sourceLanguage,
tessdataPath_, tesseractLibrary_);
if (!engine->isValid()) {
result->error = tr("Failed to init OCR engine: %1").arg(engine->error());
@ -43,12 +43,14 @@ void RecognizeWorker::handle(const TaskPtr &task)
emit finished(result);
}
void RecognizeWorker::reset(const QString &tessdataPath)
void RecognizeWorker::reset(const QString &tessdataPath,
const QString &tesseractLibrary)
{
if (tessdataPath_ == tessdataPath)
if (tessdataPath_ == tessdataPath && tesseractLibrary_ == tesseractLibrary)
return;
tessdataPath_ = tessdataPath;
tesseractLibrary_ = tesseractLibrary;
engines_.clear();
LTRACE() << "Cleared OCR engines";
}

View File

@ -13,7 +13,7 @@ public:
~RecognizeWorker();
void handle(const TaskPtr &task);
void reset(const QString &tessdataPath);
void reset(const QString &tessdataPath, const QString &tesseractLibrary);
signals:
void finished(const TaskPtr &task);
@ -24,4 +24,5 @@ private:
std::map<QString, std::unique_ptr<Tesseract>> engines_;
std::map<QString, Generation> lastGenerations_;
QString tessdataPath_;
QString tesseractLibrary_;
};

View File

@ -4,10 +4,10 @@
#include "task.h"
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <QBuffer>
#include <QDir>
#include <QLibrary>
#if defined(Q_OS_LINUX)
#include <fstream>
@ -125,7 +125,103 @@ static void cleanupImage(Pix **image)
pixDestroy(image);
}
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath)
// Do not include capi.h from tesseract: it defines BOOL, which breaks MSVC.
struct TessBaseAPI;
// Runtime binding to the tesseract C API.
//
// Loads the shared library named by `libraryName` through QLibrary, resolves
// the handful of C entry points used for recognition and owns a single
// engine handle created with TessBaseAPICreate. If the library cannot be
// loaded or any symbol is missing, the handle stays null and every operation
// fails through SOFT_ASSERT instead of dereferencing a null pointer.
class Tesseract::Wrapper
{
  // Signatures of the C API functions resolved from the library.
  using CreateApi = TessBaseAPI *(*)();
  using DeleteApi = void (*)(TessBaseAPI *);
  using InitApi = int (*)(TessBaseAPI *, const char *, const char *, int);
  using SetImage = void (*)(TessBaseAPI *, struct Pix *);
  using GetUtf8 = char *(*)(TessBaseAPI *);
  using ClearApi = void (*)(TessBaseAPI *);
  using DeleteUtf8 = void (*)(const char *);

public:
  // Loads `libraryName`, resolves the required functions and creates the
  // engine handle. Failures are logged; the object is then left without a
  // handle.
  explicit Wrapper(const QString &libraryName)
    : lib(libraryName)
  {
    if (!lib.load()) {
      LERROR() << "Failed to load tesseract library" << libraryName;
      return;
    }
    LTRACE() << "Loaded tesseract library" << lib.fileName();

    createApi_ = reinterpret_cast<CreateApi>(lib.resolve("TessBaseAPICreate"));
    deleteApi_ = reinterpret_cast<DeleteApi>(lib.resolve("TessBaseAPIDelete"));
    initApi_ = reinterpret_cast<InitApi>(lib.resolve("TessBaseAPIInit2"));
    setImage_ = reinterpret_cast<SetImage>(lib.resolve("TessBaseAPISetImage2"));
    getUtf8_ = reinterpret_cast<GetUtf8>(lib.resolve("TessBaseAPIGetUTF8Text"));
    clearApi_ = reinterpret_cast<ClearApi>(lib.resolve("TessBaseAPIClear"));
    deleteUtf8_ = reinterpret_cast<DeleteUtf8>(lib.resolve("TessDeleteText"));

    // The handle is only created when the complete function set is present,
    // so a non-null handle implies every pointer below is usable.
    const bool ok = createApi_ && deleteApi_ && initApi_ && setImage_ &&
                    getUtf8_ && clearApi_ && deleteUtf8_;
    if (!ok) {
      LERROR() << "Failed to resolve tesseract functions from" << libraryName;
      return;
    }

    handle_ = createApi_();
  }

  // The wrapper owns a raw engine handle, so it must not be copied.
  Wrapper(const Wrapper &) = delete;
  Wrapper &operator=(const Wrapper &) = delete;

  // Destroys the engine handle (if any) and unloads the library.
  ~Wrapper()
  {
    if (handle_ && deleteApi_) {
      deleteApi_(handle_);
    }
    lib.unload();
  }

  // Initializes the engine with the given data path and language.
  // Returns the value reported by TessBaseAPIInit2, or -1 when the library
  // was not loaded correctly.
  int Init(const char *datapath, const char *language)
  {
    SOFT_ASSERT(handle_, return -1);
    SOFT_ASSERT(initApi_, return -1);

    const auto mode = 3;  // TessOcrEngineMode::OEM_DEFAULT
    return initApi_(handle_, datapath, language, mode);
  }

  // Runs recognition on `pix` and returns the trimmed text.
  // Returns an empty string when the library was not loaded correctly.
  QString GetText(Pix *pix)
  {
    // Validate everything before touching the engine: once the text buffer
    // has been obtained, no early return may skip releasing it.
    SOFT_ASSERT(handle_, return {});
    SOFT_ASSERT(setImage_, return {});
    SOFT_ASSERT(getUtf8_, return {});
    SOFT_ASSERT(clearApi_, return {});
    SOFT_ASSERT(deleteUtf8_, return {});

    setImage_(handle_, pix);
    LTRACE() << "Set Pix to engine";

    char *outText = getUtf8_(handle_);
    LTRACE() << "Received recognized text";

    clearApi_(handle_);
    LTRACE() << "Cleared engine";

    // The buffer is UTF-8 encoded and owned by the library; copy it into a
    // QString and hand it back through the library's own deleter.
    const auto result = QString::fromUtf8(outText).trimmed();
    deleteUtf8_(outText);
    LTRACE() << "Cleared recognized text buffer";

    return result;
  }

private:
  QLibrary lib;
  CreateApi createApi_{nullptr};
  DeleteApi deleteApi_{nullptr};
  InitApi initApi_{nullptr};
  SetImage setImage_{nullptr};
  GetUtf8 getUtf8_{nullptr};
  ClearApi clearApi_{nullptr};
  DeleteUtf8 deleteUtf8_{nullptr};
  TessBaseAPI *handle_{nullptr};
};
Tesseract::Tesseract(const LanguageId &language, const QString &tessdataPath,
const QString &tesseractLibrary)
: tesseractLibrary_(tesseractLibrary)
{
SOFT_ASSERT(!tessdataPath.isEmpty(), return );
SOFT_ASSERT(!language.isEmpty(), return );
@ -139,13 +235,12 @@ void Tesseract::init(const LanguageId &language, const QString &tessdataPath)
{
SOFT_ASSERT(!engine_, return );
engine_ = std::make_unique<tesseract::TessBaseAPI>();
engine_ = std::make_unique<Wrapper>(tesseractLibrary_);
LTRACE() << "Created Tesseract api" << engine_.get();
const auto tesseractName = LanguageCodes::tesseract(language);
auto result =
engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName),
tesseract::OEM_DEFAULT);
engine_->Init(qPrintable(tessdataPath), qPrintable(tesseractName));
LTRACE() << "Inited Tesseract api" << result;
if (result == 0)
return;
@ -194,19 +289,12 @@ QString Tesseract::recognize(const QPixmap &source)
Pix *image = prepareImage(source.toImage());
SOFT_ASSERT(image, return {});
LTRACE() << "Preprocessed Pix for OCR" << image;
engine_->SetImage(image);
LTRACE() << "Set Pix to engine";
char *outText = engine_->GetUTF8Text();
LTRACE() << "Received recognized text";
engine_->Clear();
LTRACE() << "Cleared engine";
auto result = engine_->GetText(image);
cleanupImage(&image);
LTRACE() << "Cleared preprocessed Pix";
QString result = QString(outText).trimmed();
delete[] outText;
LTRACE() << "Cleared recognized text buffer";
if (result.isEmpty())
error_ = QObject::tr("Failed to recognize text or no text selected");
return result;

View File

@ -7,16 +7,13 @@
#include <memory>
class QPixmap;
namespace tesseract
{
class TessBaseAPI;
}
class Task;
class Tesseract
{
public:
Tesseract(const LanguageId& language, const QString& tessdataPath);
Tesseract(const LanguageId& language, const QString& tessdataPath,
const QString& tesseractLibrary);
~Tesseract();
QString recognize(const QPixmap& source);
@ -26,8 +23,10 @@ public:
static QStringList availableLanguageNames(const QString& path);
private:
class Wrapper;
void init(const LanguageId& language, const QString& tessdataPath);
std::unique_ptr<tesseract::TessBaseAPI> engine_;
const QString tesseractLibrary_;
std::unique_ptr<Wrapper> engine_;
QString error_;
};

View File

@ -30,6 +30,7 @@ const QString qs_showMessageOnStart = "showMessageOnStart";
const QString qs_recogntionGroup = "Recognition";
const QString qs_ocrLanguage = "language";
const QString qs_tesseractVersion = "tesseractVersion";
const QString qs_correctionGroup = "Correction";
const QString qs_userSubstitutions = "userSubstitutions";
@ -171,6 +172,7 @@ void Settings::save() const
settings.beginGroup(qs_recogntionGroup);
settings.setValue(qs_ocrLanguage, sourceLanguage);
settings.setValue(qs_tesseractVersion, int(tesseractVersion));
settings.endGroup();
settings.beginGroup(qs_correctionGroup);
@ -257,6 +259,9 @@ void Settings::load()
settings.beginGroup(qs_recogntionGroup);
sourceLanguage = settings.value(qs_ocrLanguage, sourceLanguage).toString();
tesseractVersion = TesseractVersion(std::clamp(
settings.value(qs_tesseractVersion, int(tesseractVersion)).toInt(),
int(TesseractVersion::Optimized), int(TesseractVersion::Compatible)));
settings.endGroup();
settings.beginGroup(qs_correctionGroup);

View File

@ -18,6 +18,8 @@ using Substitutions = std::unordered_multimap<LanguageId, Substitution>;
enum class ProxyType { Disabled, System, Socks5, Http };
enum class TesseractVersion { Optimized, Compatible };
class Settings
{
public:
@ -57,6 +59,7 @@ public:
QString tessdataPath;
QString sourceLanguage{"eng"};
TesseractVersion tesseractVersion{TesseractVersion::Optimized};
bool doTranslation{true};
bool ignoreSslErrors{false};

View File

@ -51,8 +51,16 @@ SettingsEditor::SettingsEditor(Manager &manager, update::Loader &updater)
ui->proxyPassEdit->setEchoMode(QLineEdit::PasswordEchoOnEdit);
}
// translation
// recognition
ui->tesseractLangCombo->setModel(models_.sourceLanguageModel());
const QMap<TesseractVersion, QString> tesseractVersions{
{TesseractVersion::Optimized, tr("Optimized")},
{TesseractVersion::Compatible, tr("Compatible")},
};
ui->tesseractVersion->addItems(tesseractVersions.values());
ui->tesseractVersion->setToolTip(
tr("Use compatible version if you are experiencing crashes during "
"recognition"));
// correction
ui->userSubstitutionsTable->setEnabled(ui->useUserSubstitutions->isChecked());
@ -164,6 +172,8 @@ Settings SettingsEditor::settings() const
settings.sourceLanguage =
LanguageCodes::idForName(ui->tesseractLangCombo->currentText());
settings.tesseractVersion =
TesseractVersion(ui->tesseractVersion->currentIndex());
settings.useHunspell = ui->useHunspell->isChecked();
settings.useUserSubstitutions = ui->useUserSubstitutions->isChecked();
@ -227,6 +237,7 @@ void SettingsEditor::setSettings(const Settings &settings)
ui->tessdataPath->setText(settings.tessdataPath);
ui->tesseractLangCombo->setCurrentText(
LanguageCodes::name(settings.sourceLanguage));
ui->tesseractVersion->setCurrentIndex(int(settings.tesseractVersion));
ui->useHunspell->setChecked(settings.useHunspell);
ui->hunspellDir->setText(settings.hunspellDir);

View File

@ -227,19 +227,29 @@
</widget>
<widget class="QWidget" name="pageRecognize">
<layout class="QGridLayout" name="gridLayout_2">
<item row="1" column="0">
<widget class="QLabel" name="label_4">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
<item row="3" column="2">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>17</width>
<height>410</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="2">
<widget class="QLabel" name="tessdataPath">
<property name="text">
<string>Default language:</string>
<string/>
</property>
<property name="buddy">
<cstring>tesseractLangCombo</cstring>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="textInteractionFlags">
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
</property>
</widget>
</item>
@ -256,35 +266,35 @@
</property>
</widget>
</item>
<item row="2" column="2">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
<item row="1" column="0">
<widget class="QLabel" name="label_4">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>17</width>
<height>410</height>
</size>
<property name="text">
<string>Default language:</string>
</property>
</spacer>
<property name="buddy">
<cstring>tesseractLangCombo</cstring>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QComboBox" name="tesseractLangCombo"/>
</item>
<item row="0" column="2">
<widget class="QLabel" name="tessdataPath">
<item row="2" column="0">
<widget class="QLabel" name="label_24">
<property name="text">
<string/>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="textInteractionFlags">
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByMouse</set>
<string>Library version</string>
</property>
</widget>
</item>
<item row="2" column="2">
<widget class="QComboBox" name="tesseractVersion"/>
</item>
</layout>
</widget>
<widget class="QWidget" name="pageCorrect">