ScreenTranslator/Recognizer.cpp

92 lines
2.6 KiB
C++
Raw Normal View History

#include "Recognizer.h"
#include <tesseract/baseapi.h>
2013-11-24 19:43:37 +07:00
#include <QDebug>
#include <QSettings>
#include "Settings.h"
#include "ImageProcessing.h"
2015-06-30 00:26:33 +07:00
#include "StAssert.h"
#include "RecognizerHelper.h"
2013-11-24 19:43:37 +07:00
2015-09-23 01:41:08 +07:00
/// Constructs the recognizer as a child of `parent`.
///
/// Starts without an OCR engine, allocates the RecognizerHelper and then
/// calls applySettings (), which reads the stored configuration and attempts
/// to create the default engine.
Recognizer::Recognizer (QObject *parent)
  : QObject (parent),
  engine_ (NULL),
  recognizerHelper_ (new RecognizerHelper),
  imageScale_ (0) {
  // All remaining state (data dir, language, scale, engine) comes from settings.
  applySettings ();
}
2015-09-23 01:41:08 +07:00
void Recognizer::applySettings () {
2013-11-24 19:43:37 +07:00
QSettings settings;
settings.beginGroup (settings_names::recogntionGroup);
recognizerHelper_->load ();
2013-11-24 19:43:37 +07:00
tessDataDir_ = settings.value (settings_names::tessDataPlace,
settings_values::tessDataPlace).toString ();
2015-09-23 01:41:08 +07:00
if (tessDataDir_.right (1) != "/") {
2013-11-24 20:06:19 +07:00
tessDataDir_ += "/";
}
2013-11-24 19:43:37 +07:00
ocrLanguage_ = settings.value (settings_names::ocrLanguage,
settings_values::ocrLanguage).toString ();
imageScale_ = settings.value (settings_names::imageScale,
settings_values::imageScale).toInt ();
initEngine (engine_, ocrLanguage_);
2013-11-24 19:43:37 +07:00
}
2015-09-23 01:41:08 +07:00
/// Replaces `engine` with a new Tesseract instance initialized for `language`.
///
/// @param engine   In/out pointer. Any previous instance is deleted before the
///                 new one is created; it is left untouched when the
///                 parameters are rejected and reset to NULL when Init fails.
/// @param language Language code passed to TessBaseAPI::Init.
/// @return true when the engine was initialized, false otherwise (an error ()
///         signal is emitted in that case).
bool Recognizer::initEngine (tesseract::TessBaseAPI * &engine, const QString &language) {
  const bool paramsAreValid = !tessDataDir_.isEmpty () && !language.isEmpty ();
  if (!paramsAreValid) {
    emit error (tr ("Неверные параметры для OCR"));
    return false;
  }

  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete engine;
  engine = new tesseract::TessBaseAPI ();

  const int initStatus = engine->Init (qPrintable (tessDataDir_), qPrintable (language),
                                       tesseract::OEM_DEFAULT);
  if (initStatus == 0) {
    return true;
  }

  // Initialization failed: report the status code and do not keep a half-built engine.
  emit error (tr ("Ошибка инициализации OCR: %1").arg (initStatus));
  delete engine;
  engine = NULL;
  return false;
}
2015-09-23 01:41:08 +07:00
/// Runs OCR on `item.source` and emits recognized () with the outcome.
///
/// When the item requests a language different from the configured one, a
/// temporary engine is created for this call and destroyed afterwards;
/// otherwise the shared engine_ is used (and created on demand if an earlier
/// initialization failed). On success the recognized text, passed through
/// RecognizerHelper::substitute, is stored in `item.recognized`; when no text
/// is found an error () signal is emitted. recognized () is emitted in every
/// case, including engine initialization failure.
///
/// @param item Processing item taken by value; the copy carrying the result
///             is forwarded through the recognized () signal.
void Recognizer::recognize (ProcessingItem item) {
  ST_ASSERT (!item.source.isNull ());
  bool isCustomLanguage = (!item.ocrLanguage.isEmpty () &&
                           item.ocrLanguage != ocrLanguage_);

  tesseract::TessBaseAPI *engine = NULL;
  QString language = (isCustomLanguage) ? item.ocrLanguage : ocrLanguage_;

  if (isCustomLanguage) {
    // One-shot engine owned by this call; released below.
    if (!initEngine (engine, language)) {
      emit recognized (item);
      return;
    }
  }
  else {
    // Initialize the member itself when it is missing. Creating the engine
    // into a local pointer here would leak it, because only custom-language
    // engines are deleted at the end of this function.
    if (engine_ == NULL && !initEngine (engine_, language)) {
      emit recognized (item);
      return;
    }
    engine = engine_;
  }

  Pix *image = prepareImage (item.source.toImage (), imageScale_);
  ST_ASSERT (image != NULL);
  engine->SetImage (image);

  char *outText = engine->GetUTF8Text ();
  engine->Clear ();
  cleanupImage (&image);
  QString result = QString (outText).trimmed ();
  // Tesseract hands over ownership of the buffer; it must be freed with delete [].
  delete [] outText;

  if (isCustomLanguage) {
    delete engine;
  }

  if (!result.isEmpty ()) {
    item.recognized = recognizerHelper_->substitute (result, language);
  }
  else {
    emit error (tr ("Текст не распознан."));
  }
  emit recognized (item);
}