2013-11-23 14:00:22 +07:00
|
|
|
#include "Recognizer.h"
|
|
|
|
|
2013-11-26 23:59:47 +07:00
|
|
|
#include <tesseract/baseapi.h>
|
2013-11-24 19:43:37 +07:00
|
|
|
|
|
|
|
#include <QDebug>
|
|
|
|
#include <QSettings>
|
|
|
|
|
|
|
|
#include "Settings.h"
|
2014-03-30 21:36:32 +07:00
|
|
|
#include "ImageProcessing.h"
|
2015-06-30 00:26:33 +07:00
|
|
|
#include "StAssert.h"
|
2013-11-24 19:43:37 +07:00
|
|
|
|
2015-09-23 01:41:08 +07:00
|
|
|
/// Builds a recognizer that starts without an OCR engine and with a zero
/// image scale, then immediately loads the stored settings.
///
/// @param parent Forwarded to the QObject base class.
Recognizer::Recognizer (QObject *parent)
  : QObject (parent)
  , engine_ (NULL)
  , imageScale_ (0) {
  // Reads the recognition settings and attempts to create the default engine.
  applySettings ();
}
|
|
|
|
|
2015-09-23 01:41:08 +07:00
|
|
|
void Recognizer::applySettings () {
|
2013-11-24 19:43:37 +07:00
|
|
|
QSettings settings;
|
|
|
|
settings.beginGroup (settings_names::recogntionGroup);
|
|
|
|
|
|
|
|
tessDataDir_ = settings.value (settings_names::tessDataPlace,
|
|
|
|
settings_values::tessDataPlace).toString ();
|
2015-09-23 01:41:08 +07:00
|
|
|
if (tessDataDir_.right (1) != "/") {
|
2013-11-24 20:06:19 +07:00
|
|
|
tessDataDir_ += "/";
|
|
|
|
}
|
2013-11-24 19:43:37 +07:00
|
|
|
ocrLanguage_ = settings.value (settings_names::ocrLanguage,
|
|
|
|
settings_values::ocrLanguage).toString ();
|
|
|
|
imageScale_ = settings.value (settings_names::imageScale,
|
|
|
|
settings_values::imageScale).toInt ();
|
|
|
|
|
2014-04-12 01:51:31 +07:00
|
|
|
initEngine (engine_, ocrLanguage_);
|
2013-11-24 19:43:37 +07:00
|
|
|
}
|
|
|
|
|
2015-09-23 01:41:08 +07:00
|
|
|
/// (Re)creates a Tesseract engine for the given language.
///
/// @param engine   In/out pointer. When the parameters are accepted, any
///                 engine it currently points to is destroyed; on success it
///                 then points to a newly initialized engine, on an Init ()
///                 failure it is reset to NULL. It is left untouched when the
///                 parameters are rejected.
/// @param language Language name handed to TessBaseAPI::Init ().
/// @return true when the engine was initialized, false otherwise. Every
///         failure path emits the error signal before returning.
bool Recognizer::initEngine (tesseract::TessBaseAPI * &engine, const QString &language) {
  const bool hasValidParameters = !tessDataDir_.isEmpty () && !language.isEmpty ();
  if (!hasValidParameters) {
    emit error (tr ("Неверные параметры для OCR"));
    return false;
  }

  // Deleting a null pointer is a no-op, so the previous engine (if any) can
  // be released without an explicit check.
  delete engine;
  engine = new tesseract::TessBaseAPI ();

  const int initStatus = engine->Init (qPrintable (tessDataDir_),
                                       qPrintable (language),
                                       tesseract::OEM_DEFAULT);
  if (initStatus == 0) {
    return true;
  }

  // Initialization failed: report the status code and make sure the caller
  // is not left holding a half-initialized engine.
  emit error (tr ("Ошибка инициализации OCR: %1").arg (initStatus));
  delete engine;
  engine = NULL;
  return false;
}
|
|
|
|
|
2015-09-23 01:41:08 +07:00
|
|
|
/// Runs OCR on the item's source image and reports the outcome via signals.
///
/// The default engine (engine_) is used unless the item requests a language
/// different from ocrLanguage_; in that case a temporary engine is created
/// for this call only and destroyed before returning.
///
/// Emits recognized (item) with item.recognized filled in when some text was
/// found, otherwise emits error (). When an engine cannot be initialized the
/// function returns early; initEngine () has already emitted the error.
///
/// @param item Processing item; item.source is the picture to recognize and
///             item.ocrLanguage optionally overrides the default language.
void Recognizer::recognize (ProcessingItem item) {
  ST_ASSERT (!item.source.isNull ());
  const bool isCustomLanguage = (!item.ocrLanguage.isEmpty () &&
                                 item.ocrLanguage != ocrLanguage_);

  tesseract::TessBaseAPI *engine = (isCustomLanguage) ? NULL : engine_;
  if (engine == NULL) {
    const QString language = (isCustomLanguage) ? item.ocrLanguage : ocrLanguage_;
    if (!initEngine (engine, language)) {
      return;
    }
    if (!isCustomLanguage) {
      // Keep the lazily created default engine. Without this the engine
      // would be neither stored nor deleted, i.e. leaked and re-created on
      // every call while engine_ stays NULL.
      engine_ = engine;
    }
  }

  // NOTE(review): prepareImage / cleanupImage are presumably declared in
  // ImageProcessing.h; the image is released below with cleanupImage ().
  Pix *image = prepareImage (item.source.toImage (), imageScale_);
  ST_ASSERT (image != NULL);
  engine->SetImage (image);
  char *outText = engine->GetUTF8Text ();
  engine->Clear ();
  cleanupImage (&image);

  // Tesseract returns UTF-8, so decode it explicitly instead of relying on
  // the codec-dependent implicit const char* -> QString conversion.
  // fromUtf8 () yields a null string for a null pointer.
  const QString result = QString::fromUtf8 (outText).trimmed ();
  // The buffer returned by GetUTF8Text () is owned by the caller.
  delete [] outText;

  if (isCustomLanguage) {
    // The temporary engine was created only for this request.
    delete engine;
  }

  if (!result.isEmpty ()) {
    item.recognized = result;
    emit recognized (item);
  }
  else {
    emit error (tr ("Текст не распознан."));
  }
}
|