ScreenTranslator/share/ci/get_tesseract.py

111 lines
3.7 KiB
Python
Raw Normal View History

2020-03-01 16:30:43 +07:00
import common as c
2020-04-03 22:05:33 +07:00
from config import bitness, msvc_version, build_dir, dependencies_dir, build_type
2020-03-01 16:30:43 +07:00
import os
import platform
# Announce the step before anything else happens.
c.print('>> Installing tesseract')

# Pinned tesseract release and the matching source archive.
required_version = '4.1.1'
url = 'https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz'

# Tesseract is installed into the shared dependencies directory.
install_dir = dependencies_dir
2020-05-01 16:51:11 +07:00
# CMake configuration name: 'Debug' only when build_type is exactly 'debug'.
build_type_flag = 'Release' if build_type != 'debug' else 'Debug'

# Compatibility switches: (environment variable, CMake arguments appended when
# that variable equals '1'). The order is fixed because the accumulated string
# also becomes part of cache_file_data at the end of this section.
# NOTE(review): '/Od0' is not a documented MSVC option ('/Ob0' may have been
# intended); it is left untouched here - confirm before changing.
_COMPAT_SWITCHES = (
    ('NO_AVX2', ' -D USE_AVX2=OFF '),
    ('NO_AVX512',
     ' -D USE_AVX512BW=OFF -D USE_AVX512CD=OFF '
     '-D USE_AVX512DQ=OFF -D USE_AVX512ER=OFF -D USE_AVX512F=OFF -D USE_AVX512IFMA=OFF '
     '-D USE_AVX512PF=OFF -D USE_AVX512VBMI=OFF -D USE_AVX512VL=OFF '),
    ('NO_AVX', ' -D USE_AVX=OFF '),
    ('NO_FMA', ' -D USE_FMA=OFF '),
    ('NO_BMI2', ' -D USE_BMI2=OFF '),
    ('NO_SSE4', ' -D USE_SSE4_1=OFF -D USE_SSE4_2=OFF '),
    ('NO_OPT',
     ' -D CMAKE_CXX_FLAGS_RELEASE="/MD /Od /Od0 /DNDEBUG" '
     ' -D CMAKE_C_FLAGS_RELEASE="/MD /Od /Od0 /DNDEBUG" '),
)

# compatibility flags
compat_flags = ''.join(
    flags for env_name, flags in _COMPAT_SWITCHES
    if os.environ.get(env_name, '0') == '1'
)

# MARCH, when set to a non-empty value, is forwarded as the target architecture.
_march = os.environ.get('MARCH', '')
if _march:
    compat_flags += ' -D TARGET_ARCHITECTURE={} '.format(_march)

# The cache file records version, configuration and flags of the last build,
# so a change in any of them is detected as a cache mismatch.
cache_file = install_dir + '/tesseract.cache'
cache_file_data = required_version + build_type_flag + compat_flags
2020-03-01 16:30:43 +07:00
def check_existing():
    """Report whether an up-to-date tesseract install is already present.

    The install is accepted when the cache file exists and its content equals
    ``cache_file_data``, the platform's library files exist under
    ``install_dir``, and ``include/tesseract`` contains at least one file.

    On Windows and macOS the version-suffixed library files are additionally
    passed to ``c.symlink`` together with an unversioned path, so a call that
    gets past the library check also refreshes those links.

    Returns:
        bool: True when the existing install can be reused, False otherwise.
    """
    # The cache file must exist and match the current version/config/flags.
    if not os.path.exists(cache_file):
        return False
    with open(cache_file, 'r') as f:
        if f.read() != cache_file_data:
            return False

    system = platform.system()
    if system == "Windows":
        dll = install_dir + '/bin/tesseract41.dll'
        lib = install_dir + '/lib/tesseract41.lib'
        if not (os.path.exists(dll) and os.path.exists(lib)):
            return False
        c.symlink(dll, install_dir + '/bin/tesseract.dll')
        c.symlink(lib, install_dir + '/lib/tesseract.lib')
    elif system == "Darwin":
        lib = install_dir + '/lib/libtesseract.4.1.1.dylib'
        if not os.path.exists(lib):
            return False
        c.symlink(lib, install_dir + '/lib/libtesseract.dylib')
    else:
        if not os.path.exists(install_dir + '/lib/libtesseract.so'):
            return False

    # Headers are required on every platform.
    includes_path = install_dir + '/include/tesseract'
    if len(c.get_folder_files(includes_path)) == 0:
        return False

    return True
# Reuse the existing install unless a rebuild is requested through the FORCE
# environment variable. check_existing() is still evaluated first, as before.
if check_existing() and 'FORCE' not in os.environ:
    c.print('>> Using cached')
    # SystemExit is raised directly: the bare exit() helper is injected by the
    # site module for interactive sessions and is not guaranteed in scripts.
    raise SystemExit(0)
# Fetch the source archive and expose the unpacked tree as ./tesseract_src.
archive = 'tesseract-' + os.path.basename(url)
c.download(url, archive)

src_dir = os.path.abspath('tesseract_src')
c.extract(archive, '.')
c.symlink(c.get_archive_top_dir(archive), src_dir)

# Prepare the install prefix and start from a fresh build directory.
c.ensure_got_path(install_dir)
c.recreate_dir(build_dir)
os.chdir(build_dir)

# {0} is the source tree, {1} the install prefix (also used to locate Leptonica).
cmake_args = (
    '"{0}" -DCMAKE_INSTALL_PREFIX="{1}" -DLeptonica_DIR="{1}/cmake" '
    '-DBUILD_TRAINING_TOOLS=OFF -DBUILD_TESTS=OFF'
).format(src_dir, install_dir)

if platform.system() == "Windows":
    # Load the MSVC environment and add the architecture arguments for cmake.
    env_cmd = c.get_msvc_env_cmd(bitness=bitness, msvc_version=msvc_version)
    c.apply_cmd_env(env_cmd)
    cmake_args += ' ' + c.get_cmake_arch_args(bitness=bitness)

# NOTE(review): assumed to apply on every platform, not only Windows - confirm.
c.set_make_threaded()
c.run('cmake {}'.format(cmake_args))

if len(compat_flags) > 0:
    # The compatibility flags are applied by re-configuring the existing
    # build tree; the second run is a deliberate repeat of the first.
    # NOTE(review): both runs are assumed to sit under this condition - confirm.
    c.run('cmake {} .'.format(compat_flags))
    c.run('cmake {} .'.format(compat_flags))  # for sure :)

c.run('cmake --build . --config {}'.format(build_type_flag))
c.run('cmake --build . --target install --config {}'.format(build_type_flag))

# Record what was built; check_existing() compares against this content.
with open(cache_file, 'w') as f:
    f.write(cache_file_data)

if not check_existing():  # create links
    c.print('>> Build failed')
    # SystemExit is raised directly: the bare exit() helper is injected by the
    # site module for interactive sessions and is not guaranteed in scripts.
    raise SystemExit(1)