ScreenTranslator/share/updates/tessdata.py
2020-04-20 20:45:26 +03:00

37 lines
1.1 KiB
Python

import sys
import os
import subprocess
# Prints a '"recognizers"' JSON fragment describing the data files found in a
# local checkout of a tessdata git repository.  The fragment starts with a
# comma because it is meant to be appended after other members of an
# enclosing JSON object.

DEFAULT_DOWNLOAD_URL = "https://github.com/tesseract-ocr/tessdata_best/raw/master"

# Repository files that are not recognizer data.
SKIPPED_FILES = ("LICENSE", "README.md")


def list_data_files(tessdata_dir):
    """Return the names of the regular files directly inside *tessdata_dir*.

    Sub-directories and the names in SKIPPED_FILES are left out.
    """
    with os.scandir(tessdata_dir) as entries:
        return [entry.name for entry in entries
                if entry.is_file() and entry.name not in SKIPPED_FILES]


def group_by_stem(file_names):
    """Group file names by the part of each name before its first dot.

    Returns a dict mapping that prefix to the list of full file names that
    share it.  A name without any dot is used as its own key (the previous
    ``str.index`` lookup raised ValueError for such a file).

    Names are processed in sorted order, because os.scandir() yields entries
    in arbitrary order and the generated output should be reproducible.
    """
    groups = {}
    for file_name in sorted(file_names):
        stem = file_name.partition('.')[0]
        groups.setdefault(stem, []).append(file_name)
    return groups


def commit_date(tessdata_dir, file_name):
    """Return what ``git log -1 --pretty=format:%cI`` prints for *file_name*.

    git is run with *tessdata_dir* as its working directory.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    # '--' ends option parsing, so a file name that starts with '-' is
    # treated as a path and not as a git option.
    git_cmd = ['git', 'log', '-1', '--pretty=format:%cI', '--', file_name]
    return subprocess.run(git_cmd, cwd=tessdata_dir, universal_newlines=True,
                          stdout=subprocess.PIPE, check=True).stdout


def render_recognizers(files, download_url, date_of):
    """Build the '"recognizers"' JSON fragment as a single string.

    Args:
        files: dict mapping a recognizer name to the list of its file names.
        download_url: base URL; each file's URL is ``download_url/file_name``.
        date_of: callable taking a file name and returning its date string.

    Returns:
        The fragment, one entry per line, without a trailing newline.
    """
    lines = [',"recognizers": {']
    name_sep = ''
    for name, file_names in files.items():
        lines.append(' {}"{}":{{"files":['.format(name_sep, name))
        name_sep = ', '
        # Entries of the "files" array need separators as well; without them
        # a recognizer with more than one file produced invalid JSON.
        file_sep = ''
        for file_name in file_names:
            lines.append(
                ' {}{{"url":"{}/{}", "path":"$tessdata$/{}", "date":"{}"}}'.format(
                    file_sep, download_url, file_name, file_name,
                    date_of(file_name)))
            file_sep = ', '
        lines.append(' ]}')
    lines.append('}')
    return '\n'.join(lines)


def main(argv):
    """Run the script with the command line *argv*; return the exit status.

    ``argv[1]`` is the tessdata directory; the optional ``argv[2]`` overrides
    DEFAULT_DOWNLOAD_URL.
    """
    if len(argv) < 2:
        print("Usage:", argv[0], "<tessdata_dir> [<download_url>]")
        return 1

    tessdata_dir = argv[1]
    download_url = argv[2] if len(argv) > 2 else DEFAULT_DOWNLOAD_URL

    files = group_by_stem(list_data_files(tessdata_dir))
    print(render_recognizers(
        files, download_url,
        lambda file_name: commit_date(tessdata_dir, file_name)))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))