tesseract-box/install.sh
Nikita Orlov a34b9ffbab установка и сборка под vagrant
фикс копирования шрифтов
установка переменных среды
пример для начала обучения
2020-02-13 16:32:12 +03:00

60 lines
2.5 KiB
Bash
Executable File

# Build Tesseract from the /tesseract-4.1.1 source tree and install it, along
# with the training tools, into a per-user prefix under $HOME/local.
# Every step aborts the script on failure: continuing after a broken build
# would only produce confusing errors further down.
cd /tesseract-4.1.1 || exit 1
VLOC=$HOME/local
VBIN=$VLOC/bin
VINC=$VLOC/include
VLIB=$VLOC/lib
# The pkgconfig directory is created under lib/ so that it matches
# PKG_CONFIG_PATH below.
# NOTE(review): the original created "$VBIN/pkgconfig" instead, which nothing
# visible in this script reads - confirm no external tooling relied on it.
mkdir -p "$VINC" "$VBIN" "$VLIB/pkgconfig" || exit 1
export PKG_CONFIG_PATH=$VLIB/pkgconfig
export LD_LIBRARY_PATH=$VLIB
./autogen.sh || exit 1
# LIBLEPT_HEADERSDIR is passed to configure through its environment;
# presumably it points at Leptonica headers installed under the same prefix.
LIBLEPT_HEADERSDIR=$VINC ./configure --prefix="$VLOC" --with-extra-libraries="$VLIB" || exit 1
make || exit 1
make install || exit 1
make training || exit 1
make training-install || exit 1
# Point TESSDATA_PREFIX at the tessdata directory of the source tree and
# download the Russian model from the tessdata_best repository into it.
export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata
cd "$TESSDATA_PREFIX" || exit 1
# Abort if the model cannot be fetched instead of silently continuing
# without it.
wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata || exit 1
# Download langdata: the shared langdata_lstm files go into
# /tesseract-4.1.1/langdata and the Russian-specific files into its rus/
# subdirectory. The script is left in langdata/rus afterwards.
langdata_url=https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master

cd /tesseract-4.1.1 || exit 1
# -p keeps a re-run from failing when the directories already exist.
mkdir -p langdata/rus || exit 1
# A failed cd must stop the script, otherwise the downloads below would be
# written into whatever directory happens to be current.
cd langdata || exit 1

for langdata_file in \
  radical-stroke.txt \
  common.punc \
  font_properties \
  Latin.unicharset \
  Latin.xheights \
  Cyrillic.unicharset \
  Cyrillic.xheights \
  desired_bigrams.txt \
  common.unicharambigs \
  forbidden_characters_default
do
  wget "$langdata_url/$langdata_file" || exit 1
done

cd rus || exit 1

for langdata_file in \
  rus.training_text \
  rus.punc \
  rus.numbers \
  rus.wordlist \
  desired_characters \
  okfonts.txt \
  rus.singles_text \
  rus.unicharambigs \
  rus.unicharset
do
  wget "$langdata_url/rus/$langdata_file" || exit 1
done
# Persist the environment needed to run the freshly installed tools by
# appending export lines to ~/.bashrc.
bashrc_file="$HOME/.bashrc"
bashrc_marker='# tesseract-box environment'
# Skip the append when an earlier run already wrote the section, so that
# re-running the installer does not pile up duplicate exports.
if ! grep -qF -- "$bashrc_marker" "$bashrc_file" 2>/dev/null; then
  {
    printf '%s\n' "$bashrc_marker"
    # $PATH is deliberately written unexpanded: it has to be resolved each
    # time .bashrc is sourced, not frozen to the value this installer ran with.
    # shellcheck disable=SC2016
    printf 'export PATH="$PATH:%s"\n' "$VBIN"
    printf 'export PKG_CONFIG_PATH="%s"\n' "$PKG_CONFIG_PATH"
    printf 'export LD_LIBRARY_PATH="%s"\n' "$LD_LIBRARY_PATH"
    printf 'export TESSDATA_PREFIX="%s"\n' "$TESSDATA_PREFIX"
  } >> "$bashrc_file"
fi