# Source-viewer metadata: Bash, executable file (originally 60 lines, 2.5 KiB).
# Build the source tree in /tesseract-4.1.1 and install it, including the
# `training` targets, under a per-user prefix ($HOME/local).
cd /tesseract-4.1.1 || exit 1

# Per-user install prefix and the sub-directories used below.
VLOC=$HOME/local
VBIN=$VLOC/bin
VINC=$VLOC/include
VLIB=$VLOC/lib

# Create the install tree up front. The pkgconfig directory is created under
# lib/ because that is the location PKG_CONFIG_PATH is pointed at below.
mkdir -p "$VINC" "$VBIN" "$VLIB/pkgconfig" || exit 1

# Exported so that configure/make and later parts of the script see them.
export PKG_CONFIG_PATH="$VLIB/pkgconfig"
export LD_LIBRARY_PATH="$VLIB"

# Each step depends on the previous one, so stop at the first failure instead
# of continuing with a half-configured or half-built tree.
./autogen.sh || exit 1
LIBLEPT_HEADERSDIR="$VINC" ./configure \
  --prefix="$VLOC" \
  --with-extra-libraries="$VLIB" || exit 1

make || exit 1
make install || exit 1
make training || exit 1
make training-install || exit 1

# Export the tessdata location inside the source tree and download the
# rus.traineddata model from the tessdata_best repository into it.
export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata
cd "$TESSDATA_PREFIX" || exit 1
# Abort if the model cannot be fetched; later steps would otherwise run
# against a missing file.
wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata || exit 1

# download langdata
# Files from the langdata_lstm repository are saved in two places:
#   /tesseract-4.1.1/langdata      - files from the repository root
#   /tesseract-4.1.1/langdata/rus  - files from the repository's rus/ directory
# The script finishes this section with langdata/rus as the working directory.
langdata_url=https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master

# -p keeps a re-run from failing when the directories already exist.
mkdir -p /tesseract-4.1.1/langdata/rus || exit 1

cd /tesseract-4.1.1/langdata || exit 1
for langdata_file in \
  radical-stroke.txt \
  common.punc \
  font_properties \
  Latin.unicharset \
  Latin.xheights \
  Cyrillic.unicharset \
  Cyrillic.xheights \
  desired_bigrams.txt \
  common.unicharambigs \
  forbidden_characters_default; do
  wget "$langdata_url/$langdata_file" || exit 1
done

cd rus || exit 1
for langdata_file in \
  rus.training_text \
  rus.punc \
  rus.numbers \
  rus.wordlist \
  desired_characters \
  okfonts.txt \
  rus.singles_text \
  rus.unicharambigs \
  rus.unicharset; do
  wget "$langdata_url/rus/$langdata_file" || exit 1
done

#######################################
# Print the `export` lines that are appended to ~/.bashrc.
# Globals:   VBIN, PKG_CONFIG_PATH, LD_LIBRARY_PATH, TESSDATA_PREFIX (read)
# Arguments: none
# Outputs:   four `export NAME=value` lines on stdout
#######################################
print_env_exports() {
  # $PATH is intentionally left unexpanded (single-quoted format string) so it
  # is evaluated when .bashrc is sourced; expanding it here would freeze this
  # script's PATH into every future shell.
  # %q shell-quotes the values so paths containing spaces stay one word.
  # shellcheck disable=SC2016
  printf 'export PATH=$PATH:%q\n' "$VBIN"
  printf 'export PKG_CONFIG_PATH=%q\n' "$PKG_CONFIG_PATH"
  printf 'export LD_LIBRARY_PATH=%q\n' "$LD_LIBRARY_PATH"
  printf 'export TESSDATA_PREFIX=%q\n' "$TESSDATA_PREFIX"
}

# Persist the environment for future interactive shells.
print_env_exports >> "$HOME"/.bashrc