установка и сборка под vagrant

фикс копирования шрифтов
установка переменных среды
пример для начала обучения
This commit is contained in:
Nikita Orlov 2020-02-13 16:32:12 +03:00
parent 986621e566
commit a34b9ffbab
7 changed files with 89 additions and 37 deletions

13
Vagrantfile vendored
View File

@ -28,9 +28,16 @@ Vagrant.configure("2") do |config|
# Error: # Error:
# [default] GuestAdditions seems to be installed (5.1.38) correctly, but not running. # [default] GuestAdditions seems to be installed (5.1.38) correctly, but not running.
# @see solution from: https://github.com/dotless-de/vagrant-vbguest/issues/333#issuecomment-487105544 # @see solution from: https://github.com/dotless-de/vagrant-vbguest/issues/333#issuecomment-487105544
if Vagrant.has_plugin?("vagrant-vbguest") # if Vagrant.has_plugin?("vagrant-vbguest")
config.vbguest.auto_update = false # config.vbguest.auto_update = false
end # end
config.trigger.after :up do |trigger|
trigger.info = "Trigger: install dependencies on every up"
trigger.run_remote = {inline: <<-SHELL
SHELL
} end
# Enable X11 forwarding for graphical apps. # Enable X11 forwarding for graphical apps.
# Make sure you have xquartz installed if using OSX host! # Make sure you have xquartz installed if using OSX host!

59
install.sh Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Builds Tesseract 4.1.1 (including the training tools) from the sources in
# /tesseract-4.1.1 into $HOME/local, downloads the Russian "best" model and the
# LSTM langdata needed for training, and appends the required environment
# variables to ~/.bashrc.

TESS_SRC=/tesseract-4.1.1
LANGDATA_URL=https://raw.githubusercontent.com/tesseract-ocr/langdata_lstm/master

cd "$TESS_SRC" || exit 1

VLOC=$HOME/local
VBIN=$VLOC/bin
VINC=$VLOC/include
VLIB=$VLOC/lib

# PKG_CONFIG_PATH points at $VLIB/pkgconfig, so that is the directory that has
# to exist (it used to be created under $VBIN by mistake).
mkdir -p "$VINC" "$VLIB/pkgconfig" "$VBIN"
export PKG_CONFIG_PATH=$VLIB/pkgconfig
export LD_LIBRARY_PATH=$VLIB

# Build and install; stop at the first failing step, since every later step
# depends on the previous one having succeeded.
./autogen.sh || exit 1
LIBLEPT_HEADERSDIR=$VINC ./configure --prefix="$VLOC" --with-extra-libraries="$VLIB" || exit 1
make || exit 1
make install || exit 1
make training || exit 1
make training-install || exit 1

# Russian traineddata goes into the tessdata directory of the source tree.
export TESSDATA_PREFIX=$TESS_SRC/tessdata
cd "$TESSDATA_PREFIX" || exit 1
wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata

# download langdata
# -p keeps the script re-runnable when the directories already exist.
mkdir -p "$TESS_SRC/langdata/rus"

# Files shared by all languages.
cd "$TESS_SRC/langdata" || exit 1
for name in \
    radical-stroke.txt \
    common.punc \
    font_properties \
    Latin.unicharset \
    Latin.xheights \
    Cyrillic.unicharset \
    Cyrillic.xheights \
    desired_bigrams.txt \
    common.unicharambigs \
    forbidden_characters_default
do
    wget "$LANGDATA_URL/$name"
done

# Russian-specific files.
cd "$TESS_SRC/langdata/rus" || exit 1
for name in \
    rus.training_text \
    rus.punc \
    rus.numbers \
    rus.wordlist \
    desired_characters \
    okfonts.txt \
    rus.singles_text \
    rus.unicharambigs \
    rus.unicharset
do
    wget "$LANGDATA_URL/rus/$name"
done

# Persist the environment for future shells. \$PATH is escaped on purpose so
# that .bashrc extends whatever PATH is current at login time instead of
# freezing the PATH this script happened to run with.
{
    echo "export PATH=\$PATH:$VBIN"
    echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH"
    echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
    echo "export TESSDATA_PREFIX=$TESSDATA_PREFIX"
} >> "$HOME"/.bashrc

View File

@ -6,52 +6,33 @@ env|grep DISPLAY
# #
echo "%vagrant ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/vagrant echo "%vagrant ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/vagrant
export PATH=$PATH:$HOME/local/bin
echo "PATH=$PATH:$HOME/local/bin" >> $HOME/.bashrc
echo "PATH=$PATH:$HOME/local/bin" >> /etc/profile
# зависимости # зависимости
export DEBIAN_FRONTEND=noninteractive export DEBIAN_FRONTEND=noninteractive
locale-gen en_US.UTF-8
apt-get update apt-get update
apt-get install -y htop nano libsm6 libxext6 libxrender-dev \ apt-get install -y htop nano libsm6 libxext6 libxrender-dev \
libicu-dev libpango1.0-dev libcairo2-dev g++ autoconf automake \ libicu-dev libpango1.0-dev libcairo2-dev g++ autoconf automake \
libtool pkg-config libpng-dev libjpeg62-turbo-dev libtiff5-dev \ libtool pkg-config libpng-dev libjpeg62-turbo-dev libtiff5-dev \
zlib1g-dev libicu-dev libleptonica-dev zlib1g-dev libicu-dev libleptonica-dev
# Устанавливаем чертежные шрифты в систему
export FONTS_DIR=/usr/local/share/fonts
echo "export FONTS_DIR=$FONTS_DIR" >> /etc/profile
cp /vagrant/fonts/* $FONTS_DIR
chmod -R 644 $FONTS_DIR
fc-cache -v
# скачиваем и собираем tesseract 4.1.1 # скачиваем и собираем tesseract 4.1.1
cd / cd /
wget https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz wget https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz
tar -zxvf 4.1.1.tar.gz tar -zxvf 4.1.1.tar.gz
cd /tesseract-4.1.1 || exit 1 chown -R vagrant:vagrant /tesseract-4.1.1
mkdir -p $HOME/local/include $HOME/local/lib/pkgconfig
export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
echo "export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig" >> /etc/profile
./autogen.sh
LIBLEPT_HEADERSDIR=$HOME/local/include ./configure \
--prefix=$HOME/local/ --with-extra-libraries=$HOME/local/lib
make
make install
export LD_LIBRARY_PATH=$HOME/local/lib
echo "LD_LIBRARY_PATH=$HOME/local/lib" >> /etc/profile
make training
make training-install
export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata
acho "export TESSDATA_PREFIX=/tesseract-4.1.1/tessdata" >> /etc/profile
cd /tessdata
wget https://github.com/tesseract-ocr/tessdata_best/raw/master/rus.traineddata
# Build and install Tesseract as the vagrant user; install.sh also stores the
# environment variables in that user's ~/.bashrc.
cd /vagrant || exit 1
chmod +x ./install.sh
runuser -l vagrant -c '/vagrant/install.sh'
# Install the drafting fonts system-wide.
export FONTS_DIR=/usr/local/share/fonts
# Make sure the target exists before copying into it.
mkdir -p "$FONTS_DIR"
cp /vagrant/fonts/* "$FONTS_DIR"
chown -R vagrant:vagrant "$FONTS_DIR"
# Directories must keep their execute bit to stay traversable, so 644 is
# applied to regular files only rather than to every entry matched by a glob.
chmod -R 775 "$FONTS_DIR"
find "$FONTS_DIR" -type f -exec chmod 644 {} +
fc-cache
# Show which GOST fonts fontconfig can see after the cache refresh.
fc-list | grep 'GOST'
echo "export FONTS_DIR=$FONTS_DIR" >> /home/vagrant/.bashrc

5
train.sh Normal file
View File

@ -0,0 +1,5 @@
# Runs tesstrain.sh for Russian with the "GOST Type A" font, writing its output
# to the current directory.
# FONTS_DIR and TESSDATA_PREFIX must be set in the environment; the :? guards
# stop with a clear message instead of letting an empty expansion swallow the
# next option as its value.
tesstrain.sh --fonts_dir "${FONTS_DIR:?FONTS_DIR is not set}" --fontlist "GOST Type A" --lang rus \
--linedata_only --noextract_font_properties --langdata_dir /tesseract-4.1.1/langdata \
--tessdata_dir "${TESSDATA_PREFIX:?TESSDATA_PREFIX is not set}" --output_dir ./