2013年12月15日 星期日

Tesseract on Ubuntu

refer to:
https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html

Prerequisites (install these first):
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ xenial main'
sudo apt update
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DE19EB17684BA42D
apt install cmake cmake-qt-gui cmake-curses-gui
apt install automake autoconf libtool pkg-config libcurl4-gnutls-dev libjpeg8-dev libpng12-dev libtiff5-dev

1. Trained language data (tessdata_best)
git clone --recursive https://github.com/tesseract-ocr/tessdata_best

2. Build and install Leptonica
CPU_NUM=$(grep processor /proc/cpuinfo | awk '{field=$NF};END{print field+1}')
echo "CPU number = "$CPU_NUM
wget http://www.leptonica.org/source/leptonica-1.82.0.tar.gz
tar -xvf leptonica-1.82.0.tar.gz
cd leptonica-1.82.0
./configure
make -j$CPU_NUM V=1
make install

3. Release build. This assumes the CPU has no AVX support; if your CPU has AVX, omit the -DUSE_AVX*=OFF options.
CPU_NUM=$(grep processor /proc/cpuinfo | awk '{field=$NF};END{print field+1}')
echo "CPU number = "$CPU_NUM
wget https://github.com/tesseract-ocr/tesseract/archive/refs/tags/4.1.3.tar.gz

tar -xvf 4.1.3.tar.gz
cd tesseract-4.1.3
mkdir -p build
cd build

cmake .. -G"Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/opt/tesseract -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_FLAGS_RELEASE="-O3 -g" -DCMAKE_CXX_FLAGS_RELEASE="-O3 -g" -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_AVX512F=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

4. Debug build. This assumes the CPU has no AVX support; if your CPU has AVX, omit the -DUSE_AVX*=OFF options. Run these commands from the directory that contains 4.1.3.tar.gz.
tar -xvf 4.1.3.tar.gz
cd tesseract-4.1.3
mkdir -p _debug
cd _debug

cmake .. -G"Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/opt/tesseract/debug -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS_DEBUG="-O0 -g" -DCMAKE_CXX_FLAGS_DEBUG="-O0 -g" -DUSE_AVX=OFF -DUSE_AVX2=OFF -DUSE_AVX512F=OFF

cmake --build . --config Debug --target install -- -j$CPU_NUM VERBOSE=1

5. Run tesseract
cd /opt/tesseract/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/opt/tesseract/lib
export TESSDATA_PREFIX=/mnt/upload/git_source/tessdata_best
./tesseract test.png - -l eng
./tesseract test.png - -l eng+deu
./tesseract test.png stdout -c tessedit_char_whitelist=0123456789 --psm 6

~~ If TESSDATA_PREFIX is not exported, pass --tessdata-dir instead ~~
./tesseract --tessdata-dir /mnt/upload/git_source/tessdata_best test.png - -l eng

~~ Help for the --oem and --psm parameters ~~
./tesseract --help-oem
./tesseract --help-psm

沒有留言:

張貼留言