2013年12月16日 星期一

Cross-compiling libtorch (including torchvision) with aarch64-linux-gnu-gcc

Refer to:
https://github.com/pytorch/pytorch#get-the-pytorch-source
https://github.com/pytorch/pytorch/blob/master/docs/libtorch.rst
https://discuss.pytorch.org/t/compile-libtorch-c-api-from-source/81624/2
https://qiita.com/syoyo/items/360bcf15b9ee88eeeacd
https://github.com/ljk53/pytorch-android-cpp-demo
https://github.com/dpilger26/NumCpp/tree/master
https://github.com/t-kuha/mpsoc-library/wiki/How-to-build-libtorch-&-torchvision          
https://github.com/t-kuha/mpsoc-library/wiki/How-to-build-Caffe-%26-libtorch-dependencies 
https://fatalfeel.blogspot.com/2013/12/build-nrf9160dk-projects-with-zephyr-os.html      
https://fatalfeel.blogspot.com/2019/12/ai-with-cuda-install-step-and-yolov3-in.html

#install gcc compiler
sudo apt install build-essential

#install python3 version >= 3.7 (this takes a lot of work; follow the steps here):
https://fatalfeel.blogspot.com/2019/12/ai-with-cuda-install-step-and-yolov3-in.html

#install cmake 3.20.5
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ xenial main'
sudo apt update
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DE19EB17684BA42D
sudo apt install cmake cmake-qt-gui cmake-curses-gui
sudo apt install libjpeg8-dev libpng12-dev libgtk2.0-dev libxext-dev libopenblas-dev liblapack-dev
sudo apt install libavcodec-dev libavutil-dev libavformat-dev libswscale-dev libavdevice-dev
sudo apt install libv4l-dev zlib1g-dev
sudo apt install ninja-build

===================================================
==================Ubuntu16.04 x86_64=================
===================================================
///////////////////////opencv
CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/usr/local/cuda/bin

1.
https://github.com/opencv/opencv_contrib/tags
download
opencv_contrib-4.6.0.zip
unzip ./opencv_contrib-4.6.0.zip

https://opencv.org/releases
download opencv-4.6.0.zip
unzip ./opencv-4.6.0.zip

~~if the cpu has no avx instructions
-DCPU_DISPATCH="SSE4_1;SSE4_2;FP16" \
~~if the cpu has avx2 instructions
-DCPU_DISPATCH="SSE4_1;SSE4_2;AVX;FP16;AVX2" \
~~if the cpu has avx-512 instructions
-DCPU_DISPATCH="SSE4_1;SSE4_2;AVX;FP16;AVX2;AVX512_SKX" \

~~~~~~~~opencv debug
cd opencv-4.6.0
mkdir build_debug
cd build_debug

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/opencv/debug \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_FLAGS_DEBUG="-O0 -g" \
-DCMAKE_CXX_FLAGS_DEBUG="-O0 -g" \
-DCUDA_NVCC_FLAGS="-O0 -Wno-deprecated-gpu-targets" \
-DBUILD_PERF_TESTS=OFF \
-DBUILD_TESTS=OFF \
-DBUILD_JPEG=ON \
-DBUILD_PNG=ON \
-DBUILD_ZLIB=ON \
-DWITH_LIBV4L=ON \
-DBUILD_opencv_python3=OFF \
-DBUILD_opencv_python_tests=OFF \
-DBUILD_EXAMPLES=ON \
-DWITH_GSTREAMER=OFF \
-DWITH_TESSERACT=OFF \
-DOPENCV_EXTRA_MODULES_PATH=../../opencv_contrib-4.6.0/modules \
-DCPU_DISPATCH="SSE4_1;SSE4_2;FP16" \
-DWITH_CUDA=OFF

cmake --build . --config Debug --target install -- -j$CPU_NUM VERBOSE=1

~~~~~~~~opencv release
cd opencv-4.6.0
mkdir build_release
cd build_release

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/opencv \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-O3 -g" \
-DCMAKE_CXX_FLAGS_RELEASE="-O3 -g" \
-DCUDA_NVCC_FLAGS="-O3 -Wno-deprecated-gpu-targets" \
-DBUILD_PERF_TESTS=OFF \
-DBUILD_TESTS=OFF \
-DBUILD_JPEG=ON \
-DBUILD_PNG=ON \
-DBUILD_ZLIB=ON \
-DWITH_LIBV4L=ON \
-DBUILD_opencv_python3=OFF \
-DBUILD_opencv_python_tests=OFF \
-DBUILD_EXAMPLES=ON \
-DWITH_GSTREAMER=OFF \
-DWITH_TESSERACT=OFF \
-DOPENCV_EXTRA_MODULES_PATH=../../opencv_contrib-4.6.0/modules \
-DCPU_DISPATCH="SSE4_1;SSE4_2;FP16" \
-DWITH_CUDA=ON

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

///////////////////////libtorch
CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/usr/local/cuda/bin

1.
git clone --recursive https://github.com/pytorch/pytorch
cd pytorch
git checkout v1.7.1
git submodule sync
git submodule update --init --recursive

export LIBTORCH_ROOT=$(pwd)

2. 
gedit ./CMakeLists.txt
~change
###string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
~to
string(APPEND CMAKE_CXX_FLAGS " -fPIC")

3.
gedit ./cmake/TorchConfig.cmake.in
~comment
###if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
###  set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@")
###endif()

4.
gedit ./caffe2/utils/math_gpu.cu
~add header host_vector.h
#include <thrust/host_vector.h> //---> add here
#include <thrust/device_vector.h>
#include <thrust/functional.h>

5.
~~if the cpu has no avx instructions
-DC_AVX_FOUND=OFF \
-DC_AVX2_FOUND=OFF \
-DCXX_AVX_FOUND=OFF \
-DCXX_AVX2_FOUND=OFF \
-DDISABLE_AVX2=ON \
-DDISABLE_AVX512F=ON \
-DC_AVX_FLAGS="" \
-DC_AVX2_FLAGS="" \
-DCXX_AVX_FLAGS="" \
-DCXX_AVX2_FLAGS="" \

~~if the cpu has avx-512 instructions and the gcc version is >= 5.0
~debug version
-DCMAKE_C_FLAGS_DEBUG="-O0 -g -mavx512f" \
-DCMAKE_CXX_FLAGS_DEBUG="-O0 -g -mavx512f" \
~release version
-DCMAKE_C_FLAGS_RELEASE="-O3 -g -mavx512f" \
-DCMAKE_CXX_FLAGS_RELEASE="-O3 -g -mavx512f" \

~~if set -DUSE_FBGEMM=ON
FBGEMM offers optimized on-CPU performance for reduced-precision calculations used to accelerate deep learning models. It has delivered greater than 2x performance gains.

~~~~~~~~libtorch debug
mkdir build_debug
cd build_debug

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/libtorch/x64/debug \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_FLAGS_DEBUG="-O0 -g" \
-DCMAKE_CXX_FLAGS_DEBUG="-O0 -g" \
-DPYTHON_EXECUTABLE=$(which python3) \
-DBUILDING_WITH_TORCH_LIBS=ON \
-DUSE_OPENCV=ON \
-DOpenCV_DIR=/opt/opencv/debug/lib/cmake/opencv4 \
-DBUILD_EXAMPLES=ON \
-DC_AVX_FOUND=OFF \
-DC_AVX2_FOUND=OFF \
-DCXX_AVX_FOUND=OFF \
-DCXX_AVX2_FOUND=OFF \
-DDISABLE_AVX2=ON \
-DDISABLE_AVX512F=ON \
-DC_AVX_FLAGS="" \
-DC_AVX2_FLAGS="" \
-DCXX_AVX_FLAGS="" \
-DCXX_AVX2_FLAGS="" \
-DUSE_FBGEMM=OFF \
-DBUILD_PYTHON=OFF \
-DBUILD_ONNX_PYTHON=OFF \
-DUSE_CUDA=OFF

cmake --build . --config Debug --target install -- -j$CPU_NUM VERBOSE=1

~~~~~~~~libtorch release
mkdir build_release
cd build_release

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/libtorch/x64 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-O3 -g" \
-DCMAKE_CXX_FLAGS_RELEASE="-O3 -g" \
-DPYTHON_EXECUTABLE=$(which python3) \
-DBUILDING_WITH_TORCH_LIBS=ON \
-DUSE_OPENCV=ON \
-DOpenCV_DIR=/opt/opencv/lib/cmake/opencv4 \
-DBUILD_EXAMPLES=ON \
-DC_AVX_FOUND=OFF \
-DC_AVX2_FOUND=OFF \
-DCXX_AVX_FOUND=OFF \
-DCXX_AVX2_FOUND=OFF \
-DDISABLE_AVX2=ON \
-DDISABLE_AVX512F=ON \
-DC_AVX_FLAGS="" \
-DC_AVX2_FLAGS="" \
-DCXX_AVX_FLAGS="" \
-DCXX_AVX2_FLAGS="" \
-DUSE_FBGEMM=OFF \
-DBUILD_PYTHON=OFF \
-DBUILD_ONNX_PYTHON=OFF \
-DUSE_CUDA=ON

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

///////////////////////torchvision
~~if you have not set the environment variables yet, set them now~~
CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/usr/local/cuda/bin

1.
https://github.com/pytorch/vision/releases
download vision-0.8.2.tar.gz
tar -xvf ./vision-0.8.2.tar.gz
cd vision-0.8.2

~modify these files to add the USE_PYTHON definition (cmake, header and source files)
./CMakeLists.txt
./cmake/TorchVisionConfig.cmake.in
./torchvision/csrc/cpu/image/image.cpp
./torchvision/csrc/cpu/video/Video.h
./torchvision/csrc/cpu/video_reader/VideoReader.cpp
./torchvision/csrc/cpu/vision_cpu.h
./torchvision/csrc/cuda/vision_cuda.h
./torchvision/csrc/empty_tensor_op.h
./torchvision/csrc/vision.cpp
patch: https://www.mediafire.com/file/wcxce2v6vdzvtvw/vision-0.8.2_patch.tar.gz

2.
~~~~~~~~~~~~~~vision debug
mkdir build_debug
cd build_debug

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/torchvision/x64/debug \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_FLAGS_DEBUG="-O0 -g" \
-DCMAKE_CXX_FLAGS_DEBUG="-O0 -g" \
-DTorch_DIR=/opt/libtorch/x64/debug/share/cmake/Torch \
-DUSE_PYTHON=OFF \
-DWITH_CUDA=OFF

~~~TorchConfig.cmake.in modified already, so we can skip sed -i
###sed -i 's/-D_GLIBCXX_USE_CXX11_ABI=//g' ./CMakeFiles/torchvision.dir/flags.make

cmake --build . --config Debug --target install -- -j$CPU_NUM VERBOSE=1

~~~~~~~~~~~~~~vision release
mkdir build_release
cd build_release

cmake .. -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/torchvision/x64 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-O3 -g" \
-DCMAKE_CXX_FLAGS_RELEASE="-O3 -g" \
-DTorch_DIR=/opt/libtorch/x64/share/cmake/Torch \
-DUSE_PYTHON=OFF \
-DWITH_CUDA=ON

~~~TorchConfig.cmake.in modified already, so we can skip sed -i
###sed -i 's/-D_GLIBCXX_USE_CXX11_ABI=//g' ./CMakeFiles/torchvision.dir/flags.make

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

==========================================
==================Android==================
==========================================
https://developer.android.com/ndk/downloads/older_releases
download and extract Android NDK 19c to /opt

#python link to python3
ln -sf /usr/bin/python3.7 /usr/bin/python

pip3 install pyyaml

cd pytorch
#release version
gedit ./scripts/build_android.sh
-DCMAKE_BUILD_TYPE=Release
#change to
-DCMAKE_BUILD_TYPE=RelWithDebInfo

#debug version
gedit ./scripts/build_android.sh
-DCMAKE_BUILD_TYPE=Release
#change to
-DCMAKE_BUILD_TYPE=Debug

export ANDROID_NDK=/opt/android-ndk
export ANDROID_ABI=arm64-v8a
export ANDROID_NATIVE_API_LEVEL=28
export ANDROID_DEBUG_SYMBOLS=1
export VERBOSE=1
./scripts/build_android.sh

==================================================================
=========================aarch64-linux-gnu-gcc========================
==================================================================
//////////////////Method A: download prebuilt libs
1.
CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin
export LD_LIBRARY_PATH=/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/lib

2.
gedit ./toolchain.make
set( CMAKE_SYSTEM_NAME Linux )
set( CMAKE_SYSTEM_PROCESSOR aarch64 )
set( CMAKE_C_COMPILER /opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin/aarch64-ca53-linux-gnu-gcc )
set( CMAKE_CXX_COMPILER /opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin/aarch64-ca53-linux-gnu-g++ )

~~~copy toolchain.make to each dependency and libtorch source directories~~~

3.
git clone --recursive https://github.com/pytorch/pytorch
cd pytorch
git checkout v1.7.1
git submodule sync
git submodule update --init --recursive

export LIBTORCH_ROOT=$(pwd)

4.
gedit ./CMakeLists.txt
~change
###string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
~to
string(APPEND CMAKE_CXX_FLAGS " -fPIC")

5.
gedit ./cmake/TorchConfig.cmake.in
~comment
###if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
###  set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@")
###endif()

6.
./scripts/build_host_protoc.sh

7.
cd ./third_party/sleef
mkdir _host
cd _host

cmake .. -G"Unix Makefiles" -DCMAKE_INSTALL_PREFIX=_install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

8. (skip this step if you build the libs manually)
cd ../..
git clone -b 2019.2 https://github.com/t-kuha/mpsoc-library.git
export DEPEND_DIR=$(pwd)/mpsoc-library

9. (optional; only needed when cmake fails with the following error)
CMake Error at cmake/Codegen.cmake:163 (message):

cd $LIBTORCH_ROOT
gedit ./cmake/Codegen.cmake
set(GEN_COMMAND
  "${PYTHON_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/gen.py
#to
find_package (Python COMPONENTS Interpreter)
set(GEN_COMMAND
  "${Python_EXECUTABLE}" ${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/gen.py

10.
cd $LIBTORCH_ROOT

mkdir _mpsoc
cd _mpsoc

cmake ${LIBTORCH_ROOT} -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/libtorch/arm64 \
-DCMAKE_BUILD_TYPE=Release \
-DCAFFE2_CUSTOM_PROTOC_EXECUTABLE=${LIBTORCH_ROOT}/build_host_protoc/bin/protoc \
-DCMAKE_TOOLCHAIN_FILE=toolchain.make \
-DCMAKE_PREFIX_PATH=${DEPEND_DIR}/dl-framework/caffe-dependency \
-DPYTHON_EXECUTABLE=$(which python3) \
-C${DEPEND_DIR}/dl-framework/libtorch/TryRunResults.cmake \
-DBUILDING_WITH_TORCH_LIBS=ON \
-DBUILD_BINARY=ON \
-DBUILD_CAFFE2_MOBILE=ON \
-DBUILD_CAFFE2_OPS=ON \
-DBUILD_CUSTOM_PROTOBUF=ON \
-DBUILD_DFT=OFF \
-DBUILD_DOCS=OFF \
-DBUILD_GMOCK=ON \
-DBUILD_GNUABI_LIBS=OFF \
-DBUILD_LIBM=ON \
-DBUILD_ONNX_PYTHON=OFF \
-DBUILD_PYTHON=OFF \
-DBUILD_QUAD=OFF \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_TEST=ON \
-DBUILD_TESTS=ON \
-DUSE_ASAN=OFF \
-DUSE_CUDA=OFF \
-DUSE_DISTRIBUTED=OFF \
-DUSE_FBGEMM=OFF \
-DUSE_FFMPEG=ON \
-DUSE_GFLAGS=ON \
-DUSE_GLOG=ON \
-DUSE_GLOO=OFF \
-DUSE_LEVELDB=ON \
-DUSE_LITE_PROTO=OFF \
-DUSE_LMDB=ON \
-DUSE_METAL=OFF \
-DUSE_MKLDNN=OFF \
-DUSE_MPI=OFF \
-DUSE_NATIVE_ARCH=OFF \
-DUSE_NNAPI=OFF \
-DUSE_NNPACK=ON \
-DUSE_NUMA=OFF \
-DUSE_NUMPY=ON \
-DUSE_OBSERVERS=OFF \
-DUSE_OPENCL=OFF \
-DUSE_OPENCV=ON \
-DUSE_OPENMP=OFF \
-DUSE_PROF=OFF \
-DUSE_PYTORCH_QNNPACK=OFF \
-DUSE_QNNPACK=ON \
-DUSE_REDIS=OFF \
-DUSE_ROCKSDB=ON \
-DUSE_ROCM=OFF \
-DUSE_SNPE=OFF \
-DUSE_SYSTEM_EIGEN_INSTALL=OFF \
-DUSE_TBB=OFF \
-DUSE_TENSORRT=OFF \
-DUSE_ZMQ=ON \
-DUSE_ZSTD=OFF \
-DHAVE_STD_REGEX=0 \
-DHAVE_POSIX_REGEX=0 \
-DHAVE_STEADY_CLOCK=0 \
-DATEN_THREADING=NATIVE \
-DBLAS=OpenBLAS \
-DCMAKE_CROSSCOMPILING=ON \
-DNATIVE_BUILD_DIR=${LIBTORCH_ROOT}/third_party/sleef/_host/ \
-DCMAKE_CXX_FLAGS="-L${DEPEND_DIR}/dl-framework/caffe-dependency/lib -llmdb -lleveldb -lsnappy -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_video -lopencv_videoio -lzmq -lrocksdb"

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

~~~note~~~
search python3: -DPYTHON_EXECUTABLE=$(which python3)
arm gcc has no -fopenmp: -DATEN_THREADING=NATIVE

//////////////////Method B: build manual libs
1.
CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin
export LD_LIBRARY_PATH=/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/lib

2.
gedit ./toolchain.make
set( CMAKE_SYSTEM_NAME Linux )
set( CMAKE_SYSTEM_PROCESSOR aarch64 )
set( CMAKE_C_COMPILER /opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin/aarch64-ca53-linux-gnu-gcc )
set( CMAKE_CXX_COMPILER /opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin/aarch64-ca53-linux-gnu-g++ )

~~~copy toolchain.make to each dependency and libtorch source directories~~~

3.
wget --continue https://raw.githubusercontent.com/t-kuha/mpsoc-library/2019.2/dl-framework/libtorch/TryRunResults.cmake
or
download https://www.mediafire.com/file/6ospje4cqqrswvq/TryRunResults.cmake
mkdir -p /opt/torch_deps/arm64
cp ./TryRunResults.cmake /opt/torch_deps/arm64

4.
https://www.boost.org/users/history/version_1_68_0.html
download boost_1_68_0.tar.gz
tar -xvf ./boost_1_68_0.tar.gz
cd boost_1_68_0
./bootstrap.sh
gedit ./project-config.jam
using gcc ;
~change to
using gcc : arm64 : aarch64-ca53-linux-gnu-g++ ;

using python : 2.7 : /usr ;
~change to
using python : 3.7 : /usr ;

./bjam install toolset=gcc-arm64 variant=release link=shared threading=multi runtime-link=shared --prefix=/opt/torch_deps/arm64 --with-thread --with-system --with-filesystem -j$CPU_NUM

5.
https://github.com/gflags/gflags/releases
download gflags-2.2.2.tar.gz
tar -xvf ./gflags-2.2.2.tar.gz
cd gflags-2.2.2
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

6.
https://github.com/google/glog/releases
download glog-0.4.0.tar.gz
tar -xvf ./glog-0.4.0.tar.gz
cd glog-0.4.0
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

7.
https://github.com/google/leveldb/releases
download leveldb-1.22.tar.gz
tar -xvf ./leveldb-1.22.tar.gz
cd leveldb-1.22
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DLEVELDB_BUILD_TESTS=OFF -DLEVELDB_BUILD_BENCHMARKS=OFF -DBUILD_SHARED_LIBS=ON

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

8.
https://git.openldap.org/openldap/openldap/-/tags
download openldap-LMDB_0.9.24.tar.gz
tar -xvf ./openldap-LMDB_0.9.24.tar.gz
cd openldap-LMDB_0.9.24/libraries/liblmdb

gedit ./Makefile
CC      = gcc
AR      = ar
OPT     = -O2 -g
prefix  = /usr/local
#change to
CC      = aarch64-ca53-linux-gnu-gcc
AR      = aarch64-ca53-linux-gnu-ar
OPT     = -O3
prefix  = /opt/torch_deps/arm64
#save

make -j$CPU_NUM install

9.
https://github.com/xianyi/OpenBLAS/releases?page=2
download OpenBLAS-0.3.7.tar.gz
tar -xvf ./OpenBLAS-0.3.7.tar.gz
cd OpenBLAS-0.3.7
mkdir _mpsoc
cd _mpsoc
cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=toolchain.make \
-DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 \
-DCMAKE_BUILD_TYPE=Release \
-DNOFORTRAN=1 \
-DBUILD_SHARED_LIBS=1

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

10.
Only the arm64 opencv is needed. If libtorch picks up an x64 opencv instead, FindCUDA.cmake fails with:
Could NOT find CUDA (missing: CUDA_TOOLKIT_ROOT_DIR CUDA_NVCC_EXECUTABLE
To avoid this, remove all built targets of the x64 opencv from /opt or any other directory.

https://opencv.org/releases/page/3/
download opencv-3.4.8.zip
unzip ./opencv-3.4.8.zip
cd opencv-3.4.8
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=toolchain.make \
-DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_PERF_TESTS=OFF \
-DBUILD_TESTS=OFF \
-DBUILD_ZLIB=ON \
-DWITH_CUDA=OFF \
-DZLIB_INCLUDE_DIR=../3rdparty/zlib

~~~~enable these modules, disable the unused BUILD_opencv_xxxx~~~
cmake-gui ./CMakeCache.txt
BUILD_opencv_core
BUILD_opencv_video
BUILD_opencv_videoio
BUILD_opencv_highgui
BUILD_opencv_imgproc
BUILD_opencv_imgcodecs
#press Configure then Generate

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

11.
https://github.com/protocolbuffers/protobuf/releases?page=6
download protobuf-3.10.1.tar.gz
tar -xvf ./protobuf-3.10.1.tar.gz

***copy toolchain.make to ./protobuf-3.10.1/cmake***

cd protobuf-3.10.1
mkdir _host
cd _host

cmake ../cmake/ -G"Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/x64/host -DCMAKE_BUILD_TYPE=Release -Dprotobuf_BUILD_TESTS=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

export PATH=${PATH}:/opt/torch_deps/x64/host/bin

cd ..
mkdir _mpsoc
cd _mpsoc

cmake ../cmake/ -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=ON

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

12.
https://github.com/google/snappy/releases
download snappy-1.1.7.tar.gz
tar -xvf ./snappy-1.1.7.tar.gz
cd snappy-1.1.7
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSNAPPY_BUILD_TESTS=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

13.
https://github.com/facebook/rocksdb/releases?page=4
download rocksdb-6.4.6.tar.gz
tar -xvf ./rocksdb-6.4.6.tar.gz
cd rocksdb-6.4.6
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Ninja" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=OFF -DWITH_TOOLS=OFF -DWITH_GFLAGS=OFF -DPORTABLE=ON

cmake --build . --config Release --target install -- -j$CPU_NUM
~~~cannot use VERBOSE=1 here because the generator is Ninja, not make~~~

14.
https://github.com/zeromq/libzmq/releases
download libzmq-4.3.4.tar.gz
tar -xvf ./libzmq-4.3.4.tar.gz
cd libzmq-4.3.4
mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=toolchain.make -DCMAKE_INSTALL_PREFIX=/opt/torch_deps/arm64 -DCMAKE_BUILD_TYPE=Release -DBUILD_STATIC=OFF -DBUILD_TESTS=OFF -DWITH_DOCS=OFF -DWITH_PERF_TOOL=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

15.
first do steps 1 to 7 of [Method A: download prebuilt libs]

CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM
export PATH=$PATH:/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/bin
export LD_LIBRARY_PATH=/opt/ivot/aarch64-ca53-linux-gnueabihf-8.4/lib

cd pytorch
export LIBTORCH_ROOT=$(pwd)

gedit ./CMakeLists.txt
~change
###string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
~to
string(APPEND CMAKE_CXX_FLAGS " -fPIC")

gedit ./cmake/TorchConfig.cmake.in
~comment
###if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
###  set(TORCH_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=@GLIBCXX_USE_CXX11_ABI@")
###endif()

./scripts/build_host_protoc.sh

cd ./third_party/sleef
mkdir _host
cd _host

cmake .. -G"Unix Makefiles" -DCMAKE_INSTALL_PREFIX=_install -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=OFF

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

cd $LIBTORCH_ROOT
mkdir _mpsoc
cd _mpsoc

cmake ${LIBTORCH_ROOT} -G"Unix Makefiles" \
-DCMAKE_INSTALL_PREFIX=/opt/libtorch/arm64 \
-DCMAKE_BUILD_TYPE=Release \
-DCAFFE2_CUSTOM_PROTOC_EXECUTABLE=${LIBTORCH_ROOT}/build_host_protoc/bin/protoc \
-DCMAKE_TOOLCHAIN_FILE=toolchain.make \
-DCMAKE_PREFIX_PATH=/opt/torch_deps/arm64 \
-DPYTHON_EXECUTABLE=$(which python3) \
-C/opt/torch_deps/arm64/TryRunResults.cmake \
-DBUILDING_WITH_TORCH_LIBS=ON \
-DBUILD_BINARY=ON \
-DBUILD_CAFFE2_MOBILE=ON \
-DBUILD_CAFFE2_OPS=ON \
-DBUILD_CUSTOM_PROTOBUF=ON \
-DBUILD_DFT=OFF \
-DBUILD_DOCS=OFF \
-DBUILD_GMOCK=ON \
-DBUILD_GNUABI_LIBS=OFF \
-DBUILD_LIBM=ON \
-DBUILD_ONNX_PYTHON=OFF \
-DBUILD_PYTHON=OFF \
-DBUILD_QUAD=OFF \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_TEST=ON \
-DBUILD_TESTS=ON \
-DUSE_ASAN=OFF \
-DUSE_CUDA=OFF \
-DUSE_DISTRIBUTED=OFF \
-DUSE_FBGEMM=OFF \
-DUSE_FFMPEG=ON \
-DUSE_GFLAGS=ON \
-DUSE_GLOG=ON \
-DUSE_GLOO=OFF \
-DUSE_LEVELDB=ON \
-DUSE_LITE_PROTO=OFF \
-DUSE_LMDB=ON \
-DUSE_METAL=OFF \
-DUSE_MKLDNN=OFF \
-DUSE_MPI=OFF \
-DUSE_NATIVE_ARCH=OFF \
-DUSE_NNAPI=OFF \
-DUSE_NNPACK=ON \
-DUSE_NUMA=OFF \
-DUSE_NUMPY=ON \
-DUSE_OBSERVERS=OFF \
-DUSE_OPENCL=OFF \
-DUSE_OPENCV=ON \
-DUSE_OPENMP=OFF \
-DUSE_PROF=OFF \
-DUSE_PYTORCH_QNNPACK=OFF \
-DUSE_QNNPACK=ON \
-DUSE_REDIS=OFF \
-DUSE_ROCKSDB=ON \
-DUSE_ROCM=OFF \
-DUSE_SNPE=OFF \
-DUSE_SYSTEM_EIGEN_INSTALL=OFF \
-DUSE_TBB=OFF \
-DUSE_TENSORRT=OFF \
-DUSE_ZMQ=ON \
-DUSE_ZSTD=OFF \
-DHAVE_STD_REGEX=0 \
-DHAVE_POSIX_REGEX=0 \
-DHAVE_STEADY_CLOCK=0 \
-DATEN_THREADING=NATIVE \
-DBLAS=OpenBLAS \
-DCMAKE_CROSSCOMPILING=ON \
-DNATIVE_BUILD_DIR=${LIBTORCH_ROOT}/third_party/sleef/_host/ \
-DCMAKE_CXX_FLAGS="-L/opt/torch_deps/arm64/lib -llmdb -lleveldb -lsnappy -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_imgproc -lopencv_video -lopencv_videoio -lzmq -lrocksdb"

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

~~~note~~~
search python3: -DPYTHON_EXECUTABLE=$(which python3)
arm gcc has no -fopenmp: -DATEN_THREADING=NATIVE

16.
https://github.com/pytorch/vision/releases
download vision-0.8.2.tar.gz
tar -xvf ./vision-0.8.2.tar.gz
cd vision-0.8.2

~modify these files to add the USE_PYTHON definition (cmake, header and source files)
./CMakeLists.txt
./cmake/TorchVisionConfig.cmake.in
./torchvision/csrc/cpu/image/image.cpp
./torchvision/csrc/cpu/video/Video.h
./torchvision/csrc/cpu/video_reader/VideoReader.cpp
./torchvision/csrc/cpu/vision_cpu.h
./torchvision/csrc/cuda/vision_cuda.h
./torchvision/csrc/empty_tensor_op.h
./torchvision/csrc/vision.cpp
patch: https://www.mediafire.com/file/wcxce2v6vdzvtvw/vision-0.8.2_patch.tar.gz

mkdir _mpsoc
cd _mpsoc

cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=toolchain.make \
-DCMAKE_INSTALL_PREFIX=/opt/torchvision/arm64 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_PREFIX_PATH=/opt/torch_deps/arm64 \
-DTorch_DIR=/opt/libtorch/arm64/share/cmake/Torch \
-DUSE_PYTHON=OFF \
-DWITH_CUDA=OFF \
-DPNG_LIBRARY=/opt/torch_deps/arm64/lib/libopencv_imgcodecs.so \
-DJPEG_LIBRARY=/opt/torch_deps/arm64/lib/libopencv_imgcodecs.so

~~~TorchConfig.cmake.in modified already, so we can skip sed -i
###sed -i 's/-D_GLIBCXX_USE_CXX11_ABI=//g' ./CMakeFiles/torchvision.dir/flags.make

cmake --build . --config Release --target install -- -j$CPU_NUM VERBOSE=1

sample code:
https://www.mediafire.com/file/72pdqg041n2iw85/torch_test_arm.tar.gz
https://www.mediafire.com/file/18lyaq8d4qhx0ne/torch_test_x64.tar.gz

demo: (both forward & backward support for aarch64-linux-gnu-gcc)
https://www.mediafire.com/view/1zgp44v08n7tpj6/libtorch_build_ok.png
https://www.mediafire.com/view/fdwap6ldb8zl6w0/torchvision.png
https://www.mediafire.com/view/cmqha2rgexv7l3p/libtorch_nettransform.png
https://www.mediafire.com/view/gl8rlbt9e911f7p/libtorch_debug.png

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
Tesla told the New York Herald: I prefer to be remembered as the inventor who succeeded in abolishing war. That will be my highest pride.
http://www.teslacollection.com/tesla_articles/1898/new_york_herald/f_l_christman/tesla_declares_he_will_abolish_war (in middle section)

Albert Einstein: The release of atom power has changed everything except our way of thinking... the solution to this problem lies in the heart of mankind. If only I had known, I should have become a watchmaker.
https://atomictrauma.wordpress.com/the-scientists/albert-einstein

AI training produces a carbon footprint
https://arxiv.org/pdf/1906.02243.pdf

8 則留言:

  1. i build pytoch using "A. prebuilt libs download method", but give me an error, error mesage as follows:
    make[2]: Leaving directory '/root/pytorch/_mpsoc'
    [ 5%] Built target libprotobuf
    make[1]: Leaving directory '/root/pytorch/_mpsoc'
    make: *** [Makefile:141: all] Error 2

    回覆刪除
    回覆
    1. use all hand made, because the prebuilt use different arm g++ from yours

      刪除
  2. someone can help me ?

    回覆刪除
  3. using c++ implement faster-rcnn yolo by ncnn
    https://github.com/Tencent/ncnn

    回覆刪除
  4. hello fatalfeel, i'd like to cross compile libtorch for jetson TX2, could you give me more information

    回覆刪除
    回覆
    1. try to use eclipse debug with gdb and gdbserver, search my blogger build eclipse debug env.

      刪除
  5. I use ubuntu 16.04, root login
    cuda_10.2.89_440.33.01_linux ,
    cuda_10.2.1_linux,
    cuda_10.2.2_linux
    libcudnn8_8.0.5.39-1+cuda10.2_amd64
    libcudnn8-dev_8.0.5.39-1+cuda10.2_amd64
    libcudnn8-samples_8.0.5.39-1+cuda10.2_amd64

    driver use data center version:
    NVIDIA-Linux-x86_64-440.118.02

    cmake 3.20.5

    回覆刪除