2013年12月3日 星期二

Install Nvidia cuda & python3.7/3.8 & Yolov3 in Pytorch

#######################################################################
#install nvidia driver + cuda 10.2 + cudnn 8.0.5
#######################################################################
1. remove old driver
sudo apt list --installed | grep cuda
sudo apt purge libcudnn7 libcudnn7-dev libcudnn7-doc
sudo rm -rf /usr/local/cuda-10.0

2. check gpu version and install driver
sudo lspci -v  #find VGA compatible controller

3. disable the default Linux driver (nouveau) if it is in use
gedit /etc/modprobe.d/blacklist.conf
#required
blacklist nouveau
#optional
blacklist lbm-nouveau
options nouveau modeset=0
alias nouveau off
alias lbm-nouveau off

#terminal check
echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf

update-initramfs -u
reboot

4.
#build tools
apt install build-essential checkinstall curl lzma openssl wget

5. download driver NVIDIA-Linux-x86_64-440.118.02.run
https://www.nvidia.com/download/find.aspx
#Product Type: Data Center
(version 440.118.02 supports CUDA Toolkit 10.2)

6. download cuda 10.2
https://developer.nvidia.com/cuda-toolkit-archive
runfile:
https://developer.nvidia.com/cuda-10.2-download-archive
Installer Type: runfile (local)
wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run
patch v1:
wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/patches/1/cuda_10.2.1_linux.run
patch v2:
wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/patches/2/cuda_10.2.2_linux.run

7. Download cuDNN v8.0.5 (November 9th, 2020), for CUDA 10.2
https://developer.nvidia.com/rdp/cudnn-archive
libcudnn8_8.0.5.39-1+cuda10.2_amd64.deb
libcudnn8-dev_8.0.5.39-1+cuda10.2_amd64.deb
libcudnn8-samples_8.0.5.39-1+cuda10.2_amd64.deb

8. remove the old driver, cuda and libcudnn, then install the new driver and cuda
switch to a text console with Ctrl + Alt + F3 and log in there
dpkg --remove libcudnnX-samples
dpkg --remove libcudnnX-dev
dpkg --remove libcudnnX
/usr/local/cuda-XX.X/bin/cuda-uninstaller
sudo apt purge nvidia-cuda*
sudo apt purge nvidia-*

chmod 755 *.run
sudo service lightdm stop
sudo service gdm3 stop
./NVIDIA-Linux-x86_64-440.118.02.run
./cuda_10.2.89_440.33.01_linux.run (uncheck default driver install)
./cuda_10.2.2_linux.run

(optional)
if you encounter -> error opening terminal: linux
~check 
echo $TERM
echo $TERMINFO
~setting
apt install --reinstall ncurses-base ncurses-bin ncurses-term libncurses5 libncurses5-dev
export TERM=xterm-basic
export TERMINFO=/usr/share/terminfo

9. install cudnn
apt install gdebi
gdebi ./libcudnn8_8.0.5.39-1+cuda10.2_amd64.deb
gdebi ./libcudnn8-dev_8.0.5.39-1+cuda10.2_amd64.deb
gdebi ./libcudnn8-samples_8.0.5.39-1+cuda10.2_amd64.deb

###how to handle cudnn-local-repo-ubuntu1804-8.3.2.44_1.0-1_amd64.deb###
dpkg -x ./cudnn-local-repo-ubuntu1804-8.3.2.44_1.0-1_amd64.deb ./
cd ./var/cudnn-local-repo-ubuntu1804-8.3.2.44
gdebi libcudnn8_8.3.2.44-1+cuda11.5_amd64.deb
gdebi libcudnn8-dev_8.3.2.44-1+cuda11.5_amd64.deb
gdebi libcudnn8-samples_8.3.2.44-1+cuda11.5_amd64.deb

#######################################################################
#install python 3.7 + pip3 + labelimg + yolov3
#######################################################################
Ref.
https://www.twblogs.net/a/5d4048fcbd9eee51fbf9884d
https://blog.csdn.net/u014072827/article/details/91379267
https://www.cnblogs.com/pprp/p/10863496.html

1.
#lsb_release symbolic link for Python-3.7.16 using make install
mkdir -p /usr/lib/python3.7
ln -rsf /usr/share/pyshared/lsb_release.py /usr/lib/python3.7/lsb_release.py

2.
#ffmpeg libs
sudo apt install libavcodec-dev libavutil-dev libavformat-dev libswscale-dev libavdevice-dev

#python3 libs
sudo apt install python3-dev libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev zlib1g-dev libffi-dev libcairo2 libcairo-gobject2 libpangocairo-1.0-0 libcairo2-dev libgirepository1.0-dev liblzma-dev

3.
wget https://www.python.org/ftp/python/3.7.16/Python-3.7.16.tar.xz
tar -h -xvf ./Python-3.7.16.tar.xz
cd Python-3.7.16

CPU_NUM=$(grep -c processor /proc/cpuinfo)
echo "CPU number = "$CPU_NUM

~~~these environment variables must be unset, otherwise you get: No module named '_posixsubprocess'
unset LD_LIBRARY_PATH
unset PYTHONHOME
unset PYTHONUSERBASE

#for Ubuntu x86_64
./configure --prefix=/usr --enable-shared --enable-ipv6 --with-system-expat --with-system-ffi --with-ensurepip=install
make -j$CPU_NUM && make install

#for Arm64 aarch64
./configure --prefix=/usr --enable-shared --enable-ipv6 --with-ensurepip=install
make -j$CPU_NUM && make install

~~~python3.7 is recommended; other versions are not stable with pytorch

4. set PYTHONUSERBASE as the pip3 install path
gedit /etc/environment
PYTHONUSERBASE="/usr/lib/python3.7/site-packages"

#then run this in the terminal
export PYTHONUSERBASE="/usr/lib/python3.7/site-packages"

5. fix error -> async = reply_handler and error_handler
sudo apt install rename

gedit ./cpython37.sh
modify 's/cpython-35/cpython-37/' (ubuntu 16.04)
modify 's/cpython-36/cpython-37/' (ubuntu 18.04)
./cpython37.sh  #run

#the last steps in cpython37.sh create these symbolic links
ln -rsf /usr/share/pyshared/lsb_release.py /usr/lib/python3.7/lsb_release.py
ln -rsf /usr/bin/python3.7m /usr/bin/python3m
ln -rsf /usr/bin/python3.7m-config /usr/bin/python3m-config

~~download
ubuntu 16 python3.7
https://www.mediafire.com/file/23sh6ppoc94mpkc/cpython37.sh
ubuntu 18 python3.8
https://www.mediafire.com/file/w8v0r2nbl8zfzi6/cpython38.sh

6.
The installed gi, PIL, Pillow, pkg_resources and setuptools are old versions, so remove them:
(a) python3.7/ubuntu 16 and python3.7/ubuntu 18
remove directories gi, PIL, Pillow, pkg_resources, setuptools in /usr/lib/python3 & /usr/lib/python3.7
(b) python3.8/ubuntu 18
remove directories gi, PIL, Pillow, pkg_resources, setuptools in /usr/lib/python3 & /usr/lib/python3.8
remove files apt_inst*, apt_pkg* in /usr/lib/python3

7. install pip3
wget https://bootstrap.pypa.io/get-pip.py
python3 ./get-pip.py

#this command also installs a new version of setuptools automatically

8.
#prevent gnome-terminal from failing to open with: No module named 'gi'
rm -rf /usr/lib/python3/dist-packages/cairo
rm -rf /usr/lib/python3.7/site-packages/cairo

9. gnome-terminal dependency
remove PyGObject* in /usr/lib/python3 & /usr/lib/python3.7 then
(a)
pycairo on ubuntu <= 16.04
pip3 install --force-reinstall pycairo==1.19.1
pip3 install PyGObject==3.30.5
(b)
pycairo on ubuntu >= 18.04
pip3 install --force-reinstall pycairo==1.23.0
pip3 install PyGObject==3.42.2

10.
pip3 install --force-reinstall torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2
(do not use torch==1.8.0; it causes a cuBLAS error)

11.
git clone https://github.com/ultralytics/yolov3
pip3 uninstall Cython numpy
pip3 install Cython numpy
pip3 install -U -r /root/projects/yolov3/requirements.txt

12. download all weights
bash weights/download_yolov3_weights.sh
https://drive.google.com/drive/folders/1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0

13.
sudo apt install python3-pyqt5
sudo apt install libxcb-xinerama0
sudo pip3 install pyqt5
sudo pip3 install opencv-python==3.4.10.35
pip3 install labelImg
run labelImg
#draw the bounding boxes and save them

14.
mkdir /root/projects/yolov3/data/Annotations
mkdir /root/projects/yolov3/data/images
mkdir /root/projects/yolov3/data/ImageSets
mkdir /root/projects/yolov3/data/labels
(the images and labels directory names must match the x.replace('images', 'labels') call in /root/projects/yolov3/utils/datasets.py)

source & data: http://www.mediafire.com/file/ciemxq4qm3ikuj6/yolov3_voc.tar.gz

15.
cp *.xml /root/projects/yolov3/data/Annotations
cp *.jpg /root/projects/yolov3/data/images

16.
cd /root/projects/yolov3
python3 ./make_imgtxt.py
python3 ./make_labels.py

17.
gedit /root/projects/yolov3/cfg/yolov3.cfg
#or
gedit /root/projects/yolov3/cfg/yolov3-spp.cfg

gedit /root/projects/yolov3/cfg/yolov3-1cls.cfg #one class
gedit /root/projects/yolov3/cfg/yolov3-spp-1cls.cfg #one class

assume a single class (plate); the 5 stands for tx, ty, tw, th, po
classes=1
filters=[classes + 5] * 3
filters=18

find all [yolo]
classes=80
#to
classes=1

for every [yolo] section, find the first filters line above it
filters=255
#to
filters=18

18.
gedit /root/projects/yolov3/data/collector.data
classes=1
train=data/train.txt
valid=data/test.txt
names=data/collector.names
backup=backup/
eval=coco
#save

gedit /root/projects/yolov3/data/collector.names
#add
plate
#save

19.
python3 ./train.py --cfg cfg/yolov3.cfg --weights weights/yolov3.pt --data data/collector.data --epochs 10
#or
python3 ./train.py --cfg cfg/yolov3-spp.cfg --weights weights/yolov3-spp.pt --data data/collector.data --epochs 10

#one class
python3 ./train.py --cfg cfg/yolov3-1cls.cfg --weights weights/yolov3.pt --data data/collector.data --epochs 10
#or
python3 ./train.py --cfg cfg/yolov3-spp-1cls.cfg --weights weights/yolov3-spp.pt --data data/collector.data --epochs 10

20.
#copy the images to detect into data/samples
python3 ./detect.py --cfg cfg/yolov3.cfg --weights weights/last.pt --data data/collector.data --source data/samples
#or
python3 ./detect.py --cfg cfg/yolov3-spp.cfg --weights weights/last.pt --data data/collector.data --source data/samples 

#one class
python3 ./detect.py --cfg cfg/yolov3-1cls.cfg --weights weights/last.pt --data data/collector.data --source data/samples
#or
python3 ./detect.py --cfg cfg/yolov3-spp-1cls.cfg --weights weights/last.pt --data data/collector.data --source data/samples

21. plot training:
python3 -c "from utils import utils; utils.plot_results()"

download:
http://www.mediafire.com/file/dq78mz7aus5pnpw/Convert-COCO-to-PascalVOC.tar.gz/file

1 則留言:

  1. ASUS Dual GeForce RTX 3050 OC
    NVIDIA-Linux-x86_64-510.47.03.run
    cuda_11.3.1_465.19.01_linux.run
    libcudnn8_8.2.1.32-1+cuda11.3_amd64.deb
    libcudnn8-dev_8.2.1.32-1+cuda11.3_amd64.deb
    libcudnn8-samples_8.2.1.32-1+cuda11.3_amd64.deb
    pip3 install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
    installed and tested PPO AI on cuda: OK

    回覆刪除