# Fetch model weights (resumable with --continue) and clone the three
# inference-engine repositories with their submodules.
wget --continue https://huggingface.co/bartowski/SmolLM2-135M-Instruct-GGUF/resolve/main/SmolLM2-135M-Instruct-Q8_0.gguf
wget --continue https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin
for repo_url in \
  https://github.com/ggerganov/llama.cpp \
  https://github.com/ggerganov/whisper.cpp \
  https://github.com/skeskinen/bert.cpp; do
  git clone --recursive "$repo_url"
done
#llama
# Cross-compile llama.cpp for ARM64 (Cortex-A55) with dotprod+fp16, then
# verify the cross-built library actually contains sdot instructions.
# NOTE(review): assumes the current directory is the llama.cpp checkout —
# the clone step above does not cd into it; confirm before running.
# Use half the host cores for the build; $(( )) avoids the bc dependency.
CPU_NUM=$(( $(grep -c processor /proc/cpuinfo) / 2 ))
(( CPU_NUM >= 1 )) || CPU_NUM=1   # guard: a single-core host would yield -j0
echo "CPU number = $CPU_NUM"
mkdir -p build && cd build || exit 1
cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=../toolchain_arm64.cmake \
-DCMAKE_INSTALL_PREFIX=/opt/ggerganov/llama/arm64/release \
-DCMAKE_INSTALL_RPATH=/volume1/ggerganov/llama/arm64/release/lib \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-mcpu=cortex-a55 -O3" \
-DCMAKE_CXX_FLAGS_RELEASE="-mcpu=cortex-a55 -O3" \
-DGGML_CPU_ARM_ARCH="armv8.2-a+dotprod+fp16" \
-DGGML_NATIVE=OFF \
-DGGML_OPENMP=OFF \
-DGGML_CPU_ALL_VARIANTS=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TESTS=OFF
# compile and install to the specified directory
cmake --build . --config Release --target install -- -j"$CPU_NUM" VERBOSE=1
# check asm instruction: sdot present => dotprod extension was really used
/opt/toolchain/aarch64-linux-gnu/bin/aarch64-linux-gnu-objdump -d /opt/ggerganov/llama/arm64/release/lib/libggml-cpu.so.0.9.7 | grep "sdot"
#~~~~~~~~~~ another terminal ~~~~~~~~~~
#whisper
# Cross-compile whisper.cpp for ARM64 (Cortex-A55), same toolchain and CPU
# feature set as the llama build, then verify sdot instructions were emitted.
# NOTE(review): assumes the current directory is the whisper.cpp checkout —
# confirm before running.
# Use half the host cores; $(( )) avoids the bc dependency.
CPU_NUM=$(( $(grep -c processor /proc/cpuinfo) / 2 ))
(( CPU_NUM >= 1 )) || CPU_NUM=1   # guard: a single-core host would yield -j0
echo "CPU number = $CPU_NUM"
mkdir -p build && cd build || exit 1
cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=../toolchain_arm64.cmake \
-DCMAKE_INSTALL_PREFIX=/opt/ggerganov/whisper/arm64/release \
-DCMAKE_INSTALL_RPATH=/volume1/ggerganov/whisper/arm64/release/lib \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-mcpu=cortex-a55 -O3" \
-DCMAKE_CXX_FLAGS_RELEASE="-mcpu=cortex-a55 -O3" \
-DGGML_CPU_ARM_ARCH="armv8.2-a+dotprod+fp16" \
-DGGML_NATIVE=OFF \
-DGGML_OPENMP=OFF \
-DGGML_CPU_ALL_VARIANTS=OFF \
-DWHISPER_BUILD_SERVER=ON \
-DWHISPER_BUILD_EXAMPLES=ON \
-DWHISPER_BUILD_TESTS=OFF
# compile and install to the specified directory
cmake --build . --config Release --target install -- -j"$CPU_NUM" VERBOSE=1
# check asm instruction
/opt/toolchain/aarch64-linux-gnu/bin/aarch64-linux-gnu-objdump -d /opt/ggerganov/whisper/arm64/release/lib/libggml-cpu.so.0.9.7 | grep "sdot"
#~~~~~~~~~~ another terminal ~~~~~~~~~~
#bert model
# Cross-compile bert.cpp. First manually edit its CMakeLists.txt and comment
# out the -mcpu=native block, which would break cross-compilation:
gedit ~/bert.cpp/CMakeLists.txt
# lines to comment out in CMakeLists.txt:
#if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
# add_compile_options(-mcpu=native)
#endif()
# Use half the host cores; $(( )) avoids the bc dependency.
CPU_NUM=$(( $(grep -c processor /proc/cpuinfo) / 2 ))
(( CPU_NUM >= 1 )) || CPU_NUM=1   # guard: a single-core host would yield -j0
echo "CPU number = $CPU_NUM"
mkdir -p build && cd build || exit 1
cmake .. -G"Unix Makefiles" \
-DCMAKE_TOOLCHAIN_FILE=../toolchain_arm64.cmake \
-DCMAKE_INSTALL_PREFIX=/opt/bert/arm64/release \
-DCMAKE_INSTALL_RPATH=/volume1/bert/arm64/release/lib \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-mcpu=cortex-a55 -march=armv8.2-a+dotprod+fp16 -O3" \
-DCMAKE_CXX_FLAGS_RELEASE="-mcpu=cortex-a55 -march=armv8.2-a+dotprod+fp16 -O3" \
-DBERT_NATIVE=OFF \
-DGGML_FMA=OFF \
-DGGML_F16C=OFF \
-DGGML_BUILD_EXAMPLES=ON \
-DGGML_BUILD_TESTS=OFF
# compile and install to the specified directory
cmake --build . --config Release --target install -- -j"$CPU_NUM" VERBOSE=1
# check asm instruction (dotprod really emitted)
/opt/toolchain/aarch64-linux-gnu/bin/aarch64-linux-gnu-objdump -d /root/llm/bert.cpp/build/bin/main | grep "sdot"
#~~~~~~~~~~ another terminal ~~~~~~~~~~
# Deploy built binaries, libraries, and model files to the target board.
# SECURITY: a plain-text password on the command line leaks via `ps` and shell
# history — prefer ssh keys (ssh-copy-id) or the SSHPASS environment variable.
TARGET=root@172.16.2.219
TARGET_PW=202302001
# NOTE(review): the lib directories must exist on the TARGET for the scp of
# the libraries below to succeed; if /volume1 is not a shared mount on this
# host, create them over ssh on the target instead — confirm.
mkdir -p /volume1/ggerganov/llama/arm64/release/lib
mkdir -p /volume1/ggerganov/whisper/arm64/release/lib
sshpass -p "$TARGET_PW" scp /root/llm/SmolLM2-135M-Instruct-Q8_0.gguf "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /root/llm/ggml-tiny.bin "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /opt/ggerganov/llama/arm64/release/bin/llama-cli "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /opt/ggerganov/llama/arm64/release/bin/llama-server "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /opt/ggerganov/llama/arm64/release/lib/lib* "$TARGET":/volume1/ggerganov/llama/arm64/release/lib
sshpass -p "$TARGET_PW" scp /opt/ggerganov/whisper/arm64/release/bin/whisper-cli "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /opt/ggerganov/whisper/arm64/release/bin/whisper-server "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /opt/ggerganov/whisper/arm64/release/lib/lib* "$TARGET":/volume1/ggerganov/whisper/arm64/release/lib
sshpass -p "$TARGET_PW" scp /root/llm/bert.cpp/build/bin/main "$TARGET":/volume1
sshpass -p "$TARGET_PW" scp /root/llm/bert.cpp/build/bin/server "$TARGET":/volume1
#~~~~~~~~~~ target terminal ~~~~~~~~~~
# Run on the target board: interactive chat with the quantized SmolLM2 model
# (-t 3 limits threads; -n 128 caps generated tokens; presumably the board has
# 4 cores and one is left free — confirm).
./llama-cli -m ./SmolLM2-135M-Instruct-Q8_0.gguf -p "You are a helpful assistant." -cnv -t 3 --color on -n 128 --repeat-penalty 1.3
# Same model exposed as an HTTP server on all interfaces, port 8080.
./llama-server -m ./SmolLM2-135M-Instruct-Q8_0.gguf --host 0.0.0.0 --port 8080 -t 3 -n 128 --repeat-penalty 1.3
#16 kHz / 16-bit / mono PCM WAV
# Transcribe Chinese speech (-l zh); -otxt writes the transcript next to the wav.
./whisper-cli -m ./ggml-tiny.bin -f ./speech.wav -l zh -otxt
#~~~~~~~~~~ llama pretrain ~~~~~~~~~~
# Fine-tune SmolLM2 on the host, convert to GGUF, and run on the target.
# Dataset reference: https://huggingface.co/datasets/HuggingFaceTB/smoltalk
pip3 install --break-system-packages --target=/usr/lib/python3/dist-packages huggingface_hub transformers datasets sentencepiece
#if cpu only add --extra-index-url https://download.pytorch.org/whl/cpu
pip3 install --break-system-packages --target=/usr/lib/python3/dist-packages torch torchvision torchaudio trl peft --extra-index-url https://download.pytorch.org/whl/cpu
curl -LsSf https://hf.co/cli/install.sh | bash
#reopen terminal, copy SmolLM2-135M-Instruct to llama.cpp
hf download HuggingFaceTB/SmolLM2-135M-Instruct --local-dir SmolLM2-135M-Instruct
#translate weight
# NOTE(review): ./smollm2-finetuned is presumably produced by smollmtrain.py
# (download link below) — confirm it exists before converting.
python3 /root/llm/llama.cpp/convert_hf_to_gguf.py ./smollm2-finetuned --outfile ./smollm2-finetuned-Q8_0.gguf --outtype q8_0
sshpass -p "202302001" scp ./smollm2-finetuned-Q8_0.gguf root@172.16.2.219:/volume1
sshpass -p "202302001" scp ./smollm2-finetuned/chat_template.jinja root@172.16.2.219:/volume1
#run (on the target)
# NOTE(review): this line loads the BASE model with the finetuned chat
# template — likely intended ./smollm2-finetuned-Q8_0.gguf like the server
# line below; confirm.
./llama-cli -m ./SmolLM2-135M-Instruct-Q8_0.gguf -p "assistant" -cnv -t 3 --color on --ctx-size 2048 -n 128 --repeat-penalty 1.3 --chat-template-file ./chat_template.jinja
./llama-server -m ./smollm2-finetuned-Q8_0.gguf --host 0.0.0.0 --port 8080 -t 3 --ctx-size 2048 -n 128 --repeat-penalty 1.3 --chat-template-file ./chat_template.jinja
# Training script download:
# https://www.mediafire.com/file/0536txutyktjpfa/smollmtrain.py/file
#~~~~~~~~~~ bert feature vector ~~~~~~~~~~
# Download the MiniLM embedding model in f16 and q4_0 GGML formats and copy
# both to the target board.
python3 ./bert.cpp/models/download-ggml.py download all-MiniLM-L6-v2 f16
python3 ./bert.cpp/models/download-ggml.py download all-MiniLM-L6-v2 q4_0
sshpass -p "202302001" scp /root/llm/bert.cpp/models/all-MiniLM-L6-v2/ggml-model-f16.bin root@172.16.2.219:/volume1
sshpass -p "202302001" scp /root/llm/bert.cpp/models/all-MiniLM-L6-v2/ggml-model-q4_0.bin root@172.16.2.219:/volume1
# On the target: run ONE of the two servers — both bind port 8080.
./server -m ./ggml-model-f16.bin --port 8080
./server -m ./ggml-model-q4_0.bin --port 8080
# Test client and sample data downloads:
# https://www.mediafire.com/file/dyvvw41cad81k9v/testbert.cpp/file
# https://www.mediafire.com/file/gvfu3een1m7xqq3/faq.jsonl/file
# (blog platform footer: "No comments" / "Post a comment" — not part of these notes)