# Launch the llama.cpp HTTP server for the fine-tuned Qwen GGUF model.
#
# Settings passed below (flag meanings per the llama.cpp server README):
#   CUDA_VISIBLE_DEVICES="0"  expose only CUDA device 0 to this process
#   -m                        path to the GGUF model file
#   -c 16384                  context size, in tokens
#   -t 4                      number of CPU threads
#   --host 0.0.0.0            listen on all network interfaces
#   --port 8765               TCP port to listen on
#   --temp 0                  sampling temperature
#   --repeat-penalty 1.1      repetition penalty
#   -ngl 99                   number of model layers to offload to the GPU
server_bin=/data2/fangjiasheng/llama.cpp/build/bin/llama-server
model_file=/data2/fangjiasheng/qwen_lora/qwen_0.8B_lora_bidding_kg/model/gguf_model/qwen-f16.gguf

# The variable assignment is kept as a command prefix so it only affects the
# server process and is not exported into the calling shell.
CUDA_VISIBLE_DEVICES="0" "$server_bin" \
  -m "$model_file" \
  -c 16384 \
  -t 4 \
  --host 0.0.0.0 \
  --port 8765 \
  --temp 0 \
  --repeat-penalty 1.1 \
  -ngl 99