#!/bin/bash
#
# Launch script for the Ollama server on Intel GPUs.
#
# Exports the runtime environment and then starts `./ollama serve` from the
# current working directory.
#
# Environment (read):
#   OLLAMA_HOST        - listen address; defaults to 0.0.0.0:11434 if unset/empty
#   OLLAMA_KEEP_ALIVE  - keep-alive setting; defaults to 24h if unset/empty
#
# Exit status: that of `./ollama serve` (the last command executed).

# NOTE(review): 999 presumably means "offload as many layers as possible to
# the GPU" -- confirm against the Ollama / IPEX-LLM documentation.
export OLLAMA_NUM_GPU=999

# Never route requests to the local server through an HTTP proxy.
export no_proxy=localhost,127.0.0.1

# NOTE(review): presumably enables the Level Zero Sysman (system management)
# interface for device queries -- confirm against the Level Zero docs.
export ZES_ENABLE_SYSMAN=1

# [optional] Under most circumstances the following environment variable
# improves performance, but in some cases it can degrade it instead.
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1

# Use OLLAMA_HOST and OLLAMA_KEEP_ALIVE from the environment (set via
# docker-compose), falling back to sensible defaults if they are not set.
export OLLAMA_HOST="${OLLAMA_HOST:-0.0.0.0:11434}"
export OLLAMA_KEEP_ALIVE="${OLLAMA_KEEP_ALIVE:-24h}"

# [optional] To run on a single GPU, uncomment the line below; restricting the
# visible devices may improve performance.
# export ONEAPI_DEVICE_SELECTOR=level_zero:0

# If you have more than one dGPU, select the cards that match your
# configuration. The example below uses the first and second card.
# export ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1"

# NOTE(review): the binary is resolved relative to the current working
# directory, so the caller must start this script from the directory that
# contains `ollama`. If this is a container entrypoint, consider
# `exec ./ollama serve` so the server receives signals directly.
./ollama serve