Replace the IPEX-LLM portable zip (bundling a patched ollama 0.9.3 with SYCL) with the official ollama 0.15.6 release using the Vulkan backend for Intel GPU acceleration. The official ollama project does not ship a SYCL backend; Vulkan is their supported path for Intel GPUs. - Use official ollama binary with Vulkan runner (OLLAMA_VULKAN=1) - Strip CUDA/MLX runners from image to save space - Add mesa-vulkan-drivers for Intel ANV Vulkan ICD - Remove all IPEX-LLM env vars and wrapper scripts - Simplify entrypoint to /usr/bin/ollama serve directly - Clean up docker-compose.yml: remove IPEX build args and env vars Tested: Intel Arc Graphics (MTL) detected, 17/17 layers offloaded to Vulkan0 Co-authored-by: Cursor <cursoragent@cursor.com>
62 lines
1.6 KiB
YAML
62 lines
1.6 KiB
YAML
# docker-compose.yml — Ollama on Intel GPU (Vulkan backend) + Open WebUI frontend.
# The ollama image is built locally (see Dockerfile) from the official ollama
# release and uses Vulkan (mesa ANV ICD) for Intel GPU acceleration.
services:
  ollama-intel-gpu:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Quoted so YAML keeps it a string, not a float.
        OLLAMA_VERSION: "0.15.6"
    container_name: ollama-intel-gpu
    restart: unless-stopped
    devices:
      # Pass through only the render node; uncomment the line below instead to
      # expose the whole DRI directory (e.g. if the render node name differs).
      # - /dev/dri:/dev/dri
      - /dev/dri/renderD128:/dev/dri/renderD128
    volumes:
      - /tmp/.X11-unix:/tmp/.X11-unix
      # Named volume keeps pulled models across container re-creation.
      - ollama-intel-gpu:/root/.ollama
    shm_size: "16G"
    environment:
      - DISPLAY=${DISPLAY}
      # Bind on all interfaces so the webui container can reach the API.
      - OLLAMA_HOST=0.0.0.0
      # Enable the Vulkan runner (Intel GPU path in official ollama builds).
      - OLLAMA_VULKAN=1
      # NOTE(review): OLLAMA_DEFAULT_KEEPALIVE is not a documented ollama
      # variable and overlaps with OLLAMA_KEEP_ALIVE below — confirm which one
      # the deployment actually relies on; kept both to preserve behavior.
      - OLLAMA_DEFAULT_KEEPALIVE=6h
      - OLLAMA_KEEP_ALIVE=24h
      - OLLAMA_MAX_LOADED_MODELS=1
      - OLLAMA_MAX_QUEUE=512
      - OLLAMA_MAX_VRAM=0
      - OLLAMA_NUM_PARALLEL=1
      #- OLLAMA_NOHISTORY=false
      #- OLLAMA_NOPRUNE=false
    ports:
      # Quoted: unquoted digits:digits is a YAML 1.1 sexagesimal-integer trap.
      - "11434:11434"

  ollama-webui:
    image: ghcr.io/open-webui/open-webui:latest
    container_name: ollama-webui
    volumes:
      - ./webui/data:/app/backend/data
      # - ollama-webui:/app/backend/data
    depends_on:
      - ollama-intel-gpu
    ports:
      # Host port configurable via OLLAMA_WEBUI_PORT; defaults to 3000.
      - "${OLLAMA_WEBUI_PORT-3000}:8080"
    environment:
      # Reach the ollama API via the compose service name on the default network.
      - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
      # NOTE(review): this variable targets ollama, not open-webui — presumably
      # a copy-paste leftover; verify before removing.
      - OLLAMA_DEFAULT_KEEPALIVE=6h
      #- OPENAI_API_BASE_URL=
      #- OPENAI_API_KEY=
      #
      # AUTOMATIC1111_BASE_URL="http://localhost:7860"
      - WEBUI_AUTH=False
      - ENABLE_RAG_WEB_SEARCH=True

      # DO NOT TRACK
      - SCARF_NO_ANALYTICS=true
      - DO_NOT_TRACK=true
      - ANONYMIZED_TELEMETRY=false
    extra_hosts:
      # Allow the container to reach services on the docker host.
      - host.docker.internal:host-gateway
    restart: unless-stopped

volumes:
  ollama-webui: {}
  ollama-intel-gpu: {}