Replace the IPEX-LLM portable zip (bundling a patched ollama 0.9.3 with SYCL) with the official ollama 0.15.6 release using the Vulkan backend for Intel GPU acceleration. The official ollama project does not ship a SYCL backend; Vulkan is their supported path for Intel GPUs. - Use official ollama binary with Vulkan runner (OLLAMA_VULKAN=1) - Strip CUDA/MLX runners from image to save space - Add mesa-vulkan-drivers for Intel ANV Vulkan ICD - Remove all IPEX-LLM env vars and wrapper scripts - Simplify entrypoint to /usr/bin/ollama serve directly - Clean up docker-compose.yml: remove IPEX build args and env vars Tested: Intel Arc Graphics (MTL) detected, 17/17 layers offloaded to Vulkan0 Co-authored-by: Cursor <cursoragent@cursor.com>
69 lines
2.7 KiB
Docker
69 lines
2.7 KiB
Docker
FROM ubuntu:24.04
|
|
ENV DEBIAN_FRONTEND=noninteractive \
|
|
TZ=America/Los_Angeles
|
|
|
|
# Base packages + Intel Vulkan ICD (ANV driver)
|
|
RUN apt-get update && \
|
|
apt-get install --no-install-recommends -q -y \
|
|
ca-certificates \
|
|
wget \
|
|
zstd \
|
|
mesa-vulkan-drivers \
|
|
ocl-icd-libopencl1 && \
|
|
rm -rf /var/lib/apt/lists/*
|
|
|
|
# Intel GPU runtimes (release 26.05.37020.3)
|
|
# Provides level-zero, IGC, compute-runtime for Intel GPU kernel support
|
|
RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \
|
|
wget https://github.com/oneapi-src/level-zero/releases/download/v1.28.0/level-zero_1.28.0+u24.04_amd64.deb && \
|
|
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.28.4/intel-igc-core-2_2.28.4+20760_amd64.deb && \
|
|
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.28.4/intel-igc-opencl-2_2.28.4+20760_amd64.deb && \
|
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/intel-ocloc-dbgsym_26.05.37020.3-0_amd64.ddeb && \
|
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/intel-ocloc_26.05.37020.3-0_amd64.deb && \
|
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/intel-opencl-icd_26.05.37020.3-0_amd64.deb && \
|
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/libigdgmm12_22.9.0_amd64.deb && \
|
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/libze-intel-gpu1_26.05.37020.3-0_amd64.deb && \
|
|
dpkg -i *.deb *.ddeb && rm -rf /tmp/gpu
|
|
|
|
# Install official ollama (Vulkan runner provides Intel GPU acceleration)
|
|
ARG OLLAMA_VERSION=0.15.6
|
|
RUN wget -qO- "https://github.com/ollama/ollama/releases/download/v${OLLAMA_VERSION}/ollama-linux-amd64.tar.zst" | \
|
|
zstd -d | tar -xf - -C /usr && \
|
|
# Remove CUDA and MLX runners — we only need CPU + Vulkan
|
|
rm -rf /usr/lib/ollama/cuda_* /usr/lib/ollama/mlx_*
|
|
|
|
# Clean up
|
|
RUN apt-get clean && \
|
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
|
apt-get autoremove -y --purge 2>/dev/null; \
|
|
apt-get autoclean -y 2>/dev/null; true
|
|
|
|
# Serve ollama on all interfaces
|
|
ENV OLLAMA_HOST=0.0.0.0:11434
|
|
|
|
# Keep models loaded in memory
|
|
ENV OLLAMA_KEEP_ALIVE=24h
|
|
ENV OLLAMA_DEFAULT_KEEPALIVE=6h
|
|
|
|
# Concurrency and resource limits
|
|
ENV OLLAMA_NUM_PARALLEL=1
|
|
ENV OLLAMA_MAX_LOADED_MODELS=1
|
|
ENV OLLAMA_MAX_QUEUE=512
|
|
ENV OLLAMA_MAX_VRAM=0
|
|
|
|
# Enable Vulkan backend for Intel GPU acceleration
|
|
ENV OLLAMA_VULKAN=1
|
|
|
|
# Use all GPU layers
|
|
ENV OLLAMA_NUM_GPU=999
|
|
|
|
# Intel GPU tuning
|
|
ENV ZES_ENABLE_SYSMAN=1
|
|
|
|
# For Intel Core Ultra Processors (Series 1), code name Meteor Lake
|
|
ENV IPEX_LLM_NPU_MTL=1
|
|
|
|
EXPOSE 11434
|
|
ENTRYPOINT ["/usr/bin/ollama"]
|
|
CMD ["serve"]
|