Upgrade ollama from 0.9.3 (IPEX-LLM) to 0.15.6 (official) with Vulkan Intel GPU
Replace the IPEX-LLM portable zip (which bundles a patched ollama 0.9.3 with SYCL) with the official ollama 0.15.6 release, using the Vulkan backend for Intel GPU acceleration. The official ollama project does not ship a SYCL backend; Vulkan is its supported path for Intel GPUs.

- Use official ollama binary with Vulkan runner (OLLAMA_VULKAN=1)
- Strip CUDA/MLX runners from image to save space
- Add mesa-vulkan-drivers for Intel ANV Vulkan ICD
- Remove all IPEX-LLM env vars and wrapper scripts
- Simplify entrypoint to /usr/bin/ollama serve directly
- Clean up docker-compose.yml: remove IPEX build args and env vars

Tested: Intel Arc Graphics (MTL) detected, 17/17 layers offloaded to Vulkan0

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+33
-68
@@ -2,16 +2,18 @@ FROM ubuntu:24.04
|
|||||||
ENV DEBIAN_FRONTEND=noninteractive \
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
TZ=America/Los_Angeles
|
TZ=America/Los_Angeles
|
||||||
|
|
||||||
# Base packages
|
# Base packages + Intel Vulkan ICD (ANV driver)
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install --no-install-recommends -q -y \
|
apt-get install --no-install-recommends -q -y \
|
||||||
software-properties-common \
|
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
wget \
|
wget \
|
||||||
|
zstd \
|
||||||
|
mesa-vulkan-drivers \
|
||||||
ocl-icd-libopencl1 && \
|
ocl-icd-libopencl1 && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Intel GPU runtimes (release 26.05.37020.3)
|
# Intel GPU runtimes (release 26.05.37020.3)
|
||||||
|
# Provides level-zero, IGC, compute-runtime for Intel GPU kernel support
|
||||||
RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \
|
RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \
|
||||||
wget https://github.com/oneapi-src/level-zero/releases/download/v1.28.0/level-zero_1.28.0+u24.04_amd64.deb && \
|
wget https://github.com/oneapi-src/level-zero/releases/download/v1.28.0/level-zero_1.28.0+u24.04_amd64.deb && \
|
||||||
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.28.4/intel-igc-core-2_2.28.4+20760_amd64.deb && \
|
wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.28.4/intel-igc-core-2_2.28.4+20760_amd64.deb && \
|
||||||
@@ -23,81 +25,44 @@ RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \
|
|||||||
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/libze-intel-gpu1_26.05.37020.3-0_amd64.deb && \
|
wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/libze-intel-gpu1_26.05.37020.3-0_amd64.deb && \
|
||||||
dpkg -i *.deb *.ddeb && rm -rf /tmp/gpu
|
dpkg -i *.deb *.ddeb && rm -rf /tmp/gpu
|
||||||
|
|
||||||
# Install IPEX-LLM Portable Zip (ollama bundle v2.3.0-nightly)
|
# Install official ollama (Vulkan runner provides Intel GPU acceleration)
|
||||||
RUN cd / && \
|
ARG OLLAMA_VERSION=0.15.6
|
||||||
wget https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz && \
|
RUN wget -qO- "https://github.com/ollama/ollama/releases/download/v${OLLAMA_VERSION}/ollama-linux-amd64.tar.zst" | \
|
||||||
tar xvf ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz --strip-components=1 -C / && \
|
zstd -d | tar -xf - -C /usr && \
|
||||||
rm ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz
|
# Remove CUDA and MLX runners — we only need CPU + Vulkan
|
||||||
|
rm -rf /usr/lib/ollama/cuda_* /usr/lib/ollama/mlx_*
|
||||||
|
|
||||||
# Clean up any temporary files
|
# Clean up
|
||||||
RUN apt-get clean && \
|
RUN apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||||
&& find /var/log -type f -exec rm -f {} \; \
|
apt-get autoremove -y --purge 2>/dev/null; \
|
||||||
&& rm -rf /var/log/*-old \
|
apt-get autoclean -y 2>/dev/null; true
|
||||||
&& rm -rf /var/log/apt/* \
|
|
||||||
&& rm -rf /var/log/dpkg.log* \
|
|
||||||
&& rm -rf /var/log/alternatives.log \
|
|
||||||
&& rm -rf /var/log/installer/* \
|
|
||||||
&& rm -rf /var/log/unattended-upgrades/* \
|
|
||||||
&& apt autoremove -y --purge \
|
|
||||||
&& apt-get autoclean -y \
|
|
||||||
&& rm -rf /tmp/* /var/tmp/*
|
|
||||||
|
|
||||||
# Best practices
|
|
||||||
|
|
||||||
# Save model for faster loading
|
|
||||||
ENV OLLAMA_DEFAULT_KEEPALIVE=6h
|
|
||||||
|
|
||||||
# Keep models loaded in memory
|
|
||||||
ENV OLLAMA_KEEP_ALIVE=24h
|
|
||||||
|
|
||||||
# Load models in parallel
|
|
||||||
ENV OLLAMA_NUM_PARALLEL=1
|
|
||||||
ENV OLLAMA_MAX_LOADED_MODELS=1
|
|
||||||
|
|
||||||
# Set bigger queue and VRAM for better performance
|
|
||||||
ENV OLLAMA_MAX_QUEUE=512
|
|
||||||
ENV OLLAMA_MAX_VRAM=0
|
|
||||||
|
|
||||||
# Serve ollama on all interfaces
|
# Serve ollama on all interfaces
|
||||||
ENV OLLAMA_HOST=0.0.0.0:11434
|
ENV OLLAMA_HOST=0.0.0.0:11434
|
||||||
|
|
||||||
# Set ollama to use the Intel GPU
|
# Keep models loaded in memory
|
||||||
|
ENV OLLAMA_KEEP_ALIVE=24h
|
||||||
|
ENV OLLAMA_DEFAULT_KEEPALIVE=6h
|
||||||
|
|
||||||
|
# Concurrency and resource limits
|
||||||
|
ENV OLLAMA_NUM_PARALLEL=1
|
||||||
|
ENV OLLAMA_MAX_LOADED_MODELS=1
|
||||||
|
ENV OLLAMA_MAX_QUEUE=512
|
||||||
|
ENV OLLAMA_MAX_VRAM=0
|
||||||
|
|
||||||
|
# Enable Vulkan backend for Intel GPU acceleration
|
||||||
|
ENV OLLAMA_VULKAN=1
|
||||||
|
|
||||||
|
# Use all GPU layers
|
||||||
ENV OLLAMA_NUM_GPU=999
|
ENV OLLAMA_NUM_GPU=999
|
||||||
|
|
||||||
|
# Intel GPU tuning
|
||||||
## # Available low_bit format including sym_int4, sym_int8, fp16 etc.
|
|
||||||
ENV USE_XETLA=OFF
|
|
||||||
ENV ZES_ENABLE_SYSMAN=1
|
ENV ZES_ENABLE_SYSMAN=1
|
||||||
|
|
||||||
# Set ollama to use the Intel GPU
|
# For Intel Core Ultra Processors (Series 1), code name Meteor Lake
|
||||||
# Set ollama to use the Intel GPU with IPEX-LLM
|
|
||||||
ENV OLLAMA_USE_IPEX=1
|
|
||||||
# Set ollama to use the Intel GPU with IPEX-LLM and SYCL
|
|
||||||
ENV OLLAMA_USE_IPEX_SYCL=1
|
|
||||||
# Set ollama to use the Intel GPU with IPEX-LLM and SYCL and Level Zero
|
|
||||||
ENV OLLAMA_USE_IPEX_SYCL_ZE=1
|
|
||||||
# Set ollama to use the Intel GPU with IPEX-LLM and SYCL and Level Zero and XETLA
|
|
||||||
ENV OLLAMA_USE_IPEX_SYCL_ZE_XETLA=1
|
|
||||||
|
|
||||||
# # Available low_bit format including sym_int4, sym_int8, fp16 etc.
|
|
||||||
ENV USE_XETLA=OFF
|
|
||||||
ENV ZES_ENABLE_SYSMAN=1
|
|
||||||
|
|
||||||
# Add some intel specific adjustments
|
|
||||||
# https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/fastchat_quickstart.md
|
|
||||||
|
|
||||||
ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
|
||||||
ENV ENABLE_SDP_FUSION=1
|
|
||||||
|
|
||||||
# [optional] under most circumstances, the following environment variable may improve performance,
|
|
||||||
# but sometimes this may also cause performance degradation
|
|
||||||
ENV SYCL_CACHE_PERSISTENT=1
|
|
||||||
|
|
||||||
# For Intel Core™ Ultra Processors (Series 2) with processor number 2xxK or 2xxH (code name Arrow Lake):
|
|
||||||
#- IPEX_LLM_NPU_ARL=1
|
|
||||||
|
|
||||||
# For Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake):
|
|
||||||
ENV IPEX_LLM_NPU_MTL=1
|
ENV IPEX_LLM_NPU_MTL=1
|
||||||
|
|
||||||
ENTRYPOINT ["/bin/bash", "/start-ollama.sh"]
|
EXPOSE 11434
|
||||||
|
ENTRYPOINT ["/usr/bin/ollama"]
|
||||||
|
CMD ["serve"]
|
||||||
|
|||||||
+5
-15
@@ -4,9 +4,7 @@ services:
|
|||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
IPEXLLM_RELEASE_REPO: ipex-llm/ipex-llm
|
OLLAMA_VERSION: "0.15.6"
|
||||||
IPEXLLM_RELEASE_VERSON: v2.2.0
|
|
||||||
IPEXLLM_PORTABLE_ZIP_FILENAME: ollama-ipex-llm-2.2.0-ubuntu.tgz
|
|
||||||
container_name: ollama-intel-gpu
|
container_name: ollama-intel-gpu
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
devices:
|
devices:
|
||||||
@@ -15,27 +13,19 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||||
- ollama-intel-gpu:/root/.ollama
|
- ollama-intel-gpu:/root/.ollama
|
||||||
- ./start-ollama.sh:/start-ollama.sh:ro
|
|
||||||
shm_size: "16G"
|
shm_size: "16G"
|
||||||
environment:
|
environment:
|
||||||
- ONEAPI_DEVICE_SELECTOR=level_zero:0
|
|
||||||
#- SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
|
|
||||||
#- SYCL_CACHE_PERSISTENT=1
|
|
||||||
- IPEX_LLM_NUM_CTX=16384
|
|
||||||
- LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.2/lib
|
|
||||||
- DISPLAY=${DISPLAY}
|
- DISPLAY=${DISPLAY}
|
||||||
- OLLAMA_DEFAULT_KEEPALIVE="6h"
|
|
||||||
- OLLAMA_HOST=0.0.0.0
|
- OLLAMA_HOST=0.0.0.0
|
||||||
|
- OLLAMA_VULKAN=1
|
||||||
|
- OLLAMA_DEFAULT_KEEPALIVE=6h
|
||||||
- OLLAMA_KEEP_ALIVE=24h
|
- OLLAMA_KEEP_ALIVE=24h
|
||||||
- OLLAMA_MAX_LOADED_MODELS=1
|
- OLLAMA_MAX_LOADED_MODELS=1
|
||||||
- OLLAMA_MAX_QUEUE=512
|
- OLLAMA_MAX_QUEUE=512
|
||||||
- OLLAMA_MAX_VRAM=0
|
- OLLAMA_MAX_VRAM=0
|
||||||
|
- OLLAMA_NUM_PARALLEL=1
|
||||||
#- OLLAMA_NOHISTORY=false
|
#- OLLAMA_NOHISTORY=false
|
||||||
#- OLLAMA_NOPRUNE=false
|
#- OLLAMA_NOPRUNE=false
|
||||||
- OLLAMA_NUM_PARALLEL=1
|
|
||||||
#- IPEXLLM_RELEASE_REPO=ipex-llm/ipex-llm
|
|
||||||
#- IPEXLLM_RELEASE_VERSON=v2.2.0
|
|
||||||
#- IPEXLLM_PORTABLE_ZIP_FILENAME=ollama-ipex-llm-2.2.0-ubuntu.tgz
|
|
||||||
ports:
|
ports:
|
||||||
- 11434:11434
|
- 11434:11434
|
||||||
|
|
||||||
@@ -51,7 +41,7 @@ services:
|
|||||||
- ${OLLAMA_WEBUI_PORT-3000}:8080
|
- ${OLLAMA_WEBUI_PORT-3000}:8080
|
||||||
environment:
|
environment:
|
||||||
- OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
|
- OLLAMA_BASE_URL=http://ollama-intel-gpu:11434
|
||||||
- OLLAMA_DEFAULT_KEEPALIVE="6h"
|
- OLLAMA_DEFAULT_KEEPALIVE=6h
|
||||||
#- OPENAI_API_BASE_URL=
|
#- OPENAI_API_BASE_URL=
|
||||||
#- OPENAI_API_KEY=
|
#- OPENAI_API_KEY=
|
||||||
#
|
#
|
||||||
|
|||||||
Reference in New Issue
Block a user