# Patch overview: switches the image from the IPEX-LLM ollama bundle to an
# ollama release tarball that uses the Vulkan runner on Intel GPUs.
# NOTE(review): the diff text below appears whitespace-collapsed (the hunk
# headers declare many more lines than are physically present) — confirm it
# is regenerated from git before trying to apply it.
#
# Dockerfile changes:
#   - apt install: drops software-properties-common; adds zstd and
#     mesa-vulkan-drivers.
#   - Replaces the ollama-ipex-llm-2.3.0b20250725 tgz download with the
#     ollama-linux-amd64.tar.zst release selected by ARG OLLAMA_VERSION
#     (default 0.15.6), extracted with -C /usr; then deletes
#     /usr/lib/ollama/cuda_* and /usr/lib/ollama/mlx_*.
#   - Shortens the clean-up RUN: the /var/log removals are dropped, and
#     autoremove/autoclean now have stderr discarded and end in `; true`.
#   - Regroups the OLLAMA_* ENV settings and adds OLLAMA_VULKAN=1.
#   - Removes USE_XETLA, OLLAMA_USE_IPEX*, SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS,
#     ENABLE_SDP_FUSION and SYCL_CACHE_PERSISTENT.
#   - Adds EXPOSE 11434; ENTRYPOINT becomes ["/usr/bin/ollama"] with
#     CMD ["serve"] instead of running /start-ollama.sh through bash.
#
# docker-compose.yml changes:
#   - Build args: the three IPEXLLM_* args are replaced by OLLAMA_VERSION "0.15.6".
#   - Removes the ./start-ollama.sh bind mount and the ONEAPI_DEVICE_SELECTOR,
#     IPEX_LLM_NUM_CTX and LD_LIBRARY_PATH environment entries.
#   - Adds OLLAMA_VULKAN=1; OLLAMA_DEFAULT_KEEPALIVE loses its literal quotes
#     in both services; OLLAMA_NUM_PARALLEL=1 moves up in the list.
#
# NOTE(review): the new `wget -qO- ... | zstd -d | tar -xf -` RUN has no
#   visible `SHELL [..., "-o", "pipefail", ...]`; presumably a failed download
#   would not fail the build on its own — confirm.
# NOTE(review): "Clean up" is its own RUN after the install layers, so it
#   presumably does not shrink the earlier layers — confirm whether it is needed.
# NOTE(review): ENV IPEX_LLM_NPU_MTL=1 is kept although the IPEX-LLM bundle is
#   removed — verify something still reads it.
# NOTE(review): the Dockerfile sets OLLAMA_HOST=0.0.0.0:11434 while compose sets
#   OLLAMA_HOST=0.0.0.0 — confirm the two are meant to differ.
diff --git a/Dockerfile b/Dockerfile index 3ace191..7218a35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,16 +2,18 @@ FROM ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive \ TZ=America/Los_Angeles -# Base packages +# Base packages + Intel Vulkan ICD (ANV driver) RUN apt-get update && \ apt-get install --no-install-recommends -q -y \ - software-properties-common \ ca-certificates \ wget \ + zstd \ + mesa-vulkan-drivers \ ocl-icd-libopencl1 && \ rm -rf /var/lib/apt/lists/* # Intel GPU runtimes (release 26.05.37020.3) +# Provides level-zero, IGC, compute-runtime for Intel GPU kernel support RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \ wget https://github.com/oneapi-src/level-zero/releases/download/v1.28.0/level-zero_1.28.0+u24.04_amd64.deb && \ wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.28.4/intel-igc-core-2_2.28.4+20760_amd64.deb && \ @@ -23,81 +25,44 @@ RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \ wget https://github.com/intel/compute-runtime/releases/download/26.05.37020.3/libze-intel-gpu1_26.05.37020.3-0_amd64.deb && \ dpkg -i *.deb *.ddeb && rm -rf /tmp/gpu -# Install IPEX-LLM Portable Zip (ollama bundle v2.3.0-nightly) -RUN cd / && \ - wget https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz && \ - tar xvf ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz --strip-components=1 -C / && \ - rm ollama-ipex-llm-2.3.0b20250725-ubuntu.tgz +# Install official ollama (Vulkan runner provides Intel GPU acceleration) +ARG OLLAMA_VERSION=0.15.6 +RUN wget -qO- "https://github.com/ollama/ollama/releases/download/v${OLLAMA_VERSION}/ollama-linux-amd64.tar.zst" | \ + zstd -d | tar -xf - -C /usr && \ + # Remove CUDA and MLX runners — we only need CPU + Vulkan + rm -rf /usr/lib/ollama/cuda_* /usr/lib/ollama/mlx_* -# Clean up any temporary files +# Clean up RUN apt-get clean && \ - rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ - && find /var/log -type f -exec rm -f {} \; \ - && rm -rf /var/log/*-old \ - 
&& rm -rf /var/log/apt/* \ - && rm -rf /var/log/dpkg.log* \ - && rm -rf /var/log/alternatives.log \ - && rm -rf /var/log/installer/* \ - && rm -rf /var/log/unattended-upgrades/* \ - && apt autoremove -y --purge \ - && apt-get autoclean -y \ - && rm -rf /tmp/* /var/tmp/* - -# Best practices - -# Save model for faster loading -ENV OLLAMA_DEFAULT_KEEPALIVE=6h - -# Keep models loaded in memory -ENV OLLAMA_KEEP_ALIVE=24h - -# Load models in parallel -ENV OLLAMA_NUM_PARALLEL=1 -ENV OLLAMA_MAX_LOADED_MODELS=1 - -# Set bigger queue and VRAM for better performance -ENV OLLAMA_MAX_QUEUE=512 -ENV OLLAMA_MAX_VRAM=0 + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ + apt-get autoremove -y --purge 2>/dev/null; \ + apt-get autoclean -y 2>/dev/null; true # Serve ollama on all interfaces ENV OLLAMA_HOST=0.0.0.0:11434 -# Set ollama to use the Intel GPU +# Keep models loaded in memory +ENV OLLAMA_KEEP_ALIVE=24h +ENV OLLAMA_DEFAULT_KEEPALIVE=6h + +# Concurrency and resource limits +ENV OLLAMA_NUM_PARALLEL=1 +ENV OLLAMA_MAX_LOADED_MODELS=1 +ENV OLLAMA_MAX_QUEUE=512 +ENV OLLAMA_MAX_VRAM=0 + +# Enable Vulkan backend for Intel GPU acceleration +ENV OLLAMA_VULKAN=1 + +# Use all GPU layers ENV OLLAMA_NUM_GPU=999 - -## # Available low_bit format including sym_int4, sym_int8, fp16 etc. -ENV USE_XETLA=OFF +# Intel GPU tuning ENV ZES_ENABLE_SYSMAN=1 -# Set ollama to use the Intel GPU -# Set ollama to use the Intel GPU with IPEX-LLM -ENV OLLAMA_USE_IPEX=1 -# Set ollama to use the Intel GPU with IPEX-LLM and SYCL -ENV OLLAMA_USE_IPEX_SYCL=1 -# Set ollama to use the Intel GPU with IPEX-LLM and SYCL and Level Zero -ENV OLLAMA_USE_IPEX_SYCL_ZE=1 -# Set ollama to use the Intel GPU with IPEX-LLM and SYCL and Level Zero and XETLA -ENV OLLAMA_USE_IPEX_SYCL_ZE_XETLA=1 - -# # Available low_bit format including sym_int4, sym_int8, fp16 etc. 
-ENV USE_XETLA=OFF -ENV ZES_ENABLE_SYSMAN=1 - -# Add some intel specific adjustments -# https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/fastchat_quickstart.md - -ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 -ENV ENABLE_SDP_FUSION=1 - -# [optional] under most circumstances, the following environment variable may improve performance, -# but sometimes this may also cause performance degradation -ENV SYCL_CACHE_PERSISTENT=1 - -# For Intel Core™ Ultra Processors (Series 2) with processor number 2xxK or 2xxH (code name Arrow Lake): -#- IPEX_LLM_NPU_ARL=1 - -# For Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake): +# For Intel Core Ultra Processors (Series 1), code name Meteor Lake ENV IPEX_LLM_NPU_MTL=1 -ENTRYPOINT ["/bin/bash", "/start-ollama.sh"] +EXPOSE 11434 +ENTRYPOINT ["/usr/bin/ollama"] +CMD ["serve"] diff --git a/docker-compose.yml b/docker-compose.yml index 02e1b40..f8ad473 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,9 +4,7 @@ services: context: . 
dockerfile: Dockerfile args: - IPEXLLM_RELEASE_REPO: ipex-llm/ipex-llm - IPEXLLM_RELEASE_VERSON: v2.2.0 - IPEXLLM_PORTABLE_ZIP_FILENAME: ollama-ipex-llm-2.2.0-ubuntu.tgz + OLLAMA_VERSION: "0.15.6" container_name: ollama-intel-gpu restart: unless-stopped devices: @@ -15,27 +13,19 @@ services: volumes: - /tmp/.X11-unix:/tmp/.X11-unix - ollama-intel-gpu:/root/.ollama - - ./start-ollama.sh:/start-ollama.sh:ro shm_size: "16G" environment: - - ONEAPI_DEVICE_SELECTOR=level_zero:0 - #- SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 - #- SYCL_CACHE_PERSISTENT=1 - - IPEX_LLM_NUM_CTX=16384 - - LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/2024.2/lib - DISPLAY=${DISPLAY} - - OLLAMA_DEFAULT_KEEPALIVE="6h" - OLLAMA_HOST=0.0.0.0 + - OLLAMA_VULKAN=1 + - OLLAMA_DEFAULT_KEEPALIVE=6h - OLLAMA_KEEP_ALIVE=24h - OLLAMA_MAX_LOADED_MODELS=1 - OLLAMA_MAX_QUEUE=512 - OLLAMA_MAX_VRAM=0 + - OLLAMA_NUM_PARALLEL=1 #- OLLAMA_NOHISTORY=false #- OLLAMA_NOPRUNE=false - - OLLAMA_NUM_PARALLEL=1 - #- IPEXLLM_RELEASE_REPO=ipex-llm/ipex-llm - #- IPEXLLM_RELEASE_VERSON=v2.2.0 - #- IPEXLLM_PORTABLE_ZIP_FILENAME=ollama-ipex-llm-2.2.0-ubuntu.tgz ports: - 11434:11434 @@ -51,7 +41,7 @@ services: - ${OLLAMA_WEBUI_PORT-3000}:8080 environment: - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434 - - OLLAMA_DEFAULT_KEEPALIVE="6h" + - OLLAMA_DEFAULT_KEEPALIVE=6h #- OPENAI_API_BASE_URL= #- OPENAI_API_KEY= #