diff --git a/Dockerfile b/Dockerfile
index 0b0f2bd..90f773e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,36 +1,82 @@
 FROM ubuntu:24.04
-ENV DEBIAN_FRONTEND=noninteractive
-ENV TZ=america/los_angeles
-
+ENV DEBIAN_FRONTEND=noninteractive \
+    TZ=America/Los_Angeles
 # Base packages
-RUN apt update && \
-    apt install --no-install-recommends -q -y \
-    software-properties-common \
-    ca-certificates \
-    wget \
-    ocl-icd-libopencl1
+RUN apt-get update && \
+    apt-get install --no-install-recommends -q -y \
+    software-properties-common \
+    ca-certificates \
+    wget \
+    ocl-icd-libopencl1 && \
+    rm -rf /var/lib/apt/lists/*
 
-# Intel GPU compute user-space drivers
-RUN mkdir -p /tmp/gpu && \
-    cd /tmp/gpu && \
-    wget https://github.com/oneapi-src/level-zero/releases/download/v1.21.9/level-zero_1.21.9+u24.04_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-core-2_2.8.3+18762_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.8.3/intel-igc-opencl-2_2.8.3+18762_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-level-zero-gpu_1.6.32961.7_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/intel-opencl-icd_25.09.32961.7_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/25.09.32961.7/libigdgmm12_22.6.0_amd64.deb && \
-    dpkg -i *.deb && \
-    rm *.deb
+# Intel GPU runtimes (release 25.18.33578.6)
+RUN mkdir -p /tmp/gpu && cd /tmp/gpu && \
+    wget https://github.com/oneapi-src/level-zero/releases/download/v1.22.4/level-zero_1.22.4+u24.04_amd64.deb && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.11.7/intel-igc-core-2_2.11.7+19146_amd64.deb && \
+    wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.11.7/intel-igc-opencl-2_2.11.7+19146_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/25.18.33578.6/intel-ocloc-dbgsym_25.18.33578.6-0_amd64.ddeb && \
+    wget https://github.com/intel/compute-runtime/releases/download/25.18.33578.6/intel-ocloc_25.18.33578.6-0_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/25.18.33578.6/intel-opencl-icd_25.18.33578.6-0_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/25.18.33578.6/libigdgmm12_22.7.0_amd64.deb && \
+    wget https://github.com/intel/compute-runtime/releases/download/25.18.33578.6/libze-intel-gpu1_25.18.33578.6-0_amd64.deb && \
+    dpkg -i *.deb *.ddeb && rm -rf /tmp/gpu
 
-# Install Ollama Portable Zip
-ARG IPEXLLM_RELEASE_REPO=ipex-llm/ipex-llm
-ARG IPEXLLM_RELEASE_VERSON=v2.2.0
-ARG IPEXLLM_PORTABLE_ZIP_FILENAME=ollama-ipex-llm-2.2.0-ubuntu.tgz
+# Install IPEX-LLM Portable Zip (ollama bundle v2.3.0-nightly)
 RUN cd / && \
-    wget https://github.com/${IPEXLLM_RELEASE_REPO}/releases/download/${IPEXLLM_RELEASE_VERSON}/${IPEXLLM_PORTABLE_ZIP_FILENAME} && \
-    tar xvf ${IPEXLLM_PORTABLE_ZIP_FILENAME} --strip-components=1 -C /
+    wget https://github.com/ipex-llm/ipex-llm/releases/download/v2.3.0-nightly/ollama-ipex-llm-2.3.0b20250612-ubuntu.tgz && \
+    tar xvf ollama-ipex-llm-2.3.0b20250612-ubuntu.tgz --strip-components=1 -C / && \
+    rm ollama-ipex-llm-2.3.0b20250612-ubuntu.tgz
 
+# Clean up apt caches, temporary files, and logs
+RUN apt-get clean && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
+    find /var/log -type f -delete
+
+# Best practices
+
+# Default keep-alive for models loaded without an explicit keep_alive
+ENV OLLAMA_DEFAULT_KEEPALIVE=6h
+
+# Keep models loaded in memory
+ENV OLLAMA_KEEP_ALIVE=24h
+
+# Limit to one parallel request and one loaded model at a time
+ENV OLLAMA_NUM_PARALLEL=1
+ENV OLLAMA_MAX_LOADED_MODELS=1
+
+# Larger request queue; OLLAMA_MAX_VRAM=0 means no VRAM cap
+ENV OLLAMA_MAX_QUEUE=512
+ENV OLLAMA_MAX_VRAM=0
 
+# Serve ollama on all interfaces
 ENV OLLAMA_HOST=0.0.0.0:11434
+
+# Offload all model layers to the Intel GPU
+ENV OLLAMA_NUM_GPU=999
+
+# SYCL/Level Zero settings recommended by the IPEX-LLM docs
+ENV USE_XETLA=OFF
+ENV ZES_ENABLE_SYSMAN=1
+
+# Route ollama through the IPEX-LLM SYCL/Level Zero/XETLA backends
+ENV OLLAMA_USE_IPEX=1
+ENV OLLAMA_USE_IPEX_SYCL=1
+ENV OLLAMA_USE_IPEX_SYCL_ZE=1
+ENV OLLAMA_USE_IPEX_SYCL_ZE_XETLA=1
+
+# Intel-specific adjustments
+# https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/fastchat_quickstart.md
+ENV SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+ENV ENABLE_SDP_FUSION=1
+
+# [optional] Usually improves performance, but may occasionally degrade it
+ENV SYCL_CACHE_PERSISTENT=1
+
+# For Intel Core™ Ultra Processors (Series 2) with processor number 2xxK or 2xxH (code name Arrow Lake), set instead:
+# ENV IPEX_LLM_NPU_ARL=1
+
+# For Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake):
+ENV IPEX_LLM_NPU_MTL=1
 
 ENTRYPOINT ["/bin/bash", "/start-ollama.sh"]
diff --git a/docker-compose.yml b/docker-compose.yml
index bd1e216..94a7f23 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -29,9 +29,11 @@ services:
       #- OLLAMA_NOHISTORY=false
       #- OLLAMA_NOPRUNE=false
       - OLLAMA_NUM_PARALLEL=1
-
+      #- IPEXLLM_RELEASE_REPO=ipex-llm/ipex-llm
+      #- IPEXLLM_RELEASE_VERSON=v2.2.0
+      #- IPEXLLM_PORTABLE_ZIP_FILENAME=ollama-ipex-llm-2.2.0-ubuntu.tgz
     ports:
-      - 11434:11434
+      - 11434:11434
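
Note: nothing in this diff maps the GPU into the container. For the user-space runtimes installed above to reach the hardware, the service typically also needs the host's /dev/dri render nodes passed through. A minimal compose sketch; the service name ollama is an assumption here, adjust it to the actual service name in this repo's docker-compose.yml:

    services:
      ollama:              # assumed service name, not shown in this hunk
        devices:
          - /dev/dri:/dev/dri   # expose Intel GPU render nodes to the container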
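A quick smoke test of the resulting image; the tag ollama-ipex-llm is a placeholder, any tag works:

    docker build -t ollama-ipex-llm .
    docker run --rm --device /dev/dri -p 11434:11434 ollama-ipex-llm
    # In a second shell, verify the API responds on the remapped port:
    curl http://localhost:11434/api/tags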