From 2fc526511f673d19a73b1a4ac5bc50814c0a3831 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 14:56:56 -0800 Subject: [PATCH 1/4] Update to use new ipex portable .zip packages --- Dockerfile | 40 +++++++++++++++++++++++++++++++--------- README.md | 26 +++++++------------------- docker-compose-wsl2.yml | 35 ----------------------------------- docker-compose.yml | 5 +---- 4 files changed, 39 insertions(+), 67 deletions(-) delete mode 100644 docker-compose-wsl2.yml diff --git a/Dockerfile b/Dockerfile index 69a065d..f98bfa8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,34 @@ -FROM intelanalytics/ipex-llm-inference-cpp-xpu:2.2.0-SNAPSHOT +FROM ubuntu:24.04 +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=America/Los_Angeles + +# Base packages +RUN apt update && \ + apt install --no-install-recommends -q -y \ + software-properties-common \ + ca-certificates \ + wget \ + ocl-icd-libopencl1 + +# gnupg \ + +# Intel GPU compute user-space drivers +RUN mkdir -p /tmp/gpu && \ + cd /tmp/gpu && \ + wget https://github.com/oneapi-src/level-zero/releases/download/v1.19.2/level-zero_1.19.2+u24.04_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \ + dpkg -i *.deb && \ + rm *.deb + +# Install Ollama Portable Zip +RUN cd / && \ + wget https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz && \ + tar xvf 
ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz --strip-components=1 -C / -ENV ZES_ENABLE_SYSMAN=1 ENV OLLAMA_HOST=0.0.0.0:11434 -RUN mkdir -p /llm/ollama; \ - cd /llm/ollama; \ - init-ollama; - -WORKDIR /llm/ollama - -ENTRYPOINT ["./ollama", "serve"] +ENTRYPOINT ["/bin/bash", "/start-ollama.sh"] diff --git a/README.md b/README.md index de0800a..ab0a1d9 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,15 @@ # ollama-intel-gpu -This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux or Windows WSL2. +This repo illlustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux ## Screenshot ![screenshot](doc/screenshot.png) # Prerequisites -* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer). -* Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows) -* Intel ARC series GPU. Tested with Intel ARC A770 16GB, Intel(R) Core(TM) Ultra 5 125H integrated GPU (Meteor Lake), and Intel(R) Core(TM) Intel Ultra 7 258V integrated GPU (Lunar Lake) - -*Note:* This branch uses the upstream ipex container published by Intel. See the alternate branch [alternate_base_image](https://github.com/mattcurf/ollama-intel-gpu/tree/alternate_base_image) for an equivalent Dockerfile which builds everything from the published packages directly. - +* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. 
Tested with Ubuntu 24.04.02) +* Installed Docker and Docker-compose tools +* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU) + # Usage The following will build the Ollama with Intel ARC GPU support, and compose those with the public docker image based on OpenWEB UI from https://github.com/open-webui/open-webui @@ -23,20 +21,10 @@ $ cd ollama-intel-gpu $ docker compose up ``` -Windows WSL2: -```bash -$ git clone https://github.com/mattcurf/ollama-intel-gpu -$ cd ollama-intel-gpu -$ docker-compose -f docker-compose-wsl2.yml up -``` - -*Note:* you will see the following message. This is expected and harmless, as the docker image 'ollama-intel-gpu' is built locally. -``` -ollama-intel-gpu Warning pull access denied for ollama-intel-gpu, repository does not exist or may require 'docker login': denied: requested access to the resource is denied -``` +*Note:* If you have multiple GPU's installed (like integrated and discrete), set the ONEAPI_DEVICE_DELECTOR environment variable in the docker compose file to select the correct device to use. Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM # References * https://dgpu-docs.intel.com/driver/client/overview.html -* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html +* https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_portable_zip_quickstart.md diff --git a/docker-compose-wsl2.yml b/docker-compose-wsl2.yml deleted file mode 100644 index 7efe6c6..0000000 --- a/docker-compose-wsl2.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: "3.9" -services: - ollama-intel-gpu: - build: - context: . 
- dockerfile: Dockerfile - container_name: ollama-intel-gpu - image: ollama-intel-gpu:latest - restart: always - devices: - - /dev/dri:/dev/dri - - /dev/dxg:/dev/dxg - volumes: - - /usr/lib/wsl:/usr/lib/wsl - - /tmp/.X11-unix:/tmp/.X11-unix - - ollama-intel-gpu:/root/.ollama - environment: - - DISPLAY=${DISPLAY} - ollama-webui: - image: ghcr.io/open-webui/open-webui - container_name: ollama-webui - volumes: - - ollama-webui:/app/backend/data - depends_on: - - ollama-intel-gpu - ports: - - ${OLLAMA_WEBUI_PORT-3000}:8080 - environment: - - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434 - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped -volumes: - ollama-webui: {} - ollama-intel-gpu: {} diff --git a/docker-compose.yml b/docker-compose.yml index f06a16d..570c93a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,19 +1,16 @@ -version: "3.9" services: ollama-intel-gpu: build: context: . dockerfile: Dockerfile container_name: ollama-intel-gpu - image: ollama-intel-gpu:latest restart: always devices: - /dev/dri:/dev/dri volumes: - - /tmp/.X11-unix:/tmp/.X11-unix - ollama-intel-gpu:/root/.ollama environment: - - DISPLAY=${DISPLAY} + - ONEAPI_DEVICE_SELECTOR=level_zero:0 ollama-webui: image: ghcr.io/open-webui/open-webui container_name: ollama-webui From dd84c202a7dc38d36ca2ec290b876c35c3bc77f9 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:00:46 -0800 Subject: [PATCH 2/4] Minor fixes --- Dockerfile | 2 -- README.md | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f98bfa8..cf5fa32 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ RUN apt update && \ wget \ ocl-icd-libopencl1 -# gnupg \ - # Intel GPU compute user-space drivers RUN mkdir -p /tmp/gpu && \ cd /tmp/gpu && \ diff --git a/README.md b/README.md index ab0a1d9..b95dfee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ollama-intel-gpu -This repo illlustrates the use of Ollama with support for 
Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux +This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux ## Screenshot ![screenshot](doc/screenshot.png) From fed3cf9ba0e9e2608cd10584d05a6f282c13f806 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:07:23 -0800 Subject: [PATCH 3/4] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b95dfee..322ee59 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux +## Important Note + +All Ollama based ipex-llm defects should be reported directly to the ipex-llm project at https://github.com/intel/ipex-llm + ## Screenshot ![screenshot](doc/screenshot.png) @@ -21,7 +25,7 @@ $ cd ollama-intel-gpu $ docker compose up ``` -*Note:* If you have multiple GPU's installed (like integrated and discrete), set the ONEAPI_DEVICE_DELECTOR environment variable in the docker compose file to select the correct device to use. +*Note:* If you have multiple GPUs installed (like integrated and discrete), set the ONEAPI_DEVICE_SELECTOR environment variable in the docker compose file to select the intended device to use. Then launch your web browser to http://localhost:3000 to launch the web ui. 
Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM From fa579db49227b28602ec993c460d5809d1f36271 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:26:01 -0800 Subject: [PATCH 4/4] Increase context window size --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 570c93a..117bac2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: - ollama-intel-gpu:/root/.ollama environment: - ONEAPI_DEVICE_SELECTOR=level_zero:0 + - IPEX_LLM_NUM_CTX=16384 ollama-webui: image: ghcr.io/open-webui/open-webui container_name: ollama-webui