From 2fc526511f673d19a73b1a4ac5bc50814c0a3831 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 14:56:56 -0800 Subject: [PATCH 1/4] Update to use new ipex portable .zip packages --- Dockerfile | 40 +++++++++++++++++++++++++++++++--------- README.md | 26 +++++++------------------- docker-compose-wsl2.yml | 35 ----------------------------------- docker-compose.yml | 5 +---- 4 files changed, 39 insertions(+), 67 deletions(-) delete mode 100644 docker-compose-wsl2.yml diff --git a/Dockerfile b/Dockerfile index 69a065d..f98bfa8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,34 @@ -FROM intelanalytics/ipex-llm-inference-cpp-xpu:2.2.0-SNAPSHOT +FROM ubuntu:24.04 +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=America/Los_Angeles + +# Base packages +RUN apt update && \ + apt install --no-install-recommends -q -y \ + software-properties-common \ + ca-certificates \ + wget \ + ocl-icd-libopencl1 + +# gnupg \ + +# Intel GPU compute user-space drivers +RUN mkdir -p /tmp/gpu && \ + cd /tmp/gpu && \ + wget https://github.com/oneapi-src/level-zero/releases/download/v1.19.2/level-zero_1.19.2+u24.04_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb && \ + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb && \ + dpkg -i *.deb && \ + rm *.deb + +# Install Ollama Portable Zip +RUN cd / && \ + wget https://github.com/intel/ipex-llm/releases/download/v2.2.0-nightly/ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz && \ + tar xvf 
ollama-0.5.4-ipex-llm-2.2.0b20250218-ubuntu.tgz --strip-components=1 -C / -ENV ZES_ENABLE_SYSMAN=1 ENV OLLAMA_HOST=0.0.0.0:11434 -RUN mkdir -p /llm/ollama; \ - cd /llm/ollama; \ - init-ollama; - -WORKDIR /llm/ollama - -ENTRYPOINT ["./ollama", "serve"] +ENTRYPOINT ["/bin/bash", "/start-ollama.sh"] diff --git a/README.md b/README.md index de0800a..ab0a1d9 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,15 @@ # ollama-intel-gpu -This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux or Windows WSL2. +This repo illlustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux ## Screenshot ![screenshot](doc/screenshot.png) # Prerequisites -* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. Tested with Ubuntu 24.04), or Windows 11 with WSL2 (graphics driver [101.5445](https://www.intel.com/content/www/us/en/download/785597/intel-arc-iris-xe-graphics-windows.html) or newer). -* Installed Docker and Docker-compose tools (for Linux) or Docker Desktop (for Windows) -* Intel ARC series GPU. Tested with Intel ARC A770 16GB, Intel(R) Core(TM) Ultra 5 125H integrated GPU (Meteor Lake), and Intel(R) Core(TM) Intel Ultra 7 258V integrated GPU (Lunar Lake) - -*Note:* This branch uses the upstream ipex container published by Intel. See the alternate branch [alternate_base_image](https://github.com/mattcurf/ollama-intel-gpu/tree/alternate_base_image) for an equivalent Dockerfile which builds everything from the published packages directly. - +* Ubuntu 24.04 or newer (for Intel ARC GPU kernel driver support. 
Tested with Ubuntu 24.04.02) +* Installed Docker and Docker-compose tools +* Intel ARC series GPU (tested with Intel ARC A770 16GB and Intel(R) Core(TM) Ultra 5 125H integrated GPU) + # Usage The following will build the Ollama with Intel ARC GPU support, and compose those with the public docker image based on OpenWEB UI from https://github.com/open-webui/open-webui @@ -23,20 +21,10 @@ $ cd ollama-intel-gpu $ docker compose up ``` -Windows WSL2: -```bash -$ git clone https://github.com/mattcurf/ollama-intel-gpu -$ cd ollama-intel-gpu -$ docker-compose -f docker-compose-wsl2.yml up -``` - -*Note:* you will see the following message. This is expected and harmless, as the docker image 'ollama-intel-gpu' is built locally. -``` -ollama-intel-gpu Warning pull access denied for ollama-intel-gpu, repository does not exist or may require 'docker login': denied: requested access to the resource is denied -``` +*Note:* If you have multiple GPU's installed (like integrated and discrete), set the ONEAPI_DEVICE_DELECTOR environment variable in the docker compose file to select the correct device to use. Then launch your web browser to http://localhost:3000 to launch the web ui. Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM # References * https://dgpu-docs.intel.com/driver/client/overview.html -* https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/ollama_quickstart.html +* https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_portable_zip_quickstart.md diff --git a/docker-compose-wsl2.yml b/docker-compose-wsl2.yml deleted file mode 100644 index 7efe6c6..0000000 --- a/docker-compose-wsl2.yml +++ /dev/null @@ -1,35 +0,0 @@ -version: "3.9" -services: - ollama-intel-gpu: - build: - context: . 
- dockerfile: Dockerfile - container_name: ollama-intel-gpu - image: ollama-intel-gpu:latest - restart: always - devices: - - /dev/dri:/dev/dri - - /dev/dxg:/dev/dxg - volumes: - - /usr/lib/wsl:/usr/lib/wsl - - /tmp/.X11-unix:/tmp/.X11-unix - - ollama-intel-gpu:/root/.ollama - environment: - - DISPLAY=${DISPLAY} - ollama-webui: - image: ghcr.io/open-webui/open-webui - container_name: ollama-webui - volumes: - - ollama-webui:/app/backend/data - depends_on: - - ollama-intel-gpu - ports: - - ${OLLAMA_WEBUI_PORT-3000}:8080 - environment: - - OLLAMA_BASE_URL=http://ollama-intel-gpu:11434 - extra_hosts: - - host.docker.internal:host-gateway - restart: unless-stopped -volumes: - ollama-webui: {} - ollama-intel-gpu: {} diff --git a/docker-compose.yml b/docker-compose.yml index f06a16d..570c93a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,19 +1,16 @@ -version: "3.9" services: ollama-intel-gpu: build: context: . dockerfile: Dockerfile container_name: ollama-intel-gpu - image: ollama-intel-gpu:latest restart: always devices: - /dev/dri:/dev/dri volumes: - - /tmp/.X11-unix:/tmp/.X11-unix - ollama-intel-gpu:/root/.ollama environment: - - DISPLAY=${DISPLAY} + - ONEAPI_DEVICE_SELECTOR=level_zero:0 ollama-webui: image: ghcr.io/open-webui/open-webui container_name: ollama-webui From dd84c202a7dc38d36ca2ec290b876c35c3bc77f9 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:00:46 -0800 Subject: [PATCH 2/4] Minor fixes --- Dockerfile | 2 -- README.md | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f98bfa8..cf5fa32 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,6 @@ RUN apt update && \ wget \ ocl-icd-libopencl1 -# gnupg \ - # Intel GPU compute user-space drivers RUN mkdir -p /tmp/gpu && \ cd /tmp/gpu && \ diff --git a/README.md b/README.md index ab0a1d9..b95dfee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ollama-intel-gpu -This repo illlustrates the use of Ollama with support for 
Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux +This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux ## Screenshot ![screenshot](doc/screenshot.png) From fed3cf9ba0e9e2608cd10584d05a6f282c13f806 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:07:23 -0800 Subject: [PATCH 3/4] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b95dfee..322ee59 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ This repo illustrates the use of Ollama with support for Intel ARC GPU based via ipex-llm and Ollama Portable ZIP support. Run the recently released [deepseek-r1](https://github.com/deepseek-ai/DeepSeek-R1) model on your local Intel ARC GPU based PC using Linux +## Important Note + +All Ollama based ipex-llm defects should be reported directly to the ipex-llm project at https://github.com/intel/ipex-llm + ## Screenshot ![screenshot](doc/screenshot.png) @@ -21,7 +25,7 @@ $ cd ollama-intel-gpu $ docker compose up ``` -*Note:* If you have multiple GPU's installed (like integrated and discrete), set the ONEAPI_DEVICE_DELECTOR environment variable in the docker compose file to select the correct device to use. +*Note:* If you have multiple GPUs installed (like integrated and discrete), set the ONEAPI_DEVICE_SELECTOR environment variable in the docker compose file to select the intended device to use. Then launch your web browser to http://localhost:3000 to launch the web ui. 
Create a local OpenWeb UI credential, then click the settings icon in the top right of the screen, then select 'Models', then click 'Show', then download a model like 'llama3.1:8b-instruct-q8_0' for Intel ARC A770 16GB VRAM From fa579db49227b28602ec993c460d5809d1f36271 Mon Sep 17 00:00:00 2001 From: Matt Curfman Date: Wed, 19 Feb 2025 15:26:01 -0800 Subject: [PATCH 4/4] Increase context window size --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 570c93a..117bac2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: - ollama-intel-gpu:/root/.ollama environment: - ONEAPI_DEVICE_SELECTOR=level_zero:0 + - IPEX_LLM_NUM_CTX=16384 ollama-webui: image: ghcr.io/open-webui/open-webui container_name: ollama-webui