From 316eb23905818c84066e5b569a7e831888a3efb9 Mon Sep 17 00:00:00 2001
From: eleiton
Date: Tue, 11 Mar 2025 22:41:02 +0100
Subject: [PATCH] Stop building the ollama image manually from scratch and use the official intel image

---
 Dockerfile       | 47 +----------------------------------------------
 README.md        | 32 +++++++++++++++-----------------
 scripts/serve.sh | 18 ++++++------------
 3 files changed, 22 insertions(+), 75 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 083b7c4..e60f193 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,53 +1,8 @@
-FROM ubuntu:24.04
+FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
-ENV OLLAMA_NUM_GPU=999
 ENV OLLAMA_HOST=0.0.0.0:11434
 
-# Install base packages
-RUN apt update && \
-    apt install --no-install-recommends -q -y \
-    wget \
-    gnupg \
-    ca-certificates \
-    python3-pip \
-    pkg-config \
-    build-essential \
-    python3-dev \
-    cmake
-
-# Install Client GPUs
-# Reference: https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop-24-04-lts
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-    gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-    echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble unified" | \
-    tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
-    apt update && \
-    apt install -y libze-intel-gpu1 libze1 intel-opencl-icd clinfo intel-gsc && \
-    apt install -y libze-dev intel-ocloc && \
-    apt install --no-install-recommends -q -y \
-    udev \
-    level-zero \
-    libigdgmm12
-
-# Install oneAPI Base Toolkit
-# Reference: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=oneapi-toolkit&oneapi-toolkit-os=linux&oneapi-lin=apt
-RUN apt update && \
-    apt install -y gpg-agent wget && \
-    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-    gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
-    tee /etc/apt/sources.list.d/oneAPI.list && \
-    apt update && \
-    apt install -y intel-oneapi-base-toolkit
-
-# Install serve.sh script
 COPY ./scripts/serve.sh /usr/share/lib/serve.sh
 
-# Install ipex-llm[cpp] using pip
-# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md#1-install-ipex-llm-for-llamacpp
-RUN pip install --pre --upgrade ipex-llm[cpp]
-
-# Set entrypoint to run the serve.sh script
 ENTRYPOINT ["/bin/bash", "/usr/share/lib/serve.sh"]
diff --git a/README.md b/README.md
index c1abbce..4e94464 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,9 @@ This repository provides a convenient way to run Ollama as a backend and Open We
 
 ## Services
 1. Ollama
-   * Runs llama.cpp and Ollama with IPEX-LLM on your Linux computer with Intel GPU.
-   * Built following the guidelines from [Intel](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md).
-   * Uses [Ubuntu 24.04 LTS](https://ubuntu.com/blog/tag/ubuntu-24-04-lts), Ubuntu's latest stable version, as a container.
+   * Runs llama.cpp and Ollama with IPEX-LLM on your Linux computer with Intel Arc GPU.
+   * Built following the guidelines from [Intel](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/README.md).
+   * Uses the official [Intel ipex-llm docker image](https://hub.docker.com/r/intelanalytics/ipex-llm-inference-cpp-xpu) as the base container.
    * Uses the latest versions of required packages, prioritizing cutting-edge features over stability.
    * Exposes port `11434` for connecting other tools to your Ollama service.
 
@@ -29,7 +29,12 @@ $ podman compose up
 ```
 
 ## Validate
-You should see this partial output in your console, indicating your arc gpu was detected
+Run the following command to verify your Ollama instance is up and running
+```bash
+$ curl http://localhost:11434/
+Ollama is running
+```
+When using Open WebUI, you should see this partial output in your console, indicating your Arc GPU was detected
 ```bash
 [ollama-intel-arc] | Found 1 SYCL devices:
 [ollama-intel-arc] | | | | | |Max | |Max |Global | |
 [ollama-intel-arc] | |ID| Device Type| Name|Version|compute|Max work|sub |mem | |
 [ollama-intel-arc] | |--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|
 [ollama-intel-arc] | | 0| [level_zero:gpu:0]| Intel Arc Graphics| 12.71| 128| 1024| 32| 62400M| 1.6.32224+14|
 ```
-Run the following command to verify your Ollama instance is up and running
-```bash
-$ curl http://localhost:11434/
-Ollama is running
-```
+
 ## Usage
-* Run the services using the setup instructions above.
 * Open your web browser to http://localhost:3000 to access the Open WebUI web page.
 * For more information on using Open WebUI, refer to the official documentation at https://docs.openwebui.com/ .
@@ -55,7 +55,7 @@ $ podman compose down
 ```
 
 ### ollama-intel-arc Image
-If there are new updates in ipex-llm[cpp] or oneAPI or GPU drivers, you may want to update the Ollama image and containers, to stay updated.
+If there are new updates in the [ipex-llm docker image](https://hub.docker.com/r/intelanalytics/ipex-llm-inference-cpp-xpu), you may want to rebuild the Ollama image and recreate its containers to pick them up.
 
 First check any containers running the docker image, and remove them
 ```bash
@@ -89,16 +89,14 @@ You can connect directly to your Ollama container by running these commands:
 
 ```bash
 $ podman exec -it ollama-intel-arc /bin/bash
-> source /opt/intel/oneapi/setvars.sh
-> /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama -v
+$ /llm/ollama/ollama -v
 ```
 
 ## My development environment:
 * Core Ultra 7 155H
 * Intel® Arc™ Graphics (Meteor Lake-P)
-* Fedora 40
+* Fedora 41
 
-## References
-* [Intel guidelines for installing Linux GPU support](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md)
+## References
 * [Open WebUI documentation](https://docs.openwebui.com/)
-* [Ollama Quickstart](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md)
+* [Intel - ipex-llm](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md)
diff --git a/scripts/serve.sh b/scripts/serve.sh
index b0970ab..23ea0be 100644
--- a/scripts/serve.sh
+++ b/scripts/serve.sh
@@ -1,15 +1,9 @@
 #!/bin/sh
 
-# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md#3-run-ollama-serve
-export OLLAMA_NUM_GPU=999
-export no_proxy=localhost,127.0.0.1
-export ZES_ENABLE_SYSMAN=1
+cd /llm/scripts/
+source ipex-llm-init --gpu --device Arc
 
-source /opt/intel/oneapi/setvars.sh
-export SYCL_CACHE_PERSISTENT=1
-# [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-# [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
-export ONEAPI_DEVICE_SELECTOR=level_zero:0
-
-/usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama serve
+mkdir -p /llm/ollama
+cd /llm/ollama
+init-ollama
+./ollama serve
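
The new base image reaches the GPU through the host's render device, so the container still needs `/dev/dri` passed through and port `11434` published, which the existing compose file is assumed to handle. A rough standalone equivalent, with a purely illustrative image tag and container name that are not part of this patch, could look like:

```bash
# Build the image from the simplified Dockerfile (tag name is hypothetical)
podman build -t ollama-intel-arc .

# Run it with the Intel GPU device mapped in and the Ollama port published
podman run -d --name ollama-intel-arc \
  --device /dev/dri \
  -p 11434:11434 \
  ollama-intel-arc
```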
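
With this change the Ollama binary inside the container lives at `/llm/ollama/ollama` (created by `init-ollama` in serve.sh), so a quick smoke test after `podman compose up` might look like the sketch below; the container name `ollama-intel-arc` is taken from the README output above, and the model name is only an example.

```bash
# The service should answer on the published port
curl http://localhost:11434/

# Check the Ollama build shipped by the ipex-llm base image
podman exec -it ollama-intel-arc /llm/ollama/ollama -v

# Optionally pull and chat with a model to confirm the GPU path end to end
podman exec -it ollama-intel-arc /llm/ollama/ollama run llama3.2
```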