From 316eb23905818c84066e5b569a7e831888a3efb9 Mon Sep 17 00:00:00 2001
From: eleiton
Date: Tue, 11 Mar 2025 22:41:02 +0100
Subject: [PATCH] Stop building the ollama image manually from scratch and use the official intel image

---
 Dockerfile       | 47 +----------------------------------------------
 README.md        | 32 +++++++++++++++-----------------
 scripts/serve.sh | 18 ++++++------------
 3 files changed, 22 insertions(+), 75 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 083b7c4..e60f193 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,53 +1,8 @@
-FROM ubuntu:24.04
+FROM intelanalytics/ipex-llm-inference-cpp-xpu:latest
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
-ENV OLLAMA_NUM_GPU=999
 ENV OLLAMA_HOST=0.0.0.0:11434
 
-# Install base packages
-RUN apt update && \
-    apt install --no-install-recommends -q -y \
-    wget \
-    gnupg \
-    ca-certificates \
-    python3-pip \
-    pkg-config \
-    build-essential \
-    python3-dev \
-    cmake
-
-# Install Client GPUs
-# Reference: https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop-24-04-lts
-RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-    gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-    echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble unified" | \
-    tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
-    apt update && \
-    apt install -y libze-intel-gpu1 libze1 intel-opencl-icd clinfo intel-gsc && \
-    apt install -y libze-dev intel-ocloc && \
-    apt install --no-install-recommends -q -y \
-    udev \
-    level-zero \
-    libigdgmm12
-
-# Install oneAPI Base Toolkit
-# Reference: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=oneapi-toolkit&oneapi-toolkit-os=linux&oneapi-lin=apt
-RUN apt update && \
-    apt install -y gpg-agent wget && \
-    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-    gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
-    tee /etc/apt/sources.list.d/oneAPI.list && \
-    apt update && \
-    apt install -y intel-oneapi-base-toolkit
-
-# Install serve.sh script
 COPY ./scripts/serve.sh /usr/share/lib/serve.sh
 
-# Install ipex-llm[cpp] using pip
-# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md#1-install-ipex-llm-for-llamacpp
-RUN pip install --pre --upgrade ipex-llm[cpp]
-
-# Set entrypoint to run the serve.sh script
 ENTRYPOINT ["/bin/bash", "/usr/share/lib/serve.sh"]
diff --git a/README.md b/README.md
index c1abbce..4e94464 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,9 @@ This repository provides a convenient way to run Ollama as a backend and Open We
 
 ## Services
 1. Ollama
-   * Runs llama.cpp and Ollama with IPEX-LLM on your Linux computer with Intel GPU.
-   * Built following the guidelines from [Intel](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md).
-   * Uses [Ubuntu 24.04 LTS](https://ubuntu.com/blog/tag/ubuntu-24-04-lts), Ubuntu's latest stable version, as a container.
+   * Runs llama.cpp and Ollama with IPEX-LLM on your Linux computer with Intel Arc GPU.
+   * Built following the guidelines from [Intel](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/README.md).
+   * Uses the official [Intel ipex-llm docker image](https://hub.docker.com/r/intelanalytics/ipex-llm-inference-cpp-xpu) as the base container.
    * Uses the latest versions of required packages, prioritizing cutting-edge features over stability.
    * Exposes port `11434` for connecting other tools to your Ollama service.
 
@@ -29,7 +29,12 @@ $ podman compose up
 ```
 
 ## Validate
-You should see this partial output in your console, indicating your arc gpu was detected
+Run the following command to verify your Ollama instance is up and running
+```bash
+$ curl http://localhost:11434/
+Ollama is running
+```
+When using Open WebUI, you should see this partial output in your console, indicating your Arc GPU was detected
 ```bash
 [ollama-intel-arc] | Found 1 SYCL devices:
 [ollama-intel-arc] | | | | | |Max | |Max |Global | |
 [ollama-intel-arc] | |ID| Device Type| Name|Version|compute|Max work|sub |mem | |
 [ollama-intel-arc] | |--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|
 [ollama-intel-arc] | | 0| [level_zero:gpu:0]| Intel Arc Graphics| 12.71| 128| 1024| 32| 62400M| 1.6.32224+14|
 ```
-Run the following command to verify your Ollama instance is up and running
-```bash
-$ curl http://localhost:11434/
-Ollama is running
-```
+
 ## Usage
-* Run the services using the setup instructions above.
 * Open your web browser to http://localhost:3000 to access the Open WebUI web page.
 * For more information on using Open WebUI, refer to the official documentation at https://docs.openwebui.com/ .
@@ -55,7 +55,7 @@ $ podman compose down
 ```
 
 ### ollama-intel-arc Image
-If there are new updates in ipex-llm[cpp] or oneAPI or GPU drivers, you may want to update the Ollama image and containers, to stay updated.
+If there are new updates in the [ipex-llm docker image](https://hub.docker.com/r/intelanalytics/ipex-llm-inference-cpp-xpu), you may want to rebuild the Ollama image and recreate its containers to pick them up.
 
 First check any containers running the docker image, and remove them
 ```bash
@@ -89,16 +89,14 @@ You can connect directly to your Ollama container by running these commands:
 
 ```bash
 $ podman exec -it ollama-intel-arc /bin/bash
-> source /opt/intel/oneapi/setvars.sh
-> /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama -v
+$ /llm/ollama/ollama -v
 ```
 
 ## My development environment:
 * Core Ultra 7 155H
 * Intel® Arc™ Graphics (Meteor Lake-P)
-* Fedora 40
+* Fedora 41
 
-## References
-* [Intel guidelines for installing Linux GPU support](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md)
+## References
 * [Open WebUI documentation](https://docs.openwebui.com/)
-* [Ollama Quickstart](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md)
+* [Intel - ipex-llm](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/DockerGuides/docker_cpp_xpu_quickstart.md)
diff --git a/scripts/serve.sh b/scripts/serve.sh
index b0970ab..23ea0be 100644
--- a/scripts/serve.sh
+++ b/scripts/serve.sh
@@ -1,15 +1,9 @@
 #!/bin/sh
 
-# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md#3-run-ollama-serve
-export OLLAMA_NUM_GPU=999
-export no_proxy=localhost,127.0.0.1
-export ZES_ENABLE_SYSMAN=1
+cd /llm/scripts/
+source ipex-llm-init --gpu --device Arc
 
-source /opt/intel/oneapi/setvars.sh
-export SYCL_CACHE_PERSISTENT=1
-# [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
-export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
-# [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
-export ONEAPI_DEVICE_SELECTOR=level_zero:0
-
-/usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama serve
+mkdir -p /llm/ollama
+cd /llm/ollama
+init-ollama
+./ollama serve
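
The new base image reaches the GPU through the host's render device, so the container still needs `/dev/dri` passed through and port `11434` published, which the existing compose file is assumed to handle. A rough standalone equivalent, with a purely illustrative image tag and container name that are not part of this patch, could look like:

```bash
# Build the image from the simplified Dockerfile (tag name is hypothetical)
podman build -t ollama-intel-arc .

# Run it with the Intel GPU device mapped in and the Ollama port published
podman run -d --name ollama-intel-arc \
  --device /dev/dri \
  -p 11434:11434 \
  ollama-intel-arc
```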
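
With this change the Ollama binary inside the container lives at `/llm/ollama/ollama` (created by `init-ollama` in serve.sh), so a quick smoke test after `podman compose up` might look like the sketch below; the container name `ollama-intel-arc` is taken from the README output above, and the model name is only an example.

```bash
# The service should answer on the published port
curl http://localhost:11434/

# Check the Ollama build shipped by the ipex-llm base image
podman exec -it ollama-intel-arc /llm/ollama/ollama -v

# Optionally pull and chat with a model to confirm the GPU path end to end
podman exec -it ollama-intel-arc /llm/ollama/ollama run llama3.2
```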