diff --git a/Dockerfile b/Dockerfile
index 2d8b9f2..083b7c4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,52 +17,36 @@ RUN apt update && \
     python3-dev \
     cmake
 
-# Install IPEX-LLM on Linux with Intel GPU
+# Install Client GPUs
+# Reference: https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop-24-04-lts
 RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-    gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-    echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
-    tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \
+    gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+    echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble unified" | \
+    tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
     apt update && \
+    apt install -y libze-intel-gpu1 libze1 intel-opencl-icd clinfo intel-gsc && \
+    apt install -y libze-dev intel-ocloc && \
     apt install --no-install-recommends -q -y \
         udev \
         level-zero \
-        libigdgmm12 \
-        intel-level-zero-gpu \
-        intel-opencl-icd
+        libigdgmm12
 
-# Install OneAPI packages
-RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-    gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
+# Install oneAPI Base Toolkit
+# Reference: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=oneapi-toolkit&oneapi-toolkit-os=linux&oneapi-lin=apt
+RUN apt update && \
+    apt install -y gpg-agent wget && \
+    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
+    gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
     tee /etc/apt/sources.list.d/oneAPI.list && \
     apt update && \
-    apt install --no-install-recommends -q -y \
-        intel-oneapi-common-vars \
-        intel-oneapi-common-oneapi-vars \
-        intel-oneapi-diagnostics-utility \
-        intel-oneapi-compiler-dpcpp-cpp \
-        intel-oneapi-dpcpp-ct \
-        intel-oneapi-mkl \
-        intel-oneapi-mkl-devel \
-        intel-oneapi-mpi \
-        intel-oneapi-mpi-devel \
-        intel-oneapi-dal \
-        intel-oneapi-dal-devel \
-        intel-oneapi-ippcp \
-        intel-oneapi-ippcp-devel \
-        intel-oneapi-ipp \
-        intel-oneapi-ipp-devel \
-        intel-oneapi-tlt \
-        intel-oneapi-ccl \
-        intel-oneapi-ccl-devel \
-        intel-oneapi-dnnl-devel \
-        intel-oneapi-dnnl \
-        intel-oneapi-tcm-1.0
+    apt install -y intel-oneapi-base-toolkit
 
 # Install serve.sh script
 COPY ./scripts/serve.sh /usr/share/lib/serve.sh
 
 # Install ipex-llm[cpp] using pip
+# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md#1-install-ipex-llm-for-llamacpp
 RUN pip install --pre --upgrade ipex-llm[cpp]
 
 # Set entrypoint to run the serve.sh script
diff --git a/README.md b/README.md
index 80dfdc5..c1abbce 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# ollama-intel-arc
+# Run Ollama using your Intel Arc GPU
 
 A Docker-based setup for running Ollama as a backend and Open WebUI as a frontend, leveraging Intel Arc Series GPUs on Linux systems.
 
@@ -13,34 +13,92 @@ This repository provides a convenient way to run Ollama as a backend and Open We
    * Built following the guidelines from [Intel](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md).
    * Uses [Ubuntu 24.04 LTS](https://ubuntu.com/blog/tag/ubuntu-24-04-lts), Ubuntu's latest stable version, as a container.
    * Uses the latest versions of required packages, prioritizing cutting-edge features over stability.
-   * Exposes port `11434` for connecting other tools to your Ollama service.
-     * To validate this setup, run: `curl http://localhost:11434/`
+   * Exposes port `11434` for connecting other tools to your Ollama service.
 2. Open WebUI
    * The official distribution of Open WebUI.
    * `WEBUI_AUTH` is turned off for authentication-free usage.
-   * `ENABLE_OPENAI_API` and ENABLE_OLLAMA_API flags are set to off and on, respectively, allowing interactions via Ollama only.
+   * `ENABLE_OPENAI_API` and `ENABLE_OLLAMA_API` flags are set to off and on, respectively, allowing interactions via Ollama only.
 
 ## Setup
-
-### Fedora
+Run the following commands to start your Ollama instance:
 
 ```bash
 $ git clone https://github.com/eleiton/ollama-intel-arc.git
 $ cd ollama-intel-arc
 $ podman compose up
 ```
 
-### Others (Ubuntu 24.04 or newer)
+## Validate
+You should see this partial output in your console, indicating your Arc GPU was detected:
 ```bash
-$ git clone https://github.com/eleiton/ollama-intel-arc.git
-$ cd ollama-intel-arc
-$ docker compose up
+[ollama-intel-arc]  | Found 1 SYCL devices:
+[ollama-intel-arc]  | |  |                   |                                       |       |Max    |        |Max  |Global |               |
+[ollama-intel-arc]  | |  |                   |                                       |       |compute|Max work|sub  |mem    |               |
+[ollama-intel-arc]  | |ID|        Device Type|                                   Name|Version|units  |group   |group|size   | Driver version|
+[ollama-intel-arc]  | |--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------|
+[ollama-intel-arc]  | | 0| [level_zero:gpu:0]|                     Intel Arc Graphics|  12.71|    128|    1024|   32| 62400M|   1.6.32224+14|
+```
+Run the following command to verify your Ollama instance is up and running:
+```bash
+$ curl http://localhost:11434/
+Ollama is running
 ```
 
 ## Usage
 * Run the services using the setup instructions above.
 * Open your web browser to http://localhost:3000 to access the Open WebUI web page.
 * For more information on using Open WebUI, refer to the official documentation at https://docs.openwebui.com/ .
 
+## Updating the images
+Before any updates, be sure to stop your containers:
+```bash
+$ podman compose down
+```
+
+### ollama-intel-arc Image
+If there are new releases of ipex-llm[cpp], oneAPI, or the GPU drivers, you may want to rebuild the Ollama image and containers to pick them up.
+
+First, check for any containers running the image and remove them:
+```bash
+$ podman ps -a
+CONTAINER ID  IMAGE
+111479fde20f  localhost/ollama-intel-arc:latest
+
+$ podman rm 111479fde20f
+```
+
+Then go ahead and remove the image itself:
+```bash
+$ podman image list
+REPOSITORY                   TAG
+localhost/ollama-intel-arc   latest
+
+$ podman rmi localhost/ollama-intel-arc
+```
+After that, run compose up again to rebuild the image from scratch:
+```bash
+$ podman compose up
+```
+
+### open-webui Image
+If there are new updates in Open WebUI, just do a pull and the new changes will be retrieved automatically.
+```bash
+$ podman compose pull
+```
+
+## Manually connecting to your Ollama container
+You can connect directly to your Ollama container by running these commands:
+
+```bash
+$ podman exec -it ollama-intel-arc /bin/bash
+> source /opt/intel/oneapi/setvars.sh
+> /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama -v
+```
+
+## My development environment
+* Core Ultra 7 155H
+* Intel® Arc™ Graphics (Meteor Lake-P)
+* Fedora 40
+
 ## References
 * [Intel guidelines for installing Linux GPU support](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md)
 * [Open WebUI documentation](https://docs.openwebui.com/)
+* [Ollama Quickstart](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md)
diff --git a/docker-compose.yml b/docker-compose.yml
index 692cf3b..60cf3df 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,11 +1,9 @@
-version: "1.0"
+version: '3'
 services:
-  ollama-ipex-llm:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: ollama-ipex-llm
-    image: ollama-ipex-llm:latest
+  ollama-intel-arc:
+    build: .
+    image: ollama-intel-arc:latest
+    container_name: ollama-intel-arc
     restart: unless-stopped
     devices:
       - /dev/dri:/dev/dri
@@ -19,7 +17,7 @@ services:
     volumes:
       - open-webui-volume:/app/backend/data
     depends_on:
-      - ollama-ipex-llm
+      - ollama-intel-arc
     ports:
       - 3000:8080
     environment:
diff --git a/resources/open-webui.png b/resources/open-webui.png
index 0506a08..c6687b7 100644
Binary files a/resources/open-webui.png and b/resources/open-webui.png differ
diff --git a/scripts/serve.sh b/scripts/serve.sh
index e666702..b0970ab 100644
--- a/scripts/serve.sh
+++ b/scripts/serve.sh
@@ -1,10 +1,15 @@
 #!/bin/sh
 
-source /opt/intel/oneapi/setvars.sh
-export USE_XETLA=OFF
+# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md#3-run-ollama-serve
+export OLLAMA_NUM_GPU=999
+export no_proxy=localhost,127.0.0.1
 export ZES_ENABLE_SYSMAN=1
+
+source /opt/intel/oneapi/setvars.sh
 export SYCL_CACHE_PERSISTENT=1
+# [optional] Under most circumstances this improves performance, but it can occasionally cause a regression instead.
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+# [optional] Restrict Ollama to a single GPU; limiting it to one device may improve performance on multi-GPU systems.
 export ONEAPI_DEVICE_SELECTOR=level_zero:0
 
 /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama serve
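A minimal end-to-end smoke test of the resulting stack, assuming the containers from this change are up and using `llama3.2` purely as an example model name (any model from the Ollama library works):

```bash
# Pull an example model through the Ollama HTTP API exposed on port 11434
$ curl http://localhost:11434/api/pull -d '{"name": "llama3.2"}'

# Request a completion; it should be served by the Arc GPU
# (watch the ollama-intel-arc container logs to see the SYCL device in use)
$ curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "prompt": "Why is the sky blue?", "stream": false}'
```

Once pulled, the same model is available for interactive use in Open WebUI at http://localhost:3000.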