Updates:
- Improve documentation
- Clean dependencies
- Use the official oneAPI Base Toolkit

Breaking change:
- Container name changed to ollama-intel-arc
Dockerfile
@@ -17,52 +17,36 @@ RUN apt update && \
 python3-dev \
 cmake

-# Install IPEX-LLM on Linux with Intel GPU
+# Install Client GPUs
+# Reference: https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop-24-04-lts
 RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
-gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
-echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \
+echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble unified" | \
-tee /etc/apt/sources.list.d/intel-gpu-jammy.list && \
+tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
 apt update && \
+apt install -y libze-intel-gpu1 libze1 intel-opencl-icd clinfo intel-gsc && \
+apt install -y libze-dev intel-ocloc && \
 apt install --no-install-recommends -q -y \
 udev \
 level-zero \
-libigdgmm12 \
+libigdgmm12
-intel-level-zero-gpu \
-intel-opencl-icd

-# Install OneAPI packages
-RUN wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
-gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg && \
-echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
+# Install oneAPI Base Toolkit
+# Reference: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?packages=oneapi-toolkit&oneapi-toolkit-os=linux&oneapi-lin=apt
+RUN apt update && \
+apt install -y gpg-agent wget && \
+wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | \
+gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
 tee /etc/apt/sources.list.d/oneAPI.list && \
 apt update && \
-apt install --no-install-recommends -q -y \
-intel-oneapi-common-vars \
-intel-oneapi-common-oneapi-vars \
-intel-oneapi-diagnostics-utility \
-intel-oneapi-compiler-dpcpp-cpp \
-intel-oneapi-dpcpp-ct \
-intel-oneapi-mkl \
-intel-oneapi-mkl-devel \
-intel-oneapi-mpi \
-intel-oneapi-mpi-devel \
-intel-oneapi-dal \
-intel-oneapi-dal-devel \
-intel-oneapi-ippcp \
-intel-oneapi-ippcp-devel \
-intel-oneapi-ipp \
-intel-oneapi-ipp-devel \
-intel-oneapi-tlt \
-intel-oneapi-ccl \
-intel-oneapi-ccl-devel \
-intel-oneapi-dnnl-devel \
-intel-oneapi-dnnl \
-intel-oneapi-tcm-1.0
+apt install -y intel-oneapi-base-toolkit

 # Install serve.sh script
 COPY ./scripts/serve.sh /usr/share/lib/serve.sh

 # Install ipex-llm[cpp] using pip
+# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md#1-install-ipex-llm-for-llamacpp
 RUN pip install --pre --upgrade ipex-llm[cpp]

 # Set entrypoint to run the serve.sh script
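A minimal sanity check for the reworked driver stack, assuming the image is built and tagged `ollama-intel-arc:latest` as in the compose file below and that the Arc GPU is exposed via `/dev/dri`; the entrypoint override is an assumption based on the serve.sh entrypoint set at the end of this Dockerfile.

```bash
# Build the image and list the OpenCL devices visible inside the container.
# clinfo is installed by the Dockerfile above; the entrypoint is overridden so
# serve.sh does not start. Image tag and device path are assumptions.
podman build -t ollama-intel-arc:latest .
podman run --rm --device /dev/dri:/dev/dri --entrypoint clinfo ollama-intel-arc:latest -l
```

If the drivers are wired up correctly, the listing should include the Intel Arc device alongside any CPU OpenCL platforms.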
README.md
@@ -1,4 +1,4 @@
-# ollama-intel-arc
+# Run Ollama using your Intel Arc GPU

 A Docker-based setup for running Ollama as a backend and Open WebUI as a frontend, leveraging Intel Arc Series GPUs on Linux systems.

@@ -13,34 +13,92 @@ This repository provides a convenient way to run Ollama as a backend and Open We
 * Built following the guidelines from [Intel](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/llama_cpp_quickstart.md).
 * Uses [Ubuntu 24.04 LTS](https://ubuntu.com/blog/tag/ubuntu-24-04-lts), Ubuntu's latest stable version, as a container.
 * Uses the latest versions of required packages, prioritizing cutting-edge features over stability.
 * Exposes port `11434` for connecting other tools to your Ollama service.
-* To validate this setup, run: `curl http://localhost:11434/`

 2. Open WebUI
 * The official distribution of Open WebUI.
 * `WEBUI_AUTH` is turned off for authentication-free usage.
-* `ENABLE_OPENAI_API` and ENABLE_OLLAMA_API flags are set to off and on, respectively, allowing interactions via Ollama only.
+* `ENABLE_OPENAI_API` and `ENABLE_OLLAMA_API` flags are set to off and on, respectively, allowing interactions via Ollama only.

 ## Setup
+Run the following commands to start your Ollama instance:
-### Fedora
 ```bash
 $ git clone https://github.com/eleiton/ollama-intel-arc.git
 $ cd ollama-intel-arc
 $ podman compose up
 ```

-### Others (Ubuntu 24.04 or newer)
+## Validate
+You should see this partial output in your console, indicating your Arc GPU was detected:
 ```bash
-$ git clone https://github.com/eleiton/ollama-intel-arc.git
-$ cd ollama-intel-arc
-$ docker compose up
+[ollama-intel-arc] | Found 1 SYCL devices:
+[ollama-intel-arc] | |  |                   |                        |       |Max    |        |Max  |Global |              |
+[ollama-intel-arc] | |  |                   |                        |       |compute|Max work|sub  |mem    |              |
+[ollama-intel-arc] | |ID|        Device Type|                    Name|Version|units  |group   |group|size   |Driver version|
+[ollama-intel-arc] | |--|-------------------|------------------------|-------|-------|--------|-----|-------|--------------|
+[ollama-intel-arc] | | 0| [level_zero:gpu:0]|      Intel Arc Graphics|  12.71|    128|    1024|   32| 62400M|  1.6.32224+14|
+```
+Run the following command to verify your Ollama instance is up and running:
+```bash
+$ curl http://localhost:11434/
+Ollama is running
 ```
 ## Usage
 * Run the services using the setup instructions above.
 * Open your web browser to http://localhost:3000 to access the Open WebUI web page.
 * For more information on using Open WebUI, refer to the official documentation at https://docs.openwebui.com/ .

+## Updating the images
+Before any updates, be sure to stop your containers:
+```bash
+$ podman compose down
+```
+
+### ollama-intel-arc Image
+If there are new updates to ipex-llm[cpp], oneAPI, or the GPU drivers, you may want to rebuild the Ollama image and recreate its containers to stay up to date.
+
+First, check for any containers running the image, and remove them:
+```bash
+$ podman ps -a
+CONTAINER ID  IMAGE
+111479fde20f  localhost/ollama-intel-arc:latest
+
+$ podman rm <CONTAINER ID>
+```
+
+Then go ahead and remove the image:
+```bash
+$ podman image list
+REPOSITORY                  TAG
+localhost/ollama-intel-arc  latest
+
+$ podman rmi <IMAGE ID>
+```
+After that, run compose up again to rebuild the image from scratch:
+```bash
+$ podman compose up
+```
+### open-webui Image
+If there are new updates to Open WebUI, just do a pull and the new image will be retrieved automatically:
+```bash
+$ podman compose pull
+```
+
+## Manually connecting to your Ollama container
+You can connect directly to your Ollama container by running these commands:
+
+```bash
+$ podman exec -it ollama-intel-arc /bin/bash
+> source /opt/intel/oneapi/setvars.sh
+> /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama -v
+```
+
+## My development environment
+* Core Ultra 7 155H
+* Intel® Arc™ Graphics (Meteor Lake-P)
+* Fedora 40
+
 ## References
 * [Intel guidelines for installing Linux GPU support](https://github.com/intel-analytics/ipex-llm/blob/main/docs/mddocs/Quickstart/install_linux_gpu.md)
 * [Open WebUI documentation](https://docs.openwebui.com/)
+* [Ollama Quickstart](https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md)
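The new Validate section stops at `Ollama is running`. As a follow-up check (not part of this commit), the service can be exercised through the same exposed port; the model name below is only an example and is not bundled with this repository.

```bash
# Pull a model and run a one-off prompt against the exposed Ollama API.
# "llama3.2" is a placeholder; any model from the Ollama library works.
curl http://localhost:11434/api/pull -d '{"name": "llama3.2"}'
curl http://localhost:11434/api/generate -d '{"model": "llama3.2", "prompt": "Hello", "stream": false}'
```

With `"stream": false` the generate call returns a single JSON response, which makes it easy to eyeball from a terminal.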
docker-compose.yml
@@ -1,11 +1,9 @@
-version: "1.0"
+version: '3'
 services:
-  ollama-ipex-llm:
+  ollama-intel-arc:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: ollama-ipex-llm
-    image: ollama-ipex-llm:latest
+    build: .
+    image: ollama-intel-arc:latest
+    container_name: ollama-intel-arc
     restart: unless-stopped
     devices:
       - /dev/dri:/dev/dri
@@ -19,7 +17,7 @@ services:
     volumes:
       - open-webui-volume:/app/backend/data
     depends_on:
-      - ollama-ipex-llm
+      - ollama-intel-arc
     ports:
       - 3000:8080
     environment:
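This is where the breaking change lands: the service, image, and container are all renamed from `ollama-ipex-llm` to `ollama-intel-arc`. A minimal one-time cleanup for existing deployments, assuming podman as used in the README; the old names are taken from the left-hand side of this diff, and the `localhost/` image prefix is an assumption based on how podman tags locally built images.

```bash
# One-time migration after the rename: remove the old container and image,
# then bring the stack up again so the new image is built under the new name.
podman compose down
podman rm -f ollama-ipex-llm
podman rmi localhost/ollama-ipex-llm:latest
podman compose up
```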
(Binary image file updated: before 61 KiB, after 45 KiB; contents not shown.)

scripts/serve.sh
@@ -1,10 +1,15 @@
 #!/bin/sh

-source /opt/intel/oneapi/setvars.sh
-export USE_XETLA=OFF
+# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md#3-run-ollama-serve
+export OLLAMA_NUM_GPU=999
+export no_proxy=localhost,127.0.0.1
 export ZES_ENABLE_SYSMAN=1
+
+source /opt/intel/oneapi/setvars.sh
 export SYCL_CACHE_PERSISTENT=1
+# [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
 export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
+# [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
 export ONEAPI_DEVICE_SELECTOR=level_zero:0

 /usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama serve
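For readability, the resulting serve.sh assembled from the hunk above; nothing outside the lines shown in this diff is assumed.

```bash
#!/bin/sh

# Reference: https://github.com/intel/ipex-llm/blob/main/docs/mddocs/Quickstart/ollama_quickstart.md#3-run-ollama-serve
export OLLAMA_NUM_GPU=999
export no_proxy=localhost,127.0.0.1
export ZES_ENABLE_SYSMAN=1

source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
# [optional] under most circumstances, the following environment variable may improve performance, but sometimes this may also cause performance degradation
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
# [optional] if you want to run on single GPU, use below command to limit GPU may improve performance
export ONEAPI_DEVICE_SELECTOR=level_zero:0

/usr/local/lib/python3.12/dist-packages/bigdl/cpp/libs/ollama serve
```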