mirror of https://github.com/Laurent2916/nio-llm.git (synced 2024-11-21 05:38:48 +00:00)

Commit 904dde744f (parent 541007380a):
♻️ big refactor, use llama server and openai python library

.gitignore (vendored) | 1
@@ -1,4 +1,5 @@
 .direnv
+.venv
 result
 
 # https://github.com/github/gitignore/blob/main/Python.gitignore

.vscode/extensions.json (vendored) | 4
@@ -1,7 +1,9 @@
 {
     "recommendations": [
-        "editorconfig.editorconfig",
         "charliermarsh.ruff",
+        "editorconfig.editorconfig",
+        "ms-python.black-formatter",
         "ms-python.python",
+        "tamasfe.even-better-toml",
     ]
 }

.vscode/settings.json (vendored) | 40
@@ -1,30 +1,38 @@
 {
-    "python.analysis.typeCheckingMode": "basic",
-    "python.formatting.provider": "black",
+    // nice editor settings
     "editor.formatOnSave": true,
-    "python.linting.enabled": true,
-    "python.linting.lintOnSave": true,
-    "python.linting.mypyEnabled": true,
-    "python.linting.banditEnabled": true,
-    "python.languageServer": "Pylance",
-    "[python]": {
-        "editor.codeActionsOnSave": {
-            "source.organizeImports": true
-        }
-    },
-    "terminal.integrated.env.linux": {
-        "PYTHONPATH": "${workspaceFolder}"
-    },
+    "editor.formatOnPaste": true,
+    "editor.rulers": [
+        120
+    ],
+    // editorconfig redundancy
+    "files.insertFinalNewline": true,
+    "files.trimTrailingWhitespace": true,
+    // hide unimportant files/folders
     "files.exclude": {
+        // defaults
         "**/.git": true,
         "**/.svn": true,
         "**/.hg": true,
         "**/CVS": true,
         "**/.DS_Store": true,
         "**/Thumbs.db": true,
+        // annoying
         "**/__pycache__": true,
         "**/.mypy_cache": true,
-        "**/.direnv": true,
         "**/.ruff_cache": true,
+        "**/*.tmp": true,
     },
+    // python settings
+    "python.analysis.typeCheckingMode": "basic", // get ready to be annoyed
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.codeActionsOnSave": {
+            "source.organizeImports.ruff": true,
+            "source.fixAll": true,
+        }
+    },
+    "terminal.integrated.env.linux": {
+        "PYTHONPATH": "${workspaceFolder}/src/",
+    },
 }

docker-compose.yml (new file) | 29
@@ -0,0 +1,29 @@
+version: "3.8"
+services:
+  server:
+    build:
+      context: src/nio_llm_server/
+      dockerfile: Dockerfile
+    ports:
+      - 8000:8000
+    volumes:
+      - /home/laurent/.cache/huggingface/hub/:/root/.cache/huggingface/hub/
+    healthcheck:
+      test: ["CMD", "nc", "-z", "-v", "localhost", "8000"]
+    restart: unless-stopped
+
+  client:
+    build:
+      context: src/nio_llm/
+      dockerfile: Dockerfile
+    environment:
+      - NIOLLM_HOMESERVER=$NIOLLM_HOMESERVER
+      - NIOLLM_USERNAME=$NIOLLM_USERNAME
+      - NIOLLM_DEVICE_ID=$NIOLLM_DEVICE_ID
+      - NIOLLM_ROOM=$NIOLLM_ROOM
+      - NIOLLM_PASSWORD=$NIOLLM_PASSWORD
+      - NIOLLM_OPENAI_API_ENDPOINT=$NIOLLM_OPENAI_API_ENDPOINT
+    depends_on:
+      server:
+        condition: service_healthy
+    restart: unless-stopped

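Note: the healthcheck above only verifies that the llama server accepts TCP connections on port 8000; the service_healthy condition then gates the client container's startup. As a rough illustration (a sketch, not part of this commit), the same check can be expressed in Python:

import socket

def server_is_up(host: str = "localhost", port: int = 8000, timeout: float = 2.0) -> bool:
    # same idea as the compose healthcheck `nc -z -v localhost 8000`:
    # succeed if a TCP connection to the server can be opened within `timeout` seconds
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

if __name__ == "__main__":
    print("llama server reachable:", server_is_up())
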
flake.lock (deleted) | 85
@@ -1,85 +0,0 @@
-{
-  "nodes": {
-    "flake-utils": {
-      "inputs": {
-        "systems": "systems"
-      },
-      "locked": {
-        "lastModified": 1685518550,
-        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
-        "type": "github"
-      },
-      "original": {
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "type": "github"
-      }
-    },
-    "nixpkgs": {
-      "locked": {
-        "lastModified": 1686501370,
-        "narHash": "sha256-G0WuM9fqTPRc2URKP9Lgi5nhZMqsfHGrdEbrLvAPJcg=",
-        "owner": "NixOS",
-        "repo": "nixpkgs",
-        "rev": "75a5ebf473cd60148ba9aec0d219f72e5cf52519",
-        "type": "github"
-      },
-      "original": {
-        "owner": "NixOS",
-        "ref": "nixos-unstable",
-        "repo": "nixpkgs",
-        "type": "github"
-      }
-    },
-    "poetry2nix": {
-      "inputs": {
-        "flake-utils": [
-          "flake-utils"
-        ],
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1686140708,
-        "narHash": "sha256-CKTahDFlhx07OQb4Afj+4/cNaxIWfxb8VGUlllUgoPY=",
-        "owner": "nix-community",
-        "repo": "poetry2nix",
-        "rev": "d91e2dd14caf4d09240bedf69a778c88f356ebda",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-community",
-        "repo": "poetry2nix",
-        "type": "github"
-      }
-    },
-    "root": {
-      "inputs": {
-        "flake-utils": "flake-utils",
-        "nixpkgs": "nixpkgs",
-        "poetry2nix": "poetry2nix"
-      }
-    },
-    "systems": {
-      "locked": {
-        "lastModified": 1681028828,
-        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
-        "owner": "nix-systems",
-        "repo": "default",
-        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-systems",
-        "repo": "default",
-        "type": "github"
-      }
-    }
-  },
-  "root": "root",
-  "version": 7
-}

flake.nix (deleted) | 40
@@ -1,40 +0,0 @@
-{
-  description = "nio-llm";
-
-  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
-    flake-utils.url = "github:numtide/flake-utils";
-
-    poetry2nix = {
-      url = "github:nix-community/poetry2nix";
-      inputs = {
-        nixpkgs.follows = "nixpkgs";
-        flake-utils.follows = "flake-utils";
-      };
-    };
-  };
-
-  outputs = { nixpkgs, flake-utils, poetry2nix, ... }:
-    flake-utils.lib.eachDefaultSystem (system:
-      let
-        pkgs = import nixpkgs {
-          inherit system;
-          overlays = [ poetry2nix.overlay ];
-        };
-
-        pythonEnv = pkgs.poetry2nix.mkPoetryEnv {
-          projectDir = ./.;
-          preferWheels = true;
-          python = pkgs.python311;
-        };
-      in {
-        packages.default = pkgs.poetry2nix.mkPoetryApplication {
-          projectDir = ./.;
-          preferWheels = true;
-          python = pkgs.python311;
-        };
-
-        devShells.default =
-          pkgs.mkShell { buildInputs = [ pythonEnv pkgs.poetry ]; };
-      });
-}

poetry.lock (generated) | 1633
(File diff suppressed because it is too large.)

poetry.toml (new file) | 3
@@ -0,0 +1,3 @@
+[virtualenvs]
+create = true
+in-project = true

pyproject.toml
@@ -6,7 +6,7 @@ license = "MIT"
 name = "nio-llm"
 readme = "README.md"
 repository = "https://github.com/Laurent2916/nio-llm.git"
-version = "0.1.0"
+version = "1.0.0"
 
 [tool.poetry.scripts]
 nio-llm = "nio_llm.main:main"
@@ -29,9 +29,21 @@ mypy = "^1.3.0"
 ruff = "^0.0.267"
 
 [tool.ruff]
+target-version = "py311"
 line-length = 120
+ignore-init-module-imports = true
+include = [
+  "*.py", # regular python files
+  "*.pyi", # python stub files
+  "*.ipynb", # jupyter notebooks
+  "**/pyproject.toml", # python config files
+]
 ignore = [
   "G004", # Logging statement uses f-string
+  "EM102", # Exception must not use an f-string literal, assign to variable first
+  "D100", # Missing docstring in public module
+  "D104", # Missing docstring in public package
+  "N812", # Lowercase imported as non lowercase
 ]
 select = [
   "A", # flake8-builtins
@@ -47,6 +59,8 @@ select = [
   "N", # pep8-naming
   "PIE", # flake8-pie
   "PTH", # flake8-use-pathlib
+  "TD", # flake8-todo
+  "FIX", # flake8-fixme
   "RET", # flake8-return
   "RUF", # ruff
   "S", # flake8-bandit
@@ -59,26 +73,28 @@ select = [
 [tool.ruff.pydocstyle]
 convention = "google"
 
+[tool.ruff.isort]
+known-first-party = ["nio_llm"]
+
+[tool.ruff.per-file-ignores]
+"__init__.py" = [
+  "F401", # Imported but unused
+]
+"src/aube/main.py" = [
+  "F401", # Imported but unused
+  "E402", # Module level import not at top of file
+]
+
+[tool.ruff.mccabe]
+max-complexity = 5 # C901
+
 [tool.black]
-include = '\.pyi?$'
-target-version = ["py311"]
-line-length = 120
 exclude = '''
 /(
   \.git
   \.venv
 )/
 '''
-
-[tool.isort]
-multi_line_output = 3
-profile = "black"
-
-[tool.mypy]
-python_version = "3.11"
-warn_return_any = true
-warn_unused_configs = true
-
-[build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
+include = '\.pyi?$'
+line-length = 120
+target-version = ["py311"]

src/nio_llm/Dockerfile (new file) | 21
@@ -0,0 +1,21 @@
+FROM python:3.9
+
+# Update and upgrade the existing packages
+RUN apt-get update && \
+    apt-get upgrade -y
+
+RUN mkdir /app
+WORKDIR /app
+
+# install python dependencies
+RUN pip install --upgrade pip
+RUN pip install \
+    jsonargparse[signatures] \
+    git+https://github.com/abetlen/llama-cpp-python.git \
+    matrix-nio \
+    openai \
+    rich
+
+COPY *.py /app/nio_llm/
+ENV PYTHONPATH=/app
+CMD ["/usr/local/bin/python", "/app/nio_llm"]

src/nio_llm/main.py
@@ -2,9 +2,7 @@
 
 import asyncio
 import logging
-from pathlib import Path
 
-from huggingface_hub import hf_hub_download
 from jsonargparse import CLI
 from rich.logging import RichHandler
 
@@ -15,53 +13,62 @@ logger = logging.getLogger("nio-llm.main")
 
 def main(
     room: str,
-    password: str,
     username: str,
-    preprompt: str,
+    password: str,
+    preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
     device_id: str = "nio-llm",
     homeserver: str = "https://matrix.org",
-    ggml_repoid: str = "TheBloke/stable-vicuna-13B-GGML",
-    ggml_filename: str = "stable-vicuna-13B.ggmlv3.q5_1.bin",
     sync_timeout: int = 30000,
+    openai_api_key: str = "osftw",
+    openai_api_endpoint: str = "http://localhost:8000/v1",
+    openai_temperature: float = 0,
+    openai_max_tokens: int = 256,
 ) -> None:
-    """Download llama model from HuggingFace and start the client.
+    """Instantiate and start the client.
 
     Args:
         room (`str`):
            The room to join.
-        password (`str`):
-            The password to log in with.
         username (`str`):
            The username to log in as.
-        device_id (`str`):
-            The device ID to use.
+        password (`str`):
+            The password to log in with.
         preprompt (`str`):
            The preprompt to use.
-        ggml_repoid (`str`, default `"TheBloke/stable-vicuna-13B-GGML"`):
-            The HuggingFace Hub repo ID to download the model from.
-        ggml_filename (`str`, default `"stable-vicuna-13B.ggmlv3.q5_1.bin"`):
-            The HuggingFace Hub filename to download the model from.
-        homeserver (`str`, default `"matrix.org"`):
-            The homeserver to connect to.
-        sync_timeout (`int`, default `30000`):
+            Defaults to `"You are a helpful assistant."`.
+        device_id (`str`):
+            The device ID to use.
+            Defaults to `"nio-llm"`.
+        homeserver (`str`):
+            The matrix homeserver to connect to.
+            Defaults to `"https://matrix.org"`.
+        sync_timeout (`int`):
            The timeout to use when syncing with the homeserver.
+            Defaults to `30000`.
+        openai_api_key (`str`):
+            The OpenAI API key to use.
+            Defaults to `"osftw"`.
+        openai_api_endpoint (`str`):
+            The OpenAI API endpoint to use.
+            Defaults to `"http://localhost:8000/v1"`.
+        openai_temperature (`float`):
+            The OpenAI temperature to use.
+            Defaults to `0`.
+        openai_max_tokens (`int`):
+            The OpenAI max tokens to use.
+            Defaults to `256`.
     """
-    # download the model
-    ggml_path = Path(
-        hf_hub_download(
-            repo_id=ggml_repoid,
-            filename=ggml_filename,
-        ),
-    )
-
     # create the client
     client = LLMClient(
         room=room,
         username=username,
         device_id=device_id,
-        ggml_path=ggml_path,
         preprompt=preprompt,
         homeserver=homeserver,
+        openai_api_key=openai_api_key,
+        openai_api_endpoint=openai_api_endpoint,
+        openai_temperature=openai_temperature,
+        openai_max_tokens=openai_max_tokens,
     )
 
     # start the client
@@ -86,6 +93,6 @@ if __name__ == "__main__":
     CLI(
         components=main,
         as_positional=False,
-        env_prefix="NIO_LLM",
+        env_prefix="NIOLLM",
         default_env=True,
     )

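With env_prefix="NIOLLM" and default_env=True, jsonargparse maps every parameter of main() to a NIOLLM_* environment variable, which is exactly what docker-compose.yml sets for the client container. A minimal sketch of that mechanism (greet is a hypothetical stand-in for nio_llm.main.main, not code from the repo):

from jsonargparse import CLI

def greet(room: str, username: str = "nio-llm") -> None:
    """Print the values jsonargparse resolved from CLI flags or NIOLLM_* env vars."""
    print(f"room={room} username={username}")

if __name__ == "__main__":
    # `--room "!abc:matrix.org"` on the command line and NIOLLM_ROOM="!abc:matrix.org"
    # in the environment are equivalent ways to fill the `room` parameter
    CLI(
        components=greet,
        as_positional=False,
        env_prefix="NIOLLM",
        default_env=True,
    )
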
src/nio_llm/client.py
@@ -1,18 +1,15 @@
-"""A Matrix client that uses Llama to respond to messages."""
-
 import logging
 import time
 from collections import deque
-from pathlib import Path
 
-from llama_cpp import Llama
+import openai
 from nio import AsyncClient, MatrixRoom, RoomMessageText
 
 logger = logging.getLogger("nio-llm.client")
 
 
 class LLMClient(AsyncClient):
-    """A Matrix client that uses Llama to respond to messages."""
+    """A Matrix client that uses llama.cpp to respond to messages."""
 
     def __init__(
         self,
@@ -20,18 +17,33 @@
         homeserver: str,
         device_id: str,
         preprompt: str,
-        ggml_path: Path,
         room: str,
-    ):
+        openai_api_key: str,
+        openai_api_endpoint: str,
+        openai_temperature: float,
+        openai_max_tokens: int,
+    ) -> None:
         """Create a new LLMClient instance.
 
         Args:
-            username (`str`): The username to log in as.
-            homeserver (`str`): The homeserver to connect to.
-            device_id (`str`): The device ID to use.
-            preprompt (`str`): The preprompt to use.
-            ggml_path (`Path`): The path to the GGML model.
-            room (`str`): The room to join.
+            username (`str`):
+                The username to log in as.
+            homeserver (`str`):
+                The homeserver to connect to.
+            device_id (`str`):
+                The device ID to use.
+            preprompt (`str`):
+                The preprompt to use.
+            room (`str`):
+                The room to join.
+            openai_api_key (`str`):
+                The OpenAI API key to use.
+            openai_api_endpoint (`str`):
+                The OpenAI API endpoint to use.
+            openai_temperature (`float`):
+                The OpenAI temperature to use.
+            openai_max_tokens (`int`):
+                The OpenAI max tokens to use.
         """
         self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
         self.spawn_time = time.time() * 1000
@@ -39,20 +51,19 @@
         self.preprompt = preprompt
         self.room = room
 
-        # create the AsyncClient instance
+        # setup openai settings
+        openai.api_base = openai_api_endpoint
+        openai.api_key = openai_api_key
+        self.openai_temperature = openai_temperature
+        self.openai_max_tokens = openai_max_tokens
+
+        # create nio AsyncClient instance
         super().__init__(
             user=self.uid,
             homeserver=homeserver,
             device_id=device_id,
         )
 
-        # create the Llama instance
-        self.llm = Llama(
-            model_path=str(ggml_path),
-            n_threads=12,
-            n_ctx=512 + 128,
-        )
-
         # create message history queue
         self.history: deque[RoomMessageText] = deque(maxlen=10)
 
@@ -63,8 +74,10 @@
         """Process new messages as they come in.
 
         Args:
-            room (`MatrixRoom`): The room the message was sent in.
-            event (`RoomMessageText`): The message event.
+            room (`MatrixRoom`):
+                The room the message was sent in.
+            event (`RoomMessageText`):
+                The message event.
         """
         logger.debug(f"New RoomMessageText: {event.source}")
 
@@ -93,6 +106,7 @@
 
         # update history
         self.history.append(event)
+        logger.debug(f"Updated history: {self.history}")
 
         # ignore our own messages
         if event.sender == self.user:
@@ -107,51 +121,46 @@
             and f'<a href="https://matrix.to/#/{self.uid}">{self.username}</a>'
             in event.source["content"]["formatted_body"]
         ):
-            logger.debug("Ignoring message not directed at us.")
-            return
-
-        # generate prompt from message and history
-        history = "\n".join(f"<{message.sender}>: {message.body}" for message in self.history)
-        prompt = "\n".join([self.preprompt, history, f"<{self.uid}>:"])
-        tokens = self.llm.tokenize(str.encode(prompt))
-        logger.debug(f"Prompt:\n{prompt}")
-        logger.debug(f"Tokens: {len(tokens)}")
-
-        # ignore prompts that are too long
-        if len(tokens) > 512:
-            logger.debug("Prompt too long, skipping.")
-            await self.room_send(
-                room_id=self.room,
-                message_type="m.room.message",
-                content={
-                    "msgtype": "m.emote",
-                    "body": "reached prompt token limit",
-                },
-            )
+            logger.debug("Ignoring message not mentioning us.")
             return
 
         # enable typing indicator
         await self.room_typing(
             self.room,
             typing_state=True,
-            timeout=100000000,
+            timeout=30000,
         )
+        logger.debug("Enabled typing indicator.")
 
         # generate response using llama.cpp
-        senders = [f"<{message.sender}>" for message in self.history]
-        output = self.llm(
-            prompt,
-            max_tokens=128,
-            stop=[f"<{self.uid}>", "### Human", "### Assistant", *senders],
-            echo=True,
+        response = openai.ChatCompletion.create(
+            model="local-model",
+            messages=[
+                {
+                    "content": self.preprompt,
+                    "role": "system",
+                },
+                *[
+                    {
+                        "content": f"{message.sender}: {message.body}",
+                        "role": "assistant" if message.sender == self.uid else "user",
+                    }
+                    for message in self.history
+                ],
+            ],
+            stop=["<|im_end|>"],
+            temperature=self.openai_temperature,
+            max_tokens=self.openai_max_tokens,
         )
+        logger.debug(f"Generated response: {response}")
 
         # retreive the response
-        output = output["choices"][0]["text"]  # type: ignore
-        output = output.removeprefix(prompt).strip()
+        output = response["choices"][0]["message"]["content"]  # type: ignore
+        output = output.strip().removeprefix(f"{self.uid}:").strip()
 
         # disable typing indicator
         await self.room_typing(self.room, typing_state=False)
+        logger.debug("Disabled typing indicator.")
 
         # send the response
         await self.room_send(
@@ -162,8 +171,9 @@
                 "body": output,
             },
         )
+        logger.debug(f"Sent response: {output}")
 
-    async def start(self, password, sync_timeout=30000):
+    async def start(self, password, sync_timeout=30000) -> None:
         """Start the client.
 
         Args:

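The core of the refactor is visible in message_callback: instead of loading a GGML model in-process with llama_cpp, the client points the pre-1.0 openai package at the llama-cpp-python server and calls ChatCompletion.create. A standalone sketch of that call pattern, using the defaults introduced by this commit (local endpoint, dummy key, "local-model" alias, ChatML stop token); the example messages are made up:

import openai

openai.api_base = "http://localhost:8000/v1"  # llama-cpp-python server from docker-compose.yml
openai.api_key = "osftw"  # dummy key, the commit's default

response = openai.ChatCompletion.create(
    model="local-model",  # matches MODEL_ALIAS in the server Dockerfile
    messages=[
        {"role": "system", "content": "You are a helpful assistant in a multi-agent [matrix] conversation."},
        {"role": "user", "content": "@laurent:matrix.org: hi bot, how are you?"},
    ],
    stop=["<|im_end|>"],  # ChatML end-of-turn token, matching CHAT_FORMAT=chatml
    temperature=0,
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
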
src/nio_llm_server/Dockerfile (new file) | 33
@@ -0,0 +1,33 @@
+FROM python:3
+
+# Update and upgrade the existing packages
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    ninja-build \
+    libopenblas-dev \
+    build-essential
+
+RUN mkdir /app
+WORKDIR /app
+
+# install python dependencies
+RUN pip install --upgrade pip
+RUN pip install huggingface_hub
+RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
+    pip install llama-cpp-python[server]@git+https://github.com/abetlen/llama-cpp-python.git --verbose
+
+# Set environment variable for the host
+ENV HOST=0.0.0.0
+ENV PORT=8000
+ENV HF_REPO=TheBloke/Mistral-7B-OpenOrca-GGUF
+ENV HF_FILE=mistral-7b-openorca.Q4_K_M.gguf
+ENV MODEL_ALIAS=local-model
+ENV CHAT_FORMAT=chatml
+
+# Expose a port for the server
+EXPOSE 8000
+
+COPY run.sh /app
+# Run the server start script
+CMD ["/bin/sh", "/app/run.sh"]

src/nio_llm_server/run.sh (new file) | 6
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+huggingface-cli download $HF_REPO $HF_FILE
+MODEL_PATH=`huggingface-cli download --quiet $HF_REPO $HF_FILE`
+
+python3 -m llama_cpp.server --host $HOST --port $PORT --model $MODEL_PATH --model_alias $MODEL_ALIAS --chat_format $CHAT_FORMAT

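run.sh downloads the GGUF weights with huggingface-cli and then launches llama_cpp.server, which exposes the OpenAI-compatible API the client talks to. For reference, a rough Python equivalent of the script (a sketch, not part of the commit), reusing the defaults set in the server Dockerfile:

import os
import subprocess

from huggingface_hub import hf_hub_download

# download (or reuse from the mounted HF cache) the model file,
# like the `huggingface-cli download` calls in run.sh
model_path = hf_hub_download(
    repo_id=os.environ.get("HF_REPO", "TheBloke/Mistral-7B-OpenOrca-GGUF"),
    filename=os.environ.get("HF_FILE", "mistral-7b-openorca.Q4_K_M.gguf"),
)

# start the OpenAI-compatible llama-cpp-python server
subprocess.run(
    [
        "python3", "-m", "llama_cpp.server",
        "--host", os.environ.get("HOST", "0.0.0.0"),
        "--port", os.environ.get("PORT", "8000"),
        "--model", model_path,
        "--model_alias", os.environ.get("MODEL_ALIAS", "local-model"),
        "--chat_format", os.environ.get("CHAT_FORMAT", "chatml"),
    ],
    check=True,
)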