mirror of https://github.com/Laurent2916/nio-llm.git
synced 2024-11-23 06:38:47 +00:00

♻️ big refactor, use llama server and openai python library

This commit is contained in:
parent 541007380a
commit 904dde744f

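In short: the client no longer loads a GGML model in-process through llama-cpp-python; it sends chat completions to a separate llama.cpp server that speaks the OpenAI API, via the openai python library. A minimal sketch of the new call path, assuming the server from src/nio_llm_server/ is reachable on localhost:8000 (the endpoint, key, model alias, and stop token below are the values used in this commit):

```python
import openai

# point the pre-1.0 openai client at the local llama.cpp server instead of api.openai.com
openai.api_base = "http://localhost:8000/v1"
openai.api_key = "osftw"  # any non-empty string; the local server does not check it

response = openai.ChatCompletion.create(
    model="local-model",  # the MODEL_ALIAS configured in the server Dockerfile
    messages=[
        {"role": "system", "content": "You are a helpful assistant in a multi-agent [matrix] conversation."},
        {"role": "user", "content": "@bot hello!"},
    ],
    stop=["<|im_end|>"],
    temperature=0,
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```
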
.gitignore (vendored, 1 line changed)
@@ -1,4 +1,5 @@
.direnv
.venv
result

# https://github.com/github/gitignore/blob/main/Python.gitignore

.vscode/extensions.json (vendored, 4 lines changed)
@@ -1,7 +1,9 @@
{
    "recommendations": [
        "editorconfig.editorconfig",
        "charliermarsh.ruff",
        "editorconfig.editorconfig",
        "ms-python.black-formatter",
        "ms-python.python",
        "tamasfe.even-better-toml",
    ]
}

.vscode/settings.json (vendored, 40 lines changed)
@@ -1,30 +1,38 @@
{
    "python.analysis.typeCheckingMode": "basic",
    "python.formatting.provider": "black",
    // nice editor settings
    "editor.formatOnSave": true,
    "python.linting.enabled": true,
    "python.linting.lintOnSave": true,
    "python.linting.mypyEnabled": true,
    "python.linting.banditEnabled": true,
    "python.languageServer": "Pylance",
    "[python]": {
        "editor.codeActionsOnSave": {
            "source.organizeImports": true
        }
    },
    "terminal.integrated.env.linux": {
        "PYTHONPATH": "${workspaceFolder}"
    },
    "editor.formatOnPaste": true,
    "editor.rulers": [
        120
    ],
    // editorconfig redundancy
    "files.insertFinalNewline": true,
    "files.trimTrailingWhitespace": true,
    // hide unimportant files/folders
    "files.exclude": {
        // defaults
        "**/.git": true,
        "**/.svn": true,
        "**/.hg": true,
        "**/CVS": true,
        "**/.DS_Store": true,
        "**/Thumbs.db": true,
        // annoying
        "**/__pycache__": true,
        "**/.mypy_cache": true,
        "**/.direnv": true,
        "**/.ruff_cache": true,
        "**/*.tmp": true,
    },
    // python settings
    "python.analysis.typeCheckingMode": "basic", // get ready to be annoyed
    "[python]": {
        "editor.defaultFormatter": "ms-python.black-formatter",
        "editor.codeActionsOnSave": {
            "source.organizeImports.ruff": true,
            "source.fixAll": true,
        }
    },
    "terminal.integrated.env.linux": {
        "PYTHONPATH": "${workspaceFolder}/src/",
    },
}

docker-compose.yml (new file, 29 lines)
@@ -0,0 +1,29 @@
version: "3.8"
services:
  server:
    build:
      context: src/nio_llm_server/
      dockerfile: Dockerfile
    ports:
      - 8000:8000
    volumes:
      - /home/laurent/.cache/huggingface/hub/:/root/.cache/huggingface/hub/
    healthcheck:
      test: ["CMD", "nc", "-z", "-v", "localhost", "8000"]
    restart: unless-stopped

  client:
    build:
      context: src/nio_llm/
      dockerfile: Dockerfile
    environment:
      - NIOLLM_HOMESERVER=$NIOLLM_HOMESERVER
      - NIOLLM_USERNAME=$NIOLLM_USERNAME
      - NIOLLM_DEVICE_ID=$NIOLLM_DEVICE_ID
      - NIOLLM_ROOM=$NIOLLM_ROOM
      - NIOLLM_PASSWORD=$NIOLLM_PASSWORD
      - NIOLLM_OPENAI_API_ENDPOINT=$NIOLLM_OPENAI_API_ENDPOINT
    depends_on:
      server:
        condition: service_healthy
    restart: unless-stopped

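The client container only starts once the server's `nc -z` healthcheck passes. When running the client outside docker, a rough Python equivalent of that readiness probe (a hypothetical helper, not part of this repo) could look like:

```python
import socket
import time


def wait_for_server(host: str = "localhost", port: int = 8000, timeout: float = 60.0) -> None:
    """Block until a TCP connection to host:port succeeds, like the compose healthcheck."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            with socket.create_connection((host, port), timeout=1):
                return  # server is accepting connections
        except OSError:
            if time.monotonic() > deadline:
                raise TimeoutError(f"{host}:{port} not reachable after {timeout}s")
            time.sleep(1)


wait_for_server()
```
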
flake.lock (deleted file, 85 lines)
@@ -1,85 +0,0 @@
{
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1685518550,
        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1686501370,
        "narHash": "sha256-G0WuM9fqTPRc2URKP9Lgi5nhZMqsfHGrdEbrLvAPJcg=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "75a5ebf473cd60148ba9aec0d219f72e5cf52519",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "poetry2nix": {
      "inputs": {
        "flake-utils": [
          "flake-utils"
        ],
        "nixpkgs": [
          "nixpkgs"
        ]
      },
      "locked": {
        "lastModified": 1686140708,
        "narHash": "sha256-CKTahDFlhx07OQb4Afj+4/cNaxIWfxb8VGUlllUgoPY=",
        "owner": "nix-community",
        "repo": "poetry2nix",
        "rev": "d91e2dd14caf4d09240bedf69a778c88f356ebda",
        "type": "github"
      },
      "original": {
        "owner": "nix-community",
        "repo": "poetry2nix",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs",
        "poetry2nix": "poetry2nix"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

flake.nix (deleted file, 40 lines)
@@ -1,40 +0,0 @@
{
  description = "nio-llm";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";

    poetry2nix = {
      url = "github:nix-community/poetry2nix";
      inputs = {
        nixpkgs.follows = "nixpkgs";
        flake-utils.follows = "flake-utils";
      };
    };
  };

  outputs = { nixpkgs, flake-utils, poetry2nix, ... }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
          inherit system;
          overlays = [ poetry2nix.overlay ];
        };

        pythonEnv = pkgs.poetry2nix.mkPoetryEnv {
          projectDir = ./.;
          preferWheels = true;
          python = pkgs.python311;
        };
      in {
        packages.default = pkgs.poetry2nix.mkPoetryApplication {
          projectDir = ./.;
          preferWheels = true;
          python = pkgs.python311;
        };

        devShells.default =
          pkgs.mkShell { buildInputs = [ pythonEnv pkgs.poetry ]; };
      });
}

poetry.lock (generated, 1633 lines changed)
File diff suppressed because it is too large

poetry.toml (new file, 3 lines)
@@ -0,0 +1,3 @@
[virtualenvs]
create = true
in-project = true

pyproject.toml
@@ -6,7 +6,7 @@ license = "MIT"
name = "nio-llm"
readme = "README.md"
repository = "https://github.com/Laurent2916/nio-llm.git"
version = "0.1.0"
version = "1.0.0"

[tool.poetry.scripts]
nio-llm = "nio_llm.main:main"

@@ -29,9 +29,21 @@ mypy = "^1.3.0"
ruff = "^0.0.267"

[tool.ruff]
target-version = "py311"
line-length = 120
ignore-init-module-imports = true
include = [
    "*.py", # regular python files
    "*.pyi", # python stub files
    "*.ipynb", # jupyter notebooks
    "**/pyproject.toml", # python config files
]
ignore = [
    "G004", # Logging statement uses f-string
    "EM102", # Exception must not use an f-string literal, assign to variable first
    "D100", # Missing docstring in public module
    "D104", # Missing docstring in public package
    "N812", # Lowercase imported as non lowercase
]
select = [
    "A", # flake8-builtins

@@ -47,6 +59,8 @@ select = [
    "N", # pep8-naming
    "PIE", # flake8-pie
    "PTH", # flake8-use-pathlib
    "TD", # flake8-todo
    "FIX", # flake8-fixme
    "RET", # flake8-return
    "RUF", # ruff
    "S", # flake8-bandit

@@ -59,26 +73,28 @@ select = [
[tool.ruff.pydocstyle]
convention = "google"

[tool.ruff.isort]
known-first-party = ["nio_llm"]

[tool.ruff.per-file-ignores]
"__init__.py" = [
    "F401", # Imported but unused
]
"src/aube/main.py" = [
    "F401", # Imported but unused
    "E402", # Module level import not at top of file
]

[tool.ruff.mccabe]
max-complexity = 5 # C901

[tool.black]
include = '\.pyi?$'
target-version = ["py311"]
line-length = 120
exclude = '''
/(
    \.git
    \.venv
)/
'''

[tool.isort]
multi_line_output = 3
profile = "black"

[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
include = '\.pyi?$'
line-length = 120
target-version = ["py311"]

src/nio_llm/Dockerfile (new file, 21 lines)
@@ -0,0 +1,21 @@
FROM python:3.9

# Update and upgrade the existing packages
RUN apt-get update && \
    apt-get upgrade -y

RUN mkdir /app
WORKDIR /app

# install python dependencies
RUN pip install --upgrade pip
RUN pip install \
    jsonargparse[signatures] \
    git+https://github.com/abetlen/llama-cpp-python.git \
    matrix-nio \
    openai \
    rich

COPY *.py /app/nio_llm/
ENV PYTHONPATH=/app
CMD ["/usr/local/bin/python", "/app/nio_llm"]

src/nio_llm/main.py
@@ -2,9 +2,7 @@

import asyncio
import logging
from pathlib import Path

from huggingface_hub import hf_hub_download
from jsonargparse import CLI
from rich.logging import RichHandler

@@ -15,53 +13,62 @@ logger = logging.getLogger("nio-llm.main")

def main(
    room: str,
    password: str,
    username: str,
    preprompt: str,
    password: str,
    preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
    device_id: str = "nio-llm",
    homeserver: str = "https://matrix.org",
    ggml_repoid: str = "TheBloke/stable-vicuna-13B-GGML",
    ggml_filename: str = "stable-vicuna-13B.ggmlv3.q5_1.bin",
    sync_timeout: int = 30000,
    openai_api_key: str = "osftw",
    openai_api_endpoint: str = "http://localhost:8000/v1",
    openai_temperature: float = 0,
    openai_max_tokens: int = 256,
) -> None:
    """Download llama model from HuggingFace and start the client.
    """Instantiate and start the client.

    Args:
        room (`str`):
            The room to join.
        password (`str`):
            The password to log in with.
        username (`str`):
            The username to log in as.
        device_id (`str`):
            The device ID to use.
        password (`str`):
            The password to log in with.
        preprompt (`str`):
            The preprompt to use.
        ggml_repoid (`str`, default `"TheBloke/stable-vicuna-13B-GGML"`):
            The HuggingFace Hub repo ID to download the model from.
        ggml_filename (`str`, default `"stable-vicuna-13B.ggmlv3.q5_1.bin"`):
            The HuggingFace Hub filename to download the model from.
        homeserver (`str`, default `"matrix.org"`):
            The homeserver to connect to.
        sync_timeout (`int`, default `30000`):
            Defaults to `"You are a helpful assistant."`.
        device_id (`str`):
            The device ID to use.
            Defaults to `"nio-llm"`.
        homeserver (`str`):
            The matrix homeserver to connect to.
            Defaults to `"https://matrix.org"`.
        sync_timeout (`int`):
            The timeout to use when syncing with the homeserver.
            Defaults to `30000`.
        openai_api_key (`str`):
            The OpenAI API key to use.
            Defaults to `"osftw"`.
        openai_api_endpoint (`str`):
            The OpenAI API endpoint to use.
            Defaults to `"http://localhost:8000/v1"`.
        openai_temperature (`float`):
            The OpenAI temperature to use.
            Defaults to `0`.
        openai_max_tokens (`int`):
            The OpenAI max tokens to use.
            Defaults to `256`.
    """
    # download the model
    ggml_path = Path(
        hf_hub_download(
            repo_id=ggml_repoid,
            filename=ggml_filename,
        ),
    )

    # create the client
    client = LLMClient(
        room=room,
        username=username,
        device_id=device_id,
        ggml_path=ggml_path,
        preprompt=preprompt,
        homeserver=homeserver,
        openai_api_key=openai_api_key,
        openai_api_endpoint=openai_api_endpoint,
        openai_temperature=openai_temperature,
        openai_max_tokens=openai_max_tokens,
    )

    # start the client

@@ -86,6 +93,6 @@ if __name__ == "__main__":
    CLI(
        components=main,
        as_positional=False,
        env_prefix="NIO_LLM",
        env_prefix="NIOLLM",
        default_env=True,
    )

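Because the CLI is built with `env_prefix="NIOLLM"` and `default_env=True`, jsonargparse fills main()'s parameters from NIOLLM_* environment variables; that is how the environment block in docker-compose.yml reaches the client. A small sketch of that wiring, using a stand-in function and placeholder values rather than the real main():

```python
import os

from jsonargparse import CLI


def main(room: str, password: str, username: str, homeserver: str = "https://matrix.org") -> None:
    """Stand-in for nio_llm.main.main, just to show the env-to-argument mapping."""
    print(room, username, homeserver)


# these would normally come from the host or docker-compose environment
os.environ["NIOLLM_ROOM"] = "!roomid:matrix.org"
os.environ["NIOLLM_PASSWORD"] = "secret"
os.environ["NIOLLM_USERNAME"] = "mybot"

# same CLI options as in this commit: NIOLLM_<PARAM> fills the matching parameter of main()
CLI(components=main, as_positional=False, env_prefix="NIOLLM", default_env=True)
```
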
src/nio_llm/client.py
@@ -1,18 +1,15 @@
"""A Matrix client that uses Llama to respond to messages."""

import logging
import time
from collections import deque
from pathlib import Path

from llama_cpp import Llama
import openai
from nio import AsyncClient, MatrixRoom, RoomMessageText

logger = logging.getLogger("nio-llm.client")


class LLMClient(AsyncClient):
    """A Matrix client that uses Llama to respond to messages."""
    """A Matrix client that uses llama.cpp to respond to messages."""

    def __init__(
        self,

@@ -20,18 +17,33 @@ class LLMClient(AsyncClient):
        homeserver: str,
        device_id: str,
        preprompt: str,
        ggml_path: Path,
        room: str,
    ):
        openai_api_key: str,
        openai_api_endpoint: str,
        openai_temperature: float,
        openai_max_tokens: int,
    ) -> None:
        """Create a new LLMClient instance.

        Args:
            username (`str`): The username to log in as.
            homeserver (`str`): The homeserver to connect to.
            device_id (`str`): The device ID to use.
            preprompt (`str`): The preprompt to use.
            ggml_path (`Path`): The path to the GGML model.
            room (`str`): The room to join.
            username (`str`):
                The username to log in as.
            homeserver (`str`):
                The homeserver to connect to.
            device_id (`str`):
                The device ID to use.
            preprompt (`str`):
                The preprompt to use.
            room (`str`):
                The room to join.
            openai_api_key (`str`):
                The OpenAI API key to use.
            openai_api_endpoint (`str`):
                The OpenAI API endpoint to use.
            openai_temperature (`float`):
                The OpenAI temperature to use.
            openai_max_tokens (`int`):
                The OpenAI max tokens to use.
        """
        self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
        self.spawn_time = time.time() * 1000

@@ -39,20 +51,19 @@ class LLMClient(AsyncClient):
        self.preprompt = preprompt
        self.room = room

        # create the AsyncClient instance
        # setup openai settings
        openai.api_base = openai_api_endpoint
        openai.api_key = openai_api_key
        self.openai_temperature = openai_temperature
        self.openai_max_tokens = openai_max_tokens

        # create nio AsyncClient instance
        super().__init__(
            user=self.uid,
            homeserver=homeserver,
            device_id=device_id,
        )

        # create the Llama instance
        self.llm = Llama(
            model_path=str(ggml_path),
            n_threads=12,
            n_ctx=512 + 128,
        )

        # create message history queue
        self.history: deque[RoomMessageText] = deque(maxlen=10)

@@ -63,8 +74,10 @@ class LLMClient(AsyncClient):
        """Process new messages as they come in.

        Args:
            room (`MatrixRoom`): The room the message was sent in.
            event (`RoomMessageText`): The message event.
            room (`MatrixRoom`):
                The room the message was sent in.
            event (`RoomMessageText`):
                The message event.
        """
        logger.debug(f"New RoomMessageText: {event.source}")

@@ -93,6 +106,7 @@ class LLMClient(AsyncClient):

        # update history
        self.history.append(event)
        logger.debug(f"Updated history: {self.history}")

        # ignore our own messages
        if event.sender == self.user:

@@ -107,51 +121,46 @@ class LLMClient(AsyncClient):
            and f'<a href="https://matrix.to/#/{self.uid}">{self.username}</a>'
            in event.source["content"]["formatted_body"]
        ):
            logger.debug("Ignoring message not directed at us.")
            return

        # generate prompt from message and history
        history = "\n".join(f"<{message.sender}>: {message.body}" for message in self.history)
        prompt = "\n".join([self.preprompt, history, f"<{self.uid}>:"])
        tokens = self.llm.tokenize(str.encode(prompt))
        logger.debug(f"Prompt:\n{prompt}")
        logger.debug(f"Tokens: {len(tokens)}")

        # ignore prompts that are too long
        if len(tokens) > 512:
            logger.debug("Prompt too long, skipping.")
            await self.room_send(
                room_id=self.room,
                message_type="m.room.message",
                content={
                    "msgtype": "m.emote",
                    "body": "reached prompt token limit",
                },
            )
            logger.debug("Ignoring message not mentioning us.")
            return

        # enable typing indicator
        await self.room_typing(
            self.room,
            typing_state=True,
            timeout=100000000,
            timeout=30000,
        )
        logger.debug("Enabled typing indicator.")

        # generate response using llama.cpp
        senders = [f"<{message.sender}>" for message in self.history]
        output = self.llm(
            prompt,
            max_tokens=128,
            stop=[f"<{self.uid}>", "### Human", "### Assistant", *senders],
            echo=True,
        response = openai.ChatCompletion.create(
            model="local-model",
            messages=[
                {
                    "content": self.preprompt,
                    "role": "system",
                },
                *[
                    {
                        "content": f"{message.sender}: {message.body}",
                        "role": "assistant" if message.sender == self.uid else "user",
                    }
                    for message in self.history
                ],
            ],
            stop=["<|im_end|>"],
            temperature=self.openai_temperature,
            max_tokens=self.openai_max_tokens,
        )
        logger.debug(f"Generated response: {response}")

        # retreive the response
        output = output["choices"][0]["text"]  # type: ignore
        output = output.removeprefix(prompt).strip()
        output = response["choices"][0]["message"]["content"]  # type: ignore
        output = output.strip().removeprefix(f"{self.uid}:").strip()

        # disable typing indicator
        await self.room_typing(self.room, typing_state=False)
        logger.debug("Disabled typing indicator.")

        # send the response
        await self.room_send(

@@ -162,8 +171,9 @@ class LLMClient(AsyncClient):
                "body": output,
            },
        )
        logger.debug(f"Sent response: {output}")

    async def start(self, password, sync_timeout=30000):
    async def start(self, password, sync_timeout=30000) -> None:
        """Start the client.

        Args:

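The prompt handling in client.py changes shape here: instead of flattening the history into one completion prompt and counting tokens, each RoomMessageText in the bounded deque becomes a chat message, with the bot's own past messages mapped to the assistant role. A standalone sketch of that mapping, using plain tuples in place of nio events:

```python
# (sender, body) tuples stand in for nio RoomMessageText events
history = [
    ("@alice:matrix.org", "@bot what's the weather?"),
    ("@bot:matrix.org", "Sunny, probably."),
    ("@bob:matrix.org", "@bot are you sure?"),
]
uid = "@bot:matrix.org"
preprompt = "You are a helpful assistant in a multi-agent [matrix] conversation."

messages = [
    {"role": "system", "content": preprompt},
    *[
        {
            "role": "assistant" if sender == uid else "user",
            "content": f"{sender}: {body}",
        }
        for sender, body in history
    ],
]
# messages is what gets passed to openai.ChatCompletion.create(model="local-model", ...)
```
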
src/nio_llm_server/Dockerfile (new file, 33 lines)
@@ -0,0 +1,33 @@
FROM python:3

# Update and upgrade the existing packages
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    ninja-build \
    libopenblas-dev \
    build-essential

RUN mkdir /app
WORKDIR /app

# install python dependencies
RUN pip install --upgrade pip
RUN pip install huggingface_hub
RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
    pip install llama-cpp-python[server]@git+https://github.com/abetlen/llama-cpp-python.git --verbose

# Set environment variable for the host
ENV HOST=0.0.0.0
ENV PORT=8000
ENV HF_REPO=TheBloke/Mistral-7B-OpenOrca-GGUF
ENV HF_FILE=mistral-7b-openorca.Q4_K_M.gguf
ENV MODEL_ALIAS=local-model
ENV CHAT_FORMAT=chatml

# Expose a port for the server
EXPOSE 8000

COPY run.sh /app
# Run the server start script
CMD ["/bin/sh", "/app/run.sh"]

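Clients must pass the MODEL_ALIAS value ("local-model") as the model name. A quick sanity check against a running container, assuming llama-cpp-python's server exposes the OpenAI-compatible /v1/models route:

```python
import openai

openai.api_base = "http://localhost:8000/v1"
openai.api_key = "osftw"  # unused by the local server, but the client requires a value

models = openai.Model.list()  # pre-1.0 openai API, matching the client code in this commit
print([m["id"] for m in models["data"]])  # should include "local-model"
```
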
src/nio_llm_server/run.sh (new file, 6 lines)
@@ -0,0 +1,6 @@
#!/bin/bash

huggingface-cli download $HF_REPO $HF_FILE
MODEL_PATH=`huggingface-cli download --quiet $HF_REPO $HF_FILE`

python3 -m llama_cpp.server --host $HOST --port $PORT --model $MODEL_PATH --model_alias $MODEL_ALIAS --chat_format $CHAT_FORMAT

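run.sh calls huggingface-cli twice: the first call downloads the file, the second (with --quiet) prints the cached path that is then handed to llama_cpp.server. The removed client code did the same job in Python; a minimal equivalent with huggingface_hub:

```python
from huggingface_hub import hf_hub_download

# downloads into the HF cache (mounted from the host in docker-compose) and returns the local path
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-OpenOrca-GGUF",
    filename="mistral-7b-openorca.Q4_K_M.gguf",
)
print(model_path)  # pass this to llama_cpp.server as --model
```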