mirror of
https://github.com/Laurent2916/nio-llm.git
synced 2024-10-22 17:46:22 +00:00
Compare commits
8 commits
904dde744f
...
d7a14fd4ee
Author | SHA1 | Date | |
---|---|---|---|
Laureηt | d7a14fd4ee | ||
Laureηt | 10c7513add | ||
Laureηt | 5b5a18d73b | ||
Laureηt | 0f312a0a70 | ||
Laureηt | 8eda4825d9 | ||
Laureηt | 12080ad3a5 | ||
Laureηt | ca22fe640f | ||
Laureηt | 2d91052d6e |
74
README.md
74
README.md
|
@ -8,79 +8,11 @@ You own little LLM in your matrix chatroom.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.
|
This project is split in two parts: the client and the server.
|
||||||
|
|
||||||
To see the available options, run:
|
The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an openai server).
|
||||||
|
|
||||||
```bash
|
The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.
|
||||||
nio_llm --help
|
|
||||||
```
|
|
||||||
|
|
||||||
To run the bot, you can either use command line arguments, environment variables or a config file. (or a mix of all three)
|
|
||||||
|
|
||||||
### Command line arguments
|
|
||||||
|
|
||||||
```bash
|
|
||||||
nio_llm \
|
|
||||||
# required \
|
|
||||||
--room <YOUR ROOM> \
|
|
||||||
--password <YOUR PASSWORD> \
|
|
||||||
--username <YOUR USERNAME> \
|
|
||||||
--preprompt <YOUR PREPROMPT> \
|
|
||||||
# optional \
|
|
||||||
--device-id nio-llm \
|
|
||||||
--homeserver https://matrix.org \
|
|
||||||
--ggml-repoid TheBloke/stable-vicuna-13B-GGML \
|
|
||||||
--ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
|
|
||||||
--sync-timeout 30000
|
|
||||||
```
|
|
||||||
|
|
||||||
### Environment variables
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# required
|
|
||||||
export NIO_LLM_ROOM=<YOUR ROOM>
|
|
||||||
export NIO_LLM_PASSWORD=<YOUR PASSWORD>
|
|
||||||
export NIO_LLM_USERNAME=<YOUR USERNAME>
|
|
||||||
export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>
|
|
||||||
|
|
||||||
# optional
|
|
||||||
export NIO_LLM_DEVICE_ID=nio-llm
|
|
||||||
export NIO_LLM_HOMESERVER=https://matrix.org
|
|
||||||
export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
|
|
||||||
export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
|
|
||||||
export NIO_LLM_SYNC_TIMEOUT=30000
|
|
||||||
|
|
||||||
nio_llm
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Config file
|
|
||||||
|
|
||||||
Create a config file with the following content:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# config_file.yaml
|
|
||||||
|
|
||||||
# required
|
|
||||||
room: <YOUR ROOM>
|
|
||||||
password: <YOUR PASSWORD>
|
|
||||||
username: <YOUR USERNAME>
|
|
||||||
preprompt: <YOUR PREPROMPT>
|
|
||||||
|
|
||||||
# optional
|
|
||||||
device_id: nio-llm
|
|
||||||
homeserver: https://matrix.org
|
|
||||||
ggml_repoid: TheBloke/stable-vicuna-13B-GGML
|
|
||||||
ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
|
|
||||||
sync_timeout: 30000
|
|
||||||
```
|
|
||||||
|
|
||||||
Then run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
nio_llm --config config_file.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
## Special thanks
|
## Special thanks
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ def main(
|
||||||
room: str,
|
room: str,
|
||||||
username: str,
|
username: str,
|
||||||
password: str,
|
password: str,
|
||||||
preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
|
preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
|
||||||
device_id: str = "nio-llm",
|
device_id: str = "nio-llm",
|
||||||
homeserver: str = "https://matrix.org",
|
homeserver: str = "https://matrix.org",
|
||||||
sync_timeout: int = 30000,
|
sync_timeout: int = 30000,
|
||||||
|
@ -23,6 +23,7 @@ def main(
|
||||||
openai_api_endpoint: str = "http://localhost:8000/v1",
|
openai_api_endpoint: str = "http://localhost:8000/v1",
|
||||||
openai_temperature: float = 0,
|
openai_temperature: float = 0,
|
||||||
openai_max_tokens: int = 256,
|
openai_max_tokens: int = 256,
|
||||||
|
history_size: int = 3,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Instantiate and start the client.
|
"""Instantiate and start the client.
|
||||||
|
|
||||||
|
@ -57,6 +58,9 @@ def main(
|
||||||
openai_max_tokens (`int`):
|
openai_max_tokens (`int`):
|
||||||
The OpenAI max tokens to use.
|
The OpenAI max tokens to use.
|
||||||
Defaults to `256`.
|
Defaults to `256`.
|
||||||
|
history_size (`int`):
|
||||||
|
The number of messages to keep in history.
|
||||||
|
Defaults to `3`.
|
||||||
"""
|
"""
|
||||||
# create the client
|
# create the client
|
||||||
client = LLMClient(
|
client = LLMClient(
|
||||||
|
@ -69,6 +73,7 @@ def main(
|
||||||
openai_api_endpoint=openai_api_endpoint,
|
openai_api_endpoint=openai_api_endpoint,
|
||||||
openai_temperature=openai_temperature,
|
openai_temperature=openai_temperature,
|
||||||
openai_max_tokens=openai_max_tokens,
|
openai_max_tokens=openai_max_tokens,
|
||||||
|
history_size=history_size,
|
||||||
)
|
)
|
||||||
|
|
||||||
# start the client
|
# start the client
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
|
||||||
|
@ -22,6 +24,7 @@ class LLMClient(AsyncClient):
|
||||||
openai_api_endpoint: str,
|
openai_api_endpoint: str,
|
||||||
openai_temperature: float,
|
openai_temperature: float,
|
||||||
openai_max_tokens: int,
|
openai_max_tokens: int,
|
||||||
|
history_size: int,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create a new LLMClient instance.
|
"""Create a new LLMClient instance.
|
||||||
|
|
||||||
|
@ -44,6 +47,8 @@ class LLMClient(AsyncClient):
|
||||||
The OpenAI temperature to use.
|
The OpenAI temperature to use.
|
||||||
openai_max_tokens (`int`):
|
openai_max_tokens (`int`):
|
||||||
The OpenAI max tokens to use.
|
The OpenAI max tokens to use.
|
||||||
|
history_size (`int`):
|
||||||
|
The number of messages to keep in history.
|
||||||
"""
|
"""
|
||||||
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
|
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
|
||||||
self.spawn_time = time.time() * 1000
|
self.spawn_time = time.time() * 1000
|
||||||
|
@ -65,12 +70,36 @@ class LLMClient(AsyncClient):
|
||||||
)
|
)
|
||||||
|
|
||||||
# create message history queue
|
# create message history queue
|
||||||
self.history: deque[RoomMessageText] = deque(maxlen=10)
|
self.history: deque[RoomMessageText] = deque(maxlen=history_size)
|
||||||
|
|
||||||
# add callbacks
|
# add callbacks
|
||||||
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore
|
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore
|
||||||
|
|
||||||
async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
|
async def typing_loop(
|
||||||
|
self,
|
||||||
|
sleep_time: int = 10,
|
||||||
|
) -> None:
|
||||||
|
"""Send typing indicators every `sleep_time` seconds.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sleep_time (`int`, default `10`):
|
||||||
|
The time to sleep between sending typing indicators.
|
||||||
|
"""
|
||||||
|
logging.debug("Started typing indicator.")
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
logging.debug("Sending typing indicator.")
|
||||||
|
await self.room_typing(self.room, True)
|
||||||
|
await asyncio.sleep(sleep_time)
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
await self.room_typing(self.room, False)
|
||||||
|
logging.debug("Stopped typing indicator.")
|
||||||
|
|
||||||
|
async def message_callback(
|
||||||
|
self,
|
||||||
|
room: MatrixRoom,
|
||||||
|
event: RoomMessageText,
|
||||||
|
) -> None:
|
||||||
"""Process new messages as they come in.
|
"""Process new messages as they come in.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -99,6 +128,7 @@ class LLMClient(AsyncClient):
|
||||||
# ignore thread messages
|
# ignore thread messages
|
||||||
if (
|
if (
|
||||||
"m.relates_to" in event.source["content"]
|
"m.relates_to" in event.source["content"]
|
||||||
|
and "rel_type" in event.source["content"]["m.relates_to"]
|
||||||
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
|
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
|
||||||
):
|
):
|
||||||
logger.debug("Ignoring thread message.")
|
logger.debug("Ignoring thread message.")
|
||||||
|
@ -108,6 +138,14 @@ class LLMClient(AsyncClient):
|
||||||
self.history.append(event)
|
self.history.append(event)
|
||||||
logger.debug(f"Updated history: {self.history}")
|
logger.debug(f"Updated history: {self.history}")
|
||||||
|
|
||||||
|
# update read receipt
|
||||||
|
await self.room_read_markers(
|
||||||
|
room_id=self.room,
|
||||||
|
fully_read_event=event.event_id,
|
||||||
|
read_event=event.event_id,
|
||||||
|
)
|
||||||
|
logger.debug(f"Updated read receipt to event: {event.event_id}")
|
||||||
|
|
||||||
# ignore our own messages
|
# ignore our own messages
|
||||||
if event.sender == self.user:
|
if event.sender == self.user:
|
||||||
logger.debug("Ignoring our own message.")
|
logger.debug("Ignoring our own message.")
|
||||||
|
@ -124,16 +162,11 @@ class LLMClient(AsyncClient):
|
||||||
logger.debug("Ignoring message not mentioning us.")
|
logger.debug("Ignoring message not mentioning us.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# enable typing indicator
|
# start typing indicator loop
|
||||||
await self.room_typing(
|
typing_task = asyncio.create_task(self.typing_loop())
|
||||||
self.room,
|
|
||||||
typing_state=True,
|
|
||||||
timeout=30000,
|
|
||||||
)
|
|
||||||
logger.debug("Enabled typing indicator.")
|
|
||||||
|
|
||||||
# generate response using llama.cpp
|
# generate response using llama.cpp
|
||||||
response = openai.ChatCompletion.create(
|
response = await openai.ChatCompletion.acreate(
|
||||||
model="local-model",
|
model="local-model",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
|
@ -158,9 +191,17 @@ class LLMClient(AsyncClient):
|
||||||
output = response["choices"][0]["message"]["content"] # type: ignore
|
output = response["choices"][0]["message"]["content"] # type: ignore
|
||||||
output = output.strip().removeprefix(f"{self.uid}:").strip()
|
output = output.strip().removeprefix(f"{self.uid}:").strip()
|
||||||
|
|
||||||
# disable typing indicator
|
# replace newlines with <br>
|
||||||
await self.room_typing(self.room, typing_state=False)
|
formatted_output = output.replace("\n", "<br>")
|
||||||
logger.debug("Disabled typing indicator.")
|
|
||||||
|
# detect mentions and replace them with html mentions
|
||||||
|
formatted_output = re.sub(
|
||||||
|
r"@[^:]+:[^ :]+",
|
||||||
|
lambda match: f'<a href="https://matrix.to/#/{match.group(0)}"></a>',
|
||||||
|
formatted_output,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(f"Formatted response: {formatted_output}")
|
||||||
|
|
||||||
# send the response
|
# send the response
|
||||||
await self.room_send(
|
await self.room_send(
|
||||||
|
@ -169,16 +210,27 @@ class LLMClient(AsyncClient):
|
||||||
content={
|
content={
|
||||||
"msgtype": "m.text",
|
"msgtype": "m.text",
|
||||||
"body": output,
|
"body": output,
|
||||||
|
"format": "org.matrix.custom.html",
|
||||||
|
"formatted_body": formatted_output,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
logger.debug(f"Sent response: {output}")
|
logger.debug(f"Sent response: {output}")
|
||||||
|
|
||||||
async def start(self, password, sync_timeout=30000) -> None:
|
# stop typing indicator loop
|
||||||
|
typing_task.cancel()
|
||||||
|
|
||||||
|
async def start(
|
||||||
|
self,
|
||||||
|
password: str,
|
||||||
|
sync_timeout: int = 30000,
|
||||||
|
) -> None:
|
||||||
"""Start the client.
|
"""Start the client.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
password (`str`): The password to log in with.
|
password (`str`):
|
||||||
sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
|
The password to log in with.
|
||||||
|
sync_timeout (`int`, default `30000`):
|
||||||
|
The sync timeout in milliseconds.
|
||||||
"""
|
"""
|
||||||
# Login to the homeserver
|
# Login to the homeserver
|
||||||
logger.debug(await self.login(password))
|
logger.debug(await self.login(password))
|
||||||
|
|
Loading…
Reference in a new issue