Compare commits


8 commits

3 changed files with 77 additions and 88 deletions

````diff
@@ -8,79 +8,11 @@ Your own little LLM in your matrix chatroom.
 ## Usage
-This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.
-To see the available options, run:
-```bash
-nio_llm --help
-```
-To run the bot, you can use command line arguments, environment variables, a config file, or a mix of all three.
-### Command line arguments
-```bash
-# --room, --password, --username and --preprompt are required; the rest are optional
-nio_llm \
-  --room <YOUR ROOM> \
-  --password <YOUR PASSWORD> \
-  --username <YOUR USERNAME> \
-  --preprompt <YOUR PREPROMPT> \
-  --device-id nio-llm \
-  --homeserver https://matrix.org \
-  --ggml-repoid TheBloke/stable-vicuna-13B-GGML \
-  --ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
-  --sync-timeout 30000
-```
-### Environment variables
-```bash
-# required
-export NIO_LLM_ROOM=<YOUR ROOM>
-export NIO_LLM_PASSWORD=<YOUR PASSWORD>
-export NIO_LLM_USERNAME=<YOUR USERNAME>
-export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>
-# optional
-export NIO_LLM_DEVICE_ID=nio-llm
-export NIO_LLM_HOMESERVER=https://matrix.org
-export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
-export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
-export NIO_LLM_SYNC_TIMEOUT=30000
-nio_llm
-```
-### Config file
-Create a config file with the following content:
-```yaml
-# config_file.yaml
-# required
-room: <YOUR ROOM>
-password: <YOUR PASSWORD>
-username: <YOUR USERNAME>
-preprompt: <YOUR PREPROMPT>
-# optional
-device_id: nio-llm
-homeserver: https://matrix.org
-ggml_repoid: TheBloke/stable-vicuna-13B-GGML
-ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
-sync_timeout: 30000
-```
-Then run:
-```bash
-nio_llm --config config_file.yaml
-```
+This project is split in two parts: the client and the server.
+The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an openai server).
+The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.
 ## Special thanks
````
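The new README boils the architecture down to a client plus a server that "mimics an openai server". To make that concrete, here is a minimal sketch (not part of the diff) of querying the server with the `openai` 0.x Python package, the same API style the client code below uses; it assumes the server is running on its default endpoint, `http://localhost:8000/v1`:

```python
import openai

# point the openai 0.x client at the local llama-cpp-python server
openai.api_base = "http://localhost:8000/v1"  # the client's default endpoint
openai.api_key = "unused"  # assumption: the local server does not check the key

response = openai.ChatCompletion.create(
    model="local-model",  # placeholder name, same as in the client code
    messages=[{"role": "user", "content": "Hello!"}],
    temperature=0,
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```

Any OpenAI-compatible client can talk to the server the same way, which is the point of the split.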

````diff
@@ -15,7 +15,7 @@ def main(
     room: str,
     username: str,
     password: str,
-    preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
+    preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
     device_id: str = "nio-llm",
     homeserver: str = "https://matrix.org",
     sync_timeout: int = 30000,
@@ -23,6 +23,7 @@ def main(
     openai_api_endpoint: str = "http://localhost:8000/v1",
     openai_temperature: float = 0,
     openai_max_tokens: int = 256,
+    history_size: int = 3,
 ) -> None:
     """Instantiate and start the client.
@@ -57,6 +58,9 @@ def main(
         openai_max_tokens (`int`):
             The OpenAI max tokens to use.
             Defaults to `256`.
+        history_size (`int`):
+            The number of messages to keep in history.
+            Defaults to `3`.
     """
     # create the client
     client = LLMClient(
@@ -69,6 +73,7 @@ def main(
         openai_api_endpoint=openai_api_endpoint,
         openai_temperature=openai_temperature,
         openai_max_tokens=openai_max_tokens,
+        history_size=history_size,
     )
     # start the client
````
````diff
@@ -1,4 +1,6 @@
+import asyncio
 import logging
+import re
 import time
 from collections import deque
````
````diff
@@ -22,6 +24,7 @@ class LLMClient(AsyncClient):
         openai_api_endpoint: str,
         openai_temperature: float,
         openai_max_tokens: int,
+        history_size: int,
     ) -> None:
         """Create a new LLMClient instance.
@@ -44,6 +47,8 @@ class LLMClient(AsyncClient):
                 The OpenAI temperature to use.
             openai_max_tokens (`int`):
                 The OpenAI max tokens to use.
+            history_size (`int`):
+                The number of messages to keep in history.
         """
         self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
         self.spawn_time = time.time() * 1000
````
````diff
@@ -65,12 +70,36 @@ class LLMClient(AsyncClient):
         )

         # create message history queue
-        self.history: deque[RoomMessageText] = deque(maxlen=10)
+        self.history: deque[RoomMessageText] = deque(maxlen=history_size)

         # add callbacks
         self.add_event_callback(self.message_callback, RoomMessageText)  # type: ignore

-    async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
+    async def typing_loop(
+        self,
+        sleep_time: int = 10,
+    ) -> None:
+        """Send typing indicators every `sleep_time` seconds.
+
+        Args:
+            sleep_time (`int`, default `10`):
+                The time to sleep between sending typing indicators.
+        """
+        logging.debug("Started typing indicator.")
+        try:
+            while True:
+                logging.debug("Sending typing indicator.")
+                await self.room_typing(self.room, True)
+                await asyncio.sleep(sleep_time)
+        except asyncio.CancelledError:
+            await self.room_typing(self.room, False)
+            logging.debug("Stopped typing indicator.")
+
+    async def message_callback(
+        self,
+        room: MatrixRoom,
+        event: RoomMessageText,
+    ) -> None:
         """Process new messages as they come in.

         Args:
````
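The switch from a hard-coded `maxlen=10` to `maxlen=history_size` keeps the same bounded-queue behavior: a `deque` with `maxlen` silently evicts its oldest entry when a new one is appended. A standalone illustration:

```python
from collections import deque

history: deque[str] = deque(maxlen=3)  # matches the new default history_size
for message in ["one", "two", "three", "four"]:
    history.append(message)

print(list(history))  # ['two', 'three', 'four'], the oldest entry was evicted
```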
````diff
@@ -99,6 +128,7 @@ class LLMClient(AsyncClient):
         # ignore thread messages
         if (
             "m.relates_to" in event.source["content"]
+            and "rel_type" in event.source["content"]["m.relates_to"]
             and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
         ):
             logger.debug("Ignoring thread message.")
````
````diff
@@ -108,6 +138,14 @@ class LLMClient(AsyncClient):
         self.history.append(event)
         logger.debug(f"Updated history: {self.history}")

+        # update read receipt
+        await self.room_read_markers(
+            room_id=self.room,
+            fully_read_event=event.event_id,
+            read_event=event.event_id,
+        )
+        logger.debug(f"Updated read receipt to event: {event.event_id}")

         # ignore our own messages
         if event.sender == self.user:
             logger.debug("Ignoring our own message.")
````
````diff
@@ -124,16 +162,11 @@ class LLMClient(AsyncClient):
             logger.debug("Ignoring message not mentioning us.")
             return

-        # enable typing indicator
-        await self.room_typing(
-            self.room,
-            typing_state=True,
-            timeout=30000,
-        )
-        logger.debug("Enabled typing indicator.")
+        # start typing indicator loop
+        typing_task = asyncio.create_task(self.typing_loop())

         # generate response using llama.cpp
-        response = openai.ChatCompletion.create(
+        response = await openai.ChatCompletion.acreate(
             model="local-model",
             messages=[
                 {
````
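Two changes in this hunk work together: the typing indicator becomes a background task, and the blocking `openai.ChatCompletion.create` becomes the awaitable `acreate`. The second is what lets the first work, since a synchronous call would block the event loop and the typing task would never get a chance to run. A minimal standalone sketch of the pattern (names are illustrative):

```python
import asyncio


async def heartbeat() -> None:
    """Stand-in for typing_loop: tick until cancelled."""
    try:
        while True:
            print("typing...")
            await asyncio.sleep(1)
    except asyncio.CancelledError:
        print("typing stopped")


async def handle_message() -> None:
    task = asyncio.create_task(heartbeat())  # starts ticking concurrently
    await asyncio.sleep(3)  # stand-in for the awaited LLM call
    task.cancel()  # raises CancelledError inside heartbeat()
    await task  # heartbeat swallows the cancel, so this returns cleanly


asyncio.run(handle_message())
```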
````diff
@@ -158,9 +191,17 @@ class LLMClient(AsyncClient):
         output = response["choices"][0]["message"]["content"]  # type: ignore
         output = output.strip().removeprefix(f"{self.uid}:").strip()

-        # disable typing indicator
-        await self.room_typing(self.room, typing_state=False)
-        logger.debug("Disabled typing indicator.")
+        # replace newlines with <br>
+        formatted_output = output.replace("\n", "<br>")
+
+        # detect mentions and replace them with html mentions
+        formatted_output = re.sub(
+            r"@[^:]+:[^ :]+",
+            lambda match: f'<a href="https://matrix.to/#/{match.group(0)}">{match.group(0)}</a>',
+            formatted_output,
+        )
+        logger.debug(f"Formatted response: {formatted_output}")

         # send the response
         await self.room_send(
````
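The mention rewrite is a plain `re.sub` with a replacement callable: anything shaped like a Matrix user ID becomes a `matrix.to` link, the convention clients use to render mention pills. Run in isolation:

```python
import re

text = "good point, @alice:matrix.org"
html = re.sub(
    r"@[^:]+:[^ :]+",
    lambda m: f'<a href="https://matrix.to/#/{m.group(0)}">{m.group(0)}</a>',
    text,
)
print(html)
# good point, <a href="https://matrix.to/#/@alice:matrix.org">@alice:matrix.org</a>
```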
````diff
@@ -169,16 +210,27 @@ class LLMClient(AsyncClient):
             content={
                 "msgtype": "m.text",
                 "body": output,
+                "format": "org.matrix.custom.html",
+                "formatted_body": formatted_output,
             },
         )
         logger.debug(f"Sent response: {output}")

-    async def start(self, password, sync_timeout=30000) -> None:
+        # stop typing indicator loop
+        typing_task.cancel()
+
+    async def start(
+        self,
+        password: str,
+        sync_timeout: int = 30000,
+    ) -> None:
         """Start the client.

         Args:
-            password (`str`): The password to log in with.
-            sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
+            password (`str`):
+                The password to log in with.
+            sync_timeout (`int`, default `30000`):
+                The sync timeout in milliseconds.
         """
         # Login to the homeserver
         logger.debug(await self.login(password))
````
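For reference, the message the bot now sends pairs the plain-text `body` with an HTML `formatted_body`, flagged via `format: "org.matrix.custom.html"` as the Matrix spec requires; HTML-capable clients render `formatted_body`, and others fall back to `body`. A representative payload (values illustrative):

```python
content = {
    "msgtype": "m.text",
    "body": "hi @alice:matrix.org",
    "format": "org.matrix.custom.html",
    "formatted_body": 'hi <a href="https://matrix.to/#/@alice:matrix.org">@alice:matrix.org</a>',
}
```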