Compare commits

...

8 commits

3 changed files with 77 additions and 88 deletions


@@ -8,79 +8,11 @@ Your own little LLM in your matrix chatroom.
 ## Usage
-This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.
-To see the available options, run:
-```bash
-nio_llm --help
-```
-To run the bot, you can either use command line arguments, environment variables or a config file. (or a mix of all three)
-### Command line arguments
-```bash
-nio_llm \
-  # required \
-  --room <YOUR ROOM> \
-  --password <YOUR PASSWORD> \
-  --username <YOUR USERNAME> \
-  --preprompt <YOUR PREPROMPT> \
-  # optional \
-  --device-id nio-llm \
-  --homeserver https://matrix.org \
-  --ggml-repoid TheBloke/stable-vicuna-13B-GGML \
-  --ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
-  --sync-timeout 30000
-```
-### Environment variables
-```bash
-# required
-export NIO_LLM_ROOM=<YOUR ROOM>
-export NIO_LLM_PASSWORD=<YOUR PASSWORD>
-export NIO_LLM_USERNAME=<YOUR USERNAME>
-export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>
-# optional
-export NIO_LLM_DEVICE_ID=nio-llm
-export NIO_LLM_HOMESERVER=https://matrix.org
-export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
-export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
-export NIO_LLM_SYNC_TIMEOUT=30000
-nio_llm
-```
-### Config file
-Create a config file with the following content:
-```yaml
-# config_file.yaml
-# required
-room: <YOUR ROOM>
-password: <YOUR PASSWORD>
-username: <YOUR USERNAME>
-preprompt: <YOUR PREPROMPT>
-# optional
-device_id: nio-llm
-homeserver: https://matrix.org
-ggml_repoid: TheBloke/stable-vicuna-13B-GGML
-ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
-sync_timeout: 30000
-```
-Then run:
-```bash
-nio_llm --config config_file.yaml
-```
+This project is split in two parts: the client and the server.
+The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an openai server).
+The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.
 ## Special thanks
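
The new usage text above relies on llama-cpp-python's OpenAI-compatible server (typically started with something like `python -m llama_cpp.server --model <path-to-ggml-model>`). Below is a minimal sketch of the client side of that split: the `http://localhost:8000/v1` endpoint, the `local-model` name, temperature `0`, and `max_tokens=256` are taken from the defaults visible in this diff; the helper function, the prompt, and the dummy API key are illustrative only.

```python
import asyncio

import openai

# Point the openai client at the local llama-cpp-python server instead of api.openai.com.
openai.api_base = "http://localhost:8000/v1"
openai.api_key = "not-needed-for-a-local-server"  # the local server ignores the key


async def ask_local_llm(prompt: str) -> str:
    # llama-cpp-python exposes an OpenAI-compatible /v1/chat/completions route,
    # so the regular ChatCompletion call works unchanged.
    response = await openai.ChatCompletion.acreate(
        model="local-model",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=256,
    )
    return response["choices"][0]["message"]["content"]


if __name__ == "__main__":
    print(asyncio.run(ask_local_llm("Say hello to the matrix room.")))
```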


@@ -15,7 +15,7 @@ def main(
     room: str,
     username: str,
     password: str,
-    preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
+    preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
     device_id: str = "nio-llm",
     homeserver: str = "https://matrix.org",
     sync_timeout: int = 30000,
@@ -23,6 +23,7 @@ def main(
     openai_api_endpoint: str = "http://localhost:8000/v1",
     openai_temperature: float = 0,
     openai_max_tokens: int = 256,
+    history_size: int = 3,
 ) -> None:
     """Instantiate and start the client.
@@ -57,6 +58,9 @@ def main(
         openai_max_tokens (`int`):
             The OpenAI max tokens to use.
             Defaults to `256`.
+        history_size (`int`):
+            The number of messages to keep in history.
+            Defaults to `3`.
     """
     # create the client
     client = LLMClient(
@@ -69,6 +73,7 @@ def main(
         openai_api_endpoint=openai_api_endpoint,
         openai_temperature=openai_temperature,
         openai_max_tokens=openai_max_tokens,
+        history_size=history_size,
     )
     # start the client
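
The new `history_size` option ends up as the `maxlen` of the client's message deque (see the `deque(maxlen=history_size)` change in the client file below), so trimming is automatic: once the deque is full, appending a new event silently drops the oldest one. A quick illustration of that behaviour, with plain strings standing in for `RoomMessageText` events:

```python
from collections import deque

# history_size = 3, matching the new default above
history: deque[str] = deque(maxlen=3)

for message in ["msg 1", "msg 2", "msg 3", "msg 4"]:
    history.append(message)

# The oldest entry was evicted automatically once maxlen was reached.
print(list(history))  # ['msg 2', 'msg 3', 'msg 4']
```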


@@ -1,4 +1,6 @@
+import asyncio
 import logging
+import re
 import time
 from collections import deque
@@ -22,6 +24,7 @@ class LLMClient(AsyncClient):
         openai_api_endpoint: str,
         openai_temperature: float,
         openai_max_tokens: int,
+        history_size: int,
     ) -> None:
         """Create a new LLMClient instance.
@@ -44,6 +47,8 @@ class LLMClient(AsyncClient):
                 The OpenAI temperature to use.
             openai_max_tokens (`int`):
                 The OpenAI max tokens to use.
+            history_size (`int`):
+                The number of messages to keep in history.
         """
         self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
         self.spawn_time = time.time() * 1000
@@ -65,12 +70,36 @@ class LLMClient(AsyncClient):
         )
         # create message history queue
-        self.history: deque[RoomMessageText] = deque(maxlen=10)
+        self.history: deque[RoomMessageText] = deque(maxlen=history_size)
         # add callbacks
         self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore
-    async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
+    async def typing_loop(
+        self,
+        sleep_time: int = 10,
+    ) -> None:
+        """Send typing indicators every `sleep_time` seconds.
+        Args:
+            sleep_time (`int`, default `10`):
+                The time to sleep between sending typing indicators.
+        """
+        logging.debug("Started typing indicator.")
+        try:
+            while True:
+                logging.debug("Sending typing indicator.")
+                await self.room_typing(self.room, True)
+                await asyncio.sleep(sleep_time)
+        except asyncio.CancelledError:
+            await self.room_typing(self.room, False)
+            logging.debug("Stopped typing indicator.")
+    async def message_callback(
+        self,
+        room: MatrixRoom,
+        event: RoomMessageText,
+    ) -> None:
         """Process new messages as they come in.
         Args:
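
The `typing_loop` added above follows the standard asyncio pattern for a periodic background job: run it with `asyncio.create_task()`, and cancelling the task raises `CancelledError` inside the loop, which is where the indicator is switched off. The diff wires it up exactly that way further down (`typing_task = asyncio.create_task(...)` before the LLM call, `typing_task.cancel()` after the reply is sent). A stripped-down standalone sketch of the same pattern, with a made-up `heartbeat` coroutine standing in for the typing indicator:

```python
import asyncio


async def heartbeat(interval: float = 1.0) -> None:
    """Stand-in for typing_loop: do something periodically until cancelled."""
    try:
        while True:
            print("still working...")
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        # Cleanup runs once, when the owner cancels the task.
        print("stopped")


async def main() -> None:
    task = asyncio.create_task(heartbeat())
    await asyncio.sleep(3)  # stand-in for the slow LLM call
    task.cancel()  # raises CancelledError inside heartbeat()
    try:
        await task  # let the cleanup branch finish before returning
    except asyncio.CancelledError:
        pass


asyncio.run(main())
```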
@@ -99,6 +128,7 @@ class LLMClient(AsyncClient):
         # ignore thread messages
         if (
             "m.relates_to" in event.source["content"]
+            and "rel_type" in event.source["content"]["m.relates_to"]
             and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
         ):
             logger.debug("Ignoring thread message.")
@@ -108,6 +138,14 @@ class LLMClient(AsyncClient):
         self.history.append(event)
         logger.debug(f"Updated history: {self.history}")
+        # update read receipt
+        await self.room_read_markers(
+            room_id=self.room,
+            fully_read_event=event.event_id,
+            read_event=event.event_id,
+        )
+        logger.debug(f"Updated read receipt to event: {event.event_id}")
         # ignore our own messages
         if event.sender == self.user:
             logger.debug("Ignoring our own message.")
@@ -124,16 +162,11 @@ class LLMClient(AsyncClient):
             logger.debug("Ignoring message not mentioning us.")
             return
-        # enable typing indicator
-        await self.room_typing(
-            self.room,
-            typing_state=True,
-            timeout=30000,
-        )
-        logger.debug("Enabled typing indicator.")
+        # start typing indicator loop
+        typing_task = asyncio.create_task(self.typing_loop())
         # generate response using llama.cpp
-        response = openai.ChatCompletion.create(
+        response = await openai.ChatCompletion.acreate(
             model="local-model",
             messages=[
                 {
@@ -158,9 +191,17 @@ class LLMClient(AsyncClient):
         output = response["choices"][0]["message"]["content"] # type: ignore
         output = output.strip().removeprefix(f"{self.uid}:").strip()
-        # disable typing indicator
-        await self.room_typing(self.room, typing_state=False)
-        logger.debug("Disabled typing indicator.")
+        # replace newlines with <br>
+        formatted_output = output.replace("\n", "<br>")
+        # detect mentions and replace them with html mentions
+        formatted_output = re.sub(
+            r"@[^:]+:[^ :]+",
+            lambda match: f'<a href="https://matrix.to/#/{match.group(0)}"></a>',
+            formatted_output,
+        )
+        logger.debug(f"Formatted response: {formatted_output}")
         # send the response
         await self.room_send(
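
The formatting block above is what turns the plain-text completion into the HTML body Matrix clients render: newlines become `<br>` and anything shaped like a user ID becomes a matrix.to link (with an empty link label, exactly as the lambda writes it). Running those two transformations on a made-up reply shows the effect:

```python
import re

output = "Hi @alice:matrix.org how are you?\nSee you soon."

# Same two steps as in message_callback above.
formatted_output = output.replace("\n", "<br>")
formatted_output = re.sub(
    r"@[^:]+:[^ :]+",
    lambda match: f'<a href="https://matrix.to/#/{match.group(0)}"></a>',
    formatted_output,
)

print(formatted_output)
# Hi <a href="https://matrix.to/#/@alice:matrix.org"></a> how are you?<br>See you soon.
```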
@@ -169,16 +210,27 @@ class LLMClient(AsyncClient):
             content={
                 "msgtype": "m.text",
                 "body": output,
+                "format": "org.matrix.custom.html",
+                "formatted_body": formatted_output,
             },
         )
         logger.debug(f"Sent response: {output}")
-    async def start(self, password, sync_timeout=30000) -> None:
+        # stop typing indicator loop
+        typing_task.cancel()
+    async def start(
+        self,
+        password: str,
+        sync_timeout: int = 30000,
+    ) -> None:
         """Start the client.
         Args:
-            password (`str`): The password to log in with.
-            sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
+            password (`str`):
+                The password to log in with.
+            sync_timeout (`int`, default `30000`):
+                The sync timeout in milliseconds.
         """
         # Login to the homeserver
         logger.debug(await self.login(password))