Mirror of https://github.com/Laurent2916/nio-llm.git, synced 2024-10-22 17:46:22 +00:00
Compare commits
8 commits
904dde744f ... d7a14fd4ee
Author | SHA1 | Date
---|---|---
Laureηt | d7a14fd4ee |
Laureηt | 10c7513add |
Laureηt | 5b5a18d73b |
Laureηt | 0f312a0a70 |
Laureηt | 8eda4825d9 |
Laureηt | 12080ad3a5 |
Laureηt | ca22fe640f |
Laureηt | 2d91052d6e |
README.md (74 changed lines)

@@ -8,79 +8,11 @@ Your own little LLM in your matrix chatroom.

## Usage

This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.

This project is split in two parts: the client and the server.

To see the available options, run:

The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an OpenAI-compatible server).

```bash
nio_llm --help
```

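For reference, here is a minimal sketch of what the server half described above could look like. The repo id, filename, and port are the defaults documented in this README; the use of `huggingface_hub` and the exact `llama_cpp.server` invocation are assumptions for illustration, not the project's actual server code.

```python
# Hedged sketch: fetch a GGML model and start an OpenAI-compatible
# llama-cpp-python server. Only the repo id, filename and port are taken
# from this README; the rest is illustrative.
import subprocess

from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/stable-vicuna-13B-GGML",
    filename="stable-vicuna-13B.ggmlv3.q5_1.bin",
)

# llama-cpp-python ships a server module that mimics the OpenAI HTTP API.
subprocess.run(
    ["python", "-m", "llama_cpp.server", "--model", model_path, "--port", "8000"],
    check=True,
)
```
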
To run the bot, you can use command line arguments, environment variables, or a config file (or a mix of all three).

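As a rough illustration of how jsonargparse derives those interfaces from a plain function signature, here is a hedged sketch; `run` and its parameters are placeholders mirroring the options documented below, not the project's actual entry point.

```python
# Hedged sketch of a jsonargparse-style entry point; the function and its
# parameters are placeholders, not nio-llm's real code.
from jsonargparse import CLI


def run(
    room: str,
    username: str,
    password: str,
    preprompt: str,
    homeserver: str = "https://matrix.org",
) -> None:
    """Placeholder body standing in for the real bot start-up."""
    print(f"Would join {room} on {homeserver} as {username}")


if __name__ == "__main__":
    # jsonargparse turns the signature into --room/--username/... options;
    # parameters without defaults become required flags.
    CLI(run)
```
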
### Command line arguments

```bash
# --room, --password, --username and --preprompt are required; the rest are optional.
nio_llm \
  --room <YOUR ROOM> \
  --password <YOUR PASSWORD> \
  --username <YOUR USERNAME> \
  --preprompt <YOUR PREPROMPT> \
  --device-id nio-llm \
  --homeserver https://matrix.org \
  --ggml-repoid TheBloke/stable-vicuna-13B-GGML \
  --ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
  --sync-timeout 30000
```

### Environment variables

```bash
# required
export NIO_LLM_ROOM=<YOUR ROOM>
export NIO_LLM_PASSWORD=<YOUR PASSWORD>
export NIO_LLM_USERNAME=<YOUR USERNAME>
export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>

# optional
export NIO_LLM_DEVICE_ID=nio-llm
export NIO_LLM_HOMESERVER=https://matrix.org
export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
export NIO_LLM_SYNC_TIMEOUT=30000

nio_llm
```

### Config file

Create a config file with the following content:

```yaml
# config_file.yaml

# required
room: <YOUR ROOM>
password: <YOUR PASSWORD>
username: <YOUR USERNAME>
preprompt: <YOUR PREPROMPT>

# optional
device_id: nio-llm
homeserver: https://matrix.org
ggml_repoid: TheBloke/stable-vicuna-13B-GGML
ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
sync_timeout: 30000
```

Then run:

```bash
nio_llm --config config_file.yaml
```

The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.

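As a minimal sketch of the kind of request the client sends to that local endpoint, assuming the pre-1.0 `openai` Python client and the defaults visible in the code changes below (`http://localhost:8000/v1`, model name `local-model`):

```python
# Hedged sketch: query the local llama-cpp-python server through its
# OpenAI-compatible API. Endpoint, model name, temperature and max_tokens
# mirror the defaults in the diff below; the prompt is made up.
import openai

openai.api_base = "http://localhost:8000/v1"
openai.api_key = "sk-no-key-needed"  # placeholder; the local server typically ignores it

response = openai.ChatCompletion.create(
    model="local-model",
    messages=[{"role": "user", "content": "Hello from the matrix room!"}],
    temperature=0,
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```
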
## Special thanks

@@ -15,7 +15,7 @@ def main(
room: str,
username: str,
password: str,
preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
device_id: str = "nio-llm",
homeserver: str = "https://matrix.org",
sync_timeout: int = 30000,
@@ -23,6 +23,7 @@ def main(
openai_api_endpoint: str = "http://localhost:8000/v1",
openai_temperature: float = 0,
openai_max_tokens: int = 256,
history_size: int = 3,
) -> None:
"""Instantiate and start the client.
@@ -57,6 +58,9 @@ def main(
openai_max_tokens (`int`):
The OpenAI max tokens to use.
Defaults to `256`.
history_size (`int`):
The number of messages to keep in history.
Defaults to `3`.
"""
# create the client
client = LLMClient(
@@ -69,6 +73,7 @@ def main(
openai_api_endpoint=openai_api_endpoint,
openai_temperature=openai_temperature,
openai_max_tokens=openai_max_tokens,
history_size=history_size,
)

# start the client

@@ -1,4 +1,6 @@
import asyncio
import logging
import re
import time
from collections import deque
@@ -22,6 +24,7 @@ class LLMClient(AsyncClient):
openai_api_endpoint: str,
openai_temperature: float,
openai_max_tokens: int,
history_size: int,
) -> None:
"""Create a new LLMClient instance.
@@ -44,6 +47,8 @@ class LLMClient(AsyncClient):
The OpenAI temperature to use.
openai_max_tokens (`int`):
The OpenAI max tokens to use.
history_size (`int`):
The number of messages to keep in history.
"""
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
self.spawn_time = time.time() * 1000
@@ -65,12 +70,36 @@ class LLMClient(AsyncClient):
)

# create message history queue
self.history: deque[RoomMessageText] = deque(maxlen=10)
self.history: deque[RoomMessageText] = deque(maxlen=history_size)

# add callbacks
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore

async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
async def typing_loop(
self,
sleep_time: int = 10,
) -> None:
"""Send typing indicators every `sleep_time` seconds.

Args:
sleep_time (`int`, default `10`):
The time to sleep between sending typing indicators.
"""
logging.debug("Started typing indicator.")
try:
while True:
logging.debug("Sending typing indicator.")
await self.room_typing(self.room, True)
await asyncio.sleep(sleep_time)
except asyncio.CancelledError:
await self.room_typing(self.room, False)
logging.debug("Stopped typing indicator.")
async def message_callback(
self,
room: MatrixRoom,
event: RoomMessageText,
) -> None:
"""Process new messages as they come in.

Args:
@@ -99,6 +128,7 @@ class LLMClient(AsyncClient):
# ignore thread messages
if (
"m.relates_to" in event.source["content"]
and "rel_type" in event.source["content"]["m.relates_to"]
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
):
logger.debug("Ignoring thread message.")
@@ -108,6 +138,14 @@ class LLMClient(AsyncClient):
self.history.append(event)
logger.debug(f"Updated history: {self.history}")

# update read receipt
await self.room_read_markers(
room_id=self.room,
fully_read_event=event.event_id,
read_event=event.event_id,
)
logger.debug(f"Updated read receipt to event: {event.event_id}")

# ignore our own messages
if event.sender == self.user:
logger.debug("Ignoring our own message.")
@@ -124,16 +162,11 @@ class LLMClient(AsyncClient):
logger.debug("Ignoring message not mentioning us.")
return

# enable typing indicator
await self.room_typing(
self.room,
typing_state=True,
timeout=30000,
)
logger.debug("Enabled typing indicator.")
# start typing indicator loop
typing_task = asyncio.create_task(self.typing_loop())

# generate response using llama.cpp
response = openai.ChatCompletion.create(
response = await openai.ChatCompletion.acreate(
model="local-model",
messages=[
{
@@ -158,9 +191,17 @@ class LLMClient(AsyncClient):
output = response["choices"][0]["message"]["content"] # type: ignore
output = output.strip().removeprefix(f"{self.uid}:").strip()

# disable typing indicator
await self.room_typing(self.room, typing_state=False)
logger.debug("Disabled typing indicator.")
# replace newlines with <br>
formatted_output = output.replace("\n", "<br>")

# detect mentions and replace them with html mentions
formatted_output = re.sub(
r"@[^:]+:[^ :]+",
lambda match: f'<a href="https://matrix.to/#/{match.group(0)}">{match.group(0)}</a>',
formatted_output,
)

logger.debug(f"Formatted response: {formatted_output}")

# send the response
await self.room_send(
@@ -169,16 +210,27 @@ class LLMClient(AsyncClient):
content={
"msgtype": "m.text",
"body": output,
"format": "org.matrix.custom.html",
"formatted_body": formatted_output,
},
)
logger.debug(f"Sent response: {output}")

async def start(self, password, sync_timeout=30000) -> None:
# stop typing indicator loop
typing_task.cancel()
async def start(
self,
password: str,
sync_timeout: int = 30000,
) -> None:
"""Start the client.

Args:
password (`str`): The password to log in with.
sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
password (`str`):
The password to log in with.
sync_timeout (`int`, default `30000`):
The sync timeout in milliseconds.
"""
# Login to the homeserver
logger.debug(await self.login(password))
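
The typing indicator added in this change uses a standard asyncio pattern: start a looping background task before the slow LLM call and cancel it once the reply is ready, letting the task clean up in its `CancelledError` handler. A self-contained sketch of that pattern (independent of matrix-nio; all names here are illustrative):

```python
# Hedged sketch of the background-task pattern behind the typing indicator:
# a looping task runs alongside slow work and is cancelled when it finishes.
import asyncio


async def heartbeat(interval: float = 1.0) -> None:
    """Stand-in for typing_loop: 'type' every `interval` seconds until cancelled."""
    try:
        while True:
            print("typing...")
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        print("stopped typing")  # clean-up, like room_typing(..., False)


async def main() -> None:
    task = asyncio.create_task(heartbeat())
    await asyncio.sleep(3.5)  # stand-in for the slow chat completion call
    task.cancel()
    # let the task run its CancelledError handler before we exit
    await asyncio.gather(task, return_exceptions=True)


asyncio.run(main())
```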