mirror of
https://github.com/Laurent2916/nio-llm.git
synced 2024-10-23 01:56:21 +00:00
Compare commits
No commits in common. "d7a14fd4eefb8c1098a222dcba5aab8756ca44fb" and "904dde744f15226eaf6e43192c57ca941b5a781b" have entirely different histories.
d7a14fd4ee
...
904dde744f
74
README.md
74
README.md
|
@ -8,11 +8,79 @@ Your own little LLM in your matrix chatroom.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
This project is split into two parts: the client and the server.
|
This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.
|
||||||
|
|
||||||
The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an openai server).
|
To see the available options, run:
|
||||||
|
|
||||||
The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.
|
```bash
|
||||||
|
nio_llm --help
|
||||||
|
```
|
||||||
|
|
||||||
|
To run the bot, you can use command line arguments, environment variables, a config file, or any mix of the three.
|
||||||
|
|
||||||
|
### Command line arguments
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nio_llm \
|
||||||
|
# required \
|
||||||
|
--room <YOUR ROOM> \
|
||||||
|
--password <YOUR PASSWORD> \
|
||||||
|
--username <YOUR USERNAME> \
|
||||||
|
--preprompt <YOUR PREPROMPT> \
|
||||||
|
# optional \
|
||||||
|
--device-id nio-llm \
|
||||||
|
--homeserver https://matrix.org \
|
||||||
|
--ggml-repoid TheBloke/stable-vicuna-13B-GGML \
|
||||||
|
--ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
|
||||||
|
--sync-timeout 30000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Environment variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# required
|
||||||
|
export NIO_LLM_ROOM=<YOUR ROOM>
|
||||||
|
export NIO_LLM_PASSWORD=<YOUR PASSWORD>
|
||||||
|
export NIO_LLM_USERNAME=<YOUR USERNAME>
|
||||||
|
export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>
|
||||||
|
|
||||||
|
# optional
|
||||||
|
export NIO_LLM_DEVICE_ID=nio-llm
|
||||||
|
export NIO_LLM_HOMESERVER=https://matrix.org
|
||||||
|
export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
|
||||||
|
export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
|
||||||
|
export NIO_LLM_SYNC_TIMEOUT=30000
|
||||||
|
|
||||||
|
nio_llm
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Config file
|
||||||
|
|
||||||
|
Create a config file with the following content:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# config_file.yaml
|
||||||
|
|
||||||
|
# required
|
||||||
|
room: <YOUR ROOM>
|
||||||
|
password: <YOUR PASSWORD>
|
||||||
|
username: <YOUR USERNAME>
|
||||||
|
preprompt: <YOUR PREPROMPT>
|
||||||
|
|
||||||
|
# optional
|
||||||
|
device_id: nio-llm
|
||||||
|
homeserver: https://matrix.org
|
||||||
|
ggml_repoid: TheBloke/stable-vicuna-13B-GGML
|
||||||
|
ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
|
||||||
|
sync_timeout: 30000
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nio_llm --config config_file.yaml
|
||||||
|
```
|
||||||
|
|
||||||
## Special thanks
|
## Special thanks
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ def main(
|
||||||
room: str,
|
room: str,
|
||||||
username: str,
|
username: str,
|
||||||
password: str,
|
password: str,
|
||||||
preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
|
preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
|
||||||
device_id: str = "nio-llm",
|
device_id: str = "nio-llm",
|
||||||
homeserver: str = "https://matrix.org",
|
homeserver: str = "https://matrix.org",
|
||||||
sync_timeout: int = 30000,
|
sync_timeout: int = 30000,
|
||||||
|
@ -23,7 +23,6 @@ def main(
|
||||||
openai_api_endpoint: str = "http://localhost:8000/v1",
|
openai_api_endpoint: str = "http://localhost:8000/v1",
|
||||||
openai_temperature: float = 0,
|
openai_temperature: float = 0,
|
||||||
openai_max_tokens: int = 256,
|
openai_max_tokens: int = 256,
|
||||||
history_size: int = 3,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Instantiate and start the client.
|
"""Instantiate and start the client.
|
||||||
|
|
||||||
|
@ -58,9 +57,6 @@ def main(
|
||||||
openai_max_tokens (`int`):
|
openai_max_tokens (`int`):
|
||||||
The OpenAI max tokens to use.
|
The OpenAI max tokens to use.
|
||||||
Defaults to `256`.
|
Defaults to `256`.
|
||||||
history_size (`int`):
|
|
||||||
The number of messages to keep in history.
|
|
||||||
Defaults to `3`.
|
|
||||||
"""
|
"""
|
||||||
# create the client
|
# create the client
|
||||||
client = LLMClient(
|
client = LLMClient(
|
||||||
|
@ -73,7 +69,6 @@ def main(
|
||||||
openai_api_endpoint=openai_api_endpoint,
|
openai_api_endpoint=openai_api_endpoint,
|
||||||
openai_temperature=openai_temperature,
|
openai_temperature=openai_temperature,
|
||||||
openai_max_tokens=openai_max_tokens,
|
openai_max_tokens=openai_max_tokens,
|
||||||
history_size=history_size,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# start the client
|
# start the client
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
import asyncio
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
|
||||||
|
@ -24,7 +22,6 @@ class LLMClient(AsyncClient):
|
||||||
openai_api_endpoint: str,
|
openai_api_endpoint: str,
|
||||||
openai_temperature: float,
|
openai_temperature: float,
|
||||||
openai_max_tokens: int,
|
openai_max_tokens: int,
|
||||||
history_size: int,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Create a new LLMClient instance.
|
"""Create a new LLMClient instance.
|
||||||
|
|
||||||
|
@ -47,8 +44,6 @@ class LLMClient(AsyncClient):
|
||||||
The OpenAI temperature to use.
|
The OpenAI temperature to use.
|
||||||
openai_max_tokens (`int`):
|
openai_max_tokens (`int`):
|
||||||
The OpenAI max tokens to use.
|
The OpenAI max tokens to use.
|
||||||
history_size (`int`):
|
|
||||||
The number of messages to keep in history.
|
|
||||||
"""
|
"""
|
||||||
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
|
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
|
||||||
self.spawn_time = time.time() * 1000
|
self.spawn_time = time.time() * 1000
|
||||||
|
@ -70,36 +65,12 @@ class LLMClient(AsyncClient):
|
||||||
)
|
)
|
||||||
|
|
||||||
# create message history queue
|
# create message history queue
|
||||||
self.history: deque[RoomMessageText] = deque(maxlen=history_size)
|
self.history: deque[RoomMessageText] = deque(maxlen=10)
|
||||||
|
|
||||||
# add callbacks
|
# add callbacks
|
||||||
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore
|
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore
|
||||||
|
|
||||||
async def typing_loop(
|
async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
|
||||||
self,
|
|
||||||
sleep_time: int = 10,
|
|
||||||
) -> None:
|
|
||||||
"""Send typing indicators every `sleep_time` seconds.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
sleep_time (`int`, default `10`):
|
|
||||||
The time to sleep between sending typing indicators.
|
|
||||||
"""
|
|
||||||
logging.debug("Started typing indicator.")
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
logging.debug("Sending typing indicator.")
|
|
||||||
await self.room_typing(self.room, True)
|
|
||||||
await asyncio.sleep(sleep_time)
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
await self.room_typing(self.room, False)
|
|
||||||
logging.debug("Stopped typing indicator.")
|
|
||||||
|
|
||||||
async def message_callback(
|
|
||||||
self,
|
|
||||||
room: MatrixRoom,
|
|
||||||
event: RoomMessageText,
|
|
||||||
) -> None:
|
|
||||||
"""Process new messages as they come in.
|
"""Process new messages as they come in.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -128,7 +99,6 @@ class LLMClient(AsyncClient):
|
||||||
# ignore thread messages
|
# ignore thread messages
|
||||||
if (
|
if (
|
||||||
"m.relates_to" in event.source["content"]
|
"m.relates_to" in event.source["content"]
|
||||||
and "rel_type" in event.source["content"]["m.relates_to"]
|
|
||||||
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
|
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
|
||||||
):
|
):
|
||||||
logger.debug("Ignoring thread message.")
|
logger.debug("Ignoring thread message.")
|
||||||
|
@ -138,14 +108,6 @@ class LLMClient(AsyncClient):
|
||||||
self.history.append(event)
|
self.history.append(event)
|
||||||
logger.debug(f"Updated history: {self.history}")
|
logger.debug(f"Updated history: {self.history}")
|
||||||
|
|
||||||
# update read receipt
|
|
||||||
await self.room_read_markers(
|
|
||||||
room_id=self.room,
|
|
||||||
fully_read_event=event.event_id,
|
|
||||||
read_event=event.event_id,
|
|
||||||
)
|
|
||||||
logger.debug(f"Updated read receipt to event: {event.event_id}")
|
|
||||||
|
|
||||||
# ignore our own messages
|
# ignore our own messages
|
||||||
if event.sender == self.user:
|
if event.sender == self.user:
|
||||||
logger.debug("Ignoring our own message.")
|
logger.debug("Ignoring our own message.")
|
||||||
|
@ -162,11 +124,16 @@ class LLMClient(AsyncClient):
|
||||||
logger.debug("Ignoring message not mentioning us.")
|
logger.debug("Ignoring message not mentioning us.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# start typing indicator loop
|
# enable typing indicator
|
||||||
typing_task = asyncio.create_task(self.typing_loop())
|
await self.room_typing(
|
||||||
|
self.room,
|
||||||
|
typing_state=True,
|
||||||
|
timeout=30000,
|
||||||
|
)
|
||||||
|
logger.debug("Enabled typing indicator.")
|
||||||
|
|
||||||
# generate response using llama.cpp
|
# generate response using llama.cpp
|
||||||
response = await openai.ChatCompletion.acreate(
|
response = openai.ChatCompletion.create(
|
||||||
model="local-model",
|
model="local-model",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
|
@ -191,17 +158,9 @@ class LLMClient(AsyncClient):
|
||||||
output = response["choices"][0]["message"]["content"] # type: ignore
|
output = response["choices"][0]["message"]["content"] # type: ignore
|
||||||
output = output.strip().removeprefix(f"{self.uid}:").strip()
|
output = output.strip().removeprefix(f"{self.uid}:").strip()
|
||||||
|
|
||||||
# replace newlines with <br>
|
# disable typing indicator
|
||||||
formatted_output = output.replace("\n", "<br>")
|
await self.room_typing(self.room, typing_state=False)
|
||||||
|
logger.debug("Disabled typing indicator.")
|
||||||
# detect mentions and replace them with html mentions
|
|
||||||
formatted_output = re.sub(
|
|
||||||
r"@[^:]+:[^ :]+",
|
|
||||||
lambda match: f'<a href="https://matrix.to/#/{match.group(0)}"></a>',
|
|
||||||
formatted_output,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug(f"Formatted response: {formatted_output}")
|
|
||||||
|
|
||||||
# send the response
|
# send the response
|
||||||
await self.room_send(
|
await self.room_send(
|
||||||
|
@ -210,27 +169,16 @@ class LLMClient(AsyncClient):
|
||||||
content={
|
content={
|
||||||
"msgtype": "m.text",
|
"msgtype": "m.text",
|
||||||
"body": output,
|
"body": output,
|
||||||
"format": "org.matrix.custom.html",
|
|
||||||
"formatted_body": formatted_output,
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
logger.debug(f"Sent response: {output}")
|
logger.debug(f"Sent response: {output}")
|
||||||
|
|
||||||
# stop typing indicator loop
|
async def start(self, password, sync_timeout=30000) -> None:
|
||||||
typing_task.cancel()
|
|
||||||
|
|
||||||
async def start(
|
|
||||||
self,
|
|
||||||
password: str,
|
|
||||||
sync_timeout: int = 30000,
|
|
||||||
) -> None:
|
|
||||||
"""Start the client.
|
"""Start the client.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
password (`str`):
|
password (`str`): The password to log in with.
|
||||||
The password to log in with.
|
sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
|
||||||
sync_timeout (`int`, default `30000`):
|
|
||||||
The sync timeout in milliseconds.
|
|
||||||
"""
|
"""
|
||||||
# Login to the homeserver
|
# Login to the homeserver
|
||||||
logger.debug(await self.login(password))
|
logger.debug(await self.login(password))
|
||||||
|
|
Loading…
Reference in a new issue