Mirror of https://github.com/Laurent2916/nio-llm.git, synced 2024-10-22 17:46:22 +00:00
Compare commits
8 commits
904dde744f ... d7a14fd4ee
Author | SHA1 | Date
---|---|---
Laureηt | d7a14fd4ee |
Laureηt | 10c7513add |
Laureηt | 5b5a18d73b |
Laureηt | 0f312a0a70 |
Laureηt | 8eda4825d9 |
Laureηt | 12080ad3a5 |
Laureηt | ca22fe640f |
Laureηt | 2d91052d6e |
README.md (74 changed lines)

@@ -8,79 +8,11 @@ Your own little LLM in your matrix chatroom.

## Usage

This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.

This project is split in two parts: the client and the server.

To see the available options, run:

The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an OpenAI-compatible server).

```bash
nio_llm --help
```

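For reference, here is a minimal sketch of what the server half described above could look like. The repo id, filename, and port are the defaults documented in this README; the use of `huggingface_hub` and the exact `llama_cpp.server` invocation are assumptions for illustration, not the project's actual server code.

```python
# Hedged sketch: fetch a GGML model and start an OpenAI-compatible
# llama-cpp-python server. Only the repo id, filename and port are taken
# from this README; the rest is illustrative.
import subprocess

from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/stable-vicuna-13B-GGML",
    filename="stable-vicuna-13B.ggmlv3.q5_1.bin",
)

# llama-cpp-python ships a server module that mimics the OpenAI HTTP API.
subprocess.run(
    ["python", "-m", "llama_cpp.server", "--model", model_path, "--port", "8000"],
    check=True,
)
```
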
To run the bot, you can use command line arguments, environment variables, or a config file (or a mix of all three).

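As a rough illustration of how jsonargparse derives those interfaces from a plain function signature, here is a hedged sketch; `run` and its parameters are placeholders mirroring the options documented below, not the project's actual entry point.

```python
# Hedged sketch of a jsonargparse-style entry point; the function and its
# parameters are placeholders, not nio-llm's real code.
from jsonargparse import CLI


def run(
    room: str,
    username: str,
    password: str,
    preprompt: str,
    homeserver: str = "https://matrix.org",
) -> None:
    """Placeholder body standing in for the real bot start-up."""
    print(f"Would join {room} on {homeserver} as {username}")


if __name__ == "__main__":
    # jsonargparse turns the signature into --room/--username/... options;
    # parameters without defaults become required flags.
    CLI(run)
```
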
### Command line arguments

```bash
# --room, --password, --username and --preprompt are required; the rest are optional.
nio_llm \
  --room <YOUR ROOM> \
  --password <YOUR PASSWORD> \
  --username <YOUR USERNAME> \
  --preprompt <YOUR PREPROMPT> \
  --device-id nio-llm \
  --homeserver https://matrix.org \
  --ggml-repoid TheBloke/stable-vicuna-13B-GGML \
  --ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
  --sync-timeout 30000
```

### Environment variables

```bash
# required
export NIO_LLM_ROOM=<YOUR ROOM>
export NIO_LLM_PASSWORD=<YOUR PASSWORD>
export NIO_LLM_USERNAME=<YOUR USERNAME>
export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>

# optional
export NIO_LLM_DEVICE_ID=nio-llm
export NIO_LLM_HOMESERVER=https://matrix.org
export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
export NIO_LLM_SYNC_TIMEOUT=30000

nio_llm
```

### Config file

Create a config file with the following content:

```yaml
# config_file.yaml

# required
room: <YOUR ROOM>
password: <YOUR PASSWORD>
username: <YOUR USERNAME>
preprompt: <YOUR PREPROMPT>

# optional
device_id: nio-llm
homeserver: https://matrix.org
ggml_repoid: TheBloke/stable-vicuna-13B-GGML
ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
sync_timeout: 30000
```

Then run:

```bash
nio_llm --config config_file.yaml
```

The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.

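As a minimal sketch of the kind of request the client sends to that local endpoint, assuming the pre-1.0 `openai` Python client and the defaults visible in the code changes below (`http://localhost:8000/v1`, model name `local-model`):

```python
# Hedged sketch: query the local llama-cpp-python server through its
# OpenAI-compatible API. Endpoint, model name, temperature and max_tokens
# mirror the defaults in the diff below; the prompt is made up.
import openai

openai.api_base = "http://localhost:8000/v1"
openai.api_key = "sk-no-key-needed"  # placeholder; the local server typically ignores it

response = openai.ChatCompletion.create(
    model="local-model",
    messages=[{"role": "user", "content": "Hello from the matrix room!"}],
    temperature=0,
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```
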
## Special thanks

@@ -15,7 +15,7 @@ def main(
room: str,
username: str,
password: str,
preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
device_id: str = "nio-llm",
homeserver: str = "https://matrix.org",
sync_timeout: int = 30000,
@@ -23,6 +23,7 @@ def main(
openai_api_endpoint: str = "http://localhost:8000/v1",
openai_temperature: float = 0,
openai_max_tokens: int = 256,
history_size: int = 3,
) -> None:
"""Instantiate and start the client.
@@ -57,6 +58,9 @@ def main(
openai_max_tokens (`int`):
The OpenAI max tokens to use.
Defaults to `256`.
history_size (`int`):
The number of messages to keep in history.
Defaults to `3`.
"""
# create the client
client = LLMClient(
@@ -69,6 +73,7 @@ def main(
openai_api_endpoint=openai_api_endpoint,
openai_temperature=openai_temperature,
openai_max_tokens=openai_max_tokens,
history_size=history_size,
)

# start the client

@@ -1,4 +1,6 @@
import asyncio
import logging
import re
import time
from collections import deque
@@ -22,6 +24,7 @@ class LLMClient(AsyncClient):
openai_api_endpoint: str,
openai_temperature: float,
openai_max_tokens: int,
history_size: int,
) -> None:
"""Create a new LLMClient instance.
@@ -44,6 +47,8 @@ class LLMClient(AsyncClient):
The OpenAI temperature to use.
openai_max_tokens (`int`):
The OpenAI max tokens to use.
history_size (`int`):
The number of messages to keep in history.
"""
self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
self.spawn_time = time.time() * 1000
@@ -65,12 +70,36 @@ class LLMClient(AsyncClient):
)

# create message history queue
self.history: deque[RoomMessageText] = deque(maxlen=10)
self.history: deque[RoomMessageText] = deque(maxlen=history_size)

# add callbacks
self.add_event_callback(self.message_callback, RoomMessageText) # type: ignore

async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
async def typing_loop(
self,
sleep_time: int = 10,
) -> None:
"""Send typing indicators every `sleep_time` seconds.

Args:
sleep_time (`int`, default `10`):
The time to sleep between sending typing indicators.
"""
logging.debug("Started typing indicator.")
try:
while True:
logging.debug("Sending typing indicator.")
await self.room_typing(self.room, True)
await asyncio.sleep(sleep_time)
except asyncio.CancelledError:
await self.room_typing(self.room, False)
logging.debug("Stopped typing indicator.")
async def message_callback(
self,
room: MatrixRoom,
event: RoomMessageText,
) -> None:
"""Process new messages as they come in.

Args:
@@ -99,6 +128,7 @@ class LLMClient(AsyncClient):
# ignore thread messages
if (
"m.relates_to" in event.source["content"]
and "rel_type" in event.source["content"]["m.relates_to"]
and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
):
logger.debug("Ignoring thread message.")
@@ -108,6 +138,14 @@ class LLMClient(AsyncClient):
self.history.append(event)
logger.debug(f"Updated history: {self.history}")

# update read receipt
await self.room_read_markers(
room_id=self.room,
fully_read_event=event.event_id,
read_event=event.event_id,
)
logger.debug(f"Updated read receipt to event: {event.event_id}")

# ignore our own messages
if event.sender == self.user:
logger.debug("Ignoring our own message.")
@@ -124,16 +162,11 @@ class LLMClient(AsyncClient):
logger.debug("Ignoring message not mentioning us.")
return

# enable typing indicator
await self.room_typing(
self.room,
typing_state=True,
timeout=30000,
)
logger.debug("Enabled typing indicator.")
# start typing indicator loop
typing_task = asyncio.create_task(self.typing_loop())

# generate response using llama.cpp
response = openai.ChatCompletion.create(
response = await openai.ChatCompletion.acreate(
model="local-model",
messages=[
{
@@ -158,9 +191,17 @@ class LLMClient(AsyncClient):
output = response["choices"][0]["message"]["content"] # type: ignore
output = output.strip().removeprefix(f"{self.uid}:").strip()

# disable typing indicator
await self.room_typing(self.room, typing_state=False)
logger.debug("Disabled typing indicator.")
# replace newlines with <br>
formatted_output = output.replace("\n", "<br>")

# detect mentions and replace them with html mentions
formatted_output = re.sub(
r"@[^:]+:[^ :]+",
lambda match: f'<a href="https://matrix.to/#/{match.group(0)}">{match.group(0)}</a>',
formatted_output,
)

logger.debug(f"Formatted response: {formatted_output}")

# send the response
await self.room_send(
@@ -169,16 +210,27 @@ class LLMClient(AsyncClient):
content={
"msgtype": "m.text",
"body": output,
"format": "org.matrix.custom.html",
"formatted_body": formatted_output,
},
)
logger.debug(f"Sent response: {output}")

async def start(self, password, sync_timeout=30000) -> None:
# stop typing indicator loop
typing_task.cancel()
async def start(
self,
password: str,
sync_timeout: int = 30000,
) -> None:
"""Start the client.

Args:
password (`str`): The password to log in with.
sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
password (`str`):
The password to log in with.
sync_timeout (`int`, default `30000`):
The sync timeout in milliseconds.
"""
# Login to the homeserver
logger.debug(await self.login(password))
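
The typing indicator added in this change uses a standard asyncio pattern: start a looping background task before the slow LLM call and cancel it once the reply is ready, letting the task clean up in its `CancelledError` handler. A self-contained sketch of that pattern (independent of matrix-nio; all names here are illustrative):

```python
# Hedged sketch of the background-task pattern behind the typing indicator:
# a looping task runs alongside slow work and is cancelled when it finishes.
import asyncio


async def heartbeat(interval: float = 1.0) -> None:
    """Stand-in for typing_loop: 'type' every `interval` seconds until cancelled."""
    try:
        while True:
            print("typing...")
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        print("stopped typing")  # clean-up, like room_typing(..., False)


async def main() -> None:
    task = asyncio.create_task(heartbeat())
    await asyncio.sleep(3.5)  # stand-in for the slow chat completion call
    task.cancel()
    # let the task run its CancelledError handler before we exit
    await asyncio.gather(task, return_exceptions=True)


asyncio.run(main())
```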