2024-10-23 01:56:21 +00:00
3 changed files with 88 additions and 77 deletions
--- a/README.md
+++ b/README.md
@ -8,11 +8,79 @@ Your own little LLM in your matrix chatroom.
 ## Usage
-This project is split in two parts: the client and the server.
+This project uses [jsonargparse](https://github.com/omni-us/jsonargparse/) to help with the command line arguments.
-The server simply downloads an LLM and starts a llama-cpp-python server (which mimics an openai server).
+To see the available options, run:
-The client connects to the matrix server and queries the llama-cpp-python server to create matrix messages.
+```bash
 nio_llm --help
 ```
 To run the bot, you can use command line arguments, environment variables, or a config file (or a mix of all three).
 ### Command line arguments
 ```bash
 # required: --room, --password, --username, --preprompt
 # optional: all remaining flags
 nio_llm \
  --room <YOUR ROOM> \
  --password <YOUR PASSWORD> \
  --username <YOUR USERNAME> \
  --preprompt <YOUR PREPROMPT> \
  --device-id nio-llm \
  --homeserver https://matrix.org \
  --ggml-repoid TheBloke/stable-vicuna-13B-GGML \
  --ggml-filename stable-vicuna-13B.ggmlv3.q5_1.bin \
  --sync-timeout 30000
 ```
 ### Environment variables
 ```bash
 # required
 export NIO_LLM_ROOM=<YOUR ROOM>
 export NIO_LLM_PASSWORD=<YOUR PASSWORD>
 export NIO_LLM_USERNAME=<YOUR USERNAME>
 export NIO_LLM_PREPROMPT=<YOUR PREPROMPT>
 # optional
 export NIO_LLM_DEVICE_ID=nio-llm
 export NIO_LLM_HOMESERVER=https://matrix.org
 export NIO_LLM_GGML_REPOID=TheBloke/stable-vicuna-13B-GGML
 export NIO_LLM_GGML_FILENAME=stable-vicuna-13B.ggmlv3.q5_1.bin
 export NIO_LLM_SYNC_TIMEOUT=30000
 nio_llm
 ```
 ### Config file
 Create a config file with the following content:
 ```yaml
 # config_file.yaml
 # required
 room: <YOUR ROOM>
 password: <YOUR PASSWORD>
 username: <YOUR USERNAME>
 preprompt: <YOUR PREPROMPT>
 # optional
 device_id: nio-llm
 homeserver: https://matrix.org
 ggml_repoid: TheBloke/stable-vicuna-13B-GGML
 ggml_filename: stable-vicuna-13B.ggmlv3.q5_1.bin
 sync_timeout: 30000
 ```
 Then run:
 ```bash
 nio_llm --config config_file.yaml
 ```
 ## Special thanks
--- a/src/nio_llm/main.py
+++ b/src/nio_llm/main.py
@ -15,7 +15,7 @@ def main(
    room: str,
    username: str,
    password: str,
-    preprompt: str = "You are a helpful assistant in a multi-agent conversation. Be as concise as possible.",
+    preprompt: str = "You are a helpful assistant in a multi-agent [matrix] conversation.",
    device_id: str = "nio-llm",
    homeserver: str = "https://matrix.org",
    sync_timeout: int = 30000,
@ -23,7 +23,6 @@ def main(
    openai_api_endpoint: str = "http://localhost:8000/v1",
    openai_temperature: float = 0,
    openai_max_tokens: int = 256,
    history_size: int = 3,
 ) -> None:
    """Instantiate and start the client.
@ -58,9 +57,6 @@ def main(
        openai_max_tokens (`int`):
            The OpenAI max tokens to use.
            Defaults to `256`.
        history_size (`int`):
            The number of messages to keep in history.
            Defaults to `3`.
    """
    # create the client
    client = LLMClient(
@ -73,7 +69,6 @@ def main(
        openai_api_endpoint=openai_api_endpoint,
        openai_temperature=openai_temperature,
        openai_max_tokens=openai_max_tokens,
        history_size=history_size,
    )
    # start the client
--- a/src/nio_llm/client.py
+++ b/src/nio_llm/client.py
@ -1,6 +1,4 @@
 import asyncio
 import logging
 import re
 import time
 from collections import deque
@ -24,7 +22,6 @@ class LLMClient(AsyncClient):
        openai_api_endpoint: str,
        openai_temperature: float,
        openai_max_tokens: int,
        history_size: int,
    ) -> None:
        """Create a new LLMClient instance.
@ -47,8 +44,6 @@ class LLMClient(AsyncClient):
                The OpenAI temperature to use.
            openai_max_tokens (`int`):
                The OpenAI max tokens to use.
            history_size (`int`):
                The number of messages to keep in history.
        """
        self.uid = f"@{username}:{homeserver.removeprefix('https://')}"
        self.spawn_time = time.time() * 1000
@ -70,36 +65,12 @@ class LLMClient(AsyncClient):
        )
        # create message history queue
-        self.history: deque[RoomMessageText] = deque(maxlen=history_size)
+        self.history: deque[RoomMessageText] = deque(maxlen=10)
        # add callbacks
        self.add_event_callback(self.message_callback, RoomMessageText)  # type: ignore
-    async def typing_loop(
+    async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
        self,
        sleep_time: int = 10,
    ) -> None:
        """Send typing indicators every `sleep_time` seconds.
        Args:
            sleep_time (`int`, default `10`):
                The time to sleep between sending typing indicators.
        """
        logging.debug("Started typing indicator.")
        try:
            while True:
                logging.debug("Sending typing indicator.")
                await self.room_typing(self.room, True)
                await asyncio.sleep(sleep_time)
        except asyncio.CancelledError:
            await self.room_typing(self.room, False)
            logging.debug("Stopped typing indicator.")
    async def message_callback(
        self,
        room: MatrixRoom,
        event: RoomMessageText,
    ) -> None:
        """Process new messages as they come in.
        Args:
@ -128,7 +99,6 @@ class LLMClient(AsyncClient):
        # ignore thread messages
        if (
            "m.relates_to" in event.source["content"]
            and "rel_type" in event.source["content"]["m.relates_to"]
            and event.source["content"]["m.relates_to"]["rel_type"] == "m.thread"
        ):
            logger.debug("Ignoring thread message.")
@ -138,14 +108,6 @@ class LLMClient(AsyncClient):
        self.history.append(event)
        logger.debug(f"Updated history: {self.history}")
        # update read receipt
        await self.room_read_markers(
            room_id=self.room,
            fully_read_event=event.event_id,
            read_event=event.event_id,
        )
        logger.debug(f"Updated read receipt to event: {event.event_id}")
        # ignore our own messages
        if event.sender == self.user:
            logger.debug("Ignoring our own message.")
@ -162,11 +124,16 @@ class LLMClient(AsyncClient):
            logger.debug("Ignoring message not mentioning us.")
            return
-        # start typing indicator loop
+        # enable typing indicator
-        typing_task = asyncio.create_task(self.typing_loop())
+        await self.room_typing(
            self.room,
            typing_state=True,
            timeout=30000,
        )
        logger.debug("Enabled typing indicator.")
        # generate response using llama.cpp
-        response = await openai.ChatCompletion.acreate(
+        response = openai.ChatCompletion.create(
            model="local-model",
            messages=[
                {
@ -191,17 +158,9 @@ class LLMClient(AsyncClient):
        output = response["choices"][0]["message"]["content"]  # type: ignore
        output = output.strip().removeprefix(f"{self.uid}:").strip()
-        # replace newlines with <br>
+        # disable typing indicator
-        formatted_output = output.replace("\n", "<br>")
+        await self.room_typing(self.room, typing_state=False)
-
+        logger.debug("Disabled typing indicator.")
        # detect mentions and replace them with html mentions
        formatted_output = re.sub(
            r"@[^:]+:[^ :]+",
            lambda match: f'<a href="https://matrix.to/#/{match.group(0)}"></a>',
            formatted_output,
        )
        logger.debug(f"Formatted response: {formatted_output}")
        # send the response
        await self.room_send(
@ -210,27 +169,16 @@ class LLMClient(AsyncClient):
            content={
                "msgtype": "m.text",
                "body": output,
                "format": "org.matrix.custom.html",
                "formatted_body": formatted_output,
            },
        )
        logger.debug(f"Sent response: {output}")
-        # stop typing indicator loop
+    async def start(self, password, sync_timeout=30000) -> None:
        typing_task.cancel()
    async def start(
        self,
        password: str,
        sync_timeout: int = 30000,
    ) -> None:
        """Start the client.
        Args:
-            password (`str`):
+            password (`str`): The password to log in with.
-                The password to log in with.
+            sync_timeout (`int`, default `30000`): The sync timeout in milliseconds.
            sync_timeout (`int`, default `30000`):
                The sync timeout in milliseconds.
        """
        # Login to the homeserver
        logger.debug(await self.login(password))