nio-llm/nio-llm/client.py

192 lines
5.1 KiB
Python
Raw Normal View History

2023-05-20 19:19:14 +00:00
"""A Matrix client that uses Llama to respond to messages."""
import asyncio
import logging
2023-05-22 19:19:11 +00:00
import time
2023-05-20 19:19:14 +00:00
from textwrap import dedent
2023-05-22 19:19:11 +00:00
import click
from huggingface_hub import hf_hub_download
2023-05-20 19:19:14 +00:00
from llama_cpp import Llama
from nio import AsyncClient, MatrixRoom, RoomMessageText
logger = logging.getLogger("nio-llm")
class LLMClient(AsyncClient):
    """A Matrix client that uses Llama to respond to messages."""

    def __init__(
        self,
        username: str,
        homeserver: str,
        device_id: str,
        preprompt: str,
        room: str,
        ggml_path: str,
    ):
        """Create a new LLMClient instance.

        Args:
            username: Localpart of the bot's Matrix user (no leading "@").
            homeserver: Homeserver URL, e.g. "https://matrix.org".
            device_id: Device ID to present to the homeserver.
            preprompt: Text prepended to every prompt fed to the model.
            room: Room ID the bot listens and responds in.
            ggml_path: Path to the GGML model file loaded by llama.cpp.
        """
        super().__init__(
            user=f"@{username}:{homeserver.removeprefix('https://')}",
            homeserver=homeserver,
            device_id=device_id,
        )

        # Milliseconds since the epoch, the same unit as Matrix server
        # timestamps, so messages sent before startup can be skipped.
        self.spawn_time = time.time() * 1000
        self.username = username
        self.preprompt = preprompt
        self.room = room

        # create the Llama instance
        self.llm = Llama(
            model_path=ggml_path,
            n_threads=12,
        )

        # register the handler for incoming text messages
        self.add_event_callback(self.message_callback, RoomMessageText)  # type: ignore

    async def message_callback(self, room: MatrixRoom, event: RoomMessageText) -> None:
        """Process new messages as they come in.

        Skips our own messages, messages sent before the bot spawned,
        messages in other rooms, and messages that do not mention the bot's
        username. Otherwise builds a prompt, runs the model, and sends the
        generated reply back to the room.

        Args:
            room: The room the event was received in.
            event: The text message event to (possibly) respond to.
        """
        logger.debug(f"Received new message in room {room.room_id}.")
        logger.debug(f"Message body: {event.body}")

        # ignore our own messages
        if event.sender == self.user:
            logger.debug("Ignoring our own message.")
            return

        # ignore messages pre-spawn
        if event.server_timestamp < self.spawn_time:
            logger.debug("Ignoring message pre-spawn.")
            return

        # ignore messages sent in other rooms
        if room.room_id != self.room:
            logger.debug("Ignoring message in different room.")
            return

        # ignore messages that do not mention the bot
        if self.username not in event.body:
            logger.debug("Ignoring message not directed at us.")
            return

        # NOTE(review): the reply persona "<pipobot>" is hard-coded and may not
        # match self.username — confirm it matches the persona in the preprompt.
        prompt = dedent(
            f"""
            {self.preprompt}
            <{event.sender}>: {event.body}
            <pipobot>:
            """,
        ).strip()
        logger.debug(f"Prompt: {prompt}")

        # enable typing indicator
        await self.room_typing(
            self.room,
            typing_state=True,
            timeout=100000000,
        )

        try:
            # Run the blocking llama.cpp inference in the default executor so
            # the asyncio event loop keeps servicing sync/typing traffic
            # instead of freezing for the duration of the generation.
            output = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: self.llm(
                    prompt,
                    max_tokens=100,
                    stop=[f"<{event.sender}>"],
                    echo=True,
                ),
            )

            # retrieve the response (echo=True returns prompt + completion)
            output = output["choices"][0]["text"]  # type: ignore
            output = output.removeprefix(prompt).strip()
        finally:
            # always clear the typing indicator, even if inference failed
            await self.room_typing(self.room, typing_state=False)

        # send the response
        await self.room_send(
            room_id=self.room,
            message_type="m.room.message",
            content={
                "msgtype": "m.text",
                "body": output,
            },
        )
@click.command()
@click.option("--homeserver", "-h", help="The homeserver to connect to.", required=True)
@click.option("--device-id", "-d", help="The device ID to use.", required=True)
@click.option("--username", "-u", help="The username to log in as.", required=True)
@click.option("--password", "-p", help="The password to log in with.", required=True)
@click.option("--room", "-r", help="The room to join.", required=True)
@click.option("--preprompt", "-t", help="The preprompt to use.", required=True)
def main(
    homeserver: str,
    device_id: str,
    username: str,
    password: str,
    room: str,
    preprompt: str,
) -> None:
    """Run the main program.

    Download the model from HuggingFace Hub and start the async loop.

    Args:
        homeserver: Homeserver URL to connect to.
        device_id: Device ID to present to the homeserver.
        username: Username to log in as.
        password: Password to log in with.
        room: Room ID to join and respond in.
        preprompt: Text prepended to every model prompt.
    """
    # download the model (cached by huggingface_hub after the first run)
    ggml_path = hf_hub_download(
        repo_id="TheBloke/stable-vicuna-13B-GGML",
        filename="stable-vicuna-13B.ggmlv3.q5_1.bin",
    )

    # asyncio.run creates, runs, and cleanly closes a fresh event loop;
    # get_event_loop().run_until_complete is deprecated for this purpose.
    asyncio.run(
        _main(
            ggml_path=ggml_path,
            homeserver=homeserver,
            device_id=device_id,
            username=username,
            password=password,
            preprompt=preprompt,
            room=room,
        ),
    )
async def _main(
    homeserver: str,
    device_id: str,
    username: str,
    password: str,
    room: str,
    preprompt: str,
    ggml_path: str,
) -> None:
    """Run the async main program.

    Create the client, login, join the room, and sync forever.

    Args:
        homeserver: Homeserver URL to connect to.
        device_id: Device ID to present to the homeserver.
        username: Username to log in as.
        password: Password to log in with.
        room: Room ID to join and respond in.
        preprompt: Text prepended to every model prompt.
        ggml_path: Path to the downloaded GGML model file.
    """
    # create the client
    client = LLMClient(
        homeserver=homeserver,
        device_id=device_id,
        username=username,
        room=room,
        preprompt=preprompt,
        ggml_path=ggml_path,
    )

    # Login to the homeserver (log the response via the module logger,
    # consistent with the rest of the file, instead of bare print)
    logger.info(await client.login(password))

    # Join the room, if not already joined
    logger.info(await client.join(room))

    # Sync with the server forever
    await client.sync_forever(timeout=30000)
if __name__ == "__main__":
    # set up logging (DEBUG so the client's per-message trace is visible)
    logging.basicConfig(level=logging.DEBUG)

    # run the main program; click also reads any missing option from
    # NIOLLM_<OPTION> environment variables via auto_envvar_prefix
    main(auto_envvar_prefix="NIOLLM")