From 65bcb80f8591915c44c17529dd26685751158a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laure=CE=B7t?= Date: Wed, 24 May 2023 20:48:00 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20download=20ggml=20weights=20from=20?= =?UTF-8?q?huggingface=5Fhub?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nio-llm/client.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/nio-llm/client.py b/nio-llm/client.py index 84af82f..b6a4d6f 100644 --- a/nio-llm/client.py +++ b/nio-llm/client.py @@ -6,6 +6,7 @@ import time from textwrap import dedent import click +from huggingface_hub import hf_hub_download from llama_cpp import Llama from nio import AsyncClient, MatrixRoom, RoomMessageText @@ -22,6 +23,7 @@ class LLMClient(AsyncClient): device_id: str, preprompt: str, room: str, + ggml_path: str, ): """Create a new LLMClient instance.""" super().__init__( @@ -37,7 +39,7 @@ class LLMClient(AsyncClient): # create the Llama instance self.llm = Llama( - model_path="../../../llama.cpp/models/sv13B/stable-vicuna-13B.ggml.q5_1.bin", + model_path=ggml_path, n_threads=12, ) @@ -88,7 +90,7 @@ class LLMClient(AsyncClient): output = self.llm( prompt, max_tokens=100, - stop=["<{event.sender}>"], + stop=[f"<{event.sender}>"], echo=True, ) @@ -123,17 +125,28 @@ def main( username: str, password: str, room: str, - preprompt, + preprompt: str, ) -> None: + """Run the main program. + + Download the model from HuggingFace Hub and start the async loop. + """ + # download the model + ggml_path = hf_hub_download( + repo_id="TheBloke/stable-vicuna-13B-GGML", + filename="stable-vicuna-13B.ggmlv3.q5_1.bin", + ) + asyncio.get_event_loop().run_until_complete( _main( + ggml_path=ggml_path, homeserver=homeserver, device_id=device_id, username=username, password=password, preprompt=preprompt, room=room, - ) + ), ) @@ -143,9 +156,13 @@ async def _main( username: str, password: str, room: str, - preprompt, + preprompt: str, + ggml_path: str, ) -> None: - """Run the main program.""" + """Run the async main program. + + Create the client, login, join the room, and sync forever. + """ # create the client client = LLMClient( homeserver=homeserver, @@ -153,6 +170,7 @@ async def _main( username=username, room=room, preprompt=preprompt, + ggml_path=ggml_path, ) # Login to the homeserver @@ -166,6 +184,8 @@ async def _main( if __name__ == "__main__": + # set up logging logging.basicConfig(level=logging.DEBUG) - main(auto_envvar_prefix="NIOLLM") + + # run the main program (with environment variables) main(auto_envvar_prefix="NIOLLM")