From 2795fba1941503620aab2e624a891ab09f5e3d3b Mon Sep 17 00:00:00 2001 From: James Shiffer Date: Sun, 26 Dec 2021 11:55:19 -0800 Subject: [PATCH] Updated readme, minor configuration stuff --- README.md | 61 ++++++++++++++++++++++++++++++++++++++++- requirements.txt | Bin 1278 -> 1176 bytes src/scraper/scraper.py | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6d1c17e..39dea30 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,62 @@ # miku -Discord bot/companion for the group chatte, powered by the GPT-J language model and modified with a soft prompt to understand all of our esoteric, elaborate inside jokes. +Discord bot/companion for the group chatte, powered by the GPT-~~J~~ Neo language model and modified with a soft prompt to understand all of our esoteric, elaborate inside jokes. + +## Setup + +Python 3.8+ and PyTorch are required. CUDA is strongly recommended. + +The `c1-1.3B` model used in development should work without needing beefy hardware. It has been tested on a 1050 Ti 4 GB. + +Set up a virtual environment: + +Linux/macOS + +```shell +python3 -m venv venv +source venv/bin/activate +``` + +Windows + +```shell +py -3 -m venv venv +.\venv\Scripts\activate +``` + +Install required packages: + +```shell +pip install -r requirements.txt +``` + +Copy `.env.example` to `.env` and fill in the bot's `TOKEN`. + +For chat scraping, you will also need to get your own `USER_TOKEN`. + +* In Discord, hit Ctrl+Shift+I to open up developer tools +* Go to the Network tab and filter by XHR requests +* Open a new channel, or scroll up, or do something else that will trigger an authenticated request +* Click on one that looks suitable (e.g. `messages?limit=50`) +* Under the "Request" tab, copy the contents of the `Authorization` request header. + +## Usage + +Scrape the messages from the chat channel you wish to use for a soft prompt. 
You will be prompted for the channel ID, which you can get by having developer mode on in Discord and right-clicking, or copying the last part of the URL in the browser. + +```shell +cd src +python -m scraper +``` + +Train the soft prompt (TODO) + +Run the Hatsune Miku bot. The first time you do this, it will download the model, which is ~5 GB. + +```shell +python -m miku +``` + +## Final Remarks + +sukima nuts diff --git a/requirements.txt b/requirements.txt index 92700e5434ee7f66658048ec41b55f85a6cc6882..7679e18076cda8d0fc3a4610dcb7ed3e4d00e184 100644 GIT binary patch delta 200 zcmeyzIfHY8)$lbg8DM3RdknIVTE6G&z=*aD#igC2tc5F1bCXH=YQ z#Hcbkf>B{|6{DUo7f@9SLmopSSe-Fg&3;DN$@dtyO`gW2GcP} zlYN*aCZ{p0OzvS;n!JHoO`MCN7-&KTShpciw;_WW$gasBnbm==C}AiBnw|@mH35p7 bF&Keuh-cBB+{0ot`2dT;KEz1}=s~h7uqwVkl;?1wtbRLk2w{smEZzz&kNt9jI=qJdoII zDhpDa%#g#72_&<@iY>s(^caj8cqcP5Dl+m;R%BEKlCEHODxy8j$hE zKph~}%Nb>X>JBq*2a=Uc%0O~96G&t`6Udb7V3D6py3!#10SuK4kqnLuz6?HKGt3~4 zn{3MrvMZ4pq`R3}9jvw(Xj=tX704Tg3^26^nbqAvW|c7H1HGEekb$Je0IX6QC|U~C sVF+>`SY;v5-ML_OCO{Qt;6MTUltmk8qc)2%kc?#kxwn=@k_Ds(0H9nso&W#< diff --git a/src/scraper/scraper.py b/src/scraper/scraper.py index 26251f6..b396a9e 100644 --- a/src/scraper/scraper.py +++ b/src/scraper/scraper.py @@ -113,7 +113,7 @@ def boot(token: str): if not token: token = input('Enter your Discord user token (Authorization request header): ') channel = input('Enter channel ID: ') - default_export = Path.cwd().parent / 'chats' + default_export = Path.cwd().parent.parent / 'chats' export = input('Enter path to export transcripts (default "chats"): ') scraper = Scraper(token, channel, Path(export) if export else default_export) scraper.scrape()