121 lines
5.7 KiB
TypeScript
121 lines
5.7 KiB
TypeScript
|
import { Message } from 'discord.js';
|
||
|
import { LLMProvider } from './provider';
|
||
|
import { HfInference } from "@huggingface/inference"
|
||
|
import 'dotenv/config';
|
||
|
import { serializeMessageHistory } from '../util';
|
||
|
import { logError, logInfo } from '../../logging';
|
||
|
import { LLMConfig } from '../commands/types';
|
||
|
|
||
|
|
||
|
// User-facing prompt prefix sent to the model before the serialized message history.
// Instructs the model to continue the Discord conversation as "Hatsune Miku",
// emitting one JSON object per line that conforms to the embedded JSON schema
// (timestamp/author/name/context/content/reactions). The conversation history is
// appended directly after this string, ending with a deliberately-unterminated
// template message for the model to complete.
// NOTE: this is a runtime string — the schema and wording are part of the model
// contract; do not reformat.
const USER_PROMPT = `Continue the following Discord conversation by completing the next message, playing the role of Hatsune Miku. Each message is represented as a line of JSON. Maintain the same JSON format as the preceding messages. Refer to other users by their "name" instead of "author" field whenever possible.

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
\`\`\`
{"properties": {"timestamp": {"description": "When the message was sent, in RFC 7231 format", "title": "Timestamp", "type": "string"}, "author": {"description": "The author's username, which may be one of the following, or something else: \"vinso1445\", \"f0oby\", \"1thinker\", \"scoliono\", \"ahjc\", \"cinnaba\", \"M6481\", \"hypadrive\", \"need_correction\", \"Hatsune Miku#1740\" (You)", "title": "Author", "type": "string"}, "name": {"anyOf": [{"type": "string"}, {"type": "null"}], "description": "The author's real name, which may be blank or one of the following: \"Vincent Iannelli\", \"Myles Linden\", \"Samuel Habib\", \"James Shiffer\", \"Alex\", \"Jinsung Park\", \"Lawrence Liu\", \"Nazar Khan\", \"Ethan Cheng\", \"Hatsune Miku\" (You)", "title": "Name"}, "context": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "The contents of the message being replied to, if this message is a reply", "title": "Context"}, "content": {"description": "The text content of this message", "title": "Content", "type": "string"}, "reactions": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional list of emoji reactions this message received, if any. The following comma-separated format is used: \":skull: (3), :100: (1)\"", "title": "Reactions"}}, "required": ["timestamp", "author", "name", "content"]}
\`\`\`

The conversation is as follows. The last line is the message you have to complete:

`;
|
||
|
|
||
|
|
||
|
export class HuggingfaceProvider implements LLMProvider
|
||
|
{
|
||
|
private client: HfInference;
|
||
|
private model: string;
|
||
|
|
||
|
constructor(hf_token: string | undefined = process.env.HF_TOKEN, model = "meta-llama/Llama-3.2-3B-Instruct")
|
||
|
{
|
||
|
if (!hf_token) {
|
||
|
throw new TypeError("Huggingface API token was not passed in, and environment variable HF_TOKEN was unset!");
|
||
|
}
|
||
|
this.client = new HfInference(hf_token);
|
||
|
this.model = model;
|
||
|
}
|
||
|
|
||
|
name() {
|
||
|
return 'HuggingFace API: ' + this.model;
|
||
|
}
|
||
|
|
||
|
async requestLLMResponse(history: Message[], sysprompt: string, params: LLMConfig): Promise<string>
|
||
|
{
|
||
|
let messageList = await Promise.all(
|
||
|
history.map(serializeMessageHistory)
|
||
|
);
|
||
|
messageList = messageList.filter(x => !!x);
|
||
|
|
||
|
if (messageList.length === 0) {
|
||
|
throw new TypeError("No messages with content provided in history!");
|
||
|
}
|
||
|
|
||
|
// dummy message for last line of prompt
|
||
|
const lastMsg = messageList[messageList.length - 1];
|
||
|
|
||
|
// advance by 5 seconds
|
||
|
let newDate = new Date(lastMsg!.timestamp);
|
||
|
newDate.setSeconds(newDate.getSeconds() + 5);
|
||
|
|
||
|
let templateMsgTxt = JSON.stringify({
|
||
|
timestamp: newDate.toUTCString(),
|
||
|
author: lastMsg!.author,
|
||
|
name: lastMsg!.name,
|
||
|
context: lastMsg!.content,
|
||
|
content: ""
|
||
|
});
|
||
|
|
||
|
// cut off the end, leaving an open string for the model to complete
|
||
|
templateMsgTxt = templateMsgTxt.slice(0, templateMsgTxt.length - 2);
|
||
|
|
||
|
const messageHistoryTxt = messageList.map(msg => JSON.stringify(msg)).join('\n') + templateMsgTxt;
|
||
|
|
||
|
try {
|
||
|
const chatCompletion = await this.client.chatCompletion({
|
||
|
model: this.model,
|
||
|
messages: [
|
||
|
{ role: "system", content: sysprompt },
|
||
|
{ role: "user", content: USER_PROMPT + messageHistoryTxt }
|
||
|
],
|
||
|
temperature: params?.temperature || 0.9,
|
||
|
max_tokens: params?.max_new_tokens || 256,
|
||
|
top_p: params?.top_p || 0.5
|
||
|
});
|
||
|
|
||
|
let raw = chatCompletion.choices[0].message.content;
|
||
|
logInfo(`[hf] API response: ${raw}`);
|
||
|
|
||
|
if (!raw) {
|
||
|
throw new TypeError("HuggingFace completion API returned no message.");
|
||
|
}
|
||
|
|
||
|
let responseMessage;
|
||
|
try {
|
||
|
responseMessage = JSON.parse(raw);
|
||
|
} catch (err) {
|
||
|
// gotta make sure we properly terminate our json
|
||
|
if (!raw.endsWith('"}')) {
|
||
|
if (raw.endsWith('"')) {
|
||
|
raw += '}';
|
||
|
} else {
|
||
|
raw += '"}';
|
||
|
}
|
||
|
}
|
||
|
responseMessage = JSON.parse(raw);
|
||
|
}
|
||
|
|
||
|
if (!responseMessage.content) {
|
||
|
throw new TypeError("HuggingFace completion API returned a message with no content.");
|
||
|
}
|
||
|
|
||
|
return responseMessage.content;
|
||
|
} catch (err) {
|
||
|
logError(`[hf] API Error: ` + err);
|
||
|
throw err;
|
||
|
}
|
||
|
}
|
||
|
}
|