// Default parameters merged into every /completion request;
// caller-supplied params override these (see llama()).
const paramDefaults = {
stream: true,
temperature: 0.2,
};


// Module-level cache of the server's generation settings, populated by
// llama() when a stream finishes or lazily by llamaModelInfo().
let generation_settings = null;
|
|
/**
 * Error thrown when the completion endpoint returns a failure response.
 */
export class CompletionError extends Error {
  /**
   * @param {string} message - human-readable error description
   * @param {string} name - error name reported to callers (e.g. 'ServerError')
   * @param {*} [data] - optional extra payload from the server
   */
  constructor(message, name, data) {
    super(message);
    this.name = name;
    // Fix: `data` was accepted but silently discarded; expose it so
    // callers can inspect the server's error payload.
    this.data = data;
  }
};
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Streams a completion from a llama.cpp server as an async generator.
 *
 * POSTs `prompt` (merged with paramDefaults and `params`) to
 * `config.endpoint` (default '/completion') and yields one chunk per SSE
 * event; each chunk has a parsed `.data` object. Returns the concatenated
 * generated text when the stream ends.
 *
 * @param {string} prompt - the prompt to complete
 * @param {object} [params] - completion parameters; `api_key` is sent as a Bearer token
 * @param {object} [config] - `api_url`, `endpoint`, optional `controller` (AbortController)
 * @throws {CompletionError} on a non-200 response
 */
export async function* llama(prompt, params = {}, config = {}) {
  let controller = config.controller;
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  if (!controller) {
    controller = new AbortController();
  }

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  const status = response.status;
  if (status !== 200) {
    // Fix: previously a non-200 response whose JSON body lacked
    // `error.message` fell through and was read as a stream. Always throw
    // on non-200; prefer the server's message when one is available.
    let message = `server returned HTTP status ${status}`;
    try {
      const body = await response.json();
      if (body && body.error && body.error.message) {
        message = body.error.message;
      }
    } catch (err) {
      // Body was not JSON — keep the generic HTTP status message.
    }
    throw new CompletionError(message, 'ServerError');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  let content = "";
  let leftover = ""; // buffers a partially received line between reads

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // Fix: pass { stream: true } so multi-byte UTF-8 sequences split
      // across network chunks are not mangled into replacement characters.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // Check if the last character is a line break
      const endsWithLineBreak = text.endsWith('\n');

      let lines = text.split('\n');

      // If the text doesn't end with a line break, the last line is
      // incomplete — keep it in leftover for the next read.
      if (!endsWithLineBreak) {
        leftover = lines.pop();
      } else {
        leftover = "";
      }

      // Parse each complete SSE line of the form "field: value".
      // Fix: the regex must NOT carry the /g flag — a global regex's
      // lastIndex persists across exec() calls, silently skipping
      // matches on subsequent lines.
      const regex = /^(\S+):\s(.*)$/;
      for (const line of lines) {
        const match = regex.exec(line);
        if (match) {
          // Attach the SSE field (e.g. `data`, `error`) to the chunk.
          result[match[1]] = match[2];
          // Some (OpenAI-compatible) servers signal the end of the
          // stream with a literal '[DONE]' sentinel.
          if (result.data === '[DONE]') {
            cont = false;
            break;
          }

          // Parse the payload and yield it to the consumer.
          if (result.data) {
            result.data = JSON.parse(result.data);
            content += result.data.content;

            // Yield each parsed chunk as it arrives.
            yield result;

            // The server signals completion with `stop`; capture the
            // generation settings it reports, if any.
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
          if (result.error) {
            try {
              result.error = JSON.parse(result.error);
              if (result.error.message.includes('slot unavailable')) {
                // Propagate so the caller can retry with a new generation.
                throw new Error('slot unavailable');
              } else {
                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
              }
            } catch(e) {
              console.error(`llama.cpp error ${result.error}`)
            }
          }
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  }
  finally {
    // Release the connection whether the stream completed or the
    // consumer bailed out early.
    controller.abort();
  }

  return content;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Runs llama() and republishes its stream as DOM-style events on an
 * EventTarget: "message" per chunk, plus "generation_settings",
 * "timings", and a final "done" event carrying the full content.
 *
 * @param {string} prompt
 * @param {object} [params]
 * @param {object} [config]
 * @returns {EventTarget}
 */
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      if (chunk.data) {
        content += chunk.data.content;
        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
      }
      // Fix: these fields were read without the chunk.data guard applied
      // above, which would throw a TypeError on a data-less chunk; use
      // optional chaining so all accesses are guarded consistently.
      if (chunk.data?.generation_settings) {
        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
      }
      if (chunk.data?.timings) {
        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
      }
    }
    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
  })();
  return eventTarget;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Runs llama() to completion and resolves with the full generated text.
 *
 * Fix: replaces the `new Promise(async …)` explicit-construction
 * anti-pattern — an async function already returns a Promise that
 * resolves with its return value and rejects on a thrown error, so the
 * manual resolve/reject wrapper added nothing but risk.
 *
 * @param {string} prompt
 * @param {object} [params]
 * @param {object} [config]
 * @returns {Promise<string>} the concatenated completion text
 */
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
|
|
| |
| |
| |
/**
 * Convenience wrapper around llama(): streams a completion for
 * `params.prompt` and invokes `callback` once per received chunk.
 *
 * @param {object} params - completion parameters; must include `prompt`
 * @param {AbortController} controller - used to cancel the request
 * @param {(chunk: object) => void} callback - called with each chunk
 */
export const llamaComplete = async (params, controller, callback) => {
  const stream = llama(params.prompt, params, { controller });
  for await (const part of stream) {
    callback(part);
  }
}
|
|
| |
/**
 * Returns the server's default generation settings, fetching them from
 * the /props endpoint on first use and caching them in the module-level
 * `generation_settings` variable thereafter.
 *
 * @param {object} [config] - may carry `api_url` (trailing slashes stripped)
 * @returns {Promise<object>} the cached generation settings
 */
export const llamaModelInfo = async (config = {}) => {
  if (!generation_settings) {
    const baseUrl = config.api_url?.replace(/\/+$/, '') || "";
    const response = await fetch(`${baseUrl}/props`);
    const props = await response.json();
    generation_settings = props.default_generation_settings;
  }
  return generation_settings;
}
|
|