// LLM chat client: streams completions from an Ollama or OpenAI-compatible (DeepSeek) backend.
import fetch from "ags/fetch";
import Gio from "gi://Gio";
import { type LLMConfig } from "./config";

/** Author of a chat message; serialized as the `role` field of each request message. */
export type Role = "user" | "assistant" | "system";

/** One chat turn: who said it and the text that was said. */
export type Message = { role: Role; content: string };

// Re-export the config module's public names (types and values) so callers
// can import everything they need from this module.
export { type LLMConfig, type Backend, getConfig, MODELS, BACKEND_DEFAULTS } from "./config";

/**
 * Sends a streaming chat request to the configured backend and forwards the
 * reply through callbacks.
 *
 * When `config.systemPrompt` is set it is prepended to `messages` as a
 * `system` message. The `"ollama"` backend is posted to `/api/chat` and parsed
 * as JSON lines; every other backend is posted to `/chat/completions` and
 * parsed as a server-sent-event stream.
 *
 * Failures raised while sending the request or reading the response are
 * caught, logged, and reported through `onError` as a string.
 *
 * @param config   Backend selection, base URL, model, optional API key and system prompt.
 * @param messages Conversation history to send, oldest first.
 * @param onChunk  Called with each piece of generated text as it arrives.
 * @param onDone   Called once the backend signals completion or the stream ends.
 * @param onError  Called with a description when the request or stream fails.
 */
export async function streamCompletion(
  config: LLMConfig,
  messages: Message[],
  onChunk: (text: string) => void,
  onDone: () => void,
  onError: (err: string) => void,
): Promise<void> {
  try {
    const history: Message[] = config.systemPrompt
      ? [{ role: "system", content: config.systemPrompt }, ...messages]
      : messages;

    // Per-backend endpoint
    const endpoint =
      config.backend === "ollama"
        ? `${config.baseUrl}/api/chat`
        : `${config.baseUrl}/chat/completions`;

    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (config.apiKey) {
      headers["Authorization"] = `Bearer ${config.apiKey}`;
    }

    console.log("[LLM] POST", endpoint, {
      backend: config.backend,
      model: config.model,
      messageCount: history.length,
    });

    const res = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify({
        model: config.model,
        messages: history,
        stream: true,
      }),
    });

    if (!res.ok) {
      const errorText = await res.text();
      console.error("[LLM] HTTP Error", res.status, errorText);
      onError(`HTTP ${res.status}: ${errorText}`);
      return;
    }

    // Guard explicitly instead of handing a missing body to DataInputStream.
    const stream = res.body as Gio.InputStream | null;
    if (!stream) {
      console.error("[LLM] Response has no body");
      onError("Empty response body");
      return;
    }

    const dis = new Gio.DataInputStream({
      base_stream: stream,
      close_base_stream: true,
    });
    const decoder = new TextDecoder();

    try {
      if (config.backend === "ollama") {
        await parseOllamaStream(dis, decoder, onChunk, onDone);
      } else {
        await parseDeepSeekStream(dis, decoder, onChunk, onDone);
      }
    } finally {
      // The parsers return as soon as the backend signals completion, so the
      // stream must be closed here; close_base_stream also releases the
      // underlying response stream.
      try {
        dis.close(null);
      } catch (closeError) {
        console.warn("[LLM] Failed to close response stream:", closeError);
      }
    }
  } catch (e) {
    console.error("[LLM] Fatal error:", e);
    onError(String(e));
  }
}

// ── helpers ────────────────────────────────────────────────────────────────

/**
 * Reads the next line from `dis` without blocking the main loop.
 *
 * Wraps the callback-based `read_line_async` / `read_line_finish` pair in a
 * promise.
 *
 * @param dis     Buffered stream to read from.
 * @param decoder Decoder used to turn the raw line bytes into text.
 * @returns The decoded line with surrounding whitespace trimmed, or `null`
 *          when the read yields no bytes (treated by callers as end of stream).
 */
async function readLine(
  dis: Gio.DataInputStream,
  decoder: TextDecoder,
): Promise<string | null> {
  const [bytes] = await new Promise<[Uint8Array | null, number]>(
    (resolve, reject) => {
      // Priority 0 is GLib.PRIORITY_DEFAULT; no cancellable is supplied.
      dis.read_line_async(0, null, (src, result) => {
        try {
          resolve(src ? src.read_line_finish(result) : [null, 0]);
        } catch (e) {
          reject(e);
        }
      });
    },
  );

  return bytes === null ? null : decoder.decode(bytes).trim();
}

// ── Ollama stream parser (JSON-lines) ──────────────────────────────────────

/**
 * Consumes an Ollama chat stream, where every non-blank line is one JSON object.
 *
 * Text found at `message.content` is forwarded to `onChunk`. `onDone` is
 * called exactly once: when an object with a truthy `done` arrives, or when
 * the stream ends without one. Lines that are not valid JSON are logged and
 * skipped.
 *
 * Only `JSON.parse` is guarded, so an exception thrown by `onChunk` or
 * `onDone` propagates to the caller instead of being reported as an
 * unparseable line (which previously could also make `onDone` fire twice).
 *
 * @param dis     Buffered response stream.
 * @param decoder Decoder for the raw line bytes.
 * @param onChunk Receives each piece of generated text.
 * @param onDone  Invoked once when the stream is finished.
 * @throws If reading fails, a callback throws, or the server sends an
 *         object with a string `error` field.
 */
async function parseOllamaStream(
  dis: Gio.DataInputStream,
  decoder: TextDecoder,
  onChunk: (text: string) => void,
  onDone: () => void,
): Promise<void> {
  while (true) {
    const line = await readLine(dis, decoder);
    if (line === null) break; // end of stream
    if (!line) continue; // blank line

    console.log("[LLM] Line:", line);

    let data: {
      message?: { content?: unknown };
      done?: unknown;
      done_reason?: unknown;
      error?: unknown;
    } | null;
    try {
      data = JSON.parse(line);
    } catch (_e) {
      console.warn("[LLM] Skipping unparseable line:", line);
      continue;
    }

    // Ollama reports failures that happen mid-stream as {"error": "..."}.
    // Surface them rather than ending the stream as if it had succeeded.
    if (typeof data?.error === "string") {
      throw new Error(data.error);
    }

    const content = data?.message?.content;
    if (typeof content === "string" && content) onChunk(content);

    if (data?.done) {
      console.log("[LLM] Stream complete, reason:", data.done_reason);
      onDone();
      return;
    }
  }

  // Stream ended without an explicit `done` object.
  onDone();
}

// ── DeepSeek / OpenAI-compatible SSE parser ────────────────────────────────

/**
 * Consumes an OpenAI-style server-sent-event stream.
 *
 * Only `data:` fields are considered; the space after the colon is optional,
 * as the SSE format allows. Text found at `choices[0].delta.content` is
 * forwarded to `onChunk`. `onDone` is called exactly once: on the `[DONE]`
 * sentinel, on the first chunk carrying a `finish_reason`, or when the stream
 * ends. Payloads that are not valid JSON are logged and skipped.
 *
 * Only `JSON.parse` is guarded, so an exception thrown by `onChunk` or
 * `onDone` propagates to the caller instead of being reported as an
 * unparseable line (which previously could also make `onDone` fire twice).
 *
 * @param dis     Buffered response stream.
 * @param decoder Decoder for the raw line bytes.
 * @param onChunk Receives each piece of generated text.
 * @param onDone  Invoked once when the stream is finished.
 * @throws If reading fails or a callback throws.
 */
async function parseDeepSeekStream(
  dis: Gio.DataInputStream,
  decoder: TextDecoder,
  onChunk: (text: string) => void,
  onDone: () => void,
): Promise<void> {
  const dataPrefix = "data:";

  while (true) {
    const line = await readLine(dis, decoder);
    if (line === null) break; // end of stream

    // Skip blank separators, comments, and non-data SSE fields.
    if (!line.startsWith(dataPrefix)) continue;

    const payload = line.slice(dataPrefix.length).trimStart();
    if (!payload) continue; // empty data field

    if (payload === "[DONE]") {
      console.log("[LLM] Stream complete (DONE sentinel)");
      onDone();
      return;
    }

    let data: {
      choices?: Array<{ delta?: { content?: unknown }; finish_reason?: unknown }>;
    } | null;
    try {
      data = JSON.parse(payload);
    } catch (_e) {
      console.warn("[LLM] Skipping unparseable line:", payload);
      continue;
    }

    const choice = data?.choices?.[0];
    const content = choice?.delta?.content;
    if (typeof content === "string" && content) onChunk(content);

    if (choice?.finish_reason) {
      console.log("[LLM] Stream complete, reason:", choice.finish_reason);
      onDone();
      return;
    }
  }

  // Stream ended without a [DONE] sentinel or finish_reason.
  onDone();
}