import * as vscode from 'vscode';

// llama.cpp server response format
export type llamaData = {
    content: string,
    generation_settings: JSON,
    model: string,
    prompt: string,
    stopped_eos: boolean,
    stopped_limit: boolean,
    stopped_word: boolean,
    stopping_word: string,
    timings: {
        predicted_ms: number,
        predicted_n: number,
        predicted_per_second: number,
        predicted_per_token_ms: number,
        prompt_ms: number,
        prompt_n: number,
        prompt_per_second: number,
        prompt_per_token_ms: number
    },
    tokens_cached: number,
    tokens_evaluated: number,
    tokens_predicted: number,
    truncated: boolean
};

// Request body accepted by the llama.cpp /completion and /infill endpoints
export type llamaRequest = {
    n_predict: number,
    mirostat: number,
    repeat_penalty: number,
    frequency_penalty: number,
    presence_penalty: number,
    repeat_last_n: number,
    temperature: number,
    top_p: number,
    top_k: number,
    typical_p: number,
    tfs_z: number,
    seed: number,
    stream: boolean,
    cache_prompt: boolean,
    prompt?: string,
    input_prefix?: string,
    input_suffix?: string
};

// Build a llama.cpp request from the workspace configuration and the text
// before/after the cursor.
export function createLlamacppRequest(config: vscode.WorkspaceConfiguration, doc_before: string, doc_after: string): llamaRequest {
    let request: llamaRequest = {
        n_predict: config.get("llamaMaxtokens") as number,
        mirostat: config.get("llamaMirostat") as number,
        repeat_penalty: config.get("llamaRepeatPenalty") as number,
        frequency_penalty: config.get("llamaFrequencyPenalty") as number,
        presence_penalty: config.get("llamaPresencePenalty") as number,
        repeat_last_n: config.get("llamaRepeatCtx") as number,
        temperature: config.get("llamaTemperature") as number,
        top_p: config.get("llamaTop_p") as number,
        top_k: config.get("llamaTop_k") as number,
        typical_p: config.get("llamaTypical_p") as number,
        tfs_z: config.get("llamaTailfree_z") as number,
        seed: config.get("llamaSeed") as number,
        stream: false,
        cache_prompt: config.get("llamaCachePrompt") as boolean
    };

    const fim = config.get("fimEnabled") as boolean;
    const fimRequest = config.get("useFillInMiddleRequest") as boolean;

    if (fim === true) {
        if (fimRequest === true) {
            // Native infill request: prefix and suffix are sent as separate fields.
            request.input_prefix = doc_before;
            request.input_suffix = doc_after;
        } else {
            // Manual fill-in-the-middle: assemble the prompt from the configured FIM tokens.
            const fim_beg = config.get("fimBeginString") as string;
            const fim_hole = config.get("fimHoleString") as string;
            const fim_end = config.get("fimEndString") as string;
            request.prompt = fim_beg + doc_before + fim_hole + doc_after + fim_end;
        }
    } else {
        // Plain completion: only the text before the cursor is used as the prompt.
        request.prompt = doc_before;
    }

    return request;
}

// Select the server endpoint that matches the request built above:
// /infill for native fill-in-the-middle requests, /completion otherwise.
export function llamacppRequestEndpoint(config: vscode.WorkspaceConfiguration): string {
    const fim = config.get("fimEnabled") as boolean;
    const fimRequest = config.get("useFillInMiddleRequest") as boolean;
    let req_str: string = config.get("llamaHost") as string;

    if (fim === true && fimRequest === true) {
        req_str += '/infill';
    } else {
        req_str += '/completion';
    }

    return req_str;
}
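
// --- Usage sketch (illustrative, not part of the original module) ---
// A minimal example of how the two helpers above might be combined to request
// a completion from a llama.cpp server. The configuration section name
// ("example.llamacpp") and the function name are assumptions for illustration;
// the real extension may read its settings under a different key. Assumes a
// global fetch, as provided by recent VS Code / Node.js runtimes.
export async function exampleCompletion(doc_before: string, doc_after: string): Promise<string> {
    // Hypothetical settings namespace; substitute the extension's actual section.
    const config = vscode.workspace.getConfiguration("example.llamacpp");

    const request = createLlamacppRequest(config, doc_before, doc_after);
    const endpoint = llamacppRequestEndpoint(config); // e.g. "http://localhost:8080/completion"

    // POST the JSON body to the server and parse the response as llamaData.
    const response = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(request)
    });

    const data = await response.json() as llamaData;
    return data.content;
}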