# hendrik/services/llm_client.py
#
# 209 lines
# 8.7 KiB
# Python

import json
from lib import gadget
import urllib.request
import urllib.error
class LLMClient:
    """Streaming client for an OpenAI-style ``/chat/completions`` endpoint.

    Requests are always sent with ``"stream": True`` and the server-sent
    event stream is folded back into a single :class:`LLMClient.Message`.

    Attributes:
        base_url: Endpoint root with any trailing ``/`` removed.
        model: Model identifier sent in every request payload.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.
        cancel_requested: Set this to ``True`` while :meth:`chat` is
            streaming to stop reading; it is reset to ``False`` at the
            start of every :meth:`chat` call.
    """

    class Message:
        """Final assistant message produced by :meth:`LLMClient.chat`.

        Attributes:
            content: Message text. String content is passed through
                ``gadget.strip_thinking`` (which, per the original author's
                note, strips "thinking" output); non-string content is kept
                unchanged.
            tool_calls: List of tool-call dicts, or ``None``.
            warning: Optional human-readable warning; ``None`` by default.
        """

        def __init__(self, msg):
            raw_content = msg.get('content', '')
            # Automatically strip thinking output from textual content.
            if isinstance(raw_content, str):
                self.content = gadget.strip_thinking(raw_content)
            else:
                self.content = raw_content
            self.tool_calls = msg.get('tool_calls', None)
            self.warning = None

    def __init__(self, base_url, model, api_key, timeout=600):
        """Store connection settings; no network I/O happens here."""
        self.base_url = base_url.rstrip('/')
        self.model = model
        self.api_key = api_key
        self.timeout = timeout
        self.cancel_requested = False

    @staticmethod
    def _sse_data(raw_line):
        """Return the payload of an SSE ``data:`` line, or ``None`` to skip it."""
        line = raw_line.decode('utf-8', errors='replace').strip()
        # Accept both "data: {...}" and "data:{...}"; the space is optional.
        if not line.startswith('data:'):
            return None
        payload = line[len('data:'):].strip()
        return payload or None

    @staticmethod
    def _extract_delta(chunk):
        """Return ``choices[0].delta`` of a stream chunk, or ``{}`` if absent."""
        if not isinstance(chunk, dict):
            return {}
        choices = chunk.get('choices')
        # Some chunks (e.g. usage-only ones) carry an empty "choices" list.
        if not isinstance(choices, list) or not choices:
            return {}
        first = choices[0]
        if not isinstance(first, dict):
            return {}
        delta = first.get('delta')
        return delta if isinstance(delta, dict) else {}

    @staticmethod
    def _merge_tool_call_delta(accumulated, fragment):
        """Fold one streamed tool-call fragment into ``accumulated`` in place."""
        if not isinstance(fragment, dict):
            return
        idx = fragment.get('index')
        if not isinstance(idx, int) or idx < 0:
            idx = 0  # missing/null/invalid index: treat as the first call
        # Make sure the list is long enough to hold slot ``idx``.
        while len(accumulated) <= idx:
            accumulated.append({
                "id": "",
                "type": "function",
                "function": {"name": "", "arguments": ""},
            })
        slot = accumulated[idx]
        if fragment.get('id'):
            slot['id'] = fragment['id']
        function = fragment.get('function')
        if isinstance(function, dict):
            # Name and arguments arrive in pieces; null pieces count as "".
            slot['function']['name'] += function.get('name') or ""
            slot['function']['arguments'] += function.get('arguments') or ""

    @staticmethod
    def _finalize_tool_calls(accumulated):
        """Return the accumulated tool calls that have a name and arguments."""
        valid_tool_calls = []
        for tc in accumulated:
            function = tc['function']
            name = function['name']
            args_str = function['arguments']
            # Keep only calls that have both a name and non-blank arguments.
            if not (name and args_str and args_str.strip()):
                continue
            if not tc['id']:
                # Generate an ID when the provider did not send one.
                tc['id'] = f"call_{len(valid_tool_calls)}"
            try:
                # Validate and re-encode the JSON for a consistent format.
                function['arguments'] = json.dumps(
                    json.loads(args_str), ensure_ascii=False
                )
            except json.JSONDecodeError:
                # Not valid JSON: pass the raw string through unchanged.
                function['arguments'] = args_str
            valid_tool_calls.append(tc)
        return valid_tool_calls

    @staticmethod
    def _mentions_tool_use(body_text):
        """Return True if a JSON error body's ``error.message`` says 'tool use'."""
        if not body_text:
            return False
        try:
            body = json.loads(body_text)
        except ValueError:
            return False
        error = body.get('error') if isinstance(body, dict) else None
        message = error.get('message') if isinstance(error, dict) else None
        return isinstance(message, str) and 'tool use' in message.lower()

    def chat(self, messages, tools=None, on_stream_chunk=None, disable_reasoning=False):
        """Send a chat completion request and collect the streamed reply.

        Args:
            messages: Chat messages, placed verbatim in the request payload.
            tools: Optional tool definitions. When truthy they are sent with
                ``tool_choice="auto"``.
            on_stream_chunk: Optional callable invoked with each non-empty
                text fragment as it arrives.
            disable_reasoning: When true, ``{"reasoning": {"enabled": False}}``
                is added to the payload.

        Returns:
            A :class:`LLMClient.Message`. HTTP and transport failures are not
            raised; they are reported as a message whose content starts with
            ``"HTTP Error:"`` or ``"Error:"``. If the server answers 404 with
            an error message mentioning "tool use" while tools were supplied,
            the request is retried once without tools and the returned
            message carries a ``warning``.

        Note:
            Errors raised while building the request (before the network
            call, e.g. a payload that is not JSON-serializable) propagate to
            the caller.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": True,  # always stream
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"
        # Only send the reasoning parameter when explicitly requested: some
        # models/providers fail if it is present but unsupported.
        if disable_reasoning:
            payload["reasoning"] = {"enabled": False}

        data = json.dumps(payload).encode('utf-8')
        req = urllib.request.Request(url, data=data, method='POST')
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')

        content_parts = []
        pending_tool_calls = []
        try:
            self.cancel_requested = False
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                # Streaming: read the response line by line.
                for raw_line in resp:
                    if self.cancel_requested:
                        content_parts.append("\n\n[Stream cancelled by user]")
                        break
                    data_str = self._sse_data(raw_line)
                    if data_str is None:
                        continue
                    if data_str == '[DONE]':
                        break
                    try:
                        chunk = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue  # skip malformed events

                    delta = self._extract_delta(chunk)
                    # Reasoning deltas are deliberately ignored: reasoning
                    # must never leak into the returned content.
                    for fragment in delta.get('tool_calls') or []:
                        self._merge_tool_call_delta(pending_tool_calls, fragment)

                    chunk_text = delta.get('content') or ""
                    if chunk_text:
                        content_parts.append(chunk_text)
                        # Forward the fragment to the UI callback.
                        if on_stream_chunk:
                            on_stream_chunk(chunk_text)

            message = {'content': "".join(content_parts)}
            valid_tool_calls = self._finalize_tool_calls(pending_tool_calls)
            if valid_tool_calls:
                message['tool_calls'] = valid_tool_calls
        except urllib.error.HTTPError as e:
            body_text = ""
            try:
                body_text = e.read().decode('utf-8', errors='replace')
            except Exception:
                # Best effort only: the error body is optional detail.
                body_text = ""
            if tools and e.code == 404 and self._mentions_tool_use(body_text):
                # Retry in chat-only mode, keeping the caller's streaming
                # callback and reasoning setting.
                result = self.chat(
                    messages,
                    tools=None,
                    on_stream_chunk=on_stream_chunk,
                    disable_reasoning=disable_reasoning,
                )
                result.warning = "Tool calling not supported by this model. Running in chat-only mode."
                return result
            detail = f" - {body_text[:500]}" if body_text else ""
            return self.Message({'content': f"HTTP Error: {e.code} {e.reason}{detail}", 'tool_calls': None})
        except Exception as e:
            return self.Message({'content': f"Error: {str(e)}", 'tool_calls': None})

        return self.Message(message)