# hendrik/services/llm_client.py

import json
from lib import gadget
import urllib.request
import urllib.error

class LLMClient:
    """Streaming client for a ``{base_url}/chat/completions`` HTTP endpoint.

    Requests are sent with ``"stream": True`` and the response is read as
    ``data:`` lines, each carrying a JSON chunk whose first choice holds a
    ``delta``. Text and tool-call fragments are accumulated and returned as a
    single :class:`LLMClient.Message`.
    """

    class Message:
        """Final assistant message produced by :meth:`LLMClient.chat`."""

        def __init__(self, msg):
            """Build a message from a dict with optional ``content`` / ``tool_calls`` keys."""
            raw_content = msg.get('content', '')
            # Automatically strip "thinking" text from string content via
            # gadget.strip_thinking; non-string content is stored untouched.
            if isinstance(raw_content, str):
                self.content = gadget.strip_thinking(raw_content)
            else:
                self.content = raw_content
            # List of assembled tool calls, or None when the key is absent.
            self.tool_calls = msg.get('tool_calls', None)
            # Optional human-readable notice; chat() sets it when it had to
            # fall back to chat-only mode.
            self.warning = None

    def __init__(self, base_url, model, api_key, timeout=600):
        """Store connection settings.

        Args:
            base_url: Endpoint root; trailing slashes are removed.
            model: Model identifier sent in every request payload.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            timeout: Value passed as ``timeout`` to ``urllib.request.urlopen``.
        """
        self.base_url = base_url.rstrip('/')
        self.model = model
        self.api_key = api_key
        self.timeout = timeout
        # Checked once per received line while streaming; reset to False at
        # the start of every chat() call. Presumably set to True by the caller
        # (e.g. from a UI thread) to stop the stream -- the setter is not part
        # of this class.
        self.cancel_requested = False

    @staticmethod
    def _extract_sse_data(raw_line):
        """Return the payload of a ``data:`` line, or None for any other line.

        Accepts both ``data: value`` and ``data:value``. Blank lines, comment
        lines and other fields yield None. Undecodable bytes are replaced
        instead of aborting the whole stream.
        """
        line = raw_line.decode('utf-8', errors='replace').strip()
        if not line.startswith('data:'):
            return None
        data = line[len('data:'):].strip()
        return data or None

    @staticmethod
    def _first_delta(chunk):
        """Return the ``delta`` dict of the first choice, or ``{}`` if absent.

        Chunks with an empty ``choices`` list, a missing/``null`` delta, or a
        non-object JSON value are treated as carrying no delta rather than
        raising.
        """
        if not isinstance(chunk, dict):
            return {}
        choices = chunk.get('choices')
        if not isinstance(choices, list) or not choices:
            return {}
        first = choices[0]
        if not isinstance(first, dict):
            return {}
        delta = first.get('delta')
        return delta if isinstance(delta, dict) else {}

    @staticmethod
    def _merge_tool_call_deltas(accumulated, deltas):
        """Fold streamed tool-call fragments into ``accumulated`` in place.

        Each fragment addresses a slot through ``index`` (defaulting to 0);
        the list is grown with empty slots as needed. ``id`` overwrites,
        while ``function.name`` and ``function.arguments`` are concatenated.
        ``null`` or malformed fragments are ignored.
        """
        if not isinstance(deltas, list):
            return
        for fragment in deltas:
            if not isinstance(fragment, dict):
                continue
            idx = fragment.get('index')
            if not isinstance(idx, int) or idx < 0:
                idx = 0
            # Make sure the list is long enough to hold this slot.
            while len(accumulated) <= idx:
                accumulated.append({
                    "id": "",
                    "type": "function",
                    "function": {"name": "", "arguments": ""},
                })
            slot = accumulated[idx]

            if fragment.get('id'):
                slot['id'] = fragment['id']

            function = fragment.get('function')
            if not isinstance(function, dict):
                continue
            name_part = function.get('name')
            if isinstance(name_part, str):
                slot['function']['name'] += name_part
            args_part = function.get('arguments')
            if isinstance(args_part, str):
                slot['function']['arguments'] += args_part

    @staticmethod
    def _finalize_tool_calls(accumulated):
        """Return the accumulated tool calls that have a name and arguments.

        Entries without a name or with blank arguments are dropped. A missing
        id is replaced by ``call_<position in the returned list>``. Arguments
        that parse as JSON are re-encoded for a consistent format; arguments
        that do not parse are kept as the raw string. Entries are updated in
        place.
        """
        valid = []
        for call in accumulated:
            function = call.get('function', {})
            name = function.get('name')
            args_str = function.get('arguments')
            if not (name and args_str and args_str.strip()):
                continue

            if not call.get('id'):
                call['id'] = f"call_{len(valid)}"

            try:
                function['arguments'] = json.dumps(json.loads(args_str), ensure_ascii=False)
            except json.JSONDecodeError:
                # Invalid JSON: keep the raw (non-empty) string as received.
                function['arguments'] = args_str
            valid.append(call)
        return valid

    @staticmethod
    def _is_tool_use_unsupported(body_text):
        """Return True if an error body's ``error.message`` mentions "tool use"."""
        if not body_text:
            return False
        try:
            body = json.loads(body_text)
            return 'tool use' in body.get('error', {}).get('message', '').lower()
        except (ValueError, AttributeError, TypeError):
            # Not JSON, or not shaped like {"error": {"message": "..."}}.
            return False

    def chat(self, messages, tools=None, on_stream_chunk=None):
        """Send a streaming chat request and return the assembled reply.

        Args:
            messages: Value sent as the payload's ``messages`` field.
            tools: Optional tool definitions; when truthy they are sent with
                ``tool_choice`` set to ``"auto"``.
            on_stream_chunk: Optional callable invoked with each text fragment
                as it arrives.

        Returns:
            LLMClient.Message: The accumulated text and valid tool calls. HTTP
            and other errors raised while requesting/streaming (including
            errors raised by ``on_stream_chunk``) are not propagated; they are
            reported as a Message whose content starts with ``"HTTP Error:"``
            or ``"Error:"``. If a request with tools fails with HTTP 404 and
            the error message mentions "tool use", the request is retried
            once without tools and ``warning`` is set on the result.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": True,  # Enable streaming
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"

        # Ask the API to disable reasoning/thinking where supported
        # (OpenRouter and some other providers honour this field).
        payload["reasoning"] = {"enabled": False}

        req = urllib.request.Request(url, data=json.dumps(payload).encode('utf-8'), method='POST')
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')

        content_parts = []
        pending_tool_calls = []

        try:
            self.cancel_requested = False
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                # Streaming: read the response line by line.
                for raw_line in resp:
                    if self.cancel_requested:
                        content_parts.append("\n\n[Stream cancelled by user]")
                        break

                    data_str = self._extract_sse_data(raw_line)
                    if data_str is None:
                        continue
                    if data_str == '[DONE]':
                        break

                    try:
                        chunk = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue

                    delta = self._first_delta(chunk)

                    # Reasoning deltas are intentionally discarded: they must
                    # never leak into the returned content.
                    self._merge_tool_call_deltas(pending_tool_calls, delta.get('tool_calls'))

                    # "content" may be null (e.g. alongside tool calls); only
                    # real text is accumulated and forwarded to the UI.
                    chunk_text = delta.get('content')
                    if isinstance(chunk_text, str):
                        content_parts.append(chunk_text)
                        if on_stream_chunk:
                            on_stream_chunk(chunk_text)

            message = {'content': "".join(content_parts)}
            valid_tool_calls = self._finalize_tool_calls(pending_tool_calls)
            if valid_tool_calls:
                message['tool_calls'] = valid_tool_calls
        except urllib.error.HTTPError as e:
            try:
                body_text = e.read().decode('utf-8', errors='replace')
            except Exception:
                # Best effort only: the error body is optional detail.
                body_text = ""
            if tools and e.code == 404 and self._is_tool_use_unsupported(body_text):
                # Nothing has been streamed yet, so the retry can safely reuse
                # the caller's streaming callback.
                result = self.chat(messages, tools=None, on_stream_chunk=on_stream_chunk)
                result.warning = "Tool calling not supported by this model. Running in chat-only mode."
                return result
            detail = f" - {body_text[:500]}" if body_text else ""
            return self.Message({'content': f"HTTP Error: {e.code} {e.reason}{detail}", 'tool_calls': None})
        except Exception as e:
            return self.Message({'content': f"Error: {str(e)}", 'tool_calls': None})

        return self.Message(message)