# hendrik/scripts/llm_client.py

import json
import re
import urllib.request
import urllib.error


def _strip_thinking(text: str) -> str:
    """
    Hapus semua bentuk thinking/reasoning dari response text.
    Handles:
      - <think>...</think> blocks (any case)
      - <reasoning>...</reasoning> blocks
      - "Thinking:" / "Reasoning:" inline prefixes
    """
    if not text:
        return text

    # Strip XML-style thinking blocks (case-insensitive, DOTALL for multiline)
    text = re.sub(r'<think[^>]*>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<reasoning[^>]*>.*?</reasoning>', '', text, flags=re.DOTALL | re.IGNORECASE)

    # Strip lines starting with Thinking: / Reasoning: / Let me think...
    lines = text.splitlines()
    cleaned = []
    skip_block = False
    for line in lines:
        stripped = line.strip().lower()
        if stripped.startswith(('thinking:', 'reasoning:', 'let me thought', 'let me think')):
            skip_block = True
            continue
        if skip_block and not stripped:
            skip_block = False
            continue
        if not skip_block:
            cleaned.append(line)

    result = '\n'.join(cleaned).strip()
    return result


class LLMClient:
    """Small urllib-based client for a ``{base_url}/chat/completions`` endpoint.

    Transport failures, HTTP errors and malformed response bodies are not
    raised by ``chat``; they are reported as a ``Message`` whose ``content``
    starts with ``"Error:"`` or ``"HTTP Error:"``.
    """

    # Keys that carry model reasoning; they are discarded from the reply.
    _REASONING_KEYS = ('reasoning_content', 'reasoning')

    class Message:
        """One assistant reply (or an error report) returned by ``chat``."""

        def __init__(self, msg):
            """Build a message from a response ``message`` dict.

            Args:
                msg: dict that may contain ``content`` and ``tool_calls``.
            """
            raw_content = msg.get('content', '')
            # String content is automatically cleaned of thinking/reasoning
            # text; non-string content (e.g. None) is kept untouched.
            self.content = _strip_thinking(raw_content) if isinstance(raw_content, str) else raw_content
            self.tool_calls = msg.get('tool_calls', None)
            # Set by ``chat`` when it had to fall back to chat-only mode.
            self.warning = None

    def __init__(self, base_url, model, api_key, timeout=600):
        """Store connection settings.

        Args:
            base_url: API root; a trailing ``/`` is removed.
            model: model identifier sent with every request.
            api_key: token sent as ``Authorization: Bearer <api_key>``.
            timeout: timeout passed to ``urlopen`` (default 600).
        """
        self.base_url = base_url.rstrip('/')
        self.model = model
        self.api_key = api_key
        self.timeout = timeout

    @staticmethod
    def _is_tool_use_error(body_text):
        """Return True if an error body's ``error.message`` mentions "tool use".

        Anything that is not JSON, or does not have the expected
        ``{"error": {"message": "..."}}`` shape, yields False.
        """
        try:
            body = json.loads(body_text) if body_text else {}
        except ValueError:
            return False
        error = body.get('error') if isinstance(body, dict) else None
        text = error.get('message') if isinstance(error, dict) else None
        return isinstance(text, str) and 'tool use' in text.lower()

    @classmethod
    def _extract_message(cls, response):
        """Validate a decoded response and pull out the first choice's message.

        Returns:
            ``(message, None)`` on success, where ``message`` is a copy of the
            first choice's message dict without the reasoning keys; or
            ``(None, (headline, label, payload))`` describing what is wrong,
            where ``payload`` is the object to preview in the error text.
        """
        # A non-dict body (e.g. JSON null or a list) is reported the same way
        # as a dict without 'choices' instead of raising TypeError.
        if not isinstance(response, dict) or 'choices' not in response:
            return None, ("Unexpected response — 'choices' key missing.", "Response:", response)

        choices = response['choices']
        if not choices:
            return None, ("'choices' is empty in the response.", "Response:", response)

        first = choices[0]
        message = first.get('message') if isinstance(first, dict) else None
        # A null / non-dict message cannot be turned into a Message, so it is
        # reported like a missing one; the preview shows the actual choice.
        if not isinstance(message, dict):
            return None, ("'message' key missing in first choice.", "Choice :", first)

        # Reasoning fields are thrown away, never merged into the content.
        cleaned = {key: value for key, value in message.items() if key not in cls._REASONING_KEYS}
        return cleaned, None

    def _malformed_response(self, url, headline, label, payload):
        """Build the error Message for a response with an unexpected shape."""
        raw_preview = json.dumps(payload)[:500]
        return self.Message({
            'content': (
                f"Error: {headline}\n"
                f"  URL   : {url}\n"
                f"  Model : {self.model}\n"
                f"  {label} {raw_preview}"
            ),
            'tool_calls': None
        })

    def chat(self, messages, tools=None):
        """Send ``messages`` to the chat-completions endpoint.

        Args:
            messages: JSON-serialisable list of chat messages.
            tools: optional tool definitions; when truthy they are sent with
                ``tool_choice`` set to ``"auto"``.

        Returns:
            A ``Message``. On failure its ``content`` holds an error
            description. If the server answers 404 with a "tool use" error
            while tools were supplied, the request is retried once without
            tools and the returned message carries a ``warning``.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": messages
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"

        # Ask the API to disable reasoning/thinking where that is supported
        # (OpenRouter and some other providers accept this field).
        payload["reasoning"] = {"enabled": False}

        data = json.dumps(payload).encode('utf-8')
        req = urllib.request.Request(url, data=data, method='POST')
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                raw = resp.read().decode('utf-8')
            response = json.loads(raw)
        except urllib.error.HTTPError as e:
            body_text = ""
            try:
                body_text = e.read().decode('utf-8', errors='replace')
            except Exception:
                # Best effort only: the body is extra detail for the error
                # text, so a failure to read it must not hide the HTTP error.
                pass
            if tools and e.code == 404 and self._is_tool_use_error(body_text):
                # The model cannot do tool calling: retry once as plain chat.
                result = self.chat(messages, tools=None)
                result.warning = "Tool calling not supported by this model. Running in chat-only mode."
                return result
            detail = f" - {body_text[:500]}" if body_text else ""
            return self.Message({'content': f"HTTP Error: {e.code} {e.reason}{detail}", 'tool_calls': None})
        except Exception as e:
            # Boundary handler: network errors, timeouts and invalid JSON are
            # all reported to the caller as an error message.
            return self.Message({'content': f"Error: {str(e)}", 'tool_calls': None})

        message, problem = self._extract_message(response)
        if problem is not None:
            headline, label, culprit = problem
            return self._malformed_response(url, headline, label, culprit)

        return self.Message(message)