import json
import re
import urllib.error
import urllib.request

# Paired XML-style reasoning blocks: <think ...>...</think> and
# <thinking ...>...</thinking>. Matching is case-insensitive and spans
# newlines; the backreference makes the closing tag match the opening one.
_THINK_BLOCK_RE = re.compile(
    r'<(thinking|think)\b[^>]*>.*?</\1\s*>',
    flags=re.DOTALL | re.IGNORECASE,
)

# Lower-cased line prefixes that open an inline reasoning paragraph.
_REASONING_PREFIXES = ('thinking:', 'reasoning:', 'let me thought', 'let me think')


def _strip_thinking(text: str) -> str:
    """Remove thinking/reasoning output from a response text.

    Two passes are applied:

    1. Every ``<think>...</think>`` / ``<thinking>...</thinking>`` block
       (any letter case, attributes allowed, may span lines) is deleted.
    2. Any line whose stripped, lower-cased form starts with one of
       ``_REASONING_PREFIXES`` starts a skipped run: that line and all
       following lines are dropped up to and including the next blank line.

    Args:
        text: Raw response text. Falsy values are returned unchanged.

    Returns:
        The cleaned text with leading/trailing whitespace stripped.
    """
    if not text:
        return text

    text = _THINK_BLOCK_RE.sub('', text)

    kept = []
    skipping = False
    for line in text.splitlines():
        probe = line.strip().lower()
        if probe.startswith(_REASONING_PREFIXES):
            skipping = True
            continue
        if skipping:
            # A blank line terminates the reasoning paragraph; the blank
            # line itself is dropped together with the paragraph.
            if not probe:
                skipping = False
            continue
        kept.append(line)

    return '\n'.join(kept).strip()


class LLMClient:
    """Minimal client for a ``/chat/completions`` style HTTP endpoint.

    Transport and protocol failures inside ``chat`` are reported as a
    ``Message`` whose ``content`` starts with ``"Error:"`` or
    ``"HTTP Error:"`` rather than being raised.
    """

    class Message:
        """Assistant message with reasoning text stripped from its content."""

        def __init__(self, msg):
            """Build a message from a response ``message`` mapping.

            Args:
                msg: Mapping read via ``.get`` for ``'content'`` and
                    ``'tool_calls'``.
            """
            raw_content = msg.get('content', '')
            # Only string content is cleaned; other values (e.g. None)
            # are stored untouched.
            if isinstance(raw_content, str):
                self.content = _strip_thinking(raw_content)
            else:
                self.content = raw_content
            self.tool_calls = msg.get('tool_calls', None)
            # Set by LLMClient.chat when it falls back to chat-only mode.
            self.warning = None

    def __init__(self, base_url, model, api_key, timeout=600):
        """Store connection settings.

        Args:
            base_url: API base URL; trailing slashes are removed.
            model: Model identifier sent in every request payload.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            timeout: Timeout passed to ``urllib.request.urlopen``.
        """
        self.base_url = base_url.rstrip('/')
        self.model = model
        self.api_key = api_key
        self.timeout = timeout

    @staticmethod
    def _error_message_from_body(body_text):
        """Return ``error.message`` from a JSON error body, or ``''``."""
        if not body_text:
            return ''
        try:
            body = json.loads(body_text)
        except ValueError:
            # Not JSON (json.JSONDecodeError is a ValueError subclass).
            return ''
        error = body.get('error') if isinstance(body, dict) else None
        message = error.get('message') if isinstance(error, dict) else None
        return message if isinstance(message, str) else ''

    def _malformed_response(self, headline, url, label, payload):
        """Build the error Message returned for a structurally bad response."""
        raw_preview = json.dumps(payload)[:500]
        return self.Message({
            'content': (
                f"Error: {headline}\n"
                f" URL : {url}\n"
                f" Model : {self.model}\n"
                f" {label}: {raw_preview}"
            ),
            'tool_calls': None
        })

    def chat(self, messages, tools=None):
        """POST a chat completion request and return the first choice.

        Args:
            messages: Value sent as the payload's ``"messages"`` field.
            tools: Optional tool definitions. When truthy they are sent
                with ``tool_choice`` set to ``"auto"``.

        Returns:
            A ``Message``. On HTTP errors, other request failures, or a
            response lacking ``choices[0].message``, the message content
            describes the error and ``tool_calls`` is ``None``. If the
            server answers 404 with an error message mentioning
            "tool use" while tools were supplied, the request is retried
            once without tools and ``warning`` is set on the result.
        """
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
            "messages": messages
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"

        # Ask the API to disable reasoning/thinking where supported
        # (OpenRouter and some other providers honour this field).
        payload["reasoning"] = {"enabled": False}

        data = json.dumps(payload).encode('utf-8')
        req = urllib.request.Request(url, data=data, method='POST')
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')

        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                raw = resp.read().decode('utf-8')
            response = json.loads(raw)
        except urllib.error.HTTPError as e:
            body_text = ""
            try:
                body_text = e.read().decode('utf-8', errors='replace')
            except Exception:
                # Best effort: the error body is only used for diagnostics.
                pass

            if (
                tools
                and e.code == 404
                and 'tool use' in self._error_message_from_body(body_text).lower()
            ):
                # The model cannot do tool calling; retry as plain chat.
                result = self.chat(messages, tools=None)
                result.warning = "Tool calling not supported by this model. Running in chat-only mode."
                return result

            detail = f" - {body_text[:500]}" if body_text else ""
            return self.Message({'content': f"HTTP Error: {e.code} {e.reason}{detail}", 'tool_calls': None})
        except Exception as e:
            return self.Message({'content': f"Error: {str(e)}", 'tool_calls': None})

        if 'choices' not in response:
            return self._malformed_response(
                "Unexpected response — 'choices' key missing.", url, "Response", response
            )

        if not response['choices']:
            return self._malformed_response(
                "'choices' is empty in the response.", url, "Response", response
            )

        first_choice = response['choices'][0]
        if 'message' not in first_choice:
            return self._malformed_response(
                "'message' key missing in first choice.", url, "Choice ", first_choice
            )

        message = first_choice['message']

        # Providers/models with thinking support may return separate
        # reasoning fields. They are discarded, never merged into content;
        # inline reasoning is stripped in Message.__init__.
        message.pop('reasoning_content', None)
        message.pop('reasoning', None)

        return self.Message(message)