import json
import re
import urllib.error
import urllib.request

# Matches <think>...</think> and <thinking>...</thinking> blocks, with optional
# attributes on the opening tag.  The back-reference makes the closing tag
# agree with the opening one.
_THINKING_TAG_RE = re.compile(
    r'<(think(?:ing)?)\b[^>]*>.*?</\1\s*>',
    flags=re.DOTALL | re.IGNORECASE,
)

# A line starting with one of these (case-insensitive, after stripping) opens
# a reasoning paragraph that is dropped up to the next blank line.
_THINKING_LINE_PREFIXES = ('thinking:', 'reasoning:', 'let me thought', 'let me think')


def _strip_thinking(text: str) -> str:
    """Remove thinking/reasoning passages from a response text.

    Handles:
    - ``<think>...</think>`` blocks (any case, multi-line)
    - ``<thinking>...</thinking>`` blocks (any case, multi-line)
    - paragraphs introduced by a "Thinking:" / "Reasoning:" /
      "Let me think" line; the paragraph ends at the next blank line,
      which is dropped as well.

    Falsy input (``""`` or ``None``) is returned unchanged; otherwise the
    cleaned text is returned with surrounding whitespace stripped.
    """
    if not text:
        return text

    text = _THINKING_TAG_RE.sub('', text)

    kept = []
    skipping = False
    for line in text.splitlines():
        probe = line.strip().lower()
        if probe.startswith(_THINKING_LINE_PREFIXES):
            skipping = True
            continue
        if skipping and not probe:
            # The blank line terminates the reasoning paragraph.
            skipping = False
            continue
        if not skipping:
            kept.append(line)

    return '\n'.join(kept).strip()


class LLMClient:
    """Streaming client for a ``/chat/completions`` endpoint.

    ``chat`` never raises for transport or HTTP failures; it reports them as
    a ``Message`` whose ``content`` starts with ``"Error:"`` or
    ``"HTTP Error:"``.
    """

    class Message:
        """Final assistant message returned by ``LLMClient.chat``.

        Attributes are ``content`` (string content has thinking passages
        stripped; non-string content is kept as-is), ``tool_calls`` (list of
        tool-call dicts or ``None``) and ``warning`` (``None`` unless the
        client sets it).
        """

        def __init__(self, msg):
            raw_content = msg.get('content', '')
            # Strip thinking automatically, but only from plain strings.
            self.content = _strip_thinking(raw_content) if isinstance(raw_content, str) else raw_content
            self.tool_calls = msg.get('tool_calls', None)
            self.warning = None

    def __init__(self, base_url, model, api_key, timeout=600):
        """Store connection settings; a trailing ``/`` on ``base_url`` is removed."""
        self.base_url = base_url.rstrip('/')
        self.model = model
        self.api_key = api_key
        self.timeout = timeout
        # Set to True (e.g. from another thread) to stop an in-flight stream.
        self.cancel_requested = False

    def chat(self, messages, tools=None, on_stream_chunk=None):
        """Send ``messages`` and return the streamed reply as a ``Message``.

        Args:
            messages: list of chat message dicts, sent as-is.
            tools: optional tool definitions; when truthy they are sent with
                ``tool_choice="auto"``.
            on_stream_chunk: optional callable invoked with each raw content
                fragment as it arrives (before thinking is stripped).

        Returns:
            A ``Message``.  HTTP and other failures are reported through its
            ``content``.  If the server answers 404 with a "tool use" error
            while tools were supplied, the request is retried without tools
            and ``warning`` is set on the result.
        """
        url = f"{self.base_url}/chat/completions"
        req = self._build_request(url, messages, tools)

        try:
            self.cancel_requested = False
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                full_content, full_tool_calls = self._consume_stream(resp, on_stream_chunk)
        except urllib.error.HTTPError as e:
            body_text = self._read_error_body(e)
            if tools and e.code == 404 and self._tool_use_unsupported(body_text):
                # Retry in chat-only mode, still streaming to the caller's UI.
                result = self.chat(messages, tools=None, on_stream_chunk=on_stream_chunk)
                result.warning = "Tool calling not supported by this model. Running in chat-only mode."
                return result
            detail = f" - {body_text[:500]}" if body_text else ""
            return self.Message({'content': f"HTTP Error: {e.code} {e.reason}{detail}", 'tool_calls': None})
        except Exception as e:
            # Top-level boundary: callers expect a Message, not an exception.
            return self.Message({'content': f"Error: {str(e)}", 'tool_calls': None})

        message = {'content': full_content}
        if full_tool_calls:
            message['tool_calls'] = full_tool_calls
        # Streamed reasoning is intentionally discarded, never merged into content.
        return self.Message(message)

    def _build_request(self, url, messages, tools):
        """Build the POST request carrying the JSON payload and auth headers."""
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": True,
        }
        if tools:
            payload["tools"] = tools
            payload["tool_choice"] = "auto"
        # Ask the provider to disable reasoning where that option is supported.
        payload["reasoning"] = {"enabled": False}

        data = json.dumps(payload).encode('utf-8')
        req = urllib.request.Request(url, data=data, method='POST')
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')
        return req

    def _consume_stream(self, resp, on_stream_chunk):
        """Read ``data:`` lines from ``resp`` and return ``(content, tool_calls)``.

        ``resp`` is any iterable of byte lines.  Lines without a ``data:``
        prefix, undecodable JSON, and chunks without choices are skipped.
        """
        content_parts = []
        tool_calls = []

        for raw_line in resp:
            if self.cancel_requested:
                content_parts.append("\n\n[Stream cancelled by user]")
                break

            line = raw_line.decode('utf-8', errors='replace').strip()
            # The space after "data:" is optional.
            if not line.startswith('data:'):
                continue
            data_str = line[5:].strip()
            if data_str == '[DONE]':
                break

            try:
                chunk = json.loads(data_str)
            except json.JSONDecodeError:
                continue
            if not isinstance(chunk, dict):
                continue

            # Some chunks (e.g. usage-only ones) carry an empty "choices" list.
            choices = chunk.get('choices')
            if not choices or not isinstance(choices[0], dict):
                continue
            delta = choices[0].get('delta') or {}

            # Fields may be present but null, hence the "or" fallbacks.
            self._merge_tool_call_deltas(tool_calls, delta.get('tool_calls') or [])

            text = delta.get('content')
            if isinstance(text, str):
                content_parts.append(text)
                if on_stream_chunk:
                    on_stream_chunk(text)

        return ''.join(content_parts), tool_calls

    @staticmethod
    def _merge_tool_call_deltas(tool_calls, deltas):
        """Fold streamed tool-call fragments into ``tool_calls`` in place."""
        for tc in deltas:
            idx = tc.get('index') or 0
            # Grow the list so that position ``idx`` exists.
            while len(tool_calls) <= idx:
                tool_calls.append({
                    "id": "",
                    "type": "function",
                    "function": {"name": "", "arguments": ""},
                })
            entry = tool_calls[idx]
            if tc.get('id'):
                entry['id'] = tc['id']
            fn = tc.get('function') or {}
            entry['function']['name'] += fn.get('name') or ''
            entry['function']['arguments'] += fn.get('arguments') or ''

    @staticmethod
    def _read_error_body(err):
        """Return the decoded body of an HTTP error, or ``""`` if unreadable."""
        try:
            return err.read().decode('utf-8', errors='replace')
        except Exception:
            # Best effort only: the body is just extra detail for the message.
            return ""

    @staticmethod
    def _tool_use_unsupported(body_text):
        """Tell whether an error body's ``error.message`` mentions "tool use"."""
        if not body_text:
            return False
        try:
            body = json.loads(body_text)
        except ValueError:
            return False
        error = body.get('error') if isinstance(body, dict) else None
        message = error.get('message') if isinstance(error, dict) else None
        return isinstance(message, str) and 'tool use' in message.lower()