diff --git a/scripts/llm_client.py b/scripts/llm_client.py
index 3e032e3..f898b2f 100644
--- a/scripts/llm_client.py
+++ b/scripts/llm_client.py
@@ -52,12 +52,23 @@ class LLMClient:
         self.model = model
         self.api_key = api_key
         self.timeout = timeout
+        # Set by the UI (Ctrl+C) to stop an in-flight stream; reset at the start of chat().
+        self.cancel_requested = False
 
-    def chat(self, messages, tools=None):
+    def chat(self, messages, tools=None, on_stream_chunk=None):
+        """Send a streaming chat-completion request and assemble the streamed deltas.
+
+        Args:
+            messages: Conversation messages placed in the request payload.
+            tools: Optional tool definitions; added to the payload when truthy.
+            on_stream_chunk: Optional callable invoked with each text fragment
+                as soon as it is received.
+        """
         url = f"{self.base_url}/chat/completions"
         payload = {
             "model": self.model,
-            "messages": messages
+            "messages": messages,
+            "stream": True  # Ask the server for a streamed (SSE) response
         }
         if tools:
             payload["tools"] = tools
@@ -72,10 +83,82 @@ class LLMClient:
         req.add_header('Content-Type', 'application/json')
         req.add_header('Authorization', f'Bearer {self.api_key}')
 
+        # Accumulators for the streamed result
+        full_content = ""
+        full_tool_calls = []
+        reasoning_content = ""
+
         try:
+            self.cancel_requested = False
             with urllib.request.urlopen(req, timeout=self.timeout) as resp:
-                raw = resp.read().decode('utf-8')
-                response = json.loads(raw)
+                # Streaming: read the response line by line
+                for line in resp:
+                    if self.cancel_requested:
+                        # Stream cancellation
+                        full_content += "\n\n[Stream cancelled by user]"
+                        break
+
+                    line = line.decode('utf-8').strip()
+                    if not line or not line.startswith('data: '):
+                        continue
+
+                    data_str = line[6:]  # Strip the "data: " prefix
+                    if data_str == '[DONE]':
+                        break
+
+                    try:
+                        chunk = json.loads(data_str)
+                    except json.JSONDecodeError:
+                        continue
+
+                    # Parse the delta; tolerate empty `choices` and null fields
+                    delta = (chunk.get('choices') or [{}])[0].get('delta') or {}
+
+                    # Accumulate reasoning content when present
+                    if delta.get('reasoning_content'):
+                        reasoning_content += delta['reasoning_content']
+
+                    # Accumulate tool_calls when present
+                    if delta.get('tool_calls'):
+                        tool_calls = delta['tool_calls']
+                        for tc in tool_calls:
+                            idx = tc.get('index') or 0
+                            # Make sure the list is long enough
+                            while len(full_tool_calls) <= idx:
+                                full_tool_calls.append({
+                                    "id": "",
+                                    "type": "function",
+                                    "function": {"name": "", "arguments": ""}
+                                })
+
+                            # Update ID
+                            if 'id' in tc and tc['id']:
+                                full_tool_calls[idx]['id'] = tc['id']
+
+                            # Update function name (null-safe: later deltas may omit it)
+                            if tc.get('function') and tc['function'].get('name'):
+                                full_tool_calls[idx]['function']['name'] += tc['function']['name']
+
+                            # Update arguments (streamed as string fragments)
+                            if tc.get('function') and tc['function'].get('arguments'):
+                                full_tool_calls[idx]['function']['arguments'] += tc['function']['arguments']
+
+                    # Stream content (text response); skip null/empty fragments
+                    if delta.get('content'):
+                        chunk_text = delta['content']
+                        full_content += chunk_text
+
+                        # Forward the fragment to the UI callback
+                        if on_stream_chunk:
+                            on_stream_chunk(chunk_text)
+
+            # Build final response
+            message = {'content': full_content}
+
+            if full_tool_calls:
+                message['tool_calls'] = full_tool_calls
+
+            response = {'choices': [{'message': message}]}
         except urllib.error.HTTPError as e:
             body_text = ""
             try:
diff --git a/tui/agent.py b/tui/agent.py
index 763a735..f8e3d10 100644
--- a/tui/agent.py
+++ b/tui/agent.py
@@ -87,18 +87,43 @@ def _agent_loop(app):
         log(app, "system", f" step {step + 1} \u2014 Thinking...")
         app.scroll = 999999
 
-        response = app.llm.chat(app.messages, tools=app.TOOLS)
+        # Streaming response: create a placeholder entry for the AI reply
+        stream_idx = len(app.log)
+        log(app, "ai", "...")  # Placeholder shown while streaming
 
-        app.log.pop()
+        stream_buffer = []
+        def on_stream_chunk(chunk):
+            stream_buffer.append(chunk)
+            current_text = ''.join(stream_buffer)
+            # Update the placeholder in real time
+            if stream_idx < len(app.log):
+                app.log[stream_idx]['text'] = current_text
+                app.scroll = 999999
+
+        response = app.llm.chat(app.messages, tools=app.TOOLS, on_stream_chunk=on_stream_chunk)
+
+        # Remove the "Thinking..." log
+        for i in range(len(app.log) - 1, -1, -1):
+            if app.log[i].get('role') == 'system' and 'Thinking' in app.log[i].get('text', ''):
+                app.log.pop(i)
+                if i < stream_idx:
+                    # The placeholder moved down one slot; keep the index pointing at it
+                    stream_idx -= 1
+                break
+
+        # Sync the placeholder with the final text, or drop it when no text was produced
+        if stream_idx < len(app.log):
+            if response.content and response.content.strip():
+                app.log[stream_idx]['text'] = response.content
+            else:
+                app.log.pop(stream_idx)
 
         if response.warning:
             log(app, "system", f" {response.warning}")
 
         if response.tool_calls:
             _add_msg(app, "assistant", response.content, tool_calls=response.tool_calls)
-            if response.content and response.content.strip():
-                log(app, "ai", response.content)
-                app.scroll = 999999
+            # The placeholder already holds the final text; do not log it again
             for tc in response.tool_calls:
                 tname = tc["function"]["name"]
                 targs = tc["function"]["arguments"]
@@ -111,7 +136,7 @@ def _agent_loop(app):
         else:
             if response.content:
                 _add_msg(app, "assistant", response.content)
-                log(app, "ai", response.content)
+                # The placeholder already holds the final text; do not log it again
             log(app, "sep", "")
             ntro.end(stamp)
             app.agent_done.set()
diff --git a/tui/input.py b/tui/input.py
index b87c802..a2b168f 100644
--- a/tui/input.py
+++ b/tui/input.py
@@ -39,8 +39,14 @@ def handle_key(app, stdscr, key):
     processing = app.processing
 
     # -- Always allowed (even during processing) --
-    if key == 3: # Ctrl+C → exit
-        app.running = False
+    if key == 3:  # Ctrl+C → cancel the stream while processing, otherwise exit
+        if processing:
+            # Cancel the stream that is currently running
+            app.llm.cancel_requested = True
+            log(app, "system", " Stream cancelled by user")
+            app.scroll = 999999
+        else:
+            app.running = False
     elif key == curses.KEY_PPAGE:
         app.scroll = max(0, app.scroll - (app.h - 10) // 2)
     elif key == curses.KEY_NPAGE:
diff --git a/tui/render.py b/tui/render.py
index ccc43d6..fdeac2c 100644
--- a/tui/render.py
+++ b/tui/render.py
@@ -105,6 +105,23 @@ def draw_chat(app, stdscr):
                 _add_row([(color, " " * indent + chunk, bold)])
                 start += available
 
+    def _wrap_text_simple(text, indent=0, color=C_INPUT, bold=False):
+        """Hard-wrap long text at the available width (may split inside a word); used for tool arguments."""
+        available = w - indent - 1
+        if available <= 0:
+            return
+        lines = text.split("\n")
+        for line in lines:
+            if not line:
+                _add_row([(color, " " * indent, bold)])
+                continue
+            # Emit the line in chunks of at most `available` characters
+            start = 0
+            while start < len(line):
+                chunk = line[start:start + available]
+                _add_row([(color, " " * indent + chunk, bold)])
+                start += available
+
     for idx, item in enumerate(app.log):
         role, text = item["role"], item["text"]
         if role == "sep":
@@ -179,8 +196,8 @@ def draw_chat(app, stdscr):
                     (C_TOOL_CALL, tname),
                     (C_AI, f" ({item['time']}) "),
                 ])
-                for aline in args_str.split("\n"):
-                    _add_row([(C_INPUT, " " + aline)])
+                # Wrap the arguments to the terminal width
+                _wrap_text_simple(args_str, indent=1, color=C_INPUT, bold=False)
             except Exception:
                 _add_row([
                     (C_AI, " Hendrik "),
@@ -344,7 +361,10 @@ def draw_status(app, stdscr):
     session_tag = f"[{sname}] "
 
     mode = " PROCESSING " if app.processing else " READY "
-    hints = " ^N:new ^O:open ^R:rename ^D:send ^E:model ^W:ws ^C:exit "
+    if app.processing:
+        hints = " ^N:new ^O:open ^R:rename ^E:model ^W:ws ^C:cancel "
+    else:
+        hints = " ^N:new ^O:open ^R:rename ^D:send ^E:model ^W:ws ^C:exit "
     max_left = w - len(mode) - len(hints) - 4
     left = session_tag + ws
     if len(left) > max_left:
@@ -368,7 +388,8 @@ def draw_status(app, stdscr):
         (" ^D:send", not app.processing),
         (" ^E:model", True),
         (" ^W:ws", True),
-        (" ^C:exit", True),
+        (" ^C:cancel", app.processing),
+        (" ^C:exit", not app.processing),
     ]
     for text, enabled in hints_parts:
         attr = curses.color_pair(C_STATUS_INFO) | curses.A_BOLD if enabled else curses.color_pair(C_HINT_DISABLED)