Stream canceling feature

2026-06-21 10:58:50 +07:00 · 2026-06-21 10:58:50 +07:00 · 94c364798f
commit 94c364798f
parent 419f2832c8
4 changed files with 134 additions and 16 deletions
--- a/scripts/llm_client.py
+++ b/scripts/llm_client.py
@ -52,12 +52,14 @@ class LLMClient:
        self.model = model
        self.api_key = api_key
        self.timeout = timeout
        self.cancel_requested = False
-    def chat(self, messages, tools=None):
+    def chat(self, messages, tools=None, on_stream_chunk=None):
        url = f"{self.base_url}/chat/completions"
        payload = {
            "model": self.model,
-            "messages": messages
+            "messages": messages,
            "stream": True  # Enable streaming
        }
        if tools:
            payload["tools"] = tools
@ -72,10 +74,82 @@ class LLMClient:
        req.add_header('Content-Type', 'application/json')
        req.add_header('Authorization', f'Bearer {self.api_key}')
        # Variabel untuk mengumpulkan hasil
        full_content = ""
        full_tool_calls = []
        reasoning_content = ""
        try:
            self.cancel_requested = False
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
-                raw = resp.read().decode('utf-8')
+                # Streaming: baca line by line
-            response = json.loads(raw)
+                for line in resp:
                    if self.cancel_requested:
                        # Stream cancellation
                        full_content += "\n\n[Stream cancelled by user]"
                        break
                    line = line.decode('utf-8').strip()
                    if not line or not line.startswith('data: '):
                        continue
                    data_str = line[6:]  # Hapus "data: " prefix
                    if data_str == '[DONE]':
                        break
                    try:
                        chunk = json.loads(data_str)
                    except json.JSONDecodeError:
                        continue
                    # Parse delta dari chunk
                    delta = chunk.get('choices', [{}])[0].get('delta', {})
                    # Stream reasoning content jika ada
                    if 'reasoning_content' in delta:
                        reasoning_content += delta['reasoning_content']
                    # Stream tool_calls jika ada
                    if 'tool_calls' in delta:
                        tool_calls = delta['tool_calls']
                        for tc in tool_calls:
                            idx = tc.get('index', 0)
                            # Pastikan list cukup panjang
                            while len(full_tool_calls) <= idx:
                                full_tool_calls.append({
                                    "id": "",
                                    "type": "function",
                                    "function": {"name": "", "arguments": ""}
                                })
                            # Update ID
                            if 'id' in tc and tc['id']:
                                full_tool_calls[idx]['id'] = tc['id']
                            # Update function name
                            if 'function' in tc and 'name' in tc['function']:
                                full_tool_calls[idx]['function']['name'] += tc['function']['name']
                            # Update arguments
                            if 'function' in tc and 'arguments' in tc['function']:
                                full_tool_calls[idx]['function']['arguments'] += tc['function']['arguments']
                    # Stream content (text response)
                    if 'content' in delta:
                        chunk_text = delta['content']
                        full_content += chunk_text
                        # Callback untuk streaming ke UI
                        if on_stream_chunk:
                            on_stream_chunk(chunk_text)
            # Build final response
            message = {'content': full_content}
            if full_tool_calls:
                message['tool_calls'] = full_tool_calls
            response = {'choices': [{'message': message}]}
        except urllib.error.HTTPError as e:
            body_text = ""
            try:
--- a/tui/agent.py
+++ b/tui/agent.py
@ -87,18 +87,35 @@ def _agent_loop(app):
        log(app, "system", f"  step {step + 1} \u2014 Thinking...")
        app.scroll = 999999
-        response = app.llm.chat(app.messages, tools=app.TOOLS)
+        # Streaming response - buat placeholder untuk AI response
        stream_idx = len(app.log)
        log(app, "ai", "...")  # Placeholder sambil streaming
-        app.log.pop()
+        stream_buffer = []
        def on_stream_chunk(chunk):
            """Record a streamed text chunk and refresh the placeholder log entry.

            The chunk is appended to ``stream_buffer``. If ``stream_idx`` still
            points inside ``app.log``, that entry's text is replaced with
            everything received so far and the view is scrolled to the bottom.
            """
            stream_buffer.append(chunk)
            # The placeholder entry may no longer exist at this index; in that
            # case only the buffer is updated.
            if stream_idx >= len(app.log):
                return
            app.log[stream_idx]['text'] = ''.join(stream_buffer)
            app.scroll = 999999
        response = app.llm.chat(app.messages, tools=app.TOOLS, on_stream_chunk=on_stream_chunk)
        # Hapus "Thinking..." log
        for i in range(len(app.log) - 1, -1, -1):
            if app.log[i].get('role') == 'system' and 'Thinking' in app.log[i].get('text', ''):
                app.log.pop(i)
                break
        if response.warning:
            log(app, "system", f"  {response.warning}")
        if response.tool_calls:
            _add_msg(app, "assistant", response.content, tool_calls=response.tool_calls)
-            if response.content and response.content.strip():
+            # Placeholder sudah terupdate via streaming, jangan log lagi
-                log(app, "ai", response.content)
+            if stream_idx < len(app.log) and response.content and response.content.strip():
-                app.scroll = 999999
+                app.log[stream_idx]['text'] = response.content
            for tc in response.tool_calls:
                tname = tc["function"]["name"]
                targs = tc["function"]["arguments"]
@ -111,7 +128,7 @@ def _agent_loop(app):
        else:
            if response.content:
                _add_msg(app, "assistant", response.content)
-                log(app, "ai", response.content)
+                # Placeholder sudah terupdate via streaming, jangan log lagi
            log(app, "sep", "")
            ntro.end(stamp)
            app.agent_done.set()
--- a/tui/input.py
+++ b/tui/input.py
@ -39,8 +39,14 @@ def handle_key(app, stdscr, key):
    processing = app.processing
    # -- Always allowed (even during processing) --
-    if key == 3:                    # Ctrl+C → exit
+    if key == 3:                    # Ctrl+C → cancel stream jika processing, exit jika tidak
-        app.running = False
+        if processing:
            # Cancel stream yang sedang berjalan
            app.llm.cancel_requested = True
            log(app, "system", "  Stream cancelled by user")
            app.scroll = 999999
        else:
            app.running = False
    elif key == curses.KEY_PPAGE:
        app.scroll = max(0, app.scroll - (app.h - 10) // 2)
    elif key == curses.KEY_NPAGE:
--- a/tui/render.py
+++ b/tui/render.py
@ -105,6 +105,23 @@ def draw_chat(app, stdscr):
                _add_row([(color, " " * indent + chunk, bold)])
                start += available
    def _wrap_text_simple(text, indent=0, color=C_INPUT, bold=False):
        """Emit *text* as rows hard-wrapped to the available width.

        Each newline-separated line is cut into consecutive slices of
        ``w - indent - 1`` characters (a slice boundary may fall in the
        middle of a word) and every slice is passed to ``_add_row`` prefixed
        with ``indent`` spaces. An empty line produces one row containing
        only the indent. Nothing is emitted when the available width is not
        positive. Used for tool arguments.
        """
        width = w - indent - 1
        if width <= 0:
            return
        pad = " " * indent
        for raw_line in text.split("\n"):
            if not raw_line:
                # Keep blank lines visible as an indent-only row.
                _add_row([(color, pad, bold)])
                continue
            # Step through the line in fixed-width slices.
            for offset in range(0, len(raw_line), width):
                _add_row([(color, pad + raw_line[offset:offset + width], bold)])
    for idx, item in enumerate(app.log):
        role, text = item["role"], item["text"]
        if role == "sep":
@ -179,8 +196,8 @@ def draw_chat(app, stdscr):
                    (C_TOOL_CALL, tname),
                    (C_AI, f" ({item['time']}) "),
                ])
-                for aline in args_str.split("\n"):
+                # Wrap arguments sesuai lebar terminal
-                    _add_row([(C_INPUT, " " + aline)])
+                _wrap_text_simple(args_str, indent=1, color=C_INPUT, bold=False)
            except Exception:
                _add_row([
                    (C_AI, " Hendrik "),
@ -344,7 +361,10 @@ def draw_status(app, stdscr):
        session_tag = f"[{sname}] "
    mode = " PROCESSING " if app.processing else " READY "
-    hints = " ^N:new  ^O:open  ^R:rename  ^D:send  ^E:model  ^W:ws  ^C:exit "
+    if app.processing:
        hints = " ^N:new  ^O:open  ^R:rename  ^E:model  ^W:ws  ^C:cancel "
    else:
        hints = " ^N:new  ^O:open  ^R:rename  ^D:send  ^E:model  ^W:ws  ^C:exit "
    max_left = w - len(mode) - len(hints) - 4
    left = session_tag + ws
    if len(left) > max_left:
@ -368,7 +388,8 @@ def draw_status(app, stdscr):
        ("  ^D:send", not app.processing),
        ("  ^E:model", True),
        ("  ^W:ws", True),
-        ("  ^C:exit", True),
+        ("  ^C:cancel", app.processing),
        ("  ^C:exit", not app.processing),
    ]
    for text, enabled in hints_parts:
        attr = curses.color_pair(C_STATUS_INFO) | curses.A_BOLD if enabled else curses.color_pair(C_HINT_DISABLED)