diff --git a/.gitignore b/.gitignore
index 261c375..c503ad7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 dist/
 .config.json
+sandbox/
diff --git a/agent.go b/agent.go
new file mode 100644
index 0000000..35459bb
--- /dev/null
+++ b/agent.go
@@ -0,0 +1,59 @@
+package main
+
+import (
+	"context"
+	"fmt"
+)
+
+type ToolExec func(ctx context.Context, name, argsJSON string) string
+
+const maxAgentIterations = 50
+
+// Agent is a stateless driver: given a message slice and tools, it advances
+// the conversation until the assistant replies with no tool calls.
+type Agent struct {
+	Name         string
+	Client       *Client
+	SystemPrompt string
+	Tools        []Tool
+	ToolExec     ToolExec
+	OnToolCall   func(agent string, tc ToolCall, result string)
+}
+
+// Run advances the given messages, returning the final assistant text and
+// the updated message slice (including tool calls + tool results).
+func (a *Agent) Run(ctx context.Context, messages []Message) (string, []Message, error) {
+	for i := 0; i < maxAgentIterations; i++ {
+		msg, err := a.Client.Chat(ctx, messages, a.Tools)
+		if err != nil {
+			return "", messages, err
+		}
+		messages = append(messages, msg)
+		if len(msg.ToolCalls) == 0 {
+			return msg.Content, messages, nil
+		}
+		for _, tc := range msg.ToolCalls {
+			result := a.ToolExec(ctx, tc.Function.Name, tc.Function.Arguments)
+			if a.OnToolCall != nil {
+				a.OnToolCall(a.Name, tc, result)
+			}
+			messages = append(messages, Message{
+				Role:       "tool",
+				ToolCallID: tc.ID,
+				Name:       tc.Function.Name,
+				Content:    result,
+			})
+		}
+	}
+	return "", messages, fmt.Errorf("%s: exceeded %d iterations without final reply", a.Name, maxAgentIterations)
+}
+
+// Do is a one-shot helper: fresh conversation of system+user → final text.
+func (a *Agent) Do(ctx context.Context, userMsg string) (string, error) {
+	messages := []Message{
+		{Role: "system", Content: a.SystemPrompt},
+		{Role: "user", Content: userMsg},
+	}
+	reply, _, err := a.Run(ctx, messages)
+	return reply, err
+}
diff --git a/main.go b/main.go
index 37796bd..2f33626 100644
--- a/main.go
+++ b/main.go
@@ -8,11 +8,6 @@ import (
 	"strings"
 )
 
-const systemPrompt = `You are a coding assistant running inside a CLI. You have file tools scoped
-to the current project directory. All paths are relative to the project root; absolute paths
-and paths that escape the root (via "..") will be rejected. Prefer listing and reading before
-editing. Keep replies concise.`
-
 func main() {
 	root, err := os.Getwd()
 	if err != nil {
@@ -35,12 +30,10 @@ func main() {
 	}
 
 	client := NewClient(baseURL, apiKey, model)
-	tools := toolDefinitions()
+	orch := NewOrchestrator(client, root)
 
-	messages := []Message{{Role: "system", Content: systemPrompt}}
-
-	fmt.Printf("NyxTex agent — model=%s root=%s\n", model, root)
-	fmt.Println("Type your request. Empty line to submit. Ctrl+D or /exit to quit.")
+	fmt.Printf("NyxTex agent (manager + programmer + qa) — model=%s root=%s\n", model, root)
+	fmt.Println("Type your request. Ctrl+D or /exit to quit.")
 
 	reader := bufio.NewReader(os.Stdin)
 	for {
@@ -58,10 +51,14 @@ func main() {
 			return
 		}
 
-		messages = append(messages, Message{Role: "user", Content: input})
-
-		if err := runTurn(context.Background(), client, tools, root, &messages); err != nil {
+		reply, err := orch.Handle(context.Background(), input)
+		if err != nil {
 			fmt.Fprintln(os.Stderr, "error:", err)
+			continue
+		}
+		if strings.TrimSpace(reply) != "" {
+			fmt.Println()
+			fmt.Println(reply)
 		}
 	}
 }
@@ -74,36 +71,6 @@ func readUserInput(r *bufio.Reader) (string, error) {
 	return line, nil
 }
 
-// runTurn sends the conversation and handles any tool-call loop until the
-// assistant produces a plain text reply.
-func runTurn(ctx context.Context, client *Client, tools []Tool, root string, messages *[]Message) error {
-	for {
-		msg, err := client.Chat(ctx, *messages, tools)
-		if err != nil {
-			return err
-		}
-		*messages = append(*messages, msg)
-
-		if len(msg.ToolCalls) == 0 {
-			if strings.TrimSpace(msg.Content) != "" {
-				fmt.Println(msg.Content)
-			}
-			return nil
-		}
-
-		for _, tc := range msg.ToolCalls {
-			fmt.Printf(" [tool] %s %s\n", tc.Function.Name, tc.Function.Arguments)
-			result := runTool(root, tc.Function.Name, tc.Function.Arguments)
-			*messages = append(*messages, Message{
-				Role:       "tool",
-				ToolCallID: tc.ID,
-				Name:       tc.Function.Name,
-				Content:    result,
-			})
-		}
-	}
-}
-
 func getEnvDefault(key, def string) string {
 	if v := os.Getenv(key); v != "" {
 		return v
diff --git a/openai.go b/openai.go
index 4c9c34d..e5ed4f2 100644
--- a/openai.go
+++ b/openai.go
@@ -7,6 +7,9 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"os"
+	"strconv"
+	"sync/atomic"
 	"time"
 )
 
@@ -57,19 +60,38 @@ type Client struct {
 }
 
 func NewClient(baseURL, apiKey, model string) *Client {
+	timeout := 600 * time.Second
+	if v := os.Getenv("OPENAI_TIMEOUT_SECONDS"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n > 0 {
+			timeout = time.Duration(n) * time.Second
+		}
+	}
 	return &Client{
 		BaseURL: baseURL,
 		APIKey:  apiKey,
 		Model:   model,
-		HTTP:    &http.Client{Timeout: 120 * time.Second},
+		HTTP:    &http.Client{Timeout: timeout},
 	}
 }
 
+var aiCallCounter uint64
+
+func logAIIO() bool { return os.Getenv("LOG_AI_IO") == "1" }
+
 func (c *Client) Chat(ctx context.Context, messages []Message, tools []Tool) (Message, error) {
 	body, err := json.Marshal(chatRequest{Model: c.Model, Messages: messages, Tools: tools})
 	if err != nil {
 		return Message{}, err
 	}
+
+	callID := atomic.AddUint64(&aiCallCounter, 1)
+	debug := logAIIO()
+	if debug {
+		fmt.Fprintf(os.Stderr, "\n===== AI REQUEST #%d model=%s msgs=%d tools=%d =====\n%s\n",
+			callID, c.Model, len(messages), len(tools), string(body))
+	}
+
+	start := time.Now()
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.BaseURL+"/chat/completions", bytes.NewReader(body))
 	if err != nil {
 		return Message{}, err
@@ -80,6 +102,10 @@ func (c *Client) Chat(ctx context.Context, messages []Message, tools []Tool) (Me
 	}
 	resp, err := c.HTTP.Do(req)
 	if err != nil {
+		if debug {
+			fmt.Fprintf(os.Stderr, "===== AI REQUEST #%d FAILED after %s: %v =====\n",
+				callID, time.Since(start).Round(time.Millisecond), err)
+		}
 		return Message{}, err
 	}
 	defer resp.Body.Close()
@@ -88,6 +114,10 @@ func (c *Client) Chat(ctx context.Context, messages []Message, tools []Tool) (Me
 	if err != nil {
 		return Message{}, err
 	}
+	if debug {
+		fmt.Fprintf(os.Stderr, "===== AI RESPONSE #%d status=%d elapsed=%s bytes=%d =====\n%s\n",
+			callID, resp.StatusCode, time.Since(start).Round(time.Millisecond), len(raw), string(raw))
+	}
 	if resp.StatusCode >= 400 {
 		return Message{}, fmt.Errorf("api error %d: %s", resp.StatusCode, string(raw))
 	}
diff --git a/orchestrator.go b/orchestrator.go
new file mode 100644
index 0000000..c47b9c6
--- /dev/null
+++ b/orchestrator.go
@@ -0,0 +1,150 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+)
+
+const managerSystemPrompt = `You are the Manager agent. You coordinate between the user, a Programmer agent, and a QA agent.
+
+You have two orchestration tools:
+- assign_task(instructions): delegate coding work to the Programmer. Returns the Programmer's report.
+- request_qa_review(focus): ask QA to validate specific files/changes. Returns QA's report.
+
+Workflow:
+1. Read the user's request. If it is purely conversational or meta (a greeting, a question about the agent itself), answer directly with no tool calls.
+2. Otherwise, plan the work, then call assign_task with clear, self-contained instructions for the Programmer.
+3. When the Programmer reports back, call request_qa_review naming the files touched and what QA should verify.
+4. If QA reports issues, call assign_task again with specific fix instructions that reference QA's findings.
+5. Iterate until QA approves, then reply to the user with a concise summary of what changed and any caveats.
+
+Important:
+- You have NO file tools. Do not try to read or edit files yourself.
+- Every call to assign_task/request_qa_review spawns a fresh sub-agent with no memory of prior calls — put all needed context into the brief.
+- Keep the final user-facing reply brief: what was done, where, and anything flagged by QA.`
+
+func managerTools() []Tool {
+	return []Tool{
+		{
+			Type: "function",
+			Function: ToolFunction{
+				Name:        "assign_task",
+				Description: "Delegate a coding task to the Programmer agent. The Programmer has read/create/edit/delete/list file tools scoped to the project root. Returns the Programmer's written report.",
+				Parameters: map[string]any{
+					"type": "object",
+					"properties": map[string]any{
+						"instructions": map[string]any{
+							"type":        "string",
+							"description": "Clear, self-contained instructions for the Programmer. Include all context needed (files, goals, constraints).",
+						},
+					},
+					"required": []string{"instructions"},
+				},
+			},
+		},
+		{
+			Type: "function",
+			Function: ToolFunction{
+				Name:        "request_qa_review",
+				Description: "Ask the QA agent to review code. QA has read-only file tools (read_file, list_directory) plus a shell command runner (run_command) that can execute any test suite, linter, type-checker, or build command. Returns QA's written report with a verdict.",
+				Parameters: map[string]any{
+					"type": "object",
+					"properties": map[string]any{
+						"focus": map[string]any{
+							"type":        "string",
+							"description": "What QA should review: specific files, what aspect of the change, what to look for. Be explicit.",
+						},
+					},
+					"required": []string{"focus"},
+				},
+			},
+		},
+	}
+}
+
+type Orchestrator struct {
+	client  *Client
+	root    string
+	manager *Agent
+	history []Message
+}
+
+func NewOrchestrator(client *Client, root string) *Orchestrator {
+	o := &Orchestrator{client: client, root: root}
+	o.manager = &Agent{
+		Name:         "manager",
+		Client:       client,
+		SystemPrompt: managerSystemPrompt,
+		Tools:        managerTools(),
+		ToolExec:     o.execManagerTool,
+		OnToolCall:   logToolCall(""),
+	}
+	o.history = []Message{{Role: "system", Content: managerSystemPrompt}}
+	return o
+}
+
+// Handle advances the Manager conversation with one user turn.
+func (o *Orchestrator) Handle(ctx context.Context, userInput string) (string, error) {
+	o.history = append(o.history, Message{Role: "user", Content: userInput})
+	reply, updated, err := o.manager.Run(ctx, o.history)
+	o.history = updated
+	return reply, err
+}
+
+func (o *Orchestrator) execManagerTool(ctx context.Context, name, argsJSON string) string {
+	switch name {
+	case "assign_task":
+		var a struct {
+			Instructions string `json:"instructions"`
+		}
+		if err := json.Unmarshal([]byte(argsJSON), &a); err != nil {
+			return "error: invalid arguments: " + err.Error()
+		}
+		if strings.TrimSpace(a.Instructions) == "" {
+			return "error: instructions is required"
+		}
+		prog := newProgrammerAgent(o.client, o.root, logToolCall(" "))
+		report, err := prog.Do(ctx, a.Instructions)
+		if err != nil {
+			return "error: programmer failed: " + err.Error()
+		}
+		printReport("programmer", report)
+		return report
+
+	case "request_qa_review":
+		var a struct {
+			Focus string `json:"focus"`
+		}
+		if err := json.Unmarshal([]byte(argsJSON), &a); err != nil {
+			return "error: invalid arguments: " + err.Error()
+		}
+		if strings.TrimSpace(a.Focus) == "" {
+			return "error: focus is required"
+		}
+		qa := newQAAgent(o.client, o.root, logToolCall(" "))
+		report, err := qa.Do(ctx, a.Focus)
+		if err != nil {
+			return "error: qa failed: " + err.Error()
+		}
+		printReport("qa", report)
+		return report
+
+	default:
+		return "error: unknown tool " + name
+	}
+}
+
+func logToolCall(indent string) func(string, ToolCall, string) {
+	return func(agent string, tc ToolCall, _ string) {
+		fmt.Printf("%s[%s] %s %s\n", indent, agent, tc.Function.Name, tc.Function.Arguments)
+	}
+}
+
+func printReport(from, report string) {
+	fmt.Printf(" [%s → manager]\n", from)
+	for _, line := range strings.Split(strings.TrimRight(report, "\n"), "\n") {
+		fmt.Printf(" %s\n", line)
+	}
+}
diff --git a/roles.go b/roles.go
new file mode 100644
index 0000000..070f573
--- /dev/null
+++ b/roles.go
@@ -0,0 +1,52 @@
+package main
+
+import "context"
+
+const programmerSystemPrompt = `You are the Programmer agent.
+A Manager agent delegates coding tasks to you. Execute them using your file tools.
+All paths are relative to the project root; absolute paths and parent escapes ("..") are rejected.
+
+Guidelines:
+- Read relevant files before editing.
+- Make minimal, focused changes that fulfill the task. Do not refactor unrelated code or add speculative features.
+- When done, reply with a concise report listing each file you changed and a one-line summary of the change.
+- If you cannot complete the task, report the blocker honestly rather than inventing a workaround.`
+
+const qaSystemPrompt = `You are the QA/Tester agent.
+A Manager agent asks you to validate code changes. You have:
+- read-only file tools: read_file, list_directory (scoped to the project root)
+- a shell command runner: run_command (executes via 'sh -c' with cwd at the project root)
+
+Guidelines:
+- Inspect the files called out in the focus brief, plus surrounding and related code that might be affected.
+- Check for: correctness vs. the stated task, missing edge cases, broken references, inconsistencies with the rest of the codebase, regressions.
+- Use run_command to execute the project's tests / linters / type-checkers / builds. Pick commands appropriate to the stack you detect (e.g., look for go.mod → 'go test ./...'; package.json → 'npm test' or 'bun test'; composer.json / artisan → 'php artisan test' or 'vendor/bin/phpunit'; pyproject.toml → 'pytest -q'; Cargo.toml → 'cargo test').
+- If no test suite exists, at minimum run a build/type-check (e.g., 'go build ./...', 'tsc --noEmit', 'php -l path/to/file.php').
+- Do not modify files.
+- Reply with a concise report: what you checked, what commands you ran and their results, what looks correct, and any issues. Cite file:line where possible. End with a clear verdict: "approved" or "needs changes".`
+
+func newProgrammerAgent(client *Client, root string, log func(string, ToolCall, string)) *Agent {
+	return &Agent{
+		Name:         "programmer",
+		Client:       client,
+		SystemPrompt: programmerSystemPrompt,
+		Tools:        programmerTools(),
+		ToolExec: func(_ context.Context, name, args string) string {
+			return runTool(root, name, args)
+		},
+		OnToolCall: log,
+	}
+}
+
+func newQAAgent(client *Client, root string, log func(string, ToolCall, string)) *Agent {
+	return &Agent{
+		Name:         "qa",
+		Client:       client,
+		SystemPrompt: qaSystemPrompt,
+		Tools:        qaTools(),
+		ToolExec: func(_ context.Context, name, args string) string {
+			return runTool(root, name, args)
+		},
+		OnToolCall: log,
+	}
+}
diff --git a/tools.go b/tools.go
index 3d15dd3..56e0a8f 100644
--- a/tools.go
+++ b/tools.go
@@ -1,12 +1,16 @@
 package main
 
 import (
+	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"sort"
 	"strings"
+	"time"
 )
 
 type Tool struct {
@@ -20,7 +24,52 @@ type ToolFunction struct {
 	Parameters  map[string]any `json:"parameters"`
 }
 
-func toolDefinitions() []Tool {
+// programmerTools returns the full file toolset for the Programmer agent.
+func programmerTools() []Tool { return allFileTools() }
+
+// qaTools returns read-only file tools plus a shell command runner so QA can
+// execute test suites / linters / builds for any language or framework.
+func qaTools() []Tool {
+	keep := map[string]bool{"read_file": true, "list_directory": true}
+	all := allFileTools()
+	out := make([]Tool, 0, len(keep)+1)
+	for _, t := range all {
+		if keep[t.Function.Name] {
+			out = append(out, t)
+		}
+	}
+	out = append(out, commandTool())
+	return out
+}
+
+func commandTool() Tool {
+	return Tool{
+		Type: "function",
+		Function: ToolFunction{
+			Name: "run_command",
+			Description: "Run a shell command via 'sh -c' in the project root and return combined stdout+stderr plus exit code. " +
+				"Use for running test suites, linters, type-checkers, or builds — any language or framework. " +
+				"Examples: 'go test ./...', 'bun test', 'npm run lint', 'php artisan test', 'composer test', 'pytest -q', 'cargo test'. " +
+				"Inherits the caller's PATH and environment. The working directory starts at the project root; note that the shell itself is not path-sandboxed, so prefer commands that stay within the project.",
+			Parameters: map[string]any{
+				"type": "object",
+				"properties": map[string]any{
+					"command": map[string]any{
+						"type":        "string",
+						"description": "Full shell command to execute (pipes, redirects, env-prefixes supported).",
+					},
+					"timeout_seconds": map[string]any{
+						"type":        "integer",
+						"description": "Max seconds to wait. Default 60, capped at 600.",
+					},
+				},
+				"required": []string{"command"},
+			},
+		},
+	}
+}
+
+func allFileTools() []Tool {
 	return []Tool{
 		{
 			Type: "function",
@@ -104,8 +153,10 @@ func toolDefinitions() []Tool {
 // Errors are returned as strings too so the model can react, not fatal.
 func runTool(root, name, argsJSON string) string {
 	var a struct {
-		Path    string `json:"path"`
-		Content string `json:"content"`
+		Path           string `json:"path"`
+		Content        string `json:"content"`
+		Command        string `json:"command"`
+		TimeoutSeconds int    `json:"timeout_seconds"`
 	}
 	if argsJSON != "" {
 		if err := json.Unmarshal([]byte(argsJSON), &a); err != nil {
@@ -124,6 +175,8 @@ func runTool(root, name, argsJSON string) string {
 		return doDelete(root, a.Path)
 	case "list_directory":
 		return doList(root, a.Path)
+	case "run_command":
+		return doRunCommand(root, a.Command, a.TimeoutSeconds)
 	default:
 		return fmt.Sprintf("error: unknown tool %q", name)
 	}
@@ -197,6 +250,72 @@ func doDelete(root, p string) string {
 	return fmt.Sprintf("deleted %s", p)
 }
 
+const (
+	defaultCommandTimeout = 60
+	maxCommandTimeout     = 600
+	maxCommandOutput      = 16 * 1024
+	commandWaitDelay      = 2 * time.Second
+)
+
+// doRunCommand runs command through "sh -c" with root as the working
+// directory and returns a text report: the command, a status header (exit
+// code, error, or timeout), and the combined stdout+stderr, truncated to
+// maxCommandOutput bytes.
+func doRunCommand(root, command string, timeoutSec int) string {
+	if strings.TrimSpace(command) == "" {
+		return "error: empty command"
+	}
+	if timeoutSec <= 0 {
+		timeoutSec = defaultCommandTimeout
+	}
+	if timeoutSec > maxCommandTimeout {
+		timeoutSec = maxCommandTimeout
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSec)*time.Second)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "sh", "-c", command)
+	cmd.Dir = root
+	cmd.Env = os.Environ()
+	// Without WaitDelay, CombinedOutput keeps waiting on the output pipe while
+	// any grandchild spawned by the shell still holds it open, so a timeout
+	// would not actually bound the call.
+	cmd.WaitDelay = commandWaitDelay
+
+	start := time.Now()
+	out, runErr := cmd.CombinedOutput()
+	elapsed := time.Since(start).Round(time.Millisecond)
+
+	truncated := false
+	if len(out) > maxCommandOutput {
+		out = out[:maxCommandOutput]
+		truncated = true
+	}
+
+	var header string
+	switch {
+	case errors.Is(ctx.Err(), context.DeadlineExceeded):
+		header = fmt.Sprintf("status=timeout after %ds elapsed=%s", timeoutSec, elapsed)
+	case runErr != nil:
+		var ee *exec.ExitError
+		if errors.As(runErr, &ee) {
+			header = fmt.Sprintf("exit_code=%d elapsed=%s", ee.ExitCode(), elapsed)
+		} else {
+			header = fmt.Sprintf("status=error: %v elapsed=%s", runErr, elapsed)
+		}
+	default:
+		header = fmt.Sprintf("exit_code=0 elapsed=%s", elapsed)
+	}
+
+	suffix := ""
+	if truncated {
+		suffix = fmt.Sprintf("\n[output truncated at %d bytes]", maxCommandOutput)
+	}
+
+	return fmt.Sprintf("$ %s\n%s\n--- output ---\n%s%s", command, header, string(out), suffix)
+}
+
 func doList(root, p string) string {
 	if p == "" {
 		p = "."