An AI agent is a system where a language model takes actions, observes results, and decides what to do next — repeating until a goal is achieved. Claude is particularly well-suited for agentic tasks due to its strong instruction-following and multi-step reasoning.
What Makes an Agent Different from a Chatbot
A chatbot responds to messages. An agent:
- Takes actions (calls tools, runs code, reads files)
- Observes results of those actions
- Makes decisions based on results
- Loops until the task is complete
The core loop: Receive task → Plan → Act → Observe → Repeat until done
Basic Agent Architecture
import anthropic
import json
from typing import Any
client = anthropic.Anthropic()
def run_agent(task: str, tools: list, tool_executor: callable, max_iterations: int = 10) -> str:
    """Drive a tool-using conversation until the model stops asking for tools.

    Args:
        task: The user's request; sent as the first message.
        tools: Tool definitions passed straight through to the API.
        tool_executor: Called as ``tool_executor(name, input)`` for every
            tool call the model makes. Its return value is converted with
            ``str()`` and sent back as the tool result.
        max_iterations: Upper bound on API calls, so a confused agent cannot
            loop forever.

    Returns:
        The concatenated text of the model's final reply, or
        ``"Max iterations reached"`` if the model was still requesting tools
        when the budget ran out.
    """
    messages = [{"role": "user", "content": task}]

    for _ in range(max_iterations):
        response = client.messages.create(
            model="claude-opus-4-7",
            max_tokens=4096,
            tools=tools,
            messages=messages,
        )

        if response.stop_reason != "tool_use":
            # Every stop reason other than "tool_use" ends the loop. Only
            # "end_turn" used to be handled, so any other reason left
            # `messages` unchanged and the identical request was re-sent
            # until the iteration budget was exhausted.
            # Join all text blocks instead of indexing content[0], which
            # fails on empty content or when the first block is not text.
            return "".join(
                block.text for block in response.content if block.type == "text"
            )

        # The assistant turn must be echoed back verbatim so that each
        # tool_result can be matched to its tool_use block by id.
        messages.append({"role": "assistant", "content": response.content})

        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            outcome = tool_executor(block.name, block.input)
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": str(outcome),
            })
        messages.append({"role": "user", "content": tool_results})

    return "Max iterations reached"
Defining Tools
Tools tell Claude what capabilities it has. The schema is critical — Claude uses it to decide when and how to call each tool.
# Each capability is declared on its own so one schema can be read and edited
# in isolation. The descriptions matter: they are what the model uses to
# decide when and how to call a tool.
_READ_FILE_TOOL = {
    "name": "read_file",
    "description": "Read the contents of a file. Use when you need to examine file contents.",
    "input_schema": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute or relative path to the file",
            },
        },
        "required": ["path"],
    },
}

_WRITE_FILE_TOOL = {
    "name": "write_file",
    "description": "Write content to a file. Creates the file if it doesn't exist.",
    "input_schema": {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Path to the file"},
            "content": {"type": "string", "description": "Content to write"},
        },
        "required": ["path", "content"],
    },
}

_RUN_COMMAND_TOOL = {
    "name": "run_command",
    "description": "Execute a shell command and return output. Use for running scripts, tests, etc.",
    "input_schema": {
        "type": "object",
        "properties": {
            "command": {"type": "string", "description": "Shell command to execute"},
            "working_dir": {"type": "string", "description": "Working directory (optional)"},
        },
        # working_dir is deliberately absent: it is optional.
        "required": ["command"],
    },
}

_SEARCH_WEB_TOOL = {
    "name": "search_web",
    "description": "Search the web for current information. Use when you need facts you don't know.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"},
        },
        "required": ["query"],
    },
}

# The list handed to the API, in the same order as before.
tools = [
    _READ_FILE_TOOL,
    _WRITE_FILE_TOOL,
    _RUN_COMMAND_TOOL,
    _SEARCH_WEB_TOOL,
]
Tool Execution Layer
import subprocess
import os
def execute_tool(tool_name: str, tool_input: dict) -> Any:
    """Execute one tool call and return its result as text for the model.

    Failures are returned as ``"Error..."`` strings rather than raised, so
    the agent can read the message and try something else.

    Args:
        tool_name: One of ``"read_file"``, ``"write_file"`` or
            ``"run_command"``. Any other name yields an "Unknown tool"
            error string.
        tool_input: The arguments the model supplied for the tool.

    Returns:
        The file contents, a confirmation message, the command output, or an
        error description.
    """
    if tool_name == "read_file":
        try:
            with open(tool_input["path"], "r") as f:
                return f.read()
        except FileNotFoundError:
            return f"Error: File not found: {tool_input['path']}"
        except Exception as e:
            return f"Error reading file: {str(e)}"

    elif tool_name == "write_file":
        try:
            # dirname is "" for a bare filename such as "notes.txt", and
            # os.makedirs("") raises. Only create parents when there are any.
            parent_dir = os.path.dirname(tool_input["path"])
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(tool_input["path"], "w") as f:
                f.write(tool_input["content"])
            return f"Successfully wrote {len(tool_input['content'])} characters to {tool_input['path']}"
        except Exception as e:
            return f"Error writing file: {str(e)}"

    elif tool_name == "run_command":
        try:
            # SECURITY: the command string comes from the model and is run
            # through the shell. Only use this inside a sandbox.
            result = subprocess.run(
                tool_input["command"],
                shell=True,
                capture_output=True,
                text=True,
                timeout=30,
                cwd=tool_input.get("working_dir"),
            )
            output = result.stdout
            # stderr is only surfaced when the command reports failure.
            if result.returncode != 0:
                output += f"\nSTDERR: {result.stderr}"
            return output or "(no output)"
        except subprocess.TimeoutExpired:
            return "Error: Command timed out after 30 seconds"
        except Exception as e:
            return f"Error running command: {str(e)}"

    else:
        return f"Error: Unknown tool '{tool_name}'"
Complete Coding Agent Example
import anthropic
import subprocess
import os
client = anthropic.Anthropic()
def _string_params(*names: str) -> dict:
    """Build an object schema whose fields are all required strings."""
    return {
        "type": "object",
        "properties": {field: {"type": "string"} for field in names},
        "required": list(names),
    }


# Tool definitions for the coding agent. Every parameter here is a required
# string, so the schemas are generated rather than spelled out.
tools = [
    {
        "name": "read_file",
        "description": "Read file contents",
        "input_schema": _string_params("path"),
    },
    {
        "name": "write_file",
        "description": "Write content to file",
        "input_schema": _string_params("path", "content"),
    },
    {
        "name": "run_tests",
        "description": "Run pytest tests and return results",
        "input_schema": _string_params("test_path"),
    },
]
def execute_tool(name: str, inputs: dict) -> str:
    """Run one of the coding agent's tools and return its output as text.

    Problems are reported as ``"Error..."`` strings instead of exceptions.
    That keeps the agent loop alive and lets the model see what went wrong.
    It also guarantees a ``str`` result: an unrecognised tool name used to
    fall through and return ``None``.

    Args:
        name: ``"read_file"``, ``"write_file"`` or ``"run_tests"``.
        inputs: Arguments supplied by the model for that tool.

    Returns:
        File contents, a write confirmation, combined pytest stdout and
        stderr, or an error description.
    """
    try:
        if name == "read_file":
            with open(inputs["path"]) as f:
                return f.read()
        if name == "write_file":
            with open(inputs["path"], "w") as f:
                f.write(inputs["content"])
            return f"Written to {inputs['path']}"
        if name == "run_tests":
            # Argument list, no shell: the model-supplied path is never
            # interpreted by a shell.
            result = subprocess.run(
                ["pytest", inputs["test_path"], "-v"],
                capture_output=True,
                text=True,
            )
            return result.stdout + result.stderr
    except KeyError as exc:
        # The model omitted an argument that the tool needs.
        return f"Error: missing required input {exc}"
    except (OSError, UnicodeDecodeError) as exc:
        # Missing file, permission problem, pytest not installed, or a file
        # that cannot be decoded as text.
        return f"Error: {exc}"
    return f"Error: Unknown tool '{name}'"
def coding_agent(task: str, max_iterations: int = 25) -> str:
    """Run the coding agent on ``task`` and return its final report.

    The model is called repeatedly. Each time it requests tools, they are
    executed and the results are fed back, until it replies without
    requesting any.

    Args:
        task: Description of the bug to fix or the feature to implement.
        max_iterations: Upper bound on API calls. The loop used to be
            ``while True``, which let a stuck agent run and spend tokens
            indefinitely.

    Returns:
        The concatenated text of the model's final reply, or
        ``"Max iterations reached"`` if the budget ran out first.
    """
    system = (
        "You are a coding agent. Fix bugs and implement features.\n"
        "Process:\n"
        "1. Read relevant files to understand the codebase\n"
        "2. Understand what needs to change\n"
        "3. Make the changes\n"
        "4. Run tests to verify\n"
        "5. Fix any failures\n"
        "6. Report what you did"
    )
    messages = [{"role": "user", "content": task}]

    for _ in range(max_iterations):
        response = client.messages.create(
            model="claude-opus-4-7",
            max_tokens=4096,
            system=system,
            tools=tools,
            messages=messages,
        )

        if response.stop_reason != "tool_use":
            # Finished, or stopped for a reason with no tool calls to answer.
            # Continuing would append a user message with empty content.
            # Join the text blocks rather than assuming content[0] is text.
            return "".join(
                block.text for block in response.content if block.type == "text"
            )

        # Echo the assistant turn so each tool_result can be paired with its
        # tool_use block by id.
        messages.append({"role": "assistant", "content": response.content})

        results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            print(f"Tool: {block.name}({block.input})")
            outcome = execute_tool(block.name, block.input)
            results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                # str() guards against an executor that returns a non-string.
                "content": str(outcome),
            })
        messages.append({"role": "user", "content": results})

    return "Max iterations reached"
# Example invocation. This runs as a top-level statement, so executing the
# file starts the agent (and its API calls) immediately.
result = coding_agent("Fix the failing tests in test_calculator.py")
# Print the agent's final text reply.
print(result)
Adding a System Prompt for Reliability
A good system prompt dramatically improves agent behavior:
# System prompt for a coding agent, in three parts: the PROCESS to follow,
# CONSTRAINTS on what may be touched, and the REPORTING format for the final
# answer. Intended to be passed as the `system=` argument of the API call.
system = """You are an autonomous coding agent.
PROCESS:
1. Before writing any code, read the relevant files
2. Make minimal changes — only what's needed
3. After making changes, run tests
4. If tests fail, diagnose and fix before reporting success
5. Never report success without running tests
CONSTRAINTS:
- Only modify files that are necessary
- Don't refactor or "improve" code that isn't part of the task
- If stuck after 3 attempts, report what you've tried and ask for guidance
REPORTING:
When done, report:
- What you changed and why
- Test results
- Any concerns or follow-up items"""
Agent Patterns
ReAct Pattern (Reason + Act)
Explicitly ask Claude to reason before acting:
# Initial conversation for a ReAct-style run: the task is wrapped in
# instructions asking for an explicit THOUGHT / ACTION / OBSERVATION cycle.
# `task` is interpolated by the f-string and must already be defined.
messages = [{
    "role": "user",
    "content": f"""Task: {task}
Think step by step before taking any actions. For each step:
1. THOUGHT: What do I need to do and why?
2. ACTION: What tool should I use?
3. OBSERVATION: What did I learn from the result?
Then continue to the next step."""
}]
Plan-Then-Execute
For complex tasks, have the agent plan first:
# First call: planning only. No `tools` argument is passed, so the model
# cannot request any tool calls in this turn.
# `client` and `task` must already be defined.
plan_response = client.messages.create(
    model="claude-opus-4-7",
    max_tokens=2048,
    system="You are a planning agent. Create a detailed step-by-step plan but DO NOT take any actions yet.",
    messages=[{"role": "user", "content": task}],
)
# Takes the first content block as the plan text.
# NOTE(review): assumes that block is a text block — confirm.
plan = plan_response.content[0].text
# Second call: execution with the plan. Only the opening message is built
# here; it restates the plan and the original task.
# NOTE(review): the call that consumes `execute_messages` is not shown —
# presumably it is fed to a tool-enabled agent loop.
execute_messages = [
    {"role": "user", "content": f"Execute this plan:\n\n{plan}\n\nOriginal task: {task}"}
]
Production Considerations
Timeouts: Set maximum iteration limits and per-tool timeouts. Agents can get stuck in loops.
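Sandboxing: Never run agent-generated code or shell commands without sandboxing. Execute tools inside a Docker container, or in a subprocess that runs as a user with limited permissions, so that a bad command cannot damage the host.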
Logging: Log every tool call and result. You need this for debugging and auditing.
Human-in-the-loop: For high-stakes actions (deleting files, external API calls), confirm with the user before executing.
# Tools whose effects are hard to undo. Calls to these need explicit approval.
REQUIRE_CONFIRMATION = ["delete_file", "send_email", "create_pull_request"]


def safe_execute_tool(name: str, inputs: dict) -> str:
    """Execute a tool, asking the user first if it is a high-stakes one.

    Args:
        name: Name of the tool the agent wants to call.
        inputs: Arguments the agent supplied for the call.

    Returns:
        The result of ``execute_tool(name, inputs)``, or
        ``"Tool call rejected by user"`` when confirmation was required and
        not given.
    """
    if name in REQUIRE_CONFIRMATION:
        print(f"\n⚠️ Agent wants to call: {name}({inputs})")
        try:
            confirm = input("Allow? (y/n): ")
        except EOFError:
            # No interactive stdin, so nobody can approve. Fail closed
            # rather than crash the agent loop.
            confirm = ""
        # Strip whitespace so that " y" or "y " is not read as a rejection.
        if confirm.strip().lower() != 'y':
            return "Tool call rejected by user"
    return execute_tool(name, inputs)
Cost monitoring: Agentic loops can use many tokens. Track usage per task, and compute cost with the per-million-token prices of the model you actually call.
# Running token totals for one task. Initialise once, before the agent loop.
total_input_tokens = 0
total_output_tokens = 0
# After each API call:
# (`response` is the object returned by that call; it is not defined here.)
total_input_tokens += response.usage.input_tokens
total_output_tokens += response.usage.output_tokens
# Dollar cost = tokens / 1,000,000 * price per million tokens. The rates are
# hard-coded: 3 for input and 15 for output.
# NOTE(review): confirm these rates match the pricing of the model in use.
cost = (total_input_tokens / 1_000_000 * 3) + (total_output_tokens / 1_000_000 * 15)
print(f"Task cost: ${cost:.4f}")