AI agents are programs that use LLMs to reason and take actions — searching the web, executing code, calling APIs, and making decisions across multiple steps. This tutorial builds a functional agent from scratch.
What We’re Building
A research agent that:
- Takes a question from the user
- Searches the web for relevant information
- Reads and extracts information from URLs
- Synthesizes findings into a structured report
- Cites sources
Prerequisites
pip install anthropic requests beautifulsoup4 duckduckgo-search
Step 1: Basic Tool Definition
Agents work by giving the LLM access to tools — functions it can call to gather information or take action.
import anthropic
import json
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
# Shared Anthropic API client used by the agent loops below.
client = anthropic.Anthropic()

# Define tools the agent can use.
# Each entry carries a "name", a natural-language "description" and an
# "input_schema" object listing the arguments the model may supply.
tools = [
    {
        # Handled by web_search() via execute_tool().
        "name": "web_search",
        "description": "Search the web for current information. Returns a list of results with titles, URLs, and snippets.",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query"
                },
                # Optional: only "query" is listed under "required" below.
                "num_results": {
                    "type": "integer",
                    "description": "Number of results to return (1-10)",
                    "default": 5
                }
            },
            "required": ["query"]
        }
    },
    {
        # Handled by read_url() via execute_tool().
        "name": "read_url",
        "description": "Read and extract text content from a webpage URL.",
        "input_schema": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to read"
                }
            },
            "required": ["url"]
        }
    }
]
Step 2: Implement Tool Functions
def web_search(query: str, num_results: int = 5) -> list[dict]:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query.
        num_results: Maximum number of results to return. The value is
            chosen by the model, so it is clamped to the 1-10 range that
            the tool description advertises.

    Returns:
        One dict per hit with "title", "url" and "snippet" keys. A field
        missing from a hit is returned as an empty string.
    """
    # Keep the model-supplied count inside the documented bounds.
    num_results = max(1, min(int(num_results), 10))
    with DDGS() as ddgs:
        hits = ddgs.text(query, max_results=num_results) or []
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("href", ""),
                "snippet": hit.get("body", ""),
            }
            for hit in hits
        ]
def read_url(url: str, max_chars: int = 5000) -> str:
    """Fetch a web page and return its visible text.

    Args:
        url: The URL to read.
        max_chars: Maximum number of characters of extracted text to
            return. Defaults to 5000 to avoid context overflow.

    Returns:
        The extracted text, truncated to ``max_chars``. On any failure a
        string starting with "Error reading URL:" is returned instead of
        raising, so the caller always gets a string.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (research bot)"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Drop non-content elements: scripts, styles, navigation, footers.
        for tag in soup(["script", "style", "nav", "footer"]):
            tag.decompose()
        text = soup.get_text(separator="\n", strip=True)
        # Slicing is a no-op when the text is already short enough.
        return text[:max_chars]
    except Exception as e:
        # Deliberately broad: this is a best-effort tool, and the error text
        # is handed back to the model rather than crashing the agent loop.
        return f"Error reading URL: {e}"
def execute_tool(tool_name: str, tool_input: dict) -> str:
    """Execute a tool and return its result as a string.

    Args:
        tool_name: Name of the tool requested by the model.
        tool_input: Keyword arguments for the tool, as supplied by the model.

    Returns:
        The tool output as a string: JSON for "web_search", page text for
        "read_url". Unknown tool names and failures inside a tool are
        reported as a descriptive string instead of raising, so the agent
        loop can hand the problem back to the model.
    """
    try:
        if tool_name == "web_search":
            return json.dumps(web_search(**tool_input), indent=2)
        if tool_name == "read_url":
            return read_url(**tool_input)
    except Exception as e:
        # Tool boundary: the arguments come from the model and the tools hit
        # the network, so bad arguments (TypeError) or a failed request must
        # not abort the whole run.
        return f"Error executing {tool_name}: {e}"
    return f"Unknown tool: {tool_name}"
Step 3: The Agent Loop
The core of an agent is a loop: send message → LLM decides action → execute action → send result → repeat until done.
def run_agent(question: str, max_iterations: int = 10) -> str:
    """Run the research agent.

    Repeatedly calls the model, executes every tool call it requests and
    feeds the results back, until the model stops asking for tools.

    Args:
        question: The user's research question.
        max_iterations: Upper bound on model calls for one run.

    Returns:
        The model's final text answer. If the model stops without any text,
        or the iteration limit is reached, a short explanatory message is
        returned instead.
    """
    messages = [
        {"role": "user", "content": question}
    ]
    system_prompt = """You are a research agent. When given a question:
1. Search for relevant information using web_search
2. Read important URLs in detail using read_url
3. Synthesize what you've learned into a comprehensive answer
4. Always cite your sources (URL and title)
Be thorough but efficient — only read URLs that seem highly relevant."""

    for iteration in range(max_iterations):
        # Call Claude with tools
        response = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=4096,
            system=system_prompt,
            tools=tools,
            messages=messages,
        )
        print(f"\n--- Iteration {iteration + 1} ---")
        print(f"Stop reason: {response.stop_reason}")

        # Anything other than a tool request ends the run. Looping again on
        # e.g. "max_tokens" would only re-send the identical request.
        if response.stop_reason != "tool_use":
            answer = "".join(
                block.text for block in response.content if block.type == "text"
            )
            if answer:
                return answer
            return (
                "Agent stopped without a text answer "
                f"(stop reason: {response.stop_reason})."
            )

        # Add Claude's response to messages so the tool results can refer to it
        messages.append({
            "role": "assistant",
            "content": response.content,
        })

        # Execute each tool call and collect results
        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            print(f"Calling tool: {block.name}")
            print(f"Input: {json.dumps(block.input, indent=2)}")
            result = execute_tool(block.name, block.input)
            print(f"Result preview: {result[:200]}...")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": result,
            })

        # Add tool results to messages
        messages.append({
            "role": "user",
            "content": tool_results,
        })

    return "Agent reached maximum iterations without completing."
Step 4: Run the Agent
if __name__ == "__main__":
    # Demo entry point: ask one sample research question and show the answer.
    demo_question = (
        "What are the most significant AI model releases in 2026 so far?\n"
        "Focus on model capabilities, benchmarks, and what problems they solve better\n"
        "than previous models."
    )
    print(f"Question: {demo_question}\n")
    print("Running agent...\n")
    # Run the agent first so its per-iteration logs appear before the banner.
    final_answer = run_agent(demo_question)
    print("\n=== FINAL ANSWER ===", final_answer, sep="\n")
Step 5: Adding Memory
For agents that need to remember past interactions:
import sqlite3
from datetime import datetime
class AgentMemory:
    """SQLite-backed store of free-text memories with optional tags.

    Can be used as a context manager so the database connection is closed
    when the block exits:

        with AgentMemory("agent_memory.db") as memory:
            memory.store("some fact", tags=["example"])
    """

    def __init__(self, db_path: str = "agent_memory.db"):
        """Open (or create) the database at ``db_path`` and ensure the table exists."""
        self.conn = sqlite3.connect(db_path)
        self._create_table()

    def __enter__(self) -> "AgentMemory":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def _create_table(self) -> None:
        """Create the ``memories`` table if it does not exist yet."""
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                tags TEXT
            )
        """)
        self.conn.commit()

    def store(self, content: str, tags: "list[str] | None" = None) -> None:
        """Persist one memory.

        Args:
            content: The text to remember.
            tags: Optional labels, stored as a JSON array (``[]`` when omitted).
        """
        self.conn.execute(
            "INSERT INTO memories (content, tags) VALUES (?, ?)",
            (content, json.dumps(tags or [])),
        )
        self.conn.commit()

    def recall(self, query: str, limit: int = 5) -> list[str]:
        """Return up to ``limit`` memories whose content contains ``query``.

        Simple substring search — use vector search for production. Results
        are returned oldest first.
        """
        # Escape LIKE metacharacters so "%" and "_" in the query are matched
        # literally instead of acting as wildcards.
        escaped = (
            query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        cursor = self.conn.execute(
            "SELECT content FROM memories "
            "WHERE content LIKE ? ESCAPE '\\' ORDER BY id LIMIT ?",
            (f"%{escaped}%", limit),
        )
        return [row[0] for row in cursor.fetchall()]
Step 6: Adding Error Handling and Retries
Production agents need robust error handling:
import time
from anthropic import APIError, RateLimitError
def run_agent_robust(question: str, max_retries: int = 3) -> str:
    """Run the agent, retrying when the API reports an error.

    Args:
        question: The user's research question.
        max_retries: Total number of attempts before giving up.

    Returns:
        The answer from the first attempt that succeeds.

    Raises:
        APIError: If the final attempt fails with a non-rate-limit API error.
        Exception: "Max retries exceeded" if the final attempt was rate
            limited; the rate-limit error is attached as the cause.
    """
    last_rate_limit = None
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return run_agent(question)
        except RateLimitError as e:
            last_rate_limit = e
            if is_last_attempt:
                # No retry follows, so waiting would only delay the failure.
                break
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"Rate limited. Waiting {wait_time}s...")
            time.sleep(wait_time)
        except APIError as e:
            print(f"API error: {e}")
            if is_last_attempt:
                raise
            time.sleep(1)
    raise Exception("Max retries exceeded") from last_rate_limit
Step 7: Streaming for Better UX
Show tool use in real time:
async def run_agent_streaming(question: str, max_iterations: int = 10):
    """Agent with streaming output for real-time display.

    Text is printed as it arrives, and each tool call is announced as it is
    executed. Nothing is returned; all output goes to stdout.

    Args:
        question: The user's research question.
        max_iterations: Upper bound on model calls, so a model that keeps
            requesting tools cannot loop forever.
    """
    import asyncio  # local import: only this coroutine needs it

    messages = [{"role": "user", "content": question}]
    # Use the async client so awaiting this coroutine does not block the
    # event loop the way the synchronous client would.
    async with anthropic.AsyncAnthropic() as async_client:
        for _ in range(max_iterations):
            printed_text = False
            async with async_client.messages.stream(
                model="claude-haiku-4-5-20251001",
                max_tokens=4096,
                tools=tools,
                messages=messages,
            ) as stream:
                async for text in stream.text_stream:
                    print(text, end="", flush=True)
                    printed_text = True
                response = await stream.get_final_message()
            if printed_text:
                print()  # finish the streamed line

            # Only a tool request continues the loop. Sending an empty
            # tool-result message after any other stop reason would be
            # rejected by the API.
            if response.stop_reason != "tool_use":
                return

            # Handle tool use
            messages.append({"role": "assistant", "content": response.content})
            tool_results = []
            for block in response.content:
                if block.type != "tool_use":
                    continue
                print(f"\n[Tool: {block.name}]")
                # execute_tool does blocking network I/O; run it in a thread.
                result = await asyncio.to_thread(
                    execute_tool, block.name, block.input
                )
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result,
                })
            messages.append({"role": "user", "content": tool_results})

    print("Agent reached maximum iterations without completing.")
Production Considerations
Rate limiting: Use exponential backoff and token bucket algorithms for high-volume agents.
Context management: Long agent runs accumulate context. Periodically summarize earlier conversation history to stay within context limits.
Monitoring: Log all tool calls, inputs, outputs, and costs. Agents can take unexpected paths — visibility is essential.
Safety guardrails: Validate tool inputs before execution. Never let the agent execute arbitrary code without sandboxing.
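Cost estimation: Track tokens used per run. An agent that calls read_url 20 times on 5,000-char pages (roughly 1,250 tokens each) can consume 100K+ input tokens quickly, because the entire conversation history — including every earlier tool result — is re-sent to the model on each iteration.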
Full Working Example
# Complete minimal agent example
import anthropic, json, requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
# API client used by agent() below.
client = anthropic.Anthropic()
def search(query: str) -> str:
    """Run a DuckDuckGo text search and return up to five hits as a JSON string."""
    with DDGS() as ddgs:
        hits = [*ddgs.text(query, max_results=5)]
    return json.dumps(hits)
def fetch(url: str) -> str:
    """Download a page and return the first 3000 characters of its text.

    Request failures (timeouts, connection errors, HTTP error statuses) are
    returned as an "Error reading URL: ..." string instead of raising, so a
    single bad link does not abort the agent run.
    """
    try:
        r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        return f"Error reading URL: {e}"
    return BeautifulSoup(r.content, "html.parser").get_text()[:3000]
# Tool definitions passed to the model; agent() maps "search" to search()
# and every other tool name to fetch().
TOOLS = [
    # Web search: takes a single required "query" string.
    {"name": "search", "description": "Search the web",
     "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}},
    # Page reader: takes a single required "url" string.
    {"name": "fetch", "description": "Read a URL",
     "input_schema": {"type": "object", "properties": {"url": {"type": "string"}}, "required": ["url"]}}
]
def agent(question: str) -> str:
    """Answer ``question`` with a minimal search-and-fetch tool loop.

    Makes at most 10 model calls. Each requested tool is executed and its
    output is sent back to the model until it stops asking for tools.

    Returns:
        The model's final text (empty if it produced none), or a fixed
        message when the iteration limit is reached.
    """
    msgs = [{"role": "user", "content": question}]
    for _ in range(10):
        r = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=2048,
            tools=TOOLS,
            messages=msgs,
        )
        # Any stop reason other than a tool request ends the run. Continuing
        # would send an empty tool-result message to the API.
        if r.stop_reason != "tool_use":
            return "".join(b.text for b in r.content if b.type == "text")
        msgs.append({"role": "assistant", "content": r.content})
        results = []
        for b in r.content:
            if b.type != "tool_use":
                continue
            if b.name == "search":
                out = search(**b.input)
            elif b.name == "fetch":
                out = fetch(**b.input)
            else:
                # Report an unexpected tool name instead of guessing.
                out = f"Unknown tool: {b.name}"
            results.append({"type": "tool_result", "tool_use_id": b.id, "content": out})
        msgs.append({"role": "user", "content": results})
    return "Agent reached maximum iterations without completing."
# Run the minimal agent once on a sample question and print its answer.
print(agent("What is the latest Claude model and what are its key capabilities?"))
Agents are the foundation of most advanced AI applications in 2026. Mastering this pattern opens the door to automated research, coding assistants, customer service bots, and autonomous workflows.