How to Build an AI Agent in Python: Step-by-Step Guide (2026)

AI agents are programs that use LLMs to reason and take actions — searching the web, executing code, calling APIs, and making decisions across multiple steps. This tutorial builds a functional agent from scratch.

What We’re Building

A research agent that:

Takes a question from the user
Searches the web for relevant information
Reads and extracts information from URLs
Synthesizes findings into a structured report
Cites sources

Prerequisites

pip install anthropic requests beautifulsoup4 duckduckgo-search

Step 1: Basic Tool Definition

Agents work by giving the LLM access to tools — functions it can call to gather information or take action.

import anthropic
import json
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS

# Shared Anthropic API client used by the agent loops below. No arguments are
# passed, so the SDK has to locate credentials on its own — presumably the
# ANTHROPIC_API_KEY environment variable (confirm against the SDK docs).
client = anthropic.Anthropic()

# Tool catalogue advertised to the model. Each entry pairs a tool name and
# description with a JSON Schema describing the arguments the model may send.
_WEB_SEARCH_TOOL = {
    "name": "web_search",
    "description": "Search the web for current information. Returns a list of results with titles, URLs, and snippets.",
    "input_schema": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query"},
            "num_results": {
                "type": "integer",
                "description": "Number of results to return (1-10)",
                "default": 5,
            },
        },
        "required": ["query"],
    },
}

_READ_URL_TOOL = {
    "name": "read_url",
    "description": "Read and extract text content from a webpage URL.",
    "input_schema": {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "The URL to read"},
        },
        "required": ["url"],
    },
}

# Order matters only for readability; the model sees both tools.
tools = [_WEB_SEARCH_TOOL, _READ_URL_TOOL]

Step 2: Implement Tool Functions

def web_search(query: str, num_results: int = 5) -> list[dict]:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query text.
        num_results: How many results to request. Clamped to 1-10, the range
            the ``web_search`` tool schema advertises to the model.

    Returns:
        One dict per hit with ``title``, ``url`` and ``snippet`` keys.
    """
    # The model picks this value, so enforce the advertised bounds instead of
    # trusting it (a huge value would bloat the context with search results).
    max_results = max(1, min(int(num_results), 10))

    with DDGS() as ddgs:
        return [
            {"title": hit["title"], "url": hit["href"], "snippet": hit["body"]}
            for hit in ddgs.text(query, max_results=max_results)
        ]

def read_url(url: str, max_chars: int = 5000) -> str:
    """Fetch a web page and return its visible text.

    Args:
        url: Address of the page to read. Only ``http://`` and ``https://``
            URLs are accepted.
        max_chars: Maximum number of characters of extracted text to return,
            to keep a single page from overflowing the model's context.

    Returns:
        The page text with script, style, nav and footer elements removed,
        truncated to *max_chars*. On any failure, a string starting with
        ``"Error reading URL:"`` is returned instead of raising, so the
        message can be handed back to the model as the tool result.
    """
    try:
        # The URL is chosen by the model, so refuse anything that is not plain
        # web traffic before touching the network.
        # NOTE: this does not stop requests to internal/private hosts; add an
        # allow-list or address check if the agent runs inside a trusted network.
        if not url.strip().lower().startswith(("http://", "https://")):
            return "Error reading URL: only http:// and https:// URLs are supported"

        headers = {"User-Agent": "Mozilla/5.0 (research bot)"}
        response = requests.get(url.strip(), headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        # Drop elements that carry no article content.
        for tag in soup(["script", "style", "nav", "footer"]):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)

        # Slicing already handles text shorter than the limit.
        return text[:max_chars]

    except Exception as e:
        # Deliberately broad: this is the tool boundary, and the agent should
        # see a readable error rather than have the whole run crash.
        return f"Error reading URL: {e}"

def execute_tool(tool_name: str, tool_input: dict) -> str:
    """Execute a tool and return its result as a string.

    Args:
        tool_name: Name of the tool requested by the model.
        tool_input: Keyword arguments for the tool, as supplied by the model.

    Returns:
        The tool output (search results are JSON-encoded). Unknown tool names
        and failures inside a tool are reported as descriptive strings rather
        than raised, so the agent loop can pass them back to the model and
        let it recover.
    """
    try:
        if tool_name == "web_search":
            return json.dumps(web_search(**tool_input), indent=2)
        if tool_name == "read_url":
            return read_url(**tool_input)
    except Exception as e:
        # Covers both malformed arguments from the model (TypeError from the
        # ** expansion) and runtime failures inside the tool (e.g. network).
        return f"Error executing {tool_name}: {e}"

    return f"Unknown tool: {tool_name}"

Step 3: The Agent Loop

The core of an agent is a loop: send message → LLM decides action → execute action → send result → repeat until done.

def run_agent(
    question: str,
    max_iterations: int = 10,
    model: str = "claude-haiku-4-5-20251001",
) -> str:
    """
    Run the research agent.

    Sends the question to the model, executes every tool call it requests,
    feeds the results back, and repeats until the model stops asking for tools.

    Args:
        question: The user's research question.
        max_iterations: Upper bound on model calls before giving up.
        model: Model identifier passed to the Messages API.

    Returns:
        The concatenated text of the model's final response. If the model
        stops for a reason other than ``tool_use`` without producing text
        (or the iteration limit is hit), a short explanatory message is
        returned instead.
    """
    messages = [
        {"role": "user", "content": question}
    ]

    system_prompt = """You are a research agent. When given a question:
1. Search for relevant information using web_search
2. Read important URLs in detail using read_url
3. Synthesize what you've learned into a comprehensive answer
4. Always cite your sources (URL and title)

Be thorough but efficient — only read URLs that seem highly relevant."""

    for iteration in range(max_iterations):
        # Call Claude with tools
        response = client.messages.create(
            model=model,
            max_tokens=4096,
            system=system_prompt,
            tools=tools,
            messages=messages
        )

        print(f"\n--- Iteration {iteration + 1} ---")
        print(f"Stop reason: {response.stop_reason}")

        # Anything other than a tool request ends the run. Looping again would
        # re-send the identical conversation, so stop reasons such as
        # "max_tokens" are treated as final too and whatever text exists is
        # returned.
        if response.stop_reason != "tool_use":
            answer = "".join(
                block.text for block in response.content if block.type == "text"
            )
            if answer:
                return answer
            return f"Agent stopped ({response.stop_reason}) without producing an answer."

        # Echo the assistant turn back so the tool results that follow can be
        # matched to the tool_use blocks they answer.
        messages.append({
            "role": "assistant",
            "content": response.content
        })

        # Execute each tool call and collect results
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                print(f"Calling tool: {block.name}")
                print(f"Input: {json.dumps(block.input, indent=2)}")

                result = execute_tool(block.name, block.input)
                print(f"Result preview: {result[:200]}...")

                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result
                })

        # Tool results go back to the model as a user turn.
        messages.append({
            "role": "user",
            "content": tool_results
        })

    return "Agent reached maximum iterations without completing."

Step 4: Run the Agent

if __name__ == "__main__":
    # Demo entry point: ask one sample question and print the agent's report.
    question = """What are the most significant AI model releases in 2026 so far? 
    Focus on model capabilities, benchmarks, and what problems they solve better 
    than previous models."""

    for banner in (f"Question: {question}\n", "Running agent...\n"):
        print(banner)

    report = run_agent(question)
    print("\n=== FINAL ANSWER ===", report, sep="\n")

Step 5: Adding Memory

For agents that need to remember past interactions:

import sqlite3
from datetime import datetime

class AgentMemory:
    """SQLite-backed store of free-text memories with optional tags.

    Can be used as a context manager so the database connection is closed
    deterministically::

        with AgentMemory() as memory:
            memory.store("User prefers concise answers", tags=["preference"])
    """

    def __init__(self, db_path: str = "agent_memory.db"):
        """Open the database at *db_path* and make sure the table exists."""
        self.conn = sqlite3.connect(db_path)
        self._create_table()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def _create_table(self):
        """Create the ``memories`` table on first use."""
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                tags TEXT
            )
        """)
        self.conn.commit()

    def store(self, content: str, tags: list[str] = None):
        """Persist *content*; *tags* are saved as a JSON list (``None`` means no tags)."""
        self.conn.execute(
            "INSERT INTO memories (content, tags) VALUES (?, ?)",
            (content, json.dumps(tags or []))
        )
        self.conn.commit()

    def recall(self, query: str, limit: int = 5) -> list[str]:
        """Return up to *limit* memories containing *query*, oldest first.

        Simple substring search — use vector search for production.
        """
        # Escape LIKE metacharacters so a query such as "100%" or "snake_case"
        # is matched literally instead of acting as a wildcard pattern.
        escaped = (
            query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        cursor = self.conn.execute(
            # ORDER BY makes the rows selected by LIMIT deterministic.
            "SELECT content FROM memories WHERE content LIKE ? ESCAPE '\\' "
            "ORDER BY id LIMIT ?",
            (f"%{escaped}%", limit)
        )
        return [row[0] for row in cursor.fetchall()]

Step 6: Adding Error Handling and Retries

Production agents need robust error handling:

import time
from anthropic import APIError, RateLimitError

def run_agent_robust(question: str, max_retries: int = 3) -> str:
    """Run the agent, retrying on rate limits and transient API errors.

    Each retry restarts ``run_agent`` from scratch; progress made by a failed
    attempt is not reused.

    Args:
        question: The user's research question.
        max_retries: Total number of attempts before giving up.

    Returns:
        The agent's final answer.

    Raises:
        APIError: Re-raised immediately for 4xx responses (retrying an invalid
            or unauthorized request cannot succeed), or after the final
            attempt for other API errors.
        Exception: "Max retries exceeded" when the final attempt was rate
            limited; the last ``RateLimitError`` is attached as the cause.
    """
    last_rate_limit = None

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return run_agent(question)
        except RateLimitError as e:
            last_rate_limit = e
            if is_last_attempt:
                # No further attempt follows, so sleeping would only add delay.
                break
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"Rate limited. Waiting {wait_time}s...")
            time.sleep(wait_time)
        except APIError as e:
            print(f"API error: {e}")
            # Status-bearing errors expose the HTTP code; connection-level
            # errors have none and are treated as transient.
            status_code = getattr(e, "status_code", None)
            is_client_error = status_code is not None and 400 <= status_code < 500
            if is_last_attempt or is_client_error:
                raise
            time.sleep(1)

    raise Exception("Max retries exceeded") from last_rate_limit

Step 7: Streaming for Better UX

Show tool use in real time:

async def run_agent_streaming(question: str, max_iterations: int = 10):
    """Agent with streaming output for real-time display.

    Text is printed as the model produces it, and each tool call is announced
    before it runs. Uses the SDK's async client so awaiting this coroutine
    does not block the event loop.

    Args:
        question: The user's question.
        max_iterations: Upper bound on model calls, so a model that keeps
            requesting tools cannot loop forever.
    """
    import asyncio  # local import: only this coroutine needs it

    messages = [{"role": "user", "content": question}]

    async with anthropic.AsyncAnthropic() as async_client:
        for _ in range(max_iterations):
            streamed_text = False
            async with async_client.messages.stream(
                model="claude-haiku-4-5-20251001",
                max_tokens=4096,
                tools=tools,
                messages=messages,
            ) as stream:
                # Print text deltas as they arrive instead of waiting for the
                # complete message.
                async for text in stream.text_stream:
                    print(text, end="", flush=True)
                    streamed_text = True
                response = await stream.get_final_message()
            if streamed_text:
                print()

            # Only a tool request continues the loop. Any other stop reason
            # carries no tool calls, and sending an empty tool-result turn
            # would be invalid.
            if response.stop_reason != "tool_use":
                if response.stop_reason != "end_turn":
                    print(f"[Stopped: {response.stop_reason}]")
                return

            # Handle tool use
            messages.append({"role": "assistant", "content": response.content})
            tool_results = []

            for block in response.content:
                if block.type == "tool_use":
                    print(f"\n[Tool: {block.name}]")
                    # The tools do blocking network I/O; run them in a worker
                    # thread so the event loop stays responsive.
                    result = await asyncio.to_thread(
                        execute_tool, block.name, block.input
                    )
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": result
                    })

            messages.append({"role": "user", "content": tool_results})

    print("[Agent reached maximum iterations without completing.]")

Production Considerations

Rate limiting: Use exponential backoff and token bucket algorithms for high-volume agents.

Context management: Long agent runs accumulate context. Periodically summarize earlier conversation history to stay within context limits.

Monitoring: Log all tool calls, inputs, outputs, and costs. Agents can take unexpected paths — visibility is essential.

Safety guardrails: Validate tool inputs before execution. Never let the agent execute arbitrary code without sandboxing.

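Cost estimation: Track tokens used per run. Twenty read_url calls on 5,000-character pages add roughly 100K characters (on the order of 25K tokens) of tool output, and because the full conversation — including every earlier tool result — is re-sent on each iteration, cumulative input usage can pass 100K tokens quickly.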

Full Working Example

# Complete minimal agent example
import anthropic, json, requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS

# API client for this self-contained example. No arguments are passed, so
# credentials presumably come from the environment (confirm against the SDK docs).
client = anthropic.Anthropic()

def search(query: str) -> str:
    """Return up to five DuckDuckGo text results for *query*, JSON-encoded."""
    with DDGS() as ddgs:
        hits = ddgs.text(query, max_results=5)
        # Materialize while the session is still open.
        collected = list(hits)
    return json.dumps(collected)

def fetch(url: str) -> str:
    """Download *url* and return the first 3000 characters of its text.

    HTTP error statuses and request failures (timeouts, DNS errors, invalid
    URLs) are returned as an ``"Error fetching URL: ..."`` string instead of
    raising, so one bad link does not abort the agent and the model does not
    mistake an error page for real content.
    """
    try:
        r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        return f"Error fetching URL: {e}"
    return BeautifulSoup(r.content, "html.parser").get_text()[:3000]

def _single_string_arg_tool(name: str, description: str, arg: str) -> dict:
    """Build a tool definition whose only, required argument is a string named *arg*."""
    return {
        "name": name,
        "description": description,
        "input_schema": {
            "type": "object",
            "properties": {arg: {"type": "string"}},
            "required": [arg],
        },
    }


# Tool definitions passed to the model on every request.
TOOLS = [
    _single_string_arg_tool("search", "Search the web", "query"),
    _single_string_arg_tool("fetch", "Read a URL", "url"),
]

def agent(question: str) -> str:
    """Answer *question* with a minimal search-and-fetch tool loop.

    Makes up to 10 model calls. Returns the text of the first response that
    does not request a tool, or a fallback message if the limit is reached.
    """
    # Explicit dispatch table: an unrecognized tool name is reported back to
    # the model instead of being silently routed to fetch().
    handlers = {"search": search, "fetch": fetch}
    msgs = [{"role": "user", "content": question}]
    for _ in range(10):
        r = client.messages.create(model="claude-haiku-4-5-20251001", max_tokens=2048, tools=TOOLS, messages=msgs)
        # Any stop reason other than a tool request ends the run; join all
        # text blocks so a response without text yields "" rather than
        # raising StopIteration.
        if r.stop_reason != "tool_use":
            return "".join(b.text for b in r.content if b.type == "text")
        msgs.append({"role": "assistant", "content": r.content})
        results = []
        for b in r.content:
            if b.type == "tool_use":
                handler = handlers.get(b.name)
                out = handler(**b.input) if handler else f"Unknown tool: {b.name}"
                results.append({"type": "tool_result", "tool_use_id": b.id, "content": out})
        msgs.append({"role": "user", "content": results})
    # Previously fell off the loop and returned None despite the -> str hint.
    return "Agent reached maximum iterations without completing."

# Run the example end to end and print the model's final answer.
final_answer = agent("What is the latest Claude model and what are its key capabilities?")
print(final_answer)

Agents are the foundation of most advanced AI applications in 2026. Mastering this pattern opens the door to automated research, coding assistants, customer service bots, and autonomous workflows.