How to Use the OpenAI Assistants API: Build Persistent AI Agents

The OpenAI Assistants API provides persistent AI agents with built-in memory, file search, and code execution. Unlike direct API calls, Assistants maintain conversation threads and can access files you upload. Here’s how to build with it.

Core Concepts

Assistant — A configured AI instance with instructions, tools, and model settings. Reusable across many conversations.

Thread — A conversation session. Persists messages and context indefinitely.

Message — A user or assistant message within a thread.

Run — The execution of an assistant against a thread. Creates a response.

Tool — Optional capabilities: file_search, code_interpreter, or custom functions.

Installation and Setup

pip install openai
export OPENAI_API_KEY="sk-..."

Create an Assistant

from openai import OpenAI

# Shared SDK client used by every snippet below (the API key is exported
# as OPENAI_API_KEY during setup).
client = OpenAI()

# Settings for a document-focused assistant with the file_search tool enabled.
research_assistant_config = {
    "name": "Research Assistant",
    "instructions": """You are a research assistant that helps analyze documents 
    and answer questions based on their content. Always cite the specific 
    document and section when providing information.""",
    "model": "gpt-4o",
    "tools": [{"type": "file_search"}],
}
assistant = client.beta.assistants.create(**research_assistant_config)

# Keep this ID somewhere durable: later sessions should reuse the assistant
# by ID rather than creating a new one each time.
print(f"Assistant ID: {assistant.id}")

Upload Files

# Upload a file to the Assistants API
with open("report.pdf", "rb") as f:
    file = client.files.create(
        file=f,
        purpose="assistants"
    )

# Create the vector store, then add the file and block until indexing has
# finished. Indexing is asynchronous: a run started before it completes may
# search an incomplete store.
vector_store = client.beta.vector_stores.create(name="Research Documents")
file_batch = client.beta.vector_stores.file_batches.create_and_poll(
    vector_store_id=vector_store.id,
    file_ids=[file.id]
)
if file_batch.status != "completed":
    # Fail loudly instead of attaching a store whose files are not searchable.
    raise RuntimeError(
        f"Vector store indexing did not complete: status={file_batch.status}, "
        f"file_counts={file_batch.file_counts}"
    )

# Attach the vector store to your assistant
client.beta.assistants.update(
    assistant.id,
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}}
)

Have a Conversation

import time

def ask_assistant(
    assistant_id: str, thread_id: str | None, question: str
) -> tuple[str, str]:
    """Send a question to an assistant and return its reply.

    Args:
        assistant_id: ID of the assistant to run.
        thread_id: ID of an existing thread to continue, or None to start a
            new thread.
        question: User message to append to the thread.

    Returns:
        A ``(response_text, thread_id)`` tuple. Pass the returned thread ID
        back in to continue the same conversation.

    Raises:
        Exception: If the run fails, ends in any other non-completed state,
            or the newest message contains no text.
    """
    # Create or reuse a thread
    if thread_id is None:
        thread_id = client.beta.threads.create().id

    # Add the user message
    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=question
    )

    # Run the assistant
    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        assistant_id=assistant_id
    )

    # Poll while the run is still in a transitional state.
    while run.status in ("queued", "in_progress", "cancelling"):
        time.sleep(0.5)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id
        )

    if run.status == "failed":
        raise Exception(f"Run failed: {run.last_error}")
    if run.status != "completed":
        # cancelled / expired / incomplete / requires_action: the assistant
        # produced no final reply, so the newest message in the thread would
        # be the user's own question. Surface that instead of returning it.
        raise RuntimeError(f"Run ended with status {run.status!r}")

    # Get the latest message (the list is returned newest first)
    messages = client.beta.threads.messages.list(thread_id=thread_id)
    if not messages.data:
        raise RuntimeError("Run completed but the thread has no messages")

    # A message may mix text with other parts (e.g. images); take the first
    # text part rather than assuming it sits at index 0.
    for part in messages.data[0].content:
        if part.type == "text":
            return part.text.value, thread_id

    raise RuntimeError("Latest message contains no text content")

# Opening question: thread_id=None makes ask_assistant start a new thread
answer, thread_id = ask_assistant(
    assistant_id=assistant.id,
    thread_id=None,
    question="What are the key findings in the report?",
)
print(answer)

# Follow-up on the thread returned above, so the earlier exchange stays in context
answer, thread_id = ask_assistant(
    assistant_id=assistant.id,
    thread_id=thread_id,
    question="Which finding is most actionable for Q3?",
)
print(answer)

Code Interpreter Tool

Let the assistant run Python to analyze data:

# Assistant configured with the code_interpreter tool
data_analyst = client.beta.assistants.create(
    tools=[{"type": "code_interpreter"}],
    model="gpt-4o",
    name="Data Analyst",
    instructions="Analyze data and create visualizations. Show your Python code.",
)

# Upload the CSV the assistant will analyze
with open("sales_data.csv", "rb") as csv_handle:
    csv_file = client.files.create(purpose="assistants", file=csv_handle)

# Start a thread whose first message attaches the CSV for code_interpreter
thread = client.beta.threads.create()
csv_attachment = {
    "file_id": csv_file.id,
    "tools": [{"type": "code_interpreter"}],
}
client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Analyze this sales data and create a trend chart.",
    attachments=[csv_attachment],
)

# Start the run through the SDK's create-and-poll helper
run = client.beta.threads.runs.create_and_poll(
    assistant_id=data_analyst.id,
    thread_id=thread.id,
)

# Get the response (may include generated images)
messages = client.beta.threads.messages.list(thread_id=thread.id)
chart_count = 0
for msg in messages.data:
    for content in msg.content:
        if content.type == "text":
            print(content.text.value)
        elif content.type == "image_file":
            # Give every generated image its own file so a second chart does
            # not overwrite the first. The first image keeps the name
            # "chart.png"; later ones become chart_2.png, chart_3.png, ...
            chart_count += 1
            chart_path = (
                "chart.png" if chart_count == 1 else f"chart_{chart_count}.png"
            )
            # Download the generated chart
            file_data = client.files.content(content.image_file.file_id)
            with open(chart_path, "wb") as f:
                f.write(file_data.content)

Custom Function Tools

import json

# JSON Schema describing the arguments accepted by search_database
search_parameters = {
    "type": "object",
    "properties": {
        "query": {"type": "string"},
        "category": {"type": "string", "enum": ["electronics", "clothing", "books"]},
    },
    "required": ["query"],
}

# Define tools
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_database",
            "description": "Search the product database",
            "parameters": search_parameters,
        },
    }
]

# Assistant that is given the function tool defined above
assistant = client.beta.assistants.create(
    tools=tools,
    model="gpt-4o",
    instructions="Help users find products.",
    name="Product Assistant",
)

# Handle tool calls in the run loop
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id
)

# Keep going while the run is transitional or waiting on tool outputs.
while run.status in ("queued", "in_progress", "requires_action", "cancelling"):
    if run.status == "requires_action":
        tool_calls = run.required_action.submit_tool_outputs.tool_calls
        outputs = []

        for tool_call in tool_calls:
            if tool_call.function.name == "search_database":
                args = json.loads(tool_call.function.arguments)
                # Execute your actual function
                result = search_database(args["query"], args.get("category"))
                output = json.dumps(result)
            else:
                # Every pending tool call must be answered in the same
                # submit request, so report unknown tools back to the model
                # instead of silently skipping them.
                output = json.dumps(
                    {"error": f"Unknown tool: {tool_call.function.name}"}
                )
            outputs.append({
                "tool_call_id": tool_call.id,
                "output": output
            })

        run = client.beta.threads.runs.submit_tool_outputs(
            thread_id=thread.id,
            run_id=run.id,
            tool_outputs=outputs
        )
    else:
        time.sleep(0.5)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id
        )

# The loop also exits on failed / cancelled / expired / incomplete runs;
# surface those rather than continuing as if a reply were available.
if run.status != "completed":
    raise RuntimeError(
        f"Run ended with status {run.status!r}: {run.last_error}"
    )

Production Patterns

Reuse Assistants

Create assistants once; reuse by ID:

# Placeholder ID; store the real value in an environment variable
ASSISTANT_ID = "asst_..."

# Look up the existing assistant by ID rather than creating another one
assistant = client.beta.assistants.retrieve(assistant_id=ASSISTANT_ID)

Thread Management

Threads persist by default, but each run sends the thread’s accumulated messages to the model as context, so token costs grow as a thread gets longer. For new conversations, create new threads. For ongoing conversations (like a chat app), reuse the same thread.

Streaming Runs

# Open a streaming run and print each text delta as soon as it is yielded
streaming_run = client.beta.threads.runs.stream(
    assistant_id=assistant.id,
    thread_id=thread.id,
)
with streaming_run as stream:
    for delta in stream.text_deltas:
        print(delta, end="", flush=True)

When to Use Assistants API vs. Direct Messages API

Use Assistants API when:

You need persistent conversation threads
File search over uploaded documents is required
Code Interpreter for data analysis is needed
You want session persistence without managing history yourself

Use a direct, stateless chat API (for example, OpenAI’s Chat Completions API or Anthropic’s Messages API) when:

You manage conversation history yourself
You want lower latency (Assistants has overhead)
You need maximum control over the request
You’re using Claude (Anthropic doesn’t have an Assistants equivalent)

The Assistants API trades latency and control for convenience features. For simple chat applications, the direct Messages API is often better.