The Quick Version

A web research agent takes a question, decides what to search for, reads the results, and synthesizes an answer with citations. It’s an LLM with two tools: search the web and read a URL. The agent decides when and how to use them.

pip install openai httpx beautifulsoup4
import json
import httpx
from bs4 import BeautifulSoup
from openai import OpenAI

# Shared OpenAI client; picks up OPENAI_API_KEY from the environment.
client = OpenAI()

def web_search(query: str, num_results: int = 5) -> list[dict]:
    """Search the web using SerpAPI (or any search API).

    Args:
        query: The search query string.
        num_results: Maximum number of results to return.

    Returns:
        A list of dicts with "title", "url", and "snippet" keys.

    Raises:
        httpx.HTTPStatusError: If the search API returns an error status.
    """
    import os  # local import keeps the snippet self-contained

    response = httpx.get(
        "https://serpapi.com/search",
        params={
            "q": query,
            "num": num_results,
            # Read the key from the environment instead of hard-coding it in
            # source; the placeholder is kept as a fallback for the tutorial.
            "api_key": os.environ.get("SERPAPI_KEY", "YOUR_SERPAPI_KEY"),
        },
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = response.json()
    return [
        {
            "title": item["title"],
            "url": item["link"],
            "snippet": item.get("snippet", ""),
        }
        for item in data.get("organic_results", [])[:num_results]
    ]

def read_url(url: str, max_chars: int = 8000) -> str:
    """Fetch a page and return its visible text, truncated to max_chars.

    Strips script/style/navigation chrome before extracting text. On any
    failure (network error, HTTP error status, parse error) this returns a
    best-effort "Error reading URL: ..." string rather than raising, so a
    single bad page cannot kill an agent run.
    """
    try:
        response = httpx.get(url, timeout=10, follow_redirects=True,
                             headers={"User-Agent": "ResearchBot/1.0"})
        # Treat 4xx/5xx as failures instead of extracting text from an
        # error page and feeding it to the model as if it were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Drop non-content tags so get_text() returns mostly article body.
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()
        text = soup.get_text(separator="\n", strip=True)
        return text[:max_chars]
    except Exception as e:
        # Deliberate broad catch: best-effort reading is the contract here.
        return f"Error reading URL: {e}"

# Quick test — requires a valid SerpAPI key and network access.
results = web_search("transformer architecture explained 2024")
print(f"Found {len(results)} results")
# Read the top result to verify extraction works end to end.
content = read_url(results[0]["url"])
print(f"Read {len(content)} chars from {results[0]['url']}")

That gives you the two core tools. Now wire them into an agent loop.

The Agent Loop

The agent uses function calling to decide between searching, reading, or answering. It can do multiple searches, read several pages, and cross-reference information before producing a final answer.

# OpenAI function-calling schemas for the two tools. The descriptions are
# read by the model and steer when it chooses each tool, so keep them sharp.
tools = [
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web for information. Returns titles, URLs, and snippets.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "read_url",
            "description": "Read the text content of a web page. Use after searching to get detailed information.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "URL to read"},
                },
                "required": ["url"],
            },
        },
    },
]

def handle_tool_call(tool_call) -> str:
    """Dispatch a model tool call to the matching Python function.

    Returns the tool output as a string (JSON for search results, raw
    text for page reads), or "Unknown tool" for unrecognized names.
    """
    tool_name = tool_call.function.name
    parsed_args = json.loads(tool_call.function.arguments)

    if tool_name == "read_url":
        return read_url(parsed_args["url"])
    if tool_name == "web_search":
        search_results = web_search(parsed_args["query"])
        return json.dumps(search_results, indent=2)
    return "Unknown tool"

def research(question: str, max_steps: int = 10) -> str:
    """Run the research agent on a question.

    Loops up to max_steps times, letting the model alternate between tool
    calls and (eventually) a final answer. Returns the answer text, or a
    failure message if the step limit is reached first.
    """
    system_prompt = (
        "You are a research agent. Your job is to answer questions thoroughly "
        "using web search and page reading. Always cite your sources with URLs. "
        "Search first, then read the most relevant pages, then synthesize an answer. "
        "Do multiple searches if the first results don't fully answer the question."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]

    for step in range(max_steps):
        # Let the model decide: call a tool, or emit the final answer.
        reply = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
        ).choices[0].message
        messages.append(reply)

        # No tool calls means the model produced its final answer.
        if not reply.tool_calls:
            return reply.content

        for tc in reply.tool_calls:
            print(f"  Step {step + 1}: {tc.function.name}({tc.function.arguments[:80]}...)")
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,
                "content": handle_tool_call(tc),
            })

    return "Research incomplete — hit step limit."

# Run it — this makes live API calls (OpenAI + search) and prints trace lines.
answer = research("What are the latest advances in protein structure prediction since AlphaFold 3?")
print(answer)

The agent typically does 2-3 searches and reads 3-5 pages before producing a cited answer. The system prompt is important — without it, the model tends to answer from training data instead of actually searching.

Adding Source Tracking and Citations

For research you can trust, track every source the agent reads and include proper citations in the output.

from dataclasses import dataclass, field
from datetime import datetime

@dataclass
class Source:
    """A web source the agent encountered, timestamped for auditability."""
    # Canonical URL of the page (used downstream as the dedup key).
    url: str
    # Page title as reported by the search API.
    title: str
    # ISO-8601 timestamp captured when this Source object was created.
    accessed_at: str = field(default_factory=lambda: datetime.now().isoformat())
    # Search-result snippet, if any.
    snippet: str = ""

class ResearchAgent:
    def __init__(self, model: str = "gpt-4o"):
        self.client = OpenAI()
        self.model = model
        self.sources: list[Source] = []

    def _handle_tool(self, tool_call) -> str:
        name = tool_call.function.name
        args = json.loads(tool_call.function.arguments)

        if name == "web_search":
            results = web_search(args["query"])
            for r in results:
                self.sources.append(Source(url=r["url"], title=r["title"], snippet=r["snippet"]))
            return json.dumps(results, indent=2)
        elif name == "read_url":
            content = read_url(args["url"])
            return content
        return "Unknown tool"

    def research(self, question: str) -> dict:
        self.sources = []
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a research agent. Search the web, read relevant pages, "
                    "and provide a thorough answer with inline citations like [1], [2]. "
                    "At the end, list all sources with their URLs."
                ),
            },
            {"role": "user", "content": question},
        ]

        for _ in range(12):
            response = self.client.chat.completions.create(
                model=self.model, messages=messages, tools=tools,
            )
            msg = response.choices[0].message
            messages.append(msg)

            if not msg.tool_calls:
                unique_sources = {s.url: s for s in self.sources}
                return {
                    "answer": msg.content,
                    "sources": list(unique_sources.values()),
                    "num_searches": sum(
                        1 for m in messages
                        if hasattr(m, "tool_calls") and m.tool_calls
                        and any(tc.function.name == "web_search" for tc in m.tool_calls)
                    ),
                }

            for tc in msg.tool_calls:
                result = self._handle_tool(tc)
                messages.append({"role": "tool", "tool_call_id": tc.id, "content": result})

        return {"answer": "Incomplete", "sources": self.sources, "num_searches": 0}

agent = ResearchAgent()
result = agent.research("Compare the performance of Mamba vs Transformer architectures for long sequences")
print(result["answer"])
print(f"\nSources used: {len(result['sources'])}")
for s in result["sources"]:
    print(f"  - {s.title}: {s.url}")

Parallel Search with Multiple Queries

Better research often requires searching from different angles. Have the agent generate multiple search queries and run them in parallel.

import concurrent.futures

def multi_search(queries: list[str], search_fn=None) -> list[dict]:
    """Run multiple searches in parallel and deduplicate results by URL.

    Args:
        queries: Search queries to fan out concurrently.
        search_fn: Callable taking a query string and returning a list of
            result dicts; defaults to ``web_search``. Parameterized so the
            fan-out/dedup logic can be reused (or tested) with any backend.

    Returns:
        Combined results across all queries; the first-seen URL wins.
    """
    # Resolve the default lazily rather than in the signature so the module
    # still imports even if web_search is defined later / elsewhere.
    if search_fn is None:
        search_fn = web_search

    all_results = []
    seen_urls = set()

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = {executor.submit(search_fn, q): q for q in queries}
        # as_completed yields in finish order, so result ordering is
        # nondeterministic across runs — dedup is order-independent.
        for future in concurrent.futures.as_completed(futures):
            for result in future.result():
                if result["url"] not in seen_urls:
                    all_results.append(result)
                    seen_urls.add(result["url"])

    return all_results

# Add as a tool for the agent: the function-calling schema that lets the
# model invoke multi_search itself (append this to the `tools` list).
multi_search_tool = {
    "type": "function",
    "function": {
        "name": "multi_search",
        "description": "Run multiple search queries in parallel. Use when you need to research a topic from different angles.",
        "parameters": {
            "type": "object",
            "properties": {
                "queries": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of search queries to run",
                },
            },
            "required": ["queries"],
        },
    },
}

Rate Limiting and Caching

Real-world research agents need rate limiting to avoid hammering search APIs and caching to avoid re-reading the same pages.

import hashlib
import time
from functools import lru_cache

# Simple in-memory cache for URL content. Hand-rolled rather than lru_cache
# because lru_cache would also memoize transient failures ("Error reading
# URL: ..."), making a URL permanently unreadable after one network blip.
_url_cache: dict[str, str] = {}

def cached_read_url(url: str) -> str:
    """Return page text for `url`, caching successful reads in memory."""
    cached = _url_cache.get(url)
    if cached is not None:
        return cached
    content = read_url(url)
    # Only cache real content; error strings stay retryable.
    if not content.startswith("Error reading URL:"):
        if len(_url_cache) >= 100:
            # FIFO eviction keeps the cache bounded like lru_cache(maxsize=100).
            _url_cache.pop(next(iter(_url_cache)))
        _url_cache[url] = content
    return content

# Rate limiter
class RateLimiter:
    """Sliding-window rate limiter: at most `calls_per_minute` calls per 60s."""

    def __init__(self, calls_per_minute: int = 30):
        self.calls_per_minute = calls_per_minute
        # time.time() stamps of calls made within the last 60 seconds.
        self.timestamps: list[float] = []

    def wait(self):
        """Block until a call is allowed, then record it."""
        now = time.time()
        # Drop stamps older than the 60-second window.
        self.timestamps = [t for t in self.timestamps if now - t < 60]
        if len(self.timestamps) >= self.calls_per_minute:
            sleep_time = 60 - (now - self.timestamps[0])
            time.sleep(max(sleep_time, 0))
            # Re-prune after sleeping so the window reflects the actual time.
            # The original kept the now-expired oldest stamp until the *next*
            # call, which could trigger an unnecessary extra sleep.
            now = time.time()
            self.timestamps = [t for t in self.timestamps if now - t < 60]
        self.timestamps.append(time.time())

# Module-level limiter shared by every search call in this process.
limiter = RateLimiter(calls_per_minute=30)

def rate_limited_search(query: str) -> list[dict]:
    """Drop-in replacement for web_search that respects the shared rate limit."""
    limiter.wait()
    return web_search(query)

Common Errors and Fixes

Agent answers from training data without searching

Strengthen the system prompt: “You MUST search the web before answering. Never answer from memory alone.” Also check that the tool definitions are being passed correctly.

Read URL returns garbage text

Some sites block bots or serve JavaScript-rendered content. Add a fallback: if BeautifulSoup returns less than 100 characters of text, try using the search snippet instead. For JavaScript-heavy sites, use a headless browser like Playwright.

Agent reads too many pages and hits token limits

Set max_chars lower in read_url (4000-6000 chars) and limit the agent to reading 5 pages max. You can also add a “summarize_page” tool that condenses content before adding it to the conversation.

Search API returns irrelevant results

Add date filters to queries for current topics: append “2025 2026” to the query. For technical topics, add “site:github.com OR site:arxiv.org” to focus on primary sources.

Agent loops between searching and reading without answering

Add an explicit instruction: “After reading 3-5 pages, you must provide your final answer. Do not continue searching indefinitely.” The max_steps limit is your safety net.