Mini Project: Autonomous Research Agent

Build a fully autonomous research agent that takes a research topic, searches the web, reads papers (arXiv), synthesizes findings across multiple sources, identifies knowledge gaps, and produces a structured research report — all without human intervention.

60 min•By Priygop Team•Updated 2026

Autonomous Research Agent

from openai import OpenAI
import requests, json, arxiv, re
from pydantic import BaseModel
from typing import Optional

# Module-level OpenAI client; research_agent() uses it for every chat-completion call.
# NOTE(review): constructed with no arguments, so credentials presumably come from
# the environment — confirm against the OpenAI SDK configuration in use.
client = OpenAI()

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH TOOLS
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def search_arxiv(query: str, max_results: int = 5) -> str:
    '''Search arXiv for academic papers and return the hits as a JSON string.

    Args:
        query: Free-text arXiv search query.
        max_results: Maximum number of papers to return.

    Returns:
        A JSON array (as a string) with one object per paper holding
        "title", "authors" (first three names), "published" (YYYY-MM-DD),
        "abstract" (at most 400 characters, with "..." appended only when
        the summary was actually truncated) and "url" (the paper's pdf_url).
        If the lookup fails for any reason, an "Error: ..." string is
        returned instead, mirroring search_web, so a failed search is
        reported to the caller as text rather than raised.
    '''
    try:
        search = arxiv.Search(query=query, max_results=max_results,
                              sort_by=arxiv.SortCriterion.Relevance)
        papers = []
        # Search.results() is deprecated in newer arxiv releases;
        # Client.results(search) is the supported way to run a query.
        for paper in arxiv.Client().results(search):
            summary = paper.summary
            abstract = summary[:400] + "..." if len(summary) > 400 else summary
            papers.append({
                "title": paper.title,
                "authors": [a.name for a in paper.authors[:3]],
                "published": paper.published.strftime("%Y-%m-%d"),
                "abstract": abstract,
                "url": paper.pdf_url,
            })
        return json.dumps(papers, indent=2)
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error: {e}"

def search_web(query: str) -> str:
    '''Query the DuckDuckGo API and return related topics as a JSON string.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array (as a string) of up to five objects with "text" and
        "url" keys, taken from the response's "RelatedTopics" list. On any
        failure (network error, HTTP error status, invalid JSON) an
        "Error: ..." string is returned instead of raising.
    '''
    try:
        r = requests.get("https://api.duckduckgo.com/", timeout=10,
                         params={"q": query, "format": "json", "no_html": 1})
        # Surface 4xx/5xx responses as errors instead of parsing an error body.
        r.raise_for_status()
        data = r.json()
        # Filter first, then truncate: entries without a "Text" field are
        # dropped, and slicing before filtering could return fewer than five
        # usable results even when more are available.
        usable = [t for t in (data.get("RelatedTopics") or [])
                  if isinstance(t, dict) and t.get("Text")]
        results = [{"text": t["Text"], "url": t.get("FirstURL", "")}
                   for t in usable[:5]]
        return json.dumps(results, indent=2)
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error: {e}"

def write_section(title: str, content: str, filepath: str) -> str:
    '''Append a Markdown section to the research report.

    Args:
        title: Section heading; written as a level-2 ("##") heading.
        content: Section body text.
        filepath: Path of the report file; opened in append mode, so the
            file is created if it does not exist yet.

    Returns:
        A confirmation message naming the section and the file.
    '''
    # The newlines must be "\n" escapes: a single-quoted f-string cannot span
    # physical lines. UTF-8 is explicit so non-ASCII text is written the same
    # way on every platform.
    with open(filepath, "a", encoding="utf-8") as f:
        f.write(f"\n## {title}\n\n{content}\n")
    return f"Section '{title}' written to {filepath}"

# Function-calling schemas passed to the chat-completions API as `tools`.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_arxiv",
            "description": "Search academic papers on arXiv",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "max_results": {"type": "integer", "default": 5},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "Search web for current information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "write_section",
            "description": "Write a section to the research report",
            "parameters": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "content": {"type": "string"},
                    "filepath": {"type": "string"},
                },
                "required": ["title", "content", "filepath"],
            },
        },
    },
]

# Maps each tool name declared above to the Python callable that implements it.
TOOL_REGISTRY = {
    "search_arxiv": search_arxiv,
    "search_web": search_web,
    "write_section": write_section,
}

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH AGENT
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def _run_tool_call(tc, output_file: str, step: int) -> str:
    '''Execute one model-requested tool call and return its result as text.

    Every failure mode (unknown tool name, malformed JSON arguments,
    arguments that do not match the tool's signature, or an exception raised
    inside the tool) is converted into an "Error: ..." string, so the caller
    can always answer the tool_call_id and the model gets a chance to recover.
    '''
    fn_name = tc.function.name
    tool = TOOL_REGISTRY.get(fn_name)
    if tool is None:
        return f"Error: unknown tool '{fn_name}'"
    try:
        fn_args = json.loads(tc.function.arguments or "{}")
    except json.JSONDecodeError as e:
        return f"Error: invalid JSON arguments for {fn_name}: {e}"
    if not isinstance(fn_args, dict):
        return f"Error: arguments for {fn_name} must be a JSON object"
    if fn_name == "write_section":
        # The model picks the arguments; pin the path so sections always land
        # in the report this run created and never in an arbitrary file.
        fn_args["filepath"] = output_file
    print(f"  [{step+1}] {fn_name}({list(fn_args.keys())})")
    try:
        return str(tool(**fn_args))
    except Exception as e:  # tool boundary: report the failure to the model
        return f"Error: {fn_name} failed: {e}"


def research_agent(topic: str, output_file: str = "research_report.md",
                   max_steps: int = 20) -> str:
    '''Autonomous research agent that produces a structured report.

    Creates (or overwrites) output_file with a title line, then runs a
    tool-calling loop in which the model may call search_arxiv, search_web
    and write_section until it says REPORT_COMPLETE, stops requesting tools,
    or max_steps model turns have been used.

    Args:
        topic: Research topic, inserted into the prompts and the report title.
        output_file: Path of the Markdown report to write.
        max_steps: Upper bound on model turns; one turn may contain several
            tool calls.

    Returns:
        A confirmation message naming the report file.
    '''

    # "\n" escapes are required here: a single-quoted f-string cannot span lines.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"# Research Report: {topic}\n\n")

    messages = [
        {"role": "system", "content": f'''You are an autonomous research agent specializing in AI and computer science.
Your task: research the topic '{topic}' comprehensively.

Steps to follow:
1. Search arXiv for recent papers (last 2 years)
2. Search web for practical applications and industry news
3. Write sections to the report file: Overview, Key Papers, Technical Details, Applications, Open Challenges
4. Synthesize findings and identify knowledge gaps
5. Write a Conclusion section

Report file: {output_file}
Be thorough, cite sources, use technical accuracy. When report is complete, say REPORT_COMPLETE.'''},
        {"role": "user", "content": f"Research this topic thoroughly: {topic}"}
    ]

    for step in range(max_steps):  # one iteration == one model turn
        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, tools=TOOLS, tool_choice="auto",
            max_tokens=2000,
        )
        msg = response.choices[0].message
        messages.append(msg)

        # Run the requested tools before checking for completion, so a final
        # write_section sent together with REPORT_COMPLETE is not dropped.
        for tc in msg.tool_calls or []:
            result = _run_tool_call(tc, output_file, step)
            # Truncate tool output to keep the conversation context bounded.
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": result[:3000]})

        if "REPORT_COMPLETE" in (msg.content or "") or not msg.tool_calls:
            break

    return f"Report saved to {output_file}"

# Run the agent on a sample topic; the report goes to the default output file.
result = research_agent(topic="Mixture of Experts (MoE) in Large Language Models")
print(result)
# Per the tutorial, the agent searches arXiv for recent MoE papers (GPT-4, Mixtral, etc.),
# writes the structured sections, and produces a 2000+ word technical research report.

Tip

Practice the Autonomous Research Agent mini project in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.

Diagram

Loading diagram…

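ReAct is a Reason + Act loop: the model reasons about the task, decides when to call a tool, observes the result, and repeats until it is done. Frameworks such as LangChain, CrewAI, and AutoGen implement agent loops of this kind.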

Practice Task

Note

Practice Task — (1) Write a working version of the Autonomous Research Agent mini project from scratch without looking at your notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.

Quick Quiz

Common Mistake

Warning

A common mistake with the Autonomous Research Agent mini project is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.

Topics in This Module

Autonomous Research Agent

from openai import OpenAI
import requests, json, arxiv, re
from pydantic import BaseModel
from typing import Optional

# Module-level OpenAI client; research_agent() uses it for every chat-completion call.
# NOTE(review): constructed with no arguments, so credentials presumably come from
# the environment — confirm against the OpenAI SDK configuration in use.
client = OpenAI()

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH TOOLS
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

def search_arxiv(query: str, max_results: int = 5) -> str:
    '''Search arXiv for academic papers and return the hits as a JSON string.

    Args:
        query: Free-text arXiv search query.
        max_results: Maximum number of papers to return.

    Returns:
        A JSON array (as a string) with one object per paper holding
        "title", "authors" (first three names), "published" (YYYY-MM-DD),
        "abstract" (at most 400 characters, with "..." appended only when
        the summary was actually truncated) and "url" (the paper's pdf_url).
        If the lookup fails for any reason, an "Error: ..." string is
        returned instead, mirroring search_web, so a failed search is
        reported to the caller as text rather than raised.
    '''
    try:
        search = arxiv.Search(query=query, max_results=max_results,
                              sort_by=arxiv.SortCriterion.Relevance)
        papers = []
        # Search.results() is deprecated in newer arxiv releases;
        # Client.results(search) is the supported way to run a query.
        for paper in arxiv.Client().results(search):
            summary = paper.summary
            abstract = summary[:400] + "..." if len(summary) > 400 else summary
            papers.append({
                "title": paper.title,
                "authors": [a.name for a in paper.authors[:3]],
                "published": paper.published.strftime("%Y-%m-%d"),
                "abstract": abstract,
                "url": paper.pdf_url,
            })
        return json.dumps(papers, indent=2)
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error: {e}"

def search_web(query: str) -> str:
    '''Query the DuckDuckGo API and return related topics as a JSON string.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array (as a string) of up to five objects with "text" and
        "url" keys, taken from the response's "RelatedTopics" list. On any
        failure (network error, HTTP error status, invalid JSON) an
        "Error: ..." string is returned instead of raising.
    '''
    try:
        r = requests.get("https://api.duckduckgo.com/", timeout=10,
                         params={"q": query, "format": "json", "no_html": 1})
        # Surface 4xx/5xx responses as errors instead of parsing an error body.
        r.raise_for_status()
        data = r.json()
        # Filter first, then truncate: entries without a "Text" field are
        # dropped, and slicing before filtering could return fewer than five
        # usable results even when more are available.
        usable = [t for t in (data.get("RelatedTopics") or [])
                  if isinstance(t, dict) and t.get("Text")]
        results = [{"text": t["Text"], "url": t.get("FirstURL", "")}
                   for t in usable[:5]]
        return json.dumps(results, indent=2)
    except Exception as e:  # tool boundary: report the failure as text
        return f"Error: {e}"

def write_section(title: str, content: str, filepath: str) -> str:
    '''Append a Markdown section to the research report.

    Args:
        title: Section heading; written as a level-2 ("##") heading.
        content: Section body text.
        filepath: Path of the report file; opened in append mode, so the
            file is created if it does not exist yet.

    Returns:
        A confirmation message naming the section and the file.
    '''
    # The newlines must be "\n" escapes: a single-quoted f-string cannot span
    # physical lines. UTF-8 is explicit so non-ASCII text is written the same
    # way on every platform.
    with open(filepath, "a", encoding="utf-8") as f:
        f.write(f"\n## {title}\n\n{content}\n")
    return f"Section '{title}' written to {filepath}"

# Function-calling schemas passed to the chat-completions API as `tools`.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_arxiv",
            "description": "Search academic papers on arXiv",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "max_results": {"type": "integer", "default": 5},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "Search web for current information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "write_section",
            "description": "Write a section to the research report",
            "parameters": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "content": {"type": "string"},
                    "filepath": {"type": "string"},
                },
                "required": ["title", "content", "filepath"],
            },
        },
    },
]

# Maps each tool name declared above to the Python callable that implements it.
TOOL_REGISTRY = {
    "search_arxiv": search_arxiv,
    "search_web": search_web,
    "write_section": write_section,
}

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH AGENT
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def _run_tool_call(tc, output_file: str, step: int) -> str:
    '''Execute one model-requested tool call and return its result as text.

    Every failure mode (unknown tool name, malformed JSON arguments,
    arguments that do not match the tool's signature, or an exception raised
    inside the tool) is converted into an "Error: ..." string, so the caller
    can always answer the tool_call_id and the model gets a chance to recover.
    '''
    fn_name = tc.function.name
    tool = TOOL_REGISTRY.get(fn_name)
    if tool is None:
        return f"Error: unknown tool '{fn_name}'"
    try:
        fn_args = json.loads(tc.function.arguments or "{}")
    except json.JSONDecodeError as e:
        return f"Error: invalid JSON arguments for {fn_name}: {e}"
    if not isinstance(fn_args, dict):
        return f"Error: arguments for {fn_name} must be a JSON object"
    if fn_name == "write_section":
        # The model picks the arguments; pin the path so sections always land
        # in the report this run created and never in an arbitrary file.
        fn_args["filepath"] = output_file
    print(f"  [{step+1}] {fn_name}({list(fn_args.keys())})")
    try:
        return str(tool(**fn_args))
    except Exception as e:  # tool boundary: report the failure to the model
        return f"Error: {fn_name} failed: {e}"


def research_agent(topic: str, output_file: str = "research_report.md",
                   max_steps: int = 20) -> str:
    '''Autonomous research agent that produces a structured report.

    Creates (or overwrites) output_file with a title line, then runs a
    tool-calling loop in which the model may call search_arxiv, search_web
    and write_section until it says REPORT_COMPLETE, stops requesting tools,
    or max_steps model turns have been used.

    Args:
        topic: Research topic, inserted into the prompts and the report title.
        output_file: Path of the Markdown report to write.
        max_steps: Upper bound on model turns; one turn may contain several
            tool calls.

    Returns:
        A confirmation message naming the report file.
    '''

    # "\n" escapes are required here: a single-quoted f-string cannot span lines.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"# Research Report: {topic}\n\n")

    messages = [
        {"role": "system", "content": f'''You are an autonomous research agent specializing in AI and computer science.
Your task: research the topic '{topic}' comprehensively.

Steps to follow:
1. Search arXiv for recent papers (last 2 years)
2. Search web for practical applications and industry news
3. Write sections to the report file: Overview, Key Papers, Technical Details, Applications, Open Challenges
4. Synthesize findings and identify knowledge gaps
5. Write a Conclusion section

Report file: {output_file}
Be thorough, cite sources, use technical accuracy. When report is complete, say REPORT_COMPLETE.'''},
        {"role": "user", "content": f"Research this topic thoroughly: {topic}"}
    ]

    for step in range(max_steps):  # one iteration == one model turn
        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, tools=TOOLS, tool_choice="auto",
            max_tokens=2000,
        )
        msg = response.choices[0].message
        messages.append(msg)

        # Run the requested tools before checking for completion, so a final
        # write_section sent together with REPORT_COMPLETE is not dropped.
        for tc in msg.tool_calls or []:
            result = _run_tool_call(tc, output_file, step)
            # Truncate tool output to keep the conversation context bounded.
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": result[:3000]})

        if "REPORT_COMPLETE" in (msg.content or "") or not msg.tool_calls:
            break

    return f"Report saved to {output_file}"

# Run the agent on a sample topic; the report goes to the default output file.
result = research_agent(topic="Mixture of Experts (MoE) in Large Language Models")
print(result)
# Per the tutorial, the agent searches arXiv for recent MoE papers (GPT-4, Mixtral, etc.),
# writes the structured sections, and produces a 2000+ word technical research report.

Tip

Diagram

Loading diagram…

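ReAct is a Reason + Act loop: the model reasons about the task, decides when to call a tool, observes the result, and repeats until it is done. Frameworks such as LangChain, CrewAI, and AutoGen implement agent loops of this kind.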

Topics in This Module