Mini Project: Autonomous Research Agent
Build a fully autonomous research agent that takes a research topic, searches the web, reads papers (arXiv), synthesizes findings across multiple sources, identifies knowledge gaps, and produces a structured research report — all without human intervention.
Autonomous Research Agent
from openai import OpenAI
import requests, json, arxiv, re
from pydantic import BaseModel
from typing import Optional
# Module-level OpenAI client used by research_agent for chat completions.
# It is constructed with no explicit arguments, so credentials are presumably
# read from the environment — TODO confirm against the deployment setup.
client = OpenAI()
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH TOOLS
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def search_arxiv(query: str, max_results: int = 5) -> str:
    """Search arXiv for academic papers and return them as a JSON string.

    Args:
        query: Free-text arXiv search query.
        max_results: Maximum number of papers to return.

    Returns:
        A JSON array (pretty-printed) with one object per paper containing
        ``title``, up to three ``authors``, the ``published`` date
        (YYYY-MM-DD), an ``abstract`` truncated to 400 characters, and the
        PDF ``url``. On a network or arXiv API failure, returns a string of
        the form ``"Error: <details>"`` instead of raising, matching
        ``search_web``.
    """
    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    try:
        # Client.results() replaces the deprecated Search.results().
        papers = [
            {
                "title": paper.title,
                "authors": [author.name for author in paper.authors[:3]],
                "published": paper.published.strftime("%Y-%m-%d"),
                # Only mark the abstract as elided when text was actually cut.
                "abstract": paper.summary[:400]
                + ("..." if len(paper.summary) > 400 else ""),
                "url": paper.pdf_url,
            }
            for paper in arxiv.Client().results(search)
        ]
    except (arxiv.ArxivError, requests.RequestException) as e:
        # Report the failure as text so the calling agent can react to it.
        return f"Error: {e}"
    return json.dumps(papers, indent=2)
def search_web(query: str) -> str:
    """Query the DuckDuckGo API and return related topics as a JSON string.

    Args:
        query: Free-text search query.

    Returns:
        A JSON array (pretty-printed) of up to five objects with ``text`` and
        ``url`` keys, taken from the response's ``RelatedTopics`` entries that
        carry non-empty text. On a network, HTTP-status, or JSON-decoding
        failure, returns a string of the form ``"Error: <details>"``.
    """
    try:
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={"q": query, "format": "json", "no_html": 1},
            timeout=10,
        )
        # Surface 4xx/5xx responses instead of trying to parse an error page.
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"Error: {e}"

    topics = data.get("RelatedTopics", []) if isinstance(data, dict) else []
    # Filter first, then cap at five, so entries without text (e.g. grouped
    # sub-topic containers) do not use up the result budget.
    results = [
        {"text": topic.get("Text", ""), "url": topic.get("FirstURL", "")}
        for topic in topics
        if isinstance(topic, dict) and topic.get("Text")
    ][:5]
    return json.dumps(results, indent=2)
def write_section(title: str, content: str, filepath: str) -> str:
    """Append a Markdown section to the research report.

    The section is written as a blank separator line, a level-2 heading with
    ``title``, and then ``content`` followed by a newline.

    Args:
        title: Heading text for the section.
        content: Body text of the section.
        filepath: Path of the report file; created if it does not exist.

    Returns:
        A confirmation message naming the section and the file.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # Explicit UTF-8 so non-ASCII report text does not depend on the
    # platform's default encoding.
    with open(filepath, "a", encoding="utf-8") as f:
        f.write(f"\n## {title}\n{content}\n")
    return f"Section '{title}' written to {filepath}"
# Function-calling schemas advertised to the model, one entry per tool.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "search_arxiv",
            "description": "Search academic papers on arXiv",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                    "max_results": {"type": "integer", "default": 5},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "search_web",
            "description": "Search web for current information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "write_section",
            "description": "Write a section to the research report",
            "parameters": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "content": {"type": "string"},
                    "filepath": {"type": "string"},
                },
                "required": ["title", "content", "filepath"],
            },
        },
    },
]
# Dispatch table: tool name requested by the model -> implementing callable.
TOOL_REGISTRY = {
    "search_arxiv": search_arxiv,
    "search_web": search_web,
    "write_section": write_section,
}
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESEARCH AGENT
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def _run_tool_call(tc, step: int, output_file: str) -> str:
    """Execute one model-requested tool call and return its result as text.

    Failures (unknown tool, malformed JSON arguments, or an exception raised
    by the tool) are returned as ``"Error: ..."`` strings rather than raised,
    so the caller can always answer the tool call.
    """
    fn_name = tc.function.name
    tool = TOOL_REGISTRY.get(fn_name)
    if tool is None:
        return f"Error: unknown tool '{fn_name}'"

    try:
        fn_args = json.loads(tc.function.arguments or "{}")
    except json.JSONDecodeError as e:
        return f"Error: invalid JSON arguments for {fn_name}: {e}"
    if not isinstance(fn_args, dict):
        return f"Error: arguments for {fn_name} must be a JSON object"

    if fn_name == "write_section":
        # The path is model-supplied; pin it to the report file so the agent
        # cannot write elsewhere and the final message stays truthful.
        fn_args["filepath"] = output_file

    print(f" [{step+1}] {fn_name}({list(fn_args.keys())})")
    try:
        return str(tool(**fn_args))
    except Exception as e:  # tool boundary: report the failure to the model
        return f"Error: {fn_name} failed: {e}"


def research_agent(
    topic: str,
    output_file: str = "research_report.md",
    max_steps: int = 20,
) -> str:
    """Research ``topic`` with a tool-calling loop and write a Markdown report.

    The report file is first overwritten with a title line. The model is then
    called repeatedly with the tool schemas in ``TOOLS``; each requested tool
    is run and its output (truncated to 3000 characters) is fed back. The loop
    ends when the model's reply contains ``REPORT_COMPLETE``, when it requests
    no tools, or after ``max_steps`` model turns.

    Args:
        topic: Subject to research.
        output_file: Path of the Markdown report to create.
        max_steps: Upper bound on model turns; one turn may request several
            tool calls.

    Returns:
        A message naming the file the report was saved to.
    """
    # Start from a fresh report that contains only the title line.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"# Research Report: {topic}\n")

    system_prompt = (
        "You are an autonomous research agent specializing in AI and computer science.\n"
        f"Your task: research the topic '{topic}' comprehensively.\n"
        "Steps to follow:\n"
        "1. Search arXiv for recent papers (last 2 years)\n"
        "2. Search web for practical applications and industry news\n"
        "3. Write sections to the report file: Overview, Key Papers, Technical Details, Applications, Open Challenges\n"
        "4. Synthesize findings and identify knowledge gaps\n"
        "5. Write a Conclusion section\n"
        f"Report file: {output_file}\n"
        "Be thorough, cite sources, use technical accuracy. When report is complete, say REPORT_COMPLETE."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Research this topic thoroughly: {topic}"},
    ]

    for step in range(max_steps):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=TOOLS,
            tool_choice="auto",
            max_tokens=2000,
        )
        msg = response.choices[0].message
        messages.append(msg)

        # Stop on the completion marker or when the model asks for no tools.
        if "REPORT_COMPLETE" in (msg.content or "") or not msg.tool_calls:
            break

        for tc in msg.tool_calls:
            # Every tool call gets a reply, even on failure, so the next
            # request never carries an unanswered tool_call_id.
            result = _run_tool_call(tc, step, output_file)
            messages.append(
                {"role": "tool", "tool_call_id": tc.id, "content": result[:3000]}
            )

    return f"Report saved to {output_file}"
# Run the agent only when this file is executed as a script, so importing the
# module does not trigger network calls or overwrite the report file.
if __name__ == "__main__":
    result = research_agent("Mixture of Experts (MoE) in Large Language Models")
    print(result)
# The agent searches arXiv, finds recent MoE papers (GPT-4, Mixtral, etc.),
# writes structured sections, and produces a 2000+ word technical research report.
Tip
Practice the Autonomous Research Agent mini project in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
ReAct is a Reason + Act loop: the agent reasons about the task and decides when to use tools. Frameworks for building such agents include LangChain, CrewAI, and AutoGen.
Practice Task
Note
Practice Task — (1) Write a working version of the Autonomous Research Agent mini project from scratch without looking at your notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with the Autonomous Research Agent mini project is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.