The Quick Version#
You define filesystem operations as OpenAI tools, run them inside an agent loop, and let the LLM decide which file operation to execute next. The agent reads a user request, picks the right tool, executes it in a sandboxed directory, feeds the result back, and repeats until the task is done.
Here’s a minimal working agent that manages files through natural language:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
| import os
import json
import glob
import shutil
from pathlib import Path
from openai import OpenAI
# Shared OpenAI client; reads OPENAI_API_KEY from the environment.
client = OpenAI()
# All filesystem tools operate only inside this directory.
SANDBOX_DIR = Path("./agent_workspace").resolve()
SANDBOX_DIR.mkdir(exist_ok=True)
def safe_path(filepath: str) -> Path:
    """Resolve *filepath* against the sandbox and ensure it stays inside it.

    Args:
        filepath: Path relative to the workspace root.

    Returns:
        The fully resolved absolute Path inside SANDBOX_DIR.

    Raises:
        PermissionError: if the resolved path escapes the sandbox.
    """
    target = (SANDBOX_DIR / filepath).resolve()
    # Compare whole path components, not a string prefix: a bare
    # str(target).startswith(str(SANDBOX_DIR)) check would wrongly accept
    # sibling directories such as "agent_workspace_evil".
    if not target.is_relative_to(SANDBOX_DIR):
        raise PermissionError(f"Access denied: {filepath} is outside the sandbox")
    return target
|
That safe_path function is critical. Every filesystem tool passes through it, which prevents path traversal attacks like ../../etc/passwd.
OpenAI function calling uses the tools parameter with JSON Schema definitions. Each tool maps to a Python function that performs the actual file operation.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def _tool(name: str, description: str, properties: dict, required: list) -> dict:
    """Build one OpenAI function-tool definition from its schema pieces."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# Tool schemas the model chooses from; each maps to a Python function below.
tools = [
    _tool(
        "read_file",
        "Read the contents of a file. Returns the file text.",
        {
            "filepath": {
                "type": "string",
                "description": "Path relative to the workspace root",
            },
        },
        ["filepath"],
    ),
    _tool(
        "write_file",
        "Write content to a file. Creates the file if it doesn't exist, overwrites if it does.",
        {
            "filepath": {
                "type": "string",
                "description": "Path relative to the workspace root",
            },
            "content": {
                "type": "string",
                "description": "The text content to write",
            },
        },
        ["filepath", "content"],
    ),
    _tool(
        "list_directory",
        "List all files and subdirectories in a directory.",
        {
            "dirpath": {
                "type": "string",
                "description": "Directory path relative to workspace root. Use '.' for the root.",
            },
        },
        ["dirpath"],
    ),
    _tool(
        "search_files",
        "Search for files matching a glob pattern. Returns a list of matching file paths.",
        {
            "pattern": {
                "type": "string",
                "description": "Glob pattern like '*.txt' or '**/*.py'",
            },
        },
        ["pattern"],
    ),
    _tool(
        "move_file",
        "Move or rename a file from one path to another.",
        {
            "source": {
                "type": "string",
                "description": "Current file path relative to workspace root",
            },
            "destination": {
                "type": "string",
                "description": "New file path relative to workspace root",
            },
        },
        ["source", "destination"],
    ),
]
|
Each tool function takes keyword arguments matching its JSON Schema parameters, performs the operation inside the sandbox, and returns a string result.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Second defense layer on top of the path sandbox: never create or rename
# to files with executable extensions.
BLOCKED_EXTENSIONS = {".exe", ".sh", ".bat", ".cmd", ".ps1", ".dll", ".so"}


def read_file(filepath: str) -> str:
    """Return the UTF-8 text of *filepath*, or an error string the model can act on."""
    target = safe_path(filepath)
    if not target.exists():
        return f"Error: File not found: {filepath}"
    if not target.is_file():
        return f"Error: {filepath} is not a file"
    try:
        return target.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Binary files (images, PDFs, ...) cannot be decoded as UTF-8; return a
        # recoverable error instead of crashing the tool call.
        return f"Error: {filepath} appears to be a binary file and cannot be read as text"
def write_file(filepath: str, content: str) -> str:
    """Create or overwrite *filepath* with *content*, encoded as UTF-8.

    Parent directories are created as needed. Executable extensions are
    refused as a second defense layer on top of the path sandbox.
    """
    target = safe_path(filepath)
    # Compare the suffix case-insensitively: without .lower(), "payload.EXE"
    # would bypass the block on Windows-style names.
    if target.suffix.lower() in BLOCKED_EXTENSIONS:
        return f"Error: Writing {target.suffix} files is blocked for safety"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    # Report the encoded size: len(content) counts characters, not bytes.
    size = len(content.encode("utf-8"))
    return f"Written {size} bytes to {filepath}"
def list_directory(dirpath: str) -> str:
    """List the entries of *dirpath* (relative to the workspace root) as text.

    Each line has the form "[kind] name (size bytes)"; directories report 0.
    """
    target = safe_path(dirpath)
    if not target.exists():
        return f"Error: Directory not found: {dirpath}"
    if not target.is_dir():
        return f"Error: {dirpath} is not a directory"

    def describe(entry: Path) -> str:
        kind = "dir" if entry.is_dir() else "file"
        size = entry.stat().st_size if entry.is_file() else 0
        return f"[{kind}] {entry.name} ({size} bytes)"

    listing = [describe(entry) for entry in sorted(target.iterdir())]
    return "\n".join(listing) if listing else "(empty directory)"
def search_files(pattern: str) -> str:
    """Glob for *pattern* under the sandbox and return matching relative paths."""
    matches = glob.glob(str(SANDBOX_DIR / pattern), recursive=True)
    results = []
    for match in matches:
        resolved = Path(match).resolve()
        # is_relative_to() compares whole path components; the previous bare
        # string-prefix check would also have accepted sibling directories
        # such as "agent_workspace_evil".
        if resolved.is_relative_to(SANDBOX_DIR):
            results.append(str(resolved.relative_to(SANDBOX_DIR)))
    return "\n".join(results) if results else "No files matched the pattern"
def move_file(source: str, destination: str) -> str:
    """Move or rename *source* to *destination*, both relative to the sandbox root."""
    src = safe_path(source)
    dst = safe_path(destination)
    if not src.exists():
        return f"Error: Source not found: {source}"
    # Case-insensitive suffix check: without .lower(), renaming to "run.EXE"
    # would bypass the executable-extension block.
    if dst.suffix.lower() in BLOCKED_EXTENSIONS:
        return f"Error: Moving to {dst.suffix} files is blocked for safety"
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(src), str(dst))
    return f"Moved {source} -> {destination}"
# Dispatch table: tool name (as declared in the schemas above) -> implementation.
# Keyed on __name__ so schema names and function names can never drift apart.
TOOL_MAP = {
    fn.__name__: fn
    for fn in (read_file, write_file, list_directory, search_files, move_file)
}
|
Notice BLOCKED_EXTENSIONS – the agent can’t write or rename files to executable formats. This is a second layer of defense on top of the sandbox path restriction.
The Agent Loop#
The core pattern: send messages to the LLM, check if it wants to call tools, execute them, append results, and loop until the model gives a final text response.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def run_agent(user_message: str, max_iterations: int = 10) -> str:
    """Run the tool-calling loop for one user request.

    Sends the conversation to the model, executes any requested tools inside
    the sandbox, feeds results back, and repeats until the model produces a
    final text answer or *max_iterations* rounds have elapsed.

    Args:
        user_message: The natural-language task from the user.
        max_iterations: Hard cap on model round trips (prevents infinite loops).

    Returns:
        The model's final text answer, or a fallback message on iteration cap.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are a file management assistant. You can read, write, list, "
                "search, and move files in the user's workspace. Always confirm "
                "destructive operations before executing. Work step by step."
            ),
        },
        {"role": "user", "content": user_message},
    ]
    for _ in range(max_iterations):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
        )
        choice = response.choices[0]
        # The model is done calling tools: return its final text answer.
        if choice.finish_reason == "stop":
            return choice.message.content
        # Defensive: a non-"stop" finish without tool calls (e.g. "length")
        # would otherwise crash when iterating tool_calls below.
        if not choice.message.tool_calls:
            return choice.message.content or "Agent stopped without a final answer."
        # Record the assistant turn, then execute each requested tool.
        messages.append(choice.message)
        for tool_call in choice.message.tool_calls:
            fn_name = tool_call.function.name
            try:
                fn_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError as e:
                # Models occasionally emit malformed JSON, especially with long
                # file contents; report it so the model can retry.
                result = f"Error: could not parse arguments for {fn_name}: {e}"
            else:
                if fn_name not in TOOL_MAP:
                    result = f"Error: Unknown tool {fn_name}"
                else:
                    try:
                        result = TOOL_MAP[fn_name](**fn_args)
                    except PermissionError as e:
                        # Raised by safe_path on sandbox-escape attempts.
                        result = f"Permission denied: {e}"
                    except Exception as e:
                        result = f"Error executing {fn_name}: {e}"
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": result,
            })
    return "Agent reached maximum iterations without completing the task."
|
The max_iterations cap prevents infinite loops. In practice, most file tasks finish in 2-4 iterations.
Running the Agent#
1
2
3
4
5
6
7
if __name__ == "__main__":
    # Example: organize files by extension
    task = (
        "List everything in the workspace, then create a 'docs' folder "
        "and move all .txt files into it. Show me the final directory structure."
    )
    print(run_agent(task))
|
The agent will: (1) call list_directory on ., (2) call move_file for each .txt file, (3) call list_directory again to show the result, and (4) return a summary. You didn’t specify any of those steps – the LLM figured out the sequence on its own.
Safety Guardrails Worth Adding#
The sandbox and blocked extensions are a good start. Here are more guardrails for production use.
File size limits#
Don’t let the agent write gigabytes of data:
1
2
3
4
5
6
7
8
9
10
11
# Upper bound on a single write; keeps the agent from filling the disk.
MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB


def write_file(filepath: str, content: str) -> str:
    """Create or overwrite *filepath* with *content* (UTF-8), enforcing a size cap.

    Rejects content over MAX_FILE_SIZE and executable extensions; creates
    parent directories as needed.
    """
    # Measure the encoded size once: len(content) counts characters, not bytes.
    size_bytes = len(content.encode("utf-8"))
    if size_bytes > MAX_FILE_SIZE:
        return f"Error: Content exceeds {MAX_FILE_SIZE} byte limit"
    target = safe_path(filepath)
    # Case-insensitive so "payload.EXE" cannot bypass the block.
    if target.suffix.lower() in BLOCKED_EXTENSIONS:
        return f"Error: Writing {target.suffix} files is blocked for safety"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")
    return f"Written {size_bytes} bytes to {filepath}"
|
Operation logging#
Track every action for auditing:
1
2
3
4
5
6
| import logging
logging.basicConfig(filename="agent_ops.log", level=logging.INFO)
def logged_tool_call(fn_name: str, fn_args: dict, result: str):
logging.info(f"Tool: {fn_name} | Args: {json.dumps(fn_args)} | Result: {result[:200]}")
|
Add a logged_tool_call invocation inside the agent loop right after each tool execution. This gives you a complete audit trail of every operation the agent performed, with its arguments and results.
Confirmation for destructive operations#
For write and move operations in production, you’d typically add a human-in-the-loop step:
1
2
3
def confirm_action(action_description: str) -> bool:
    """Human-in-the-loop gate: return True only when the operator types 'y'."""
    prompt = f"Agent wants to: {action_description}\nAllow? (y/n): "
    reply = input(prompt).strip().lower()
    return reply == "y"
|
Wire this into the agent loop before executing write_file or move_file. The LLM decides what to do, but a human approves whether to do it.
Common Errors and Fixes#
PermissionError: Access denied – The agent tried to access a path outside the sandbox. Check that you’re passing relative paths, not absolute ones. If user messages contain absolute paths, strip the prefix in the tool function before resolving.
json.JSONDecodeError when parsing tool arguments – The model occasionally produces malformed JSON, especially with long file contents. Wrap json.loads in a try/except and return a clear error message so the model can retry.
Agent loops forever without finishing – Usually happens when a tool returns an error the model doesn’t know how to recover from. Lower max_iterations and add a check: if the same tool is called with the same arguments twice in a row, break the loop and return an error.
UnicodeDecodeError on read_file – Binary files like images or PDFs will fail with read_text(). Add a check at the start of read_file:
1
2
3
4
5
6
7
8
9
def read_file(filepath: str) -> str:
    """Read *filepath* as UTF-8 text, returning error strings the model can recover from."""
    target = safe_path(filepath)
    if not target.exists():
        return f"Error: File not found: {filepath}"
    if not target.is_file():
        # Without this guard a directory path raises IsADirectoryError below
        # instead of returning a clear, recoverable error.
        return f"Error: {filepath} is not a file"
    # Reject binary files: images, PDFs, etc. fail UTF-8 decoding.
    try:
        return target.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return f"Error: {filepath} appears to be a binary file and cannot be read as text"
|
Rate limits from OpenAI – Each iteration is an API call. For tasks that touch many files, you could batch operations into a single tool (e.g., move_files that accepts a list) to reduce round trips.
Model invents nonexistent files – The model sometimes hallucinates file names. Always have your tool functions check path.exists() and return clear “not found” errors so the model can self-correct.