Back to catalogue
ContextPreToolUse· ReadPreToolUseBefore tool execution · can block⚡ blocking
Binary file to Markdown converter
Read any PDF, DOCX or PPTX as clean Markdown — slash token usage on binary files
Intercepts Read tool calls on binary documents (PDF, DOCX, PPTX, ODT, RTF, XLSX, EPUB, HTML) and converts them to Markdown via pandoc or pdftotext before Claude processes them. Injects the converted content directly, eliminating the need for Claude to parse raw binary data and drastically reducing token consumption. Falls back silently if no conversion tool is available.
What does the Binary file to Markdown converter hook do?
Binary file to Markdown converter is a Claude Code PreToolUse hook matching Read. It fires automatically at that lifecycle event — outside the model, so it can't be skipped or forgotten. Read any PDF, DOCX or PPTX as clean Markdown — slash token usage on binary files.
Use cases
- Reading PDF documentation without wasting tokens on binary encoding
- Analysing Word or PowerPoint files in agentic workflows
- Reducing context window usage when processing office documents
Tags
#context-optimization#pdf#markdown#tokens#docx#pptx#productivity
settings.json fragment
{
"hooks": {
"PreToolUse": [
{
"hooks": [
{
"command": "node $CLAUDE_PROJECT_DIR/.claude/hooks/file-to-markdown.mjs",
"type": "command"
}
],
"matcher": "Read"
}
]
}
}Script · .claude/hooks/file-to-markdown.mjs
#!/usr/bin/env node
// Convertit PDF/DOCX/PPTX et autres fichiers binaires en Markdown avant lecture (PreToolUse Read)
import { readFileSync, existsSync } from 'fs';
import { execSync } from 'child_process';
import { fileURLToPath } from 'url';
import { extname, basename } from 'path';
const MAX_CHARS = 50_000;
const SUPPORTED = new Set(['pdf', 'docx', 'pptx', 'odt', 'rtf', 'doc', 'ppt', 'xlsx', 'epub', 'html', 'htm']);
function defaultExec(cmd) {
return execSync(cmd, { encoding: 'utf8', timeout: 30_000 }).trim();
}
function hasBinary(name, exec) {
try { exec(`which ${name}`); return true; } catch { return false; }
}
export function run(input, { exec = defaultExec, exists = existsSync } = {}) {
if (input.tool_name !== 'Read') return null;
const filePath = input.tool_input?.file_path ?? '';
if (!filePath) return null;
const ext = extname(filePath).toLowerCase().replace('.', '');
if (!SUPPORTED.has(ext)) return null;
if (!exists(filePath)) return null;
const hasPdftotext = ext === 'pdf' && hasBinary('pdftotext', exec);
const hasPandoc = hasBinary('pandoc', exec);
if (!hasPdftotext && !hasPandoc) return null;
let markdown;
try {
if (ext === 'pdf' && hasPdftotext) {
markdown = exec(`pdftotext "${filePath}" -`);
} else if (hasPandoc) {
markdown = exec(`pandoc --to markdown --wrap=none "${filePath}"`);
} else {
return null;
}
} catch {
return null;
}
if (!markdown || !markdown.trim()) return null;
let content = markdown.trim();
let truncated = false;
if (content.length > MAX_CHARS) {
content = content.slice(0, MAX_CHARS);
truncated = true;
}
const name = basename(filePath);
const suffix = truncated ? ` (truncated to ${MAX_CHARS} chars)` : '';
const header = `[file-to-markdown] \`${name}\` converted to Markdown${suffix}:\n\n`;
return { decision: 'block', reason: header + content };
}
/* v8 ignore next 5 */
if (process.argv[1] === fileURLToPath(import.meta.url)) {
const input = JSON.parse(readFileSync(0, 'utf8'));
const result = run(input);
if (result) process.stdout.write(JSON.stringify(result));
}