Install
openclaw skills install office-to-md-v2

Convert PDF, DOC, DOCX, and PPTX office documents to Markdown, supporting legacy .doc files with text extraction and basic formatting preservation.
openclaw skills install office-to-md-v2

Convert office documents (PDF, DOC, DOCX, PPTX) to Markdown format. This skill uses the word-extractor library for .doc support and provides full OpenClaw integration.
cp -r /root/.openclaw/workspace/office-to-md-v2/office-to-md /path/to/your/workspace/
cd /path/to/your/workspace/office-to-md
npm install
pip3 install python-pptx
// Run the converter CLI on any supported document through the exec tool.
const docConvertCommand =
  'node /path/to/office-to-md/openclaw-skill.js /path/to/document.doc';
const docConvertOptions = { workdir: '/path/to/workspace', timeout: 60000 };
const result = await exec(docConvertCommand, docConvertOptions);

// A non-zero exit code means the conversion failed; report stderr in that case.
if (result.exitCode !== 0) {
  console.error('❌ Conversion failed:', result.stderr);
} else {
  // Output file: /path/to/document.md
  console.log('✅ Document converted successfully');
}
// Import the converter
const { convertOfficeToMarkdown } = require('/path/to/office-to-md/openclaw-skill.js');
// Convert document
const conversionResult = await convertOfficeToMarkdown('/path/to/document.pdf');
if (conversionResult.success) {
console.log(`Output: ${conversionResult.outputPath}`);
console.log(`Preview: ${conversionResult.preview}`);
} else {
console.error(`Error: ${conversionResult.error}`);
}
/**
 * Convert an office document to Markdown by running the office-to-md skill CLI.
 *
 * Uses the `read` and `exec` tools, which must be in scope where this runs.
 * Failures are reported through the returned object rather than thrown
 * (except for anything `exec` itself throws).
 *
 * @param {string} filePath - Path to a .pdf, .doc, .docx or .pptx file.
 * @returns {Promise<{success: boolean, outputPath?: string, message?: string, error?: string}>}
 *   On success, `outputPath` is `filePath` with its final extension replaced
 *   by `.md`; on failure, `error` describes what went wrong.
 */
async function convertDocumentToMarkdown(filePath) {
  // Validate file exists (any read failure is reported as "File not found").
  try {
    await read(filePath);
  } catch (error) {
    return { success: false, error: `File not found: ${filePath}` };
  }

  // Check file extension, case-insensitively.
  const supported = ['.pdf', '.doc', '.docx', '.pptx'];
  const lowerPath = filePath.toLowerCase();
  if (!supported.some((suffix) => lowerPath.endsWith(suffix))) {
    return {
      success: false,
      error: `Unsupported file type. Supported: ${supported.join(', ')}`
    };
  }

  // Single-quote the path for the shell. Inside single quotes nothing is
  // expanded, so a file name containing `"`, `$(...)` or backticks cannot
  // break out of the argument; an embedded `'` is written as `'\''`.
  const quotedPath = `'${filePath.replace(/'/g, "'\\''")}'`;

  // Convert using the skill
  const cmd = `node /path/to/office-to-md/openclaw-skill.js ${quotedPath}`;
  const result = await exec(cmd, {
    workdir: '/path/to/workspace',
    timeout: 120000 // 2 minutes for large files
  });

  if (result.exitCode !== 0) {
    // `||` on purpose: an empty stderr should fall back to the generic message.
    return {
      success: false,
      error: result.stderr || 'Conversion failed'
    };
  }

  // Replace only the final extension with .md.
  const outputPath = filePath.replace(/\.[^/.]+$/, '.md');
  return {
    success: true,
    outputPath: outputPath,
    message: `Converted to: ${outputPath}`
  };
}
// Usage example: convert a document, then print the first 1000 characters of
// the generated Markdown.
const result = await convertDocumentToMarkdown('/path/to/document.doc');
if (result.success) {
  const markdown = await read(result.outputPath);
  console.log(markdown.substring(0, 1000));
} else {
  // Surface the failure instead of silently doing nothing.
  console.error('❌ Conversion failed:', result.error);
}
// Convert a .doc file and analyze its content
const docPath = '/path/to/document.doc';
const convertResult = await exec(
  `node /path/to/office-to-md/openclaw-skill.js "${docPath}"`,
  { workdir: '/path/to/workspace' }
);
if (convertResult.exitCode === 0) {
  // Replace only the final extension. A plain-string replace('.doc', '.md')
  // would rewrite the first ".doc" anywhere in the path and turn ".docx"
  // into ".mdx".
  const mdPath = docPath.replace(/\.[^/.]+$/, '.md');
  const content = await read(mdPath);

  // Analyze the content.
  // Trim first so leading/trailing whitespace does not add empty "words",
  // and report 0 words for an empty document.
  const trimmed = content.trim();
  const wordCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
  const lines = content.split('\n').length;
  // Matches any character in U+4E00..U+9FFF.
  const hasChinese = /[\u4e00-\u9fff]/.test(content);

  console.log(`Document analysis:`);
  console.log(`- Word count: ${wordCount}`);
  console.log(`- Lines: ${lines}`);
  console.log(`- Contains Chinese: ${hasChinese}`);
  console.log(`- Preview: ${content.substring(0, 200)}...`);
} else {
  // Report the failure instead of silently skipping the analysis.
  console.error('❌ Conversion failed:', convertResult.stderr);
}
// Batch-convert documents of several formats, one after another, recording
// the outcome of each run.
const documents = [
  '/path/to/report.pdf',
  '/path/to/legacy.doc',
  '/path/to/modern.docx',
  '/path/to/presentation.pptx',
];
const results = [];
for (let index = 0; index < documents.length; index += 1) {
  const file = documents[index];
  console.log(`Converting ${file}...`);
  const outcome = await exec(
    `node /path/to/office-to-md/openclaw-skill.js "${file}"`,
    { workdir: '/path/to/workspace', timeout: 90000 },
  );
  // Exit code 0 is the only success signal; stderr is kept for failures only.
  if (outcome.exitCode === 0) {
    results.push({ file, success: true, error: null });
    console.log('✅ Success');
  } else {
    results.push({ file, success: false, error: outcome.stderr });
    console.log('❌ Failed');
  }
}

// Summary: count the successful conversions.
const successful = results.reduce(
  (count, entry) => (entry.success ? count + 1 : count),
  0,
);
console.log(`\nConversion summary: ${successful}/${results.length} successful`);
Returns a Promise that resolves to:
{
success: boolean,
outputPath?: string,
markdown?: string,
preview?: string,
fileType?: string,
message?: string,
stats?: {
lines: number,
characters: number,
words: number
},
error?: string,
stack?: string
}
node --max-old-space-size=4096 openclaw-skill.js large-file.doc
"File not found"
"Unsupported file type"
Conversion errors with .doc files
Chinese text appears as gibberish
Timeout errors
Enable debug logging by setting environment variable:
DEBUG=office-to-md node openclaw-skill.js document.doc
This skill is provided as-is. The underlying libraries have their own licenses: