Install
openclaw skills install data-analyzerData analysis tool for Excel, CSV, Word, PDF, TXT, Markdown files. Use when user needs to analyze, summarize, or compare data from multiple files. Supports folder scanning, data aggregation, statistics, report generation in Markdown/Excel/Word/PDF. Multi-language support. 数据分析、文件夹分析、Excel分析。
openclaw skills install data-analyzerAnalyze and summarize data from Excel, CSV, Word, and PDF files.
import os
import pandas as pd
from pathlib import Path
from docx import Document
import fitz # PyMuPDF
class DataAnalyzer:
def __init__(self, folder_path):
self.folder = Path(folder_path)
self.files = self._scan_files()
def _scan_files(self):
"""Scan folder for supported files"""
files = {
'excel': [], 'csv': [], 'word': [],
'pdf': [], 'txt': [], 'markdown': []
}
for f in self.folder.rglob('*'):
ext = f.suffix.lower()
if ext in ['.xlsx', '.xls']:
files['excel'].append(str(f))
elif ext == '.csv':
files['csv'].append(str(f))
elif ext == '.docx':
files['word'].append(str(f))
elif ext == '.pdf':
files['pdf'].append(str(f))
elif ext == '.txt':
files['txt'].append(str(f))
elif ext in ['.md', '.markdown']:
files['markdown'].append(str(f))
return files
def analyze_excel(self, file_path):
"""Analyze Excel file"""
df = pd.read_excel(file_path)
return {'rows': len(df), 'columns': len(df.columns), 'data': df}
def analyze_csv(self, file_path):
"""Analyze CSV file"""
df = pd.read_csv(file_path)
return {'rows': len(df), 'columns': len(df.columns), 'data': df}
def analyze_word(self, file_path):
"""Analyze Word file"""
doc = Document(file_path)
text = '\n'.join([p.text for p in doc.paragraphs if p.text.strip()])
return {'paragraphs': len(doc.paragraphs), 'text': text}
def analyze_pdf(self, file_path):
"""Analyze PDF file"""
doc = fitz.open(file_path)
text = ''
for page in doc:
text += page.get_text()
result = {'pages': len(doc), 'text': text}
doc.close()
return result
def analyze_txt(self, file_path):
"""Analyze TXT file"""
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
return {'lines': len(text.split('\n')), 'text': text}
def analyze_markdown(self, file_path):
"""Analyze Markdown file"""
with open(file_path, 'r', encoding='utf-8') as f:
text = f.read()
return {'lines': len(text.split('\n')), 'text': text}
def analyze_file(self, file_path):
"""Auto-detect and analyze any supported file"""
ext = Path(file_path).suffix.lower()
if ext in ['.xlsx', '.xls']:
return self.analyze_excel(file_path)
elif ext == '.csv':
return self.analyze_csv(file_path)
elif ext == '.docx':
return self.analyze_word(file_path)
elif ext == '.pdf':
return self.analyze_pdf(file_path)
elif ext == '.txt':
return self.analyze_txt(file_path)
elif ext in ['.md', '.markdown']:
return self.analyze_markdown(file_path)
else:
return {'error': f'Unsupported format: {ext}'}
def generate_summary(self):
"""Generate analysis summary"""
summary = {'total_files': 0, 'file_details': []}
for ftype, flist in self.files.items():
for fpath in flist:
try:
analysis = self.analyze_file(fpath)
summary['file_details'].append({
'name': os.path.basename(fpath),
'type': ftype,
'analysis': analysis
})
summary['total_files'] += 1
except Exception as e:
summary['file_details'].append({
'name': os.path.basename(fpath),
'type': ftype,
'error': str(e)
})
return summary
User: "Analyze all Excel files in this folder"
Agent: 使用 DataAnalyzer 扫描并分析
User: "Compare these CSV files"
Agent: 读取并对比数据
User: "Generate a data report"
Agent: 生成分析报告