#!/usr/bin/env python3
"""
Academic paper search via OpenAlex API (free, no key needed).
Part of the academic-research skill for OpenClaw.

Built by Topanga (topanga.ludwitt.com) — AI Research Consultant
"""

import argparse
import json
import sys
import time
from urllib.parse import quote

import requests

BASE = "https://api.openalex.org"
MAILTO = "topanga@ludwitt.com"  # polite pool (faster rate limits)

_MAX_ATTEMPTS = 3
_MAX_PER_PAGE = 50

# Prefixes users commonly paste in front of a bare DOI (matched
# case-insensitively).
_DOI_PREFIXES = (
    "https://doi.org/",
    "http://doi.org/",
    "https://dx.doi.org/",
    "http://dx.doi.org/",
    "doi:",
)

# Characters that are legal inside a URL path and therefore left untouched
# when a DOI is embedded in one; everything else (e.g. '#', '?', '<', '>')
# is percent-encoded so it cannot be read as a fragment or query delimiter.
_DOI_PATH_SAFE = "/:@!$&'()*+,;="

# (result key, OpenAlex filter name) pairs used by get_citations().
_CITATION_FILTERS = (
    ("cited_by", "cites"),
    ("references", "cited_by"),
)


def _get(url, params=None):
    """GET *url* and return the decoded JSON body, or None on failure.

    The MAILTO address is added to the query string of every request.
    Up to three attempts are made: an HTTP 429 response backs off
    exponentially (1s, 2s), other failures wait one second. Client errors
    (4xx other than 429) are not retried because repeating the same request
    cannot succeed. The final error is reported on stderr.
    """
    # Copy so the caller's dict is never mutated by the mailto injection.
    query = dict(params or {})
    query["mailto"] = MAILTO

    for attempt in range(_MAX_ATTEMPTS):
        is_last = attempt == _MAX_ATTEMPTS - 1
        try:
            r = requests.get(url, params=query, timeout=20)
            if r.status_code == 429:
                if is_last:
                    print("Error: rate limited (HTTP 429)", file=sys.stderr)
                    return None
                time.sleep(2 ** attempt)
                continue
            r.raise_for_status()
            return r.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON error is not a RequestException subclass.
            response = getattr(e, "response", None)
            status = getattr(response, "status_code", None)
            permanent = status is not None and 400 <= status < 500
            if is_last or permanent:
                print(f"Error: {e}", file=sys.stderr)
                return None
            time.sleep(1)
    return None


def _per_page(limit):
    """Clamp a requested result count to the 1.._MAX_PER_PAGE page size."""
    return max(1, min(limit, _MAX_PER_PAGE))


def _normalize_doi(doi):
    """Return *doi* stripped of whitespace and any leading resolver prefix."""
    doi = doi.strip()
    lowered = doi.lower()
    for prefix in _DOI_PREFIXES:
        if lowered.startswith(prefix):
            return doi[len(prefix):].strip()
    return doi


def _work_url(doi):
    """Build the OpenAlex single-work URL for an already-normalized DOI."""
    return f"{BASE}/works/https://doi.org/{quote(doi, safe=_DOI_PATH_SAFE)}"


def _rebuild_abstract(inverted_index):
    """Rebuild abstract text from a {word: [positions]} inverted index.

    Returns None when the index is missing or holds no positions. Positions
    that no word claims are left as empty strings, so gaps show up as extra
    spaces rather than shifting later words.
    """
    if not inverted_index:
        return None
    placed = {}
    for word, positions in inverted_index.items():
        for p in positions or ():
            placed[p] = word
    if not placed:
        return None
    words = [placed.get(i, "") for i in range(max(placed) + 1)]
    return " ".join(words).strip()


def _parse_work(w):
    """Extract structured data from an OpenAlex work object.

    Returns a plain dict (JSON-serializable as long as the input was) with
    the keys: title, year, authors (first five display names), abstract,
    citations, doi (without the https://doi.org/ prefix), open_access,
    oa_url, landing_url, source, type and openalex_id.
    """
    loc = w.get("primary_location") or {}
    source = loc.get("source") or {}
    oa = w.get("open_access") or {}

    # Fields may be present but null, so fall back with `or` instead of
    # relying on dict.get() defaults.
    authors = [
        ((a or {}).get("author") or {}).get("display_name") or "Unknown"
        for a in (w.get("authorships") or [])[:5]
    ]

    return {
        "title": w.get("display_name") or "N/A",
        "year": w.get("publication_year"),
        "authors": authors,
        "abstract": _rebuild_abstract(w.get("abstract_inverted_index")),
        "citations": w.get("cited_by_count") or 0,
        "doi": (w.get("doi") or "").replace("https://doi.org/", "") or None,
        "open_access": oa.get("is_oa", False),
        "oa_url": oa.get("oa_url"),
        "landing_url": loc.get("landing_page_url"),
        "source": source.get("display_name"),
        "type": w.get("type"),
        "openalex_id": w.get("id"),
    }


def search(query, limit=10, sort="relevance", year_range=None, oa_only=False):
    """Search works by topic.

    *limit* is clamped to 1-50. *sort* of "citations" orders by citation
    count descending; any other value leaves the API's default ordering.
    *year_range* is passed verbatim as the publication_year filter value and
    *oa_only* adds an open-access filter. Returns a list of parsed works
    (empty on failure).
    """
    params = {"search": query, "per_page": _per_page(limit)}
    if sort == "citations":
        params["sort"] = "cited_by_count:desc"

    filters = []
    if year_range:
        filters.append(f"publication_year:{year_range}")
    if oa_only:
        filters.append("open_access.is_oa:true")
    if filters:
        params["filter"] = ",".join(filters)

    data = _get(f"{BASE}/works", params)
    if not data:
        return []
    return [_parse_work(w) for w in data.get("results") or []]


def search_author(name, limit=5):
    """Search for papers by a specific author.

    Takes the first author match for *name*, prints a short summary of that
    author to stderr, and returns up to *limit* (clamped to 1-50) of their
    works ordered by citation count. Returns [] when no author is found or
    the request fails.
    """
    # First find the author
    data = _get(f"{BASE}/authors", {"search": name, "per_page": 1})
    if not data or not data.get("results"):
        print(f"Author '{name}' not found", file=sys.stderr)
        return []

    author = data["results"][0]
    author_id = author["id"]
    stats = author.get("summary_stats") or {}
    print(f"Found: {author['display_name']} ({author.get('works_count', '?')} works, "
          f"h-index: {stats.get('h_index', '?')})", file=sys.stderr)

    # Get their works
    works = _get(f"{BASE}/works", {
        "filter": f"authorships.author.id:{author_id}",
        "sort": "cited_by_count:desc",
        "per_page": _per_page(limit),
    })
    if not works:
        return []
    return [_parse_work(w) for w in works.get("results") or []]


def lookup_doi(doi):
    """Get paper details by DOI.

    Accepts a bare DOI or one prefixed with a doi.org / dx.doi.org URL or
    "doi:". Returns the parsed work dict, or None if the lookup fails.
    """
    data = _get(_work_url(_normalize_doi(doi)))
    if not data:
        return None
    return _parse_work(data)


def get_citations(doi, direction="cited_by", limit=10):
    """Get citation chain for a paper.

    direction: 'cited_by' (papers citing this), 'references' (papers this
    cites), 'both'

    Returns a dict containing one list of parsed works per requested
    direction, each ordered by citation count and capped at *limit*
    (clamped to 1-50). The lists are empty when the paper cannot be found.
    """
    wanted = [key for key, _ in _CITATION_FILTERS if direction in (key, "both")]
    result = {key: [] for key in wanted}

    work = _get(_work_url(_normalize_doi(doi)))
    if not work:
        return result

    # The filters take the short OpenAlex id (last path segment of the URL).
    work_id = (work.get("id") or "").split("/")[-1]
    if not work_id:
        return result

    for key, filter_name in _CITATION_FILTERS:
        if key not in result:
            continue
        page = _get(f"{BASE}/works", {
            "filter": f"{filter_name}:{work_id}",
            "sort": "cited_by_count:desc",
            "per_page": _per_page(limit),
        })
        result[key] = [_parse_work(w) for w in (page or {}).get("results") or []]
    return result


def deep_read(doi):
    """Fetch detailed paper info including abstract and full text URL.

    Looks the paper up via lookup_doi() and then, best-effort, asks
    Unpaywall for its best open-access location; when one is found its URL
    is stored under "pdf_url". An Unpaywall failure is reported on stderr
    and never prevents the paper from being returned. Returns None when the
    paper itself is not found.
    """
    paper = lookup_doi(doi)
    if not paper:
        print("Paper not found", file=sys.stderr)
        return None

    paper_doi = paper.get("doi")
    if not paper_doi:
        return paper

    # Try Unpaywall for better PDF URL
    try:
        r = requests.get(
            f"https://api.unpaywall.org/v2/{quote(paper_doi, safe=_DOI_PATH_SAFE)}",
            params={"email": MAILTO},
            timeout=10,
        )
        if r.status_code == 200:
            up = r.json()
            best = (up.get("best_oa_location") if isinstance(up, dict) else None) or {}
            pdf_url = best.get("url_for_pdf") or best.get("url")
            if pdf_url:
                paper["pdf_url"] = pdf_url
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Unpaywall lookup failed: {e}", file=sys.stderr)
    return paper


def _format_paper(p, idx=None):
    """Render a parsed paper dict as a multi-line, human-readable string.

    *idx*, when given, is used as a "N. " prefix on the title line. The
    abstract is truncated to 300 characters.
    """
    prefix = f"{idx}. " if idx is not None else ""
    oa = "🔓" if p.get("open_access") else "🔒"
    year = f"({p['year']})" if p.get("year") else ""
    cites = f"[{p['citations']} citations]" if p.get("citations") else ""

    all_authors = p.get("authors") or []
    authors = ", ".join(all_authors[:3])
    if len(all_authors) > 3:
        authors += " et al."

    lines = [f"{prefix}{oa} {p['title']} {year} {cites}"]
    if authors:
        lines.append(f" Authors: {authors}")
    if p.get("source"):
        lines.append(f" Source: {p['source']}")
    if p.get("doi"):
        lines.append(f" DOI: {p['doi']}")
    if p.get("oa_url"):
        lines.append(f" URL: {p['oa_url']}")
    if p.get("pdf_url"):
        lines.append(f" PDF: {p['pdf_url']}")
    if p.get("abstract"):
        ab = p["abstract"][:300] + ("..." if len(p["abstract"]) > 300 else "")
        lines.append(f" Abstract: {ab}")
    return "\n".join(lines)


def _print_papers(papers):
    """Print a numbered list of papers, each followed by a blank line."""
    for i, p in enumerate(papers, 1):
        print(_format_paper(p, i))
        print()


def _build_parser():
    """Create the command-line parser with its five sub-commands."""
    parser = argparse.ArgumentParser(description="Academic paper search (OpenAlex)")
    parser.add_argument("--json", action="store_true", help="Output JSON")
    sub = parser.add_subparsers(dest="cmd")

    def add_json_flag(p):
        # SUPPRESS keeps an absent sub-command flag from writing json=False
        # over a --json given before the sub-command; argparse copies every
        # attribute of the sub-parser's namespace onto the parent's.
        p.add_argument("--json", action="store_true",
                       default=argparse.SUPPRESS, help="Output JSON")

    s = sub.add_parser("search", help="Search papers by topic")
    s.add_argument("query")
    s.add_argument("--limit", "-l", type=int, default=10)
    s.add_argument("--sort", choices=["relevance", "citations"], default="relevance")
    s.add_argument("--years", help="Year range, e.g. 2020-2025")
    s.add_argument("--oa", action="store_true", help="Open access only")
    add_json_flag(s)

    a = sub.add_parser("author", help="Search by author name")
    a.add_argument("name")
    a.add_argument("--limit", "-l", type=int, default=5)
    add_json_flag(a)

    d = sub.add_parser("doi", help="Look up paper by DOI")
    d.add_argument("doi")
    add_json_flag(d)

    c = sub.add_parser("citations", help="Get citation chain")
    c.add_argument("doi")
    c.add_argument("--direction", "-d", choices=["cited_by", "references", "both"],
                   default="cited_by")
    c.add_argument("--limit", "-l", type=int, default=10)
    add_json_flag(c)

    dp = sub.add_parser("deep", help="Deep read — full details + PDF URL")
    dp.add_argument("doi")
    add_json_flag(dp)

    return parser


def main():
    """Command-line entry point: parse arguments and run one sub-command.

    With --json (accepted before or after the sub-command) results are
    printed as indented JSON; otherwise as formatted text. Without a
    sub-command the help text is printed.
    """
    parser = _build_parser()
    args = parser.parse_args()
    use_json = args.json

    if args.cmd == "search":
        # The --years value is already in the "START-END" (or single year)
        # form that search() forwards to the API, so pass it through.
        year_range = args.years or None
        results = search(args.query, args.limit, args.sort, year_range, args.oa)
        if use_json:
            print(json.dumps(results, indent=2))
        else:
            print(f"🔍 Found {len(results)} results for: {args.query}\n")
            _print_papers(results)

    elif args.cmd == "author":
        results = search_author(args.name, args.limit)
        if use_json:
            print(json.dumps(results, indent=2))
        else:
            _print_papers(results)

    elif args.cmd in ("doi", "deep"):
        paper = lookup_doi(args.doi) if args.cmd == "doi" else deep_read(args.doi)
        if use_json:
            print(json.dumps(paper, indent=2))
        elif paper:
            print(_format_paper(paper))
        else:
            print("❌ Not found")

    elif args.cmd == "citations":
        result = get_citations(args.doi, args.direction, args.limit)
        if use_json:
            print(json.dumps(result, indent=2))
        else:
            for dir_name, papers in result.items():
                icon = "📥" if dir_name == "cited_by" else "📤"
                heading = dir_name.replace("_", " ").title()
                print(f"\n{icon} {heading} ({len(papers)}):\n")
                _print_papers(papers)

    else:
        parser.print_help()


if __name__ == "__main__":
    main()