{
  "openapi": "3.1.0",
  "info": {
    "title": "web2md API",
    "version": "2.1.0",
    "description": "Convert any URL to clean, structured, LLM-ready markdown. 1M free daily requests.\n\n**Authentication:** Pass your API key via the `x-api-key` header. Sign in at [web2md.app](https://web2md.app) to get your free key.\n\n**Rate Limits:** 100 req/min without API key, 1000 req/min with API key. 2 req/sec per target domain.\n\n---\n\n**A [LeadMagic](https://leadmagic.io) Product**",
    "contact": {
      "name": "LeadMagic",
      "url": "https://leadmagic.io",
      "email": "support@leadmagic.io"
    },
    "x-logo": {
      "url": "https://leadmagic.io/icon.svg",
      "altText": "LeadMagic Logo"
    }
  },
  "servers": [
    {
      "url": "https://web2md.app",
      "description": "Production"
    }
  ],
  "security": [
    {
      "apiKey": []
    },
    {}
  ],
  "components": {
    "securitySchemes": {
      "apiKey": {
        "type": "apiKey",
        "in": "header",
        "name": "x-api-key",
        "description": "Your web2md API key. Get one free at web2md.app"
      }
    },
    "schemas": {
      "ScrapeRequest": {
        "type": "object",
        "required": ["url"],
        "properties": {
          "url": {
            "type": "string",
            "format": "uri",
            "description": "URL to convert to markdown",
            "example": "https://stripe.com/pricing"
          },
          "extract": {
            "type": "boolean",
            "default": false,
            "description": "Extract structured metadata (emails, phones, social links, company info)"
          },
          "frontmatter": {
            "type": "boolean",
            "default": false,
            "description": "Add YAML frontmatter with title, description, URL, timestamp, tier, tokens"
          },
          "retain_images": {
            "type": "boolean",
            "default": false,
            "description": "Keep image references in the markdown output"
          },
          "extract_prompt": {
            "type": "string",
            "maxLength": 300,
            "description": "AI extraction prompt — define what structured data to extract from the page",
            "example": "Return JSON with: company_name, pricing_tiers[], features[], business_type (B2B/B2C)"
          },
          "output_format": {
            "type": "string",
            "enum": ["markdown", "json", "text"],
            "default": "markdown",
            "description": "Output format. Text strips all markdown formatting."
          },
          "method": {
            "type": "string",
            "enum": ["auto", "markdown", "ai", "browser"],
            "default": "auto",
            "description": "Force a specific scraping method. Auto tries tiers in order."
          },
          "async": {
            "type": "boolean",
            "default": false,
            "description": "Queue for background processing. Returns a poll URL."
          },
          "llm_filter": {
            "type": "boolean",
            "default": false,
            "description": "Post-process markdown with AI to remove ads, navigation, and boilerplate"
          },
          "crawl_subpages": {
            "type": "integer",
            "minimum": 1,
            "maximum": 10,
            "description": "Discover and scrape up to N same-domain subpages"
          },
          "crawl_urls": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "description": "Custom URLs to crawl (same-domain only). Overrides discovery when set."
          },
          "crawl_source": {
            "type": "string",
            "enum": ["links", "sitemap", "both"],
            "description": "How to discover links: links=page links, sitemap=sitemap.xml, both=merge"
          }
        }
      },
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean",
            "example": true
          },
          "url": {
            "type": "string",
            "example": "https://stripe.com/pricing"
          },
          "markdown": {
            "type": "string",
            "description": "Converted markdown content"
          },
          "metadata": {
            "type": "object",
            "properties": {
              "title": {
                "type": "string",
                "example": "Pricing & Fees"
              },
              "description": {
                "type": "string"
              },
              "tokens": {
                "type": "integer",
                "description": "Estimated LLM token count",
                "example": 1250
              },
              "tier": {
                "type": "string",
                "enum": ["tier0", "tier1", "tier2", "document", "twitter"],
                "example": "tier1"
              },
              "cached": {
                "type": "boolean"
              },
              "render_time_ms": {
                "type": "integer",
                "example": 385
              }
            }
          },
          "extracted": {
            "type": "object",
            "description": "Present when extract=true",
            "properties": {
              "company_name": {
                "type": "string"
              },
              "emails": {
                "type": "array",
                "items": {
                  "type": "string"
                }
              },
              "phones": {
                "type": "array",
                "items": {
                  "type": "string"
                }
              },
              "social": {
                "type": "object",
                "additionalProperties": {
                  "type": "string"
                }
              },
              "description": {
                "type": "string"
              }
            }
          },
          "ai_extraction": {
            "type": "string",
            "description": "Present when extract_prompt is provided. AI-generated structured data based on your prompt."
          },
          "subpages": {
            "type": "array",
            "description": "Present when crawl_subpages is set. Array of scraped subpages.",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
                "markdown": {
                  "type": "string"
                },
                "title": {
                  "type": "string"
                }
              }
            }
          }
        }
      },
      "AiRequest": {
        "type": "object",
        "required": ["prompt", "markdown"],
        "properties": {
          "prompt": {
            "type": "string",
            "maxLength": 500,
            "description": "Question about the content",
            "example": "What pricing tiers does this company offer?"
          },
          "markdown": {
            "type": "string",
            "description": "The markdown content to analyze"
          },
          "url": {
            "type": "string",
            "description": "Source URL for context"
          }
        }
      },
      "LimitsResponse": {
        "type": "object",
        "properties": {
          "daily_limit": {
            "type": "integer",
            "example": 10000
          },
          "used_today": {
            "type": "integer",
            "example": 42
          },
          "remaining": {
            "type": "integer",
            "example": 9958
          },
          "percent_used": {
            "type": "integer",
            "example": 0
          },
          "reset_at": {
            "type": "string",
            "example": "2026-02-19T14:00:00.000Z"
          },
          "provided_by": {
            "type": "string",
            "example": "LeadMagic"
          }
        }
      },
      "ErrorResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean",
            "example": false
          },
          "error": {
            "type": "string",
            "example": "Invalid URL"
          }
        }
      }
    }
  },
  "tags": [
    {
      "name": "Scrape",
      "description": "Convert URLs to markdown"
    },
    {
      "name": "AI",
      "description": "Analyze scraped content with AI"
    },
    {
      "name": "Usage",
      "description": "Rate limits and usage stats"
    },
    {
      "name": "System",
      "description": "Health and monitoring"
    }
  ],
  "paths": {
    "/api/scrape": {
      "post": {
        "operationId": "scrapeUrl",
        "summary": "Convert URL to Markdown",
        "description": "Fetches a URL and converts it to clean, LLM-ready markdown. Supports AI extraction, frontmatter, subpage crawling, and multiple output formats.\n\n**Tiers:** The scraper automatically tries multiple methods:\n- **Tier 0:** Native markdown endpoint (free, fastest)\n- **Tier 1:** HTML fetch + Readability + Turndown (cheap, reliable)\n- **Tier 2:** Browser rendering (JS-heavy sites)\n- **Twitter:** X/Twitter URLs use syndication API (no browser needed)\n\n**AI Extraction:** Pass `extract_prompt` to get structured data extracted by AI.\n\n**LLM Filter:** Set `llm_filter: true` to clean markdown of ads and boilerplate.\n\n**Subpage Crawl:** Set `crawl_subpages: N` to discover and scrape up to N pages. Use `crawl_source: \"sitemap\"` for sitemap.xml, `\"links\"` for page links, or `\"both\"`. Or pass `crawl_urls: [\"url1\", \"url2\"]` for custom URLs.\n\n**Accept Header:** Send `Accept: text/plain` to get raw markdown instead of JSON.",
        "tags": ["Scrape"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/ScrapeRequest"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful conversion",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "400": {
            "description": "Invalid URL or missing parameters",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "422": {
            "description": "URL could not be scraped",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ErrorResponse"
                }
              }
            }
          },
          "429": {
            "description": "Rate limit exceeded"
          }
        }
      },
      "get": {
        "operationId": "scrapeUrlGet",
        "summary": "Convert URL to Markdown (GET)",
        "description": "Query string variant of the scrape endpoint. Useful for Clay HTTP API GET enrichments and simple integrations.",
        "tags": ["Scrape"],
        "parameters": [
          {
            "name": "url",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uri"
            },
            "description": "URL to scrape",
            "example": "https://example.com"
          },
          {
            "name": "extract",
            "in": "query",
            "schema": {
              "type": "string",
              "enum": ["true", "false"]
            },
            "description": "Extract metadata"
          },
          {
            "name": "frontmatter",
            "in": "query",
            "schema": {
              "type": "string",
              "enum": ["true", "false"]
            },
            "description": "Add YAML frontmatter"
          },
          {
            "name": "retain_images",
            "in": "query",
            "schema": {
              "type": "string",
              "enum": ["true", "false"]
            },
            "description": "Keep images"
          }
        ],
        "responses": {
          "200": {
            "description": "Successful conversion",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "400": {
            "description": "Missing url parameter"
          }
        }
      }
    },
    "/api/ai": {
      "post": {
        "operationId": "analyzeContent",
        "summary": "Analyze content with AI",
        "description": "Send scraped markdown content and a question to get AI-powered analysis. Powered by edge-deployed language models for fast inference.",
        "tags": ["AI"],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/AiRequest"
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "AI analysis result",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "success": {
                      "type": "boolean"
                    },
                    "answer": {
                      "type": "string"
                    },
                    "model": {
                      "type": "string",
                      "example": "instruct-3b"
                    }
                  }
                }
              }
            }
          },
          "400": {
            "description": "Missing prompt or markdown"
          }
        }
      }
    },
    "/api/limits": {
      "get": {
        "operationId": "getDailyLimits",
        "summary": "Get daily usage limits",
        "description": "Returns the current daily API call limit, how many have been used today, and how many remain. Resets at midnight UTC.",
        "tags": ["Usage"],
        "responses": {
          "200": {
            "description": "Current usage",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/LimitsResponse"
                }
              }
            }
          }
        }
      }
    },
    "/api/feed": {
      "get": {
        "operationId": "liveActivityFeed",
        "summary": "Live activity feed (SSE)",
        "description": "Server-Sent Events stream of real-time usage data. Updates every 2 seconds with current limits and recent scrape activity. Connect with `EventSource`.",
        "tags": ["Usage"],
        "responses": {
          "200": {
            "description": "SSE stream",
            "content": {
              "text/event-stream": {
                "schema": {
                  "type": "string"
                }
              }
            }
          }
        }
      }
    },
    "/api/status/{requestId}": {
      "get": {
        "operationId": "getJobStatus",
        "summary": "Poll async job status",
        "description": "Check the status of an asynchronous scrape job submitted with `async: true`.",
        "tags": ["Scrape"],
        "parameters": [
          {
            "name": "requestId",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Job completed with result"
          },
          "202": {
            "description": "Job still processing"
          },
          "404": {
            "description": "Job not found"
          }
        }
      }
    },
    "/api/health": {
      "get": {
        "operationId": "healthCheck",
        "summary": "Health check",
        "description": "Returns service status, version, and timestamp.",
        "tags": ["System"],
        "responses": {
          "200": {
            "description": "Service is healthy",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "example": "healthy"
                    },
                    "service": {
                      "type": "string",
                      "example": "web2md API"
                    },
                    "version": {
                      "type": "string",
                      "example": "2.1.0"
                    },
                    "timestamp": {
                      "type": "string",
                      "format": "date-time"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}