/* global React */
// =========================================================================
// Minimal markdown renderer — headings, paragraphs, bold/italic/code,
// links, images, blockquotes, lists, code fences, horizontal rules, tables.
// Calibrated for the output of Turndown via Readability (server-side).
// =========================================================================

/**
 * Render a markdown string into an array of React elements.
 *
 * NOTE(review): the element markup in this function was reconstructed — the
 * reviewed copy of the file had its tags stripped. Tag choices follow the
 * surviving code (e.g. `"h" + Math.min(level + 1, 6)` with class "md-h3"),
 * but every class name other than "md-h3", and the heading tags for levels
 * 1 and 2, are assumptions. Confirm them against the stylesheet before
 * merging.
 *
 * @param {string} src - Markdown source; falsy values are treated as "".
 * @returns {Array} Keyed React elements, one per top-level block.
 */
function renderMarkdown(src) {
  // Strip zero-width and BOM characters that occasionally hitch a ride
  // through Turndown and end up before a line's first visible char,
  // making `^###` (and other line-start patterns) fail to match.
  // Written with escapes so the invisible characters survive editors,
  // diffs and copy/paste.
  const lines = (src || "").replace(/[\u200B-\u200D\u2060\uFEFF]/g, "").split("\n");
  const out = [];
  let i = 0;
  let key = 0;
  let ledeAssigned = false;

  // Characters a backslash may escape (Turndown escapes these in plain text).
  const ESCAPABLE = "\\`*_{}[]()#+-.!";

  // Create an element with a unique key. One counter is shared by every
  // element, which is enough to keep keys unique among siblings.
  const el = (type, props, ...children) =>
    React.createElement(type, Object.assign({ key: key++ }, props), ...children);

  // Reject URL schemes that can execute script. The markdown comes from
  // arbitrary web pages, so hrefs are untrusted. Browsers ignore ASCII
  // whitespace and control characters inside a scheme, so remove them
  // before testing.
  function isSafeUrl(url, allowDataImage) {
    const probe = url.replace(/[\u0000-\u0020]/g, "").toLowerCase();
    if (allowDataImage && probe.startsWith("data:image/")) return true;
    return !/^(javascript|vbscript|data):/.test(probe);
  }

  // Walk forward from `start` to the first `close` that is not balanced by
  // an earlier `open`. Backslash escapes are skipped so `\)` or `\]` does
  // not close prematurely. Returns -1 when there is no such character.
  function findBalancedEnd(str, start, open, close) {
    let depth = 0;
    for (let k = start; k < str.length; k++) {
      const c = str[k];
      if (c === "\\" && k + 1 < str.length) {
        k++;
        continue;
      }
      if (c === open) {
        depth++;
      } else if (c === close) {
        if (depth === 0) return k;
        depth--;
      }
    }
    return -1;
  }

  // End of a link/image URL — Wikipedia & friends pack parens into links.
  const findUrlEnd = (str, start) => findBalancedEnd(str, start, "(", ")");

  // End of a link/image label — needed for image-in-link syntax like
  // `[![alt](img)](url)`, where a naive indexOf("]") would grab the inner
  // image's bracket instead of the outer link's.
  const findBracketEnd = (str, start) => findBalancedEnd(str, start, "[", "]");

  // Parse inline markdown in `s` into an array of strings and elements.
  function inline(s) {
    const parts = [];
    let buf = "";
    let j = 0;
    const flush = () => {
      if (buf) {
        parts.push(buf);
        buf = "";
      }
    };

    while (j < s.length) {
      const ch = s[j];

      // Backslash escape — render the next char literally. Turndown
      // escapes brackets and other markdown punctuation in plain text
      // (e.g. Wikipedia footnotes like `\[16\]`) and we need to honor it.
      if (ch === "\\" && j + 1 < s.length && ESCAPABLE.includes(s[j + 1])) {
        buf += s[j + 1];
        j += 2;
        continue;
      }

      // Inline code.
      if (ch === "`") {
        const end = s.indexOf("`", j + 1);
        if (end > j) {
          flush();
          parts.push(el("code", null, s.slice(j + 1, end)));
          j = end + 1;
          continue;
        }
      }

      // Image: ![alt](src "title"). The title is parsed off the URL but
      // deliberately not rendered — an earlier overlay driven by
      // title="video"/"gif" showed a play button without playback, which
      // is a misleading affordance.
      if (ch === "!" && s[j + 1] === "[") {
        const bracket = findBracketEnd(s, j + 2);
        if (bracket > j && s[bracket + 1] === "(") {
          const end = findUrlEnd(s, bracket + 2);
          if (end > bracket) {
            flush();
            const alt = s.slice(j + 2, bracket);
            const inner = s.slice(bracket + 2, end);
            const titleMatch = inner.match(/^(\S+)\s+"([^"]*)"$/);
            const imgSrc = (titleMatch ? titleMatch[1] : inner.split(/\s+"/)[0]).trim();
            if (isSafeUrl(imgSrc, true)) {
              parts.push(el("img", { src: imgSrc, alt }));
            } else if (alt) {
              // Unsafe source: keep the alt text, drop the image.
              parts.push(alt);
            }
            j = end + 1;
            continue;
          }
        }
      }

      // Link: [text](href "title").
      if (ch === "[") {
        const bracket = findBracketEnd(s, j + 1);
        if (bracket > j && s[bracket + 1] === "(") {
          const end = findUrlEnd(s, bracket + 2);
          if (end > bracket) {
            flush();
            const text = s.slice(j + 1, bracket);
            const href = s.slice(bracket + 2, end).split(/\s+"/)[0].trim();
            // An unsafe href is dropped; the anchor stays, but inert.
            parts.push(el("a", isSafeUrl(href, false) ? { href } : null, inline(text)));
            j = end + 1;
            continue;
          }
        }
      }

      // Bold.
      if (ch === "*" && s[j + 1] === "*") {
        const end = s.indexOf("**", j + 2);
        if (end > j) {
          flush();
          parts.push(el("strong", null, inline(s.slice(j + 2, end))));
          j = end + 2;
          continue;
        }
      }

      // Strikethrough.
      if (ch === "~" && s[j + 1] === "~") {
        const end = s.indexOf("~~", j + 2);
        if (end > j) {
          flush();
          parts.push(el("del", null, inline(s.slice(j + 2, end))));
          j = end + 2;
          continue;
        }
      }

      // Asterisk italic; the body must contain a non-space character.
      if (ch === "*") {
        const end = s.indexOf("*", j + 1);
        if (end > j && /\S/.test(s.slice(j + 1, end))) {
          flush();
          parts.push(el("em", null, inline(s.slice(j + 1, end))));
          j = end + 1;
          continue;
        }
      }

      // Underscore italic: only opens/closes at word boundaries. Stops
      // identifiers like `utm_source` from being eaten mid-URL.
      if (ch === "_") {
        const prevIsWord = j > 0 && /\w/.test(s[j - 1]);
        if (!prevIsWord) {
          let end = -1;
          for (let k = j + 1; k < s.length; k++) {
            if (s[k] === "_" && (k === s.length - 1 || !/\w/.test(s[k + 1]))) {
              end = k;
              break;
            }
          }
          if (end > j && /\S/.test(s.slice(j + 1, end))) {
            flush();
            parts.push(el("em", null, inline(s.slice(j + 1, end))));
            j = end + 1;
            continue;
          }
        }
      }

      buf += ch;
      j++;
    }
    flush();
    return parts;
  }

  // Collect a list's items into an array of arrays-of-lines, advancing the
  // shared cursor `i`. Handles:
  //   • Multiple consecutive items into one list (so HTML's auto-numbering
  //     spans the whole list rather than restarting at 1 per item)
  //   • Blank lines between items (peek past them and stay in the list if
  //     the next non-blank line is another marker or indented continuation)
  //   • Lazy continuation lines (unindented continuation of the previous
  //     item — Turndown emits these for wrapped item bodies)
  //   • Indented continuation (up to four leading spaces are stripped, so
  //     fenced code blocks nested inside items land at column 0)
  function collectListItems(markerRe) {
    const items = [];
    let buf = null;
    const push = () => {
      if (buf) {
        items.push(buf);
        buf = null;
      }
    };

    while (i < lines.length) {
      const l = lines[i];
      const m = l.match(markerRe);
      if (m) {
        push();
        buf = [l.slice(m[0].length)];
        i++;
        continue;
      }
      if (!l.trim()) {
        let p = i + 1;
        while (p < lines.length && !lines[p].trim()) p++;
        if (p >= lines.length) break;
        const next = lines[p];
        if (markerRe.test(next)) {
          i = p;
          continue;
        }
        if (/^ {2,}\S/.test(next)) {
          if (buf) buf.push(""); // preserve paragraph break inside item
          i = p;
          continue;
        }
        break;
      }
      // Anything else: only count it as continuation if we already have an
      // item open. Bail out otherwise so a stray line doesn't get swallowed.
      if (!buf) break;
      if (/^(#{1,6}\s|>\s)/.test(l)) break;
      buf.push(l.replace(/^ {1,4}/, ""));
      i++;
    }
    push();
    return items;
  }

  // Render one list item's content as a mix of inline paragraphs and code
  // fences. Item-internal code fences are recognized at column 0 because
  // collectListItems has already stripped the list-indent off each line.
  function renderListItemContent(itemLines) {
    const blocks = [];
    let li = 0;
    let para = [];
    const flushPara = () => {
      if (para.length) {
        const text = para.join(" ").replace(/\s+/g, " ").trim();
        if (text) blocks.push(el("p", null, inline(text)));
        para = [];
      }
    };

    while (li < itemLines.length) {
      const l = itemLines[li];
      if (l.startsWith("```")) {
        flushPara();
        const code = [];
        li++;
        while (li < itemLines.length && !itemLines[li].startsWith("```")) {
          code.push(itemLines[li]);
          li++;
        }
        li++; // skip the closing fence
        blocks.push(el("pre", null, el("code", null, code.join("\n"))));
        continue;
      }
      if (!l.trim()) {
        flushPara();
        li++;
        continue;
      }
      para.push(l);
      li++;
    }
    flushPara();
    return blocks;
  }

  // Render an ordered or unordered list starting at the current line.
  const renderList = (tag, markerRe) =>
    el(
      tag,
      null,
      collectListItems(markerRe).map((itemLines) =>
        el("li", null, renderListItemContent(itemLines))
      )
    );

  while (i < lines.length) {
    const line = lines[i];

    // Horizontal rule: three or more of the same -, * or _ character.
    if (/^\s{0,3}([-*_])(\s*\1){2,}\s*$/.test(line)) {
      out.push(el("hr", null));
      i++;
      continue;
    }

    // Fenced code block. Any text after the opening fence is ignored.
    if (line.startsWith("```")) {
      const block = [];
      i++;
      while (i < lines.length && !lines[i].startsWith("```")) {
        block.push(lines[i]);
        i++;
      }
      i++; // skip the closing fence
      out.push(el("pre", null, el("code", null, block.join("\n"))));
      continue;
    }

    // ATX heading. CommonMark allows up to 3 leading spaces — Turndown
    // emits them in some nested contexts (notably headings inside list
    // items or figure captions). The optional closing `#` run must be
    // preceded by whitespace, so a title like "C#" keeps its trailing hash.
    const m = line.match(/^ {0,3}(#{1,6})\s+(.*?)(?:\s+#+)?\s*$/);
    if (m) {
      const level = m[1].length;
      // NOTE(review): the level+1 tag shift and class "md-h3" are taken from
      // the surviving level>=3 branch; applying the same shift to levels 1-2
      // and naming their classes "md-h1"/"md-h2" is an assumption.
      out.push(
        el(
          "h" + Math.min(level + 1, 6),
          { className: "md-h" + Math.min(level, 3) },
          inline(m[2])
        )
      );
      i++;
      continue;
    }

    // Blockquote: consecutive lines starting with ">"; empty lines inside
    // the quote are dropped.
    if (line.startsWith("> ")) {
      const block = [];
      while (i < lines.length && lines[i].startsWith(">")) {
        block.push(lines[i].replace(/^>\s?/, ""));
        i++;
      }
      out.push(
        el(
          "blockquote",
          null,
          block.filter((b) => b.length).map((b) => el("p", null, inline(b)))
        )
      );
      continue;
    }

    if (/^\d+\.\s/.test(line)) {
      out.push(renderList("ol", /^\d+\.\s+/));
      continue;
    }

    if (/^[-*]\s/.test(line)) {
      out.push(renderList("ul", /^[-*]\s+/));
      continue;
    }

    // GFM tables: `| header | header |` with a delimiter row of `|---|---|`.
    // Optional leading/trailing pipes; cells get trimmed; delimiter row's
    // colons drive per-column alignment. Allows single-column tables too.
    if (
      line.startsWith("|") &&
      i + 1 < lines.length &&
      /^\s*\|?\s*:?-{2,}:?\s*(\|\s*:?-{2,}:?\s*)*\|?\s*$/.test(lines[i + 1])
    ) {
      const splitRow = (row) => {
        let s = row.trim();
        if (s.startsWith("|")) s = s.slice(1);
        if (s.endsWith("|")) s = s.slice(0, -1);
        // Trim every cell, including the outer edges of the first and last.
        return s.split("|").map((cell) => cell.trim());
      };
      const header = splitRow(line);
      const aligns = splitRow(lines[i + 1]).map((c) => {
        const l = c.startsWith(":");
        const r = c.endsWith(":");
        if (l && r) return "center";
        if (r) return "right";
        if (l) return "left";
        return null;
      });
      i += 2;
      const rows = [];
      while (i < lines.length && /^\s*\|.+\|/.test(lines[i])) {
        rows.push(splitRow(lines[i]));
        i++;
      }
      const alignProps = (col) =>
        aligns[col] ? { style: { textAlign: aligns[col] } } : null;
      out.push(
        el(
          "table",
          null,
          el(
            "thead",
            null,
            el("tr", null, header.map((c, ci) => el("th", alignProps(ci), inline(c))))
          ),
          el(
            "tbody",
            null,
            rows.map((r) =>
              el("tr", null, r.map((c, ci) => el("td", alignProps(ci), inline(c))))
            )
          )
        )
      );
      continue;
    }

    if (!line.trim()) {
      i++;
      continue;
    }

    // Paragraph: join lines until a blank line or the start of another block.
    const p = [line];
    i++;
    while (
      i < lines.length &&
      lines[i].trim() &&
      !/^ {0,3}(#{1,6}\s|>\s|[-*]\s|\d+\.\s|```|\|)/.test(lines[i])
    ) {
      p.push(lines[i]);
      i++;
    }
    const text = p.join(" ").trim();
    if (!text) continue;

    // Drop-cap qualification: only the first paragraph that qualifies gets
    // it. It must start with a real letter, digit or quote (not [link],
    // !image, or other punctuation) and be substantial — otherwise the drop
    // cap lands on something like "[About](url)" and looks broken.
    const isLede = !ledeAssigned && text.length >= 80 && /^[A-Za-z0-9"'“‘]/.test(text);
    if (isLede) ledeAssigned = true;
    // NOTE(review): class names "md-p"/"md-lede" are assumed — confirm.
    out.push(el("p", { className: isLede ? "md-p md-lede" : "md-p" }, inline(text)));
  }

  return out;
}

/**
 * Build the markdown the user gets when they copy/download.
 *
 * @param {{title?: string, byline?: string, site?: string, date?: string, body?: string}} doc
 * @returns {string} Optional `# title` line, optional italic meta line
 *   (byline · site · date), then the body and a trailing newline.
 */
function toFullMarkdown(doc) {
  const meta = [doc.byline, doc.site, doc.date].filter(Boolean).join(" · ");
  // Skip the heading when there is no title, rather than emitting "# undefined".
  const titleLine = doc.title ? `# ${doc.title}\n\n` : "";
  const metaLine = meta ? `*${meta}*\n\n` : "";
  return `${titleLine}${metaLine}${doc.body || ""}\n`;
}

/**
 * Count whitespace-separated words.
 *
 * @param {string} s - Text to count; falsy values count as zero words.
 * @returns {number}
 */
function wordCount(s) {
  return (s || "").trim().split(/\s+/).filter(Boolean).length;
}

/**
 * Estimate reading time at 230 words per minute.
 *
 * @param {string} s
 * @returns {number} Whole minutes, never less than 1.
 */
function readMinutes(s) {
  return Math.max(1, Math.round(wordCount(s) / 230));
}

/**
 * Turn a title into a lowercase, hyphen-separated slug of at most 60 chars.
 *
 * @param {string} s - Source text; falsy values yield "document".
 * @returns {string} The slug, or "document" when nothing usable remains.
 */
function slugify(s) {
  const slug = (s || "document")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-/, "")
    .slice(0, 60)
    // Trim the trailing hyphen after truncating, so a cut that lands just
    // after a separator does not leave a dangling "-".
    .replace(/-$/, "");
  return slug || "document";
}

// Expose the helpers as globals for the page's other scripts. The guard
// lets the file load where there is no `window` (tests, SSR).
if (typeof window !== "undefined") {
  Object.assign(window, {
    renderMarkdown,
    wordCount,
    readMinutes,
    slugify,
    toFullMarkdown,
  });
}