Table of Contents
Implementation
import unified from 'unified'
import markdown from 'remark-parse'
import remarkStringify from 'remark-stringify'
import slug from 'remark-slug'
import headingIds from 'remark-heading-id'
import toc from 'remark-toc'
import footnotes from 'remark-footnotes'
import gfm from 'remark-gfm'
import wikiLinkPlugin from 'remark-wiki-link'
import select from 'unist-util-select'
import { to_string } from './utils/mdast-util-to-string'
import toMarkdown from 'mdast-util-to-markdown'
import {sections, ungroupSections} from './sections'
import {sections as sectionsV3} from './sections-v3'
import {cells as cellsV3} from './cells-v3'
import codeblocks, {parseMeta, metaToString} from './codeblocks'
import frontmatter from './frontmatter'
import {mdblocks} from './mdblocks'
import links, { decorateLinkNode } from './links'
import { getConsoleForNamespace} from '../utils/console'
import {time} from '../utils/timings'
//import { transform as jsTransform } from './transformers/js'
// JSX transformer is currently disabled; exported as null in `transformers` below.
const jsTransform = null
// Namespaced logger for this module (shadows the global console).
const console = getConsoleForNamespace('parser')
// unified plugin factory: the returned transformer records a timing marker
// (via ../utils/timings) each time the pipeline runs; tree/file args are ignored.
const timer = () => ({ns, marker}) => (t,f) => { time(ns,marker) }
// Builds the core remark pipeline shared by the full processor and the nested
// `md` codeblock reparser (see mdblocks): parse markdown (GFM + comment
// frontmatter), extract a title, resolve wiki-links, add heading slugs/ids.
const baseProcessor = ({litroot, files} = {}) => {
return unified()
.use(timer(),{ns:'parser'})
// Seed file.data and compute the file's canonical path from its path.
.use((...args) => (tree, file) => {
console.log("Parsing file: ", file.path)
file.data = file.data || {}
if (file && file.path) {
file.data.canonical = decorateLinkNode({url: file.path}, '', '/', files).data.canonical
}
})
// remark
.use(markdown, {})
.use(gfm)
.use(frontmatter, {})
// Extract title: fall back to the first heading when frontmatter has none.
.use((...args) => (tree,file) => {
if(!file.data.frontmatter || !file.data.frontmatter.title) {
file.data = file.data || {}
file.data.frontmatter = file.data.frontmatter || {}
const heading = select.select('heading', tree)
// console.log("Found heading:", heading)
if (heading) {
console.log("No title in frontmatter, extracting heading.")
const title = to_string(heading)
file.data.frontmatter.title = title
}
}
},{})
.use(wikiLinkPlugin, links.wikiLinkOptions(files))
.use(slug)
.use(headingIds)
.use(footnotes, {inlineNotes: true})
// Closing marker lets callers measure base parse time.
.use(timer(),{ns:'parser', marker: 'baseProcessorComplete'})
}
// Full lit processor: the base pipeline plus lit-specific codeblock metadata,
// nested markdown reparsing, link resolution, table of contents, and
// section/cell grouping. `fs` is accepted but currently only logged.
export const processor = ({files, fs, litroot} = {files: []}) => {
console.log(`Setting up processor litroot: "${litroot}" files: ${!!files} fs: ${!!fs}`)
return baseProcessor({files, litroot})
// remark-litmd (rehype compatible)
.use(codeblocks)
// Async reparse `md` codeblocks as children
.use(mdblocks, {baseProcessor, litroot, files})
.use(links.resolveLinks({litroot, files}))
.use(toc, {})
// v3 sectioning: group headings into sections, wrap content into cells.
.use(sectionsV3({processSection: cellsV3}), {})
.use(timer(),{ns:'parser', marker: 'processorComplete'})
}
// Parser internals re-exported for consumers (e.g. editor / save paths).
export const utils = {
mdblocks,
sections, ungroupSections,
links,
codeblocks, parseMeta, metaToString,
remarkStringify,
to_string,
toMarkdown,
frontmatter,
}
/**
 * Parse a lit/markdown vfile into an mdast tree.
 *
 * @param {VFile|object} vfile - the file to parse.
 * @param {object} options - processor options ({files, fs, litroot}).
 * @returns the parsed tree, with the fully transformed AST at `parsed.data.ast`.
 */
export async function parse(vfile, options) {
  const p = processor(options)
  const parsed = p.parse(vfile)
  // Pass the vfile through run() so transformer-collected data (frontmatter,
  // canonical path, md block counts) attaches to the caller's vfile instead
  // of a throwaway file unified would otherwise create internally.
  const ast = await p.run(parsed, vfile)
  if (!parsed.data) parsed.data = {}
  parsed.data.ast = ast
  return parsed
}
// Output transformers keyed by target format; jsx is currently disabled (null).
export const transformers = {
jsx: jsTransform,
}
// Serialize a vfile back to markdown: unwrap the section/cell grouping first,
// then stringify with dash bullets. Returns the unified process() promise.
export function stringify(vfile) {
  const serializer = processor()
    .use(ungroupSections())
    .use(remarkStringify, { bullet: '-' })
  return serializer.process(vfile)
}
Extensions
Frontmatter
Implementation
import visit from 'unist-util-visit'
import before from 'unist-util-find-before'
import after from 'unist-util-find-after'
import yaml from 'js-yaml'
import {log, level} from '../utils/console'
import { notEqual } from 'assert'
import { getConsoleForNamespace } from '../utils/console'
// Namespaced logger for the frontmatter plugin.
const console = getConsoleForNamespace('frontmatter')
// Frontmatter lives in HTML comments of the form `<!-- data ... -->`.
const FRONTMATTER_OPEN = '<!-- data'
const FRONTMATTER_CLOSE = '-->'
/**
 * remark plugin: collects YAML frontmatter from `<!-- data ... -->` HTML
 * comment nodes, stores the parsed object (or {error}) on each node's data,
 * and merges all blocks (later blocks win) into file.data.frontmatter.
 */
export default function (...args) {
  return (tree, file) => {
    const matters = []
    visit(tree, 'html', (node, index, parent) => {
      const value = node.value
      const opens = value.indexOf(FRONTMATTER_OPEN) === 0
      const closes = value.indexOf(FRONTMATTER_CLOSE) === (value.length - FRONTMATTER_CLOSE.length)
      if (opens && closes) {
        console.log('Raw', value)
        const yamlString = value.slice(FRONTMATTER_OPEN.length, value.length - FRONTMATTER_CLOSE.length).trim()
        try {
          // js-yaml's load() takes an options object as its second argument;
          // the previous 'utf8' string argument was invalid and is dropped.
          node.data = yaml.load(yamlString)
        } catch (err) {
          node.data = {error: err.toString()}
        }
        console.log('Parsed', yamlString)
        matters.push(node.data)
      }
    })
    // Guard: earlier plugins usually create file.data, but don't depend on it.
    file.data = file.data || {}
    file.data.frontmatter = matters.reduce((memo, matter) => Object.assign({}, memo, matter || {}), {})
  }
}
Sections
Sections are automatically created from the nested structure of Headings.
# Headline (root section)
## Subtitle (child section)
They can be collapsed/folded (tbd)
# >Headline (collapsed)
Implementation
import heading from "mdast-util-heading-range";
import visit from "unist-util-visit";
import flatMap from "unist-util-flatmap";
import { getConsoleForNamespace } from '../utils/console'
import { Identity } from "../utils/functions";
// Namespaced logger; disabled by default for this noisy module.
const console = getConsoleForNamespace('sections', {disabled: true})
// True when the node's first child exists and has the given type; falsy
// (undefined/false) otherwise.
const firstChild = (node, type) => {
  const children = node.children
  return children && children[0] && children[0].type === type
}
// Wraps a node (or an explicit node list) in a "cell" container node.
// The hName/hProperties pair makes rehype render it as <cell class="cell">.
const createCell = (anchor, explicitChildren) => {
  const cellData = {
    hName: "cell",
    hProperties: {
      class: "cell",
    },
  }
  return {
    type: "cell",
    position: anchor.position,
    data: cellData,
    children: explicitChildren || [anchor],
  }
}
// Wraps a node (or an explicit node list) in a "section" container node.
// When no explicit children are given, the anchor's own children are first
// regrouped into cells.
const createSection = (anchor, explicitChildren) => {
  if (!explicitChildren) {
    anchor.children = cellsFromNodes(anchor.children)
  }
  return {
    type: "section",
    data: {
      hName: "section",
    },
    position: anchor.position,
    children: explicitChildren || [anchor],
  }
}
// Groups a flat node list into "cell" wrapper nodes. Sections pass through
// untouched. A code block normally gets its own cell, but when the NEXT node
// is marked `attached` the cell stays open and absorbs following nodes until
// an attached code block closes it. Consecutive plain nodes share one cell
// (the first opens it, later ones are absorbed). The `false &&` branches are
// deliberately disabled experiments for spread lists / list items.
const cellsFromNodes = (nodes, {addSectionDataToFirstCell}={}) => {
const cells = [];
let newCell = null;
nodes.map((current, index) => {
const node = current;
console.log("[Sections] child: ", index, node.type);
if (node.type === "section") {
// Already grouped; close any open cell.
newCell = null;
cells.push(node);
} else if (false && node.type === "list" && node.spread) {
newCell = null;
let listSection = createSection(node)
cells.push(listSection);
} else if (false && node.type === "listItem" && node.spread) {
newCell = null;
let listItem = node
if (firstChild(listItem, 'section')) {
console.log("[Sections] ListItem with section: ", node.type);
listItem.children = listItem.children.map( section => {
section.children = cellsFromNodes(section.children)
})
} else {
listItem.children = [createSection(node, node.children)]
}
cells.push(listItem);
} else if (node.type === "code") {
const next = nodes[index+1]
// `attached` = codeblock meta flag linking a block to its neighbor.
const attached = node => node && node.data && node.data.meta && node.data.meta.attached
const nextIsAttached = attached(next)
let singleCell = createCell(node)
if (nextIsAttached) {
// Keep this cell open so following nodes join the code block.
newCell = singleCell
} else if (newCell && attached(node)) {
// This attached code block closes the currently open cell.
newCell.children.push(node)
if (node.position) newCell.position.end = node.position.end;
cells.push(newCell)
newCell = null
} else {
newCell = null
cells.push(singleCell);
}
} else if (newCell) {
// Absorb into the open cell and extend its position.
newCell.children.push(node);
if (node.position) newCell.position.end = node.position.end;
} else {
// Open a new cell; subsequent non-code nodes will be absorbed into it.
// NOTE(review): despite the name, addSectionDataToFirstCell is stamped on
// EVERY cell opened here, not only the first — confirm intent.
newCell = createCell(node)
if(addSectionDataToFirstCell) newCell.data.section = addSectionDataToFirstCell
cells.push(newCell);
}
});
return cells;
}
// Transformer: wraps each root-level heading plus its following siblings
// (until the next heading/section at the same or shallower depth) into a
// "section" node, then groups each section's children into cells. Visits
// in reverse (`true` arg) so later headings are wrapped before earlier ones.
export const sections = (...args) => (tree) => {
console.log('[Sections II] Init.', args, tree.type, tree.children.length)
let headings = 0
// Builds a section node from its children; marks the heading `processed`
// so the reverse visit does not wrap it twice.
const newSection = (children) => {
const first = children[0]
const last = children[ children.length - 1]
const depth = first.depth || 0
first.processed = true
return {
type: 'section',
data: {
name: first.data.id,
hName: 'section',
hProperties: {
depth: depth,
id: first.data.id,
}
},
depth: depth,
children: children,
position: {
start: first.position.start,
end: last.position.end
}
}
}
visit(tree, 'heading', (node, index, parent) => {
if (node.processed) {
console.log(`[Sections II] Ignoring already processed node ${node.data.id}`)
} else if (parent.type === 'root') {
console.log(`[Sections II] heading "${node.data.id}" ${headings}, depth: ${node.depth}`)
// remove rehype ids
node.data.hProperties = {}
const section = parent.children[index] = newSection([node])
const children = parent.children
// Pull following siblings into the section until a same-or-shallower
// heading/section (or a hole/processed node) ends it.
for (let i = index + 1; i < children.length; i++) {
if (!children[i] || children[i].processed) {
console.log('Skipping removed', children[i])
break
}
const nextNode = children[i]
if ((nextNode.type === 'heading' || nextNode.type === 'section') && nextNode.depth <= node.depth) {
console.log(`[Sections II] ended "${node.data.id}" due to "${nextNode.data.id || nextNode.data.name}"`, nextNode.type, nextNode.depth)
console.log(`[Sections II] contains: "${node.data.id}"`, section.children.map( n => n.type).join(','))
break;
}
console.log(`[Sections II] child index: ${i}, type: ${nextNode.type} depth: ${nextNode.depth} id: ${nextNode.data && (nextNode.data.id || nextNode.data.name)}`)
section.children.push(nextNode)
if (nextNode.position) section.position.end = nextNode.position.end
// Leaves a hole in the array; holes are filtered out after the visit.
delete parent.children[i]
}
headings++
node = section
// NOTE(review): newSection sets data.name and hProperties.id but not
// data.id, so `section.data.id` below is likely undefined — confirm.
node.children = cellsFromNodes(node.children, {addSectionDataToFirstCell: { id: section.data.id, position: section.position}})
} else {
console.log('[Sections II] WARN: Header parent not root', node.data.id)
}
}, true)
console.log("Headings: ", headings)
if (!headings) {
// No headings at all: just wrap the root children into cells.
tree.children = cellsFromNodes(tree.children)
} else {
// Drop the holes left by `delete` above.
tree.children = tree.children.filter(Identity)
}
}
// NOTE(review): this passes `cells` itself to visit() as the test/visitor —
// it looks like an unfinished stub and appears unused; confirm before use.
const cells = (...args) => (tree) => {
visit( tree, cells )
}
// Inverse of sectioning: flattens the tree by replacing every cell and
// section wrapper with its children, leaving all other nodes untouched.
export const ungroupSections = (options = {}) => (...args) => (tree) => {
  console.log("[UnSection] Init", options)
  return flatMap(tree, (node) => {
    const isWrapper = node.type === "cell" || node.type === "section"
    return isWrapper ? node.children : [node]
  })
}
Cells
Currently implemented as part of Sections, see above.
Codeblocks
Implementation
import visit from 'unist-util-visit'
import { getConsoleForNamespace } from '../utils/console'
// Namespaced logger for codeblock meta parsing.
const console = getConsoleForNamespace('codeblocks')
// Placeholder used to protect escaped spaces while splitting meta strings.
const LSP = '__.litsp__'
const NONESCAPEDSPACES_REGEX = /([^\\])\s/g
// Part-type identifiers produced by ident() below.
const LANG = 'lang'
const ATTR = 'attribute'
const TAG = 'tag'
const DIREC = 'directive'
const FILENAME = 'filename'
const URI = 'uri'
const UNKNOWN = 'unknown'
// Types that accumulate into plural buckets (tags, directives, unknowns).
const isListType = (t) => [TAG, DIREC, UNKNOWN].includes(t)
// remark plugin: annotate every `code` node with its parsed lit meta
// (see transform below).
export default function (...args) {
return (tree) => visit( tree, 'code', transform )
}
// Visitor for `code` nodes: parses the info string into lit meta and stores
// it on node.data, exposing tag-* class names to rehype via hProperties.
function transform (node, index, parent) {
  console.log( '[CodeBlocks] Visiting: ', node.lang, node.meta)
  const litMeta = parseMeta(node)
  const tagClasses = litMeta && litMeta.tags
    ? litMeta.tags.map((t) => `tag-${t}`).join(' ')
    : ''
  node.data = {
    ...node.data,
    meta: litMeta,
    hProperties: {
      className: tagClasses,
      meta: litMeta,
    },
  }
  return node
}
// Parses a codeblock's info string — `lang filename #tag !directive k=v`,
// optionally with `< source` and `> output` redirections — into a meta object.
export const parseMeta = function parseMeta (node) {
const raw = `${node.lang || ''} ${node.meta || ''}`.trim()
console.log(`[CodeBlocks] lang: "${node.lang}" meta: "${node.meta}", raw: "${raw}"`)
// A leading '>' marks this whole block as the output of another block.
const isOutput = raw.indexOf('>') === 0
const hasOutput = node.meta && node.meta.indexOf('>') >= 0
let hasSource = node.meta && node.meta.indexOf('<') >= 0
let input = raw
let _, output, source
let fromSource;
if (isOutput) {
[_, input] = raw.split('>').map( x => x.trim() )
}
if (hasOutput) {
[input, output] = input.split('>').map( x => x.trim() )
}
if (hasSource) {
[input,source] = input.split('<').map( x => x.trim() )
}
// Protect escaped spaces, split on the placeholder, classify each part
// (ident), then fold everything into one meta object (reduceParts).
const meta = input
.replace(NONESCAPEDSPACES_REGEX, "$1" + LSP)
.split(LSP)
.map(ident)
.reduce(reduceParts, {})
meta.isOutput = isOutput
// Redirection targets are themselves parsed as nested meta objects.
meta.output = output && parseMeta({ meta: output })
meta.hasOutput = !!output
meta.hasSource = !!source
meta.source = source && parseMeta({ lang: 'txt', meta: source })
meta.raw = raw
if (meta.source) meta.fromSource = meta.source.filename || meta.source.uri
return meta
}
// True for protocol-qualified (http/https...) or protocol-relative (//) URLs.
function isUri(str) {
  return /^(http|\/\/)/.test(str)
}
// Classifies one whitespace-delimited meta part by position and prefix:
// index 0 is the language; '#x' is a tag, '!x' a directive, 'k=v' an
// attribute; index 1 otherwise is a uri/filename; anything else is unknown.
function ident (x, i) {
  let type
  let value = x
  if (i === 0) {
    type = LANG
  } else if (x && x[0]) {
    const first = x[0]
    if (first === "#") {
      type = TAG
      value = x.slice(1)
    } else if (first === "!") {
      type = DIREC
      value = x.slice(1)
    } else if (x.indexOf("=") > 0) {
      type = ATTR
      const [attrKey, attrValue] = x.split("=")
      value = {
        type: attrKey,
        value: attrValue,
      }
    } else if (i === 1) {
      type = isUri(x) ? URI : FILENAME
    } else {
      type = UNKNOWN
    }
  }
  return {type, value}
}
// Reducer folding classified parts into one meta object: attributes land in
// memo.attrs (and, after reassignment, also top-level keyed by attr name);
// list types (tags/directives/unknowns) accumulate into plural arrays;
// everything else is set as a single top-level key.
function reduceParts(memo,item, i) {
memo.attrs = memo.attrs || {}
if (item.type === ATTR){
memo.attrs[item.value.type] = item.value.value
// Unwrap so the {type: key, value} pair is treated like any other part.
item = item.value
}
if (isListType(item.type)) {
const collective = `${item.type}s`
if(memo[collective]) {
memo[collective]
.push(item.value)
} else {
memo[collective] = [item.value]
}
if (item.type === DIREC) {
// memo.attrs[item.value] = true
}
} else {
memo[item.type] = item.value
}
return memo
}
// Inverse of parseMeta: serializes a meta object back to an info string in
// the canonical order lang, filename/uri, !directives, k=v attrs, #tags,
// then any `< source` / `> output` redirections (recursively).
export const metaToString = (meta) => {
  const attrToString = ([key, value]) => `${key}=${value}`
  const parts = []
  if (meta.isOutput) parts.push(">")
  parts.push(meta.lang)
  parts.push(meta.filename || meta.uri)
  for (const d of meta.directives || []) parts.push(`!${d}`)
  for (const entry of Object.entries(meta.attrs || {})) parts.push(attrToString(entry))
  for (const t of meta.tags || []) parts.push(`#${t}`)
  if (meta.source) {
    parts.push("<", metaToString(meta.source))
  }
  if (meta.output) {
    parts.push(">", metaToString(meta.output))
  }
  // Drop falsy entries (missing lang/filename, non-output flag).
  return parts.filter((x) => x).join(" ")
}
Markdown blocks
Implementation
import visit from 'unist-util-visit'
import {getConsoleForNamespace} from '../utils/console'
import vfile from 'vfile'
// Namespaced logger for the mdblocks plugin.
const console = getConsoleForNamespace('mdblocks')
/**
 * Async remark plugin: reparses the contents of every `md`-language codeblock
 * with the base processor and attaches the resulting AST as the node's
 * children. Counts processed blocks in file.data.__mdcodeblocks.
 *
 * Previous version wrapped an async function in `new Promise(...)`: any error
 * thrown inside never resolved or rejected the promise, hanging the pipeline.
 * Collecting nodes first and using Promise.all lets errors propagate.
 */
export const mdblocks = function ({baseProcessor, files, litroot}) {
  return async (tree, file) => {
    file.data = file.data || {}
    file.data.__mdcodeblocks = 0
    const filepath = file?.data?.canonical || 'inexplicable.ext'
    // visit() is synchronous, so gather matching nodes first, reparse after.
    const mdNodes = []
    visit(tree, 'code', (node, index, parent) => {
      if (!node.data || !node.data.meta || node.data.meta.lang !== 'md') return
      mdNodes.push(node)
    })
    await Promise.all(mdNodes.map(async (node) => {
      const idx = file.data.__mdcodeblocks++
      const mdfile = vfile({path: filepath, contents: node.value})
      const p = baseProcessor({files, litroot})
      const parsed = p.parse(mdfile)
      const ast = await p.run(parsed, mdfile)
      console.log(idx + "MD AST: ", filepath, mdfile, ast)
      node.children = ast.children
    }))
    return null
  }
}
Links
Implementation
import {join, resolve, relative as isRelative, dirname} from "path";
import visit from "unist-util-visit";
import { getConsoleForNamespace } from "../utils/console";
// Namespaced logger for link resolution.
const console = getConsoleForNamespace("links");
// remark plugin factory: decorates every link/wikiLink node in the tree.
// NOTE: mutates the passed options object to remember the current file path.
export const resolveLinks =
(options = { litroot: "", filepath: "", files: []}) =>
(...args) =>
(tree, file) => {
// console.log("[Links] Init", file.path, options);
// Prefer an explicit filepath, then the canonical path computed upstream.
options.filepath = options.filepath || file?.data?.canonical || file.path
return visit(tree, isLink, transform(options));
};
// Normalizes a path segment: strip characters outside [word, space, /, -],
// trim, collapse whitespace runs to underscores, and lowercase.
export const slug = (str) => {
  const cleaned = str.replace(/[^\w\s/-]+/g, "").trim()
  return cleaned.replace(/\s+/g, "_").toLowerCase()
}
/**
 * Resolves a wiki-link target of the form "doc?query#hash|title" into
 * { title, path, ext, hash, query, section, href }.
 * @throws when str is empty/undefined, or when splitting fails.
 */
export const resolver = (str) => {
  // console.log("Input: ", str);
  if (!str) {
    // (previous unreachable process.exit(1) after this throw was removed)
    throw Error("No string to resolve")
  }
  let main, title, doc, hash, base, query, file, ext, _
  try {
    [main, title] = str.split("|");
    [doc, hash] = main.split("#");
    [base, query] = doc.split("?");
    [_, file, ext = ''] = base.match(/([^\.]+)(\.[a-zA-Z0-9]+)?$/) || []
  } catch(err) {
    console.log({str, main, title, doc, hash, base, query, file, ext, _})
    console.log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<', err)
    // Rethrow instead of process.exit(1): killing the host process from a
    // parser is wrong (and crashes in the browser); let callers handle it.
    throw err
  }
  // Paths already rooted ('/') or explicitly relative ('.') keep no prefix;
  // bare names get a leading '/'.
  const root = file && file[0] && ['/', '.'].indexOf(file[0]) >= 0 ? '' : '/'
  const path = file && (root + slug(file))
  const section = hash && slug(hash);
  // Fall back to '' so hash-only links don't serialize as "undefined#...".
  const href = (path || "") + (query ? "?" + query : "") + (hash ? "#" + section : "");
  // console.log('Href: ', href)
  return { title, path, ext, hash, query, section, href };
};
// Predicate for visit(): matches both markdown links and wiki-links.
const isLink = (node) => node.type === "link" || node.type === "wikiLink";
// Visitor factory: decorate each matched node with resolved link data.
const transform = (options) => (node, index, parent) =>
  decorateLinkNode(node, options.litroot, options.filepath, options.files);
// Options for remark-wiki-link: resolve a page name to candidate permalinks
// by trying .lit, a folder index, .md, and the name's own extension.
export const wikiLinkOptions = (files = []) => {
  const pageResolver = (name) => {
    // console.log('Resolving', name)
    const { path, ext } = resolver(name)
    const candidates = ['.lit', '/index.lit', '.md', ext]
    return candidates.map((candidate) => `${path}${candidate}`)
  }
  return {
    permalinks: files,
    pageResolver,
  }
}
// ({
// permalinks: files,
// pageResolver: nameToPermalinks,
// hrefTemplate: (permalink) => `${permalink}?file=${permalink}`
// })
// Extracts the raw target url from a link node: wiki-links carry it in
// data.permalink, ordinary links in url. (`root` is currently unused.)
const linkToUrl = (link, root) => {
  if (link.type !== "wikiLink") {
    return link.url
  }
  return link.data.permalink
}
// Mutates a link/wikiLink node in place: classifies its url (external,
// absolute, relative, fragment), computes the canonical project path and an
// .html href, splits "text|title" values, and records everything on
// node.data (plus hProperties for rehype). Returns the mutated node.
export const decorateLinkNode = (link, root = "", filepath = "", files = []) => {
// console.log(link)
const wikilink = link.type === "wikiLink";
const url = linkToUrl(link, root);
const external = /^(https?\:)?\/\//.test(url);
const absolute = !external && /^\//.test(url);
const fragment = /^(\?|#)/.test(url);
const relative = url && !absolute && !fragment && !external;
const srcToGH = (src, prefix) => join(prefix, src); // NOTE(review): unused here
const relToCanonical = (src, link) => resolve(dirname(src), link);
const canonicalToRel = (src1, src2) => isRelative(dirname(src1), src2)
let canonical = url;
let href = url;
// Strip query/fragment before canonicalizing the path portion.
let [base, frag] = url.split(/(\?|#)/);
if (relative) {
canonical = relToCanonical(filepath, base)
href = url.replace(/\.(md|lit)/i, ".html") ;
} else if (absolute) {
// Rewrite absolute urls relative to the current file's directory.
const rel = canonicalToRel(filepath, url);
href = rel.replace(/\.(md|lit)/i, ".html");
}
// Normalize wiki-links into plain links pointing at the computed href.
link.type = "link";
link.url = href;
// "text|title" syntax: first part is the title, second (if any) the text.
const tempTitle = link.title || link.value;
if (tempTitle) {
const valueAndTitle = tempTitle.split("|")
link.title = valueAndTitle[0]
link.value = valueAndTitle[1] || valueAndTitle[0]
}
// A link "exists" when its canonical path is in the known project files.
const exists = files.indexOf(canonical) >= 0
const data = {
external,
absolute,
fragment,
relative,
canonical,
wikilink,
exists,
};
link.data = Object.assign({},data,{})
if (wikilink) {
// Wiki-links need an explicit text child once converted to plain links.
link.children = [
{ position: link.position, type: "text", value: link.value },
];
}
link.data.hProperties = {
wikilink,
filepath,
root,
data,
};
delete link.value;
console.log(`[${filepath}] resolving (${link.type}) [${canonical}] exists: ${exists}`, link.url, link)
return link;
};
// Default export: the links module surface consumed by the parser pipeline.
export default {
resolveLinks,
wikiLinkOptions,
resolver,
linkToUrl,
decorateLinkNode,
}
AST to String
-
TODO refactor save (update src) to operate on AST directly and stringify as below, instead of the current ../utils/unist-patch-source implementation.
This is complicated by the fact that new cell source can in effect result in previous and next cell semantic/structure changes, hence the patch source implementation
const {toMarkdown, ungroupSections}
= lit.parser.utils
const unGroup = ungroupSections()()
const tree = unGroup(lit.ast)
const md = toMarkdown(tree)
return md