Table of Contents
Implementation
import unified from 'unified'
import markdown from 'remark-parse'
import remarkStringify from 'remark-stringify'
import slug from 'remark-slug'
import headingIds from 'remark-heading-id'
import toc from 'remark-toc'
import footnotes from 'remark-footnotes'
import gfm from 'remark-gfm'
import wikiLinkPlugin from 'remark-wiki-link'
import select from 'unist-util-select'
import { to_string } from './utils/mdast-util-to-string'
import toMarkdown from 'mdast-util-to-markdown'
import {sections, ungroupSections} from './sections'
import {sections as sectionsV3} from './sections-v3'
import {cells as cellsV3} from './cells-v3'
import codeblocks, {parseMeta, metaToString} from './codeblocks'
import frontmatter from './frontmatter'
import {mdblocks} from './mdblocks'
import links, { decorateLinkNode } from './links'
import { getConsoleForNamespace} from '../utils/console'
import {time} from '../utils/timings'
//import { transform as jsTransform } from './transformers/js'
// JSX transformer is currently disabled; exported as null in `transformers` below.
const jsTransform = null
// Namespaced logger for this module (shadows the global console).
const console = getConsoleForNamespace('parser')
// unified plugin factory: the returned transformer records a timing marker
// (via ../utils/timings) each time the pipeline runs; tree/file args are ignored.
const timer = () => ({ns, marker}) => (t,f) => { time(ns,marker) }
// Builds the core remark pipeline shared by the full processor and the nested
// `md` codeblock reparser (see mdblocks): parse markdown (GFM + comment
// frontmatter), extract a title, resolve wiki-links, add heading slugs/ids.
const baseProcessor = ({litroot, files} = {}) => {
return unified()
.use(timer(),{ns:'parser'})
// Seed file.data and compute the file's canonical path from its path.
.use((...args) => (tree, file) => {
console.log("Parsing file: ", file.path)
file.data = file.data || {}
if (file && file.path) {
file.data.canonical = decorateLinkNode({url: file.path}, '', '/', files).data.canonical
}
})
// remark
.use(markdown, {})
.use(gfm)
.use(frontmatter, {})
// Extract title: fall back to the first heading when frontmatter has none.
.use((...args) => (tree,file) => {
if(!file.data.frontmatter || !file.data.frontmatter.title) {
file.data = file.data || {}
file.data.frontmatter = file.data.frontmatter || {}
const heading = select.select('heading', tree)
// console.log("Found heading:", heading)
if (heading) {
console.log("No title in frontmatter, extracting heading.")
const title = to_string(heading)
file.data.frontmatter.title = title
}
}
},{})
.use(wikiLinkPlugin, links.wikiLinkOptions(files))
.use(slug)
.use(headingIds)
.use(footnotes, {inlineNotes: true})
// Closing marker lets callers measure base parse time.
.use(timer(),{ns:'parser', marker: 'baseProcessorComplete'})
}
// Full lit processor: the base pipeline plus lit-specific codeblock metadata,
// nested markdown reparsing, link resolution, table of contents, and
// section/cell grouping. `fs` is accepted but currently only logged.
export const processor = ({files, fs, litroot} = {files: []}) => {
console.log(`Setting up processor litroot: "${litroot}" files: ${!!files} fs: ${!!fs}`)
return baseProcessor({files, litroot})
// remark-litmd (rehype compatible)
.use(codeblocks)
// Async reparse `md` codeblocks as children
.use(mdblocks, {baseProcessor, litroot, files})
.use(links.resolveLinks({litroot, files}))
.use(toc, {})
// v3 sectioning: group headings into sections, wrap content into cells.
.use(sectionsV3({processSection: cellsV3}), {})
.use(timer(),{ns:'parser', marker: 'processorComplete'})
}
// Parser internals re-exported for consumers (e.g. editor / save paths).
export const utils = {
mdblocks,
sections, ungroupSections,
links,
codeblocks, parseMeta, metaToString,
remarkStringify,
to_string,
toMarkdown,
frontmatter,
}
/**
 * Parse a lit/markdown vfile into an mdast tree.
 *
 * @param {VFile|object} vfile - the file to parse.
 * @param {object} options - processor options ({files, fs, litroot}).
 * @returns the parsed tree, with the fully transformed AST at `parsed.data.ast`.
 */
export async function parse(vfile, options) {
  const p = processor(options)
  const parsed = p.parse(vfile)
  // Pass the vfile through run() so transformer-collected data (frontmatter,
  // canonical path, md block counts) attaches to the caller's vfile instead
  // of a throwaway file unified would otherwise create internally.
  const ast = await p.run(parsed, vfile)
  if (!parsed.data) parsed.data = {}
  parsed.data.ast = ast
  return parsed
}
// Output transformers keyed by target format; jsx is currently disabled (null).
export const transformers = {
jsx: jsTransform,
}
// Serialize a vfile back to markdown: unwrap the section/cell grouping first,
// then stringify with dash bullets. Returns the unified process() promise.
export function stringify(vfile) {
  const serializer = processor()
    .use(ungroupSections())
    .use(remarkStringify, { bullet: '-' })
  return serializer.process(vfile)
}
Extensions
Frontmatter
Implementation
import visit from 'unist-util-visit'
import before from 'unist-util-find-before'
import after from 'unist-util-find-after'
import yaml from 'js-yaml'
import {log, level} from '../utils/console'
import { notEqual } from 'assert'
import { getConsoleForNamespace } from '../utils/console'
// Namespaced logger for the frontmatter plugin.
const console = getConsoleForNamespace('frontmatter')
// Frontmatter lives in HTML comments of the form `<!-- data ... -->`.
const FRONTMATTER_OPEN = '<!-- data'
const FRONTMATTER_CLOSE = '-->'
/**
 * remark plugin: collects YAML frontmatter from `<!-- data ... -->` HTML
 * comment nodes, stores the parsed object (or {error}) on each node's data,
 * and merges all blocks (later blocks win) into file.data.frontmatter.
 */
export default function (...args) {
  return (tree, file) => {
    const matters = []
    visit(tree, 'html', (node, index, parent) => {
      const value = node.value
      const opens = value.indexOf(FRONTMATTER_OPEN) === 0
      const closes = value.indexOf(FRONTMATTER_CLOSE) === (value.length - FRONTMATTER_CLOSE.length)
      if (opens && closes) {
        console.log('Raw', value)
        const yamlString = value.slice(FRONTMATTER_OPEN.length, value.length - FRONTMATTER_CLOSE.length).trim()
        try {
          // js-yaml's load() takes an options object as its second argument;
          // the previous 'utf8' string argument was invalid and is dropped.
          node.data = yaml.load(yamlString)
        } catch (err) {
          node.data = {error: err.toString()}
        }
        console.log('Parsed', yamlString)
        matters.push(node.data)
      }
    })
    // Guard: earlier plugins usually create file.data, but don't depend on it.
    file.data = file.data || {}
    file.data.frontmatter = matters.reduce((memo, matter) => Object.assign({}, memo, matter || {}), {})
  }
}
Sections
Sections are automatically created from the nested structure of Headings.
# Headline (root section)
## Subtitle (child section)
They can be collapsed/folded (tbd)
# >Headline (collapsed)
Implementation
import heading from "mdast-util-heading-range";
import visit from "unist-util-visit";
import flatMap from "unist-util-flatmap";
import { getConsoleForNamespace } from '../utils/console'
import { Identity } from "../utils/functions";
// Namespaced logger; disabled by default for this noisy module.
const console = getConsoleForNamespace('sections', {disabled: true})
// True when the node's first child exists and has the given type; falsy
// (undefined/false) otherwise.
const firstChild = (node, type) => {
  const children = node.children
  return children && children[0] && children[0].type === type
}
// Wraps a node (or an explicit node list) in a "cell" container node.
// The hName/hProperties pair makes rehype render it as <cell class="cell">.
const createCell = (anchor, explicitChildren) => {
  const cellData = {
    hName: "cell",
    hProperties: {
      class: "cell",
    },
  }
  return {
    type: "cell",
    position: anchor.position,
    data: cellData,
    children: explicitChildren || [anchor],
  }
}
// Wraps a node (or an explicit node list) in a "section" container node.
// When no explicit children are given, the anchor's own children are first
// regrouped into cells.
const createSection = (anchor, explicitChildren) => {
  if (!explicitChildren) {
    anchor.children = cellsFromNodes(anchor.children)
  }
  return {
    type: "section",
    data: {
      hName: "section",
    },
    position: anchor.position,
    children: explicitChildren || [anchor],
  }
}
// Groups a flat node list into "cell" wrapper nodes. Sections pass through
// untouched. A code block normally gets its own cell, but when the NEXT node
// is marked `attached` the cell stays open and absorbs following nodes until
// an attached code block closes it. Consecutive plain nodes share one cell
// (the first opens it, later ones are absorbed). The `false &&` branches are
// deliberately disabled experiments for spread lists / list items.
const cellsFromNodes = (nodes, {addSectionDataToFirstCell}={}) => {
const cells = [];
let newCell = null;
nodes.map((current, index) => {
const node = current;
console.log("[Sections] child: ", index, node.type);
if (node.type === "section") {
// Already grouped; close any open cell.
newCell = null;
cells.push(node);
} else if (false && node.type === "list" && node.spread) {
newCell = null;
let listSection = createSection(node)
cells.push(listSection);
} else if (false && node.type === "listItem" && node.spread) {
newCell = null;
let listItem = node
if (firstChild(listItem, 'section')) {
console.log("[Sections] ListItem with section: ", node.type);
listItem.children = listItem.children.map( section => {
section.children = cellsFromNodes(section.children)
})
} else {
listItem.children = [createSection(node, node.children)]
}
cells.push(listItem);
} else if (node.type === "code") {
const next = nodes[index+1]
// `attached` = codeblock meta flag linking a block to its neighbor.
const attached = node => node && node.data && node.data.meta && node.data.meta.attached
const nextIsAttached = attached(next)
let singleCell = createCell(node)
if (nextIsAttached) {
// Keep this cell open so following nodes join the code block.
newCell = singleCell
} else if (newCell && attached(node)) {
// This attached code block closes the currently open cell.
newCell.children.push(node)
if (node.position) newCell.position.end = node.position.end;
cells.push(newCell)
newCell = null
} else {
newCell = null
cells.push(singleCell);
}
} else if (newCell) {
// Absorb into the open cell and extend its position.
newCell.children.push(node);
if (node.position) newCell.position.end = node.position.end;
} else {
// Open a new cell; subsequent non-code nodes will be absorbed into it.
// NOTE(review): despite the name, addSectionDataToFirstCell is stamped on
// EVERY cell opened here, not only the first — confirm intent.
newCell = createCell(node)
if(addSectionDataToFirstCell) newCell.data.section = addSectionDataToFirstCell
cells.push(newCell);
}
});
return cells;
}
// Transformer: wraps each root-level heading plus its following siblings
// (until the next heading/section at the same or shallower depth) into a
// "section" node, then groups each section's children into cells. Visits
// in reverse (`true` arg) so later headings are wrapped before earlier ones.
export const sections = (...args) => (tree) => {
console.log('[Sections II] Init.', args, tree.type, tree.children.length)
let headings = 0
// Builds a section node from its children; marks the heading `processed`
// so the reverse visit does not wrap it twice.
const newSection = (children) => {
const first = children[0]
const last = children[ children.length - 1]
const depth = first.depth || 0
first.processed = true
return {
type: 'section',
data: {
name: first.data.id,
hName: 'section',
hProperties: {
depth: depth,
id: first.data.id,
}
},
depth: depth,
children: children,
position: {
start: first.position.start,
end: last.position.end
}
}
}
visit(tree, 'heading', (node, index, parent) => {
if (node.processed) {
console.log(`[Sections II] Ignoring already processed node ${node.data.id}`)
} else if (parent.type === 'root') {
console.log(`[Sections II] heading "${node.data.id}" ${headings}, depth: ${node.depth}`)
// remove rehype ids
node.data.hProperties = {}
const section = parent.children[index] = newSection([node])
const children = parent.children
// Pull following siblings into the section until a same-or-shallower
// heading/section (or a hole/processed node) ends it.
for (let i = index + 1; i < children.length; i++) {
if (!children[i] || children[i].processed) {
console.log('Skipping removed', children[i])
break
}
const nextNode = children[i]
if ((nextNode.type === 'heading' || nextNode.type === 'section') && nextNode.depth <= node.depth) {
console.log(`[Sections II] ended "${node.data.id}" due to "${nextNode.data.id || nextNode.data.name}"`, nextNode.type, nextNode.depth)
console.log(`[Sections II] contains: "${node.data.id}"`, section.children.map( n => n.type).join(','))
break;
}
console.log(`[Sections II] child index: ${i}, type: ${nextNode.type} depth: ${nextNode.depth} id: ${nextNode.data && (nextNode.data.id || nextNode.data.name)}`)
section.children.push(nextNode)
if (nextNode.position) section.position.end = nextNode.position.end
// Leaves a hole in the array; holes are filtered out after the visit.
delete parent.children[i]
}
headings++
node = section
// NOTE(review): newSection sets data.name and hProperties.id but not
// data.id, so `section.data.id` below is likely undefined — confirm.
node.children = cellsFromNodes(node.children, {addSectionDataToFirstCell: { id: section.data.id, position: section.position}})
} else {
console.log('[Sections II] WARN: Header parent not root', node.data.id)
}
}, true)
console.log("Headings: ", headings)
if (!headings) {
// No headings at all: just wrap the root children into cells.
tree.children = cellsFromNodes(tree.children)
} else {
// Drop the holes left by `delete` above.
tree.children = tree.children.filter(Identity)
}
}
// NOTE(review): this passes `cells` itself to visit() as the test/visitor —
// it looks like an unfinished stub and appears unused; confirm before use.
const cells = (...args) => (tree) => {
visit( tree, cells )
}
// Inverse of sectioning: flattens the tree by replacing every cell and
// section wrapper with its children, leaving all other nodes untouched.
export const ungroupSections = (options = {}) => (...args) => (tree) => {
  console.log("[UnSection] Init", options)
  return flatMap(tree, (node) => {
    const isWrapper = node.type === "cell" || node.type === "section"
    return isWrapper ? node.children : [node]
  })
}
Cells
Currently implemented as part of Sections, see above.
Codeblocks
Implementation
import visit from 'unist-util-visit'
import { getConsoleForNamespace } from '../utils/console'
// Namespaced logger for codeblock meta parsing.
const console = getConsoleForNamespace('codeblocks')
// Placeholder used to protect escaped spaces while splitting meta strings.
const LSP = '__.litsp__'
const NONESCAPEDSPACES_REGEX = /([^\\])\s/g
// Part-type identifiers produced by ident() below.
const LANG = 'lang'
const ATTR = 'attribute'
const TAG = 'tag'
const DIREC = 'directive'
const FILENAME = 'filename'
const URI = 'uri'
const UNKNOWN = 'unknown'
// Types that accumulate into plural buckets (tags, directives, unknowns).
const isListType = (t) => [TAG, DIREC, UNKNOWN].includes(t)
// remark plugin: annotate every `code` node with its parsed lit meta
// (see transform below).
export default function (...args) {
return (tree) => visit( tree, 'code', transform )
}
// Visitor for `code` nodes: parses the info string into lit meta and stores
// it on node.data, exposing tag-* class names to rehype via hProperties.
function transform (node, index, parent) {
  console.log( '[CodeBlocks] Visiting: ', node.lang, node.meta)
  const litMeta = parseMeta(node)
  const tagClasses = litMeta && litMeta.tags
    ? litMeta.tags.map((t) => `tag-${t}`).join(' ')
    : ''
  node.data = {
    ...node.data,
    meta: litMeta,
    hProperties: {
      className: tagClasses,
      meta: litMeta,
    },
  }
  return node
}
// Parses a codeblock's info string — `lang filename #tag !directive k=v`,
// optionally with `< source` and `> output` redirections — into a meta object.
export const parseMeta = function parseMeta (node) {
const raw = `${node.lang || ''} ${node.meta || ''}`.trim()
console.log(`[CodeBlocks] lang: "${node.lang}" meta: "${node.meta}", raw: "${raw}"`)
// A leading '>' marks this whole block as the output of another block.
const isOutput = raw.indexOf('>') === 0
const hasOutput = node.meta && node.meta.indexOf('>') >= 0
let hasSource = node.meta && node.meta.indexOf('<') >= 0
let input = raw
let _, output, source
let fromSource;
if (isOutput) {
[_, input] = raw.split('>').map( x => x.trim() )
}
if (hasOutput) {
[input, output] = input.split('>').map( x => x.trim() )
}
if (hasSource) {
[input,source] = input.split('<').map( x => x.trim() )
}
// Protect escaped spaces, split on the placeholder, classify each part
// (ident), then fold everything into one meta object (reduceParts).
const meta = input
.replace(NONESCAPEDSPACES_REGEX, "$1" + LSP)
.split(LSP)
.map(ident)
.reduce(reduceParts, {})
meta.isOutput = isOutput
// Redirection targets are themselves parsed as nested meta objects.
meta.output = output && parseMeta({ meta: output })
meta.hasOutput = !!output
meta.hasSource = !!source
meta.source = source && parseMeta({ lang: 'txt', meta: source })
meta.raw = raw
if (meta.source) meta.fromSource = meta.source.filename || meta.source.uri
return meta
}
// True for protocol-qualified (http/https...) or protocol-relative (//) URLs.
function isUri(str) {
  return /^(http|\/\/)/.test(str)
}
// Classifies one whitespace-delimited meta part by position and prefix:
// index 0 is the language; '#x' is a tag, '!x' a directive, 'k=v' an
// attribute; index 1 otherwise is a uri/filename; anything else is unknown.
function ident (x, i) {
  let type
  let value = x
  if (i === 0) {
    type = LANG
  } else if (x && x[0]) {
    const first = x[0]
    if (first === "#") {
      type = TAG
      value = x.slice(1)
    } else if (first === "!") {
      type = DIREC
      value = x.slice(1)
    } else if (x.indexOf("=") > 0) {
      type = ATTR
      const [attrKey, attrValue] = x.split("=")
      value = {
        type: attrKey,
        value: attrValue,
      }
    } else if (i === 1) {
      type = isUri(x) ? URI : FILENAME
    } else {
      type = UNKNOWN
    }
  }
  return {type, value}
}
// Reducer folding classified parts into one meta object: attributes land in
// memo.attrs (and, after reassignment, also top-level keyed by attr name);
// list types (tags/directives/unknowns) accumulate into plural arrays;
// everything else is set as a single top-level key.
function reduceParts(memo,item, i) {
memo.attrs = memo.attrs || {}
if (item.type === ATTR){
memo.attrs[item.value.type] = item.value.value
// Unwrap so the {type: key, value} pair is treated like any other part.
item = item.value
}
if (isListType(item.type)) {
const collective = `${item.type}s`
if(memo[collective]) {
memo[collective]
.push(item.value)
} else {
memo[collective] = [item.value]
}
if (item.type === DIREC) {
// memo.attrs[item.value] = true
}
} else {
memo[item.type] = item.value
}
return memo
}
// Inverse of parseMeta: serializes a meta object back to an info string in
// the canonical order lang, filename/uri, !directives, k=v attrs, #tags,
// then any `< source` / `> output` redirections (recursively).
export const metaToString = (meta) => {
  const attrToString = ([key, value]) => `${key}=${value}`
  const parts = []
  if (meta.isOutput) parts.push(">")
  parts.push(meta.lang)
  parts.push(meta.filename || meta.uri)
  for (const d of meta.directives || []) parts.push(`!${d}`)
  for (const entry of Object.entries(meta.attrs || {})) parts.push(attrToString(entry))
  for (const t of meta.tags || []) parts.push(`#${t}`)
  if (meta.source) {
    parts.push("<", metaToString(meta.source))
  }
  if (meta.output) {
    parts.push(">", metaToString(meta.output))
  }
  // Drop falsy entries (missing lang/filename, non-output flag).
  return parts.filter((x) => x).join(" ")
}
Markdown blocks
Implementation
import visit from 'unist-util-visit'
import {getConsoleForNamespace} from '../utils/console'
import vfile from 'vfile'
// Namespaced logger for the mdblocks plugin.
const console = getConsoleForNamespace('mdblocks')
/**
 * Async remark plugin: reparses the contents of every `md`-language codeblock
 * with the base processor and attaches the resulting AST as the node's
 * children. Counts processed blocks in file.data.__mdcodeblocks.
 *
 * Previous version wrapped an async function in `new Promise(...)`: any error
 * thrown inside never resolved or rejected the promise, hanging the pipeline.
 * Collecting nodes first and using Promise.all lets errors propagate.
 */
export const mdblocks = function ({baseProcessor, files, litroot}) {
  return async (tree, file) => {
    file.data = file.data || {}
    file.data.__mdcodeblocks = 0
    const filepath = file?.data?.canonical || 'inexplicable.ext'
    // visit() is synchronous, so gather matching nodes first, reparse after.
    const mdNodes = []
    visit(tree, 'code', (node, index, parent) => {
      if (!node.data || !node.data.meta || node.data.meta.lang !== 'md') return
      mdNodes.push(node)
    })
    await Promise.all(mdNodes.map(async (node) => {
      const idx = file.data.__mdcodeblocks++
      const mdfile = vfile({path: filepath, contents: node.value})
      const p = baseProcessor({files, litroot})
      const parsed = p.parse(mdfile)
      const ast = await p.run(parsed, mdfile)
      console.log(idx + "MD AST: ", filepath, mdfile, ast)
      node.children = ast.children
    }))
    return null
  }
}
Links
Implementation
import {join, resolve, relative as isRelative, dirname} from "path";
import visit from "unist-util-visit";
import { getConsoleForNamespace } from "../utils/console";
// Namespaced logger for link resolution.
const console = getConsoleForNamespace("links");
// remark plugin factory: decorates every link/wikiLink node in the tree.
// NOTE: mutates the passed options object to remember the current file path.
export const resolveLinks =
(options = { litroot: "", filepath: "", files: []}) =>
(...args) =>
(tree, file) => {
// console.log("[Links] Init", file.path, options);
// Prefer an explicit filepath, then the canonical path computed upstream.
options.filepath = options.filepath || file?.data?.canonical || file.path
return visit(tree, isLink, transform(options));
};
// Normalizes a path segment: strip characters outside [word, space, /, -],
// trim, collapse whitespace runs to underscores, and lowercase.
export const slug = (str) => {
  const cleaned = str.replace(/[^\w\s/-]+/g, "").trim()
  return cleaned.replace(/\s+/g, "_").toLowerCase()
}
/**
 * Resolves a wiki-link target of the form "doc?query#hash|title" into
 * { title, path, ext, hash, query, section, href }.
 * @throws when str is empty/undefined, or when splitting fails.
 */
export const resolver = (str) => {
  // console.log("Input: ", str);
  if (!str) {
    // (previous unreachable process.exit(1) after this throw was removed)
    throw Error("No string to resolve")
  }
  let main, title, doc, hash, base, query, file, ext, _
  try {
    [main, title] = str.split("|");
    [doc, hash] = main.split("#");
    [base, query] = doc.split("?");
    [_, file, ext = ''] = base.match(/([^\.]+)(\.[a-zA-Z0-9]+)?$/) || []
  } catch(err) {
    console.log({str, main, title, doc, hash, base, query, file, ext, _})
    console.log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<', err)
    // Rethrow instead of process.exit(1): killing the host process from a
    // parser is wrong (and crashes in the browser); let callers handle it.
    throw err
  }
  // Paths already rooted ('/') or explicitly relative ('.') keep no prefix;
  // bare names get a leading '/'.
  const root = file && file[0] && ['/', '.'].indexOf(file[0]) >= 0 ? '' : '/'
  const path = file && (root + slug(file))
  const section = hash && slug(hash);
  // Fall back to '' so hash-only links don't serialize as "undefined#...".
  const href = (path || "") + (query ? "?" + query : "") + (hash ? "#" + section : "");
  // console.log('Href: ', href)
  return { title, path, ext, hash, query, section, href };
};
// Predicate for visit(): matches both markdown links and wiki-links.
const isLink = (node) => node.type === "link" || node.type === "wikiLink";
// Visitor factory: decorate each matched node with resolved link data.
const transform = (options) => (node, index, parent) =>
  decorateLinkNode(node, options.litroot, options.filepath, options.files);
// Options for remark-wiki-link: resolve a page name to candidate permalinks
// by trying .lit, a folder index, .md, and the name's own extension.
export const wikiLinkOptions = (files = []) => {
  const pageResolver = (name) => {
    // console.log('Resolving', name)
    const { path, ext } = resolver(name)
    const candidates = ['.lit', '/index.lit', '.md', ext]
    return candidates.map((candidate) => `${path}${candidate}`)
  }
  return {
    permalinks: files,
    pageResolver,
  }
}
// ({
// permalinks: files,
// pageResolver: nameToPermalinks,
// hrefTemplate: (permalink) => `${permalink}?file=${permalink}`
// })
// Extracts the raw target url from a link node: wiki-links carry it in
// data.permalink, ordinary links in url. (`root` is currently unused.)
const linkToUrl = (link, root) => {
  if (link.type !== "wikiLink") {
    return link.url
  }
  return link.data.permalink
}
// Mutates a link/wikiLink node in place: classifies its url (external,
// absolute, relative, fragment), computes the canonical project path and an
// .html href, splits "text|title" values, and records everything on
// node.data (plus hProperties for rehype). Returns the mutated node.
export const decorateLinkNode = (link, root = "", filepath = "", files = []) => {
// console.log(link)
const wikilink = link.type === "wikiLink";
const url = linkToUrl(link, root);
const external = /^(https?\:)?\/\//.test(url);
const absolute = !external && /^\//.test(url);
const fragment = /^(\?|#)/.test(url);
const relative = url && !absolute && !fragment && !external;
const srcToGH = (src, prefix) => join(prefix, src); // NOTE(review): unused here
const relToCanonical = (src, link) => resolve(dirname(src), link);
const canonicalToRel = (src1, src2) => isRelative(dirname(src1), src2)
let canonical = url;
let href = url;
// Strip query/fragment before canonicalizing the path portion.
let [base, frag] = url.split(/(\?|#)/);
if (relative) {
canonical = relToCanonical(filepath, base)
href = url.replace(/\.(md|lit)/i, ".html") ;
} else if (absolute) {
// Rewrite absolute urls relative to the current file's directory.
const rel = canonicalToRel(filepath, url);
href = rel.replace(/\.(md|lit)/i, ".html");
}
// Normalize wiki-links into plain links pointing at the computed href.
link.type = "link";
link.url = href;
// "text|title" syntax: first part is the title, second (if any) the text.
const tempTitle = link.title || link.value;
if (tempTitle) {
const valueAndTitle = tempTitle.split("|")
link.title = valueAndTitle[0]
link.value = valueAndTitle[1] || valueAndTitle[0]
}
// A link "exists" when its canonical path is in the known project files.
const exists = files.indexOf(canonical) >= 0
const data = {
external,
absolute,
fragment,
relative,
canonical,
wikilink,
exists,
};
link.data = Object.assign({},data,{})
if (wikilink) {
// Wiki-links need an explicit text child once converted to plain links.
link.children = [
{ position: link.position, type: "text", value: link.value },
];
}
link.data.hProperties = {
wikilink,
filepath,
root,
data,
};
delete link.value;
console.log(`[${filepath}] resolving (${link.type}) [${canonical}] exists: ${exists}`, link.url, link)
return link;
};
// Default export: the links module surface consumed by the parser pipeline.
export default {
resolveLinks,
wikiLinkOptions,
resolver,
linkToUrl,
decorateLinkNode,
}
AST to String
-
TODO refactor save (update src) to operate on AST directly and stringify as below, instead of the current ../utils/unist-patch-source implementation.
This is complicated by the fact that new cell source can in effect result in previous and next cell semantic/structure changes, hence the patch source implementation
const {toMarkdown, ungroupSections}
= lit.parser.utils
const unGroup = ungroupSections()()
const tree = unGroup(lit.ast)
const md = toMarkdown(tree)
return md