-
Notifications
You must be signed in to change notification settings - Fork 17
Simple packaging for HTML elements #14
Changes from 3 commits
6752cfe
e5aea4f
a1710dd
8f86228
8e7bc0b
a4802d6
418f3ef
46ad957
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,18 +14,27 @@ | |
}, | ||
"scripts": { | ||
"lint-md": "remark content", | ||
"package": "node scripts/package/package.js", | ||
"spell-md": "mdspell -a -n --en-us 'content/**/*.md'" | ||
}, | ||
"dependencies": { | ||
"gray-matter": "4.0.2", | ||
"jsdom": "^12.2.0", | ||
"js-yaml": "3.13.1", | ||
"marked": "0.6.2", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not even a fully functioning prototype yet and we've got two Markdown parsers. Some day (but not today) I'm going to have a full on meltdown[1] about this. Choosing a Markdown and an idea of how to not-so-dangerously extend it is something I'd like to see happen sooner rather than later (maybe "propose possible Markdowns" would be a good task for me for a future sprint). [1] This would be a great name for a Markdown implementation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Definitely, we need to do this, and before you have a Markdown meltdown. I have filed mdn/sprints#1505 for this, but feel free to edit it as you see fit. I have made it an epic because it looks like it might be bigger than a single user story. But maybe not. I hope, again, that getting experience of migrating content will help us decide which features we need. |
||
"markdown-spellcheck": "1.3.1", | ||
"npm": "^6.4.1", | ||
"mdn-browser-compat-data": "0.x", | ||
"npm": "^6.9.0", | ||
"remark-cli": "6.0.1", | ||
"remark-preset-lint-recommended": "3.0.2" | ||
}, | ||
"remarkConfig": { | ||
"plugins": [ | ||
"remark-preset-lint-recommended", | ||
["remark-lint-list-item-indent", "space"] | ||
[ | ||
"remark-lint-list-item-indent", | ||
"space" | ||
] | ||
] | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
|
||
const yaml = require('js-yaml'); | ||
const matter = require('gray-matter'); | ||
const marked = require('marked'); | ||
const jsdom = require('jsdom'); | ||
const bcd = require('mdn-browser-compat-data'); | ||
|
||
const { JSDOM } = jsdom; | ||
|
||
// Collects content from the siblings that follow `node`, stopping at the
// first sibling whose nodeName matches `terminatorTag` (e.g. 'H2').
// `contentType` selects serialized HTML ('html') or plain text ('text');
// any other value yields an empty string.
function extractFromSiblings(node, terminatorTag, contentType) {
  let content = '';
  let sib = node.nextSibling;
  // Fix: use strict inequality for the string comparison.
  while (sib && sib.nodeName !== terminatorTag) {
    // Text/comment nodes have no outerHTML and are skipped entirely.
    if (sib.outerHTML) {
      if (contentType === 'html') {
        content += sib.outerHTML;
      } else if (contentType === 'text') {
        content += sib.textContent;
      }
    }
    sib = sib.nextSibling;
  }
  return content;
}
|
||
// Builds the `values` array for an attribute: one entry per <h3> in the
// fragment, pairing the heading text with the HTML between it and the
// next <h3>.
// NOTE(review): `heading` is unused; kept to preserve the call signature
// used by packageAttribute below.
function packageValues(heading, dom) {
  const values = [];
  const valueHeadings = dom.querySelectorAll('h3');
  // Fix: loop variable is never reassigned — use const; terminate the
  // push statement with a semicolon.
  for (const valueHeading of valueHeadings) {
    values.push({
      value: valueHeading.textContent,
      description: extractFromSiblings(valueHeading, 'H3', 'html')
    });
  }
  return values;
}
|
||
// Reads one attribute's Markdown file and converts it into a structured
// object: { name, description, type, values? }.
function packageAttribute(attributePath) {
  const attributeMD = fs.readFileSync(attributePath, 'utf8');
  const {data, content} = matter(attributeMD);
  const dom = JSDOM.fragment(marked(content));
  const attribute = {};

  // extract the name property (the single <h1>)
  const name = dom.querySelector('h1');
  attribute.name = name.textContent;

  // extract the description property: everything between <h1> and the
  // first <h2>
  attribute.description = extractFromSiblings(name, 'H2', 'html');

  // extract the type property. With two <h2>s the first is the values
  // section and the second holds the type; with one, it holds the type.
  const h2Headings = dom.querySelectorAll('h2');
  const typeHeading = (h2Headings.length === 2) ? h2Headings[1] : h2Headings[0];
  attribute.type = extractFromSiblings(typeHeading, 'H2', 'text');

  // extract the values property
  if (h2Headings.length === 2) {
    // Bug fix: `valuesHeading` was assigned without a declaration,
    // creating an implicit global (a ReferenceError in strict mode).
    const valuesHeading = h2Headings[0];
    attribute.values = packageValues(valuesHeading, dom);
  }

  return attribute;
}
|
||
function package(root) { | ||
const attributePaths = fs.readdirSync(root).map(relative => path.join(root, relative)); | ||
return attributePaths.map(packageAttribute); | ||
} | ||
|
||
module.exports = { | ||
package | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
const bcd = require('mdn-browser-compat-data'); | ||
|
||
function package(query) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One first impression: it is long past time for BCD to provide this API. It's wild how many times I've seen this implemented 😆 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
let data = query.split('.').reduce(function(prev, curr) { | ||
return prev ? prev[curr] : undefined | ||
}, bcd); | ||
return data; | ||
} | ||
|
||
module.exports = { | ||
package | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
const fs = require('fs'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd like to avoid baking contributor data into the repo, but we'll necessarily have some legacy data to retain. It'd be nice to have a graveyard for legacy contributions (e.g., some separate thing that maps our files to lists of contributors) and derive post-wiki contributions from Git itself. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could be wrong, but I'm working on the assumption that including Wiki contributors is more or less a licensing requirement. Definitely I agree that in the future we should use GitHub for this. Really, including these .md files was just the quickest way I could think of to ensure that contributor information was present. I agree with your comment above that giving it more structure would be good, but also we should have a more general think about what our obligations are here and how we can best fulfill them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, if we're deriving content from the existing wiki, then we must attribute the authors. But you're right that this is easy and you shouldn't consider this a blocker to merging. My thinking here was just that we'll want to—at some point before actually inviting the community to edit Stumptown content—take the authorship data out of the general content files. Once we break from the wiki, the contribution data is historical and Git commits are the canonical source of authorship information. We wouldn't want to update authorship data except via Git from then on, but leaving authorship as part of content files might be confusing to contributors. 
For wiki authors who used their GitHub account on MDN, we could conceivably add a commit which makes them contributors to this repo, whether they actually contributed via GitHub or not. This would further shrink the need for attribution data in the repo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If we build this content to HTML and start serving it from developer.mozilla.org, I'm not sure what our licensing requirements are. Currently MDN displays all contributors in the page. Is it sufficient to have a link back to GitHub and let people see the contribution history there (plus something for legacy contributors)? I don't know, but will start a conversation about this.
I seem to remember John had some scheme like this. |
||
const path = require('path'); | ||
|
||
const marked = require('marked'); | ||
|
||
function package(contributorsPath) { | ||
const contributorsMD = fs.readFileSync(contributorsPath, 'utf8'); | ||
console.log(contributorsMD) | ||
console.log(marked(contributorsMD)) | ||
return marked(contributorsMD); | ||
} | ||
|
||
module.exports = { | ||
package | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
|
||
const matter = require('gray-matter'); | ||
const marked = require('marked'); | ||
|
||
// Reads an example's description.md and returns its front-matter data with
// the Markdown body rendered to HTML under the `content` key.
function packageDescription(examplePath) {
  const descriptionPath = path.join(examplePath, 'description.md');
  const parsed = matter(fs.readFileSync(descriptionPath, 'utf8'));
  const description = parsed.data;
  description.content = marked(parsed.content);
  return description;
}
|
||
// Collects the source files (example.js / example.css / example.html) for
// an example directory. Missing files are simply omitted from the result.
function packageSources(examplePath) {
  const exampleSource = {};
  // Refactor: the three copy-pasted existsSync/readFileSync stanzas are
  // collapsed into one loop; key insertion order (js, css, html) is kept.
  for (const ext of ['js', 'css', 'html']) {
    const sourcePath = path.join(examplePath, `example.${ext}`);
    if (fs.existsSync(sourcePath)) {
      exampleSource[ext] = fs.readFileSync(sourcePath, 'utf8');
    }
  }
  return exampleSource;
}
|
||
// Assembles one example: its rendered description plus its source files.
function packageExample(examplePath) {
  const description = packageDescription(examplePath);
  const sources = packageSources(examplePath);
  return { description, sources };
}
|
||
function package(paths) { | ||
return paths.map(packageExample); | ||
} | ||
|
||
module.exports = { | ||
package | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
|
||
const yaml = require('js-yaml'); | ||
const matter = require('gray-matter'); | ||
const marked = require('marked'); | ||
const jsdom = require('jsdom'); | ||
const bcd = require('mdn-browser-compat-data'); | ||
|
||
const { JSDOM } = jsdom; | ||
const commentNode = 8; | ||
const namedSections = [ | ||
'short-description', | ||
'overview', | ||
'attributes-text', | ||
'usage-notes', | ||
'accessibility-concerns', | ||
'see-also' | ||
]; | ||
|
||
// Collects content from the siblings that follow `node`, stopping at the
// first sibling whose nodeName matches `terminatorTag` ('H2', '#comment'…).
// `contentType` selects serialized HTML ('html') or plain text ('text').
// NOTE(review): duplicated in scripts/package/package-attributes.js —
// worth hoisting into a shared helper module.
function extractFromSiblings(node, terminatorTag, contentType) {
  let content = '';
  let sib = node.nextSibling;
  // Fix: use strict inequality for the string comparison.
  while (sib && sib.nodeName !== terminatorTag) {
    // Text/comment nodes have no outerHTML and are skipped entirely.
    if (sib.outerHTML) {
      if (contentType === 'html') {
        content += sib.outerHTML;
      } else if (contentType === 'text') {
        content += sib.textContent;
      }
    }
    sib = sib.nextSibling;
  }
  return content;
}
|
||
// Builds a `values` array: one entry per <h3> in the fragment, pairing the
// heading text with the HTML between it and the next <h3>.
// NOTE(review): duplicated from package-attributes.js, and `heading` is
// unused; no caller is visible in this file — confirm whether this copy
// is dead code before removing.
function packageValues(heading, dom) {
  const values = [];
  const valueHeadings = dom.querySelectorAll('h3');
  // Fix: loop variable is never reassigned — use const; terminate the
  // push statement with a semicolon.
  for (const valueHeading of valueHeadings) {
    values.push({
      value: valueHeading.textContent,
      description: extractFromSiblings(valueHeading, 'H3', 'html')
    });
  }
  return values;
}
|
||
// Files the section that starts at comment `node` into `sections`:
// recognised recipe sections (see `namedSections`) are stored under their
// own key; anything else is appended to 'additional-sections'.
// Mutates `sections` in place; returns nothing.
function getSection(node, sections) {
  const sectionName = node.textContent.trim();
  // Section content runs from this marker comment up to the next comment.
  const sectionContent = extractFromSiblings(node, '#comment', 'html');
  // Fix: removed `extraSections`, an unused local.

  if (namedSections.includes(sectionName)) {
    sections[sectionName] = sectionContent;
  } else {
    // Not part of the recipe: keep the content, but set it aside as an
    // additional section (see review discussion — these may be logged or
    // rejected at lint time later, tracked in #21).
    const additionalSection = {
      name: sectionName,
      content: sectionContent
    };
    sections['additional-sections'].push(additionalSection);
  }
}
|
||
function package(prosePath) { | ||
const proseMD = fs.readFileSync(prosePath, 'utf8'); | ||
const dom = JSDOM.fragment(marked(proseMD)); | ||
const sections = { | ||
'additional-sections': [] | ||
}; | ||
let node = dom.firstChild; | ||
while (node) { | ||
if (node.nodeType === commentNode) { | ||
getSection(node, sections); | ||
} | ||
node = node.nextSibling; | ||
} | ||
return sections; | ||
} | ||
|
||
module.exports = { | ||
package | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const yaml = require('js-yaml'); | ||
|
||
const bcd = require('./package-bcd'); | ||
const examples = require('./package-examples'); | ||
const attributes = require('./package-attributes'); | ||
const prose = require('./package-prose'); | ||
const contributors = require('./package-contributors'); | ||
|
||
const htmlElements = '/content/html/elements'; | ||
|
||
/**
 * Writes the packaged element JSON to ./packaged/<propertyName>.json
 * (relative to the current working directory), nested under
 * html.elements.<propertyName>.
 * @param {string} propertyName - element name, used as key and filename
 * @param {Object} json - the packaged element data
 */
const writeToFile = (propertyName, json) => {
  const data = {
    html: {
      elements: {
        [propertyName]: json,
      }
    }
  };

  const dest = path.join(process.cwd(), 'packaged', `${propertyName}.json`);
  const destDir = path.dirname(dest);
  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }
  // Fix: dropped the redundant template-literal wrapper around stringify.
  fs.writeFileSync(dest, JSON.stringify(data, null, 2));
};
|
||
function package(elementName) { | ||
const elementPath = path.join(process.cwd(), htmlElements, elementName); | ||
|
||
// open meta.yaml | ||
const meta = yaml.safeLoad(fs.readFileSync(path.join(elementPath, 'meta.yaml'), 'utf8')); | ||
|
||
// initialise some paths for more resources | ||
const examplesPaths = meta.examples.map(relativePath => path.join(elementPath, relativePath)); | ||
const attributesPath = path.join(elementPath, meta.attributes['element-specific']); | ||
const prosePath = path.join(elementPath, 'prose.md'); | ||
const contributorsPath = path.join(elementPath, 'contributors.md'); | ||
|
||
// make the package | ||
const element = {}; | ||
element.title = meta.title; | ||
element.interactive_example_url = meta['interactive-example']; | ||
element.browser_compatibility = bcd.package(meta['browser-compatibility']); | ||
element.examples = examples.package(examplesPaths); | ||
element.attributes = attributes.package(attributesPath); | ||
element.prose = prose.package(prosePath); | ||
element.contributors = contributors.package(contributorsPath); | ||
|
||
writeToFile(elementName, element); | ||
} | ||
|
||
package(process.argv[2]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More and more I think comments or custom tags are not going to work for this. For example, it completely foils GitHub's rich Markdown diffs:
I think sooner or later we'll want to do plain headings (
## Short description
) and aggressively enforce accepted text and hierarchy (as part of the recipe).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes... I'm not sure now why we opted for this plan rather than just using headings. Also I'm not happy that prose.md uses comments while attributes use headings.
So unless someone can remember why we decided to do this, I think you are right.
Again these are things we can tweak as we go along though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to keep this for now but filed #19.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment doesn't signify that I've done any kind of thorough review 😃, but I was skimming this PR while waiting for some tests to finish and noticed this thread. I can't remember the details either, but I'm also not happy with the fact that
prose.md
uses hidden comments to enforce structure while the attributes use headings. It's probably best to choose some specific headings for signifying/enforcing structure, and get rid of the hidden comments. So, for what it's worth, just wanted to say that I agree with you both in where you're heading here! 😄