Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core(font-size): remove deprecated DOM.getFlattenedDocument #11248

Merged
merged 6 commits into from
Aug 11, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions lighthouse-core/audits/seo/font-size.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ function getUniqueFailingRules(fontSizeArtifact) {
/** @type {Map<string, FailingNodeData>} */
const failingRules = new Map();

fontSizeArtifact.forEach(({cssRule, fontSize, textLength, node}) => {
const artifactId = getFontArtifactId(cssRule, node);
fontSizeArtifact.forEach((failingNodeData) => {
const {nodeId, cssRule, fontSize, textLength, parentNode} = failingNodeData;
const artifactId = getFontArtifactId(cssRule, parentNode, nodeId);
const failingRule = failingRules.get(artifactId);

if (!failingRule) {
failingRules.set(artifactId, {
node,
nodeId,
parentNode,
cssRule,
fontSize,
textLength,
Expand Down Expand Up @@ -76,7 +78,7 @@ function getAttributeMap(attributes = []) {

/**
* TODO: return unique selector, like axe-core does, instead of just id/class/name of a single node
* @param {FailingNodeData['node']} node
* @param {FailingNodeData['parentNode']} node
* @returns {string}
*/
function getSelector(node) {
Expand All @@ -91,11 +93,11 @@ function getSelector(node) {
}
}

return node.localName.toLowerCase();
return node.nodeName.toLowerCase();
}

/**
* @param {FailingNodeData['node']} node
* @param {FailingNodeData['parentNode']} node
* @return {LH.Audit.Details.NodeValue}
*/
function nodeToTableNode(node) {
Expand All @@ -107,14 +109,14 @@ function nodeToTableNode(node) {
return {
type: 'node',
selector: node.parentNode ? getSelector(node.parentNode) : '',
snippet: `<${node.localName}${attributesString}>`,
snippet: `<${node.nodeName.toLowerCase()}${attributesString}>`,
};
}

/**
* @param {string} baseURL
* @param {FailingNodeData['cssRule']} styleDeclaration
* @param {FailingNodeData['node']} node
* @param {FailingNodeData['parentNode']} node
* @returns {{source: LH.Audit.Details.UrlValue | LH.Audit.Details.SourceLocationValue | LH.Audit.Details.CodeValue, selector: string | LH.Audit.Details.NodeValue}}
*/
function findStyleRuleSource(baseURL, styleDeclaration, node) {
Expand Down Expand Up @@ -198,16 +200,17 @@ function findStyleRuleSource(baseURL, styleDeclaration, node) {

/**
* @param {FailingNodeData['cssRule']} styleDeclaration
* @param {FailingNodeData['node']} node
* @param {FailingNodeData['parentNode']} node
* @param {number} textNodeId
* @return {string}
*/
function getFontArtifactId(styleDeclaration, node) {
function getFontArtifactId(styleDeclaration, node, textNodeId) {
if (styleDeclaration && styleDeclaration.type === 'Regular') {
const startLine = styleDeclaration.range ? styleDeclaration.range.startLine : 0;
const startColumn = styleDeclaration.range ? styleDeclaration.range.startColumn : 0;
return `${styleDeclaration.styleSheetId}@${startLine}:${startColumn}`;
} else {
return `node_${node.nodeId}`;
return `node_${textNodeId}`;
}
}

Expand Down Expand Up @@ -274,9 +277,9 @@ class FontSize extends Audit {
];

const tableData = failingRules.sort((a, b) => b.textLength - a.textLength)
.map(({cssRule, textLength, fontSize, node}) => {
.map(({cssRule, textLength, fontSize, parentNode}) => {
const percentageOfAffectedText = textLength / totalTextLength * 100;
const origin = findStyleRuleSource(pageUrl, cssRule, node);
const origin = findStyleRuleSource(pageUrl, cssRule, parentNode);

return {
source: origin.source,
Expand Down
13 changes: 0 additions & 13 deletions lighthouse-core/gather/driver.js
Original file line number Diff line number Diff line change
Expand Up @@ -1204,19 +1204,6 @@ class Driver {
return new LHElement(targetNode, this);
}

/**
* Returns the flattened list of all DOM nodes within the document.
* @param {boolean=} pierce Whether to pierce through shadow trees and iframes.
* True by default.
* @return {Promise<Array<LH.Crdp.DOM.Node>>} The found nodes, or [], resolved in a promise
*/
async getNodesInDocument(pierce = true) {
const flattenedDocument = await this.sendCommand('DOM.getFlattenedDocument',
{depth: -1, pierce});

return flattenedDocument.nodes ? flattenedDocument.nodes : [];
}

/**
* Resolves a backend node ID (from a trace event, protocol, etc) to the object ID for use with
* `Runtime.callFunctionOn`. `undefined` means the node could not be found.
Expand Down
133 changes: 61 additions & 72 deletions lighthouse-core/gather/gatherers/seo/font-size.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,6 @@ const MAX_NODES_SOURCE_RULE_FETCHED = 50; // number of nodes to fetch the source
/** @typedef {LH.Artifacts.FontSize.DomNodeMaybeWithParent} DomNodeMaybeWithParent*/
/** @typedef {Map<number, {fontSize: number, textLength: number}>} BackendIdsToFontData */

/**
* @param {LH.Artifacts.FontSize.DomNodeMaybeWithParent=} node
* @returns {node is LH.Artifacts.FontSize.DomNodeWithParent}
*/
function nodeInBody(node) {
if (!node) {
return false;
}
if (node.nodeName === 'BODY') {
return true;
}
return nodeInBody(node.parentNode);
}

/**
* Get list of all nodes from the document body.
*
* @param {Driver} driver
* @returns {Promise<LH.Artifacts.FontSize.DomNodeWithParent[]>}
*/
async function getAllNodesFromBody(driver) {
const nodes = /** @type {DomNodeMaybeWithParent[]} */ (await driver.getNodesInDocument());
/** @type {Map<number|undefined, LH.Artifacts.FontSize.DomNodeMaybeWithParent>} */
const nodeMap = new Map();
nodes.forEach(node => nodeMap.set(node.nodeId, node));
nodes.forEach(node => (node.parentNode = nodeMap.get(node.parentId)));
return nodes.filter(nodeInBody);
}

/**
* @param {LH.Crdp.CSS.CSSStyle} [style]
* @return {boolean}
Expand Down Expand Up @@ -187,12 +158,12 @@ function getTextLength(text) {

/**
* @param {Driver} driver
* @param {LH.Crdp.DOM.Node} node text node
* @param {number} nodeId text node
* @returns {Promise<NodeFontData['cssRule']|undefined>}
*/
async function fetchSourceRule(driver, node) {
async function fetchSourceRule(driver, nodeId) {
const matchedRules = await driver.sendCommand('CSS.getMatchedStylesForNode', {
nodeId: node.nodeId,
nodeId,
});
const sourceRule = getEffectiveFontRule(matchedRules);
if (!sourceRule) return undefined;
Expand All @@ -214,12 +185,22 @@ class FontSize extends Gatherer {
* @param {Array<NodeFontData>} failingNodes
*/
static async fetchFailingNodeSourceRules(driver, failingNodes) {
const analysisPromises = failingNodes
const nodesToAnalyze = failingNodes
.sort((a, b) => b.textLength - a.textLength)
.slice(0, MAX_NODES_SOURCE_RULE_FETCHED)
.map(async failingNode => {
.slice(0, MAX_NODES_SOURCE_RULE_FETCHED);

// DOM.getDocument is necessary for pushNodesByBackendIdsToFrontend to properly retrieve nodeIds if the `DOM` domain was enabled before this gatherer, invoke it to be safe.
await driver.sendCommand('DOM.getDocument', {depth: -1, pierce: true});

const {nodeIds} = await driver.sendCommand('DOM.pushNodesByBackendIdsToFrontend', {
backendNodeIds: nodesToAnalyze.map(node => node.parentNode.backendNodeId),
});

const analysisPromises = nodesToAnalyze
.map(async (failingNode, i) => {
failingNode.nodeId = nodeIds[i];
try {
const cssRule = await fetchSourceRule(driver, failingNode.node);
const cssRule = await fetchSourceRule(driver, nodeIds[i]);
failingNode.cssRule = cssRule;
} catch (err) {
// The node was deleted. We don't need to distinguish between lack-of-rule
Expand All @@ -240,25 +221,34 @@ class FontSize extends Gatherer {
}

/**
* Maps backendNodeId of TextNodes to {fontSize, textLength}.
*
* Iterates on the TextNodes in a DOM Snapshot.
* Every entry is associated with a TextNode in the layout tree (not display: none).
* @param {LH.Crdp.DOMSnapshot.CaptureSnapshotResponse} snapshot
* @return {BackendIdsToFontData}
*/
calculateBackendIdsToFontData(snapshot) {
* iterateTextNodesInLayoutFromSnapshot(snapshot) {
Copy link
Collaborator

@patrickhulce patrickhulce Aug 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we've ever used generator syntax in Lighthouse outside of the very unusual asset saver case which is very specific to allowing streamable results, probably deserves some sort of comment or justification :)

is there an actual benefit here compared to just doing a filtered map instead? AFAICT, we're just using the iterator to push all of these objects onto an array anyway, but maybe there were other memory pressure reasons I'm missing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

outside of the very unusual asset saver case which is very specific to allowing streamable results

(and really is a product of the Node < 8.10 days when we couldn't have strings over 256MiB (#1685 (comment)) and no one has touched that save method for three years)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to change, not a sticking point, but I don't like the idea of avoiding entire language features because they are new or not commonly used :) Would a comment linking to "how generators work" help? Is the main concern just that someone doesn't know what * is, or that even if they do, do you think generators are hard to reason about?

I appreciate a generator here as the size of the data created is not bounded. It scales with the number of elements on the page, and with the amount of text within those elements. Since only a single value needs to be processed at a time, and because the creation of said value is rather complex, extracting as a generator function made sense to me.

The alternative seems to be:

  1. inline this entire function. affects readability, function decomposition is good, etc.
  2. return as an array. this is the current state, and I believe I reduced some of the concern here in a previous PR by returning the textLength instead of the text. Here I reverted that, as it's a derived property better calculated by the calling code.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the main concern just that someone doesn't know what * is, or that even if they do, do you think generators are hard to reason about?

Mostly this. I wouldn't expect most developers to immediately understand how this function works compared to adding items to an array. For this reason, I would only really use a generator in a shared codebase when the benefits are significant (like in the string creation case of asset saver) which is accompanied by that justification in comments.

I believe I reduced some of the concern here in a previous PR by returning the textLength instead of the text. Here I reverted that, as it's a derived property better calculated by the calling code.

Ah, OK I didn't notice this. If we held on to text for any significant part of the filtering step I might agree with you this is sufficient justification, but much like your other simplifications in this PR, we could solve it by limiting the returned information and just change text to a textLength: getTextLength(text) invocation into the iterator function, right?

as it's a derived property better calculated by the calling code.

I'm not sure I follow this one, we'd still be calculating it with the exact same method now, we'd just compute it before returning the node object.

Final thoughts:

I understand your concern about refusing to adopt new language features and ending up with a ES20XX-era codebase forever. I'm not as worried about this being the case as we've extensively adopted several features newer to node than generators, it's primarily that generators are rarer in applicability for Lighthouse (and therefore in my belief less likely to be easy to read for the average developer) than asynchronous code.

While my primary concern was wrong to lean exclusively on "it's different, so no", there is still some merit to consistency in patterns. @paulirish mostly converted me here on avoiding reduce in pretty much every situation I would've used it previously for easier reading to those less accustomed to reduce as a general-purpose for-loop, and I believe in other recent reviews of new contributors we both concurred on use of a for...of instead of a long .filter().map().filter().forEach() chain. Not that there's never a time to use those new patterns, but for a situation in which the dominant pattern already fits well and the new pattern is sufficiently different in complexity of comprehension, I'd prefer the former.

const strings = snapshot.strings;

/** @type {BackendIdsToFontData} */
const backendIdsToFontData = new Map();
/** @param {number} index */
const getString = (index) => strings[index];
/** @param {number} index */
const getFloat = (index) => parseFloat(strings[index]);

for (let j = 0; j < snapshot.documents.length; j++) {
// `doc` is a flattened property list describing all the Nodes in a document, with all string
// values deduped in the `strings` array.
const doc = snapshot.documents[j];

if (!doc.nodes.backendNodeId) {
if (!doc.nodes.backendNodeId || !doc.nodes.parentIndex ||
!doc.nodes.attributes || !doc.nodes.nodeName) {
throw new Error('Unexpected response from DOMSnapshot.captureSnapshot.');
}
const nodes = /** @type {Required<typeof doc['nodes']>} */ (doc.nodes);

/** @param {number} parentIndex */
const getParentData = (parentIndex) => ({
backendNodeId: nodes.backendNodeId[parentIndex],
attributes: nodes.attributes[parentIndex].map(getString),
nodeName: getString(nodes.nodeName[parentIndex]),
});

for (const layoutIndex of doc.textBoxes.layoutIndex) {
const text = strings[doc.layout.text[layoutIndex]];
Expand All @@ -267,45 +257,49 @@ class FontSize extends Gatherer {
const nodeIndex = doc.layout.nodeIndex[layoutIndex];
const styles = doc.layout.styles[layoutIndex];
const [fontSizeStringId] = styles;
const fontSize = getFloat(fontSizeStringId);

const fontSize = parseFloat(strings[fontSizeStringId]);
backendIdsToFontData.set(doc.nodes.backendNodeId[nodeIndex], {
const parentIndex = nodes.parentIndex[nodeIndex];
const grandParentIndex = nodes.parentIndex[parentIndex];
const parentNode = getParentData(parentIndex);
const grandParentNode =
grandParentIndex !== undefined ? getParentData(grandParentIndex) : undefined;

yield {
nodeIndex,
backendNodeId: nodes.backendNodeId[nodeIndex],
fontSize,
textLength: getTextLength(text),
});
text,
parentNode: {
...parentNode,
parentNode: grandParentNode,
},
};
}
}

return backendIdsToFontData;
}

/**
* The only connection between a snapshot Node and an actual Protocol Node is backendId,
* so that is used to join the two data structures. DOMSnapshot.captureSnapshot doesn't
* give the entire Node object, so DOM.getFlattenedDocument is used.
* @param {BackendIdsToFontData} backendIdsToFontData
* @param {LH.Artifacts.FontSize.DomNodeWithParent[]} crdpNodes
* Get all the failing text nodes that don't meet the legible text threshold.
* @param {LH.Crdp.DOMSnapshot.CaptureSnapshotResponse} snapshot
*/
findFailingNodes(backendIdsToFontData, crdpNodes) {
findFailingNodes(snapshot) {
/** @type {NodeFontData[]} */
const failingNodes = [];
let totalTextLength = 0;
let failingTextLength = 0;

for (const crdpNode of crdpNodes) {
const partialFontData = backendIdsToFontData.get(crdpNode.backendNodeId);
if (!partialFontData) continue;
// `crdpNode` is a non-empty TextNode that is in the layout tree (not display: none).

const {fontSize, textLength} = partialFontData;
for (const nodeData of this.iterateTextNodesInLayoutFromSnapshot(snapshot)) {
const textLength = getTextLength(nodeData.text);
totalTextLength += textLength;
if (fontSize < MINIMAL_LEGIBLE_FONT_SIZE_PX) {
if (nodeData.fontSize < MINIMAL_LEGIBLE_FONT_SIZE_PX) {
// Once a bad TextNode is identified, its parent Node is needed.
failingTextLength += textLength;
failingNodes.push({
node: crdpNode.parentNode,
nodeId: 0, // Set later in fetchFailingNodeSourceRules.
parentNode: nodeData.parentNode,
textLength,
fontSize,
fontSize: nodeData.fontSize,
});
}
}
Expand All @@ -331,20 +325,15 @@ class FontSize extends Gatherer {
]);

// Get the computed font-size style of every node.
const snapshotPromise = passContext.driver.sendCommand('DOMSnapshot.captureSnapshot', {
const snapshot = await passContext.driver.sendCommand('DOMSnapshot.captureSnapshot', {
computedStyles: ['font-size'],
});
const allNodesPromise = getAllNodesFromBody(passContext.driver);
const [snapshot, crdpNodes] = await Promise.all([snapshotPromise, allNodesPromise]);
const backendIdsToFontData = this.calculateBackendIdsToFontData(snapshot);
// `backendIdsToFontData` will include all non-empty TextNodes.
// `crdpNodes` will only contain the body node and its descendants.

const {
totalTextLength,
failingTextLength,
failingNodes,
} = this.findFailingNodes(backendIdsToFontData, crdpNodes);
} = this.findFailingNodes(snapshot);
const {
analyzedFailingNodesData,
analyzedFailingTextLength,
Expand Down
Loading