From 275dc644e63b5fa966c23daacf058357abcb7f01 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sun, 5 Jul 2020 22:07:02 -0400 Subject: [PATCH 01/46] first experiment with accessibility annotations --- lib/LaTeXML/Post/MathML.pm | 1 + lib/LaTeXML/Post/MathML/Presentation.pm | 50 ++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/lib/LaTeXML/Post/MathML.pm b/lib/LaTeXML/Post/MathML.pm index 6d9efc179..119eec297 100644 --- a/lib/LaTeXML/Post/MathML.pm +++ b/lib/LaTeXML/Post/MathML.pm @@ -26,6 +26,7 @@ our @EXPORT = ( &pmml_infix &pmml_script &pmml_summation), qw( &cmml &cmml_share &cmml_shared &cmml_ci &cmml_or_compose &cmml_synth_not &cmml_synth_complement), + qw(&getQName) ); require LaTeXML::Post::MathML::Presentation; require LaTeXML::Post::MathML::Content; diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 6e757a41b..1101efb76 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -14,6 +14,8 @@ package LaTeXML::Post::MathML::Presentation; use strict; use warnings; use base qw(LaTeXML::Post::MathML); +use LaTeXML::Post::MathML qw(getQName); +use LaTeXML::Common::XML qw(isElementNode); sub preprocess { my ($self, $doc, @maths) = @_; @@ -68,11 +70,13 @@ sub convertNode { sub rawIDSuffix { return '.pmml'; } +use Data::Dumper; + sub associateNodeHook { my ($self, $node, $sourcenode) = @_; # TODO: Shouldn't we have a single getQName shared for the entire latexml codebase # in LaTeXML::Common or LaTeXML::Util ? 
- my $name = LaTeXML::Post::MathML::getQName($node); + my $name = getQName($node); if ($name =~ /^m:(?:mi|mo|mn)$/) { if (my $href = $sourcenode->getAttribute('href')) { if (ref $node eq 'ARRAY') { @@ -84,6 +88,50 @@ sub associateNodeHook { $$node[1]{title} = $title; } else { $node->setAttribute('title', $title); } } } + # Experiment: set accessibility attributes on the resulting presentation tree, + # if the XMath source has a claim to the semantics via a "meaning" attribute. + my $meaning; + my $source_name = getQName($sourcenode); + if ($source_name eq 'ltx:XMTok') { + $meaning = $sourcenode->getAttribute('meaning'); } + elsif ($source_name eq 'ltx:XMApp') { + my @src_children; + if (ref $sourcenode eq 'ARRAY') { + @src_children = @$sourcenode[2 .. -1]; } + else { + @src_children = $sourcenode->childNodes; } + if ($name ne 'm:mrow') { + # Implied operator case with special presentation element, rather than an mrow + # (e.g. in \sqrt{} we don't have an operator token, but a wrapping msqrt) + if (my $op_literal = $src_children[0]->getAttribute('meaning')) { +# attempt annotating only if we understand the operator, otherwise leave the default behavior to handle this element + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. scalar(@src_children) - 1)) . ')'; } } + else { + # Equivalent layout case: + $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. scalar(@src_children) - 1)) . ')'; } } + if ($meaning) { + if (ref $node eq 'ARRAY') { + $$node[1]{semantic} = $meaning; } + else { + $node->setAttribute('semantic', $meaning); } } + # Also check if argument of higher parent notation, mark if so. 
+ my $sourceparent = $sourcenode->parentNode; + if (getQName($sourceparent) eq 'ltx:XMApp') { + my $op_node = $sourceparent->firstChild; + if ($op_node->getAttribute('meaning')) { # only annotated applications we understand + my $arg; + my $index = 0; + my $prev_sibling = $sourcenode; + while ($prev_sibling = $prev_sibling->previousSibling) { + $index++ if isElementNode($prev_sibling); } + if ($index == 0) { + $arg = 'op'; } + else { + $arg = $index; } + if (ref $node eq 'ARRAY') { + $$node[1]{arg} = $arg; } + else { + $node->setAttribute('arg', $arg); } } } return; } #================================================================================ From 019137723c5d4320c2263277aa2efe4df69257ae Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sun, 5 Jul 2020 22:59:53 -0400 Subject: [PATCH 02/46] also handle a basic dual --- lib/LaTeXML/Post/MathML/Presentation.pm | 84 ++++++++++++++++--------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 1101efb76..387746897 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -70,9 +70,8 @@ sub convertNode { sub rawIDSuffix { return '.pmml'; } -use Data::Dumper; - sub associateNodeHook { + # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form my ($self, $node, $sourcenode) = @_; # TODO: Shouldn't we have a single getQName shared for the entire latexml codebase # in LaTeXML::Common or LaTeXML::Util ? @@ -90,36 +89,51 @@ sub associateNodeHook { $node->setAttribute('title', $title); } } } # Experiment: set accessibility attributes on the resulting presentation tree, # if the XMath source has a claim to the semantics via a "meaning" attribute. + # Part I: Top-down. 
Recover the meaning of a subtree as an accessible annotation my $meaning; - my $source_name = getQName($sourcenode); - if ($source_name eq 'ltx:XMTok') { - $meaning = $sourcenode->getAttribute('meaning'); } - elsif ($source_name eq 'ltx:XMApp') { - my @src_children; - if (ref $sourcenode eq 'ARRAY') { - @src_children = @$sourcenode[2 .. -1]; } - else { - @src_children = $sourcenode->childNodes; } - if ($name ne 'm:mrow') { + my $source_name = getQName($sourcenode); + my $src_parent = $sourcenode->parentNode; + my $src_parent_name = getQName($src_parent); + my $src_grandparent = $src_parent->parentNode; + my $src_grandparent_name = getQName($src_grandparent); + # avoid any handlers in the constituent subtrees of a dual, handle those top-down + if ($src_grandparent_name ne 'ltx:XMDual') { + # tokens are simplest - if we know of a meaning, use that for accessibility + if ($source_name eq 'ltx:XMTok') { + $meaning = $sourcenode->getAttribute('meaning'); } + elsif ($source_name eq 'ltx:XMApp') { + my @src_children = $sourcenode->childNodes; + my $arg_count = scalar(@src_children) - 1; # Implied operator case with special presentation element, rather than an mrow # (e.g. in \sqrt{} we don't have an operator token, but a wrapping msqrt) - if (my $op_literal = $src_children[0]->getAttribute('meaning')) { -# attempt annotating only if we understand the operator, otherwise leave the default behavior to handle this element - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. scalar(@src_children) - 1)) . ')'; } } - else { - # Equivalent layout case: - $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. scalar(@src_children) - 1)) . ')'; } } + if ($name ne 'm:mrow') { + # attempt annotating only if we understand the operator, + # otherwise leave the default behavior to handle this element + if (my $op_literal = $src_children[0]->getAttribute('meaning')) { + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . 
')'; } } + else { + # Directly translate the content tree in the attribute, all constitutents can be cross-annotated: + $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } + elsif ($source_name eq 'ltx:XMDual') { + # duals always have a literal head applied to a list of referenced arguments + my $content_child = $sourcenode->firstChild; + my $op_literal = $content_child->firstChild->getAttribute('meaning'); + my @arg_nodes = $content_child->childNodes; + my $arg_count = scalar(@arg_nodes) - 1; + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } + # if we found some meaning, attach it as an accessible attribute if ($meaning) { if (ref $node eq 'ARRAY') { $$node[1]{semantic} = $meaning; } else { $node->setAttribute('semantic', $meaning); } } - # Also check if argument of higher parent notation, mark if so. - my $sourceparent = $sourcenode->parentNode; - if (getQName($sourceparent) eq 'ltx:XMApp') { - my $op_node = $sourceparent->firstChild; + + # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. + my $arg; + if ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual') { + # Handle applications, but not inside duals - those should be handled when entering the dual + my $op_node = $src_parent->firstChild; if ($op_node->getAttribute('meaning')) { # only annotated applications we understand - my $arg; my $index = 0; my $prev_sibling = $sourcenode; while ($prev_sibling = $prev_sibling->previousSibling) { @@ -127,11 +141,25 @@ sub associateNodeHook { if ($index == 0) { $arg = 'op'; } else { - $arg = $index; } - if (ref $node eq 'ARRAY') { - $$node[1]{arg} = $arg; } - else { - $node->setAttribute('arg', $arg); } } } + $arg = $index; } } } + elsif ($src_parent_name eq 'ltx:XMWrap' && $src_grandparent_name eq 'ltx:XMDual' && + # in which case, associated with the XMRef for this argument, if any. 
+ (my $fragid = $sourcenode->getAttribute('fragid'))) { + my $content_child = $src_grandparent->firstChild; + my @arg_nodes = grep { isElementNode($_) } $content_child->childNodes; + my $index = 0; + while (my $arg_node = shift @arg_nodes) { + if ((getQName($arg_node) eq 'ltx:XMRef') && $arg_node->getAttribute('idref') eq $fragid) { + # Found! + $arg = $index; last; } + $index += 1; + } } +# if we found an indication that this node is an argument of a higher-up content tree, attach the annotation + if ($arg) { + if (ref $node eq 'ARRAY') { + $$node[1]{arg} = $arg; } + else { + $node->setAttribute('arg', $arg); } } return; } #================================================================================ From da078dac20b24344b6e096381be200e213f61df4 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sun, 5 Jul 2020 23:18:09 -0400 Subject: [PATCH 03/46] stacked duals example --- lib/LaTeXML/Post/MathML/Presentation.pm | 57 ++++++++++++++----------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 387746897..12df02c59 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -70,9 +70,16 @@ sub convertNode { sub rawIDSuffix { return '.pmml'; } +use Data::Dumper; + sub associateNodeHook { # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form my ($self, $node, $sourcenode) = @_; + # if (ref $node eq 'ARRAY') { + # print STDERR "node: ", Dumper($node), "\nxmath: ", $sourcenode->toString(1), "\n"; } + # else { + # print STDERR "node: ", $node->toString(1), "\nxmath: ", $sourcenode->toString(1), "\n"; } + # TODO: Shouldn't we have a single getQName shared for the entire latexml codebase # in LaTeXML::Common or LaTeXML::Util ? 
my $name = getQName($node); @@ -96,31 +103,31 @@ sub associateNodeHook { my $src_parent_name = getQName($src_parent); my $src_grandparent = $src_parent->parentNode; my $src_grandparent_name = getQName($src_grandparent); - # avoid any handlers in the constituent subtrees of a dual, handle those top-down - if ($src_grandparent_name ne 'ltx:XMDual') { - # tokens are simplest - if we know of a meaning, use that for accessibility - if ($source_name eq 'ltx:XMTok') { - $meaning = $sourcenode->getAttribute('meaning'); } - elsif ($source_name eq 'ltx:XMApp') { - my @src_children = $sourcenode->childNodes; - my $arg_count = scalar(@src_children) - 1; - # Implied operator case with special presentation element, rather than an mrow - # (e.g. in \sqrt{} we don't have an operator token, but a wrapping msqrt) - if ($name ne 'm:mrow') { - # attempt annotating only if we understand the operator, - # otherwise leave the default behavior to handle this element - if (my $op_literal = $src_children[0]->getAttribute('meaning')) { - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } - else { - # Directly translate the content tree in the attribute, all constitutents can be cross-annotated: - $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } - elsif ($source_name eq 'ltx:XMDual') { - # duals always have a literal head applied to a list of referenced arguments - my $content_child = $sourcenode->firstChild; - my $op_literal = $content_child->firstChild->getAttribute('meaning'); - my @arg_nodes = $content_child->childNodes; - my $arg_count = scalar(@arg_nodes) - 1; - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . 
')'; } } + # tokens are simplest - if we know of a meaning, use that for accessibility + if ($source_name eq 'ltx:XMTok') { + # avoid token handlers in the constituent subtrees of a dual, handle those top-down + if ($src_grandparent_name ne 'ltx:XMDual') { + $meaning = $sourcenode->getAttribute('meaning'); } } + elsif ($source_name eq 'ltx:XMApp') { + my @src_children = $sourcenode->childNodes; + my $arg_count = scalar(@src_children) - 1; + # Implied operator case with special presentation element, rather than an mrow + # (e.g. in \sqrt{} we don't have an operator token, but a wrapping msqrt) + if ($name ne 'm:mrow') { + # attempt annotating only if we understand the operator, + # otherwise leave the default behavior to handle this element + if (my $op_literal = $src_children[0]->getAttribute('meaning')) { + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } + else { + # Directly translate the content tree in the attribute, all constitutents can be cross-annotated: + $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } + elsif ($source_name eq 'ltx:XMDual') { + # duals always have a literal head applied to a list of referenced arguments + my $content_child = $sourcenode->firstChild; + my $op_literal = $content_child->firstChild->getAttribute('meaning'); + my @arg_nodes = $content_child->childNodes; + my $arg_count = scalar(@arg_nodes) - 1; + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . 
')'; } # if we found some meaning, attach it as an accessible attribute if ($meaning) { if (ref $node eq 'ARRAY') { From 0e530d1afcdc6d017b6c84dac0b9a3284861bd8e Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Mon, 6 Jul 2020 18:05:58 -0400 Subject: [PATCH 04/46] more precise xmdual annotations --- lib/LaTeXML/Post/MathML/Presentation.pm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 12df02c59..6f6aa46cb 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -105,9 +105,15 @@ sub associateNodeHook { my $src_grandparent_name = getQName($src_grandparent); # tokens are simplest - if we know of a meaning, use that for accessibility if ($source_name eq 'ltx:XMTok') { - # avoid token handlers in the constituent subtrees of a dual, handle those top-down - if ($src_grandparent_name ne 'ltx:XMDual') { - $meaning = $sourcenode->getAttribute('meaning'); } } + if (my $token_meaning = $sourcenode->getAttribute('meaning')) { + if ($src_grandparent_name eq 'ltx:XMDual') { + # often an XMDual contains the participating tokens of a transfix notation + # and those tokens carry the same meaning as the top-level dual operation. 
+ # in those cases, don't tag the tokens, only tag the top-level dual node + my $dual_meaning = $src_grandparent->firstChild->firstChild->getAttribute('meaning'); + $meaning = $token_meaning if ($token_meaning ne $dual_meaning); } + else { # just copy the meaning in the usual case + $meaning = $token_meaning; } } } elsif ($source_name eq 'ltx:XMApp') { my @src_children = $sourcenode->childNodes; my $arg_count = scalar(@src_children) - 1; From f89c574a425f129dbb10206a560c48583ac569de Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Mon, 6 Jul 2020 22:33:06 -0400 Subject: [PATCH 05/46] for demo, use data-* attributes as per HTML --- lib/LaTeXML/Post/MathML/Presentation.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 6f6aa46cb..35f45fe9a 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -137,9 +137,9 @@ sub associateNodeHook { # if we found some meaning, attach it as an accessible attribute if ($meaning) { if (ref $node eq 'ARRAY') { - $$node[1]{semantic} = $meaning; } + $$node[1]{'data-semantic'} = $meaning; } else { - $node->setAttribute('semantic', $meaning); } } + $node->setAttribute('data-semantic', $meaning); } } # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. 
my $arg; @@ -170,9 +170,9 @@ sub associateNodeHook { # if we found an indication that this node is an argument of a higher-up content tree, attach the annotation if ($arg) { if (ref $node eq 'ARRAY') { - $$node[1]{arg} = $arg; } + $$node[1]{'data-arg'} = $arg; } else { - $node->setAttribute('arg', $arg); } } + $node->setAttribute('data-arg', $arg); } } return; } #================================================================================ From 02c6c377eba81642a113da9d32d29252f38debef Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 7 Jul 2020 20:36:23 -0400 Subject: [PATCH 06/46] also use the most precise current_node info; duals and wraps work well together --- lib/LaTeXML/Post.pm | 2 +- lib/LaTeXML/Post/MathML/Presentation.pm | 52 +++++++++++++++++-------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/lib/LaTeXML/Post.pm b/lib/LaTeXML/Post.pm index 31a90131e..7cd13f955 100644 --- a/lib/LaTeXML/Post.pm +++ b/lib/LaTeXML/Post.pm @@ -546,7 +546,7 @@ sub associateNode { else { $node->setAttribute('xml:id' => $id); } push(@{ $$self{convertedIDs}{$sourceid} }, $id) unless $noxref; } } - $self->associateNodeHook($node, $sourcenode, $noxref); + $self->associateNodeHook($node, $sourcenode, $noxref, $currentnode); if ($isarray) { # Array represented map { $self->associateNode($_, $currentnode, $noxref) } @$node[2 .. 
$#$node]; } else { # LibXML node diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 35f45fe9a..55cef2781 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -74,7 +74,7 @@ use Data::Dumper; sub associateNodeHook { # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form - my ($self, $node, $sourcenode) = @_; + my ($self, $node, $sourcenode, $noxref, $currentnode) = @_; # if (ref $node eq 'ARRAY') { # print STDERR "node: ", Dumper($node), "\nxmath: ", $sourcenode->toString(1), "\n"; } # else { @@ -94,15 +94,23 @@ sub associateNodeHook { $$node[1]{title} = $title; } else { $node->setAttribute('title', $title); } } } + $self->addAccessibilityAnnotations($node, $sourcenode, $currentnode); + return; } + +sub addAccessibilityAnnotations { # Experiment: set accessibility attributes on the resulting presentation tree, # if the XMath source has a claim to the semantics via a "meaning" attribute. # Part I: Top-down. Recover the meaning of a subtree as an accessible annotation + my ($self, $node, $sourcenode, $currentnode) = @_; my $meaning; + my $name = getQName($node); my $source_name = getQName($sourcenode); my $src_parent = $sourcenode->parentNode; my $src_parent_name = getQName($src_parent); my $src_grandparent = $src_parent->parentNode; my $src_grandparent_name = getQName($src_grandparent); + my $current_node_name = getQName($currentnode); + my $current_parent_name = getQName($currentnode->parentNode); # tokens are simplest - if we know of a meaning, use that for accessibility if ($source_name eq 'ltx:XMTok') { if (my $token_meaning = $sourcenode->getAttribute('meaning')) { @@ -127,12 +135,19 @@ sub associateNodeHook { else { # Directly translate the content tree in the attribute, all constitutents can be cross-annotated: $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . 
')'; } } - elsif ($source_name eq 'ltx:XMDual') { - # duals always have a literal head applied to a list of referenced arguments + elsif ($source_name eq 'ltx:XMDual' and $current_node_name eq 'ltx:XMWrap') { +# Duals are tricky, we'd like to annotate them on the top-level only, while still annotating the inner structure as needed +# top-level is (mostly? always?) available when we are examining an XMWrap, use that as a guide for now. +# If no wrap is present, the inner contents should suffice in annotation my $content_child = $sourcenode->firstChild; - my $op_literal = $content_child->firstChild->getAttribute('meaning'); - my @arg_nodes = $content_child->childNodes; - my $arg_count = scalar(@arg_nodes) - 1; + my $op_literal; + if (getQName($content_child) eq 'ltx:XMRef') { + $op_literal = '@op'; # important: we have a clear match in the presentation, so the operator will have an arg + $content_child = $LaTeXML::Post::DOCUMENT->realizeXMNode($content_child); } + my $op_node = getQName($content_child) eq 'ltx:XMTok' ? $content_child : $content_child->firstChild; + $op_literal = $op_literal || $op_node->getAttribute('meaning') || '@op'; + my @arg_nodes = $content_child->childNodes; + my $arg_count = scalar(@arg_nodes) - 1; $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } # if we found some meaning, attach it as an accessible attribute if ($meaning) { @@ -143,11 +158,11 @@ sub associateNodeHook { # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. 
my $arg; - if ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual') { + my $index = 0; + if ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual' && $current_parent_name ne 'ltx:XMWrap') { # Handle applications, but not inside duals - those should be handled when entering the dual my $op_node = $src_parent->firstChild; if ($op_node->getAttribute('meaning')) { # only annotated applications we understand - my $index = 0; my $prev_sibling = $sourcenode; while ($prev_sibling = $prev_sibling->previousSibling) { $index++ if isElementNode($prev_sibling); } @@ -156,18 +171,21 @@ sub associateNodeHook { else { $arg = $index; } } } elsif ($src_parent_name eq 'ltx:XMWrap' && $src_grandparent_name eq 'ltx:XMDual' && - # in which case, associated with the XMRef for this argument, if any. (my $fragid = $sourcenode->getAttribute('fragid'))) { + # This $node is a constituent of a higher-up Dual's presentation. + # If it has been XRef-ed, it should have an arg= annotation my $content_child = $src_grandparent->firstChild; - my @arg_nodes = grep { isElementNode($_) } $content_child->childNodes; + my @content_nodes = grep { isElementNode($_) } $content_child->childNodes; my $index = 0; - while (my $arg_node = shift @arg_nodes) { - if ((getQName($arg_node) eq 'ltx:XMRef') && $arg_node->getAttribute('idref') eq $fragid) { - # Found! 
- $arg = $index; last; } - $index += 1; - } } -# if we found an indication that this node is an argument of a higher-up content tree, attach the annotation + while (my $content_arg = shift @content_nodes) { + if (getQName($content_arg) eq 'ltx:XMRef' && $content_arg->getAttribute('idref') eq $fragid) { + if ($index) { + $arg = $index; } + else { + $arg = 'op'; } + last; } + else { + $index++; } } } if ($arg) { if (ref $node eq 'ARRAY') { $$node[1]{'data-arg'} = $arg; } From a56ac4493ed7791624daac08a9f895bfbd74eddd Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 8 Jul 2020 18:22:11 -0400 Subject: [PATCH 07/46] fragid as primary arg info source; coordinate sourcenode and currentnode with dual logic --- lib/LaTeXML/Post/MathML/Presentation.pm | 63 ++++++++++++++----------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 55cef2781..89d5db811 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -125,16 +125,19 @@ sub addAccessibilityAnnotations { elsif ($source_name eq 'ltx:XMApp') { my @src_children = $sourcenode->childNodes; my $arg_count = scalar(@src_children) - 1; - # Implied operator case with special presentation element, rather than an mrow - # (e.g. in \sqrt{} we don't have an operator token, but a wrapping msqrt) - if ($name ne 'm:mrow') { - # attempt annotating only if we understand the operator, - # otherwise leave the default behavior to handle this element - if (my $op_literal = $src_children[0]->getAttribute('meaning')) { - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } - else { - # Directly translate the content tree in the attribute, all constitutents can be cross-annotated: - $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . 
')'; } } + # Ok, so we need to disentangle the case where the operator XMTok is preserved in pmml, + # and the case where it isn't. E.g. in \sqrt{x} we get a msqrt wrapper, but no dedicated token + # so we need to mark the literal "square-root" in msqrt + my $op_literal = $src_children[0]->getAttribute('meaning'); + if ($op_literal && $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + elsif ($name eq 'm:mrow') { + # usually an mrow keeps the operator token in its children as an <mo> (or such) + # when doesn't it? one example is "multirelation", is there a general pattern? + if ($op_literal eq 'multirelation') { + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + else { # default case, assume we'll find the @op inside + $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } } elsif ($source_name eq 'ltx:XMDual' and $current_node_name eq 'ltx:XMWrap') { # Duals are tricky, we'd like to annotate them on the top-level only, while still annotating the inner structure as needed # top-level is (mostly? always?) available when we are examining an XMWrap, use that as a guide for now. # If no wrap is present, the inner contents should suffice in annotation @@ -159,7 +162,29 @@ sub addAccessibilityAnnotations { # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. my $arg; my $index = 0; - if ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual' && $current_parent_name ne 'ltx:XMWrap') { + if ((my $fragid = $sourcenode->getAttribute('fragid')) && + # duals are again special, since they source many nodes + # we only want to handle the top XMWrap presentation + ($source_name ne 'ltx:XMDual' or $current_node_name eq 'ltx:XMWrap')) { + + # fragid-carrying nodes always have an arg annotation + # step 1. 
find their dual + my $dual_node = $sourcenode->parentNode; + while (getQName($dual_node) ne 'ltx:XMDual') { + $dual_node = $dual_node->parentNode; } + my $content_child = $dual_node->firstChild; + my @content_nodes = getQName($content_child) eq 'ltx:XMApp' ? $content_child->childNodes : (); + my $index = 0; + while (my $content_arg = shift @content_nodes) { + if (getQName($content_arg) eq 'ltx:XMRef' and $content_arg->getAttribute('idref') eq $fragid) { + if ($index) { + $arg = $index; } + else { + $arg = 'op'; } + last; } + else { + $index++; } } } + elsif ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual' && $current_parent_name ne 'ltx:XMWrap') { # Handle applications, but not inside duals - those should be handled when entering the dual my $op_node = $src_parent->firstChild; if ($op_node->getAttribute('meaning')) { # only annotated applications we understand @@ -170,22 +195,6 @@ sub addAccessibilityAnnotations { $arg = 'op'; } else { $arg = $index; } } } - elsif ($src_parent_name eq 'ltx:XMWrap' && $src_grandparent_name eq 'ltx:XMDual' && - (my $fragid = $sourcenode->getAttribute('fragid'))) { - # This $node is a constituent of a higher-up Dual's presentation. 
- # If it has been XRef-ed, it should have an arg= annotation - my $content_child = $src_grandparent->firstChild; - my @content_nodes = grep { isElementNode($_) } $content_child->childNodes; - my $index = 0; - while (my $content_arg = shift @content_nodes) { - if (getQName($content_arg) eq 'ltx:XMRef' && $content_arg->getAttribute('idref') eq $fragid) { - if ($index) { - $arg = $index; } - else { - $arg = 'op'; } - last; } - else { - $index++; } } } if ($arg) { if (ref $node eq 'ARRAY') { $$node[1]{'data-arg'} = $arg; } From a7e867c42f394640d28021c7731f01d3ab2ea135 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 8 Jul 2020 21:03:02 -0400 Subject: [PATCH 08/46] thanks to Neil Soiffer for spotting the annotations Fatal for failed-to-parse inputs --- lib/LaTeXML/Post/MathML/Presentation.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 89d5db811..d85d9cbaa 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -170,11 +170,11 @@ sub addAccessibilityAnnotations { # fragid-carrying nodes always have an arg annotation # step 1. find their dual my $dual_node = $sourcenode->parentNode; - while (getQName($dual_node) ne 'ltx:XMDual') { + while ($dual_node && ((getQName($dual_node) || '') ne 'ltx:XMDual')) { $dual_node = $dual_node->parentNode; } - my $content_child = $dual_node->firstChild; - my @content_nodes = getQName($content_child) eq 'ltx:XMApp' ? $content_child->childNodes : (); - my $index = 0; + my $content_child = $dual_node && $dual_node->firstChild; + my @content_nodes = ($content_child && getQName($content_child) eq 'ltx:XMApp') ? 
$content_child->childNodes : (); + my $index = 0; while (my $content_arg = shift @content_nodes) { if (getQName($content_arg) eq 'ltx:XMRef' and $content_arg->getAttribute('idref') eq $fragid) { if ($index) { From d4b43110f3c4f5de2b76d6dc26bdce94195da2ea Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 12:56:02 -0400 Subject: [PATCH 09/46] fragid-based approach to duals, significant refactor --- lib/LaTeXML/MathParser.pm | 11 +- lib/LaTeXML/Post/MathML/Presentation.pm | 172 ++++++++++++------------ 2 files changed, 98 insertions(+), 85 deletions(-) diff --git a/lib/LaTeXML/MathParser.pm b/lib/LaTeXML/MathParser.pm index 4e827a17a..9585641fc 100644 --- a/lib/LaTeXML/MathParser.pm +++ b/lib/LaTeXML/MathParser.pm @@ -33,7 +33,8 @@ our @EXPORT_OK = (qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbol &LeftRec &Arg &MaybeFunction &SawNotation &IsNotationAllowed - &isMatchingClose &Fence)); + &isMatchingClose &Fence + &p_getAttribute &p_setAttribute)); our %EXPORT_TAGS = (constructors => [qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbols &Annotate &InvisibleTimes &InvisibleComma @@ -1054,6 +1055,14 @@ sub p_getAttribute { elsif (ref $item eq 'XML::LibXML::Element') { return $item->getAttribute($key); } } +sub p_setAttribute { + my ($node, $key, $value) = @_; + if (ref $node eq 'ARRAY') { + $$node[1]{$key} = $value; } + else { + $node->setAttribute($key => $value); } + return; } + sub p_element_nodes { my ($item) = @_; if (!defined $item) { diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index d85d9cbaa..3e728a74b 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -15,6 +15,7 @@ use strict; use warnings; use base qw(LaTeXML::Post::MathML); use LaTeXML::Post::MathML qw(getQName); +use LaTeXML::MathParser qw(p_getAttribute p_setAttribute); use LaTeXML::Common::XML qw(isElementNode); sub preprocess { @@ -75,25 +76,15 @@ use Data::Dumper; 
sub associateNodeHook { # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form my ($self, $node, $sourcenode, $noxref, $currentnode) = @_; - # if (ref $node eq 'ARRAY') { - # print STDERR "node: ", Dumper($node), "\nxmath: ", $sourcenode->toString(1), "\n"; } - # else { - # print STDERR "node: ", $node->toString(1), "\nxmath: ", $sourcenode->toString(1), "\n"; } - # TODO: Shouldn't we have a single getQName shared for the entire latexml codebase + # (same for the p_* methods from MathParser) # in LaTeXML::Common or LaTeXML::Util ? my $name = getQName($node); if ($name =~ /^m:(?:mi|mo|mn)$/) { if (my $href = $sourcenode->getAttribute('href')) { - if (ref $node eq 'ARRAY') { - $$node[1]{href} = $href; } - else { - $node->setAttribute('href', $href); } } + p_setAttribute($node, 'href', $href); } if (my $title = $sourcenode->getAttribute('title')) { - if (ref $node eq 'ARRAY') { - $$node[1]{title} = $title; } - else { - $node->setAttribute('title', $title); } } } + p_setAttribute($node, 'title', $title); } } $self->addAccessibilityAnnotations($node, $sourcenode, $currentnode); return; } @@ -102,104 +93,117 @@ sub addAccessibilityAnnotations { # if the XMath source has a claim to the semantics via a "meaning" attribute. # Part I: Top-down. 
Recover the meaning of a subtree as an accessible annotation my ($self, $node, $sourcenode, $currentnode) = @_; - my $meaning; - my $name = getQName($node); - my $source_name = getQName($sourcenode); - my $src_parent = $sourcenode->parentNode; - my $src_parent_name = getQName($src_parent); - my $src_grandparent = $src_parent->parentNode; - my $src_grandparent_name = getQName($src_grandparent); - my $current_node_name = getQName($currentnode); - my $current_parent_name = getQName($currentnode->parentNode); + my $name = getQName($node); + my $source_name = getQName($sourcenode); + return if $source_name eq 'ltx:XMath'; + my $current_node_name = getQName($currentnode); + my $current_parent = $currentnode->parentNode; + my $current_parent_name = getQName($current_parent); + my $fragid = $currentnode->getAttribute('fragid'); + my ($meaning, $arg); + # FIRST AND FOREMOST, run an exclusion check for pieces that are presentation-only fluff for duals + # namely: + my @dual_pres_ancestry = $LaTeXML::Post::DOCUMENT->findnodes("ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual]", $currentnode); + my $dual_pres_node = $dual_pres_ancestry[-1]; # Weirdly ->findnode() is finding the highest ancestor, rather than the tightest ancestor? This [-1] seems to do it. 
+ if ($dual_pres_node) { # 1) they have a dual ancestor + # 2) no node on the path to that dual has a "fragid" + my $check_node = $currentnode; + while (!$fragid && !$check_node->isSameNode($dual_pres_node)) { + $fragid = $check_node->getAttribute('fragid'); + $check_node = $check_node->parentNode; } + if (!$fragid) { + # 3) they're not "The Main Presentation" node, which is where we want to annotate duals + return unless $currentnode->isSameNode($dual_pres_node); } } + # All other cases, process the node, it has meaningful annotations to add, handle them first + if ($dual_pres_node && $dual_pres_node->isSameNode($currentnode)) { # top-level, annotate with semantic, and potentially arg + my $content_child = $dual_pres_node->previousSibling; + my $op_literal; + if (getQName($content_child) eq 'ltx:XMRef') { + $op_literal = '@op'; # important: we have a clear match in the presentation, so the operator will have an arg + $content_child = $LaTeXML::Post::DOCUMENT->realizeXMNode($content_child); } + if (getQName($content_child) eq 'ltx:XMTok') { # not an else, since this may have just been realized from XMRef + # another exception! (x) will have meaning x, so... + undef $op_literal; + $meaning = '@1'; } + else { + my $op_node = $content_child->firstChild; + $op_literal = $op_literal || ($op_node && $op_node->getAttribute('meaning')) || '@op'; + my @arg_nodes = $content_child->childNodes; + my $arg_count = scalar(@arg_nodes) - 1; + $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } +# Note that if the carrier ltx:XMDual had a fragid, it would get lost as we never visit it through this hook. 
+# to correct that, assign it in the top presentation child + if (!$fragid) { + my $dual = $dual_pres_node->parentNode; + if (my $dual_fragid = $dual->getAttribute('fragid')) { +# But we can't reuse the common logic, since it will comapare the dual with itself rather than its parent, ugh + my $grand_dual = $dual->parentNode; + while (getQName($grand_dual) ne 'ltx:XMDual') { $grand_dual = $grand_dual->parentNode; } + # this HAS to be an apply child right?? + my @grand_content_args = $grand_dual->firstChild->childNodes; + my $grand_args_count = scalar(@grand_content_args); + my $index = 0; + while (my $grand_content_arg = shift @grand_content_args) { + if ($grand_content_arg->getAttribute('idref') eq $dual_fragid) { + $arg = $index ? $index : ($grand_args_count > 1 ? 'op' : '1'); } + else { $index++; } } + } } } # tokens are simplest - if we know of a meaning, use that for accessibility - if ($source_name eq 'ltx:XMTok') { - if (my $token_meaning = $sourcenode->getAttribute('meaning')) { - if ($src_grandparent_name eq 'ltx:XMDual') { - # often an XMDual contains the participating tokens of a transfix notation - # and those tokens carry the same meaning as the top-level dual operation. - # in those cases, don't tag the tokens, only tag the top-level dual node - my $dual_meaning = $src_grandparent->firstChild->firstChild->getAttribute('meaning'); - $meaning = $token_meaning if ($token_meaning ne $dual_meaning); } - else { # just copy the meaning in the usual case - $meaning = $token_meaning; } } } - elsif ($source_name eq 'ltx:XMApp') { - my @src_children = $sourcenode->childNodes; + elsif ($current_node_name eq 'ltx:XMTok') { + $meaning = $currentnode->getAttribute('meaning'); } + elsif ($current_node_name eq 'ltx:XMApp') { + my @src_children = $currentnode->childNodes; my $arg_count = scalar(@src_children) - 1; # Ok, so we need to disentangle the case where the operator XMTok is preserved in pmml, # and the case where it isn't. E.g. 
in \sqrt{x} we get a msqrt wrapper, but no dedicated token # so we need to mark the literal "square-root" in msqrt my $op_literal = $src_children[0]->getAttribute('meaning'); - if ($op_literal && $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? + if ($op_literal and $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } elsif ($name eq 'm:mrow') { # usually an mrow keeps the operator token in its children as an (or such) # when doesn't it? one example is "multirelation", is there a general pattern? - if ($op_literal eq 'multirelation') { + if ($op_literal and $op_literal eq 'multirelation') { $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } else { # default case, assume we'll find the @op inside $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } } - elsif ($source_name eq 'ltx:XMDual' and $current_node_name eq 'ltx:XMWrap') { -# Duals are tricky, we'd like to annotate them on the top-level only, while still annotating the inner structure as needed -# top-level is (mostly? always?) available when we are examining an XMWrap, use that as a guide for now. -# If no wrap is present, the inner contents should suffice in annotation - my $content_child = $sourcenode->firstChild; - my $op_literal; - if (getQName($content_child) eq 'ltx:XMRef') { - $op_literal = '@op'; # important: we have a clear match in the presentation, so the operator will have an arg - $content_child = $LaTeXML::Post::DOCUMENT->realizeXMNode($content_child); } - my $op_node = getQName($content_child) eq 'ltx:XMTok' ? $content_child : $content_child->firstChild; - $op_literal = $op_literal || $op_node->getAttribute('meaning') || '@op'; - my @arg_nodes = $content_child->childNodes; - my $arg_count = scalar(@arg_nodes) - 1; - $meaning = $op_literal . '(' . 
join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + # if we found some meaning, attach it as an accessible attribute if ($meaning) { - if (ref $node eq 'ARRAY') { - $$node[1]{'data-semantic'} = $meaning; } - else { - $node->setAttribute('data-semantic', $meaning); } } + p_setAttribute($node, 'data-semantic', $meaning); } # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. - my $arg; + # best to reset fragid here + $fragid = $currentnode->getAttribute('fragid'); my $index = 0; - if ((my $fragid = $sourcenode->getAttribute('fragid')) && - # duals are again special, since they source many nodes - # we only want to handle the top XMWrap presentation - ($source_name ne 'ltx:XMDual' or $current_node_name eq 'ltx:XMWrap')) { - - # fragid-carrying nodes always have an arg annotation - # step 1. find their dual - my $dual_node = $sourcenode->parentNode; - while ($dual_node && ((getQName($dual_node) || '') ne 'ltx:XMDual')) { - $dual_node = $dual_node->parentNode; } - my $content_child = $dual_node && $dual_node->firstChild; - my @content_nodes = ($content_child && getQName($content_child) eq 'ltx:XMApp') ? $content_child->childNodes : (); + # II.1 fragid-carrying nodes always point to their referrees. + if ($fragid) { + print STDERR "fragid $fragid carried: ", $currentnode->toString(1), "\n"; + # We already found the dual + my $content_child = $dual_pres_node->previousSibling; + my @content_args = getQName($content_child) eq 'ltx:XMApp' ? 
($content_child->childNodes) : ($content_child); + my $arg_count = scalar(@content_args); + # if no compound-apply, no need for top-level dual annotation, leave it to the descendants my $index = 0; - while (my $content_arg = shift @content_nodes) { - if (getQName($content_arg) eq 'ltx:XMRef' and $content_arg->getAttribute('idref') eq $fragid) { - if ($index) { - $arg = $index; } - else { - $arg = 'op'; } - last; } - else { + while (my $c_arg = shift @content_args) { + my $idref = $c_arg->getAttribute('idref') || ''; + if ($idref eq $fragid) { + $arg = $index || ($arg_count >= 2 ? 'op' : '1'); + } else { $index++; } } } - elsif ($src_parent_name eq 'ltx:XMApp' && $src_grandparent_name ne 'ltx:XMDual' && $current_parent_name ne 'ltx:XMWrap') { - # Handle applications, but not inside duals - those should be handled when entering the dual - my $op_node = $src_parent->firstChild; + # II.2. applications children are directly pointing to their parents + elsif ($current_parent_name eq 'ltx:XMApp') { + my $op_node = $current_parent->firstChild; if ($op_node->getAttribute('meaning')) { # only annotated applications we understand - my $prev_sibling = $sourcenode; + my $prev_sibling = $currentnode; while ($prev_sibling = $prev_sibling->previousSibling) { - $index++ if isElementNode($prev_sibling); } + $index++; } if ($index == 0) { $arg = 'op'; } else { $arg = $index; } } } if ($arg) { - if (ref $node eq 'ARRAY') { - $$node[1]{'data-arg'} = $arg; } - else { - $node->setAttribute('data-arg', $arg); } } + p_setAttribute($node, 'data-arg', $arg); } return; } #================================================================================ From dce2cdd2d00a7bb15bee0460608de3e154d86f46 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 19:05:52 -0400 Subject: [PATCH 10/46] correctly use xml:id instead of fragid, thanks for the correction Bruce! 
--- lib/LaTeXML/Post/MathML/Presentation.pm | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 3e728a74b..a1a01b6f8 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -99,19 +99,19 @@ sub addAccessibilityAnnotations { my $current_node_name = getQName($currentnode); my $current_parent = $currentnode->parentNode; my $current_parent_name = getQName($current_parent); - my $fragid = $currentnode->getAttribute('fragid'); + my $id = $currentnode->getAttribute('xml:id'); my ($meaning, $arg); # FIRST AND FOREMOST, run an exclusion check for pieces that are presentation-only fluff for duals # namely: my @dual_pres_ancestry = $LaTeXML::Post::DOCUMENT->findnodes("ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual]", $currentnode); my $dual_pres_node = $dual_pres_ancestry[-1]; # Weirdly ->findnode() is finding the highest ancestor, rather than the tightest ancestor? This [-1] seems to do it. if ($dual_pres_node) { # 1) they have a dual ancestor - # 2) no node on the path to that dual has a "fragid" + # 2) no node on the path to that dual has a "id" my $check_node = $currentnode; - while (!$fragid && !$check_node->isSameNode($dual_pres_node)) { - $fragid = $check_node->getAttribute('fragid'); + while (!$id && !$check_node->isSameNode($dual_pres_node)) { + $id = $check_node->getAttribute('xml:id'); $check_node = $check_node->parentNode; } - if (!$fragid) { + if (!$id) { # 3) they're not "The Main Presentation" node, which is where we want to annotate duals return unless $currentnode->isSameNode($dual_pres_node); } } # All other cases, process the node, it has meaningful annotations to add, handle them first @@ -131,11 +131,11 @@ sub addAccessibilityAnnotations { my @arg_nodes = $content_child->childNodes; my $arg_count = scalar(@arg_nodes) - 1; $meaning = $op_literal . '(' . 
join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } -# Note that if the carrier ltx:XMDual had a fragid, it would get lost as we never visit it through this hook. +# Note that if the carrier ltx:XMDual had a id, it would get lost as we never visit it through this hook. # to correct that, assign it in the top presentation child - if (!$fragid) { + if (!$id) { my $dual = $dual_pres_node->parentNode; - if (my $dual_fragid = $dual->getAttribute('fragid')) { + if (my $id = $dual->getAttribute('xml:id')) { # But we can't reuse the common logic, since it will comapare the dual with itself rather than its parent, ugh my $grand_dual = $dual->parentNode; while (getQName($grand_dual) ne 'ltx:XMDual') { $grand_dual = $grand_dual->parentNode; } @@ -144,7 +144,7 @@ sub addAccessibilityAnnotations { my $grand_args_count = scalar(@grand_content_args); my $index = 0; while (my $grand_content_arg = shift @grand_content_args) { - if ($grand_content_arg->getAttribute('idref') eq $dual_fragid) { + if ($grand_content_arg->getAttribute('idref') eq $id) { $arg = $index ? $index : ($grand_args_count > 1 ? 'op' : '1'); } else { $index++; } } } } } @@ -173,12 +173,12 @@ sub addAccessibilityAnnotations { p_setAttribute($node, 'data-semantic', $meaning); } # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. - # best to reset fragid here - $fragid = $currentnode->getAttribute('fragid'); + # best to reset id here + $id = $currentnode->getAttribute('xml:id'); my $index = 0; - # II.1 fragid-carrying nodes always point to their referrees. - if ($fragid) { - print STDERR "fragid $fragid carried: ", $currentnode->toString(1), "\n"; + # II.1 id-carrying nodes always point to their referrees. + if ($id) { + print STDERR "id $id carried: ", $currentnode->toString(1), "\n"; # We already found the dual my $content_child = $dual_pres_node->previousSibling; my @content_args = getQName($content_child) eq 'ltx:XMApp' ? 
($content_child->childNodes) : ($content_child); @@ -187,7 +187,7 @@ sub addAccessibilityAnnotations { my $index = 0; while (my $c_arg = shift @content_args) { my $idref = $c_arg->getAttribute('idref') || ''; - if ($idref eq $fragid) { + if ($idref eq $id) { $arg = $index || ($arg_count >= 2 ? 'op' : '1'); } else { $index++; } } } From 0ede129275be32b72d280e127d57a9e176faa863 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 19:17:47 -0400 Subject: [PATCH 11/46] also handle duals in apply --- lib/LaTeXML/Post/MathML/Presentation.pm | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index a1a01b6f8..2fc4626d3 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -134,10 +134,10 @@ sub addAccessibilityAnnotations { # Note that if the carrier ltx:XMDual had a id, it would get lost as we never visit it through this hook. # to correct that, assign it in the top presentation child if (!$id) { - my $dual = $dual_pres_node->parentNode; + my $dual = $dual_pres_node->parentNode; + my $grand_dual = $dual->parentNode; if (my $id = $dual->getAttribute('xml:id')) { # But we can't reuse the common logic, since it will comapare the dual with itself rather than its parent, ugh - my $grand_dual = $dual->parentNode; while (getQName($grand_dual) ne 'ltx:XMDual') { $grand_dual = $grand_dual->parentNode; } # this HAS to be an apply child right?? my @grand_content_args = $grand_dual->firstChild->childNodes; @@ -146,8 +146,15 @@ sub addAccessibilityAnnotations { while (my $grand_content_arg = shift @grand_content_args) { if ($grand_content_arg->getAttribute('idref') eq $id) { $arg = $index ? $index : ($grand_args_count > 1 ? 
'op' : '1'); } - else { $index++; } } - } } } + else { $index++; } } } + elsif (getQName($grand_dual) eq 'ltx:XMApp') { + # simpler case of the dual being an simple argument, as in x\in(0,1) + my $index = 0; + my $prev = $dual->previousSibling; + while ($prev) { + $index++; + $prev = $prev->previousSibling; } + $arg = $index ? $index : 'op'; } } } # tokens are simplest - if we know of a meaning, use that for accessibility elsif ($current_node_name eq 'ltx:XMTok') { $meaning = $currentnode->getAttribute('meaning'); } From 765bc3f224786cf1123e90aa33f2173e36a1e174 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 19:23:37 -0400 Subject: [PATCH 12/46] switch to using # instead of @ for referring to args --- lib/LaTeXML/Post/MathML/Presentation.pm | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 2fc4626d3..9bd548e19 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -119,18 +119,18 @@ sub addAccessibilityAnnotations { my $content_child = $dual_pres_node->previousSibling; my $op_literal; if (getQName($content_child) eq 'ltx:XMRef') { - $op_literal = '@op'; # important: we have a clear match in the presentation, so the operator will have an arg + $op_literal = '#op'; # important: we have a clear match in the presentation, so the operator will have an arg $content_child = $LaTeXML::Post::DOCUMENT->realizeXMNode($content_child); } if (getQName($content_child) eq 'ltx:XMTok') { # not an else, since this may have just been realized from XMRef # another exception! (x) will have meaning x, so... 
undef $op_literal; - $meaning = '@1'; } + $meaning = '#1'; } else { my $op_node = $content_child->firstChild; - $op_literal = $op_literal || ($op_node && $op_node->getAttribute('meaning')) || '@op'; + $op_literal = $op_literal || ($op_node && $op_node->getAttribute('meaning')) || '#op'; my @arg_nodes = $content_child->childNodes; my $arg_count = scalar(@arg_nodes) - 1; - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } # Note that if the carrier ltx:XMDual had a id, it would get lost as we never visit it through this hook. # to correct that, assign it in the top presentation child if (!$id) { @@ -166,14 +166,14 @@ sub addAccessibilityAnnotations { # so we need to mark the literal "square-root" in msqrt my $op_literal = $src_children[0]->getAttribute('meaning'); if ($op_literal and $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } elsif ($name eq 'm:mrow') { # usually an mrow keeps the operator token in its children as an (or such) # when doesn't it? one example is "multirelation", is there a general pattern? if ($op_literal and $op_literal eq 'multirelation') { - $meaning = $op_literal . '(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } + $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } else { # default case, assume we'll find the @op inside - $meaning = '@op(' . join(",", map { '@' . $_ } (1 .. $arg_count)) . ')'; } } } + $meaning = '#op(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . 
')'; } } } # if we found some meaning, attach it as an accessible attribute if ($meaning) { From 653d2d4d2340f2b635dc9fadbef7e6277d68e5a7 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 20:25:50 -0400 Subject: [PATCH 13/46] adapt Bruce's useful memory trick with an attribute marking XMath nodes as processed --- lib/LaTeXML/Post/MathML/Presentation.pm | 35 +++++++++++++------------ 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 9bd548e19..af87ab1a8 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -85,21 +85,23 @@ sub associateNodeHook { p_setAttribute($node, 'href', $href); } if (my $title = $sourcenode->getAttribute('title')) { p_setAttribute($node, 'title', $title); } } - $self->addAccessibilityAnnotations($node, $sourcenode, $currentnode); + $self->addAccessibilityAnnotations($node, $currentnode); return; } sub addAccessibilityAnnotations { # Experiment: set accessibility attributes on the resulting presentation tree, # if the XMath source has a claim to the semantics via a "meaning" attribute. # Part I: Top-down. Recover the meaning of a subtree as an accessible annotation - my ($self, $node, $sourcenode, $currentnode) = @_; - my $name = getQName($node); - my $source_name = getQName($sourcenode); - return if $source_name eq 'ltx:XMath'; - my $current_node_name = getQName($currentnode); - my $current_parent = $currentnode->parentNode; - my $current_parent_name = getQName($current_parent); - my $id = $currentnode->getAttribute('xml:id'); + my ($self, $node, $currentnode) = @_; + my $current_node_name = getQName($currentnode); + return if $current_node_name eq 'ltx:XMath'; +# a number of redundant annotations are caused by reusing the same content node for on-the-fly content, +# e.g. 
we end up creating a new invisible-apply XMTok, and then associate its node +# with the $currentnode of its parent f(x), now as finvisible-apply(x) +# that second call should just immediately terminate, there is nothing to add in such cases. + return if $currentnode->getAttribute('_a11y_done'); + $currentnode->setAttribute('_a11y_done', '1'); + my $id = $currentnode->getAttribute('xml:id'); my ($meaning, $arg); # FIRST AND FOREMOST, run an exclusion check for pieces that are presentation-only fluff for duals # namely: @@ -144,7 +146,7 @@ sub addAccessibilityAnnotations { my $grand_args_count = scalar(@grand_content_args); my $index = 0; while (my $grand_content_arg = shift @grand_content_args) { - if ($grand_content_arg->getAttribute('idref') eq $id) { + if (($grand_content_arg->getAttribute('idref') || '') eq $id) { $arg = $index ? $index : ($grand_args_count > 1 ? 'op' : '1'); } else { $index++; } } } elsif (getQName($grand_dual) eq 'ltx:XMApp') { @@ -165,6 +167,7 @@ sub addAccessibilityAnnotations { # and the case where it isn't. E.g. in \sqrt{x} we get a msqrt wrapper, but no dedicated token # so we need to mark the literal "square-root" in msqrt my $op_literal = $src_children[0]->getAttribute('meaning'); + my $name = getQName($node); if ($op_literal and $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } elsif ($name eq 'm:mrow') { @@ -176,16 +179,15 @@ sub addAccessibilityAnnotations { $meaning = '#op(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } } } # if we found some meaning, attach it as an accessible attribute - if ($meaning) { - p_setAttribute($node, 'data-semantic', $meaning); } + p_setAttribute($node, 'data-semantic', $meaning) if $meaning; # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. 
# best to reset id here $id = $currentnode->getAttribute('xml:id'); - my $index = 0; + my $current_parent = $currentnode->parentNode; + my $index = 0; # II.1 id-carrying nodes always point to their referrees. if ($id) { - print STDERR "id $id carried: ", $currentnode->toString(1), "\n"; # We already found the dual my $content_child = $dual_pres_node->previousSibling; my @content_args = getQName($content_child) eq 'ltx:XMApp' ? ($content_child->childNodes) : ($content_child); @@ -199,7 +201,7 @@ sub addAccessibilityAnnotations { } else { $index++; } } } # II.2. applications children are directly pointing to their parents - elsif ($current_parent_name eq 'ltx:XMApp') { + elsif (getQName($current_parent) eq 'ltx:XMApp') { my $op_node = $current_parent->firstChild; if ($op_node->getAttribute('meaning')) { # only annotated applications we understand my $prev_sibling = $currentnode; @@ -209,8 +211,7 @@ sub addAccessibilityAnnotations { $arg = 'op'; } else { $arg = $index; } } } - if ($arg) { - p_setAttribute($node, 'data-arg', $arg); } + p_setAttribute($node, 'data-arg', $arg) if ($arg); return; } #================================================================================ From 4c950cfd436653b5de09dab034a208d2cfd1ce73 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 22:18:34 -0400 Subject: [PATCH 14/46] now under 100 lines of code, with straightened out logic routes and more comments --- lib/LaTeXML/Post/MathML/Presentation.pm | 138 +++++++++--------------- 1 file changed, 51 insertions(+), 87 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index af87ab1a8..af7834d58 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -16,7 +16,7 @@ use warnings; use base qw(LaTeXML::Post::MathML); use LaTeXML::Post::MathML qw(getQName); use LaTeXML::MathParser qw(p_getAttribute p_setAttribute); -use LaTeXML::Common::XML qw(isElementNode); +use 
LaTeXML::Common::XML; sub preprocess { my ($self, $doc, @maths) = @_; @@ -71,8 +71,6 @@ sub convertNode { sub rawIDSuffix { return '.pmml'; } -use Data::Dumper; - sub associateNodeHook { # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form my ($self, $node, $sourcenode, $noxref, $currentnode) = @_; @@ -88,9 +86,9 @@ sub associateNodeHook { $self->addAccessibilityAnnotations($node, $currentnode); return; } +# Experiment: set accessibility attributes on the resulting presentation tree, +# if the XMath source has a claim to the semantics via a "meaning" attribute. sub addAccessibilityAnnotations { - # Experiment: set accessibility attributes on the resulting presentation tree, - # if the XMath source has a claim to the semantics via a "meaning" attribute. # Part I: Top-down. Recover the meaning of a subtree as an accessible annotation my ($self, $node, $currentnode) = @_; my $current_node_name = getQName($currentnode); @@ -101,117 +99,83 @@ sub addAccessibilityAnnotations { # that second call should just immediately terminate, there is nothing to add in such cases. return if $currentnode->getAttribute('_a11y_done'); $currentnode->setAttribute('_a11y_done', '1'); - my $id = $currentnode->getAttribute('xml:id'); - my ($meaning, $arg); - # FIRST AND FOREMOST, run an exclusion check for pieces that are presentation-only fluff for duals - # namely: + # --- TOP PRIORITY: run an exclusion check for pieces that are presentation-only fluff for duals my @dual_pres_ancestry = $LaTeXML::Post::DOCUMENT->findnodes("ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual]", $currentnode); my $dual_pres_node = $dual_pres_ancestry[-1]; # Weirdly ->findnode() is finding the highest ancestor, rather than the tightest ancestor? This [-1] seems to do it. 
- if ($dual_pres_node) { # 1) they have a dual ancestor - # 2) no node on the path to that dual has a "id" + if ($dual_pres_node && !$dual_pres_node->isSameNode($currentnode)) { # 1) they have a dual ancestor, but are not the main presentation node my $check_node = $currentnode; + my $id = $currentnode->getAttribute('xml:id'); while (!$id && !$check_node->isSameNode($dual_pres_node)) { $id = $check_node->getAttribute('xml:id'); $check_node = $check_node->parentNode; } - if (!$id) { - # 3) they're not "The Main Presentation" node, which is where we want to annotate duals - return unless $currentnode->isSameNode($dual_pres_node); } } - # All other cases, process the node, it has meaningful annotations to add, handle them first - if ($dual_pres_node && $dual_pres_node->isSameNode($currentnode)) { # top-level, annotate with semantic, and potentially arg - my $content_child = $dual_pres_node->previousSibling; - my $op_literal; - if (getQName($content_child) eq 'ltx:XMRef') { - $op_literal = '#op'; # important: we have a clear match in the presentation, so the operator will have an arg - $content_child = $LaTeXML::Post::DOCUMENT->realizeXMNode($content_child); } - if (getQName($content_child) eq 'ltx:XMTok') { # not an else, since this may have just been realized from XMRef - # another exception! (x) will have meaning x, so... 
- undef $op_literal; + # if no id is found, they are not referenced by the dual + return unless $id; } + # --- + # In the remaining cases, process the node, check if it has meaningful annotations to add + my ($meaning, $arg); + if ($dual_pres_node && $dual_pres_node->isSameNode($currentnode)) { # top-level pres of dual + my $dual_content_node = $dual_pres_node->previousSibling; + my $dual_content_name = getQName($dual_content_node); + if ($dual_content_name eq 'ltx:XMRef') { # single subtree of the presentation, point to it $meaning = '#1'; } - else { - my $op_node = $content_child->firstChild; - $op_literal = $op_literal || ($op_node && $op_node->getAttribute('meaning')) || '#op'; - my @arg_nodes = $content_child->childNodes; + elsif ($dual_content_name eq 'ltx:XMTok') { + $meaning = $dual_content_node->getAttribute('meaning'); } + elsif ($dual_content_name eq 'ltx:XMApp') { + my $op_node = $dual_content_node->firstChild; + my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op'; + my @arg_nodes = element_nodes($dual_content_node); my $arg_count = scalar(@arg_nodes) - 1; - $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } -# Note that if the carrier ltx:XMDual had a id, it would get lost as we never visit it through this hook. + $meaning = $op . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } +# Note that if the carrier ltx:XMDual had a referenced id, it would get lost as we never visit it through this hook. # to correct that, assign it in the top presentation child - if (!$id) { - my $dual = $dual_pres_node->parentNode; - my $grand_dual = $dual->parentNode; - if (my $id = $dual->getAttribute('xml:id')) { -# But we can't reuse the common logic, since it will comapare the dual with itself rather than its parent, ugh - while (getQName($grand_dual) ne 'ltx:XMDual') { $grand_dual = $grand_dual->parentNode; } - # this HAS to be an apply child right?? 
- my @grand_content_args = $grand_dual->firstChild->childNodes; - my $grand_args_count = scalar(@grand_content_args); - my $index = 0; - while (my $grand_content_arg = shift @grand_content_args) { - if (($grand_content_arg->getAttribute('idref') || '') eq $id) { - $arg = $index ? $index : ($grand_args_count > 1 ? 'op' : '1'); } - else { $index++; } } } - elsif (getQName($grand_dual) eq 'ltx:XMApp') { - # simpler case of the dual being an simple argument, as in x\in(0,1) - my $index = 0; - my $prev = $dual->previousSibling; - while ($prev) { - $index++; - $prev = $prev->previousSibling; } - $arg = $index ? $index : 'op'; } } } + my $dual = $dual_pres_node->parentNode; + if (my $id = $dual->getAttribute('xml:id')) { + $self->addAccessibilityAnnotations($node, $dual); } } # tokens are simplest - if we know of a meaning, use that for accessibility elsif ($current_node_name eq 'ltx:XMTok') { $meaning = $currentnode->getAttribute('meaning'); } elsif ($current_node_name eq 'ltx:XMApp') { - my @src_children = $currentnode->childNodes; - my $arg_count = scalar(@src_children) - 1; + my @current_children = element_nodes($currentnode); + my $current_op_meaning = $current_children[0]->getAttribute('meaning') || ''; + my $arg_count = scalar(@current_children) - 1; # Ok, so we need to disentangle the case where the operator XMTok is preserved in pmml, # and the case where it isn't. E.g. in \sqrt{x} we get a msqrt wrapper, but no dedicated token # so we need to mark the literal "square-root" in msqrt - my $op_literal = $src_children[0]->getAttribute('meaning'); - my $name = getQName($node); - if ($op_literal and $name ne 'm:mrow') { # assume we have phased out the operator node. Are there counter-examples? - $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } - elsif ($name eq 'm:mrow') { - # usually an mrow keeps the operator token in its children as an (or such) - # when doesn't it? one example is "multirelation", is there a general pattern? 
- if ($op_literal and $op_literal eq 'multirelation') { - $meaning = $op_literal . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } - else { # default case, assume we'll find the @op inside - $meaning = '#op(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } } } + my $op; + my $name = getQName($node); + if ($name ne 'm:mrow') { # not an mrow, prefer the literal semantic + $op = $current_op_meaning || $name; } + else { # mrow, prefer #op, except for whitelisted exception cases (which ones??) + $op = ($current_op_meaning eq 'multirelation') ? $current_op_meaning : '#op'; } + if ($op) { # Set the meaning, if we found a satisfying $op: + $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . ")"; } } # if we found some meaning, attach it as an accessible attribute p_setAttribute($node, 'data-semantic', $meaning) if $meaning; # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. - # best to reset id here - $id = $currentnode->getAttribute('xml:id'); - my $current_parent = $currentnode->parentNode; - my $index = 0; + my $index = 0; # II.1 id-carrying nodes always point to their referrees. - if ($id) { + if ($dual_pres_node && (my $id = $currentnode->getAttribute('xml:id'))) { # We already found the dual - my $content_child = $dual_pres_node->previousSibling; - my @content_args = getQName($content_child) eq 'ltx:XMApp' ? ($content_child->childNodes) : ($content_child); + my $dual_content_node = $dual_pres_node->previousSibling; + my @content_args = getQName($dual_content_node) eq 'ltx:XMApp' ? element_nodes($dual_content_node) : ($dual_content_node); my $arg_count = scalar(@content_args); # if no compound-apply, no need for top-level dual annotation, leave it to the descendants - my $index = 0; while (my $c_arg = shift @content_args) { - my $idref = $c_arg->getAttribute('idref') || ''; - if ($idref eq $id) { + if ($id eq ($c_arg->getAttribute('idref') || '')) { $arg = $index || ($arg_count >= 2 ? 
'op' : '1'); - } else { + last; + } else { # note that if we never find the 'idref', arg is never set $index++; } } } # II.2. applications children are directly pointing to their parents - elsif (getQName($current_parent) eq 'ltx:XMApp') { - my $op_node = $current_parent->firstChild; - if ($op_node->getAttribute('meaning')) { # only annotated applications we understand - my $prev_sibling = $currentnode; - while ($prev_sibling = $prev_sibling->previousSibling) { - $index++; } - if ($index == 0) { - $arg = 'op'; } - else { - $arg = $index; } } } - p_setAttribute($node, 'data-arg', $arg) if ($arg); + # also fallback in the dual case, if the XMApp had an id but wasn't an arg + if (!$arg && (getQName($currentnode->parentNode) eq 'ltx:XMApp')) { + my $prev_sibling = $currentnode; + while ($prev_sibling = $prev_sibling->previousSibling) { + $index++; } + $arg = $index ? $index : 'op'; } + p_setAttribute($node, 'data-arg', $arg) if $arg; return; } #================================================================================ From 5afea504da17a6d365e8023150389c958710223c Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 9 Jul 2020 22:31:27 -0400 Subject: [PATCH 15/46] add experimental a11ymark binding for web showcase, e.g. 
\integral --- MANIFEST | 1 + lib/LaTeXML/Package/a11ymark.sty.ltxml | 32 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 lib/LaTeXML/Package/a11ymark.sty.ltxml diff --git a/MANIFEST b/MANIFEST index 236f9c3b3..ce0ba574d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -322,6 +322,7 @@ lib/LaTeXML/Package/PoS.cls.ltxml lib/LaTeXML/Package/TeX.pool.ltxml lib/LaTeXML/Package/a0poster.cls.ltxml lib/LaTeXML/Package/a0size.sty.ltxml +lib/LaTeXML/Package/a11ymark.sty.ltxml lib/LaTeXML/Package/a4.sty.ltxml lib/LaTeXML/Package/a4wide.sty.ltxml lib/LaTeXML/Package/aa.cls.ltxml diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml new file mode 100644 index 000000000..0af2ba5ec --- /dev/null +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -0,0 +1,32 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | a11ymark.sty -- demo semantic bindings for accessibility | # +# | Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +DefConstructor('\diffd', 'd'); +DefMath('\deriv[]{}{}', + '\frac{\@MAYBEAPPLY{\@SUPERSCRIPT{\diffd}{#1}}{#2}}' + . '{\@SUPERSCRIPT{\@APPLY{\diffd #3}}{#1}}', + meaning => 'derivative', reorder => [2, 3, 1], + # afterDigest => sub { + # # NOTE: arg 2 will be wrapped in XMRef! 
+ # $_[1]->setProperty(role => 'DIFFOP') if checkDiffOp($_[1]); + # return; }, + hide_content_reversion => 1); + +DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); + +######## +1; From 28d101c201a55d9b40b11665ec03a85ea6cd2dcb Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 10 Jul 2020 00:57:38 -0400 Subject: [PATCH 16/46] return to never annotating Apply nodes with unknown meaning --- lib/LaTeXML/MathParser.pm | 10 +++++++++- lib/LaTeXML/Post/MathML/Presentation.pm | 10 +++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/MathParser.pm b/lib/LaTeXML/MathParser.pm index 9585641fc..499437075 100644 --- a/lib/LaTeXML/MathParser.pm +++ b/lib/LaTeXML/MathParser.pm @@ -34,7 +34,7 @@ our @EXPORT_OK = (qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbol &Arg &MaybeFunction &SawNotation &IsNotationAllowed &isMatchingClose &Fence - &p_getAttribute &p_setAttribute)); + &p_getAttribute &p_setAttribute &p_removeAttribute &p_element_nodes)); our %EXPORT_TAGS = (constructors => [qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbols &Annotate &InvisibleTimes &InvisibleComma @@ -1063,6 +1063,14 @@ sub p_setAttribute { $node->setAttribute($key => $value); } return; } +sub p_removeAttribute { + my ($node, $key) = @_; + if (ref $node eq 'ARRAY') { + delete $$node[1]{$key}; } + else { + $node->removeAttribute($key); } + return; } + sub p_element_nodes { my ($item) = @_; if (!defined $item) { diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index af7834d58..66340c040 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -15,7 +15,7 @@ use strict; use warnings; use base qw(LaTeXML::Post::MathML); use LaTeXML::Post::MathML qw(getQName); -use LaTeXML::MathParser qw(p_getAttribute p_setAttribute); +use LaTeXML::MathParser qw(p_getAttribute p_setAttribute p_removeAttribute p_element_nodes); use LaTeXML::Common::XML; sub 
preprocess { @@ -144,11 +144,15 @@ sub addAccessibilityAnnotations { my $op; my $name = getQName($node); if ($name ne 'm:mrow') { # not an mrow, prefer the literal semantic - $op = $current_op_meaning || $name; } + $op = $current_op_meaning; } else { # mrow, prefer #op, except for whitelisted exception cases (which ones??) $op = ($current_op_meaning eq 'multirelation') ? $current_op_meaning : '#op'; } if ($op) { # Set the meaning, if we found a satisfying $op: - $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . ")"; } } + $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . ")"; } + else { # if there is no op, we should undo argument annotations pointing at the application, + for my $arg_node (p_element_nodes($node)) { + p_removeAttribute($arg_node, 'data-arg'); + } } } # if we found some meaning, attach it as an accessible attribute p_setAttribute($node, 'data-semantic', $meaning) if $meaning; From 8d84160655488d93c783b0fec82da07c57ff6b4f Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 10 Jul 2020 01:09:31 -0400 Subject: [PATCH 17/46] use more of Bruce's approach to mark used nodes for easier reasoning in annotating --- lib/LaTeXML/Post/MathML/Presentation.pm | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 66340c040..f4f857021 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -97,8 +97,8 @@ sub addAccessibilityAnnotations { # e.g. we end up creating a new invisible-apply XMTok, and then associate its node # with the $currentnode of its parent f(x), now as finvisible-apply(x) # that second call should just immediately terminate, there is nothing to add in such cases. 
- return if $currentnode->getAttribute('_a11y_done'); - $currentnode->setAttribute('_a11y_done', '1'); + return if $currentnode->getAttribute('_a11y'); + $currentnode->setAttribute('_a11y', 'done'); # --- TOP PRIORITY: run an exclusion check for pieces that are presentation-only fluff for duals my @dual_pres_ancestry = $LaTeXML::Post::DOCUMENT->findnodes("ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual]", $currentnode); my $dual_pres_node = $dual_pres_ancestry[-1]; # Weirdly ->findnode() is finding the highest ancestor, rather than the tightest ancestor? This [-1] seems to do it. @@ -133,7 +133,9 @@ sub addAccessibilityAnnotations { $self->addAccessibilityAnnotations($node, $dual); } } # tokens are simplest - if we know of a meaning, use that for accessibility elsif ($current_node_name eq 'ltx:XMTok') { - $meaning = $currentnode->getAttribute('meaning'); } + # stylistic choice - avoid tagging numbers, even though we could, too obvious + my $role = $currentnode->getAttribute('role') || ''; + $meaning = ($role ne 'NUMBER') && $currentnode->getAttribute('meaning'); } elsif ($current_node_name eq 'ltx:XMApp') { my @current_children = element_nodes($currentnode); my $current_op_meaning = $current_children[0]->getAttribute('meaning') || ''; @@ -151,8 +153,8 @@ sub addAccessibilityAnnotations { $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . 
")"; } else { # if there is no op, we should undo argument annotations pointing at the application, for my $arg_node (p_element_nodes($node)) { - p_removeAttribute($arg_node, 'data-arg'); - } } } + if ((p_getAttribute($arg_node, '_a11y') || '') ne 'ref') { + p_removeAttribute($arg_node, 'data-arg'); } } } } # if we found some meaning, attach it as an accessible attribute p_setAttribute($node, 'data-semantic', $meaning) if $meaning; @@ -168,6 +170,7 @@ sub addAccessibilityAnnotations { # if no compound-apply, no need for top-level dual annotation, leave it to the descendants while (my $c_arg = shift @content_args) { if ($id eq ($c_arg->getAttribute('idref') || '')) { + p_setAttribute($node, '_a11y', 'ref'); # mark as used in ref $arg = $index || ($arg_count >= 2 ? 'op' : '1'); last; } else { # note that if we never find the 'idref', arg is never set From 8e1ea78a49596c028d371e09d1d376ac7a825672 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 10 Jul 2020 15:12:24 -0400 Subject: [PATCH 18/46] always call into ltx:XMDual node to get arg --- lib/LaTeXML/Post/MathML/Presentation.pm | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index f4f857021..99f3ceb3c 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -126,11 +126,8 @@ sub addAccessibilityAnnotations { my @arg_nodes = element_nodes($dual_content_node); my $arg_count = scalar(@arg_nodes) - 1; $meaning = $op . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } -# Note that if the carrier ltx:XMDual had a referenced id, it would get lost as we never visit it through this hook. 
-# to correct that, assign it in the top presentation child - my $dual = $dual_pres_node->parentNode; - if (my $id = $dual->getAttribute('xml:id')) { - $self->addAccessibilityAnnotations($node, $dual); } } +# Note that the carrier ltx:XMDual is never passed in associateNode, but often requires an "arg". Recurse: + $self->addAccessibilityAnnotations($node, $dual_pres_node->parentNode); } # tokens are simplest - if we know of a meaning, use that for accessibility elsif ($current_node_name eq 'ltx:XMTok') { # stylistic choice - avoid tagging numbers, even though we could, too obvious From 3bfe086a5f6acee71ce65e94d2f68962ccbbb878 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 10 Jul 2020 23:48:20 -0400 Subject: [PATCH 19/46] one approach to implementing \power macro --- lib/LaTeXML/MathParser.pm | 3 ++- lib/LaTeXML/Package/a11ymark.sty.ltxml | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/LaTeXML/MathParser.pm b/lib/LaTeXML/MathParser.pm index 499437075..9b136dce6 100644 --- a/lib/LaTeXML/MathParser.pm +++ b/lib/LaTeXML/MathParser.pm @@ -1530,7 +1530,8 @@ sub NewScript { $l++; $bumped = 1 } elsif (my $innerl = p_getAttribute($rbase, '_bumplevel')) { $l = $innerl; } - my $app = Apply(New(undef, undef, role => $y . 'SCRIPTOP', scriptpos => "$x$l"), + my $meaning = p_getAttribute($rscript, 'meaning'); + my $app = Apply(New(undef, undef, role => $y . 'SCRIPTOP', scriptpos => "$x$l", ($meaning ? 
(meaning => $meaning) : ())), $base, Arg($script, 0)); # Record whether this script was a floating one $$app[1]{_wasfloat} = 1 if $mode eq 'FLOAT'; diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 0af2ba5ec..5838e8dee 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -28,5 +28,21 @@ DefMath('\deriv[]{}{}', DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); +DefConstructor('\meaning{}{}', '#2', afterConstruct => sub { + my ($doc, $whatsit) = @_; + my $apply = $doc->getNode->lastChild; + if ($apply->localName eq 'XMApp') { # should be an apply, if by design + $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } }, + bounded => 1, requireMath => 1); + +DefMacro('\power{}{}', '\meaning{power}{#1^{#2}}'); + +# These still need work, since currently we get +# annotations "transpose(#1,#2)" and "frobulator(#1,#2)" +# instead of the desired "transpose(#1)" and "frobulator" +# I guess this needs a different type of XMath structural pattern... +DefMacro('\transpose{}', '\meaning{transpose}{#1^T}'); +DefMacro('\frobulator', '\meaning{frobulator}{x\'}'); + ######## 1; From 77cf03e8716b66591c9cf262e39407ff0e3df4c6 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sat, 11 Jul 2020 00:46:22 -0400 Subject: [PATCH 20/46] one way to do embellished atoms and bases --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 58 +++++++++++++++++++------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 5838e8dee..d3e0148c1 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -15,6 +15,45 @@ use strict; use warnings; use LaTeXML::Package; +## 0. 
Semantic infrastructure +DefConstructor('\meaning{}{}', '#2', afterConstruct => sub { + my ($doc, $whatsit) = @_; + my $apply = $doc->getNode->lastChild; + if ($apply->localName eq 'XMApp') { # should be an apply, if by design + $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } }, + bounded => 1, requireMath => 1); + +# Embellishment is hard to write, hard to speak, but describes exactly several cases +# I will abbreviate it "emb", for now, and use it as a prefix +DefConstructor('\emb@atom{}{}', + '' + . '' + . '#2' + . '', + bounded => 1, requireMath => 1); + +DefMacro('\emb@base{}{}{}', sub { + my ($gullet, $meaning, $base, $emb) = @_; + # Package::dualize_arglist seems to be doing this with more sophistication, + # a good bit to read if we ever need to build more infra here. + # for now, fast and loose, these don't get saved and are only for local association + my $id_token = T_OTHER("id" . int(rand(10000))); + my $ref_token = Invocation(T_CS('\@XMRef'), $id_token); + my $arg_token = Invocation(T_CS('\@XMArg'), $id_token, $base); + return Invocation(T_CS('\emb@base@build'), $meaning, $ref_token, Tokens($arg_token, $emb))->unlist; +}); + +DefConstructor('\emb@base@build{}{}{}', + '' + . '' + . '' + . '#2' + . '' + . '#3' + . '', + bounded => 1, requireMath => 1); + +## I. 
Calculus DefConstructor('\diffd', 'd'); DefMath('\deriv[]{}{}', '\frac{\@MAYBEAPPLY{\@SUPERSCRIPT{\diffd}{#1}}{#2}}' @@ -28,21 +67,10 @@ DefMath('\deriv[]{}{}', DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); -DefConstructor('\meaning{}{}', '#2', afterConstruct => sub { - my ($doc, $whatsit) = @_; - my $apply = $doc->getNode->lastChild; - if ($apply->localName eq 'XMApp') { # should be an apply, if by design - $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } }, - bounded => 1, requireMath => 1); - -DefMacro('\power{}{}', '\meaning{power}{#1^{#2}}'); - -# These still need work, since currently we get -# annotations "transpose(#1,#2)" and "frobulator(#1,#2)" -# instead of the desired "transpose(#1)" and "frobulator" -# I guess this needs a different type of XMath structural pattern... -DefMacro('\transpose{}', '\meaning{transpose}{#1^T}'); -DefMacro('\frobulator', '\meaning{frobulator}{x\'}'); +## II. Scripts +DefMacro('\power{}{}', '\meaning{power}{#1^{#2}}'); +DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); +DefMacro('\transpose{}', '\emb@base{transpose}{#1}{^T}'); ######## 1; From eeac88b8541f1dc3ad19b60e5bc9e864f4fb5ea3 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Sat, 11 Jul 2020 03:01:40 -0400 Subject: [PATCH 21/46] conquer \derivenum{f}{2} --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 31 ++++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index d3e0148c1..621ec1915 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -18,10 +18,13 @@ use LaTeXML::Package; ## 0. 
Semantic infrastructure DefConstructor('\meaning{}{}', '#2', afterConstruct => sub { my ($doc, $whatsit) = @_; - my $apply = $doc->getNode->lastChild; - if ($apply->localName eq 'XMApp') { # should be an apply, if by design - $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } }, - bounded => 1, requireMath => 1); + my $node = $doc->getNode; + my $apply = $node->lastChild; + if ($apply && $apply->localName eq 'XMApp') { # should be an apply, if by design + $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } + else { # otherwise, add the meaning on the current node, possibly an empty XMArg + $node->setAttribute('meaning', ToString($whatsit->getArg(1))); } + return; }, bounded => 1, requireMath => 1); # Embellishment is hard to write, hard to speak, but describes exactly several cases # I will abbreviate it "emb", for now, and use it as a prefix @@ -40,8 +43,13 @@ DefMacro('\emb@base{}{}{}', sub { my $id_token = T_OTHER("id" . int(rand(10000))); my $ref_token = Invocation(T_CS('\@XMRef'), $id_token); my $arg_token = Invocation(T_CS('\@XMArg'), $id_token, $base); - return Invocation(T_CS('\emb@base@build'), $meaning, $ref_token, Tokens($arg_token, $emb))->unlist; -}); + return Invocation(T_CS('\emb@base@build'), + $meaning, $ref_token, Tokens($arg_token, $emb))->unlist }); + +DefMacro('\numprimemarks{}', sub { + my ($gullet, $numtok) = @_; + my $num = int(ToString($numtok)); + return map { T_CS('\prime') } 1 .. $num; }); DefConstructor('\emb@base@build{}{}{}', '' @@ -71,6 +79,15 @@ DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); DefMacro('\power{}{}', '\meaning{power}{#1^{#2}}'); DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); DefMacro('\transpose{}', '\emb@base{transpose}{#1}{^T}'); +DefMacro('\adjoint{}', '\emb@base{adjoint}{#1}{^\dagger}'); + +# This looks misleadingly "straightforward". 
The floating script is hard to convince to bind to #1, +# so literally writing f'' and trying to annotate failed in pretty much all different configurations I tried. +# what seems to succeed gracefully is my \meaning-based semantic injections in the constructed tree, +# *given that* I use the f^{\prime\prime} form. +# +DefMacro('\derivenum{}{}', '\meaning{derivative-implicit-variable}{#1^{\meaning{#2}{\numprimemarks{#2}}}}'); + +################################################################################################################ -######## 1; From b9df85e96a4c84deaa152b4e50560db3567ae8d0 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Mon, 13 Jul 2020 14:14:59 -0400 Subject: [PATCH 22/46] undo modding NewScript, leave as-is and instead focus on pruning the duals --- lib/LaTeXML/MathParser.pm | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/LaTeXML/MathParser.pm b/lib/LaTeXML/MathParser.pm index 9b136dce6..499437075 100644 --- a/lib/LaTeXML/MathParser.pm +++ b/lib/LaTeXML/MathParser.pm @@ -1530,8 +1530,7 @@ sub NewScript { $l++; $bumped = 1 } elsif (my $innerl = p_getAttribute($rbase, '_bumplevel')) { $l = $innerl; } - my $meaning = p_getAttribute($rscript, 'meaning'); - my $app = Apply(New(undef, undef, role => $y . 'SCRIPTOP', scriptpos => "$x$l", ($meaning ? (meaning => $meaning) : ())), + my $app = Apply(New(undef, undef, role => $y . 
'SCRIPTOP', scriptpos => "$x$l"), $base, Arg($script, 0)); # Record whether this script was a floating one $$app[1]{_wasfloat} = 1 if $mode eq 'FLOAT'; From 4ec4766ab3d03d99d2fbe95116c28447d2f60ef8 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 14 Jul 2020 22:44:49 -0400 Subject: [PATCH 23/46] stupid mistake with lexical scope of $index --- lib/LaTeXML/Post/MathML/Presentation.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 99f3ceb3c..8ca262965 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -157,7 +157,6 @@ sub addAccessibilityAnnotations { p_setAttribute($node, 'data-semantic', $meaning) if $meaning; # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. - my $index = 0; # II.1 id-carrying nodes always point to their referrees. if ($dual_pres_node && (my $id = $currentnode->getAttribute('xml:id'))) { # We already found the dual @@ -165,6 +164,7 @@ sub addAccessibilityAnnotations { my @content_args = getQName($dual_content_node) eq 'ltx:XMApp' ? element_nodes($dual_content_node) : ($dual_content_node); my $arg_count = scalar(@content_args); # if no compound-apply, no need for top-level dual annotation, leave it to the descendants + my $index = 0; while (my $c_arg = shift @content_args) { if ($id eq ($c_arg->getAttribute('idref') || '')) { p_setAttribute($node, '_a11y', 'ref'); # mark as used in ref @@ -175,6 +175,7 @@ sub addAccessibilityAnnotations { # II.2. 
applications children are directly pointing to their parents # also fallback in the dual case, if the XMApp had an id but wasn't an arg if (!$arg && (getQName($currentnode->parentNode) eq 'ltx:XMApp')) { + my $index = 0; my $prev_sibling = $currentnode; while ($prev_sibling = $prev_sibling->previousSibling) { $index++; } From c1df9118e617ba464159471838558965f46894fc Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 14 Jul 2020 23:47:15 -0400 Subject: [PATCH 24/46] working \fnderive{f}{2} using \DUAL and DefMath --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 38 ++++++++------------------ 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 621ec1915..7376afd58 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -15,26 +15,9 @@ use strict; use warnings; use LaTeXML::Package; -## 0. Semantic infrastructure -DefConstructor('\meaning{}{}', '#2', afterConstruct => sub { - my ($doc, $whatsit) = @_; - my $node = $doc->getNode; - my $apply = $node->lastChild; - if ($apply && $apply->localName eq 'XMApp') { # should be an apply, if by design - $apply->setAttribute('meaning', ToString($whatsit->getArg(1))); } - else { # otherwise, add the meaning on the current node, possibly an empty XMArg - $node->setAttribute('meaning', ToString($whatsit->getArg(1))); } - return; }, bounded => 1, requireMath => 1); - # Embellishment is hard to write, hard to speak, but describes exactly several cases # I will abbreviate it "emb", for now, and use it as a prefix -DefConstructor('\emb@atom{}{}', - '' - . '' - . '#2' - . 
'', - bounded => 1, requireMath => 1); - +DefMacro('\emb@atom{}{}', '\DUAL{\@CSYMBOL{#1}}{\@WRAP{#2}}'); DefMacro('\emb@base{}{}{}', sub { my ($gullet, $meaning, $base, $emb) = @_; # Package::dualize_arglist seems to be doing this with more sophistication, @@ -48,8 +31,10 @@ DefMacro('\emb@base{}{}{}', sub { DefMacro('\numprimemarks{}', sub { my ($gullet, $numtok) = @_; - my $num = int(ToString($numtok)); - return map { T_CS('\prime') } 1 .. $num; }); + # we need to digest due to \@XMArg being a constructor + my $num = int(ToString(Digest($numtok))); + return Tokens(map { T_CS('\prime') } 1 .. $num); +}); DefConstructor('\emb@base@build{}{}{}', '' @@ -76,17 +61,16 @@ DefMath('\deriv[]{}{}', DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); ## II. Scripts -DefMacro('\power{}{}', '\meaning{power}{#1^{#2}}'); +DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power', + reversion => '#1^{#2}', + hide_content_reversion => 1); DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); DefMacro('\transpose{}', '\emb@base{transpose}{#1}{^T}'); DefMacro('\adjoint{}', '\emb@base{adjoint}{#1}{^\dagger}'); -# This looks misleadingly "straightforward". The floating script is hard to convince to bind to #1, -# so literally writing f'' and trying to annotate failed in pretty much all different configurations I tried. -# what seems to succeed gracefully is my \meaning-based semantic injections in the constructed tree, -# *given that* I use the f^{\prime\prime} form. 
-# -DefMacro('\derivenum{}{}', '\meaning{derivative-implicit-variable}{#1^{\meaning{#2}{\numprimemarks{#2}}}}'); +DefMacro('\derivemark{}', '\DUAL{#1}{\@WRAP{\numprimemarks{#1}}}'); +DefMath('\fnderive{}{}', '#1^{\derivemark{#2}}', + meaning => 'derivative-implicit-variable'); ################################################################################################################ From 88a3bc2780e4be09f99bf0020be86d95382b7870 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 15 Jul 2020 00:27:00 -0400 Subject: [PATCH 25/46] subtle a11y - hide the switch in a11ymark.sty, disabled for regular latexml calls --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 3 +++ lib/LaTeXML/Post/MathML.pm | 4 ++++ lib/LaTeXML/Post/MathML/Presentation.pm | 5 +++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 7376afd58..eb47a7f12 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -15,6 +15,9 @@ use strict; use warnings; use LaTeXML::Package; +DefConstructorI(T_CS('\@request@math@a11y'), undef, ""); +AtBeginDocument(T_CS('\@request@math@a11y')); + # Embellishment is hard to write, hard to speak, but describes exactly several cases # I will abbreviate it "emb", for now, and use it as a prefix DefMacro('\emb@atom{}{}', '\DUAL{\@CSYMBOL{#1}}{\@WRAP{#2}}'); diff --git a/lib/LaTeXML/Post/MathML.pm b/lib/LaTeXML/Post/MathML.pm index 119eec297..8d4c1aaa9 100644 --- a/lib/LaTeXML/Post/MathML.pm +++ b/lib/LaTeXML/Post/MathML.pm @@ -68,6 +68,10 @@ sub preprocess { $$self{nestmath} = 0 unless $$self{nestmath}; $doc->adjust_latexml_doctype('MathML'); # Add MathML if LaTeXML dtd. 
$doc->addNamespace($mmlURI, 'm'); + # flip the accessibility switch on if requested, as it is currently experimental + if (my $a11y = $doc->findnode('.//processing-instruction("latexml")[contains(.,"a11y=")]')) { + if ($a11y->textContent =~ /a11y=['"]enabled['"]/) { + $$self{a11y} = 1; } } return; } # Works for pmml, cmml diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 8ca262965..fd45b1961 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -83,7 +83,7 @@ sub associateNodeHook { p_setAttribute($node, 'href', $href); } if (my $title = $sourcenode->getAttribute('title')) { p_setAttribute($node, 'title', $title); } } - $self->addAccessibilityAnnotations($node, $currentnode); + $self->addAccessibilityAnnotations($node, $currentnode) if $$self{a11y}; return; } # Experiment: set accessibility attributes on the resulting presentation tree, @@ -161,7 +161,8 @@ sub addAccessibilityAnnotations { if ($dual_pres_node && (my $id = $currentnode->getAttribute('xml:id'))) { # We already found the dual my $dual_content_node = $dual_pres_node->previousSibling; - my @content_args = getQName($dual_content_node) eq 'ltx:XMApp' ? element_nodes($dual_content_node) : ($dual_content_node); + my @content_args = ($dual_content_node && getQName($dual_content_node)) eq 'ltx:XMApp' ? 
+ element_nodes($dual_content_node) : ($dual_content_node); my $arg_count = scalar(@content_args); # if no compound-apply, no need for top-level dual annotation, leave it to the descendants my $index = 0; From a8aa3e81d4ed50c5c867abba0a35875ec4fa6e40 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 15 Jul 2020 01:26:07 -0400 Subject: [PATCH 26/46] \fndegree handles arbitrary degree expressions --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 57 +++++++++++++++++++++----- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index eb47a7f12..fb360888f 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -32,13 +32,6 @@ DefMacro('\emb@base{}{}{}', sub { return Invocation(T_CS('\emb@base@build'), $meaning, $ref_token, Tokens($arg_token, $emb))->unlist }); -DefMacro('\numprimemarks{}', sub { - my ($gullet, $numtok) = @_; - # we need to digest due to \@XMArg being a constructor - my $num = int(ToString(Digest($numtok))); - return Tokens(map { T_CS('\prime') } 1 .. $num); -}); - DefConstructor('\emb@base@build{}{}{}', '' . '' @@ -71,9 +64,53 @@ DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); DefMacro('\transpose{}', '\emb@base{transpose}{#1}{^T}'); DefMacro('\adjoint{}', '\emb@base{adjoint}{#1}{^\dagger}'); -DefMacro('\derivemark{}', '\DUAL{#1}{\@WRAP{\numprimemarks{#1}}}'); -DefMath('\fnderive{}{}', '#1^{\derivemark{#2}}', - meaning => 'derivative-implicit-variable'); +# What I Really Want to Say here, but can't is likely: +# DefMath('\derivemark{}', '\derivemark@pres{#1}', meaning=>'#1'); +DefMacro('\derivemark{}', sub { + my ($gullet, $token) = @_; + # Dualizing the arglist only works if we are going to keep the same token at the end + # in the case of 2 --> '' , this fails. 
So, obtain the presentation right away to figure this out + + # we need to digest due to \@XMArg being a constructor + my $mark = ToString(Digest($token)); + my ($content, $presentation); + if ($mark =~ /^\d$/) { # single digit, add primes + $content = $token; + $presentation = Tokens(map { T_CS('\prime') } 1 .. int($mark)) } + else { + # assume an id, wrap in parens + my ($cargs, $pargs) = dualize_arglist('#1', $token); + $content = $$cargs[0]; + $presentation = Tokens(T_OTHER('('), $$pargs[0], T_OTHER(')')); } + + return Invocation(T_CS('\DUAL'), + undef, # debugging that I missed this 'undef' argument was not fun. + $content, + Invocation(T_CS('\@WRAP'), $presentation))->unlist; +}); + +# curiously we need an indirection level, so that we point to the dual instead of +# the content node of the dual. The a11y attribute generation does not support the following markup +# at the moment: +# +# +# +# +# +# +# +# +# +# f +# +# n +# ... +# +# We can only deal with 'p1.m1.2' pointing to the inner XMDual, rather than directly to its content "n" +DefMacro('\fnderive{}{}', '\fnderive@build{#1}{\derivemark{#2}}'); +DefMath('\fnderive@build{}{}', '#1^#2', + meaning => 'derivative-implicit-variable', + hide_content_reversion => 1); ################################################################################################################ From 81df249cbd67165a6689ecc3d5261f2e437a4c6a Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 15 Jul 2020 23:09:44 -0400 Subject: [PATCH 27/46] refactor until a macro layer feels usable, switch \adjoint \transpose to fine-grained annotations (thanks Bruce) --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 72 ++++++++++++++++++-------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index fb360888f..92fe21d90 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -18,29 +18,48 @@ use 
LaTeXML::Package; DefConstructorI(T_CS('\@request@math@a11y'), undef, ""); AtBeginDocument(T_CS('\@request@math@a11y')); +# NOTE: demonstration-oriented binding, all names and definitions subject to change without notice. + # Embellishment is hard to write, hard to speak, but describes exactly several cases # I will abbreviate it "emb", for now, and use it as a prefix -DefMacro('\emb@atom{}{}', '\DUAL{\@CSYMBOL{#1}}{\@WRAP{#2}}'); -DefMacro('\emb@base{}{}{}', sub { - my ($gullet, $meaning, $base, $emb) = @_; - # Package::dualize_arglist seems to be doing this with more sophistication, - # a good bit to read if we ever need to build more infra here. - # for now, fast and loose, these don't get saved and are only for local association - my $id_token = T_OTHER("id" . int(rand(10000))); - my $ref_token = Invocation(T_CS('\@XMRef'), $id_token); - my $arg_token = Invocation(T_CS('\@XMArg'), $id_token, $base); - return Invocation(T_CS('\emb@base@build'), - $meaning, $ref_token, Tokens($arg_token, $emb))->unlist }); - -DefConstructor('\emb@base@build{}{}{}', - '' - . '' - . '' - . '#2' - . '' - . '#3' - . '', - bounded => 1, requireMath => 1); + +# \emb@atom{meaning}{presentation} +DefMacro('\dual', '\DUAL[hide_content_reversion=true]'); +DefMacro('\emb@atom{}{}', '\dual{\@CSYMBOL{#1}}{\@WRAP{#2}}'); +DefMacro('\emb@build@apply{}{}', '\dual{\@APPLY{#1}}{\@WRAP{#2}}'); + +sub emb_apply { + my ($gullet, $base, $meaning, $emb, $invert_to_prefix) = @_; + my ($cargs, $pargs) = dualize_arglist('#1', $base); + my $ref_base = $$cargs[0]; + my $arg_base = $$pargs[0]; + my $presentation = $invert_to_prefix ? Tokens($emb, $arg_base) : Tokens($arg_base, $emb); + return Invocation(T_CS('\emb@build@apply'), + Tokens(Invocation(T_CS('\@CSYMBOL'), $meaning), $ref_base), + $presentation)->unlist; } + +sub emb_apply_two { # one-or-two operations, can't fully reuse the simple case... 
+ my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $invert_to_prefix) = @_; + if (!$op2_meaning && !$op2_pres) { # one operation, use the simple apply call + return emb_apply(@_); } + # Case where we have two consecutive operations + my ($cargs, $pargs) = dualize_arglist('#1', $base); + my $ref_base = $$cargs[0]; + my $arg_base = $$pargs[0]; + + my $pres_tokens = $invert_to_prefix ? Tokens($op2_pres, $op1_pres, $arg_base) : Tokens($arg_base, $op1_pres, $op2_pres); + my $presentation = Invocation(T_CS('\@WRAP'), $pres_tokens); + my $content = Invocation(T_CS('\@APPLY'), Invocation(T_CS('\@CSYMBOL'), $op2_meaning), + Invocation(T_CS('\@APPLY'), Invocation(T_CS('\@CSYMBOL'), $op1_meaning), $ref_base)); + return Invocation(T_CS('\DUAL'), undef, $content, $presentation)->unlist; } + +# Two operators acting on base in sequence, commonly alternate scripts ^m_n. +# \emb@apply{base}{op1 meaning}{op1 pres}[op2 meaning][op2 pres] +DefMacro('\emb@apply{}{}{}[][]', \&emb_apply_two); + +# As with \emb@apply, but the presentation is right-to-left prefix "op2_pres op1_pres base" +# \emb@preapply{base}{op1 meaning}{op1 pres}[op2 meaning][op2 pres] +DefMacro('\emb@preapply{}{}{}[][]', sub { emb_apply_two(@_, 1); }); ## I. Calculus DefConstructor('\diffd', 'd'); @@ -57,12 +76,19 @@ DefMath('\deriv[]{}{}', DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral'); ## II. 
Scripts +DefMacro('\@sup@apply{}{}', sub { + my ($gullet, $base, $script) = @_; + my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $script); + return Invocation(T_CS('\emb@build@apply'), + Tokens($$cargs[1], $$cargs[0]), + Invocation(T_CS('\@SUPERSCRIPT'), @$pargs))->unlist; }); +DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}'); DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power', reversion => '#1^{#2}', hide_content_reversion => 1); DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); -DefMacro('\transpose{}', '\emb@base{transpose}{#1}{^T}'); -DefMacro('\adjoint{}', '\emb@base{adjoint}{#1}{^\dagger}'); +DefMacro('\transpose{}', '\supop{#1}{transpose}{T}'); +DefMacro('\adjoint{}', '\supop{#1}{adjoint}{\dagger}'); # What I Really Want to Say here, but can't is likely: # DefMath('\derivemark{}', '\derivemark@pres{#1}', meaning=>'#1'); From 553ef5b3605e806b670bc6b34c32758b255ba744 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 01:06:38 -0400 Subject: [PATCH 28/46] crosswise presentation and semantic tree structures, two examples better --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 62 +++++++++++++++++++++++-- lib/LaTeXML/Post/MathML/Presentation.pm | 35 +++++++++++--- 2 files changed, 87 insertions(+), 10 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 92fe21d90..5dc54e879 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -24,9 +24,8 @@ AtBeginDocument(T_CS('\@request@math@a11y')); # I will abbreviate it "emb", for now, and use it as a prefix # \emb@atom{meaning}{presentation} -DefMacro('\dual', '\DUAL[hide_content_reversion=true]'); -DefMacro('\emb@atom{}{}', '\dual{\@CSYMBOL{#1}}{\@WRAP{#2}}'); -DefMacro('\emb@build@apply{}{}', '\dual{\@APPLY{#1}}{\@WRAP{#2}}'); +DefMacro('\emb@atom{}{}', '\DUAL[hide_content_reversion=true]{\@CSYMBOL{#1}}{\@WRAP{#2}}'); +DefMacro('\emb@build@apply{}{}', 
'\DUAL[hide_content_reversion=true]{\@APPLY{#1}}{\@WRAP{#2}}'); sub emb_apply { my ($gullet, $base, $meaning, $emb, $invert_to_prefix) = @_; @@ -41,7 +40,7 @@ sub emb_apply { sub emb_apply_two { # one-or-two operations, can't fully reuse the simple case... my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $invert_to_prefix) = @_; if (!$op2_meaning && !$op2_pres) { # one operation, use the simple apply call - return emb_apply(@_); } + return emb_apply($gullet, $base, $op1_meaning, $op1_pres, $invert_to_prefix); } # Case where we have two consecutive operations my ($cargs, $pargs) = dualize_arglist('#1', $base); my $ref_base = $$cargs[0]; @@ -61,6 +60,38 @@ DefMacro('\emb@apply{}{}{}[][]', \&emb_apply_two); # \emb@preapply{base}{op1 meaning}{op1 pres}[op2 meaning][op2 pres] DefMacro('\emb@preapply{}{}{}[][]', sub { emb_apply_two(@_, 1); }); +# ADHOC for the very awkward example we have so far. +# and the order of presentation args is inverted, while the semantic one is kept. +# Example \PrePostArgCrosswise{x}{median}{\overline}{index}{_}{i} +DefMacro('\PrePostArgCrosswise{}{}{}{}{}{}', sub { + my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $op2_rhs_var) = @_; + my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op2_rhs_var); + my ($ref_base, $ref_rhs_var) = @$cargs; + my ($arg_base, $arg_rhs_var) = @$pargs; + + my $presentation = Tokens(Tokens($op1_pres, $arg_base), $op2_pres, $arg_rhs_var); + my $content = Tokens(Invocation(T_CS('\@CSYMBOL'), $op1_meaning), + Invocation(T_CS('\@APPLY'), Tokens( + Invocation(T_CS('\@CSYMBOL'), $op2_meaning), + $ref_base, $ref_rhs_var))); + return Invocation(T_CS('\emb@build@apply'), $content, $presentation)->unlist; }); + +# ADHOC - terrible low-level soup macro with 7 arguments, +# just an example of things being possible... 
+# \PostArgsCrosswise{x}{derivative-implicit-variable}{^}{\derivemark{1}}{index}{_}{i} +DefMacro('\PostArgsCrosswise{}{}{}{}{}{}{}', sub { + my ($gullet, $base, $op1_meaning, $op1_pres, $op1_rhs_var, $op2_meaning, $op2_pres, $op2_rhs_var) = @_; + my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op1_rhs_var, $op2_rhs_var); + my ($ref_base, $ref_rhs_var1, $ref_rhs_var2) = @$cargs; + my ($arg_base, $arg_rhs_var1, $arg_rhs_var2) = @$pargs; + + my $presentation = Tokens(Tokens($arg_base, $op1_pres, $arg_rhs_var1), $op2_pres, $arg_rhs_var2); + my $content = Tokens(Invocation(T_CS('\@CSYMBOL'), $op1_meaning), + Invocation(T_CS('\@APPLY'), Tokens( + Invocation(T_CS('\@CSYMBOL'), $op2_meaning), $ref_base, $ref_rhs_var2)), + $ref_rhs_var1); + return Invocation(T_CS('\emb@build@apply'), $content, $presentation)->unlist; }); + ## I. Calculus DefConstructor('\diffd', 'd'); DefMath('\deriv[]{}{}', @@ -83,12 +114,35 @@ DefMacro('\@sup@apply{}{}', sub { Tokens($$cargs[1], $$cargs[0]), Invocation(T_CS('\@SUPERSCRIPT'), @$pargs))->unlist; }); DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}'); +DefMacro('\@sub@apply{}{}', sub { + my ($gullet, $base, $script) = @_; + my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $script); + return Invocation(T_CS('\emb@build@apply'), + Tokens($$cargs[1], $$cargs[0]), + Invocation(T_CS('\@SUBCRIPT'), @$pargs))->unlist; }); +DefMacro('\subop{}{}{}', '\@sub@apply{#1}{\emb@atom{#2}{#3}}'); + DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power', reversion => '#1^{#2}', hide_content_reversion => 1); + +DefMath('\index{}{}', "{#1_{#2}}", meaning => 'index', + reversion => '#1_{#2}', + hide_content_reversion => 1); +# only mark the script as a dual, so that we can remix it +DefMacro('\indexArg{}', sub { + my ($gullet, $arg) = @_; + my ($cargs, $pargs) = dualize_arglist('#1', $arg); + return Invocation(T_CS('\emb@build@apply'), + Tokens(Invocation(T_CS('\@CSYMBOL'), 'index'), $$cargs[0]), + Tokens(T_SUB, $$pargs[0]))->unlist; }); 
+DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}'); + DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); DefMacro('\transpose{}', '\supop{#1}{transpose}{T}'); DefMacro('\adjoint{}', '\supop{#1}{adjoint}{\dagger}'); +# This works well, but can't be remixed crosswise as \median{x}_i: +DefMacro('\median', '\emb@atom{median}{\overline}'); # What I Really Want to Say here, but can't is likely: # DefMath('\derivemark{}', '\derivemark@pres{#1}', meaning=>'#1'); diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index fd45b1961..83f0b40c4 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -121,11 +121,11 @@ sub addAccessibilityAnnotations { elsif ($dual_content_name eq 'ltx:XMTok') { $meaning = $dual_content_node->getAttribute('meaning'); } elsif ($dual_content_name eq 'ltx:XMApp') { - my $op_node = $dual_content_node->firstChild; - my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op'; - my @arg_nodes = element_nodes($dual_content_node); - my $arg_count = scalar(@arg_nodes) - 1; - $meaning = $op . '(' . join(",", map { '#' . $_ } (1 .. $arg_count)) . ')'; } + # another special case, from the \overline{x}_i land, + # we could get a deeply nested application tree with a lot of nodes which have no referrents + # but should be translated into the final data-semantic + # best to call out into a subroutine? + $meaning = get_dual_content_meaning($dual_content_node, 0); } # Note that the carrier ltx:XMDual is never passed in associateNode, but often requires an "arg". Recurse: $self->addAccessibilityAnnotations($node, $dual_pres_node->parentNode); } # tokens are simplest - if we know of a meaning, use that for accessibility @@ -180,10 +180,33 @@ sub addAccessibilityAnnotations { my $prev_sibling = $currentnode; while ($prev_sibling = $prev_sibling->previousSibling) { $index++; } - $arg = $index ? $index : 'op'; } + $arg = $index ? 
$index : 'op'; + # if this is a nested apply in a dual, and only then, obtain a suffix marker + my $parent = $currentnode->parentNode; + my $lvl = 0; + while (getQName($parent) eq 'ltx:XMApp') { + $lvl++; $parent = $parent->parentNode; } + if ((getQName($parent) eq 'ltx:XMDual') && $lvl) { + $arg .= "_$lvl"; } } p_setAttribute($node, 'data-arg', $arg) if $arg; return; } +sub get_dual_content_meaning { + my ($node, $lvl) = @_; + my @arg_nodes = element_nodes($node); + my $op_node = shift @arg_nodes; + my $op = $op_node ? $op_node->getAttribute('meaning') : + ($lvl ? '#op' . $lvl : '#op'); + my @arg_strings = (); + my $index = 0; + for my $arg_node (@arg_nodes) { + $index++; + if (getQName($arg_node) eq 'ltx:XMApp') { + push @arg_strings, get_dual_content_meaning($arg_node, $lvl + 1); } + else { + push @arg_strings, '#' . $index . ($lvl ? "_$lvl" : ""); } } # will we need level suffixes? + return $op . '(' . join(",", @arg_strings) . ')'; } + #================================================================================ # Presentation MathML with Line breaking # Not at all sure how this will integrate with Parallel markup... 
From be48a37d36c0e129f6f810b289eb65652c69f817 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 01:16:49 -0400 Subject: [PATCH 29/46] typo in dualize_arglist, we now have 3 --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 5dc54e879..34b1b6173 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -81,7 +81,7 @@ DefMacro('\PrePostArgCrosswise{}{}{}{}{}{}', sub { # \PostArgsCrosswise{x}{derivative-implicit-variable}{^}{\derivemark{1}}{index}{_}{i} DefMacro('\PostArgsCrosswise{}{}{}{}{}{}{}', sub { my ($gullet, $base, $op1_meaning, $op1_pres, $op1_rhs_var, $op2_meaning, $op2_pres, $op2_rhs_var) = @_; - my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op1_rhs_var, $op2_rhs_var); + my ($cargs, $pargs) = dualize_arglist('#1#2#3', $base, $op1_rhs_var, $op2_rhs_var); my ($ref_base, $ref_rhs_var1, $ref_rhs_var2) = @$cargs; my ($arg_base, $arg_rhs_var1, $arg_rhs_var2) = @$pargs; From 78abadd8969d0573dcfa44f7826c6fd17d961cdc Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 01:41:19 -0400 Subject: [PATCH 30/46] fish out idrefs in arbitrary depths within dual, compute lvl --- lib/LaTeXML/Post/MathML/Presentation.pm | 37 ++++++++++++------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 83f0b40c4..2bb8e537f 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -161,18 +161,22 @@ sub addAccessibilityAnnotations { if ($dual_pres_node && (my $id = $currentnode->getAttribute('xml:id'))) { # We already found the dual my $dual_content_node = $dual_pres_node->previousSibling; - my @content_args = ($dual_content_node && getQName($dual_content_node)) eq 'ltx:XMApp' ? 
- element_nodes($dual_content_node) : ($dual_content_node); - my $arg_count = scalar(@content_args); - # if no compound-apply, no need for top-level dual annotation, leave it to the descendants - my $index = 0; - while (my $c_arg = shift @content_args) { - if ($id eq ($c_arg->getAttribute('idref') || '')) { - p_setAttribute($node, '_a11y', 'ref'); # mark as used in ref - $arg = $index || ($arg_count >= 2 ? 'op' : '1'); - last; - } else { # note that if we never find the 'idref', arg is never set - $index++; } } } + # note that if we never find the 'idref', arg is never set + if (my $xmref = $LaTeXML::Post::DOCUMENT->findnode('//ltx:XMRef[@idref="' . $id . '"]', $dual_content_node)) { + print STDERR "XMREF: ", $xmref->toString(1); + p_setAttribute($xmref, '_a11y', 'ref'); # mark as used in ref + my $index = 0; + my @arg_nodes = element_nodes($xmref->parentNode); + my $c_arg = $xmref; + while ($c_arg = $c_arg->previousSibling) { + $index++; } + $arg = $index || (scalar(@arg_nodes) >= 2 ? 'op' : '1'); + my $parent = $xmref->parentNode; + my $lvl = -1; + while (getQName($parent) eq 'ltx:XMApp') { + $parent = $parent->parentNode; + $lvl++; } + $arg .= "_$lvl" if ($lvl > 0); } } # II.2. applications children are directly pointing to their parents # also fallback in the dual case, if the XMApp had an id but wasn't an arg if (!$arg && (getQName($currentnode->parentNode) eq 'ltx:XMApp')) { @@ -180,14 +184,7 @@ sub addAccessibilityAnnotations { my $prev_sibling = $currentnode; while ($prev_sibling = $prev_sibling->previousSibling) { $index++; } - $arg = $index ? $index : 'op'; - # if this is a nested apply in a dual, and only then, obtain a suffix marker - my $parent = $currentnode->parentNode; - my $lvl = 0; - while (getQName($parent) eq 'ltx:XMApp') { - $lvl++; $parent = $parent->parentNode; } - if ((getQName($parent) eq 'ltx:XMDual') && $lvl) { - $arg .= "_$lvl"; } } + $arg = $index ? 
$index : 'op'; } p_setAttribute($node, 'data-arg', $arg) if $arg; return; } From 75412e77e9d9a8350f5095765a9e1a72955572c6 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 01:56:05 -0400 Subject: [PATCH 31/46] recognize arg annotation case for XMRef-only content branches of duals --- lib/LaTeXML/Post/MathML/Presentation.pm | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 2bb8e537f..890aee069 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -163,16 +163,15 @@ sub addAccessibilityAnnotations { my $dual_content_node = $dual_pres_node->previousSibling; # note that if we never find the 'idref', arg is never set if (my $xmref = $LaTeXML::Post::DOCUMENT->findnode('//ltx:XMRef[@idref="' . $id . '"]', $dual_content_node)) { - print STDERR "XMREF: ", $xmref->toString(1); p_setAttribute($xmref, '_a11y', 'ref'); # mark as used in ref - my $index = 0; - my @arg_nodes = element_nodes($xmref->parentNode); - my $c_arg = $xmref; + my $index = 0; + my $parent = $xmref->parentNode; + my $c_arg = $xmref; while ($c_arg = $c_arg->previousSibling) { $index++; } - $arg = $index || (scalar(@arg_nodes) >= 2 ? 'op' : '1'); - my $parent = $xmref->parentNode; - my $lvl = -1; + $arg = $index || ((getQName($xmref->parentNode) eq 'ltx:XMDual') ? '1' : ($xmref->nextSibling ? 
'op' : '1')); + # compute a level suffix if nested within main dual + my $lvl = -1; while (getQName($parent) eq 'ltx:XMApp') { $parent = $parent->parentNode; $lvl++; } From 6d3ee965e37d731f6bf1f8d2c60572a0aac66b36 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 11:01:54 -0400 Subject: [PATCH 32/46] typo in refactoring, guard the "data-arg" in final pmml, via _a11y attr, not the XMath --- lib/LaTeXML/Post/MathML/Presentation.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 890aee069..dd0ad6218 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -163,7 +163,7 @@ sub addAccessibilityAnnotations { my $dual_content_node = $dual_pres_node->previousSibling; # note that if we never find the 'idref', arg is never set if (my $xmref = $LaTeXML::Post::DOCUMENT->findnode('//ltx:XMRef[@idref="' . $id . '"]', $dual_content_node)) { - p_setAttribute($xmref, '_a11y', 'ref'); # mark as used in ref + p_setAttribute($node, '_a11y', 'ref'); # mark as used in ref my $index = 0; my $parent = $xmref->parentNode; my $c_arg = $xmref; From 0c10ef1898536de8c23026398f6fd35eafb36679 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 11:11:36 -0400 Subject: [PATCH 33/46] cleanup naming, another rough edge logic bug while refactoring --- lib/LaTeXML/Post/MathML/Presentation.pm | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index dd0ad6218..06a508503 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -125,7 +125,7 @@ sub addAccessibilityAnnotations { # we could get a deeply nested application tree with a lot of nodes which have no referrents # but should be translated into the final data-semantic # best to call out into a subroutine? 
- $meaning = get_dual_content_meaning($dual_content_node, 0); } + $meaning = dual_content_xmapp_to_semantic_attr($dual_content_node, 0); } # Note that the carrier ltx:XMDual is never passed in associateNode, but often requires an "arg". Recurse: $self->addAccessibilityAnnotations($node, $dual_pres_node->parentNode); } # tokens are simplest - if we know of a meaning, use that for accessibility @@ -187,18 +187,17 @@ sub addAccessibilityAnnotations { p_setAttribute($node, 'data-arg', $arg) if $arg; return; } -sub get_dual_content_meaning { +sub dual_content_xmapp_to_semantic_attr { my ($node, $lvl) = @_; - my @arg_nodes = element_nodes($node); - my $op_node = shift @arg_nodes; - my $op = $op_node ? $op_node->getAttribute('meaning') : - ($lvl ? '#op' . $lvl : '#op'); + my @arg_nodes = element_nodes($node); + my $op_node = shift @arg_nodes; + my $op = ($op_node && $op_node->getAttribute('meaning')) || ($lvl ? '#op' . $lvl : '#op'); my @arg_strings = (); my $index = 0; for my $arg_node (@arg_nodes) { $index++; if (getQName($arg_node) eq 'ltx:XMApp') { - push @arg_strings, get_dual_content_meaning($arg_node, $lvl + 1); } + push @arg_strings, dual_content_xmapp_to_semantic_attr($arg_node, $lvl + 1); } else { push @arg_strings, '#' . $index . ($lvl ? "_$lvl" : ""); } } # will we need level suffixes? return $op . '(' . join(",", @arg_strings) . 
')'; } From 7a5fe4bf3acde8250a4e1db4aa83cdde5010f9fe Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 11:27:02 -0400 Subject: [PATCH 34/46] adding functional power, Laplacian, inverse --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 34b1b6173..f121cd3d6 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -125,6 +125,14 @@ DefMacro('\subop{}{}{}', '\@sub@apply{#1}{\emb@atom{#2}{#3}}'); DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power', reversion => '#1^{#2}', hide_content_reversion => 1); +DefMath('\fnpower{}{}', "{#1^{#2}}", meaning => 'functional-power', + reversion => '#1^{#2}', + hide_content_reversion => 1); +DefMath('\fninverse{}', "#1^{-1}", meaning => "inverse", role => 'OPFUNCTION', + reversion => '#1^{-1}', + hide_content_reversion => 1); +DefMath('\laplacian', '\\nabla^2', + meaning => 'Laplacian', role => 'OPERATOR', hide_content_reversion => 1); DefMath('\index{}{}', "{#1_{#2}}", meaning => 'index', reversion => '#1_{#2}', @@ -138,7 +146,8 @@ DefMacro('\indexArg{}', sub { Tokens(T_SUB, $$pargs[0]))->unlist; }); DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}'); -DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); +DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}'); + DefMacro('\transpose{}', '\supop{#1}{transpose}{T}'); DefMacro('\adjoint{}', '\supop{#1}{adjoint}{\dagger}'); # This works well, but can't be remixed crosswise as \median{x}_i: From afcded96b8ae26cc453230ae153e384a65f4b3d0 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Thu, 16 Jul 2020 11:54:25 -0400 Subject: [PATCH 35/46] add: norm, determinant, inner-product, legendre symbol --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml 
b/lib/LaTeXML/Package/a11ymark.sty.ltxml index f121cd3d6..2b5a450b8 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -126,17 +126,14 @@ DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power', reversion => '#1^{#2}', hide_content_reversion => 1); DefMath('\fnpower{}{}', "{#1^{#2}}", meaning => 'functional-power', - reversion => '#1^{#2}', - hide_content_reversion => 1); + reversion => '#1^{#2}', hide_content_reversion => 1); DefMath('\fninverse{}', "#1^{-1}", meaning => "inverse", role => 'OPFUNCTION', - reversion => '#1^{-1}', + reversion => '#1^{-1}', hide_content_reversion => 1); +DefMath('\laplacian', '\nabla^2', meaning => 'Laplacian', role => 'OPERATOR', hide_content_reversion => 1); -DefMath('\laplacian', '\\nabla^2', - meaning => 'Laplacian', role => 'OPERATOR', hide_content_reversion => 1); - DefMath('\index{}{}', "{#1_{#2}}", meaning => 'index', - reversion => '#1_{#2}', - hide_content_reversion => 1); + reversion => '#1_{#2}', hide_content_reversion => 1); + # only mark the script as a dual, so that we can remix it DefMacro('\indexArg{}', sub { my ($gullet, $arg) = @_; @@ -201,6 +198,16 @@ DefMath('\fnderive@build{}{}', '#1^#2', meaning => 'derivative-implicit-variable', hide_content_reversion => 1); +## Circumfix, applicative: +DefMath('\norm{}', '|\mathbf{#1}|', meaning => 'norm', role => 'ID', + reversion => '|\mathbf{#1}|', hide_content_reversion => 1); +DefMath('\determinant{}', '|\mathbf{#1}|', meaning => 'determinant', role => 'ID', + reversion => '|\mathbf{#1}|', hide_content_reversion => 1); +DefMath('\innerp{}{}', '\left<\mathbf{#1},\mathbf{#2}\right>', meaning => 'inner-product', role => 'ID', + reversion => '\left<\mathbf{#1},\mathbf{#2}\right>', hide_content_reversion => 1); +DefMath('\legendre{}{}', '(#1|#2)', meaning => 'Legendre-symbol', role => 'ID', + reversion => '(#1|#2)', hide_content_reversion => 1); + 
################################################################################################################ 1; From 1e65ac77acc38cf8f44e9c806ac3facd74cd54bf Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Mon, 3 Aug 2020 22:27:01 -0400 Subject: [PATCH 36/46] introduce \pragma as an ergonomic frontend for lxDeclare --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 49 +++++++++++++++++++++----- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 2b5a450b8..b8bd97195 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -15,6 +15,8 @@ use strict; use warnings; use LaTeXML::Package; +RequirePackage('latexml'); + DefConstructorI(T_CS('\@request@math@a11y'), undef, ""); AtBeginDocument(T_CS('\@request@math@a11y')); @@ -65,7 +67,7 @@ DefMacro('\emb@preapply{}{}{}[][]', sub { emb_apply_two(@_, 1); }); # Example \PrePostArgCrosswise{x}{median}{\overline}{index}{_}{i} DefMacro('\PrePostArgCrosswise{}{}{}{}{}{}', sub { my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $op2_rhs_var) = @_; - my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op2_rhs_var); + my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op2_rhs_var); my ($ref_base, $ref_rhs_var) = @$cargs; my ($arg_base, $arg_rhs_var) = @$pargs; @@ -136,11 +138,11 @@ DefMath('\index{}{}', "{#1_{#2}}", meaning => 'index', # only mark the script as a dual, so that we can remix it DefMacro('\indexArg{}', sub { - my ($gullet, $arg) = @_; - my ($cargs, $pargs) = dualize_arglist('#1', $arg); + my ($gullet, $arg) = @_; + my ($cargs, $pargs) = dualize_arglist('#1', $arg); return Invocation(T_CS('\emb@build@apply'), Tokens(Invocation(T_CS('\@CSYMBOL'), 'index'), $$cargs[0]), - Tokens(T_SUB, $$pargs[0]))->unlist; }); + Tokens(T_SUB, $$pargs[0]))->unlist; }); DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}'); DefMacro('\frobulator', 
'\emb@atom{frobulator}{x\'}'); @@ -203,11 +205,40 @@ DefMath('\norm{}', '|\mathbf{#1}|', meaning => 'norm', role => 'ID', reversion => '|\mathbf{#1}|', hide_content_reversion => 1); DefMath('\determinant{}', '|\mathbf{#1}|', meaning => 'determinant', role => 'ID', reversion => '|\mathbf{#1}|', hide_content_reversion => 1); -DefMath('\innerp{}{}', '\left<\mathbf{#1},\mathbf{#2}\right>', meaning => 'inner-product', role => 'ID', - reversion => '\left<\mathbf{#1},\mathbf{#2}\right>', hide_content_reversion => 1); -DefMath('\legendre{}{}', '(#1|#2)', meaning => 'Legendre-symbol', role => 'ID', - reversion => '(#1|#2)', hide_content_reversion => 1); - ################################################################################################################ +# Declare some default common in K12 math when using this package: +# Also, improve ergonomics of \lxDecalre to my (Deyan's) liking +DefKeyVal('Declare', 'role', '', ''); +DefKeyVal('Declare', 'meaning', '', ''); +our %PRAGMA_ROLES = map { $_ => 1 } qw(ID FUNCTION); +DefMacro('\pragma OptionalMatch:* {}{}', sub { # Limitation: never use commas in the symbol/notation contents + my ($gullet, $star, $properties, $notations) = @_; + my @declarations = (); + my $notations_expanded = ToString($notations); + $notations_expanded =~ s/\?/\\WildCard/g; + my @notations = $star ? $notations_expanded : split(",", $notations_expanded); + my @properties = split(",", ToString($properties)); + for my $notation (@notations) { + my $kvprops = LaTeXML::Core::KeyVals->new('KV', 'Declare', assign => T_OTHER('='), punct => T_OTHER(',')); + for my $p (@properties) { # extend with more of the lxDeclare capabilities? scopes? 
+ if ($PRAGMA_ROLES{$p}) { + $kvprops->setValue('role', $p); } + else { + $kvprops->setValue('meaning', $p); } } + push @declarations, + Invocation(T_CS('\lxDeclare'), undef, $kvprops, + Tokens(T_MATH, Tokenize($notation), T_MATH)); } + return @declarations; }); + +PushValue('@at@begin@document', Tokenize(<<'EOL')); +\pragma{FUNCTION}{f,g,h} +\pragma{ID}{a,b,c,d,n,m,x,y,z} +\pragma{index}{?_?} +\pragma{power}{?^?} +\pragma{Pochhamer-symbol}{\left(?\right)_?} +\pragma{Legendre-symbol}{\left(?|?\right)} +\pragma*{ID,inner-product}{\left<\mathbf{?},\mathbf{?}\right>} +EOL + 1; From cd567424e7b0a8abb0eee73799a58cb116708962 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 4 Aug 2020 16:03:07 -0400 Subject: [PATCH 37/46] curried BesselJ via a pragma --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index b8bd97195..a723f8ad8 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -236,9 +236,11 @@ PushValue('@at@begin@document', Tokenize(<<'EOL')); \pragma{ID}{a,b,c,d,n,m,x,y,z} \pragma{index}{?_?} \pragma{power}{?^?} -\pragma{Pochhamer-symbol}{\left(?\right)_?} -\pragma{Legendre-symbol}{\left(?|?\right)} -\pragma*{ID,inner-product}{\left<\mathbf{?},\mathbf{?}\right>} +\pragma{Pochhamer-symbol,ID}{\left(?\right)_?} +\pragma{Legendre-symbol,ID}{\left(?|?\right)} +\pragma{BesselJ,FUNCTION}{J_?} +\pragma*{inner-product,ID}{\left<\mathbf{?},\mathbf{?}\right>} + EOL 1; From 485d4f73bd002a313b308a53ed94eb9bb96850cb Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Tue, 4 Aug 2020 22:34:24 -0400 Subject: [PATCH 38/46] experiment with pre/post actions for pragmas/lxDeclare --- lib/LaTeXML/Core/Rewrite.pm | 48 ++++++++++++++++++++++++-- lib/LaTeXML/Package/a11ymark.sty.ltxml | 15 +++++--- lib/LaTeXML/Package/latexml.sty.ltxml | 28 ++++++++------- 3 files changed, 71 insertions(+), 20 
deletions(-) diff --git a/lib/LaTeXML/Core/Rewrite.pm b/lib/LaTeXML/Core/Rewrite.pm index bfb4a6ed8..3a3a868d8 100644 --- a/lib/LaTeXML/Core/Rewrite.pm +++ b/lib/LaTeXML/Core/Rewrite.pm @@ -16,6 +16,8 @@ use LaTeXML::Global; use LaTeXML::Common::Object; use LaTeXML::Common::Error; use LaTeXML::Common::XML; +use LaTeXML::Core::Token qw(T_CS T_MATH); +use LaTeXML::Core::Tokens qw(Tokens); sub new { my ($class, $mode, @specs) = @_; @@ -143,8 +145,8 @@ sub applyClause { # Now make any adjustments to the new nodes map { $document->recordNodeIDs($_) } @inserted; - my $font = $document->getNodeFont($tree); # the font of the matched node - foreach my $ins (@inserted) { # Copy the non-semantic parts of font to the replacement + my $font = $document->getNodeFont($tree); # the font of the matched node + foreach my $ins (@inserted) { # Copy the non-semantic parts of font to the replacement $document->mergeNodeFontRec($ins => $font); } # Now, replace the following nodes. map { $parent->appendChild($_) } @following; } @@ -178,6 +180,32 @@ sub applyClause { Error('misdefined', '', undef, "Unknown directive '$op' in Compiled Rewrite spec"); } return; } +## EXPERIMENTAL: This is an early experiment and needs to be refactored before it can be considered for serious use +sub action_insert { + my ($document, $direction, $extra, $tree) = @_; + print STDERR "Tree: ", $tree->toString(1), "\n"; + my $anchor; + if ($direction eq 'pre') { + $anchor = $tree->previousSibling; } + elsif ($direction eq 'post') { + $anchor = $tree->nextSibling; } + if ($anchor) { # What should we do if no anchor? Skip? + # Carry out the operation, inserting whatever nodes. 
+ my $parent = $anchor->parentNode; + my $end_mark = $parent->lastChild; + $document->setNode($parent); + &$extra($document); + my @inserted = (); + my @children = $parent->childNodes; + while (my $child = pop @children) { + last unless ($$child != $$end_mark); + $child->unbindNode; + push @inserted, $child; } + for my $newchild (@inserted) { + $parent->insertAfter($newchild, $anchor); + $document->recordNodeIDs($newchild); } } + return; } + # Set attributes for an encapsulated tree (ie. a decorated symbol as symbol itself) sub setAttributes_encapsulate { my ($document, $attributes, @nodes) = @_; @@ -321,6 +349,20 @@ sub compileClause { if (ref $pattern eq 'CODE') { } else { $pattern = $self->compile_replacement($document, $pattern); } } + elsif ($op eq 'action') { + if (ref $pattern eq 'CODE') { } + # HACK: it appears this is a stage already **too late** to handle pre/post directive parsing + # maybe what I should consider instead is having a "pre:action" and "post:action" KEY + # which can be parsed via $op, keeping $pattern handled identically to the 'replace' case? + elsif (ToString($pattern) =~ /^(pre|post)[:].(.+)$/) { + my $direction = $1; + my $extra = $self->compile_replacement($document, Tokens(T_MATH, T_CS("\\$2"), T_MATH)); + $pattern = sub { + my ($tree) = @_; + action_insert($document, $direction, $extra, $tree); } } + else { + Fatal('misdefined', '', undef, + "Can't generate 'action' for arbitrary tokens.", ToString($pattern)); } } elsif ($op eq 'regexp') { $pattern = $self->compile_regexp($pattern); } print STDERR "Compiled clause $oop=>" . ToString($opattern) . " ==> $op=>" . ToString($pattern) . 
"\n" @@ -532,7 +574,7 @@ sub domToXPath_seq { __END__ -=pod +=pod =head1 NAME diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index a723f8ad8..9af3ff847 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -209,21 +209,26 @@ DefMath('\determinant{}', '|\mathbf{#1}|', meaning => 'determinant', role => 'ID # Declare some default common in K12 math when using this package: # Also, improve ergonomics of \lxDecalre to my (Deyan's) liking +# TODO: Can we reuse this keyval from latexml.sty? How? DefKeyVal('Declare', 'role', '', ''); DefKeyVal('Declare', 'meaning', '', ''); +DefKeyVal('Declare', 'action', '', ''); +DefKeyVal('Declare', 'replace', '', ''); our %PRAGMA_ROLES = map { $_ => 1 } qw(ID FUNCTION); DefMacro('\pragma OptionalMatch:* {}{}', sub { # Limitation: never use commas in the symbol/notation contents my ($gullet, $star, $properties, $notations) = @_; my @declarations = (); my $notations_expanded = ToString($notations); - $notations_expanded =~ s/\?/\\WildCard/g; - my @notations = $star ? $notations_expanded : split(",", $notations_expanded); - my @properties = split(",", ToString($properties)); + $notations_expanded =~ s/\?/\\WildCard[]/g; + my @notations = $star ? $notations_expanded : split(/\s*,\s*/, $notations_expanded); + my @properties = split(/\s*,\s*/, ToString($properties)); for my $notation (@notations) { my $kvprops = LaTeXML::Core::KeyVals->new('KV', 'Declare', assign => T_OTHER('='), punct => T_OTHER(',')); for my $p (@properties) { # extend with more of the lxDeclare capabilities? scopes? 
if ($PRAGMA_ROLES{$p}) { $kvprops->setValue('role', $p); } + elsif ($p =~ /^(pre|post)\:/) { + $kvprops->setValue('action', $p); } else { $kvprops->setValue('meaning', $p); } } push @declarations, @@ -240,7 +245,7 @@ PushValue('@at@begin@document', Tokenize(<<'EOL')); \pragma{Legendre-symbol,ID}{\left(?|?\right)} \pragma{BesselJ,FUNCTION}{J_?} \pragma*{inner-product,ID}{\left<\mathbf{?},\mathbf{?}\right>} - +\pragma*{pre:\@APPLYFUNCTION}{\left(?,?;?|?\right)} EOL - +# 1; diff --git a/lib/LaTeXML/Package/latexml.sty.ltxml b/lib/LaTeXML/Package/latexml.sty.ltxml index 4742178c5..bab167490 100644 --- a/lib/LaTeXML/Package/latexml.sty.ltxml +++ b/lib/LaTeXML/Package/latexml.sty.ltxml @@ -246,7 +246,7 @@ DefPrimitive('\lxDefMath{}[Number][]{} OptionalKeyVals:XMath', sub { $params && map { $_ && ToString($_) } map { $params->getValue($_) } qw(name meaning cd role alias scope); my $needsid = $params && ($params->getValue('tag') || $params->getValue('description')); - my $id = ($needsid ? next_declaration_id() : undef); + my $id = ($needsid ? next_declaration_id() : undef); DefMathI($cs, convertLaTeXArgs($nargs, $opt), $presentation, name => $name, meaning => $meaning, omcd => $cd, role => $role, alias => $alias, scope => $scope, decl_id => $id, @@ -294,7 +294,7 @@ sub normalizeDeclareKeys { if (my $stuff = $description || $tag) { ($term, $desc) = splitDeclareTag($stuff); } $short = ($description ? 
$tag || $desc : undef); - $desc = $desc || $description || $tag; + $desc = $desc || $description || $tag; $whatsit->setProperties(term => $term, short => $short, description => $desc); return; } @@ -340,9 +340,10 @@ sub splitDeclareTag { DefKeyVal('Declare', 'nowrap', '{}', 1); DefKeyVal('Declare', 'trace', '{}', 1); DefKeyVal('Declare', 'replace', 'UndigestedKey'); +DefKeyVal('Declare', 'action', 'UndigestedKey'); our $declare_keys = { scope => 1, role => 1, tag => 1, description => 1, name => 1, meaning => 1, - trace => 1, nowrap => 1, replace => 1, label => 1 }; + trace => 1, nowrap => 1, replace => 1, action => 1, label => 1 }; # Most is same as above; merge into one!!!!! DefConstructor('\lxDeclare OptionalMatch:* OptionalKeyVals:Declare {}', sub { my ($document, $flag, $kv, $pattern, %props) = @_; @@ -387,7 +388,8 @@ DefConstructor('\lxDeclare OptionalMatch:* OptionalKeyVals:Declare {}', sub { nowrap => defined $kv->getValue('nowrap'), id => $id, match => $pattern, - replace => $kv->getValue('replace')); + replace => $kv->getValue('replace'), + action => $kv->getValue('action')); normalizeDeclareKeys($kv, $whatsit); if (my $label = ToString($kv->getValue('label'))) { @@ -436,8 +438,8 @@ sub getDeclarationScope { sub createDeclarationRewrite { my ($document, $scope, $whatsit) = @_; my %props = $whatsit->getProperties; - my ($id, $match, $nowrap, $role, $name, $meaning, $ref, $trace, $replace) - = map { $props{$_} } qw(id match nowrap role name meaning ref trace replace); + my ($id, $match, $nowrap, $role, $name, $meaning, $ref, $trace, $replace, $action) + = map { $props{$_} } qw(id match nowrap role name meaning ref trace replace action); # Put this rule IN FRONT of other rules! UnshiftValue('DOCUMENT_REWRITE_RULES', LaTeXML::Core::Rewrite->new('math', @@ -446,12 +448,14 @@ sub createDeclarationRewrite { ($match ? (match => $match) : ()), ($replace ? (replace => $replace) - : attributes => { ($role ? (role => $role) : ()), - ($name ? 
(name => $name) : ()), - ($meaning ? (meaning => $meaning) : ()), - ($id ? (decl_id => $id) : ()), - ($nowrap ? (_nowrap => $nowrap) : ()), - }), + : ($action + ? (action => $action) + : attributes => { ($role ? (role => $role) : ()), + ($name ? (name => $name) : ()), + ($meaning ? (meaning => $meaning) : ()), + ($id ? (decl_id => $id) : ()), + ($nowrap ? (_nowrap => $nowrap) : ()), + })), )); return; } From cc17c35747ee0f4dbf3e970dc657ed0f1c53b9ea Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 5 Aug 2020 21:58:00 -0400 Subject: [PATCH 39/46] comment out pragmas, move them to showcase --- lib/LaTeXML/Package/a11ymark.sty.ltxml | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml index 9af3ff847..3ab295a54 100644 --- a/lib/LaTeXML/Package/a11ymark.sty.ltxml +++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml @@ -236,16 +236,20 @@ DefMacro('\pragma OptionalMatch:* {}{}', sub { # Limitation: never use commas in Tokens(T_MATH, Tokenize($notation), T_MATH)); } return @declarations; }); -PushValue('@at@begin@document', Tokenize(<<'EOL')); -\pragma{FUNCTION}{f,g,h} -\pragma{ID}{a,b,c,d,n,m,x,y,z} -\pragma{index}{?_?} -\pragma{power}{?^?} -\pragma{Pochhamer-symbol,ID}{\left(?\right)_?} -\pragma{Legendre-symbol,ID}{\left(?|?\right)} -\pragma{BesselJ,FUNCTION}{J_?} -\pragma*{inner-product,ID}{\left<\mathbf{?},\mathbf{?}\right>} -\pragma*{pre:\@APPLYFUNCTION}{\left(?,?;?|?\right)} -EOL +# Example pragmas, as incldued with the tiny accessibility showcase: +# +# disabled by default here, since they may assume too much +# +# PushValue('@at@begin@document', Tokenize(<<'EOL')); +# \pragma{FUNCTION}{f,g,h} +# \pragma{ID}{a,b,c,d,n,m,x,y,z} +# \pragma{index}{?_?} +# \pragma{power}{?^?} +# \pragma{Pochhamer-symbol,ID}{\left(?\right)_?} +# \pragma{Legendre-symbol,ID}{\left(?|?\right)} +# \pragma{BesselJ,FUNCTION}{J_?} +# 
\pragma*{inner-product,ID}{\left<\mathbf{?},\mathbf{?}\right>} +# \pragma*{pre:\@APPLYFUNCTION}{\left(?,?;?|?\right)} +# EOL # 1; From 5661de3eed1cc8e2a227de5ee26c0bcdb532ad6d Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Wed, 12 Aug 2020 08:54:37 -0400 Subject: [PATCH 40/46] better a11y annotation treatment for XMapp with meaning attr --- lib/LaTeXML/Post/MathML/Presentation.pm | 36 ++++++++++++++++--------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 06a508503..9efb77dc1 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -60,7 +60,7 @@ sub convertNode { # NEXT better strategy will be to scan columns of MathBranches to establish desired line length? elsif ($$self{linelength} # If line breaking && ($doc->findnodes('ancestor::ltx:MathBranch', $xmath)) # In formatted side of MathFork? - # But ONLY if last column!! (until we can adapt LineBreaker!) + # But ONLY if last column!! (until we can adapt LineBreaker!) && !$doc->findnodes('parent::ltx:Math/parent::ltx:td/following-sibling::ltx:td', $xmath)) { my ($pmmlb, $broke) = $self->convertNode_linebreak($doc, $xmath, $style); $pmml = $pmmlb; } @@ -134,18 +134,30 @@ sub addAccessibilityAnnotations { my $role = $currentnode->getAttribute('role') || ''; $meaning = ($role ne 'NUMBER') && $currentnode->getAttribute('meaning'); } elsif ($current_node_name eq 'ltx:XMApp') { - my @current_children = element_nodes($currentnode); - my $current_op_meaning = $current_children[0]->getAttribute('meaning') || ''; - my $arg_count = scalar(@current_children) - 1; # Ok, so we need to disentangle the case where the operator XMTok is preserved in pmml, # and the case where it isn't. E.g. 
in \sqrt{x} we get a msqrt wrapper, but no dedicated token # so we need to mark the literal "square-root" in msqrt - my $op; - my $name = getQName($node); - if ($name ne 'm:mrow') { # not an mrow, prefer the literal semantic - $op = $current_op_meaning; } - else { # mrow, prefer #op, except for whitelisted exception cases (which ones??) - $op = ($current_op_meaning eq 'multirelation') ? $current_op_meaning : '#op'; } + # + # Additionally, semantic annotations via e.g. \lxDeclare will appear as top-level XMApp meaning + # attributes, and we must be extra careful not to descend into those nodes. + my ($op, $arg_count); + my $app_meaning = $currentnode->getAttribute('meaning'); + if ($app_meaning) { + $meaning = $app_meaning; + # TODO: This still feels like "choppy" ad-hoc treatment. + # What is a good general traversal algorithm that avoids all these special cases? + for my $arg_node (p_element_nodes($node)) { + if ((p_getAttribute($arg_node, '_a11y') || '') ne 'ref') { + p_removeAttribute($arg_node, 'data-semantic'); } } } + else { + my @current_children = element_nodes($currentnode); + my $current_op_meaning = $current_children[0]->getAttribute('meaning') || ''; + $arg_count = scalar(@current_children) - 1; + my $name = getQName($node); + if ($name ne 'm:mrow') { # not an mrow, prefer the literal semantic + $op = $current_op_meaning; } + else { # mrow, prefer #op, except for whitelisted exception cases (which ones??) + $op = ($current_op_meaning eq 'multirelation') ? $current_op_meaning : '#op'; } } if ($op) { # Set the meaning, if we found a satisfying $op: $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . ")"; } else { # if there is no op, we should undo argument annotations pointing at the application, @@ -232,8 +244,8 @@ sub preprocess_linebreaking { my $style = ($mode eq 'display' ? 'display' : 'text'); # If already has in a MathBranch, we can't really know if, or how wide, to line break!?!?! 
next if $doc->findnodes('ancestor::ltx:MathFork', $math); # SKIP if already in a branch? - # Now let's do the layout & see if it actually needs line breaks! - # next if $math isn't really so wide .. + # Now let's do the layout & see if it actually needs line breaks! + # next if $math isn't really so wide .. my $id = $math->getAttribute('xml:id'); my $xmath = $doc->findnode('ltx:XMath', $math); my ($pmml, $broke) = $self->convertNode_linebreak($doc, $xmath, $style); From c2dc9e099b650f32e1b0b99e2f58ad57753b67b5 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 14:48:41 -0400 Subject: [PATCH 41/46] cleaner rewrite from scratch of accessibility traversal --- lib/LaTeXML/Core/Rewrite.pm | 1 - lib/LaTeXML/Post/MathML/Presentation.pm | 162 +++++++++--------------- 2 files changed, 59 insertions(+), 104 deletions(-) diff --git a/lib/LaTeXML/Core/Rewrite.pm b/lib/LaTeXML/Core/Rewrite.pm index 3a3a868d8..b38671a43 100644 --- a/lib/LaTeXML/Core/Rewrite.pm +++ b/lib/LaTeXML/Core/Rewrite.pm @@ -183,7 +183,6 @@ sub applyClause { ## EXPERIMENTAL: This is an early experiment and needs to be refactored before it can be considered for serious use sub action_insert { my ($document, $direction, $extra, $tree) = @_; - print STDERR "Tree: ", $tree->toString(1), "\n"; my $anchor; if ($direction eq 'pre') { $anchor = $tree->previousSibling; } diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 9efb77dc1..36c71468c 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -72,7 +72,7 @@ sub rawIDSuffix { return '.pmml'; } sub associateNodeHook { - # technical note: $sourcenode is a LibXML element, while $node is that OR the arrayref triple form +# technical note: $sourcenode and $currentnode are LibXML elements, while $node is that OR the arrayref triple form my ($self, $node, $sourcenode, $noxref, $currentnode) = @_; # TODO: Shouldn't we have a single getQName shared for the 
entire latexml codebase # (same for the p_* methods from MathParser) @@ -83,122 +83,66 @@ sub associateNodeHook { p_setAttribute($node, 'href', $href); } if (my $title = $sourcenode->getAttribute('title')) { p_setAttribute($node, 'title', $title); } } - $self->addAccessibilityAnnotations($node, $currentnode) if $$self{a11y}; + $self->addAccessibilityAnnotations($node, $sourcenode, $currentnode) if $$self{a11y}; return; } # Experiment: set accessibility attributes on the resulting presentation tree, # if the XMath source has a claim to the semantics via a "meaning" attribute. sub addAccessibilityAnnotations { - # Part I: Top-down. Recover the meaning of a subtree as an accessible annotation - my ($self, $node, $currentnode) = @_; + my ($self, $node, $sourcenode, $currentnode) = @_; + # 1. Filter and bookkeep which nodes are to be treated. my $current_node_name = getQName($currentnode); return if $current_node_name eq 'ltx:XMath'; -# a number of redundant annotations are caused by reusing the same content node for on-the-fly content, -# e.g. we end up creating a new invisible-apply XMTok, and then associate its node -# with the $currentnode of its parent f(x), now as finvisible-apply(x) -# that second call should just immediately terminate, there is nothing to add in such cases. return if $currentnode->getAttribute('_a11y'); $currentnode->setAttribute('_a11y', 'done'); - # --- TOP PRIORITY: run an exclusion check for pieces that are presentation-only fluff for duals - my @dual_pres_ancestry = $LaTeXML::Post::DOCUMENT->findnodes("ancestor-or-self::*[preceding-sibling::*][parent::ltx:XMDual]", $currentnode); - my $dual_pres_node = $dual_pres_ancestry[-1]; # Weirdly ->findnode() is finding the highest ancestor, rather than the tightest ancestor? This [-1] seems to do it. 
- if ($dual_pres_node && !$dual_pres_node->isSameNode($currentnode)) { # 1) they have a dual ancestor, but are not the main presentation node - my $check_node = $currentnode; - my $id = $currentnode->getAttribute('xml:id'); - while (!$id && !$check_node->isSameNode($dual_pres_node)) { - $id = $check_node->getAttribute('xml:id'); - $check_node = $check_node->parentNode; } - # if no id is found, they are not referenced by the dual - return unless $id; } - # --- - # In the remaining cases, process the node, check if it has meaningful annotations to add + my $source_node_name = getQName($sourcenode); + my $container; +# skip non-material dual presentation, which points to content nodes but should *not* carry annotations itself + if ($$currentnode != $$sourcenode) { + return if ($source_node_name ne 'ltx:XMDual') or ($sourcenode->getAttribute('_a11y')); + $sourcenode->setAttribute('_a11y', 'done'); } + elsif ($container = $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $currentnode)) { +# also skip any embellishments in duals that are not semantic, a bit tricky since we need to check parent xmapps + my $content_node = $container->firstChild; + my %xmrefs = map { my $ref = $_->getAttribute('idref'); $ref ? ($ref => 1) : () } + $LaTeXML::Post::DOCUMENT->findnodes("//ltx:XMRef[\@idref]", $content_node); + return unless %xmrefs; # certainly not usable if no refs. + my $ancestor = $currentnode; + while ($$ancestor != $$container && !$xmrefs{ $ancestor->getAttribute('xml:id') || '' }) { + $ancestor = $ancestor->parentNode; } + return if $$ancestor == $$container; } + # 1--end. We reach here only with semantic nodes in hand (or the logic has a Bug). + + print STDERR "semantic node: ", $sourcenode->toString(1), "\n"; + + # 2. Bookkeep the semantic information. 
my ($meaning, $arg); - if ($dual_pres_node && $dual_pres_node->isSameNode($currentnode)) { # top-level pres of dual - my $dual_content_node = $dual_pres_node->previousSibling; - my $dual_content_name = getQName($dual_content_node); - if ($dual_content_name eq 'ltx:XMRef') { # single subtree of the presentation, point to it - $meaning = '#1'; } - elsif ($dual_content_name eq 'ltx:XMTok') { - $meaning = $dual_content_node->getAttribute('meaning'); } - elsif ($dual_content_name eq 'ltx:XMApp') { - # another special case, from the \overline{x}_i land, - # we could get a deeply nested application tree with a lot of nodes which have no referrents - # but should be translated into the final data-semantic - # best to call out into a subroutine? - $meaning = dual_content_xmapp_to_semantic_attr($dual_content_node, 0); } -# Note that the carrier ltx:XMDual is never passed in associateNode, but often requires an "arg". Recurse: - $self->addAccessibilityAnnotations($node, $dual_pres_node->parentNode); } - # tokens are simplest - if we know of a meaning, use that for accessibility - elsif ($current_node_name eq 'ltx:XMTok') { - # stylistic choice - avoid tagging numbers, even though we could, too obvious - my $role = $currentnode->getAttribute('role') || ''; - $meaning = ($role ne 'NUMBER') && $currentnode->getAttribute('meaning'); } - elsif ($current_node_name eq 'ltx:XMApp') { - # Ok, so we need to disentangle the case where the operator XMTok is preserved in pmml, - # and the case where it isn't. E.g. in \sqrt{x} we get a msqrt wrapper, but no dedicated token - # so we need to mark the literal "square-root" in msqrt - # - # Additionally, semantic annotations via e.g. \lxDeclare will appear as top-level XMApp meaning - # attributes, and we must be extra careful not to descend into those nodes. 
- my ($op, $arg_count); - my $app_meaning = $currentnode->getAttribute('meaning'); - if ($app_meaning) { - $meaning = $app_meaning; - # TODO: This still feels like "choppy" ad-hoc treatment. - # What is a good general traversal algorithm that avoids all these special cases? - for my $arg_node (p_element_nodes($node)) { - if ((p_getAttribute($arg_node, '_a11y') || '') ne 'ref') { - p_removeAttribute($arg_node, 'data-semantic'); } } } - else { - my @current_children = element_nodes($currentnode); - my $current_op_meaning = $current_children[0]->getAttribute('meaning') || ''; - $arg_count = scalar(@current_children) - 1; - my $name = getQName($node); - if ($name ne 'm:mrow') { # not an mrow, prefer the literal semantic - $op = $current_op_meaning; } - else { # mrow, prefer #op, except for whitelisted exception cases (which ones??) - $op = ($current_op_meaning eq 'multirelation') ? $current_op_meaning : '#op'; } } - if ($op) { # Set the meaning, if we found a satisfying $op: - $meaning = "$op(" . join(",", map { '#' . $_ } (1 .. $arg_count)) . ")"; } - else { # if there is no op, we should undo argument annotations pointing at the application, - for my $arg_node (p_element_nodes($node)) { - if ((p_getAttribute($arg_node, '_a11y') || '') ne 'ref') { - p_removeAttribute($arg_node, 'data-arg'); } } } } + if (my $src_meaning = $sourcenode->getAttribute('meaning')) { + $meaning = $src_meaning; } + elsif ($source_node_name eq 'ltx:XMApp') { + my $op = ($$node[0] eq 'm:mrow') ? '#op' : p_getAttribute($sourcenode->firstChild, 'meaning'); + $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(element_nodes($sourcenode)) - 1) . ')'; } + elsif ($source_node_name eq 'ltx:XMDual') { + $meaning = dual_content_xmapp_to_semantic_attr($sourcenode->firstChild); } + +# 3. 
Bookkeep "arg" information +# (careful, can be arbitrary deep in a dual content tree) +# also, not so easy to disentangle - a node nested deeply inside a dual may be _either_ referenced in the dual (primary) +# _or_ a classic direct child of an intermediate XMApp. So we test until we find an $arg: + $container = $container || $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $sourcenode); + if ($container) { + my $id = $sourcenode->getAttribute('xml:id'); + $arg = $id && dual_content_idref_to_data_attr($container->firstChild, $id); } + if (!$arg && (getQName($sourcenode->parentNode) eq 'ltx:XMApp')) { # normal apply case + # note we can only do this simple check because we filtered out all embellishments in step 1. + my $position = $LaTeXML::Post::DOCUMENT->findvalue("count(preceding-sibling::*)", $sourcenode); + $arg = $position || 'op'; } - # if we found some meaning, attach it as an accessible attribute p_setAttribute($node, 'data-semantic', $meaning) if $meaning; - - # Part II: Bottom-up. Also check if argument of higher parent notation, mark if so. - # II.1 id-carrying nodes always point to their referrees. - if ($dual_pres_node && (my $id = $currentnode->getAttribute('xml:id'))) { - # We already found the dual - my $dual_content_node = $dual_pres_node->previousSibling; - # note that if we never find the 'idref', arg is never set - if (my $xmref = $LaTeXML::Post::DOCUMENT->findnode('//ltx:XMRef[@idref="' . $id . '"]', $dual_content_node)) { - p_setAttribute($node, '_a11y', 'ref'); # mark as used in ref - my $index = 0; - my $parent = $xmref->parentNode; - my $c_arg = $xmref; - while ($c_arg = $c_arg->previousSibling) { - $index++; } - $arg = $index || ((getQName($xmref->parentNode) eq 'ltx:XMDual') ? '1' : ($xmref->nextSibling ? 'op' : '1')); - # compute a level suffix if nested within main dual - my $lvl = -1; - while (getQName($parent) eq 'ltx:XMApp') { - $parent = $parent->parentNode; - $lvl++; } - $arg .= "_$lvl" if ($lvl > 0); } } - # II.2. 
applications children are directly pointing to their parents - # also fallback in the dual case, if the XMApp had an id but wasn't an arg - if (!$arg && (getQName($currentnode->parentNode) eq 'ltx:XMApp')) { - my $index = 0; - my $prev_sibling = $currentnode; - while ($prev_sibling = $prev_sibling->previousSibling) { - $index++; } - $arg = $index ? $index : 'op'; } - p_setAttribute($node, 'data-arg', $arg) if $arg; + p_setAttribute($node, 'data-arg', $arg) if $arg; return; } +# Given the first (content) child of an ltx:XMDual, compute its corresponding a11y "semantic" attribute sub dual_content_xmapp_to_semantic_attr { my ($node, $lvl) = @_; my @arg_nodes = element_nodes($node); @@ -214,6 +158,18 @@ sub dual_content_xmapp_to_semantic_attr { push @arg_strings, '#' . $index . ($lvl ? "_$lvl" : ""); } } # will we need level suffixes? return $op . '(' . join(",", @arg_strings) . ')'; } +# Given the first (content) child of an ltx:XMDual, and an idref value, compute the corresponding "arg" attribute for that XMRef +sub dual_content_idref_to_data_attr { + my ($content_node, $idref) = @_; + my ($ref_node) = $LaTeXML::Post::DOCUMENT->findnodes( + "//ltx:XMRef[\@idref=\"" . $idref . "\"][1]", $content_node); + my $path = ''; + my $ancestor = $ref_node; + while ($$ancestor != $$content_node) { + my $position = $LaTeXML::Post::DOCUMENT->findvalue("count(preceding-sibling::*)", $ancestor); + $path = $path ? ($position . '_' . $path) : $position; + $ancestor = $ancestor->parentNode; } + return $path || 'op'; } #================================================================================ # Presentation MathML with Line breaking # Not at all sure how this will integrate with Parallel markup... 
From 620ea05c132bfd75035ead6b88e9f71d818d8070 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 15:06:38 -0400 Subject: [PATCH 42/46] compact simple XMDual of two XMToks --- lib/LaTeXML/Post/MathML/Presentation.pm | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 36c71468c..fc80762ae 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -113,8 +113,6 @@ sub addAccessibilityAnnotations { return if $$ancestor == $$container; } # 1--end. We reach here only with semantic nodes in hand (or the logic has a Bug). - print STDERR "semantic node: ", $sourcenode->toString(1), "\n"; - # 2. Bookkeep the semantic information. my ($meaning, $arg); if (my $src_meaning = $sourcenode->getAttribute('meaning')) { From a7616cd338e61799bb8081aadb75ca256cf01798 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 15:21:54 -0400 Subject: [PATCH 43/46] transition away from the artificial level count, just do child counting from the dual root -> down, in deep id generation --- lib/LaTeXML/Post/MathML/Presentation.pm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index fc80762ae..1453dafce 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -142,18 +142,18 @@ sub addAccessibilityAnnotations { # Given the first (content) child of an ltx:XMDual, compute its corresponding a11y "semantic" attribute sub dual_content_xmapp_to_semantic_attr { - my ($node, $lvl) = @_; + my ($node, $prefix) = @_; my @arg_nodes = element_nodes($node); my $op_node = shift @arg_nodes; - my $op = ($op_node && $op_node->getAttribute('meaning')) || ($lvl ? '#op' . 
$lvl : '#op'); + my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op'; my @arg_strings = (); my $index = 0; for my $arg_node (@arg_nodes) { $index++; if (getQName($arg_node) eq 'ltx:XMApp') { - push @arg_strings, dual_content_xmapp_to_semantic_attr($arg_node, $lvl + 1); } + push @arg_strings, dual_content_xmapp_to_semantic_attr($arg_node, $prefix ? ($prefix . "_$index") : $index); } else { - push @arg_strings, '#' . $index . ($lvl ? "_$lvl" : ""); } } # will we need level suffixes? + push @arg_strings, '#' . ($prefix ? ($prefix . "_$index") : $index); } } # will we need level suffixes? return $op . '(' . join(",", @arg_strings) . ')'; } # Given the first (content) child of an ltx:XMDual, and an idref value, compute the corresponding "arg" attribute for that XMRef From 2291bbe98b8a29c82272ef98daf1ea6df4754586 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 15:48:16 -0400 Subject: [PATCH 44/46] also handle single XMTok duals --- lib/LaTeXML/Post/MathML/Presentation.pm | 35 +++++++++++++++---------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 1453dafce..6791d9850 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -121,7 +121,7 @@ sub addAccessibilityAnnotations { my $op = ($$node[0] eq 'm:mrow') ? '#op' : p_getAttribute($sourcenode->firstChild, 'meaning'); $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(element_nodes($sourcenode)) - 1) . ')'; } elsif ($source_node_name eq 'ltx:XMDual') { - $meaning = dual_content_xmapp_to_semantic_attr($sourcenode->firstChild); } + $meaning = dual_content_to_semantic_attr($sourcenode->firstChild); } # 3. 
Bookkeep "arg" information # (careful, can be arbitrary deep in a dual content tree) @@ -141,20 +141,27 @@ sub addAccessibilityAnnotations { return; } # Given the first (content) child of an ltx:XMDual, compute its corresponding a11y "semantic" attribute -sub dual_content_xmapp_to_semantic_attr { +sub dual_content_to_semantic_attr { my ($node, $prefix) = @_; - my @arg_nodes = element_nodes($node); - my $op_node = shift @arg_nodes; - my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op'; - my @arg_strings = (); - my $index = 0; - for my $arg_node (@arg_nodes) { - $index++; - if (getQName($arg_node) eq 'ltx:XMApp') { - push @arg_strings, dual_content_xmapp_to_semantic_attr($arg_node, $prefix ? ($prefix . "_$index") : $index); } - else { - push @arg_strings, '#' . ($prefix ? ($prefix . "_$index") : $index); } } # will we need level suffixes? - return $op . '(' . join(",", @arg_strings) . ')'; } + my $name = getQName($node); + if ($name eq 'ltx:XMTok') { + return $node->getAttribute('meaning') || $node->getAttribute('name') || 'unknown'; } + elsif ($name eq 'ltx:XMApp') { + my @arg_nodes = element_nodes($node); + my $op_node = shift @arg_nodes; + my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op'; + my @arg_strings = (); + my $index = 0; + for my $arg_node (@arg_nodes) { + $index++; + if (getQName($arg_node) eq 'ltx:XMApp') { + push @arg_strings, dual_content_to_semantic_attr($arg_node, $prefix ? ($prefix . "_$index") : $index); } + else { + push @arg_strings, '#' . ($prefix ? ($prefix . "_$index") : $index); } } # will we need level suffixes? + return $op . '(' . join(",", @arg_strings) . 
')'; } + else { + print STDERR "Warning:unknown XMDual content child '$name' will default data-semantic attribute to 'unknown'\n"; + return 'unknown'; } } # Given the first (content) child of an ltx:XMDual, and an idref value, compute the corresponding "arg" attribute for that XMRef sub dual_content_idref_to_data_attr { From 5c433ea129bcd5beee9c1c459f7469a6cec20158 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 16:23:30 -0400 Subject: [PATCH 45/46] avoid marking up special markup pmml with no given meaning --- lib/LaTeXML/Post/MathML/Presentation.pm | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 6791d9850..3f63b10cb 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -119,7 +119,12 @@ sub addAccessibilityAnnotations { $meaning = $src_meaning; } elsif ($source_node_name eq 'ltx:XMApp') { my $op = ($$node[0] eq 'm:mrow') ? '#op' : p_getAttribute($sourcenode->firstChild, 'meaning'); - $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(element_nodes($sourcenode)) - 1) . ')'; } + if ($op) { # annotate only if we knew a 'meaning' attribute, for the special markup scenarios + $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(element_nodes($sourcenode)) - 1) . ')'; } + else { + # otherwise, take the liberty to delete all data-arg of direct children + for my $pmml_child (@$node[2 .. scalar(@$node) - 1]) { + p_removeAttribute($pmml_child, 'data-arg'); } } } elsif ($source_node_name eq 'ltx:XMDual') { $meaning = dual_content_to_semantic_attr($sourcenode->firstChild); } @@ -175,6 +180,7 @@ sub dual_content_idref_to_data_attr { $path = $path ? ($position . '_' . 
$path) : $position; $ancestor = $ancestor->parentNode; } return $path || 'op'; } + #================================================================================ # Presentation MathML with Line breaking # Not at all sure how this will integrate with Parallel markup... From 1b5936a62ea4b513e205a14ec0cfdeeab3489803 Mon Sep 17 00:00:00 2001 From: Deyan Ginev Date: Fri, 14 Aug 2020 19:28:38 -0400 Subject: [PATCH 46/46] tighten handler logic. Handles almost all examples in a11y showcase, best results so far --- lib/LaTeXML/Post/MathML/Presentation.pm | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm index 3f63b10cb..f52c20331 100644 --- a/lib/LaTeXML/Post/MathML/Presentation.pm +++ b/lib/LaTeXML/Post/MathML/Presentation.pm @@ -98,27 +98,26 @@ sub addAccessibilityAnnotations { my $source_node_name = getQName($sourcenode); my $container; # skip non-material dual presentation, which points to content nodes but should *not* carry annotations itself - if ($$currentnode != $$sourcenode) { - return if ($source_node_name ne 'ltx:XMDual') or ($sourcenode->getAttribute('_a11y')); - $sourcenode->setAttribute('_a11y', 'done'); } - elsif ($container = $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $currentnode)) { + if (($container = $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $currentnode)) and + (${ $currentnode->parentNode } != $$container)) { # also skip any embellishments in duals that are not semantic, a bit tricky since we need to check parent xmapps my $content_node = $container->firstChild; my %xmrefs = map { my $ref = $_->getAttribute('idref'); $ref ? ($ref => 1) : () } - $LaTeXML::Post::DOCUMENT->findnodes("//ltx:XMRef[\@idref]", $content_node); - return unless %xmrefs; # certainly not usable if no refs. 
+ $LaTeXML::Post::DOCUMENT->findnodes("descendant-or-self::ltx:XMRef[\@idref]", $content_node); + return unless %xmrefs; # certainly not usable if no refs in the dual. my $ancestor = $currentnode; while ($$ancestor != $$container && !$xmrefs{ $ancestor->getAttribute('xml:id') || '' }) { $ancestor = $ancestor->parentNode; } return if $$ancestor == $$container; } # 1--end. We reach here only with semantic nodes in hand (or the logic has a Bug). - # 2. Bookkeep the semantic information. my ($meaning, $arg); if (my $src_meaning = $sourcenode->getAttribute('meaning')) { $meaning = $src_meaning; } elsif ($source_node_name eq 'ltx:XMApp') { - my $op = ($$node[0] eq 'm:mrow') ? '#op' : p_getAttribute($sourcenode->firstChild, 'meaning'); +# Tricky, what is the best way to figure out if the operator is presentable vs implied? Check if it has _a11y=done? + my $op_node = $sourcenode->firstChild; + my $op = $op_node->getAttribute('_a11y') ? '#op' : p_getAttribute($op_node, 'meaning'); if ($op) { # annotate only if we knew a 'meaning' attribute, for the special markup scenarios $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(element_nodes($sourcenode)) - 1) . ')'; } else { @@ -151,6 +150,8 @@ sub dual_content_to_semantic_attr { my $name = getQName($node); if ($name eq 'ltx:XMTok') { return $node->getAttribute('meaning') || $node->getAttribute('name') || 'unknown'; } + elsif ($name eq 'ltx:XMRef') { # pass through case + return '#1'; } elsif ($name eq 'ltx:XMApp') { my @arg_nodes = element_nodes($node); my $op_node = shift @arg_nodes; @@ -172,14 +173,15 @@ sub dual_content_to_semantic_attr { sub dual_content_idref_to_data_attr { my ($content_node, $idref) = @_; my ($ref_node) = $LaTeXML::Post::DOCUMENT->findnodes( - "//ltx:XMRef[\@idref=\"" . $idref . "\"][1]", $content_node); + "descendant-or-self::ltx:XMRef[\@idref=\"" . $idref . 
"\"][1]", $content_node); + return '' unless $ref_node; my $path = ''; my $ancestor = $ref_node; while ($$ancestor != $$content_node) { my $position = $LaTeXML::Post::DOCUMENT->findvalue("count(preceding-sibling::*)", $ancestor); $path = $path ? ($position . '_' . $path) : $position; $ancestor = $ancestor->parentNode; } - return $path || 'op'; } + return $path ? $path : (scalar(element_nodes($content_node)) > 1 ? 'op' : '1'); } #================================================================================ # Presentation MathML with Line breaking