diff --git a/MANIFEST b/MANIFEST
index 236f9c3b3..ce0ba574d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -322,6 +322,7 @@ lib/LaTeXML/Package/PoS.cls.ltxml
 lib/LaTeXML/Package/TeX.pool.ltxml
 lib/LaTeXML/Package/a0poster.cls.ltxml
 lib/LaTeXML/Package/a0size.sty.ltxml
+lib/LaTeXML/Package/a11ymark.sty.ltxml
 lib/LaTeXML/Package/a4.sty.ltxml
 lib/LaTeXML/Package/a4wide.sty.ltxml
 lib/LaTeXML/Package/aa.cls.ltxml
diff --git a/lib/LaTeXML/Core/Rewrite.pm b/lib/LaTeXML/Core/Rewrite.pm
index bfb4a6ed8..b38671a43 100644
--- a/lib/LaTeXML/Core/Rewrite.pm
+++ b/lib/LaTeXML/Core/Rewrite.pm
@@ -16,6 +16,8 @@ use LaTeXML::Global;
 use LaTeXML::Common::Object;
 use LaTeXML::Common::Error;
 use LaTeXML::Common::XML;
+use LaTeXML::Core::Token qw(T_CS T_MATH);
+use LaTeXML::Core::Tokens qw(Tokens);
 sub new {
   my ($class, $mode, @specs) = @_;
@@ -143,8 +145,8 @@ sub applyClause {
       # Now make any adjustments to the new nodes
       map { $document->recordNodeIDs($_) } @inserted;
-      my $font = $document->getNodeFont($tree); # the font of the matched node
-      foreach my $ins (@inserted) { # Copy the non-semantic parts of font to the replacement
+      my $font = $document->getNodeFont($tree); # the font of the matched node
+      foreach my $ins (@inserted) { # Copy the non-semantic parts of font to the replacement
         $document->mergeNodeFontRec($ins => $font); }
       # Now, replace the following nodes.
       map { $parent->appendChild($_) } @following; }
@@ -178,6 +180,36 @@ sub applyClause {
     Error('misdefined', '', undef, "Unknown directive '$op' in Compiled Rewrite spec"); }
   return; }
+## EXPERIMENTAL: This is an early experiment and needs to be refactored before it can be considered for serious use
+# action_insert($document, $direction, $extra, $tree)
+#   $direction : 'pre' anchors on $tree's previous sibling, 'post' on its next sibling.
+#   $extra     : code reference, called as $extra->($document) after $document->setNode on the anchor's parent.
+# Every child found after the parent's former last child is then moved to just after the anchor,
+# keeping relative order.  Does nothing when there is no anchor sibling.  Returns nothing.
+sub action_insert {
+  my ($document, $direction, $extra, $tree) = @_;
+  my $anchor;
+  if ($direction eq 'pre') {
+    $anchor = $tree->previousSibling; }
+  elsif ($direction eq 'post') {
+    $anchor = $tree->nextSibling; }
+  if ($anchor) {    # What should we do if no anchor? Skip?
+                    # Carry out the operation, inserting whatever nodes.
+    my $parent   = $anchor->parentNode;
+    my $end_mark = $parent->lastChild;
+    $document->setNode($parent);
+    $extra->($document);
+    my @inserted = ();
+    my @children = $parent->childNodes;
+    while (my $child = pop @children) {
+      last unless ($$child != $$end_mark);
+      $child->unbindNode;
+      push @inserted, $child; }
+    for my $newchild (@inserted) {
+      $parent->insertAfter($newchild, $anchor);
+      $document->recordNodeIDs($newchild); } }
+  return; }
+
 # Set attributes for an encapsulated tree (ie. a decorated symbol as symbol itself)
 sub setAttributes_encapsulate {
   my ($document, $attributes, @nodes) = @_;
@@ -321,6 +353,20 @@ sub compileClause {
     if (ref $pattern eq 'CODE') { }
     else {
       $pattern = $self->compile_replacement($document, $pattern); } }
+  elsif ($op eq 'action') {
+    if (ref $pattern eq 'CODE') { }
+    # HACK: it appears this is a stage already **too late** to handle pre/post directive parsing
+    # maybe what I should consider instead is having a "pre:action" and "post:action" KEY
+    # which can be parsed via $op, keeping $pattern handled identically to the 'replace' case?
+    elsif (ToString($pattern) =~ /^(pre|post):\\(.+)$/) {    # only "pre:\cs" / "post:\cs" are accepted
+      my ($direction, $csname) = ($1, $2);
+      my $extra = $self->compile_replacement($document, Tokens(T_MATH, T_CS('\\' . $csname), T_MATH));
+      $pattern = sub {
+        my ($tree) = @_;
+        action_insert($document, $direction, $extra, $tree); } }
+    else {
+      Fatal('misdefined', '', undef,
+        "Can't generate 'action' for arbitrary tokens.", ToString($pattern)); } }
   elsif ($op eq 'regexp') {
     $pattern = $self->compile_regexp($pattern); }
   print STDERR "Compiled clause $oop=>" . ToString($opattern) . " ==> $op=>" . ToString($pattern) . "\n"
@@ -532,7 +578,7 @@ sub domToXPath_seq {
 __END__
-=pod
+=pod
 =head1 NAME
diff --git a/lib/LaTeXML/MathParser.pm b/lib/LaTeXML/MathParser.pm
index 4e827a17a..499437075 100644
--- a/lib/LaTeXML/MathParser.pm
+++ b/lib/LaTeXML/MathParser.pm
@@ -33,7 +33,8 @@ our @EXPORT_OK = (qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbol
   &LeftRec
   &Arg &MaybeFunction
   &SawNotation &IsNotationAllowed
-  &isMatchingClose &Fence));
+  &isMatchingClose &Fence
+  &p_getAttribute &p_setAttribute &p_removeAttribute &p_element_nodes));
 our %EXPORT_TAGS = (constructors => [qw(&Lookup &New &Absent &Apply &ApplyNary &recApply &CatSymbols
     &Annotate &InvisibleTimes &InvisibleComma
@@ -1054,6 +1055,24 @@ sub p_getAttribute {
   elsif (ref $item eq 'XML::LibXML::Element') {
     return $item->getAttribute($key); } }
+# Set attribute $key to $value on an arrayref-triple node or a LibXML element; any other input is ignored.
+sub p_setAttribute {
+  my ($node, $key, $value) = @_;
+  if (ref $node eq 'ARRAY') {
+    $$node[1]{$key} = $value; }
+  elsif (ref $node eq 'XML::LibXML::Element') {
+    $node->setAttribute($key => $value); }
+  return; }
+
+# Remove attribute $key from an arrayref-triple node or a LibXML element; any other input is ignored.
+sub p_removeAttribute {
+  my ($node, $key) = @_;
+  if (ref $node eq 'ARRAY') {
+    delete $$node[1]{$key}; }
+  elsif (ref $node eq 'XML::LibXML::Element') {
+    $node->removeAttribute($key); }
+  return; }
+
 sub p_element_nodes {
   my ($item) = @_;
   if (!defined $item) {
diff --git a/lib/LaTeXML/Package/a11ymark.sty.ltxml b/lib/LaTeXML/Package/a11ymark.sty.ltxml
new file mode 100644
index 000000000..3ab295a54
--- /dev/null
+++ b/lib/LaTeXML/Package/a11ymark.sty.ltxml
@@ -0,0 +1,255 @@
+# -*- mode: Perl -*-
+# /=====================================================================\ #
+# | a11ymark.sty -- demo semantic bindings for accessibility | #
+# | Implementation for LaTeXML | #
+# |=====================================================================| #
+# | Part of LaTeXML: | #
+# | Public domain software, produced as part of work done by the | #
+# | United States Government & not subject to copyright in the US. 
| #
+# |---------------------------------------------------------------------| #
+# | Bruce Miller #_# | #
+# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
+# \=========================================================ooo==U==ooo=/ #
+package LaTeXML::Package::Pool;
+use strict;
+use warnings;
+use LaTeXML::Package;
+
+RequirePackage('latexml');
+
+DefConstructorI(T_CS('\@request@math@a11y'), undef, "");    # NOTE(review): replacement text is empty here -- confirm nothing was lost
+AtBeginDocument(T_CS('\@request@math@a11y'));
+
+# NOTE: demonstration-oriented binding, all names and definitions subject to change without notice.
+
+# Embellishment is hard to write, hard to speak, but describes exactly several cases
+# I will abbreviate it "emb", for now, and use it as a prefix
+
+# \emb@atom{meaning}{presentation}
+DefMacro('\emb@atom{}{}', '\DUAL[hide_content_reversion=true]{\@CSYMBOL{#1}}{\@WRAP{#2}}');
+DefMacro('\emb@build@apply{}{}', '\DUAL[hide_content_reversion=true]{\@APPLY{#1}}{\@WRAP{#2}}');
+
+sub emb_apply {    # one operation: content is ($meaning, base), presentation is base with $emb after (or before) it
+  my ($gullet, $base, $meaning, $emb, $invert_to_prefix) = @_;
+  my ($cargs, $pargs) = dualize_arglist('#1', $base);    # presumably content-side refs / presentation-side args -- TODO confirm
+  my $ref_base = $$cargs[0];
+  my $arg_base = $$pargs[0];
+  my $presentation = $invert_to_prefix ? Tokens($emb, $arg_base) : Tokens($arg_base, $emb);
+  return Invocation(T_CS('\emb@build@apply'),
+    Tokens(Invocation(T_CS('\@CSYMBOL'), $meaning), $ref_base),
+    $presentation)->unlist; }
+
+sub emb_apply_two {    # one-or-two operations, can't fully reuse the simple case...
+  my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $invert_to_prefix) = @_;
+  if (!$op2_meaning && !$op2_pres) {    # one operation, use the simple apply call
+    return emb_apply($gullet, $base, $op1_meaning, $op1_pres, $invert_to_prefix); }
+  # Case where we have two consecutive operations
+  my ($cargs, $pargs) = dualize_arglist('#1', $base);
+  my $ref_base = $$cargs[0];
+  my $arg_base = $$pargs[0];
+
+  my $pres_tokens = $invert_to_prefix ? Tokens($op2_pres, $op1_pres, $arg_base) : Tokens($arg_base, $op1_pres, $op2_pres);
+  my $presentation = Invocation(T_CS('\@WRAP'), $pres_tokens);
+  my $content = Invocation(T_CS('\@APPLY'), Invocation(T_CS('\@CSYMBOL'), $op2_meaning),    # op2 is the outer application
+    Invocation(T_CS('\@APPLY'), Invocation(T_CS('\@CSYMBOL'), $op1_meaning), $ref_base));     # op1 applied to the base
+  return Invocation(T_CS('\DUAL'), undef, $content, $presentation)->unlist; }
+
+# Two operators acting on base in sequence, commonly alternate scripts ^m_n.
+# \emb@apply{base}{op1 meaning}{op1 pres}[op2 meaning][op2 pres]
+DefMacro('\emb@apply{}{}{}[][]', \&emb_apply_two);
+
+# As with \emb@apply, but the presentation is right-to-left prefix "op2_pres op1_pres base"
+# \emb@preapply{base}{op1 meaning}{op1 pres}[op2 meaning][op2 pres]
+DefMacro('\emb@preapply{}{}{}[][]', sub { emb_apply_two(@_, 1); });    # assumes @_ always holds all five arguments (absent optionals as undef) -- TODO confirm
+
+# ADHOC for the very awkward example we have so far.
+# and the order of presentation args is inverted, while the semantic one is kept.
+# Example \PrePostArgCrosswise{x}{median}{\overline}{index}{_}{i}
+DefMacro('\PrePostArgCrosswise{}{}{}{}{}{}', sub {
+    my ($gullet, $base, $op1_meaning, $op1_pres, $op2_meaning, $op2_pres, $op2_rhs_var) = @_;
+    my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $op2_rhs_var);
+    my ($ref_base, $ref_rhs_var) = @$cargs;
+    my ($arg_base, $arg_rhs_var) = @$pargs;
+
+    my $presentation = Tokens(Tokens($op1_pres, $arg_base), $op2_pres, $arg_rhs_var);    # op1 prefixes the base, op2 and its variable follow
+    my $content = Tokens(Invocation(T_CS('\@CSYMBOL'), $op1_meaning),
+      Invocation(T_CS('\@APPLY'), Tokens(
+          Invocation(T_CS('\@CSYMBOL'), $op2_meaning),
+          $ref_base, $ref_rhs_var)));
+    return Invocation(T_CS('\emb@build@apply'), $content, $presentation)->unlist; });
+
+# ADHOC - terrible low-level soup macro with 7 arguments,
+# just an example of things being possible... 
+# \PostArgsCrosswise{x}{derivative-implicit-variable}{^}{\derivemark{1}}{index}{_}{i}
+DefMacro('\PostArgsCrosswise{}{}{}{}{}{}{}', sub {
+    my ($gullet, $base, $op1_meaning, $op1_pres, $op1_rhs_var, $op2_meaning, $op2_pres, $op2_rhs_var) = @_;
+    my ($cargs, $pargs) = dualize_arglist('#1#2#3', $base, $op1_rhs_var, $op2_rhs_var);    # presumably content-side refs / presentation-side args -- TODO confirm
+    my ($ref_base, $ref_rhs_var1, $ref_rhs_var2) = @$cargs;
+    my ($arg_base, $arg_rhs_var1, $arg_rhs_var2) = @$pargs;
+
+    my $presentation = Tokens(Tokens($arg_base, $op1_pres, $arg_rhs_var1), $op2_pres, $arg_rhs_var2);    # base op1 var1, then op2 var2
+    my $content = Tokens(Invocation(T_CS('\@CSYMBOL'), $op1_meaning),
+      Invocation(T_CS('\@APPLY'), Tokens(
+          Invocation(T_CS('\@CSYMBOL'), $op2_meaning), $ref_base, $ref_rhs_var2)),    # inner: op2 on (base, var2)
+      $ref_rhs_var1);                                                                  # outer op1 also receives var1
+    return Invocation(T_CS('\emb@build@apply'), $content, $presentation)->unlist; });
+
+## I. Calculus
+DefConstructor('\diffd', 'd');
+DefMath('\deriv[]{}{}',
+  '\frac{\@MAYBEAPPLY{\@SUPERSCRIPT{\diffd}{#1}}{#2}}'
+    . '{\@SUPERSCRIPT{\@APPLY{\diffd #3}}{#1}}',
+  meaning => 'derivative', reorder => [2, 3, 1],
+  # afterDigest => sub {
+  #   # NOTE: arg 2 will be wrapped in XMRef!
+  #   $_[1]->setProperty(role => 'DIFFOP') if checkDiffOp($_[1]);
+  #   return; },
+  hide_content_reversion => 1);
+
+DefMath('\integral{}{}', '\int #1 \diffd #2', meaning => 'integral');
+
+## II. 
Scripts
+DefMacro('\@sup@apply{}{}', sub {    # \@sup@apply{base}{script}: content lists the script before the base; shown as a superscript
+    my ($gullet, $base, $script) = @_;
+    my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $script);
+    return Invocation(T_CS('\emb@build@apply'),
+      Tokens($$cargs[1], $$cargs[0]),
+      Invocation(T_CS('\@SUPERSCRIPT'), @$pargs))->unlist; });
+DefMacro('\supop{}{}{}', '\@sup@apply{#1}{\emb@atom{#2}{#3}}');
+DefMacro('\@sub@apply{}{}', sub {    # \@sub@apply{base}{script}: as \@sup@apply, but shown as a subscript
+    my ($gullet, $base, $script) = @_;
+    my ($cargs, $pargs) = dualize_arglist('#1#2', $base, $script);
+    return Invocation(T_CS('\emb@build@apply'),
+      Tokens($$cargs[1], $$cargs[0]),
+      Invocation(T_CS('\@SUBSCRIPT'), @$pargs))->unlist; });    # was misspelled \@SUBCRIPT
+DefMacro('\subop{}{}{}', '\@sub@apply{#1}{\emb@atom{#2}{#3}}');
+
+DefMath('\power{}{}', "{#1^{#2}}", meaning => 'power',
+  reversion => '#1^{#2}',
+  hide_content_reversion => 1);
+DefMath('\fnpower{}{}', "{#1^{#2}}", meaning => 'functional-power',
+  reversion => '#1^{#2}', hide_content_reversion => 1);
+DefMath('\fninverse{}', "#1^{-1}", meaning => "inverse", role => 'OPFUNCTION',
+  reversion => '#1^{-1}', hide_content_reversion => 1);
+DefMath('\laplacian', '\nabla^2', meaning => 'Laplacian', role => 'OPERATOR',
+  hide_content_reversion => 1);
+DefMath('\index{}{}', "{#1_{#2}}", meaning => 'index',
+  reversion => '#1_{#2}', hide_content_reversion => 1);
+
+# only mark the script as a dual, so that we can remix it
+DefMacro('\indexArg{}', sub {
+    my ($gullet, $arg) = @_;
+    my ($cargs, $pargs) = dualize_arglist('#1', $arg);
+    return Invocation(T_CS('\emb@build@apply'),
+      Tokens(Invocation(T_CS('\@CSYMBOL'), 'index'), $$cargs[0]),
+      Tokens(T_SUB, $$pargs[0]))->unlist; });
+# (\supop is defined once, earlier in this section, alongside \@sup@apply.)
+
+DefMacro('\frobulator', '\emb@atom{frobulator}{x\'}');
+
+DefMacro('\transpose{}', '\supop{#1}{transpose}{T}');
+DefMacro('\adjoint{}', '\supop{#1}{adjoint}{\dagger}');
+# This works well, but can't be remixed crosswise as \median{x}_i:
+DefMacro('\median', '\emb@atom{median}{\overline}');
+
+# What I Really Want to Say 
here, but can't, is likely:
+# DefMath('\derivemark{}', '\derivemark@pres{#1}', meaning=>'#1');
+DefMacro('\derivemark{}', sub {
+    my ($gullet, $token) = @_;
+    # Dualizing the arglist only works if we are going to keep the same token at the end
+    # in the case of 2 --> '' , this fails. So, obtain the presentation right away to figure this out
+
+    # we need to digest due to \@XMArg being a constructor
+    my $mark = ToString(Digest($token));
+    my ($content, $presentation);
+    if ($mark =~ /^\d$/) {    # single digit, add primes
+      $content = $token;
+      $presentation = Tokens(map { T_CS('\prime') } 1 .. int($mark)) }    # one \prime per unit of the digit; none for 0
+    else {
+      # assume an id, wrap in parens
+      my ($cargs, $pargs) = dualize_arglist('#1', $token);
+      $content = $$cargs[0];
+      $presentation = Tokens(T_OTHER('('), $$pargs[0], T_OTHER(')')); }
+
+    return Invocation(T_CS('\DUAL'),
+      undef,    # debugging that I missed this 'undef' argument was not fun.
+      $content,
+      Invocation(T_CS('\@WRAP'), $presentation))->unlist;
+});
+
+# curiously we need an indirection level, so that we point to the dual instead of
+# the content node of the dual. The a11y attribute generation does not support the following markup
+# at the moment (NOTE(review): the example below seems to have lost its XML tags -- confirm against upstream):
+#
+#
+#
+#
+#
+#
+#
+#
+#
+# f
+#
+# n
+# ... 
+#
+# We can only deal with 'p1.m1.2' pointing to the inner XMDual, rather than directly to its content "n"
+DefMacro('\fnderive{}{}', '\fnderive@build{#1}{\derivemark{#2}}');
+DefMath('\fnderive@build{}{}', '#1^#2',
+  meaning => 'derivative-implicit-variable',
+  hide_content_reversion => 1);
+
+## Circumfix, applicative:
+DefMath('\norm{}', '|\mathbf{#1}|', meaning => 'norm', role => 'ID',
+  reversion => '|\mathbf{#1}|', hide_content_reversion => 1);
+DefMath('\determinant{}', '|\mathbf{#1}|', meaning => 'determinant', role => 'ID',
+  reversion => '|\mathbf{#1}|', hide_content_reversion => 1);
+################################################################################################################
+
+# Declare some defaults common in K12 math when using this package:
+# Also, improve ergonomics of \lxDeclare to my (Deyan's) liking
+# TODO: Can we reuse this keyval from latexml.sty? How?
+DefKeyVal('Declare', 'role', '', '');
+DefKeyVal('Declare', 'meaning', '', '');
+DefKeyVal('Declare', 'action', '', '');
+DefKeyVal('Declare', 'replace', '', '');
+our %PRAGMA_ROLES = map { $_ => 1 } qw(ID FUNCTION);    # properties that are stored under the 'role' key
+DefMacro('\pragma OptionalMatch:* {}{}', sub {    # Limitation: never use commas in the symbol/notation contents
+    my ($gullet, $star, $properties, $notations) = @_;
+    my @declarations = ();
+    my $notations_expanded = ToString($notations);
+    $notations_expanded =~ s/\?/\\WildCard[]/g;    # every '?' in a notation becomes \WildCard[]
+    my @notations = $star ? $notations_expanded : split(/\s*,\s*/, $notations_expanded);    # starred form: one notation, not split on commas
+    my @properties = split(/\s*,\s*/, ToString($properties));
+    for my $notation (@notations) {
+      my $kvprops = LaTeXML::Core::KeyVals->new('KV', 'Declare', assign => T_OTHER('='), punct => T_OTHER(','));
+      for my $p (@properties) {    # extend with more of the lxDeclare capabilities? scopes?
+        if ($PRAGMA_ROLES{$p}) {
+          $kvprops->setValue('role', $p); }
+        elsif ($p =~ /^(pre|post)\:/) {
+          $kvprops->setValue('action', $p); }
+        else {
+          $kvprops->setValue('meaning', $p); } }
+      push @declarations,
+        Invocation(T_CS('\lxDeclare'), undef, $kvprops,
+        Tokens(T_MATH, Tokenize($notation), T_MATH)); }
+    return @declarations; });
+
+# Example pragmas, as included with the tiny accessibility showcase:
+#
+# disabled by default here, since they may assume too much
+#
+# PushValue('@at@begin@document', Tokenize(<<'EOL'));
+# \pragma{FUNCTION}{f,g,h}
+# \pragma{ID}{a,b,c,d,n,m,x,y,z}
+# \pragma{index}{?_?}
+# \pragma{power}{?^?}
+# \pragma{Pochhamer-symbol,ID}{\left(?\right)_?}
+# \pragma{Legendre-symbol,ID}{\left(?|?\right)}
+# \pragma{BesselJ,FUNCTION}{J_?}
+# \pragma*{inner-product,ID}{\left<\mathbf{?},\mathbf{?}\right>}
+# \pragma*{pre:\@APPLYFUNCTION}{\left(?,?;?|?\right)}
+# EOL
+#
+1;
diff --git a/lib/LaTeXML/Package/latexml.sty.ltxml b/lib/LaTeXML/Package/latexml.sty.ltxml
index 4742178c5..bab167490 100644
--- a/lib/LaTeXML/Package/latexml.sty.ltxml
+++ b/lib/LaTeXML/Package/latexml.sty.ltxml
@@ -246,7 +246,7 @@ DefPrimitive('\lxDefMath{}[Number][]{} OptionalKeyVals:XMath', sub {
       $params && map { $_ && ToString($_) } map { $params->getValue($_) }
       qw(name meaning cd role alias scope);
     my $needsid = $params && ($params->getValue('tag') || $params->getValue('description'));
-    my $id = ($needsid ? next_declaration_id() : undef);
+    my $id = ($needsid ? next_declaration_id() : undef);
     DefMathI($cs, convertLaTeXArgs($nargs, $opt), $presentation,
       name => $name, meaning => $meaning, omcd => $cd, role => $role, alias => $alias,
       scope => $scope, decl_id => $id,
@@ -294,7 +294,7 @@ sub normalizeDeclareKeys {
   if (my $stuff = $description || $tag) {
     ($term, $desc) = splitDeclareTag($stuff); }
   $short = ($description ? $tag || $desc : undef);
-  $desc = $desc || $description || $tag;
+  $desc = $desc || $description || $tag;
   $whatsit->setProperties(term => $term, short => $short, description => $desc);
   return; }
@@ -340,9 +340,10 @@ sub splitDeclareTag {
 DefKeyVal('Declare', 'nowrap', '{}', 1);
 DefKeyVal('Declare', 'trace', '{}', 1);
 DefKeyVal('Declare', 'replace', 'UndigestedKey');
+DefKeyVal('Declare', 'action', 'UndigestedKey');
 our $declare_keys = { scope => 1, role => 1, tag => 1, description => 1, name => 1, meaning => 1,
-  trace => 1, nowrap => 1, replace => 1, label => 1 };
+  trace => 1, nowrap => 1, replace => 1, action => 1, label => 1 };
 # Most is same as above; merge into one!!!!!
 DefConstructor('\lxDeclare OptionalMatch:* OptionalKeyVals:Declare {}', sub {
     my ($document, $flag, $kv, $pattern, %props) = @_;
@@ -387,7 +388,8 @@ DefConstructor('\lxDeclare OptionalMatch:* OptionalKeyVals:Declare {}', sub {
       nowrap => defined $kv->getValue('nowrap'),
       id => $id,
       match => $pattern,
-      replace => $kv->getValue('replace'));
+      replace => $kv->getValue('replace'),
+      action => $kv->getValue('action'));
     normalizeDeclareKeys($kv, $whatsit);
     if (my $label = ToString($kv->getValue('label'))) {
@@ -436,8 +438,8 @@ sub getDeclarationScope {
 sub createDeclarationRewrite {
   my ($document, $scope, $whatsit) = @_;
   my %props = $whatsit->getProperties;
-  my ($id, $match, $nowrap, $role, $name, $meaning, $ref, $trace, $replace)
-    = map { $props{$_} } qw(id match nowrap role name meaning ref trace replace);
+  my ($id, $match, $nowrap, $role, $name, $meaning, $ref, $trace, $replace, $action)
+    = map { $props{$_} } qw(id match nowrap role name meaning ref trace replace action);
   # Put this rule IN FRONT of other rules!
   UnshiftValue('DOCUMENT_REWRITE_RULES',
     LaTeXML::Core::Rewrite->new('math',
@@ -446,12 +448,14 @@ sub createDeclarationRewrite {
       ($match ? (match => $match) : ()),
       ($replace
         ? (replace => $replace)
-        : attributes => { ($role ? (role => $role) : ()),
-          ($name ? (name => $name) : ()),
-          ($meaning ? (meaning => $meaning) : ()),
-          ($id ? (decl_id => $id) : ()),
-          ($nowrap ? (_nowrap => $nowrap) : ()),
-        }),
+        : ($action
+          ? (action => $action)
+          : (attributes => { ($role ? (role => $role) : ()),    # parenthesized: ?: binds tighter than =>, so the hash must stay inside this branch
+              ($name ? (name => $name) : ()),
+              ($meaning ? (meaning => $meaning) : ()),
+              ($id ? (decl_id => $id) : ()),
+              ($nowrap ? (_nowrap => $nowrap) : ()),
+            }))),
     ));
   return; }
diff --git a/lib/LaTeXML/Post.pm b/lib/LaTeXML/Post.pm
index 31a90131e..7cd13f955 100644
--- a/lib/LaTeXML/Post.pm
+++ b/lib/LaTeXML/Post.pm
@@ -546,7 +546,7 @@ sub associateNode {
       else {
         $node->setAttribute('xml:id' => $id); }
       push(@{ $$self{convertedIDs}{$sourceid} }, $id) unless $noxref; } }
-  $self->associateNodeHook($node, $sourcenode, $noxref);
+  $self->associateNodeHook($node, $sourcenode, $noxref, $currentnode);
   if ($isarray) {    # Array represented
     map { $self->associateNode($_, $currentnode, $noxref) } @$node[2 .. $#$node]; }
   else {    # LibXML node
diff --git a/lib/LaTeXML/Post/MathML.pm b/lib/LaTeXML/Post/MathML.pm
index 6d9efc179..8d4c1aaa9 100644
--- a/lib/LaTeXML/Post/MathML.pm
+++ b/lib/LaTeXML/Post/MathML.pm
@@ -26,6 +26,7 @@ our @EXPORT = (
   &pmml_infix &pmml_script &pmml_summation),
   qw( &cmml &cmml_share &cmml_shared &cmml_ci
     &cmml_or_compose &cmml_synth_not &cmml_synth_complement),
+  qw(&getQName)
 );
 require LaTeXML::Post::MathML::Presentation;
 require LaTeXML::Post::MathML::Content;
@@ -67,6 +68,10 @@ sub preprocess {
   $$self{nestmath} = 0 unless $$self{nestmath};
   $doc->adjust_latexml_doctype('MathML');    # Add MathML if LaTeXML dtd. 
+  $doc->addNamespace($mmlURI, 'm');
+  # flip the accessibility switch on if requested, as it is currently experimental
+  if (my $a11y = $doc->findnode('.//processing-instruction("latexml")[contains(.,"a11y=")]')) {
+    if ($a11y->textContent =~ /a11y=['"]enabled['"]/) {
+      $$self{a11y} = 1; } }
   return; }
 # Works for pmml, cmml
diff --git a/lib/LaTeXML/Post/MathML/Presentation.pm b/lib/LaTeXML/Post/MathML/Presentation.pm
index 6e757a41b..f52c20331 100644
--- a/lib/LaTeXML/Post/MathML/Presentation.pm
+++ b/lib/LaTeXML/Post/MathML/Presentation.pm
@@ -14,6 +14,9 @@ package LaTeXML::Post::MathML::Presentation;
 use strict;
 use warnings;
 use base qw(LaTeXML::Post::MathML);
+use LaTeXML::Post::MathML qw(getQName);
+use LaTeXML::MathParser qw(p_getAttribute p_setAttribute p_removeAttribute p_element_nodes);
+use LaTeXML::Common::XML;
 sub preprocess {
   my ($self, $doc, @maths) = @_;
@@ -57,7 +60,7 @@ sub convertNode {
   # NEXT better strategy will be to scan columns of MathBranches to establish desired line length?
   elsif ($$self{linelength}    # If line breaking
     && ($doc->findnodes('ancestor::ltx:MathBranch', $xmath))    # In formatted side of MathFork?
-    # But ONLY if last column!! (until we can adapt LineBreaker!)
+    # But ONLY if last column!! (until we can adapt LineBreaker!)
     && !$doc->findnodes('parent::ltx:Math/parent::ltx:td/following-sibling::ltx:td', $xmath)) {
     my ($pmmlb, $broke) = $self->convertNode_linebreak($doc, $xmath, $style);
     $pmml = $pmmlb; }
@@ -69,23 +72,131 @@ sub rawIDSuffix {
   return '.pmml'; }
 sub associateNodeHook {
-  my ($self, $node, $sourcenode) = @_;
+# technical note: $sourcenode and $currentnode are LibXML elements, while $node is that OR the arrayref triple form
+  my ($self, $node, $sourcenode, $noxref, $currentnode) = @_;
   # TODO: Shouldn't we have a single getQName shared for the entire latexml codebase
+  #  (same for the p_* methods from MathParser)
   #  in LaTeXML::Common or LaTeXML::Util ?
-  my $name = LaTeXML::Post::MathML::getQName($node);
+  my $name = getQName($node);
   if ($name =~ /^m:(?:mi|mo|mn)$/) {
     if (my $href = $sourcenode->getAttribute('href')) {
-      if (ref $node eq 'ARRAY') {
-        $$node[1]{href} = $href; }
-      else {
-        $node->setAttribute('href', $href); } }
+      p_setAttribute($node, 'href', $href); }
     if (my $title = $sourcenode->getAttribute('title')) {
-      if (ref $node eq 'ARRAY') {
-        $$node[1]{title} = $title; }
-      else {
-        $node->setAttribute('title', $title); } } }
+      p_setAttribute($node, 'title', $title); } }
+  $self->addAccessibilityAnnotations($node, $sourcenode, $currentnode) if $$self{a11y};
+  return; }
+
+# Experiment: set accessibility attributes on the resulting presentation tree,
+# if the XMath source has a claim to the semantics via a "meaning" attribute.
+#   $node        : presentation node, a LibXML element OR the arrayref triple form
+#   $sourcenode  : LibXML source element, consulted for 'meaning' and 'xml:id'
+#   $currentnode : LibXML source element, marked with '_a11y' so that it is only treated once
+# Sets 'data-semantic' and/or 'data-arg' on $node when they can be determined.
+sub addAccessibilityAnnotations {
+  my ($self, $node, $sourcenode, $currentnode) = @_;
+  # 1. Filter and bookkeep which nodes are to be treated.
+  my $current_node_name = getQName($currentnode);
+  return if $current_node_name eq 'ltx:XMath';
+  return if $currentnode->getAttribute('_a11y');
+  $currentnode->setAttribute('_a11y', 'done');
+  my $source_node_name = getQName($sourcenode);
+  my $container;
+# skip non-material dual presentation, which points to content nodes but should *not* carry annotations itself
+  if (($container = $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $currentnode)) and
+    (${ $currentnode->parentNode } != $$container)) {
+# also skip any embellishments in duals that are not semantic, a bit tricky since we need to check parent xmapps
+    my $content_node = $container->firstChild;
+    my %xmrefs = map { my $ref = $_->getAttribute('idref'); $ref ? ($ref => 1) : () }
+      $LaTeXML::Post::DOCUMENT->findnodes("descendant-or-self::ltx:XMRef[\@idref]", $content_node);
+    return unless %xmrefs;    # certainly not usable if no refs in the dual.
+    my $ancestor = $currentnode;
+    while ($$ancestor != $$container && !$xmrefs{ $ancestor->getAttribute('xml:id') || '' }) {
+      $ancestor = $ancestor->parentNode; }
+    return if $$ancestor == $$container; }
+  # 1--end. We reach here only with semantic nodes in hand (or the logic has a Bug).
+  # 2. Bookkeep the semantic information.
+  my ($meaning, $arg);
+  if (my $src_meaning = $sourcenode->getAttribute('meaning')) {
+    $meaning = $src_meaning; }
+  elsif ($source_node_name eq 'ltx:XMApp') {
+# Tricky, what is the best way to figure out if the operator is presentable vs implied? Check if it has _a11y=done?
+    # Use element children only: firstChild may be missing, or a non-element node without getAttribute.
+    my ($op_node, @arg_nodes) = element_nodes($sourcenode);
+    my $op = $op_node
+      && ($op_node->getAttribute('_a11y') ? '#op' : p_getAttribute($op_node, 'meaning'));
+    if ($op) {    # annotate only if we knew a 'meaning' attribute, for the special markup scenarios
+      $meaning = "$op(" . join(",", map { "#$_" } 1 .. scalar(@arg_nodes)) . ')'; }
+    else {
+      # otherwise, take the liberty to delete all data-arg of direct children.
+      # $node is either an arrayref triple or a LibXML element; array children may be plain strings.
+      my @pmml_children = (ref $node eq 'ARRAY') ? @$node[2 .. $#$node] : element_nodes($node);
+      for my $pmml_child (@pmml_children) {
+        next unless (ref $pmml_child eq 'ARRAY') || (ref $pmml_child eq 'XML::LibXML::Element');
+        p_removeAttribute($pmml_child, 'data-arg'); } } }
+  elsif ($source_node_name eq 'ltx:XMDual') {
+    $meaning = dual_content_to_semantic_attr($sourcenode->firstChild); }
+
+# 3. Bookkeep "arg" information
+# (careful, can be arbitrary deep in a dual content tree)
+# also, not so easy to disentangle - a node nested deeply inside a dual may be _either_ referenced in the dual (primary)
+# _or_ a classic direct child of an intermediate XMApp. So we test until we find an $arg:
+  $container = $container || $LaTeXML::Post::DOCUMENT->findnode('ancestor::ltx:XMDual[1]', $sourcenode);
+  if ($container) {
+    my $id = $sourcenode->getAttribute('xml:id');
+    $arg = $id && dual_content_idref_to_data_attr($container->firstChild, $id); }
+  if (!$arg && (getQName($sourcenode->parentNode) eq 'ltx:XMApp')) {    # normal apply case
+        # note we can only do this simple check because we filtered out all embellishments in step 1.
+    my $position = $LaTeXML::Post::DOCUMENT->findvalue("count(preceding-sibling::*)", $sourcenode);
+    $arg = $position || 'op'; }
+
+  p_setAttribute($node, 'data-semantic', $meaning) if $meaning;
+  p_setAttribute($node, 'data-arg', $arg) if $arg;
   return; }
+# Given the first (content) child of an ltx:XMDual, compute its corresponding a11y "semantic" attribute
+#   $prefix : index path of the enclosing applications; nested arguments are labelled "#<prefix>_<index>"
+sub dual_content_to_semantic_attr {
+  my ($node, $prefix) = @_;
+  my $name = getQName($node);
+  if ($name eq 'ltx:XMTok') {
+    return $node->getAttribute('meaning') || $node->getAttribute('name') || 'unknown'; }
+  elsif ($name eq 'ltx:XMRef') {    # pass through case
+    return '#1'; }
+  elsif ($name eq 'ltx:XMApp') {
+    my @arg_nodes = element_nodes($node);
+    my $op_node = shift @arg_nodes;
+    my $op = ($op_node && $op_node->getAttribute('meaning')) || '#op';
+    my @arg_strings = ();
+    my $index = 0;
+    for my $arg_node (@arg_nodes) {
+      $index++;
+      if (getQName($arg_node) eq 'ltx:XMApp') {
+        push @arg_strings, dual_content_to_semantic_attr($arg_node, $prefix ? ($prefix . "_$index") : $index); }
+      else {
+        push @arg_strings, '#' . ($prefix ? ($prefix . "_$index") : $index); } }    # will we need level suffixes?
+    return $op . '(' . join(",", @arg_strings) . ')'; }
+  else {
+    print STDERR "Warning:unknown XMDual content child '$name' will default data-semantic attribute to 'unknown'\n";
+    return 'unknown'; } }
+
+# Given the first (content) child of an ltx:XMDual, and an idref value, compute the corresponding "arg" attribute for that XMRef
+sub dual_content_idref_to_data_attr {
+  my ($content_node, $idref) = @_;
+  my ($ref_node) = $LaTeXML::Post::DOCUMENT->findnodes(
+    "descendant-or-self::ltx:XMRef[\@idref=\"" . $idref . "\"][1]", $content_node);
+  return '' unless $ref_node;
+  my $path = '';
+  my $ancestor = $ref_node;
+  # NOTE(review): a position of 0 is false, so a leading operator-position step is dropped from $path -- confirm intended.
+  while ($$ancestor != $$content_node) {
+    my $position = $LaTeXML::Post::DOCUMENT->findvalue("count(preceding-sibling::*)", $ancestor);
+    $path = $path ? ($position . '_' . $path) : $position;
+    $ancestor = $ancestor->parentNode; }
+  return $path if $path;
+  # No usable path: count the content's element children in list context to choose the fallback.
+  my @content_children = element_nodes($content_node);
+  return (@content_children > 1 ? 'op' : '1'); }
+
 #================================================================================
 # Presentation MathML with Line breaking
 # Not at all sure how this will integrate with Parallel markup...
@@ -116,8 +227,8 @@ sub preprocess_linebreaking {
     my $style = ($mode eq 'display' ? 'display' : 'text');
     # If already has in a MathBranch, we can't really know if, or how wide, to line break!?!?!
     next if $doc->findnodes('ancestor::ltx:MathFork', $math);    # SKIP if already in a branch?
-    # Now let's do the layout & see if it actually needs line breaks!
-    # next if $math isn't really so wide ..
+    # Now let's do the layout & see if it actually needs line breaks!
+    # next if $math isn't really so wide ..
     my $id = $math->getAttribute('xml:id');
     my $xmath = $doc->findnode('ltx:XMath', $math);
     my ($pmml, $broke) = $self->convertNode_linebreak($doc, $xmath, $style);