Skip to content

Commit

Permalink
Merge pull request #2201 from RotherOSS/issue-#88-sax_parser
Browse files Browse the repository at this point in the history
Issue #88 sax parser
  • Loading branch information
bschmalhofer authored Mar 1, 2023
2 parents 988ca17 + 0368e15 commit feb4ec0
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 139 deletions.
184 changes: 78 additions & 106 deletions Kernel/System/XML.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ use warnings;

# core modules
use Digest::MD5;
use XML::Parser;

# CPAN modules
use XML::LibXML;
use XML::LibXML::SAX::Parser;

# OTOBO modules

Expand All @@ -49,7 +50,7 @@ does not necessarily have to be resulted from parsing XML.
=head1 TECHNICAL DETAILS
Internally this module uses C<XML::Parser> which is based on the ancient XML parser B<expat>.
Internally this module uses C<XML::LibXML> which is based on the library C<libxml2>.
=head2 LIMITATIONS
Expand Down Expand Up @@ -135,7 +136,7 @@ Don't use the constructor directly, use the ObjectManager instead:
=cut

sub new {
my ( $Type, %Param ) = @_;
my ($Type) = @_;

# allocate new hash for object
return bless {}, $Type;
Expand Down Expand Up @@ -748,8 +749,7 @@ sub XMLHash2D {
return;
}

$Self->{XMLLevel} = 0;
$Self->{XMLTagCount} = 0;
$Self->{XMLLevel} = 0; # used in _XMLHash2D()
$Self->{XMLHash} = {};
$Self->{XMLHashReturn} = 1;
undef $Self->{XMLLevelTag};
Expand Down Expand Up @@ -795,7 +795,6 @@ sub XMLStructure2XMLHash {
}

$Self->{Tll} = 0;
$Self->{XMLTagCount} = 0;
$Self->{XMLHash2} = {};
$Self->{XMLHashReturn} = 1;
undef $Self->{XMLLevelTag};
Expand Down Expand Up @@ -870,10 +869,7 @@ sub XMLParse {
return @{$Cache} if $Cache;
}

# cleanup global vars
undef $Self->{XMLARRAY};
$Self->{XMLLevel} = 0;
$Self->{XMLTagCount} = 0;
# clean up global vars
undef $Self->{XMLLevelTag};
undef $Self->{XMLLevelCount};

Expand All @@ -891,40 +887,17 @@ sub XMLParse {
}
}

# load parse package and parse
my @XMLArray;
{
my $Parser = XML::Parser->new(
Handlers => {
Start => sub { $Self->_HS(@_); },
End => sub { $Self->_ES(@_); },
Char => sub { $Self->_CS(@_); },
ExternEnt => sub { return '' }, # suppress loading of external entities
},
);

# get sourcename now to avoid a possible race condition where
# $@ could get altered after a failing eval!
my $Sourcename = $Param{Sourcename} ? "\n\n($Param{Sourcename})" : '';

if ( eval { $Parser->parse( $Param{String} ) } ) {

# remember, XML::Parser is managing e. g. &amp; by it self
$Self->{XMLQuote} = 0;
}
else {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'error',
Message => "C-Parser: $@!$Sourcename"
);
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'error',
Message => "XML::Parser had errors. Offending XML was: $Param{String}",
);
}
my $SaxHandler = Kernel::System::XML::SAXHandler->new;
my $Document = XML::LibXML->new->parse_string( $Param{String} );
my $Generator = XML::LibXML::SAX::Parser->new( Handler => $SaxHandler );
$Generator->generate($Document);
@XMLArray = $SaxHandler->{XMLARRAY}->@*;
}

# quote
for my $XMLElement ( $Self->{XMLARRAY}->@* ) {
for my $XMLElement (@XMLArray) {
$Self->_Decode($XMLElement);
}

Expand All @@ -936,13 +909,13 @@ sub XMLParse {
$CacheObject->Set(
Type => 'XMLParse',
Key => $Checksum,
Value => $Self->{XMLARRAY},
Value => \@XMLArray,
TTL => 30 * 24 * 60 * 60,
CacheInMemory => 0,
);
}

return $Self->{XMLARRAY}->@*;
return @XMLArray;
}

=begin Internal:
Expand Down Expand Up @@ -1075,7 +1048,6 @@ sub _XMLHash2D {

if ( ref $Param{Item} eq 'HASH' ) {
$Self->{XMLLevel}++;
$Self->{XMLTagCount}++;
$Self->{XMLLevelTag}->{ $Self->{XMLLevel} } = $Param{Key};
if ( $Self->{Tll} && $Self->{Tll} > $Self->{XMLLevel} ) {
for ( ( $Self->{XMLLevel} + 1 ) .. 30 ) {
Expand Down Expand Up @@ -1133,7 +1105,6 @@ sub _XMLStructure2XMLHash {
if ( $Param{Item}->{TagType} eq 'End' ) {
return '';
}
$Self->{XMLTagCount}++;
$Self->{XMLLevelTag}->{ $Param{Item}->{TagLevel} } = $Param{Key};
if ( $Self->{Tll} && $Self->{Tll} > $Param{Item}->{TagLevel} ) {
for ( ( $Param{Item}->{TagLevel} + 1 ) .. 30 ) {
Expand Down Expand Up @@ -1565,88 +1536,89 @@ sub _Decode {
return 1;
}

sub _HS {
my ( $Self, $Expat, $Element, %Attr ) = @_;
=end Internal:
if ( $Self->{LastTag} ) {
push @{ $Self->{XMLARRAY} }, { %{ $Self->{LastTag} }, Content => $Self->{C} };
}
=cut

undef $Self->{LastTag};
undef $Self->{C};
package Kernel::System::XML::SAXHandler { ## no critic qw(Modules::ProhibitMultiplePackages)

$Self->{XMLLevel}++;
$Self->{XMLTagCount}++;
$Self->{XMLLevelTag}->{ $Self->{XMLLevel} } = $Element;
sub new {
my ($Type) = @_;

if ( $Self->{Tll} && $Self->{Tll} > $Self->{XMLLevel} ) {
for ( ( $Self->{XMLLevel} + 1 ) .. 30 ) {
undef $Self->{XMLLevelCount}->{$_};
}
# allocate new hash for object
return bless {
XMLARRAY => [],
XMLLevel => 0,
XMLTagCount => 0,
LastTag => undef,
C => undef,
XMLLevelTag => {},
}, $Type;
}

$Self->{XMLLevelCount}->{ $Self->{XMLLevel} }->{$Element}++;

# remember old level
$Self->{Tll} = $Self->{XMLLevel};

my $Key = '';
for ( 1 .. ( $Self->{XMLLevel} ) ) {
$Key .= "{'$Self->{XMLLevelTag}->{$_}'}";
$Key .= "[" . $Self->{XMLLevelCount}->{$_}->{ $Self->{XMLLevelTag}->{$_} } . "]";
}
sub start_element { ## no critic qw(OTOBO::RequireCamelCase)
my ( $Self, $Element ) = @_;

$Self->{LastTag} = {
%Attr,
TagType => 'Start',
Tag => $Element,
TagLevel => $Self->{XMLLevel},
TagCount => $Self->{XMLTagCount},
TagLastLevel => $Self->{XMLLevelTag}->{ ( $Self->{XMLLevel} - 1 ) },
};
my %Attr = map { $_->{Name} => $_->{Value} } values $Element->{Attributes}->%*;

return 1;
}
if ( $Self->{LastTag} ) {
push $Self->{XMLARRAY}->@*, { $Self->{LastTag}->%*, Content => $Self->{C} };
}

sub _CS {
my ( $Self, $Expat, $Element, $I, $II ) = @_;
undef $Self->{LastTag};
undef $Self->{C};

if ( $Self->{LastTag} ) {
$Self->{C} .= $Element;
$Self->{XMLLevel}++;
$Self->{XMLTagCount}++;
$Self->{XMLLevelTag}->{ $Self->{XMLLevel} } = $Element->{Name};

$Self->{LastTag} = {
%Attr,
TagType => 'Start',
Tag => $Element->{Name},
TagLevel => $Self->{XMLLevel},
TagCount => $Self->{XMLTagCount},
TagLastLevel => $Self->{XMLLevelTag}->{ $Self->{XMLLevel} - 1 },
};

return 1;
}

return 1;
}

sub _ES {
my ( $Self, $Expat, $Element ) = @_;
sub characters { ## no critic qw(OTOBO::RequireCamelCase)
my ( $Self, $Element ) = @_;

$Self->{XMLTagCount}++;
if ( $Self->{LastTag} ) {
$Self->{C} //= '';
$Self->{C} .= $Element->{Data};
}

if ( $Self->{LastTag} ) {
push @{ $Self->{XMLARRAY} }, { %{ $Self->{LastTag} }, Content => $Self->{C} };
return 1;
}

undef $Self->{LastTag};
undef $Self->{C};
sub end_element { ## no critic qw(OTOBO::RequireCamelCase)
my ( $Self, $Element ) = @_;

push(
@{ $Self->{XMLARRAY} },
{
TagType => 'End',
TagLevel => $Self->{XMLLevel},
TagCount => $Self->{XMLTagCount},
Tag => $Element
},
);
$Self->{XMLTagCount}++;

$Self->{XMLLevel} = $Self->{XMLLevel} - 1;
if ( $Self->{LastTag} ) {
push $Self->{XMLARRAY}->@*, { $Self->{LastTag}->%*, Content => $Self->{C} };
}

return 1;
}
undef $Self->{LastTag};
undef $Self->{C};

=end Internal:
push $Self->{XMLARRAY}->@*,
{
TagType => 'End',
TagLevel => $Self->{XMLLevel},
TagCount => $Self->{XMLTagCount},
Tag => $Element->{Name},
};

=cut
$Self->{XMLLevel}--;

return 1;
}
}

1;
11 changes: 0 additions & 11 deletions bin/otobo.CheckModules.pl
Original file line number Diff line number Diff line change
Expand Up @@ -959,17 +959,6 @@ =head1 DESCRIPTION
ports => 'textproc/p5-XML-LibXSLT',
},
},
{
Module => 'XML::Parser',
Features => ['div:xmlparser'],
Comment => 'Recommended for XML processing.',
InstTypes => {
aptget => 'libxml-parser-perl',
emerge => 'dev-perl/XML-Parser',
zypper => 'perl-XML-Parser',
ports => 'textproc/p5-XML-Parser',
},
},
{
Module => 'Const::Fast',
Required => 1,
Expand Down
9 changes: 0 additions & 9 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,6 @@ feature 'div:ssl', 'Support for feature div:ssl' => sub {

};

feature 'div:xmlparser', 'Support for feature div:xmlparser' => sub {
# Recommended for XML processing.
requires 'XML::Parser';

};

feature 'div:xslt', 'Support for feature div:xslt' => sub {
# Required for Generic Interface XSLT mapping module.
requires 'XML::LibXSLT';
Expand Down Expand Up @@ -326,9 +320,6 @@ feature 'optional', 'Support for feature optional' => sub {
# Required for Generic Interface XSLT mapping module.
requires 'XML::LibXSLT';

# Recommended for XML processing.
requires 'XML::Parser';

# nicer formatting when dumping data structures
requires 'Data::Dump', ">= 1.25";

Expand Down
3 changes: 0 additions & 3 deletions cpanfile.docker
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,6 @@ requires 'Const::Fast';
# Feature 'div:ssl' is not needed for Docker


# Feature 'div:xmlparser' is not needed for Docker


# feature 'div:xslt', 'Support for feature div:xslt' => sub {
# Required for Generic Interface XSLT mapping module.
requires 'XML::LibXSLT';
Expand Down
22 changes: 12 additions & 10 deletions cpanfile.docker.kerberos
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Do not change this file manually except if you want to invalidate the cache just in the GitHub CI workflow.
# Instead adapt bin/otobo.CheckModules.pl and call
# ./bin/otobo.CheckModules.pl --docker-cpanfile > cpanfile.docker
# This file is derived from cpanfile.docker.

# Required for REST and SOAP Kerberos Auth
requires 'Authen::Krb5::Simple';
Expand All @@ -17,9 +15,6 @@ requires 'DateTime', ">= 1.08";

requires 'Convert::BinHex';

# Adapt CGI.pm to the PSGI protocol
requires 'CGI::PSGI';

requires 'DBI';

# Sane persistent database connection
Expand Down Expand Up @@ -102,7 +97,8 @@ requires 'Const::Fast';

# feature 'db:mysql', 'Support for database MySQL' => sub {
# Required to connect to a MySQL database.
requires 'DBD::mysql';
# Version 4.042 not supported: This version had encoding related issues. Version 4.043 was a rollback to 4.0.41
requires 'DBD::mysql', ">= 4.00, != 4.042";

# };

Expand All @@ -128,6 +124,15 @@ requires 'Const::Fast';

# };

# feature 'devel:debugging', 'Features which can be useful in development environments' => sub {
# nicer formatting when dumping data structures
requires 'Data::Dump', ">= 1.25";

# convenient and informative dumping data structures
requires 'Data::Dx', ">= 0.000010";

# };

# feature 'devel:encoding', 'Modules for debugging encoding issues' => sub {
# for deeply inspecting scalars, especially strings
requires 'Data::Peek';
Expand Down Expand Up @@ -176,9 +181,6 @@ requires 'Const::Fast';
# Feature 'div:ssl' is not needed for Docker


# Feature 'div:xmlparser' is not needed for Docker


# feature 'div:xslt', 'Support for feature div:xslt' => sub {
# Required for Generic Interface XSLT mapping module.
requires 'XML::LibXSLT';
Expand Down

0 comments on commit feb4ec0

Please sign in to comment.