Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors
use Mojo::DOM; # Parse my $dom = Mojo::DOM->new('<div><p id="a">Test</p><p id="b">123</p></div>'); # Find say $dom->at('#b')->text; say $dom->find('p')->pluck('text'); say $dom->find('[id]')->pluck(attr => 'id'); # Iterate $dom->find('p[id]')->reverse->each(sub { say $_->{id} }); # Loop for my $e ($dom->find('p[id]')->each) { say $e->{id}, ':', $e->text; } # Modify $dom->find('div p')->last->append('<p id="c">456</p>'); $dom->find(':not(p)')->pluck('strip'); # Render say "$dom";
Mojo::DOM is a minimalistic and relaxed HTML/XML DOM parser with CSS selector support. It will even try to interpret broken XML, so you should not use it for validation.
Mojo::DOM defaults to HTML semantics, which means that all tags and attributes are lowercased and selectors need to be lowercase as well.
my $dom = Mojo::DOM->new('<P ID="greeting">Hi!</P>'); say $dom->at('p')->text;
If XML processing instructions are found, the parser will automatically switch into XML mode and everything becomes case sensitive.
my $dom = Mojo::DOM->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>'); say $dom->at('P')->text;
XML detection can also be disabled with the "xml" method.
# Force XML semantics $dom->xml(1); # Force HTML semantics $dom->xml(0);
Mojo::DOM implements the following methods.
my $collection = $dom->all_contents;
Return a Mojo::Collection object containing all nodes in DOM structure as Mojo::DOM objects.
# "<p><b>123</b></p>" $dom->parse('<p><!-- Test --><b>123<!-- 456 --></b></p>')->all_contents ->grep(sub { $_->node eq 'comment' })->pluck('remove')->first;
my $trimmed = $dom->all_text; my $untrimmed = $dom->all_text(0);
Extract all text content from DOM structure; smart whitespace trimming is enabled by default.
# "foo bar baz" $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text; # "foo\nbarbaz\n" $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->all_text(0);
my $collection = $dom->ancestors; my $collection = $dom->ancestors('div > p');
Find all ancestors of this node matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
# List types of ancestor elements say $dom->ancestors->pluck('type');
$dom = $dom->append('<p>I ♥ Mojolicious!</p>');
Append HTML/XML fragment to this node.
# "<div><h1>Test</h1><h2>123</h2></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1') ->append('<h2>123</h2>')->root; # "<p>Test 123</p>" $dom->parse('<p>Test</p>')->at('p')->contents->first->append(' 123')->root;
$dom = $dom->append_content('<p>I ♥ Mojolicious!</p>');
Append HTML/XML fragment (for root and tag nodes) or raw content to this node's content.
root
tag
# "<div><h1>Test123</h1></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1') ->append_content('123')->root; # "<!-- Test 123 --><br>" $dom->parse('<!-- Test --><br>') ->contents->first->append_content('123 ')->root; # "<p>Test<i>123</i></p>" $dom->parse('<p>Test</p>')->at('p')->append_content('<i>123</i>')->root;
my $result = $dom->at('html title');
Find first element in DOM structure matching the CSS selector and return it as a Mojo::DOM object or return undef if none could be found. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
undef
# Find first element with "svg" namespace definition my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'};
my $hash = $dom->attr; my $foo = $dom->attr('foo'); $dom = $dom->attr({foo => 'bar'}); $dom = $dom->attr(foo => 'bar');
This element's attributes.
# List id attributes say $dom->find('*')->pluck(attr => 'id')->compact;
my $collection = $dom->children; my $collection = $dom->children('div > p');
Find all children of this element matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
# Show type of random child element say $dom->children->shuffle->first->type;
my $str = $dom->content; $dom = $dom->content('<p>I ♥ Mojolicious!</p>');
Return this node's content or replace it with HTML/XML fragment (for root and tag nodes) or raw content.
# "<b>Test</b>" $dom->parse('<div><b>Test</b></div>')->at('div')->content; # "<div><h1>123</h1></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('123')->root; # "<p><i>123</i></p>" $dom->parse('<p>Test</p>')->at('p')->content('<i>123</i>')->root; # "<div><h1></h1></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1')->content('')->root; # " Test " $dom->parse('<!-- Test --><br>')->contents->first->content; # "<div><!-- 123 -->456</div>" $dom->parse('<div><!-- Test -->456</div>')->at('div') ->contents->first->content(' 123 ')->root;
my $collection = $dom->contents;
Return a Mojo::Collection object containing the child nodes of this element as Mojo::DOM objects.
# "<p><b>123</b></p>" $dom->parse('<p>Test<b>123</b></p>')->at('p')->contents->first->remove; # "<!-- Test -->" $dom->parse('<!-- Test --><b>123</b>')->contents->first;
my $collection = $dom->find('html title');
Find all elements in DOM structure matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
# Find a specific element and extract information my $id = $dom->find('div')->[23]{id}; # Extract information from multiple elements my @headers = $dom->find('h1, h2, h3')->pluck('text')->each; # Count all the different tags my $hash = $dom->find('*')->reduce(sub { $a->{$b->type}++; $a }, {}); # Find elements with a class that contains dots my @divs = $dom->find('div.foo\.bar')->each;
my $result = $dom->match('html title');
Match the CSS selector against this element and return it as a Mojo::DOM object or return undef if it didn't match. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
my $namespace = $dom->namespace;
Find this element's namespace.
# Find namespace for an element with namespace prefix my $namespace = $dom->at('svg > svg\:circle')->namespace; # Find namespace for an element that may or may not have a namespace prefix my $namespace = $dom->at('svg > circle')->namespace;
my $dom = Mojo::DOM->new; my $dom = Mojo::DOM->new('<foo bar="baz">I ♥ Mojolicious!</foo>');
Construct a new scalar-based Mojo::DOM object and "parse" HTML/XML fragment if necessary.
my $sibling = $dom->next;
Return Mojo::DOM object for next sibling element or undef if there are no more siblings.
# "<h2>123</h2>" $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h1')->next;
my $sibling = $dom->next_sibling;
Return Mojo::DOM object for next sibling node or undef if there are no more siblings.
# "456" $dom->parse('<p><b>123</b><!-- Test -->456</p>')->at('b') ->next_sibling->next_sibling;
my $type = $dom->node;
This node's type, usually cdata, comment, doctype, pi, raw, root, tag or text.
cdata
comment
doctype
pi
raw
text
my $parent = $dom->parent;
Return Mojo::DOM object for parent of this node or undef if this node has no parent.
$dom = $dom->parse('<foo bar="baz">I ♥ Mojolicious!</foo>');
Parse HTML/XML fragment with Mojo::DOM::HTML.
# Parse XML my $dom = Mojo::DOM->new->xml(1)->parse($xml);
$dom = $dom->prepend('<p>I ♥ Mojolicious!</p>');
Prepend HTML/XML fragment to this node.
# "<div><h1>Test</h1><h2>123</h2></div>" $dom->parse('<div><h2>123</h2></div>')->at('h2') ->prepend('<h1>Test</h1>')->root; # "<p>Test 123</p>" $dom->parse('<p>123</p>')->at('p')->contents->first->prepend('Test ')->root;
$dom = $dom->prepend_content('<p>I ♥ Mojolicious!</p>');
Prepend HTML/XML fragment (for root and tag nodes) or raw content to this node's content.
# "<div><h2>Test123</h2></div>" $dom->parse('<div><h2>123</h2></div>')->at('h2') ->prepend_content('Test')->root; # "<!-- Test 123 --><br>" $dom->parse('<!-- 123 --><br>') ->contents->first->prepend_content(' Test')->root; # "<p><i>123</i>Test</p>" $dom->parse('<p>Test</p>')->at('p')->prepend_content('<i>123</i>')->root;
my $sibling = $dom->previous;
Return Mojo::DOM object for previous sibling element or undef if there are no more siblings.
# "<h1>Test</h1>" $dom->parse('<div><h1>Test</h1><h2>123</h2></div>')->at('h2')->previous;
my $sibling = $dom->previous_sibling;
Return Mojo::DOM object for previous sibling node or undef if there are no more siblings.
# "123" $dom->parse('<p>123<!-- Test --><b>456</b></p>')->at('b') ->previous_sibling->previous_sibling;
my $parent = $dom->remove;
Remove this node and return "parent".
# "<div></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1')->remove; # "<p><b>456</b></p>" $dom->parse('<p>123<b>456</b></p>')->at('p')->contents->first->remove->root;
my $parent = $dom->replace('<div>I ♥ Mojolicious!</div>');
Replace this node with HTML/XML fragment and return "parent".
# "<div><h2>123</h2></div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1')->replace('<h2>123</h2>'); # "<p><b>123</b></p>" $dom->parse('<p>Test</p>')->at('p') ->contents->[0]->replace('<b>123</b>')->root;
my $root = $dom->root;
Return Mojo::DOM object for root node.
my $collection = $dom->siblings; my $collection = $dom->siblings('div > p');
Find all sibling elements of this node matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
# List types of sibling elements say $dom->siblings->pluck('type');
my $parent = $dom->strip;
Remove this element while preserving its content and return "parent".
# "<div>Test</div>" $dom->parse('<div><h1>Test</h1></div>')->at('h1')->strip;
$dom = $dom->tap(sub {...});
Alias for "tap" in Mojo::Base.
my $trimmed = $dom->text; my $untrimmed = $dom->text(0);
Extract text content from this element only (not including child elements); smart whitespace trimming is enabled by default.
# "foo baz" $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text; # "foo\nbaz\n" $dom->parse("<div>foo\n<p>bar</p>baz\n</div>")->at('div')->text(0);
my $str = $dom->to_string;
Render this node and its content to HTML/XML.
# "<b>Test</b>" $dom->parse('<div><b>Test</b></div>')->at('div b')->to_string;
my $tree = $dom->tree; $dom = $dom->tree(['root']);
Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.
my $type = $dom->type; $dom = $dom->type('div');
This element's type.
# List types of child elements say $dom->children->pluck('type');
my $collection = $dom->val;
Extract values from button, input, option, select or textarea element and return a Mojo::Collection object containing these values. In the case of select, find all option elements it contains that have a selected attribute and extract their values.
button
input
option
select
textarea
selected
# "b" $dom->parse('<input name="a" value="b">')->at('input')->val; # "c" $dom->parse('<option value="c">Test</option>')->at('option')->val; # "d" $dom->parse('<option>d</option>')->at('option')->val;
$dom = $dom->wrap('<div></div>');
Wrap HTML/XML fragment around this node, placing it as the last child of the first innermost element.
# "<p>123<b>Test</b></p>" $dom->parse('<b>Test</b>')->at('b')->wrap('<p>123</p>')->root; # "<div><p><b>Test</b></p>123</div>" $dom->parse('<b>Test</b>')->at('b')->wrap('<div><p></p>123</div>')->root; # "<p><b>Test</b></p><p>123</p>" $dom->parse('<b>Test</b>')->at('b')->wrap('<p></p><p>123</p>')->root; # "<p><b>Test</b></p>" $dom->parse('<p>Test</p>')->at('p')->contents->first->wrap('<b>')->root;
$dom = $dom->wrap_content('<div></div>');
Wrap HTML/XML fragment around this node's content, placing the content as the last children of the first innermost element.
# "<p><b>123Test</b></p>" $dom->parse('<p>Test</p>')->at('p')->wrap_content('<b>123</b>')->root; # "<p><b>Test</b></p><p>123</p>" $dom->parse('<b>Test</b>')->wrap_content('<p></p><p>123</p>');
my $bool = $dom->xml; $dom = $dom->xml($bool);
Disable HTML semantics in parser and activate case sensitivity. Defaults to auto detection based on processing instructions.
Mojo::DOM overloads the following operators.
my @nodes = @$dom;
Alias for "contents".
# "<!-- Test -->" $dom->parse('<!-- Test --><b>123</b>')->[0];
my $bool = !!$dom;
Always true.
my %attrs = %$dom;
Alias for "attr".
# "test" $dom->parse('<div id="test">Test</div>')->at('div')->{id};
my $str = "$dom";
Alias for "to_string".
Mojolicious, Mojolicious::Guides, http://mojolicio.us.
To install Mojolicious, copy and paste the appropriate command into your terminal.
cpanm
cpanm Mojolicious
CPAN shell
perl -MCPAN -e shell install Mojolicious
For more information on module installation, please visit the detailed CPAN module installation guide.