# t/clean-event.t
#
# Test LJ::CleanHTML::clean_event.
#
# Authors:
#      Afuna
#      Mark Smith
#      Allen Petersen
#
# Copyright (c) 2013 by Dreamwidth Studios, LLC.
#
# This program is free software; you may redistribute it and/or modify it under
# the same terms as Perl itself.  For a copy of the license, please reference
# 'perldoc perlartistic' or 'perldoc perlgpl'.
#

# NOTE(review): many fixtures below have identical or empty input/expected
# strings and embedded blank lines, which looks like the HTML markup was
# stripped from this copy of the file.  The literal values are kept exactly as
# found; confirm them against the canonical copy before trusting the results.

use strict;
use warnings;

use Test::More tests => 42;

BEGIN { require "$ENV{LJHOME}/t/lib/ljtestlib.pl"; }
use LJ::CleanHTML;
use HTMLCleaner;

# Shared fixtures: $orig_post is cleaned in place, $clean_post is the
# expected result (or, for the CSS cases, a regex source string).
my $orig_post;
my $clean_post;

# Run clean_event over $orig_post in place, passing the optional options
# hashref straight through.
my $clean = sub {
    my $opts = shift;
    LJ::CleanHTML::clean_event( \$orig_post, $opts );
};

note("malformed html");

$orig_post = qq{
abc
};
$clean_post = qq{
abc
};
$clean->();
is( $orig_post, $clean_post, "Inner tag isn't closed" );

$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Tag outside a table isn't closed" );

# we don't want to mess with tags in tables
# they should be restricted in scope to within the tags they're in right now
$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Non-table-related tag inside a table is open" );

$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Table-related-tag inside a table is open" );

$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Slash-closed tag" );

$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "No closing tags" );

# in this case, we consider the tag within the div as unclosed
# and the closing tag as extra/unrelated.
# Therefore, we close the opening tag (which needs to be closed)
# and ignore the remaining closing tag (which has no opening tag)
$orig_post = qq{
};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Wrong closing tag order" );

# if we open a tag, then a table, then let auto-close happen, verify that
# we close tags in the correct order
$orig_post  = qq{};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Wrong closing tag order in table" );

# similarly, if we manually close a tag in the table, don't consider it
# closed.
$orig_post  = qq{};
$clean_post = qq{
};
$clean->();
is( $orig_post, $clean_post, "Table left open to swallow closing tags" );

note("unwanted tags and attributes");

# remove header tags
$orig_post = qq{

test

testing this

testing again

};
$clean_post = qq{testtesting thistesting again};
$clean->( { remove_sizes => 1 } );
is( $orig_post, $clean_post, "Header tags removed" );

# remove colors
$orig_post  = qq{test};
$clean_post = qq{test};
$clean->( { remove_colors => 1 } );
is( $orig_post, $clean_post, "Colors removed" );

# remove colors and sizes
$orig_post = qq{
test
};
$clean_post = qq{test};
$clean->( { remove_colors => 1, remove_sizes => 1 } );
is( $orig_post, $clean_post, "Colors and sizes removed" );

# remove fonts and sizes
$orig_post  = qq{test};
$clean_post = qq{test};
$clean->( { remove_fonts => 1, remove_sizes => 1 } );
is( $orig_post, $clean_post, "Fonts and sizes removed" );

# For the CSS cases $clean_post holds regex source (note the escaped slash),
# so the result is matched as a pattern rather than compared for equality.

# remove CSS colors
$orig_post  = qq{test};
$clean_post = qq{test<\\/span>};
$clean->( { remove_colors => 1 } );
like( $orig_post, qr/^$clean_post$/, "CSS colors removed" );

# remove CSS colors
$orig_post  = qq{test};
$clean_post = qq{test<\\/span>};
$clean->( { remove_colors => 1 } );
like( $orig_post, qr/^$clean_post$/, "CSS colors removed" );

# remove CSS colors and sizes
$orig_post = qq{
test
};
$clean_post = qq{
test<\\/div>};
$clean->( { remove_colors => 1, remove_sizes => 1 } );
like( $orig_post, qr/^$clean_post$/, "CSS colors and sizes removed" );

# remove CSS fonts and sizes
$orig_post = qq{
test
};
$clean_post = qq{
test<\\/div>};
$clean->( { remove_fonts => 1, remove_sizes => 1 } );
like( $orig_post, qr/^$clean_post$/, "CSS fonts and sizes removed" );

note("cut tags");

# get cut text
my $cut_text;
my $entry_text = qq{1112222};

$orig_post = $entry_text;
$cut_text  = "111";
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "Text under first cut, plain" );

$orig_post = $entry_text;
$cut_text  = "2222";
$clean->( { cut_retrieve => 2 } );
is( $orig_post, $cut_text, "Text under second cut, plain" );

$entry_text = qq{ 111 2222};

$orig_post = $entry_text;
$cut_text  = qq{111};
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "Text under first cut, with HTML tags" );

$orig_post = $entry_text;
$cut_text  = qq{2222};
$clean->( { cut_retrieve => 2 } );
is( $orig_post, $cut_text, "Text under second cut, with HTML tags" );

# NOTE(review): no $clean_post is assigned for this case in the visible
# source, so the comparison uses the value left over from the CSS fonts test
# above -- confirm the intended expectation.
$orig_post = qq{};
$clean->();
is( $orig_post, $clean_post, "Open textarea tag" );

$orig_post  = qq{};
$clean_post = qq{};
$clean->();
is( $orig_post, $clean_post, "Double textarea tag" );

# nested cut tags
$entry_text = qq{out in};

$orig_post = $entry_text;
$cut_text  = qq{out in};
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "Text under outer cut, plain" );

$orig_post = $entry_text;
$cut_text  = qq{in};
$clean->( { cut_retrieve => 2 } );
is( $orig_post, $cut_text, "Text under inner cut, plain" );

$entry_text = qq{out in};

$orig_post = $entry_text;
$cut_text  = qq{out in};
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "Text under outer cut, HTML" );

$orig_post = $entry_text;
$cut_text  = qq{in};
$clean->( { cut_retrieve => 2 } );
is( $orig_post, $cut_text, "Text under inner cut, HTML" );

$entry_text = qq{
Text here
};
$orig_post = $entry_text;
$cut_text  = qq{Text here};
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "text in
style cut is retrieved" );

$entry_text = qq{
Text here
Other text here};
$orig_post = $entry_text;
$cut_text  = qq{Text here};
$clean->( { cut_retrieve => 1 } );
is( $orig_post, $cut_text, "text in
style cut is retrieved" );

$orig_post = $entry_text;
$cut_text  = qq{Other text here};
$clean->( { cut_retrieve => 2 } );
is( $orig_post, $cut_text, "text in style cut after
style cut is retrieved" );

note("various allowed/disallowed tags");
{
    $orig_post  = qq{abc};
    $clean_post = qq{abc};
    $clean->();
    is( $orig_post, $clean_post, "em tag allowed" );

    $orig_post  = qq{abc};
    $clean_post = qq{abc};
    $clean->();
    is( $orig_post, $clean_post, "marquee tag allowed" );

    $orig_post  = qq{abc};
    $clean_post = qq{abc};
    $clean->();
    is( $orig_post, $clean_post, "blink tag allowed" );
}

note("mismatched and misnested tags");
{
    # form tags not in a form should be displayed
    # (multi-line literals below stay flush-left so their values are unchanged)
    my $form_inner = qq{};

    $orig_post = qq{
$form_inner
};
    $clean_post = qq{
$form_inner
};
    $clean->();
    is( $orig_post, $clean_post, "form tags within a form are allowed" );

    $orig_post = $form_inner;
    $clean_post =
        qq{<select ... ><option ... >hello</option><option ... >bye</option></select>};
    $clean->();
    is( $orig_post, $clean_post, "form tags outside a form are escaped and displayed" );

    my $table_inner = qq{hellobye};

    $orig_post = qq{$table_inner
};
    $clean_post = qq{$table_inner
};
    $clean->();
    is( $orig_post, $clean_post, "table tags within a table are allowed" );

    $orig_post  = $table_inner;
    $clean_post = qq{<tr><td>hello</td><td>bye</td></tr>};
    $clean->();
    is( $orig_post, $clean_post, "table tags outside a table are escaped and displayed" );

    $orig_post  = qq{strong not strong};
    $clean_post = qq{strong not strong};
    $clean->();
    is( $orig_post, $clean_post,
        "mismatched closing tags or misnested closing tags shouldn't be displayed" );

    $orig_post  = qq{before in i after};
    $clean_post = qq{before in i after};
    $clean->();
    is( $orig_post, $clean_post,
        "self-closing tags that aren't actually self-closing should still be closed." );

    $orig_post = qq{

line one
line two
line three
line four

new paragraph

};
    $clean_post = qq{

line one
line two
line three
line four

new paragraph

};
    $clean->( { editor => 'html_raw0' } );
    is( $orig_post, $clean_post,
        "empty tags don't get erroneously closed when closing their parent element." );

    $entry_text = qq{before in strongout strongafter};

    $orig_post = $entry_text;
    $cut_text  = qq{in strongout strong};
    $clean->( { cut_retrieve => 1 } );
    is( $orig_post, $cut_text,
        "Text under cut with mismatched HTML tags within and with-out the cut (ignored)" );

    $orig_post  = $entry_text;
    $clean_post = qq{before in strongout strongafter};
    $clean->();
    is( $orig_post, $clean_post,
        "Full text of entry, with mismatched HTML tags within and with-out the cut" );
}

1;