otwarchive-symphonyarchive/spec/lib/html_cleaner_spec.rb
2026-03-11 22:22:11 +00:00

1249 lines
50 KiB
Ruby
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

require "spec_helper"
require "nokogiri"
describe HtmlCleaner do
include HtmlCleaner
# Builds the markup for a minimal table holding +content+ in its only cell.
def one_cell_table(content)
  cell = "<td>#{content}</td>"
  "<table><tr>#{cell}</tr></table>"
end
describe "sanitize_value" do
ArchiveConfig.FIELDS_ALLOWING_MEDIA_EMBEDS.each do |field|
context "#{field} is configured to allow media embeds" do
%w[youtube.com youtube-nocookie.com vimeo.com player.vimeo.com
vidders.net criticalcommons.org google.com podfic.com archive.org
open.spotify.com spotify.com 8tracks.com w.soundcloud.com soundcloud.com viddertube.com
bilibili.com player.bilibili.com 4shared.com/web/embed audio.com/embed/audio].each do |source|
it "keeps embeds from #{source}" do
html = '<iframe width="560" height="315" src="//' + source + '/embed/123" frameborder="0"></iframe>'
result = sanitize_value(field, html)
expect(result).to include(html)
end
end
%w[youtube.com youtube-nocookie.com vimeo.com player.vimeo.com
archive.org 8tracks.com podfic.com
open.spotify.com spotify.com w.soundcloud.com soundcloud.com vidders.net viddertube.com
bilibili.com player.bilibili.com 4shared.com/web/embed audio.com/embed/audio].each do |source|
it "converts src to https for #{source}" do
html = '<iframe width="560" height="315" src="http://' + source + '/embed/123" frameborder="0"></iframe>'
result = sanitize_value(field, html)
expect(result).to match('https:')
end
end
%w[vidders.net].each do |source|
it "converts flashvars to https for #{source}" do
html = '<embed flashvars="config=http://' + source + '/embed/123" src="http://' + source + '/embed/123" type="application/x-shockwave-flash" width="456" height="344"></embed>'
result = sanitize_value(field, html)
expect(result).to match('flashvars=.*https:')
end
end
it "keeps google player embeds without closing tag" do
# HTML5 disallows </embed>, according to https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed#technical_summary
html1 = '<embed type="application/x-shockwave-flash" flashvars="audioUrl=http://dl.dropbox.com/u/123/foo.mp3" src="http://www.google.com/reader/ui/123-audio-player.swf" width="400" height="27" allowscriptaccess="never" allownetworking="internal">'
html2 = "#{html1}</embed>"
result = sanitize_value(field, html2)
expect(result).to eq(html1)
end
it "strips embeds with unknown source" do
html = '<embed src="http://www.evil.org"></embed>'
result = sanitize_value(field, html)
expect(result).to be_empty
end
it "strips archive.org iframe if the src is not the embed directory" do
html = '<iframe src="http://archive.org/embed/../123/wrong/456.html"></iframe>'
result = sanitize_value(field, html)
expect(result).to be_empty
end
%w[criticalcommons.org].each do |source|
it "doesn't convert src to https for #{source}" do
html = '<iframe width="560" height="315" src="http://' + source + '/embed/123" frameborder="0"></iframe>'
result = sanitize_value(field, html)
expect(result).not_to match('https:')
end
end
it "allows video tags" do
html = '<video controls="controls" width="250" playsinline="playsinline" crossorigin="anonymous" preload="metadata">\
<track kind="subtitles" src="http://example.com/english.vtt" srclang="en">\
<track kind="subtitles" src="http://example.com/japanese.vtt" srclang="ja" default="default">\
</video>'
expect(sanitize_value(field, html)).to eq(html)
end
it "allows audio tags" do
html = '<audio controls="controls" crossorigin="anonymous" preload="metadata" loop="loop">\
<source src="http://example.com/podfic.mp3" type="audio/mpeg">\
<p>Maybe you want to <a href="http://example.com/podfic.mp3" rel="nofollow">download this podfic instead</a>?</p>\
</audio>'
expect(sanitize_value(field, html)).to eq(html)
end
end
end
context "Strip out tags not allowed in text fields other than content" do
  # The same two examples are generated once per field. The field name is part
  # of each example description so that a failure identifies which field
  # let the markup through.
  [:endnotes, :notes, :summary].each do |field|
    it "strips iframes from #{field}" do
      value = '<iframe width="560" height="315" src="//youtube.com/embed/123" frameborder="0"></iframe>'
      result = sanitize_value(field, value)
      expect(result).to eq("")
    end

    it "strips video tags from #{field}" do
      value = "<video></video>"
      result = sanitize_value(field, value)
      expect(result).to eq("")
    end
  end
end
ArchiveConfig.FIELDS_ALLOWING_CSS.each do |field|
context "#{field} field allows class attribute for CSS" do
context "class has one value" do
it "keeps values containing only letters, numbers, and hyphens" do
result = sanitize_value(field, '<p class="f-5">foobar</p>')
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[@class='f-5']/node()").to_s.strip).to eq("foobar")
end
it "strips values starting with a number" do
result = sanitize_value(field, '<p class="8ball">foobar</p>')
expect(result).not_to match(/8ball/)
end
it "strips values starting with a hyphen" do
result = sanitize_value(field, '<p class="-dash">foobar</p>')
expect(result).not_to match(/-dash/)
end
it "strips values with special characters" do
result = sanitize_value(field, '<p class="foo@bar">foobar</p>')
expect(result).not_to match(/foo@bar/)
end
end
context "class attribute has multiple values" do
it "keeps all valid values" do
result = sanitize_value(field, '<p class="foo bar">foobar</p>')
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[contains(@class, 'foo bar')]/node()").to_s.strip).to eq("foobar")
end
it "strips values starting with numbers" do
result = sanitize_value(field, '<p class="magic 8ball">foobar</p>')
expect(result).not_to match(/8ball/)
expect(result).to match(/magic/)
end
# With several class values, only the one starting with a hyphen is expected
# to be removed; the valid sibling value must remain.
it "strips values starting with hyphens" do
  result = sanitize_value(field, '<p class="rainbow -dash">foobar</p>')
  expect(result).not_to match(/-dash/)
  expect(result).to match(/rainbow/)
end
end
end
end
# For these fields the class attribute is expected to vanish from the output
# even though "f-5" is a value the CSS-enabled fields accept.
%i[comment_content bookmarker_notes summary].each do |field|
  context "#{field} field does not allow class attribute" do
    it "strips attribute even if value is valid" do
      cleaned = sanitize_value(field, '<p class="f-5">foobar</p>')
      [/f-5/, /class/].each do |leftover|
        expect(cleaned).not_to match(leftover)
      end
    end
  end
end
[:content, :endnotes, :notes, :summary].each do |field|
context "Sanitize #{field} field" do
it "keeps html" do
value = "<em>hello</em> <blockquote>world</blockquote>"
result = sanitize_value(field, value)
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//em/node()").to_s.strip).to eq("hello")
expect(doc.xpath(".//blockquote/node()").to_s.strip).to eq("<p>world</p>")
end
it "should keep valid unicode chars as is" do
result = sanitize_value(field, "nörmäl—téxt")
expect(result).to match(/nörmäl—téxt/)
end
it "allows RTL content in p" do
html = '<p dir="rtl">This is RTL content</p>'
result = sanitize_value(field, html)
expect(result).to eq(html)
end
it "allows RTL content in div" do
html = '<div dir="rtl"><p>This is RTL content</p></div>'
result = sanitize_value(field, html)
expect(result).to eq('<div dir="rtl"><p>This is RTL content</p></div>')
end
it "should not allow iframes with unknown source" do
html = '<iframe src="http://www.evil.org"></iframe>'
result = sanitize_value(field, html)
expect(result).to be_empty
end
[
"'';!--\"<XSS>=&{()}",
'<XSS STYLE="behavior: url(xss.htc);">'
].each do |value|
it "should strip xss tags: #{value}" do
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
end
end
[
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
'<<SCRIPT>alert("XSS");//<</SCRIPT>',
"<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>",
"<SCRIPT SRC=//ha.ckers.org/.j>",
"<SCRIPT>alert(/XSS/.source)</SCRIPT>",
'</TITLE><SCRIPT>alert("XSS");</SCRIPT>',
'<SCRIPT SRC="http://ha.ckers.org/xss.jpg"></SCRIPT>'
].each do |value|
it "should strip script tags: #{value}" do
result = sanitize_value(field, value)
expect(result).not_to match(/script/i)
expect(result).not_to match(/ha.ckers.org/)
end
end
[
"\\\";alert('XSS');//",
"xss:expr/*blah*/ession(alert('XSS'))",
"xss:expression(alert('XSS'))"
].each do |value|
it "should keep text: #{value}" do
result = sanitize_value(field, value)
expect(result).to match(/alert\('XSS'\)/)
end
end
it "should strip iframe tags" do
value = "<iframe src=http://ha.ckers.org/scriptlet.html <"
result = sanitize_value(field, value)
expect(result).not_to match(/iframe/i)
expect(result).not_to match(/ha.ckers.org/)
end
[
"<IMG SRC=\"javascript:alert('XSS');\">",
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
"<IMG SRC=\" &#14; javascript:alert('XSS');\">",
"<IMG SRC=\"javascript:alert('XSS')\"",
"<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">",
"<IMG SRC=\"jav ascript:alert('XSS');\">",
"<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
"<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
"<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">"
].each do |value|
it "should strip javascript in img src attribute: #{value[0..40]}" do
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/javascript/i)
end
end
[
'<META HTTP-EQUIV="Link" Content="<http://ha.ckers.org/xss.css>; REL=stylesheet">',
"<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">",
'<META HTTP-EQUIV="refresh" CONTENT="0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K">',
"<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">",
"<META HTTP-EQUIV=\"Set-Cookie\" Content=\"USERID=&lt;SCRIPT&gt;alert('XSS')&lt;/SCRIPT&gt;\">"
].each do |value|
it "should strip xss in meta tags: #{value[0..40]}" do
result = sanitize_value(field, value)
expect(result).not_to match(/javascript/i)
expect(result).not_to match(/xss/i)
end
end
it "should strip xss inside tags" do
value = '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">'
result = sanitize_value(field, value)
expect(result).not_to match(/script/i)
end
it "should strip script/xss tags" do
value = '<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>'
result = sanitize_value(field, value)
expect(result).not_to match(/script/i)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/ha.ckers.org/)
end
it "should strip script/src tags" do
value = '<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>'
result = sanitize_value(field, value)
expect(result).not_to match(/script/i)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/ha.ckers.org/)
end
it "should strip xss in body background" do
value = "<BODY BACKGROUND=\"javascript:alert('XSS')\">"
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
end
[
"<BODY ONLOAD=alert('XSS')>",
'<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>'
].each do |value|
it "should strip xss in body onload: #{value}" do
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/onload/i)
end
end
it "should strip style tag" do
value = "<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>"
result = sanitize_value(field, value)
expect(result).not_to match(/style/i)
end
it "should handle lone @imports" do
value = "@import'http://ha.ckers.org/xss.css';"
result = sanitize_value(field, value)
expect(result).not_to match(/style/i)
expect(result).to match(/@import/i)
end
# Checks that a stand-alone, backslash-obfuscated @import is kept as text.
# NOTE: the value is a double-quoted Ruby literal, so no backslash reaches
# sanitize_value: "\p" and "\j" collapse to the plain letters, "\v" becomes a
# vertical tab and "\r" a carriage return. The input therefore already starts
# with a plain "@import", which is what the last expectation looks for.
it "should handle lone borked @imports" do
value = "@im\port'\ja\vasc\ript:alert(\"XSS\")';"
result = sanitize_value(field, value)
expect(result).not_to match(/style/i)
expect(result).to match(/@import/i)
end
it "should strip javascript from img dynsrc" do
value = "<IMG DYNSRC=\"javascript:alert('XSS')\">"
result = sanitize_value(field, value)
expect(result).not_to match(/javascript/i)
expect(result).not_to match(/xss/i)
end
# Covers the LOWSRC attribute. The payload previously used DYNSRC, which only
# duplicated the preceding dynsrc example and left LOWSRC untested.
it "should strip javascript from img lowsrc" do
  value = "<IMG LOWSRC=\"javascript:alert('XSS')\">"
  result = sanitize_value(field, value)
  expect(result).not_to match(/javascript/i)
  expect(result).not_to match(/xss/i)
end
it "should strip javascript from bgsound src" do
value = "<BGSOUND SRC=\"javascript:alert('XSS');\">"
result = sanitize_value(field, value)
expect(result).not_to match(/javascript/i)
expect(result).not_to match(/xss/i)
end
it "should strip javascript from br size" do
value = "<BR SIZE=\"&{alert('XSS')}\">"
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
end
it "should strip javascript from link href" do
value = "<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">"
result = sanitize_value(field, value)
expect(result).not_to match(/javascript/i)
expect(result).not_to match(/xss/i)
end
it "should strip xss from link href" do
value = '<LINK REL="stylesheet" HREF="http://ha.ckers.org/xss.css">'
result = sanitize_value(field, value)
expect(result).not_to match(/ha.ckers.org/i)
expect(result).not_to match(/xss/i)
end
it "should strip namespace tags" do
value = '<HTML xmlns:xss><?import namespace="xss" implementation="http://ha.ckers.org/xss.htc"><xss:xss>Blah</xss:xss></HTML>'
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/ha.ckers.org/i)
expect(result).to match(/Blah/)
end
it "should strip javascript in style=background-image" do
value = "<span style=background-image:url(\"javascript:alert('XSS')\");>Text</span>"
result = sanitize_value(field, value)
expect(result).not_to match(/xss/i)
expect(result).not_to match(/javascript/i)
end
# XSS "locator" string: a run of quote/comment terminators followed by a
# <SCRIPT> element.
it "should strip script tags" do
  value = "';alert(String.fromCharCode(88,83,83))//\\';alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//\\\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>\">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>"
  result = sanitize_value(field, value)
  # The input contains neither "xss" nor "javascript", so the two checks
  # below cannot fail on their own; the script check is the one that
  # actually verifies what the example name promises.
  expect(result).not_to match(/script/i)
  expect(result).not_to match(/xss/i)
  expect(result).not_to match(/javascript/i)
end
[
"<!--#exec cmd=\"/bin/echo '<SCR'\"-->",
"<!--#exec cmd=\"/bin/echo 'IPT SRC=http://ha.ckers.org/xss.js></SCRIPT>'\"-->"
].each do |value|
it "should strip #exec: #{value[0..40]}" do
result = sanitize_value(field, value)
expect(result).to eq("")
end
end
# TODO: Ones with all types of quote marks:
# "<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>"
it "should escape ampersands" do
result = sanitize_value(field, "& &amp;")
expect(result).to match(/&amp; &amp;/)
end
context "add rel=nofollow to all links to defeat spammers' SEO plans" do
it "adds rel=nofollow to links with no rel attribute" do
result = sanitize_value(field, "<a href='foo'>Foo</a>")
expect(result).to eq("<p><a href=\"foo\" rel=\"nofollow\">Foo</a></p>")
end
it "adds rel=nofollow to links with a rel attribute" do
result = sanitize_value(field, "<a href='foo' rel='help'>Foo</a>")
expect(result).to eq("<p><a href=\"foo\" rel=\"nofollow\">Foo</a></p>")
end
end
# These are from https://github.com/rgrove/sanitize/commit/a11498de9e283cd457b35ee252983662f7452aa9
it 'should not preserve the content of removed `math` elements' do
content = sanitize_value(field, '<math>hello! <script>alert(0)</script></math>')
expect(content).to eq("")
end
it 'should not preserve the content of removed `plaintext` elements' do
content = sanitize_value(field, '<plaintext>hello! <script>alert(0)</script>')
expect(content).to eq("")
end
it 'should not preserve the content of removed `svg` elements' do
content = sanitize_value(field, '<svg>hello! <script>alert(0)</script></svg>')
expect(content).to eq("")
end
it 'should not preserve the content of removed `xmp` elements' do
content = sanitize_value(field, '<xmp>hello! <script>alert(0)</script></xmp>')
expect(content).to eq("")
end
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
describe 'foreign content bypass in relaxed config' do
it 'prevents a sanitization bypass via carefully crafted foreign content' do
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
content = sanitize_value(field, "<math><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/")
expect(content).to eq("")
content = sanitize_value(field, "<svg><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/")
expect(content).to eq("")
end
end
end
context "when given an <img> tag with a relative src" do
it "converts the src value to an absolute URL" do
content = sanitize_value(field, "<img src=\"relative\">")
expect(content).to eq("<p><img src=\"#{ArchiveConfig.APP_URL}/relative\"></p>")
end
end
context "when given an <img> tag with an absolute src" do
it "doesn't modify the src value" do
content = sanitize_value(field, "<img src=\"http://random.com/image.png\">")
expect(content).to eq("<p><img src=\"http://random.com/image.png\"></p>")
end
end
end
end
# Examples generated for every field listed in ArchiveConfig.FIELDS_ALLOWING_HTML:
# ruby annotations must pass through, and <details> may carry an "open"
# attribute whose value is rewritten to "open".
ArchiveConfig.FIELDS_ALLOWING_HTML.each do |field|
  it "preserves ruby-annotated HTML in #{field}" do
    cleaned = sanitize_value(field, "<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
    expect(cleaned).to include("<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
  end

  it "preserves ruby-annotated HTML without rp in #{field}" do
    cleaned = sanitize_value(field, "<ruby>BigText<rt>small_text</rt></ruby>")
    expect(cleaned).to include("<ruby>BigText<rt>small_text</rt></ruby>")
  end

  it "transforms open attribute's value when present on details element in #{field}" do
    markup = <<~HTML
      <details open="false">
      <summary>Automated Status: Operational</summary>
      <p>Velocity: 12m/s</p>
      <p>Direction: North</p>
      </details>
    HTML
    parsed = Nokogiri::HTML.fragment(sanitize_value(field, markup))
    open_value = parsed.xpath("./details/@open").to_s.strip
    expect(open_value).to eq("open")
  end

  it "does not require details to have an 'open' attribute in #{field}" do
    markup = <<~HTML
      <details>
      <summary>Automated Status: Operational</summary>
      <p>Velocity: 12m/s</p>
      <p>Direction: North</p>
      </details>
    HTML
    parsed = Nokogiri::HTML.fragment(sanitize_value(field, markup))
    opened_details = parsed.xpath("./details[@open]")
    expect(opened_details).to be_empty
  end
end
end
# Examples for fix_bad_characters: ordinary and accented text is left alone,
# invisible joiner characters are kept, and invalid bytes, "<3", Windows line
# endings and the spacer marker are cleaned up.
describe "fix_bad_characters" do
  it "should not touch normal text" do
    cleaned = fix_bad_characters("normal text")
    expect(cleaned).to eq("normal text")
  end

  it "should not touch normal text with valid unicode chars" do
    cleaned = fix_bad_characters("nörmältéxt")
    expect(cleaned).to eq("nörmältéxt")
  end

  # Each codepoint is sandwiched between two "A"s and must come back unchanged.
  {
    "zero-width non-joiner" => 0x200C,
    "zero-width joiner" => 0x200D,
    "word joiner" => 0x2060
  }.each do |label, codepoint|
    it "does not touch #{label}" do
      codepoints = ["A".ord, codepoint, "A".ord]
      cleaned = fix_bad_characters(codepoints.pack("U*"))
      expect(cleaned.unpack("U*")).to eq(codepoints)
    end
  end

  it "should remove invalid unicode chars" do
    # Raw bytes "A", 0x96, "A": the middle byte is not valid on its own.
    invalid_bytes = [65, 150, 65].pack("C*")
    cleaned = fix_bad_characters(invalid_bytes)
    expect(cleaned).to eq("AA")
  end

  it "should escape <3" do
    cleaned = fix_bad_characters("normal <3 text")
    expect(cleaned).to eq("normal &lt;3 text")
  end

  it "should convert \\r\\n to \\n" do
    cleaned = fix_bad_characters("normal\r\ntext")
    expect(cleaned).to eq("normal\ntext")
  end

  it "should remove the spacer" do
    cleaned = fix_bad_characters("A____spacer____A")
    expect(cleaned).to eq("AA")
  end
end
describe "add_paragraphs_to_text" do
%w[a abbr acronym address].each do |tag|
it "does not add extraneous paragraph breaks after #{tag} tags" do
result = add_paragraphs_to_text("<#{tag}>quack</#{tag}> quack")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//p").size).to eq(1)
expect(doc.xpath(".//br")).to be_empty
end
end
it "leaves audio tags alone" do
html = "<audio><source>\n</audio>"
result = add_paragraphs_to_text(html)
expect(result).not_to match("<p>")
expect(result).not_to match("<br")
end
it "leaves video tags alone" do
html = "<video><track>\n</video>"
result = add_paragraphs_to_text(html)
expect(result).not_to match("<p>")
expect(result).not_to match("<br")
end
it "doesn't break links with images inside them" do
result = add_paragraphs_to_text("<a href='/users/name'><img src='/icon.png'>name</a>")
doc = Nokogiri::HTML.fragment(result)
expect(doc.xpath("./p/a/img").size).to eq(1)
end
it "does not convert linebreaks after p tags" do
result = add_paragraphs_to_text("<p>A</p>\n<p>B</p>\n\n<p>C</p>\n\n\n")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p").size).to eq(3)
expect(doc.xpath(".//br")).to be_empty
end
it "does not convert linebreaks after tables" do
result = add_paragraphs_to_text("#{one_cell_table('A')}\n#{one_cell_table('A')}\n\n#{one_cell_table('A')}\n\n\n")
expect(result).not_to match("<p>")
expect(result).not_to match("<br")
end
%w[dl h1 h2 h3 h4 h5 h6 ol pre ul].each do |tag|
it "does not convert linebreaks after #{tag} tags" do
result = add_paragraphs_to_text("<#{tag}>A</#{tag}>\n<#{tag}>B</#{tag}>\n\n<#{tag}>C</#{tag}>\n\n\n")
expect(result).not_to match("<p>")
expect(result).not_to match("<br")
end
end
%w[blockquote center div].each do |tag|
it "does not convert linebreaks after #{tag} tags which produce blocks" do
result = add_paragraphs_to_text("<#{tag}>A</#{tag}>\n<#{tag}>B</#{tag}>\n\n<#{tag}>C</#{tag}>\n\n\n")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./#{tag}/p").size).to eq(3)
expect(doc.xpath(".//br")).to be_empty
end
it "does not wrap #{tag} tag with a paragraph" do
result = add_paragraphs_to_text("<#{tag}>A</#{tag}>\n<p>B</p>")
# This needs XML parsing because HTML5 parser might hide failures
# by reinterpreting <p><div>_</div></p> as <p></p><div>_</div>
doc = Nokogiri::XML.fragment(result)
expect(doc.xpath("./p").size).to eq(1)
expect(doc.xpath("./p/#{tag}").size).to eq(0)
end
it "wraps content inside of nested #{tag} tags with a paragraph" do
result = add_paragraphs_to_text("<#{tag}><#{tag}>A</#{tag}></#{tag}>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//p").size).to eq(1)
expect(doc.xpath("./#{tag}/#{tag}/p/node()").to_s).to eq("A")
end
it "does not wrap paragraphs inside of nested #{tag} tags" do
result = add_paragraphs_to_text("<#{tag}><#{tag}><#{tag}><p>A</p></#{tag}><p>B</p></#{tag}></#{tag}>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//p").size).to eq(2)
expect(doc.xpath("./#{tag}/#{tag}/#{tag}/p/node()").to_s).to eq("A")
expect(doc.xpath("./#{tag}/#{tag}/p/node()").to_s).to eq("B")
end
it "does not add paragraphs between a #{tag} tag and a paragraph" do
result = add_paragraphs_to_text("<#{tag}>A</#{tag}>\n\n<p>B</p>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./#{tag}/following-sibling::p/node()").to_s).to eq("B")
end
end
it "does not convert linebreaks after br tags" do
result = add_paragraphs_to_text("A<br>B<br>\n\nC<br>\n\n\n")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p").size).to eq(1)
expect(doc.xpath(".//br").size).to eq(3)
end
it "does not convert linebreaks after hr tags" do
result = add_paragraphs_to_text("A<hr>B<hr>\n\nC<hr>\n\n\n")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p").size).to eq(3)
expect(doc.xpath(".//br")).to be_empty
end
it "does not wrap table in p tags" do
result = add_paragraphs_to_text("aa #{one_cell_table('foo')} bb")
# This needs XML parsing because HTML5 parser might hide issues:
# Nokogiri::HTML5.fragment('<p>aa <table><tbody><tr><td>foo</td></tr></tbody></table> bb</p>').to_s
# "<p>aa </p><table><tbody><tr><td>foo</td></tr></tbody></table> bb<p></p>"
doc = Nokogiri::XML.fragment(result)
expect(doc.xpath(".//p").size).to eq(2)
expect(doc.xpath("./table").size).to eq(1)
end
%w[figure dl h1 h2 h3 h4 h5 h6 ol pre summary ul].each do |tag|
it "does not wrap #{tag} in p tags" do
result = add_paragraphs_to_text("aa <#{tag}>foo</#{tag}> bb")
# This needs XML parsing because HTML5 parser might hide failures
# by reinterpreting <p><h1>_</h1></p> as <p></p><h1>_</h1>
doc = Nokogiri::XML.fragment(result)
expect(doc.xpath(".//p").size).to eq(2)
expect(doc.xpath("./#{tag}/node()").to_s.strip).to eq("foo")
end
end
it "does not wrap details in p tags" do
html = <<~HTML
aa
<details>
<summary>Automated Status: Operational</summary>
<p>Velocity: 12m/s</p>
<p>Direction: North</p>
</details>
bb
HTML
result = add_paragraphs_to_text(html)
# This needs XML parsing because HTML5 parser might hide failures
# by reinterpreting <p><details>_</details></p> as <p></p><details>_</details>
doc = Nokogiri::XML.fragment(result)
# aa, velocity..., direction..., bb
expect(doc.xpath(".//p").size).to eq(4)
expect(doc.xpath("./p/details").size).to eq(0)
expect(doc.xpath("./details/p").size).to eq(2)
expect(doc.xpath("./p").size).to eq(2)
expect(doc.xpath("./p[1]/text()").to_s).to eq("aa")
expect(doc.xpath("./p[2]/text()").to_s).to eq("bb")
end
%w[ol ul].each do |tag|
it "does not convert linebreaks inside #{tag} lists" do
html = <<~HTML
<#{tag}>
<li>A</li>
<li>B</li>
</#{tag}>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./#{tag}/li[1]/node()").to_s.strip).to eq("A")
expect(doc.xpath("./#{tag}/li[2]/node()").to_s.strip).to eq("B")
expect(doc.xpath(".//br")).to be_empty
end
end
it "does not convert linebreaks inside tables" do
html = <<~TABLE
<table>
<tr>
<th>A</th>
<th>B</th>
</tr>
<tr>
<td>C</td>
<td>D</td>
</tr>
</table>
TABLE
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./table/tbody/tr[1]/th[1]/node()").to_s.strip).to eq("A")
expect(doc.xpath("./table/tbody/tr[1]/th[2]/node()").to_s.strip).to eq("B")
expect(doc.xpath("./table/tbody/tr[2]/td[1]/node()").to_s.strip).to eq("C")
expect(doc.xpath("./table/tbody/tr[2]/td[2]/node()").to_s.strip).to eq("D")
expect(doc.xpath(".//br")).to be_empty
end
it "does not convert linebreaks inside definition lists" do
html = <<~HTML
<dl>
<dt>A</dt>
<dd>aaa</dd>
<dt>B</dt>
<dd>bbb</dd>
</dl>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./dl/dt[1]/node()").to_s.strip).to eq("A")
expect(doc.xpath("./dl/dd[1]/node()").to_s.strip).to eq("aaa")
expect(doc.xpath("./dl/dt[2]/node()").to_s.strip).to eq("B")
expect(doc.xpath("./dl/dd[2]/node()").to_s.strip).to eq("bbb")
expect(doc.xpath(".//br")).to be_empty
end
# A multi-line <summary> must not have its lines wrapped in <p> elements.
it "does not add paragraphs inside summary" do
  html = <<~HTML
    <details>
    <summary>
    Automated
    Status:
    Operational
    </summary>
    <p>Velocity: 12m/s</p>
    <p>Direction: North</p>
    </details>
  HTML
  result = add_paragraphs_to_text(html)
  doc = Nokogiri::HTML.fragment(result)
  # <summary> is nested inside <details>, so an XPath anchored at the fragment
  # root ("./summary/p") selects nothing and can never fail. Search for any
  # <p> below any <summary> instead.
  expect(doc.xpath(".//summary//p")).to be_empty
end
it "does not add paragraphs inside figure" do
html = <<~HTML
<figure>
<img src="http://example.com/Camera-icon.svg" alt="camera icon">
<img src="http://example.com/Hand-icon.svg" alt="hand icon">
</figure>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML.fragment(result)
expect(doc.xpath("./figure/p")).to be_empty
end
it "allows alt and title attributes on elements inside figure" do
html = <<~HTML
<figure>
<img src="http://example.com/Camera-icon.svg" alt="camera icon">
<figcaption title="here is title">Take picture here</figcaption>
</figure>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML.fragment(result)
expect(doc.xpath("./figure/img/@alt").to_s.strip).to eq("camera icon")
expect(doc.xpath("./figure/figcaption/@title").to_s.strip).to eq("here is title")
end
it "allows other HTML elements inside figcaption" do
html = <<~HTML
<figure>
<img src="http://example.com/Camera-icon.svg">
<figcaption><em>Take picture <a href="http://example.com/link">here</a></em></figcaption>
</figure>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML.fragment(result)
expect(doc.xpath("./figure/figcaption/em/text()").to_s.strip).to eq("Take picture")
expect(doc.xpath("./figure/figcaption/em/a/text()").to_s.strip).to eq("here")
expect(doc.xpath("./figure/figcaption/em/a/@href").to_s.strip).to eq("http://example.com/link")
end
it "allows other HTML elements inside summary" do
html = <<~HTML
<details>
<summary><em>Automated Status: <a href="http://example.com/link">Operational</a></em></summary>
<p>Velocity: 12m/s</p>
<p>Direction: North</p>
</details>
HTML
result = add_paragraphs_to_text(html)
doc = Nokogiri::HTML.fragment(result)
expect(doc.xpath("./details/summary/em/text()").to_s.strip).to eq("Automated Status:")
expect(doc.xpath("./details/summary/em/a/text()").to_s.strip).to eq("Operational")
expect(doc.xpath("./details/summary/em/a/@href").to_s.strip).to eq("http://example.com/link")
end
%w[address h1 h2 h3 h4 h5 h6 p pre].each do |tag|
it "does not wrap in p and not convert linebreaks inside #{tag} tags" do
result = add_paragraphs_to_text("<#{tag}>A\nB\n\nC\n\n\nD</#{tag}>")
# This needs XML parsing because HTML5 parser might hide failures
# by reinterpreting <p><h1>_</h1></p> as <p></p><h1>_</h1>
doc = Nokogiri::XML.fragment(result)
expect(doc.xpath("./#{tag}[1]/node()").to_s.strip).to eq("A\nB\n\nC\n\n\nD")
end
end
%w[a abbr acronym].each do |tag|
it "wraps in p and not convert linebreaks inside #{tag} tags" do
result = add_paragraphs_to_text("<#{tag}>A\nB\n\nC\n\n\nD</#{tag}>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p/#{tag}[1]/node()").to_s.strip).to eq("A\nB\n\nC\n\n\nD")
end
end
it "wraps plain text in p tags" do
result = add_paragraphs_to_text("some text")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to eq("some text")
end
it "converts single linebreak to br" do
result = add_paragraphs_to_text("some\ntext")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to match(%r{some<br/?>\ntext})
end
it "converts double linebreaks to paragraph break" do
result = add_paragraphs_to_text("some\n\ntext")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to eq("some")
expect(doc.xpath("./p[2]/node()").to_s.strip).to eq("text")
end
it "converts triple linebreaks into blank paragraph" do
result = add_paragraphs_to_text("some\n\n\ntext")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to eq("some")
expect(doc.xpath("./p[2]/node()").to_s.strip).to eq("&nbsp;")
expect(doc.xpath("./p[3]/node()").to_s.strip).to eq("text")
end
it "converts double br tags into paragraph break" do
result = add_paragraphs_to_text("some<br/><br/>text")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to eq("some")
expect(doc.xpath("./p[2]/node()").to_s.strip).to eq("text")
expect(doc.xpath(".//br")).to be_empty
end
it "converts triple br tags into blank paragraph" do
result = add_paragraphs_to_text("some<br/><br/><br/>text")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath("./p[1]/node()").to_s.strip).to eq("some")
expect(doc.xpath("./p[2]/node()").to_s.strip).to eq("&nbsp;")
expect(doc.xpath("./p[3]/node()").to_s.strip).to eq("text")
end
it "does not convert double br tags inside p tags" do
result = add_paragraphs_to_text("<p>some<br/>\n<br/>text</p>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//p").size).to eq(1)
expect(doc.xpath(".//br").size).to eq(2)
end
it "does not convert triple br tags inside p tags" do
result = add_paragraphs_to_text("<p>some<br/>\n<br/>\n<br/>text</p>")
doc = Nokogiri::HTML5.fragment(result)
expect(doc.xpath(".//p").size).to eq(1)
expect(doc.xpath(".//br").size).to eq(3)
end
# Generates two examples per inline tag name listed here.
%w[b big cite code del dfn em i ins kbd q s samp
   small span strike strong sub sup tt u var].each do |tag|
  # A blank line inside the inline tag should split it across two
  # paragraphs, each containing its own copy of the tag.
  it "handles #{tag} inline tags spanning double line breaks" do
    input = "<#{tag}>some\n\ntext</#{tag}>"
    parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text(input))

    { 1 => "some", 2 => "text" }.each do |position, expected|
      expect(parsed.xpath("./p[#{position}]/#{tag}/node()").to_s.strip).to eq(expected)
    end
  end

  # The output is parsed with Nokogiri::HTML here; the pattern accepts the
  # br serialised either as <br> or as <br/>.
  it "handles #{tag} with an unclosed br tag in it" do
    input = "<#{tag}>some<br>text</#{tag}>"
    parsed = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))
    inner_html = parsed.xpath("./p[1]/#{tag}[1]").children.to_s.strip

    expect(inner_html).to match(%r{some<br/?>text})
  end
end
# Nested case: <b> sits inside <i>, and the blank line falls inside the <b>.
# Both paragraphs are expected to carry the full <i><b> nesting.
it "handles nested inline tags spanning double line breaks" do
  result = add_paragraphs_to_text("<i>have <b>some\n\ntext</b> yay</i>")
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./p[1]/i/node()").to_s.strip).to match(/\Ahave/)
  expect(doc.xpath("./p[1]/i/b/node()").to_s.strip).to eq("some")
  expect(doc.xpath("./p[2]/i/b/node()").to_s.strip).to eq("text")
  expect(doc.xpath("./p[2]/i/node()").to_s.strip).to match(/ yay\Z/)
end
# Single-level case: one <em> surrounded by plain text, with the blank line
# inside the <em>. Each paragraph is expected to get its own <em>.
it "handles inline tags spanning double line breaks" do
  result = add_paragraphs_to_text("have <em>some\n\ntext</em> yay")
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./p[1]/node()").to_s.strip).to match(/\Ahave/)
  expect(doc.xpath("./p[1]/em/node()").to_s.strip).to eq("some")
  expect(doc.xpath("./p[2]/em/node()").to_s.strip).to eq("text")
  expect(doc.xpath("./p[2]/node()").to_s.strip).to match(/ yay\Z/)
end
# Generates three examples per block-level container tag listed here.
%w[blockquote center div details].each do |tag|
  # A blank line inside the container should produce two paragraphs nested
  # within it.
  it "converts double linebreaks inside #{tag} tag" do
    parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}>some\n\ntext</#{tag}>"))

    %w[some text].each.with_index(1) do |expected, position|
      expect(parsed.xpath("./#{tag}/p[#{position}]/node()").to_s.strip).to eq(expected)
    end
  end

  # Input that is already fully wrapped in paragraphs must keep exactly the
  # two paragraphs it came with.
  it "doesn't insert extra <p></p> tags before the #{tag} tag" do
    input = "<p>before</p><#{tag}><p>during</p></#{tag}>"
    parsed = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))

    expect(parsed.xpath(".//p").size).to eq(2)
    expect(parsed.xpath("./p[1]").children.to_s.strip).to eq("before")
    expect(parsed.xpath("./#{tag}/p[1]").children.to_s.strip).to eq("during")
  end

  # Bare text after the closing container tag should get its own top-level
  # paragraph, alongside the one created inside the container.
  it "creates a paragraph for text immediately following the #{tag} tag" do
    input = "<#{tag}>during</#{tag}>after"
    parsed = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))

    expect(parsed.xpath(".//p").size).to eq(2)
    expect(parsed.xpath("./#{tag}/p[1]").children.to_s.strip).to eq("during")
    expect(parsed.xpath("./p[1]").children.to_s.strip).to eq("after")
  end
end
# Loose text on either side of an existing paragraph should each be wrapped,
# giving three sibling paragraphs in the original order.
it "wraps text in p before and after existing p tag" do
  parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text("boom\n\n<p>da</p>\n\nyadda"))

  %w[boom da yadda].each.with_index(1) do |expected, position|
    expect(parsed.xpath("./p[#{position}]/node()").to_s.strip).to eq(expected)
  end
end
# Ruby annotation markup (ruby/rp/rt) is expected to come through unchanged
# inside a single wrapping paragraph.
it "wraps ruby-annotated text in p tags" do
  cleaned = add_paragraphs_to_text("text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>")
  first_paragraph = Nokogiri::HTML.fragment(cleaned).xpath("./p[1]")

  expect(first_paragraph.children.to_s.strip).to eq("text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>")
end
# The class attribute on the div must still be present after paragraphs are
# inserted inside it.
it "keeps attributes of block elements" do
  parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<div class='foo'>some\n\ntext</div>"))

  { 1 => "some", 2 => "text" }.each do |position, expected|
    expect(parsed.xpath("./div[@class='foo']/p[#{position}]/node()").to_s.strip).to eq(expected)
  end
end
# When a span is split over two paragraphs, both resulting spans are expected
# to carry the original class attribute.
it "keeps attributes of inline elements across paragraphs" do
  parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<span class='foo'>some\n\ntext</span>"))

  { 1 => "some", 2 => "text" }.each do |position, expected|
    expect(parsed.xpath("./p[#{position}]/span[@class='foo']/node()").to_s.strip).to eq(expected)
  end
end
# A paragraph with a two-token class attribute must still be findable by
# either class name after processing.
it "handles two classes" do
  parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text('<p class="foo bar">foobar</p>'))

  %w[foo bar].each do |class_name|
    expect(parsed.xpath("./p[contains(@class, '#{class_name}')]/node()").to_s.strip).to eq("foobar")
  end
end
# The <em> is never closed in the input; the output is expected to nest it
# properly inside <strong> within a paragraph.
it "closes unclosed tag within other tag" do
  cleaned = add_paragraphs_to_text("<strong><em>unclosed</strong>")
  parsed = Nokogiri::HTML5.fragment(cleaned)

  expect(parsed.xpath("./p/strong/em/node()").to_s.strip).to eq("unclosed")
end
# The input omits </rt>; the raw output string is expected to contain it.
it "closes unclosed rt tags" do
  cleaned = add_paragraphs_to_text("<ruby>big text<rt>small text</ruby>")

  expect(cleaned).to include("<ruby>big text<rt>small text</rt></ruby>")
end
# The second <rp> is left open in the input; the raw output string is
# expected to contain the matching </rp>.
it "closes unclosed rp tag" do
  cleaned = add_paragraphs_to_text("<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</ruby>")

  expect(cleaned).to include("<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</rp></ruby>")
end
# The closing tags arrive in the wrong order; the output is expected to nest
# <strong> inside <em>.
it "re-nests mis-nested tags" do
  cleaned = add_paragraphs_to_text("some <em><strong>text</em></strong>")
  parsed = Nokogiri::HTML5.fragment(cleaned)

  expect(parsed.xpath("./p[1]/em/strong/node()").to_s.strip).to eq("text")
end
# Opening and closing tags that differ only in letter case should still be
# paired up, giving two separate <em> elements in one paragraph.
it "handles mixed uppercase/lowercase html tags" do
  result = add_paragraphs_to_text("<em>mixed</EM> <EM>stuff</em>")
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./p[1]/em[1]/node()").to_s.strip).to eq("mixed")
  expect(doc.xpath("./p[1]/em[2]/node()").to_s.strip).to eq("stuff")
end
# Generates one example per inline tag name listed here.
%w[b big cite code del dfn em i ins kbd q s samp
   small span strike strong sub sup tt u var].each do |tag|
  # Two inline elements separated by a single space should share one
  # paragraph, and the separating space must survive as a text node.
  it "wraps consecutive #{tag} inline tags in one paragraph" do
    result = add_paragraphs_to_text("<#{tag}>hey</#{tag}> <#{tag}>ho</#{tag}>")
    doc = Nokogiri::HTML5.fragment(result)
    expect(doc.xpath("./p[1]/#{tag}[1]/node()").to_s.strip).to eq("hey")
    expect(doc.xpath("./p[1]/#{tag}[2]/node()").to_s.strip).to eq("ho")
    expect(doc.xpath("./p[1]/text()").to_s).to eq(" ")
  end
end
# Each escaped entity, passed in on its own, is expected to come back
# verbatim inside a single paragraph.
%w[&gt; &lt; &amp;].each do |entity|
  it "leaves #{entity} alone" do
    expect(add_paragraphs_to_text(entity)).to eq("<p>#{entity}</p>")
  end
end
# Bare text followed directly by existing paragraphs should give exactly
# three paragraphs, with no empty ones in between.
it "does not add empty p tags" do
  parsed = Nokogiri::HTML5.fragment(add_paragraphs_to_text("A<p>B</p><p>C</p>"))

  expect(parsed.xpath("./p").size).to eq(3)
  %w[A B C].each.with_index(1) do |expected, position|
    expect(parsed.xpath("./p[#{position}]/node()").to_s.strip).to eq(expected)
  end
end
# Paragraphs nested inside an inline <i> are invalid; after processing no
# <p> may remain as a direct child of any <i>.
it "does not leave p inside i" do
  cleaned = add_paragraphs_to_text("<i><p>foo</p><p>bar</p></i>")
  paragraphs_in_italics = Nokogiri::HTML5.fragment(cleaned).xpath(".//i/p")

  expect(paragraphs_in_italics).to be_empty
end
# The second <colgroup> is never closed and no <tbody> is written in the
# input. After processing and HTML5 parsing, both colgroups (with their
# attributes) and all four cells are expected to be found, with the rows
# located under table/tbody.
it "handles table tags that don't need closing" do
  # Double quotes need no escaping inside a heredoc body.
  html = <<~TABLE
    <table>
    <colgroup align="left"><col width="20"></colgroup>
    <colgroup align="right">
    <tr>
    <th>A</th>
    <th>B</th>
    </tr>
    <tr>
    <td>C</td>
    <td>D</td>
    </tr>
    </table>
  TABLE
  result = add_paragraphs_to_text(html)
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./table/colgroup[@align='left']/col[@width='20']").size).to eq(1)
  expect(doc.xpath("./table/colgroup[@align='right']").size).to eq(1)
  expect(doc.xpath("./table/tbody/tr[1]/th[1]/node()").to_s.strip).to eq("A")
  expect(doc.xpath("./table/tbody/tr[1]/th[2]/node()").to_s.strip).to eq("B")
  expect(doc.xpath("./table/tbody/tr[2]/td[1]/node()").to_s.strip).to eq("C")
  expect(doc.xpath("./table/tbody/tr[2]/td[2]/node()").to_s.strip).to eq("D")
end
# An apostrophe inside a double-quoted attribute value must come through
# intact on the resulting span.
it "doesn't break when an attribute includes a single quote" do
  input = <<~HTML
    <span title="Don't stop me now">Cause I'm having a good time</span>
  HTML
  parsed = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))
  span = parsed.xpath(".//span").first

  expect(span.attribute("title").value).to eq("Don't stop me now")
end
# Entity-escaped tag names on separate lines must stay escaped; each line is
# expected to end up in its own paragraph.
it "doesn't unescape escaped text when processing newlines" do
  input = <<~HTML.strip
    &lt;span&gt;
    &lt;div&gt;
  HTML
  parsed = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))

  { 1 => "&lt;span&gt;", 2 => "&lt;div&gt;" }.each do |position, expected|
    expect(parsed.xpath("./p[#{position}]").children.to_s.strip).to eq(expected)
  end
end
# Marked pending: the href value has no closing quote. The desired outcome is
# a link whose href does not swallow the following markup and whose text is
# still "mylink".
it "fails gracefully for missing ending quotation marks" do
  pending "Opened enhancement request with Nokogiri"

  cleaned = add_paragraphs_to_text("<strong><a href='ao3.org>mylink</a></strong>")
  link = Nokogiri::HTML5.fragment(cleaned).xpath(".//a").first

  expect(link.attribute("href").value).not_to match(/strong/)
  expect(link.text.strip).to eq("mylink")
end
# The href value has a closing quote but no opening one; the stray quote is
# expected to end up as part of the attribute value, with the link text kept.
it "fails gracefully for missing starting quotation marks" do
  cleaned = add_paragraphs_to_text('<strong><a href=ao3.org">mylink</a></strong>')
  link = Nokogiri::HTML5.fragment(cleaned).xpath(".//a").first

  expect(link.attribute("href").value).to eq('ao3.org"')
  expect(link.text.strip).to eq("mylink")
end
end
# Specs for add_break_between_paragraphs, which is expected to put a
# "<br />" between a closing and an opening paragraph tag.
describe "add_break_between_paragraphs" do
  it "adds <br /> between paragraphs" do
    input = "<p>Hi!</p><p>I need more space.</p>"

    expect(add_break_between_paragraphs(input)).to eq("<p>Hi!</p><br /><p>I need more space.</p>")
  end

  # Whitespace around the </p> and <p> markers should be dropped as well.
  it "removes any blank spaces before, between, and after the paragraph marks" do
    input = "bla. </p> <p> Bla"

    expect(add_break_between_paragraphs(input)).to eq("bla.</p><br /><p>Bla")
  end
end
# Specs for strip_images. Without keep_src (or with keep_src: false) the img
# tag is expected to disappear from the text entirely; with keep_src: true
# the tag's contents are expected to remain as plain text, minus the angle
# brackets and any trailing slash.
describe "strip_images" do
  # Expected output whenever the img tag is removed outright.
  let(:result) { "Hi! Bye" }

  context "without keep_src" do
    it "removes the img tag entirely when the src uses double quotes" do
      string = 'Hi! <img src="http://example.org/image.png" /> Bye'
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src uses single quotes" do
      string = "Hi! <img src='http://example.org/image.png'> Bye"
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src uses mismatched quotes" do
      string = "Hi! <img src=\"http://example.org/image.png'> Bye"
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing" do
      string = 'Hi! <img alt="a11y"> Bye'
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing a closing quotation mark" do
      string = 'Hi! <img src="http://example.org/image.png /> Bye'
      expect(strip_images(string)).to eq(result)
    end
  end

  # Passing keep_src: false explicitly must behave like omitting the option.
  context "with keep_src: false" do
    it "removes the img tag entirely when the src uses double quotes" do
      string = 'Hi! <img src="http://example.org/image.png" /> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src uses single quotes" do
      string = "Hi! <img src='http://example.org/image.png'> Bye"
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src uses mismatched quotes" do
      string = "Hi! <img src=\"http://example.org/image.png'> Bye"
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing" do
      string = 'Hi! <img alt="a11y"> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing a closing quotation mark" do
      string = 'Hi! <img src="http://example.org/image.png /> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end
  end

  # The local is named `expected` here so it does not shadow let(:result).
  context "with keep_src: true" do
    it "keeps the img tag attributes" do
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\'> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end

    # Input ends in " />": a space precedes the self-closing slash.
    it "does not keep tag trailing slash with a space" do
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\' /> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end

    # Input ends in "/>": the slash directly follows the last attribute.
    it "does not keep tag trailing slash without a space" do
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\'/> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end
  end
end
end