otwarchive-symphonyarchive/spec/lib/html_cleaner_spec.rb

1250 lines
50 KiB
Ruby
Raw Normal View History

2026-03-11 22:22:11 +00:00
require "spec_helper"
require "nokogiri"
describe HtmlCleaner do
include HtmlCleaner
# Wraps +content+ (stringified) in a minimal one-row, one-cell HTML table.
def one_cell_table(content)
  opening = "<table><tr><td>"
  closing = "</td></tr></table>"
  opening + content.to_s + closing
end
describe "sanitize_value" do
# For every field configured to allow media embeds: embeds from the listed
# hosts are kept, embeds from unknown sources are stripped, and
# <video>/<audio> markup passes through unchanged.
ArchiveConfig.FIELDS_ALLOWING_MEDIA_EMBEDS.each do |field|
  context "#{field} is configured to allow media embeds" do
    %w[
      youtube.com youtube-nocookie.com vimeo.com player.vimeo.com
      vidders.net criticalcommons.org google.com podfic.com archive.org
      open.spotify.com spotify.com 8tracks.com w.soundcloud.com soundcloud.com viddertube.com
      bilibili.com player.bilibili.com 4shared.com/web/embed audio.com/embed/audio
    ].each do |host|
      it "keeps embeds from #{host}" do
        markup = %(<iframe width="560" height="315" src="//#{host}/embed/123" frameborder="0"></iframe>)
        expect(sanitize_value(field, markup)).to include(markup)
      end
    end

    %w[
      youtube.com youtube-nocookie.com vimeo.com player.vimeo.com
      archive.org 8tracks.com podfic.com
      open.spotify.com spotify.com w.soundcloud.com soundcloud.com vidders.net viddertube.com
      bilibili.com player.bilibili.com 4shared.com/web/embed audio.com/embed/audio
    ].each do |host|
      it "converts src to https for #{host}" do
        markup = %(<iframe width="560" height="315" src="http://#{host}/embed/123" frameborder="0"></iframe>)
        expect(sanitize_value(field, markup)).to match('https:')
      end
    end

    ["vidders.net"].each do |host|
      it "converts flashvars to https for #{host}" do
        markup = %(<embed flashvars="config=http://#{host}/embed/123" src="http://#{host}/embed/123" type="application/x-shockwave-flash" width="456" height="344"></embed>)
        expect(sanitize_value(field, markup)).to match('flashvars=.*https:')
      end
    end

    it "keeps google player embeds without closing tag" do
      # HTML5 disallows </embed>, according to https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/embed#technical_summary
      unclosed = '<embed type="application/x-shockwave-flash" flashvars="audioUrl=http://dl.dropbox.com/u/123/foo.mp3" src="http://www.google.com/reader/ui/123-audio-player.swf" width="400" height="27" allowscriptaccess="never" allownetworking="internal">'
      closed = "#{unclosed}</embed>"
      expect(sanitize_value(field, closed)).to eq(unclosed)
    end

    it "strips embeds with unknown source" do
      expect(sanitize_value(field, '<embed src="http://www.evil.org"></embed>')).to be_empty
    end

    it "strips archive.org iframe if the src is not the embed directory" do
      traversal = '<iframe src="http://archive.org/embed/../123/wrong/456.html"></iframe>'
      expect(sanitize_value(field, traversal)).to be_empty
    end

    ["criticalcommons.org"].each do |host|
      it "doesn't convert src to https for #{host}" do
        markup = %(<iframe width="560" height="315" src="http://#{host}/embed/123" frameborder="0"></iframe>)
        expect(sanitize_value(field, markup)).not_to match('https:')
      end
    end

    it "allows video tags" do
      # NOTE: single-quoted literal, so each trailing backslash and the newline
      # after it are part of the string; continuation lines must stay unindented.
      markup = '<video controls="controls" width="250" playsinline="playsinline" crossorigin="anonymous" preload="metadata">\
<track kind="subtitles" src="http://example.com/english.vtt" srclang="en">\
<track kind="subtitles" src="http://example.com/japanese.vtt" srclang="ja" default="default">\
</video>'
      expect(sanitize_value(field, markup)).to eq(markup)
    end

    it "allows audio tags" do
      # NOTE: same literal-backslash caveat as the video example above.
      markup = '<audio controls="controls" crossorigin="anonymous" preload="metadata" loop="loop">\
<source src="http://example.com/podfic.mp3" type="audio/mpeg">\
<p>Maybe you want to <a href="http://example.com/podfic.mp3" rel="nofollow">download this podfic instead</a>?</p>\
</audio>'
      expect(sanitize_value(field, markup)).to eq(markup)
    end
  end
end
# Fields other than content must not accept embedded media markup.
# Each example names its field so failures are attributable to one field
# instead of sharing a description across all three.
context "Strip out tags not allowed in text fields other than content" do
  %i[endnotes notes summary].each do |field|
    it "strips iframes from #{field}" do
      value = '<iframe width="560" height="315" src="//youtube.com/embed/123" frameborder="0"></iframe>'
      expect(sanitize_value(field, value)).to eq("")
    end

    it "strips video tags from #{field}" do
      expect(sanitize_value(field, "<video></video>")).to eq("")
    end
  end
end
# Fields that allow CSS keep the class attribute, but only class names
# made of letters, numbers and hyphens that start with a letter survive.
ArchiveConfig.FIELDS_ALLOWING_CSS.each do |field|
  context "#{field} field allows class attribute for CSS" do
    context "class has one value" do
      it "keeps values containing only letters, numbers, and hyphens" do
        fragment = Nokogiri::HTML5.fragment(sanitize_value(field, '<p class="f-5">foobar</p>'))
        expect(fragment.xpath("./p[@class='f-5']/node()").to_s.strip).to eq("foobar")
      end

      it "strips values starting with a number" do
        expect(sanitize_value(field, '<p class="8ball">foobar</p>')).not_to match(/8ball/)
      end

      it "strips values starting with a hyphen" do
        expect(sanitize_value(field, '<p class="-dash">foobar</p>')).not_to match(/-dash/)
      end

      it "strips values with special characters" do
        expect(sanitize_value(field, '<p class="foo@bar">foobar</p>')).not_to match(/foo@bar/)
      end
    end

    context "class attribute has multiple values" do
      it "keeps all valid values" do
        fragment = Nokogiri::HTML5.fragment(sanitize_value(field, '<p class="foo bar">foobar</p>'))
        expect(fragment.xpath("./p[contains(@class, 'foo bar')]/node()").to_s.strip).to eq("foobar")
      end

      it "strips values starting with numbers" do
        sanitized = sanitize_value(field, '<p class="magic 8ball">foobar</p>')
        expect(sanitized).not_to match(/8ball/)
        expect(sanitized).to match(/magic/)
      end

      # Description previously misspelled "hyphens" as "hypens".
      it "strips values starting with hyphens" do
        sanitized = sanitize_value(field, '<p class="rainbow -dash">foobar</p>')
        expect(sanitized).not_to match(/-dash/)
        expect(sanitized).to match(/rainbow/)
      end
    end
  end
end
# These fields drop the class attribute entirely, even for a valid value.
%i[comment_content bookmarker_notes summary].each do |field|
  context "#{field} field does not allow class attribute" do
    it "strips attribute even if value is valid" do
      sanitized = sanitize_value(field, '<p class="f-5">foobar</p>')
      expect(sanitized).not_to match(/f-5/)
      expect(sanitized).not_to match(/class/)
    end
  end
end
[:content, :endnotes, :notes, :summary].each do |field|
context "Sanitize #{field} field" do
# Ordinary markup, unicode text and dir="rtl" should survive sanitization;
# iframes pointing at unknown hosts should not.
it "keeps html" do
  fragment = Nokogiri::HTML5.fragment(sanitize_value(field, "<em>hello</em> <blockquote>world</blockquote>"))
  expect(fragment.xpath(".//em/node()").to_s.strip).to eq("hello")
  expect(fragment.xpath(".//blockquote/node()").to_s.strip).to eq("<p>world</p>")
end

it "should keep valid unicode chars as is" do
  expect(sanitize_value(field, "nörmäl—téxt")).to match(/nörmäl—téxt/)
end

it "allows RTL content in p" do
  markup = '<p dir="rtl">This is RTL content</p>'
  expect(sanitize_value(field, markup)).to eq(markup)
end

it "allows RTL content in div" do
  sanitized = sanitize_value(field, '<div dir="rtl"><p>This is RTL content</p></div>')
  expect(sanitized).to eq('<div dir="rtl"><p>This is RTL content</p></div>')
end

it "should not allow iframes with unknown source" do
  expect(sanitize_value(field, '<iframe src="http://www.evil.org"></iframe>')).to be_empty
end
# Unknown <XSS> elements must disappear from the output.
xss_tag_payloads = [
  "'';!--\"<XSS>=&{()}",
  '<XSS STYLE="behavior: url(xss.htc);">'
]
xss_tag_payloads.each do |payload|
  it "should strip xss tags: #{payload}" do
    expect(sanitize_value(field, payload)).not_to match(/xss/i)
  end
end

# <script> in its various malformed spellings must be removed along with its source URL.
script_payloads = [
  "<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
  '<<SCRIPT>alert("XSS");//<</SCRIPT>',
  "<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>",
  "<SCRIPT SRC=//ha.ckers.org/.j>",
  "<SCRIPT>alert(/XSS/.source)</SCRIPT>",
  '</TITLE><SCRIPT>alert("XSS");</SCRIPT>',
  '<SCRIPT SRC="http://ha.ckers.org/xss.jpg"></SCRIPT>'
]
script_payloads.each do |payload|
  it "should strip script tags: #{payload}" do
    sanitized = sanitize_value(field, payload)
    expect(sanitized).not_to match(/script/i)
    expect(sanitized).not_to match(/ha.ckers.org/)
  end
end

# Script-looking plain text (no markup) is expected to be kept.
plain_text_payloads = [
  "\\\";alert('XSS');//",
  "xss:expr/*blah*/ession(alert('XSS'))",
  "xss:expression(alert('XSS'))"
]
plain_text_payloads.each do |payload|
  it "should keep text: #{payload}" do
    expect(sanitize_value(field, payload)).to match(/alert\('XSS'\)/)
  end
end
# An unterminated iframe tag must still be removed.
it "should strip iframe tags" do
  sanitized = sanitize_value(field, "<iframe src=http://ha.ckers.org/scriptlet.html <")
  expect(sanitized).not_to match(/iframe/i)
  expect(sanitized).not_to match(/ha.ckers.org/)
end

# javascript: URLs in src attributes, including entity-encoded and
# whitespace-obfuscated spellings, must not survive.
img_src_payloads = [
  "<IMG SRC=\"javascript:alert('XSS');\">",
  "<IMG SRC=JaVaScRiPt:alert('XSS')>",
  "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
  "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
  "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
  "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
  "<IMG SRC=\" &#14; javascript:alert('XSS');\">",
  "<IMG SRC=\"javascript:alert('XSS')\"",
  "<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">",
  "<IMG SRC=\"jav ascript:alert('XSS');\">",
  "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
  "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
  "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">"
]
img_src_payloads.each do |payload|
  # Only the first 41 characters go into the description to keep it readable.
  it "should strip javascript in img src attribute: #{payload[0..40]}" do
    sanitized = sanitize_value(field, payload)
    expect(sanitized).not_to match(/xss/i)
    expect(sanitized).not_to match(/javascript/i)
  end
end
# <meta> based redirects / cookie injection must leave nothing behind.
meta_payloads = [
  '<META HTTP-EQUIV="Link" Content="<http://ha.ckers.org/xss.css>; REL=stylesheet">',
  "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0;url=javascript:alert('XSS');\">",
  '<META HTTP-EQUIV="refresh" CONTENT="0;url=data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K">',
  "<META HTTP-EQUIV=\"refresh\" CONTENT=\"0; URL=http://;URL=javascript:alert('XSS');\">",
  "<META HTTP-EQUIV=\"Set-Cookie\" Content=\"USERID=&lt;SCRIPT&gt;alert('XSS')&lt;/SCRIPT&gt;\">"
]
meta_payloads.each do |payload|
  it "should strip xss in meta tags: #{payload[0..40]}" do
    sanitized = sanitize_value(field, payload)
    expect(sanitized).not_to match(/javascript/i)
    expect(sanitized).not_to match(/xss/i)
  end
end

it "should strip xss inside tags" do
  expect(sanitize_value(field, '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">')).not_to match(/script/i)
end

it "should strip script/xss tags" do
  sanitized = sanitize_value(field, '<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>')
  expect(sanitized).not_to match(/script/i)
  expect(sanitized).not_to match(/xss/i)
  expect(sanitized).not_to match(/ha.ckers.org/)
end

it "should strip script/src tags" do
  sanitized = sanitize_value(field, '<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>')
  expect(sanitized).not_to match(/script/i)
  expect(sanitized).not_to match(/xss/i)
  expect(sanitized).not_to match(/ha.ckers.org/)
end

it "should strip xss in body background" do
  expect(sanitize_value(field, "<BODY BACKGROUND=\"javascript:alert('XSS')\">")).not_to match(/xss/i)
end
# Event-handler attributes on <body> must be dropped, however oddly spelled.
onload_payloads = [
  "<BODY ONLOAD=alert('XSS')>",
  '<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>'
]
onload_payloads.each do |payload|
  it "should strip xss in body onload: #{payload}" do
    sanitized = sanitize_value(field, payload)
    expect(sanitized).not_to match(/xss/i)
    expect(sanitized).not_to match(/onload/i)
  end
end

it "should strip style tag" do
  expect(sanitize_value(field, "<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>")).not_to match(/style/i)
end

# A bare @import outside of any tag is just text and is expected to remain.
it "should handle lone @imports" do
  sanitized = sanitize_value(field, "@import'http://ha.ckers.org/xss.css';")
  expect(sanitized).not_to match(/style/i)
  expect(sanitized).to match(/@import/i)
end

it "should handle lone borked @imports" do
  # NOTE: double-quoted literal, so \p and \j collapse to plain letters while
  # \v and \r become control characters; the input contains no backslashes.
  borked = "@im\port'\ja\vasc\ript:alert(\"XSS\")';"
  sanitized = sanitize_value(field, borked)
  expect(sanitized).not_to match(/style/i)
  expect(sanitized).to match(/@import/i)
end
# The non-standard DYNSRC attribute must not carry a javascript: URL through.
it "should strip javascript from img dynsrc" do
  sanitized = sanitize_value(field, "<IMG DYNSRC=\"javascript:alert('XSS')\">")
  expect(sanitized).not_to match(/javascript/i)
  expect(sanitized).not_to match(/xss/i)
end
# The non-standard LOWSRC attribute must not carry a javascript: URL through.
# The fixture previously used DYNSRC (copied from the dynsrc example), so
# LOWSRC itself was never exercised.
it "should strip javascript from img lowsrc" do
  value = "<IMG LOWSRC=\"javascript:alert('XSS')\">"
  result = sanitize_value(field, value)
  expect(result).not_to match(/javascript/i)
  expect(result).not_to match(/xss/i)
end
# Script-bearing attributes on <bgsound>, <br> and <link> must be removed.
it "should strip javascript from bgsound src" do
  sanitized = sanitize_value(field, "<BGSOUND SRC=\"javascript:alert('XSS');\">")
  expect(sanitized).not_to match(/javascript/i)
  expect(sanitized).not_to match(/xss/i)
end

it "should strip javascript from br size" do
  expect(sanitize_value(field, "<BR SIZE=\"&{alert('XSS')}\">")).not_to match(/xss/i)
end

it "should strip javascript from link href" do
  sanitized = sanitize_value(field, "<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">")
  expect(sanitized).not_to match(/javascript/i)
  expect(sanitized).not_to match(/xss/i)
end

it "should strip xss from link href" do
  sanitized = sanitize_value(field, '<LINK REL="stylesheet" HREF="http://ha.ckers.org/xss.css">')
  expect(sanitized).not_to match(/ha.ckers.org/i)
  expect(sanitized).not_to match(/xss/i)
end
# Namespaced elements are removed but their text content ("Blah") is kept.
it "should strip namespace tags" do
  namespaced = '<HTML xmlns:xss><?import namespace="xss" implementation="http://ha.ckers.org/xss.htc"><xss:xss>Blah</xss:xss></HTML>'
  sanitized = sanitize_value(field, namespaced)
  expect(sanitized).not_to match(/xss/i)
  expect(sanitized).not_to match(/ha.ckers.org/i)
  expect(sanitized).to match(/Blah/)
end

it "should strip javascript in style=background-image" do
  styled = "<span style=background-image:url(\"javascript:alert('XSS')\");>Text</span>"
  sanitized = sanitize_value(field, styled)
  expect(sanitized).not_to match(/xss/i)
  expect(sanitized).not_to match(/javascript/i)
end

it "should strip script tags" do
  polyglot = "';alert(String.fromCharCode(88,83,83))//\\';alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//\\\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>\">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>"
  sanitized = sanitize_value(field, polyglot)
  expect(sanitized).not_to match(/xss/i)
  expect(sanitized).not_to match(/javascript/i)
end
# Server-side-include style comments must be removed completely.
exec_payloads = [
  "<!--#exec cmd=\"/bin/echo '<SCR'\"-->",
  "<!--#exec cmd=\"/bin/echo 'IPT SRC=http://ha.ckers.org/xss.js></SCRIPT>'\"-->"
]
exec_payloads.each do |payload|
  it "should strip #exec: #{payload[0..40]}" do
    expect(sanitize_value(field, payload)).to eq("")
  end
end
# TODO: Ones with all types of quote marks:
# "<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>"
# A bare "&" is escaped; an existing "&amp;" is not escaped a second time.
it "should escape ampersands" do
  expect(sanitize_value(field, "& &amp;")).to match(/&amp; &amp;/)
end

context "add rel=nofollow to all links to defeat spammers' SEO plans" do
  it "adds rel=nofollow to links with no rel attribute" do
    sanitized = sanitize_value(field, "<a href='foo'>Foo</a>")
    expect(sanitized).to eq("<p><a href=\"foo\" rel=\"nofollow\">Foo</a></p>")
  end

  # An existing rel value is replaced rather than extended.
  it "adds rel=nofollow to links with a rel attribute" do
    sanitized = sanitize_value(field, "<a href='foo' rel='help'>Foo</a>")
    expect(sanitized).to eq("<p><a href=\"foo\" rel=\"nofollow\">Foo</a></p>")
  end
end
# These are from https://github.com/rgrove/sanitize/commit/a11498de9e283cd457b35ee252983662f7452aa9
# Content nested in these removed elements must be dropped, not unwrapped.
it 'should not preserve the content of removed `math` elements' do
  expect(sanitize_value(field, '<math>hello! <script>alert(0)</script></math>')).to eq("")
end

it 'should not preserve the content of removed `plaintext` elements' do
  expect(sanitize_value(field, '<plaintext>hello! <script>alert(0)</script>')).to eq("")
end

it 'should not preserve the content of removed `svg` elements' do
  expect(sanitize_value(field, '<svg>hello! <script>alert(0)</script></svg>')).to eq("")
end

it 'should not preserve the content of removed `xmp` elements' do
  expect(sanitize_value(field, '<xmp>hello! <script>alert(0)</script></xmp>')).to eq("")
end

# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
describe 'foreign content bypass in relaxed config' do
  it 'prevents a sanitization bypass via carefully crafted foreign content' do
    %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
      # Same payload wrapped first in <math>, then in <svg>.
      %w[math svg].each do |wrapper|
        crafted = "<#{wrapper}><#{tag_name}>/*&lt;/#{tag_name}&gt;&lt;img src onerror=alert(1)>*/"
        expect(sanitize_value(field, crafted)).to eq("")
      end
    end
  end
end
# Relative image sources are resolved against ArchiveConfig.APP_URL.
context "when given an <img> tag with a relative src" do
  it "converts the src value to an absolute URL" do
    sanitized = sanitize_value(field, "<img src=\"relative\">")
    expect(sanitized).to eq("<p><img src=\"#{ArchiveConfig.APP_URL}/relative\"></p>")
  end
end

# Absolute image sources pass through untouched.
context "when given an <img> tag with an absolute src" do
  it "doesn't modify the src value" do
    sanitized = sanitize_value(field, "<img src=\"http://random.com/image.png\">")
    expect(sanitized).to eq("<p><img src=\"http://random.com/image.png\"></p>")
  end
end
end
end
# Every HTML-enabled field keeps ruby annotations and <details>; an "open"
# attribute on <details> is rewritten to the value "open".
ArchiveConfig.FIELDS_ALLOWING_HTML.each do |field|
  it "preserves ruby-annotated HTML in #{field}" do
    sanitized = sanitize_value(field, "<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
    expect(sanitized).to include("<ruby>BigText<rp>(</rp><rt>small_text</rt><rp>)</rp></ruby>")
  end

  it "preserves ruby-annotated HTML without rp in #{field}" do
    sanitized = sanitize_value(field, "<ruby>BigText<rt>small_text</rt></ruby>")
    expect(sanitized).to include("<ruby>BigText<rt>small_text</rt></ruby>")
  end

  it "transforms open attribute's value when present on details element in #{field}" do
    markup = <<~HTML
      <details open="false">
      <summary>Automated Status: Operational</summary>
      <p>Velocity: 12m/s</p>
      <p>Direction: North</p>
      </details>
    HTML
    fragment = Nokogiri::HTML.fragment(sanitize_value(field, markup))
    expect(fragment.xpath("./details/@open").to_s.strip).to eq("open")
  end

  it "does not require details to have an 'open' attribute in #{field}" do
    markup = <<~HTML
      <details>
      <summary>Automated Status: Operational</summary>
      <p>Velocity: 12m/s</p>
      <p>Direction: North</p>
      </details>
    HTML
    fragment = Nokogiri::HTML.fragment(sanitize_value(field, markup))
    expect(fragment.xpath("./details[@open]")).to be_empty
  end
end
end
# fix_bad_characters: leaves valid text alone, drops invalid bytes,
# escapes "<3", normalises CRLF and removes the "____spacer____" marker.
describe "fix_bad_characters" do
  it "should not touch normal text" do
    expect(fix_bad_characters("normal text")).to eq("normal text")
  end

  it "should not touch normal text with valid unicode chars" do
    expect(fix_bad_characters("nörmäl—téxt")).to eq("nörmäl—téxt")
  end

  # Joiner code points placed between two "A"s must round-trip unchanged.
  {
    "zero-width non-joiner" => 0x200C,
    "zero-width joiner" => 0x200D,
    "word joiner" => 0x2060
  }.each do |label, codepoint|
    it "does not touch #{label}" do
      codepoints = ["A".ord, codepoint, "A".ord]
      expect(fix_bad_characters(codepoints.pack("U*")).unpack("U*")).to eq(codepoints)
    end
  end

  it "should remove invalid unicode chars" do
    invalid_bytes = [65, 150, 65].pack("C*") # => "A\226A"
    expect(fix_bad_characters(invalid_bytes)).to eq("AA")
  end

  it "should escape <3" do
    expect(fix_bad_characters("normal <3 text")).to eq("normal &lt;3 text")
  end

  it "should convert \\r\\n to \\n" do
    expect(fix_bad_characters("normal\r\ntext")).to eq("normal\ntext")
  end

  it "should remove the spacer" do
    expect(fix_bad_characters("A____spacer____A")).to eq("AA")
  end
end
describe "add_paragraphs_to_text" do
# Text following one of these tags stays in the same single paragraph.
%w[a abbr acronym address].each do |tag|
  it "does not add extraneous paragraph breaks after #{tag} tags" do
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}>quack</#{tag}> quack"))
    expect(fragment.xpath(".//p").size).to eq(1)
    expect(fragment.xpath(".//br")).to be_empty
  end
end
# A newline inside <audio>/<video> must not produce paragraph or br markup.
{ "audio" => "source", "video" => "track" }.each do |media, child|
  it "leaves #{media} tags alone" do
    formatted = add_paragraphs_to_text("<#{media}><#{child}>\n</#{media}>")
    expect(formatted).not_to match("<p>")
    expect(formatted).not_to match("<br")
  end
end
it "doesn't break links with images inside them" do
  formatted = add_paragraphs_to_text("<a href='/users/name'><img src='/icon.png'>name</a>")
  expect(Nokogiri::HTML.fragment(formatted).xpath("./p/a/img").size).to eq(1)
end

# Newlines between existing block elements must not become <br> or <p>.
it "does not convert linebreaks after p tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<p>A</p>\n<p>B</p>\n\n<p>C</p>\n\n\n"))
  expect(fragment.xpath("./p").size).to eq(3)
  expect(fragment.xpath(".//br")).to be_empty
end

it "does not convert linebreaks after tables" do
  input = "#{one_cell_table('A')}\n#{one_cell_table('A')}\n\n#{one_cell_table('A')}\n\n\n"
  formatted = add_paragraphs_to_text(input)
  expect(formatted).not_to match("<p>")
  expect(formatted).not_to match("<br")
end
# One, two and three newlines after these block tags all produce no markup.
%w[dl h1 h2 h3 h4 h5 h6 ol pre ul].each do |tag|
  it "does not convert linebreaks after #{tag} tags" do
    input = "<#{tag}>A</#{tag}>\n<#{tag}>B</#{tag}>\n\n<#{tag}>C</#{tag}>\n\n\n"
    formatted = add_paragraphs_to_text(input)
    expect(formatted).not_to match("<p>")
    expect(formatted).not_to match("<br")
  end
end
# Container tags whose bare text content gets wrapped in a paragraph.
%w[blockquote center div].each do |tag|
  it "does not convert linebreaks after #{tag} tags which produce blocks" do
    input = "<#{tag}>A</#{tag}>\n<#{tag}>B</#{tag}>\n\n<#{tag}>C</#{tag}>\n\n\n"
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text(input))
    expect(fragment.xpath("./#{tag}/p").size).to eq(3)
    expect(fragment.xpath(".//br")).to be_empty
  end

  it "does not wrap #{tag} tag with a paragraph" do
    formatted = add_paragraphs_to_text("<#{tag}>A</#{tag}>\n<p>B</p>")
    # This needs XML parsing because HTML5 parser might hide failures
    # by reinterpreting <p><div>_</div></p> as <p></p><div>_</div>
    fragment = Nokogiri::XML.fragment(formatted)
    expect(fragment.xpath("./p").size).to eq(1)
    expect(fragment.xpath("./p/#{tag}").size).to eq(0)
  end

  it "wraps content inside of nested #{tag} tags with a paragraph" do
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}><#{tag}>A</#{tag}></#{tag}>"))
    expect(fragment.xpath(".//p").size).to eq(1)
    expect(fragment.xpath("./#{tag}/#{tag}/p/node()").to_s).to eq("A")
  end

  it "does not wrap paragraphs inside of nested #{tag} tags" do
    input = "<#{tag}><#{tag}><#{tag}><p>A</p></#{tag}><p>B</p></#{tag}></#{tag}>"
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text(input))
    expect(fragment.xpath(".//p").size).to eq(2)
    expect(fragment.xpath("./#{tag}/#{tag}/#{tag}/p/node()").to_s).to eq("A")
    expect(fragment.xpath("./#{tag}/#{tag}/p/node()").to_s).to eq("B")
  end

  it "does not add paragraphs between a #{tag} tag and a paragraph" do
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}>A</#{tag}>\n\n<p>B</p>"))
    expect(fragment.xpath("./#{tag}/following-sibling::p/node()").to_s).to eq("B")
  end
end
# Existing <br> tags are kept as-is: one paragraph, exactly three breaks.
it "does not convert linebreaks after br tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("A<br>B<br>\n\nC<br>\n\n\n"))
  expect(fragment.xpath("./p").size).to eq(1)
  expect(fragment.xpath(".//br").size).to eq(3)
end

# <hr> splits the text into three paragraphs without adding any <br>.
it "does not convert linebreaks after hr tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("A<hr>B<hr>\n\nC<hr>\n\n\n"))
  expect(fragment.xpath("./p").size).to eq(3)
  expect(fragment.xpath(".//br")).to be_empty
end
# Text on either side of a block element gets its own paragraph; the block
# element itself stays at the top level.
it "does not wrap table in p tags" do
  formatted = add_paragraphs_to_text("aa #{one_cell_table('foo')} bb")
  # This needs XML parsing because HTML5 parser might hide issues:
  # Nokogiri::HTML5.fragment('<p>aa <table><tbody><tr><td>foo</td></tr></tbody></table> bb</p>').to_s
  # "<p>aa </p><table><tbody><tr><td>foo</td></tr></tbody></table> bb<p></p>"
  fragment = Nokogiri::XML.fragment(formatted)
  expect(fragment.xpath(".//p").size).to eq(2)
  expect(fragment.xpath("./table").size).to eq(1)
end

%w[figure dl h1 h2 h3 h4 h5 h6 ol pre summary ul].each do |tag|
  it "does not wrap #{tag} in p tags" do
    formatted = add_paragraphs_to_text("aa <#{tag}>foo</#{tag}> bb")
    # This needs XML parsing because HTML5 parser might hide failures
    # by reinterpreting <p><h1>_</h1></p> as <p></p><h1>_</h1>
    fragment = Nokogiri::XML.fragment(formatted)
    expect(fragment.xpath(".//p").size).to eq(2)
    expect(fragment.xpath("./#{tag}/node()").to_s.strip).to eq("foo")
  end
end
it "does not wrap details in p tags" do
  markup = <<~HTML
    aa
    <details>
    <summary>Automated Status: Operational</summary>
    <p>Velocity: 12m/s</p>
    <p>Direction: North</p>
    </details>
    bb
  HTML
  # This needs XML parsing because HTML5 parser might hide failures
  # by reinterpreting <p><details>_</details></p> as <p></p><details>_</details>
  fragment = Nokogiri::XML.fragment(add_paragraphs_to_text(markup))
  # aa, velocity..., direction..., bb
  expect(fragment.xpath(".//p").size).to eq(4)
  expect(fragment.xpath("./p/details").size).to eq(0)
  expect(fragment.xpath("./details/p").size).to eq(2)
  # Only the surrounding "aa" and "bb" become top-level paragraphs.
  expect(fragment.xpath("./p").size).to eq(2)
  expect(fragment.xpath("./p[1]/text()").to_s).to eq("aa")
  expect(fragment.xpath("./p[2]/text()").to_s).to eq("bb")
end
# Newlines between list items are layout only and must not turn into <br>.
%w[ol ul].each do |tag|
  it "does not convert linebreaks inside #{tag} lists" do
    markup = <<~HTML
      <#{tag}>
      <li>A</li>
      <li>B</li>
      </#{tag}>
    HTML
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text(markup))
    expect(fragment.xpath("./#{tag}/li[1]/node()").to_s.strip).to eq("A")
    expect(fragment.xpath("./#{tag}/li[2]/node()").to_s.strip).to eq("B")
    expect(fragment.xpath(".//br")).to be_empty
  end
end
# Newlines between rows and cells are layout only and must not turn into <br>.
it "does not convert linebreaks inside tables" do
  markup = <<~TABLE
    <table>
    <tr>
    <th>A</th>
    <th>B</th>
    </tr>
    <tr>
    <td>C</td>
    <td>D</td>
    </tr>
    </table>
  TABLE
  # The XPaths go through tbody, which the HTML5 parse of the result contains.
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./table/tbody/tr[1]/th[1]/node()").to_s.strip).to eq("A")
  expect(fragment.xpath("./table/tbody/tr[1]/th[2]/node()").to_s.strip).to eq("B")
  expect(fragment.xpath("./table/tbody/tr[2]/td[1]/node()").to_s.strip).to eq("C")
  expect(fragment.xpath("./table/tbody/tr[2]/td[2]/node()").to_s.strip).to eq("D")
  expect(fragment.xpath(".//br")).to be_empty
end
# Newlines between <dt>/<dd> entries are layout only and must not turn into <br>.
it "does not convert linebreaks inside definition lists" do
  markup = <<~HTML
    <dl>
    <dt>A</dt>
    <dd>aaa</dd>
    <dt>B</dt>
    <dd>bbb</dd>
    </dl>
  HTML
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./dl/dt[1]/node()").to_s.strip).to eq("A")
  expect(fragment.xpath("./dl/dd[1]/node()").to_s.strip).to eq("aaa")
  expect(fragment.xpath("./dl/dt[2]/node()").to_s.strip).to eq("B")
  expect(fragment.xpath("./dl/dd[2]/node()").to_s.strip).to eq("bbb")
  expect(fragment.xpath(".//br")).to be_empty
end
# Multi-line text inside <summary> must not be wrapped in paragraphs.
# The XPath is rooted at ./details because <summary> is nested inside
# <details> in this fixture; the previous "./summary/p" could never match,
# so the expectation passed regardless of the output.
it "does not add paragraphs inside summary" do
  html = <<~HTML
    <details>
    <summary>
    Automated
    Status:
    Operational
    </summary>
    <p>Velocity: 12m/s</p>
    <p>Direction: North</p>
    </details>
  HTML
  result = add_paragraphs_to_text(html)
  doc = Nokogiri::HTML.fragment(result)
  # Guard against a vacuous pass: the summary element itself must be found.
  expect(doc.xpath("./details/summary").size).to eq(1)
  expect(doc.xpath("./details/summary/p")).to be_empty
end
# Images separated by newlines inside <figure> must not be wrapped in <p>.
it "does not add paragraphs inside figure" do
  markup = <<~HTML
    <figure>
    <img src="http://example.com/Camera-icon.svg" alt="camera icon">
    <img src="http://example.com/Hand-icon.svg" alt="hand icon">
    </figure>
  HTML
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./figure/p")).to be_empty
end
# Attributes and nested inline markup inside figure/figcaption/summary
# must come out of add_paragraphs_to_text structurally unchanged.
it "allows alt and title attributes on elements inside figure" do
  markup = <<~HTML
    <figure>
    <img src="http://example.com/Camera-icon.svg" alt="camera icon">
    <figcaption title="here is title">Take picture here</figcaption>
    </figure>
  HTML
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./figure/img/@alt").to_s.strip).to eq("camera icon")
  expect(fragment.xpath("./figure/figcaption/@title").to_s.strip).to eq("here is title")
end

it "allows other HTML elements inside figcaption" do
  markup = <<~HTML
    <figure>
    <img src="http://example.com/Camera-icon.svg">
    <figcaption><em>Take picture <a href="http://example.com/link">here</a></em></figcaption>
    </figure>
  HTML
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./figure/figcaption/em/text()").to_s.strip).to eq("Take picture")
  expect(fragment.xpath("./figure/figcaption/em/a/text()").to_s.strip).to eq("here")
  expect(fragment.xpath("./figure/figcaption/em/a/@href").to_s.strip).to eq("http://example.com/link")
end

it "allows other HTML elements inside summary" do
  markup = <<~HTML
    <details>
    <summary><em>Automated Status: <a href="http://example.com/link">Operational</a></em></summary>
    <p>Velocity: 12m/s</p>
    <p>Direction: North</p>
    </details>
  HTML
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(markup))
  expect(fragment.xpath("./details/summary/em/text()").to_s.strip).to eq("Automated Status:")
  expect(fragment.xpath("./details/summary/em/a/text()").to_s.strip).to eq("Operational")
  expect(fragment.xpath("./details/summary/em/a/@href").to_s.strip).to eq("http://example.com/link")
end
# Newlines inside these block tags stay verbatim and the tag is not wrapped.
%w[address h1 h2 h3 h4 h5 h6 p pre].each do |tag|
  it "does not wrap in p and not convert linebreaks inside #{tag} tags" do
    formatted = add_paragraphs_to_text("<#{tag}>A\nB\n\nC\n\n\nD</#{tag}>")
    # This needs XML parsing because HTML5 parser might hide failures
    # by reinterpreting <p><h1>_</h1></p> as <p></p><h1>_</h1>
    fragment = Nokogiri::XML.fragment(formatted)
    expect(fragment.xpath("./#{tag}[1]/node()").to_s.strip).to eq("A\nB\n\nC\n\n\nD")
  end
end

# Newlines inside these inline tags also stay verbatim, but the tag is wrapped in <p>.
%w[a abbr acronym].each do |tag|
  it "wraps in p and not convert linebreaks inside #{tag} tags" do
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}>A\nB\n\nC\n\n\nD</#{tag}>"))
    expect(fragment.xpath("./p/#{tag}[1]/node()").to_s.strip).to eq("A\nB\n\nC\n\n\nD")
  end
end
# Plain text rules: one newline => <br>, two => new paragraph,
# three => an extra paragraph containing &nbsp;.
it "wraps plain text in p tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some text"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to eq("some text")
end

it "converts single linebreak to br" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some\ntext"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to match(%r{some<br/?>\ntext})
end

it "converts double linebreaks to paragraph break" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some\n\ntext"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/node()").to_s.strip).to eq("text")
end

it "converts triple linebreaks into blank paragraph" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some\n\n\ntext"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/node()").to_s.strip).to eq("&nbsp;")
  expect(fragment.xpath("./p[3]/node()").to_s.strip).to eq("text")
end
# Outside a paragraph, runs of <br/> behave like runs of newlines;
# inside an existing <p> they are left untouched.
it "converts double br tags into paragraph break" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some<br/><br/>text"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/node()").to_s.strip).to eq("text")
  expect(fragment.xpath(".//br")).to be_empty
end

it "converts triple br tags into blank paragraph" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("some<br/><br/><br/>text"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/node()").to_s.strip).to eq("&nbsp;")
  expect(fragment.xpath("./p[3]/node()").to_s.strip).to eq("text")
end

it "does not convert double br tags inside p tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<p>some<br/>\n<br/>text</p>"))
  expect(fragment.xpath(".//p").size).to eq(1)
  expect(fragment.xpath(".//br").size).to eq(2)
end

it "does not convert triple br tags inside p tags" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<p>some<br/>\n<br/>\n<br/>text</p>"))
  expect(fragment.xpath(".//p").size).to eq(1)
  expect(fragment.xpath(".//br").size).to eq(3)
end
# An inline tag that spans a paragraph break is closed and reopened so that
# each resulting paragraph contains its own copy of the tag.
%w[
  b big cite code del dfn em i ins kbd q s samp
  small span strike strong sub sup tt u var
].each do |tag|
  it "handles #{tag} inline tags spanning double line breaks" do
    fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<#{tag}>some\n\ntext</#{tag}>"))
    expect(fragment.xpath("./p[1]/#{tag}/node()").to_s.strip).to eq("some")
    expect(fragment.xpath("./p[2]/#{tag}/node()").to_s.strip).to eq("text")
  end

  it "handles #{tag} with an unclosed br tag in it" do
    fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text("<#{tag}>some<br>text</#{tag}>"))
    expect(fragment.xpath("./p[1]/#{tag}[1]").children.to_s.strip).to match(%r{some<br/?>text})
  end
end
# <b> inside <i>: both tags are expected to be present in each paragraph.
it "handles inline tags spanning double line breaks" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("<i>have <b>some\n\ntext</b> yay</i>"))
  expect(fragment.xpath("./p[1]/i/node()").to_s.strip).to match(/\Ahave/)
  expect(fragment.xpath("./p[1]/i/b/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/i/b/node()").to_s.strip).to eq("text")
  expect(fragment.xpath("./p[2]/i/node()").to_s.strip).to match(/ yay\Z/)
end

# <em> surrounded by plain text on both sides of the paragraph break.
it "handles nested inline tags spanning double line breaks" do
  fragment = Nokogiri::HTML5.fragment(add_paragraphs_to_text("have <em>some\n\ntext</em> yay"))
  expect(fragment.xpath("./p[1]/node()").to_s.strip).to match(/\Ahave/)
  expect(fragment.xpath("./p[1]/em/node()").to_s.strip).to eq("some")
  expect(fragment.xpath("./p[2]/em/node()").to_s.strip).to eq("text")
  expect(fragment.xpath("./p[2]/node()").to_s.strip).to match(/ yay\Z/)
end
# Generates three examples for every block-level container listed below.
%w[blockquote center div details].each do |tag|
  it "converts double linebreaks inside #{tag} tag" do
    # The paragraphs are expected inside the container, not around it.
    html = add_paragraphs_to_text("<#{tag}>some\n\ntext</#{tag}>")
    fragment = Nokogiri::HTML5.fragment(html)
    %w[some text].each.with_index(1) do |word, position|
      expect(fragment.xpath("./#{tag}/p[#{position}]/node()").to_s.strip).to eq(word)
    end
  end

  it "doesn't insert extra <p></p> tags before the #{tag} tag" do
    input = "<p>before</p><#{tag}><p>during</p></#{tag}>"
    fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))
    # Exactly the two paragraphs from the input: one at the top level and
    # one inside the container.
    expect(fragment.xpath(".//p").size).to eq(2)
    top_level_paragraph = fragment.xpath("./p[1]").children
    expect(top_level_paragraph.to_s.strip).to eq("before")
    inner_paragraph = fragment.xpath("./#{tag}/p[1]").children
    expect(inner_paragraph.to_s.strip).to eq("during")
  end

  it "creates a paragraph for text immediately following the #{tag} tag" do
    input = "<#{tag}>during</#{tag}>after"
    fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(input))
    # One paragraph is expected inside the container and one for the
    # trailing text.
    expect(fragment.xpath(".//p").size).to eq(2)
    inner_paragraph = fragment.xpath("./#{tag}/p[1]").children
    expect(inner_paragraph.to_s.strip).to eq("during")
    trailing_paragraph = fragment.xpath("./p[1]").children
    expect(trailing_paragraph.to_s.strip).to eq("after")
  end
end
it "wraps text in p before and after existing p tag" do
  # Bare text on either side of an explicit <p> should end up in paragraphs
  # of its own, in document order.
  html = add_paragraphs_to_text("boom\n\n<p>da</p>\n\nyadda")
  fragment = Nokogiri::HTML5.fragment(html)
  %w[boom da yadda].each.with_index(1) do |word, position|
    expect(fragment.xpath("./p[#{position}]/node()").to_s.strip).to eq(word)
  end
end
it "wraps ruby-annotated text in p tags" do
  # The ruby markup is expected to come back unchanged inside one paragraph.
  annotated = "text with <ruby>ルビ<rp> (</rp><rt>RUBY</rt><rp>)</rp></ruby>"
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(annotated))
  paragraph_children = fragment.xpath("./p[1]").children
  expect(paragraph_children.to_s.strip).to eq(annotated)
end
it "keeps attributes of block elements" do
  # The class attribute must still be on the <div> that now wraps the two
  # generated paragraphs.
  html = add_paragraphs_to_text("<div class='foo'>some\n\ntext</div>")
  fragment = Nokogiri::HTML5.fragment(html)
  %w[some text].each.with_index(1) do |word, position|
    expect(fragment.xpath("./div[@class='foo']/p[#{position}]/node()").to_s.strip).to eq(word)
  end
end
it "keeps attributes of inline elements across paragraphs" do
  # The <span> is expected in both paragraphs, each copy carrying the
  # original class attribute.
  html = add_paragraphs_to_text("<span class='foo'>some\n\ntext</span>")
  fragment = Nokogiri::HTML5.fragment(html)
  %w[some text].each.with_index(1) do |word, position|
    expect(fragment.xpath("./p[#{position}]/span[@class='foo']/node()").to_s.strip).to eq(word)
  end
end
it "handles two classes" do
  # Both class names must remain findable on the paragraph after processing.
  html = add_paragraphs_to_text('<p class="foo bar">foobar</p>')
  fragment = Nokogiri::HTML5.fragment(html)
  by_foo = fragment.xpath("./p[contains(@class, 'foo')]/node()")
  expect(by_foo.to_s.strip).to eq("foobar")
  by_bar = fragment.xpath("./p[contains(@class, 'bar')]/node()")
  expect(by_bar.to_s.strip).to eq("foobar")
end
it "closes unclosed tag within other tag" do
  # The input never closes <em>; the output should still nest it properly
  # inside <strong> within a paragraph.
  output = add_paragraphs_to_text("<strong><em>unclosed</strong>")
  fragment = Nokogiri::HTML5.fragment(output)
  innermost = fragment.xpath("./p/strong/em/node()")
  expect(innermost.to_s.strip).to eq("unclosed")
end
it "closes unclosed rt tags" do
  # The input omits </rt>; the raw output string should contain it.
  output = add_paragraphs_to_text("<ruby>big text<rt>small text</ruby>")
  expect(output).to include("<ruby>big text<rt>small text</rt></ruby>")
end
it "closes unclosed rp tag" do
  # The second <rp> is never closed in the input; the raw output string
  # should contain the missing </rp>.
  output = add_paragraphs_to_text("<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</ruby>")
  expect(output).to include("<ruby>big text<rp>(</rp><rt>small text</rt><rp>)</rp></ruby>")
end
it "re-nests mis-nested tags" do
  # The closing tags arrive in the wrong order; the output should have
  # <strong> properly nested inside <em>.
  output = add_paragraphs_to_text("some <em><strong>text</em></strong>")
  fragment = Nokogiri::HTML5.fragment(output)
  innermost = fragment.xpath("./p[1]/em/strong/node()")
  expect(innermost.to_s.strip).to eq("text")
end
# Opening and closing tags differ in case; both <em> elements are expected
# to come out as separate, correctly closed elements in one paragraph.
it "handles mixed uppercase/lowercase html tags" do
  result = add_paragraphs_to_text("<em>mixed</EM> <EM>stuff</em>")
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./p[1]/em[1]/node()").to_s.strip).to eq("mixed")
  expect(doc.xpath("./p[1]/em[2]/node()").to_s.strip).to eq("stuff")
end
# Generates one example per inline tag: two sibling elements separated by a
# single space must share one paragraph, with the space preserved as the
# paragraph's only direct text node.
%w[b big cite code del dfn em i ins kbd q s samp
   small span strike strong sub sup tt u var].each do |tag|
  it "wraps consecutive #{tag} inline tags in one paragraph" do
    result = add_paragraphs_to_text("<#{tag}>hey</#{tag}> <#{tag}>ho</#{tag}>")
    doc = Nokogiri::HTML5.fragment(result)
    expect(doc.xpath("./p[1]/#{tag}[1]/node()").to_s.strip).to eq("hey")
    expect(doc.xpath("./p[1]/#{tag}[2]/node()").to_s.strip).to eq("ho")
    expect(doc.xpath("./p[1]/text()").to_s).to eq(" ")
  end
end
# One example per entity: the entity must come back verbatim, wrapped in a
# single paragraph.
%w[&gt; &lt; &amp;].each do |entity|
  it "leaves #{entity} alone" do
    wrapped = "<p>#{entity}</p>"
    expect(add_paragraphs_to_text(entity)).to eq(wrapped)
  end
end
it "does not add empty p tags" do
  # Leading bare text followed by two explicit paragraphs: exactly three
  # paragraphs are expected, none of them empty.
  html = add_paragraphs_to_text("A<p>B</p><p>C</p>")
  fragment = Nokogiri::HTML5.fragment(html)
  expect(fragment.xpath("./p").size).to eq(3)
  %w[A B C].each.with_index(1) do |letter, position|
    expect(fragment.xpath("./p[#{position}]/node()").to_s.strip).to eq(letter)
  end
end
it "does not leave p inside i" do
  # No <p> may remain as a direct child of an <i> in the output.
  html = add_paragraphs_to_text("<i><p>foo</p><p>bar</p></i>")
  paragraphs_inside_italics = Nokogiri::HTML5.fragment(html).xpath(".//i/p")
  expect(paragraphs_inside_italics).to be_empty
end
# The second <colgroup> in the input has no closing tag. The assertions
# check that both colgroups and all four cells survive; note that the
# XPath expressions expect a <tbody> even though the input has none.
it "handles table tags that don't need closing" do
  # Double quotes need no escaping inside an unquoted squiggly heredoc.
  html = <<~TABLE
    <table>
    <colgroup align="left"><col width="20"></colgroup>
    <colgroup align="right">
    <tr>
    <th>A</th>
    <th>B</th>
    </tr>
    <tr>
    <td>C</td>
    <td>D</td>
    </tr>
    </table>
  TABLE
  result = add_paragraphs_to_text(html)
  doc = Nokogiri::HTML5.fragment(result)
  expect(doc.xpath("./table/colgroup[@align='left']/col[@width='20']").size).to eq(1)
  expect(doc.xpath("./table/colgroup[@align='right']").size).to eq(1)
  expect(doc.xpath("./table/tbody/tr[1]/th[1]/node()").to_s.strip).to eq("A")
  expect(doc.xpath("./table/tbody/tr[1]/th[2]/node()").to_s.strip).to eq("B")
  expect(doc.xpath("./table/tbody/tr[2]/td[1]/node()").to_s.strip).to eq("C")
  expect(doc.xpath("./table/tbody/tr[2]/td[2]/node()").to_s.strip).to eq("D")
end
it "doesn't break when an attribute includes a single quote" do
  # The apostrophe inside the double-quoted title must not cut the
  # attribute value short.
  html = <<~HTML
    <span title="Don't stop me now">Cause I'm having a good time</span>
  HTML
  fragment = Nokogiri::HTML.fragment(add_paragraphs_to_text(html))
  span = fragment.xpath(".//span").first
  expect(span.attribute("title").value).to eq("Don't stop me now")
end
# Escaped markup must stay escaped while newlines are turned into
# paragraphs.
it "doesn't unescape escaped text when processing newlines" do
  # A blank line separates the two escaped tags so that the input contains
  # a paragraph break; the assertions below look for two paragraphs.
  result = add_paragraphs_to_text(<<~HTML.strip)
    &lt;span&gt;

    &lt;div&gt;
  HTML
  doc = Nokogiri::HTML.fragment(result)
  expect(doc.xpath("./p[1]").children.to_s.strip).to eq("&lt;span&gt;")
  expect(doc.xpath("./p[2]").children.to_s.strip).to eq("&lt;div&gt;")
end
it "fails gracefully for missing ending quotation marks" do
  # Marked pending, so RSpec expects the assertions below to fail for now.
  pending "Opened enhancement request with Nokogiri"
  html = add_paragraphs_to_text("<strong><a href='ao3.org>mylink</a></strong>")
  link = Nokogiri::HTML5.fragment(html).xpath(".//a").first
  # The unterminated href must not swallow the following markup.
  expect(link.attribute("href").value).not_to match(/strong/)
  expect(link.text.strip).to eq("mylink")
end
it "fails gracefully for missing starting quotation marks" do
  # With no opening quote, the stray closing quote is expected to be kept
  # as part of the href value.
  html = add_paragraphs_to_text('<strong><a href=ao3.org">mylink</a></strong>')
  link = Nokogiri::HTML5.fragment(html).xpath(".//a").first
  expect(link.attribute("href").value).to eq('ao3.org"')
  expect(link.text.strip).to eq("mylink")
end
end
# Examples for add_break_between_paragraphs, which is expected to put a
# <br /> between a closing and an opening paragraph tag.
describe "add_break_between_paragraphs" do
  it "adds <br /> between paragraphs" do
    input = "<p>Hi!</p><p>I need more space.</p>"
    expected = "<p>Hi!</p><br /><p>I need more space.</p>"
    expect(add_break_between_paragraphs(input)).to eq(expected)
  end

  it "removes any blank spaces before, between, and after the paragraph marks" do
    # The input is only the seam between two paragraphs, padded with spaces.
    input = "bla. </p> <p> Bla"
    expected = "bla.</p><br /><p>Bla"
    expect(add_break_between_paragraphs(input)).to eq(expected)
  end
end
# Examples for strip_images. Without keep_src (or with keep_src: false) the
# whole <img> tag is expected to disappear; with keep_src: true the angle
# brackets and any trailing slash go, but the tag name and attributes stay.
describe "strip_images" do
  # Expected output whenever the image tag is removed entirely.
  let(:result) { "Hi! Bye" }

  context "without keep_src" do
    it "removes the img tag entirely when the src uses double quotes" do
      string = 'Hi! <img src="http://example.org/image.png" /> Bye'
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src uses single quotes" do
      string = "Hi! <img src='http://example.org/image.png'> Bye"
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src uses mismatched quotes" do
      string = "Hi! <img src=\"http://example.org/image.png'> Bye"
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing" do
      string = 'Hi! <img alt="a11y"> Bye'
      expect(strip_images(string)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing a closing quotation mark" do
      string = 'Hi! <img src="http://example.org/image.png /> Bye'
      expect(strip_images(string)).to eq(result)
    end
  end

  context "with keep_src: false" do
    it "removes the img tag entirely when the src uses double quotes" do
      string = 'Hi! <img src="http://example.org/image.png" /> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src uses single quotes" do
      string = "Hi! <img src='http://example.org/image.png'> Bye"
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src uses mismatched quotes" do
      string = "Hi! <img src=\"http://example.org/image.png'> Bye"
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing" do
      string = 'Hi! <img alt="a11y"> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end

    it "removes the img tag entirely when the src is missing a closing quotation mark" do
      string = 'Hi! <img src="http://example.org/image.png /> Bye'
      expect(strip_images(string, keep_src: false)).to eq(result)
    end
  end

  context "with keep_src: true" do
    # These examples use a local `expected` so the group-level `result`
    # is not shadowed.
    it "keeps the img tag attributes" do
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\'> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end

    it "does not keep tag trailing slash with a space" do
      # Input ends the tag with " />" (space before the slash).
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\' /> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end

    it "does not keep tag trailing slash without a space" do
      # Input ends the tag with "/>" directly after the last attribute.
      string = 'Hi! <img src="http://example.org/image.png" alt=\'something\'/> Bye'
      expected = 'Hi! img src="http://example.org/image.png" alt=\'something\' Bye'
      expect(strip_images(string, keep_src: true)).to eq(expected)
    end
  end
end
end