# frozen_string_literal: true
require "addressable/uri"
require "cgi"
module OtwSanitize
# Creates a Sanitize transformer to sanitize embedded media
class EmbedSanitizer
# Patterns identifying the embed hosts/paths that are allowed, keyed by a
# short source name.
#
# Every pattern is anchored with \A (start of the string) rather than ^
# (start of any line): with ^, a URL containing an embedded newline could
# place an allowed host on a later line and still match.
#
# NOTE(review): presumably matched against a URL whose protocol and "www."
# prefix were already stripped — confirm against the caller.
ALLOWLIST_REGEXES = {
  "4shared": %r{\A4shared\.com/web/embed},
  audiocom: %r{\Aaudio\.com/embed/audio/},
  archiveorg: %r{\Aarchive\.org/embed/},
  bilibili: %r{\A(player\.)?bilibili\.com/},
  criticalcommons: %r{\Acriticalcommons\.org/},
  eighttracks: %r{\A8tracks\.com/},
  google: %r{\Agoogle\.com/},
  podfic: %r{\Apodfic\.com/},
  soundcloud: %r{\A(w\.)?soundcloud\.com/},
  spotify: %r{\A(open\.)?spotify\.com/},
  viddersnet: %r{\Avidders\.net/},
  viddertube: %r{\Aviddertube\.com/},
  vimeo: %r{\A(player\.)?vimeo\.com/},
  youtube: %r{\Ayoutube(-nocookie)?\.com/}
}.freeze
# Source names (keys of the allowlist) flagged as allowing flashvars.
# NOTE(review): the code consuming this list is outside this view —
# presumably it decides whether a flashvars attribute is kept; confirm.
ALLOWS_FLASHVARS = %i[
  criticalcommons
  eighttracks
  google
  podfic
  soundcloud
  spotify
  viddersnet
].freeze
# Source names (keys of the allowlist) flagged as supporting HTTPS.
# NOTE(review): the code consuming this list is outside this view —
# presumably it is what the HTTPS-upgrade step checks; confirm.
SUPPORTS_HTTPS = %i[
  4shared
  audiocom
  archiveorg
  bilibili
  eighttracks
  podfic
  soundcloud
  spotify
  viddersnet
  viddertube
  vimeo
  youtube
].freeze
# Builds the callable transformer handed to the sanitizer.
#
# @return [Proc] a one-argument lambda taking the sanitizer's env hash. It
#   returns nil when env[:is_allowlisted] is truthy (the node was already
#   accepted); otherwise it returns whatever #sanitized_node produces for
#   env[:node].
def self.transformer
  ->(env) { new(env[:node]).sanitized_node unless env[:is_allowlisted] }
end
# The node this sanitizer instance was built around.
attr_reader :node
# Takes a Nokogiri node
#
# @param node the node to examine; stored as-is and exposed through #node
def initialize(node)
@node = node
end
# Entry point: decides whether this node is an embed worth handling and,
# if so, sanitizes it.
#
# Bails out with nil unless the node is an embed node and both a source URL
# and a recognised source are available. Otherwise runs the HTTPS step and
# dispatches on the parent element: nodes inside an "object" go through
# sanitize_object, everything else through sanitize_embed.
#
# @return the result of sanitize_object or sanitize_embed, or nil when the
#   node is skipped
def sanitized_node
  return unless embed_node? && source_url && source

  ensure_https
  parent_name == "object" ? sanitize_object : sanitize_embed
end
# Name of the wrapped node, normalised for comparison.
#
# @return [String] the node's name converted to a string and lowercased
#   (an empty string when the name is nil)
def node_name
  tag = node.name
  tag.to_s.downcase
end
# Forwards #parent to the wrapped node, so `parent` here means node.parent.
# NOTE(review): `delegate` is presumably ActiveSupport's Module#delegate —
# confirm it is loaded wherever this class is used.
delegate :parent, to: :node
# Name of the wrapped node's parent, normalised for comparison.
#
# @return [String, nil] the parent's name converted to a string and
#   lowercased, or nil when there is no parent
def parent_name
  return unless parent

  parent.name.to_s.downcase
end
# Since the transformer receives the deepest nodes first, we look for a
# element whose parent is an