289 lines
11 KiB
Perl
289 lines
11 KiB
Perl
|
|
#!/usr/bin/perl
|
||
|
|
#
|
||
|
|
# This code was based on code originally created by the LiveJournal project
|
||
|
|
# owned and operated by Live Journal, Inc. The code has been modified and expanded
|
||
|
|
# by Dreamwidth Studios, LLC. These files were originally licensed under
|
||
|
|
# the terms of the license supplied by Live Journal, Inc, which can
|
||
|
|
# currently be found at:
|
||
|
|
#
|
||
|
|
# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt
|
||
|
|
#
|
||
|
|
# In accordance with the original license, this code and all its
|
||
|
|
# modifications are provided under the GNU General Public License.
|
||
|
|
# A copy of that license can be found in the LICENSE file included as
|
||
|
|
# part of this distribution.
|
||
|
|
#
|
||
|
|
#
|
||
|
|
# DW::Hooks::EmbedWhitelist
|
||
|
|
#
|
||
|
|
# Keep a whitelist of trusted sites which we trust for certain kinds of embeds
|
||
|
|
#
|
||
|
|
# Authors:
|
||
|
|
# Afuna <coder.dw@afunamatata.com>
|
||
|
|
#
|
||
|
|
# Copyright (c) 2011 by Dreamwidth Studios, LLC.
|
||
|
|
|
||
|
|
package DW::Hooks::EmbedWhitelist;
|
||
|
|
|
||
|
|
use strict;
|
||
|
|
use LJ::Hooks;
|
||
|
|
use URI;
|
||
|
|
|
||
|
|
# For internal use only.
#
# Returns true when $domain_from_uri is $want_domain itself or any subdomain
# of it. This is used when sites may offer embeds from multiple subdomains,
# e.g., www, www1, etc.
#
# Arguments:
#   $want_domain      the trusted domain; matched literally (it is quoted
#                     with \Q...\E, so its dots are not wildcards)
#   $domain_from_uri  the host taken from the URI being checked
sub match_subdomain {
    my ( $want_domain, $domain_from_uri ) = @_;

    # \A and \z instead of ^ and $: a bare "$" also matches just before a
    # trailing newline, which would accept a host such as "youtube.com\n".
    return $domain_from_uri =~ /\A(?:[\w.-]*\.)?\Q$want_domain\E\z/;
}
|
||
|
|
|
||
|
|
# For internal use only.
#
# Returns true when the pattern $want_path matches the whole of
# $path_from_uri, rather than just some part of it.
#
# Arguments:
#   $want_path      a pattern (qr// object or pattern string) without anchors
#   $path_from_uri  the path taken from the URI being checked
sub match_full_path {
    my ( $want_path, $path_from_uri ) = @_;

    # The pattern is grouped so that a top-level alternation stays inside the
    # anchors, and anchored with \A / \z because "$" would also accept a
    # trailing newline after the path.
    return $path_from_uri =~ /\A(?:$want_path)\z/;
}
|
||
|
|
|
||
|
|
# Simple whitelist: exact host name => [ path regex, https flag ].
#
# An embed URL is accepted when its host is a key of this hash and its path
# (query string excluded) matches the regex. The second element records
# whether the site supports https or not.
my %host_path_match = (

    # regex, whether this supports https or not
    "www.4shared.com" => [ qr!^/web/embed/file/!, 1 ],
    "8tracks.com"     => [ qr!^/mixes/!,          0 ],

    "airtable.com"  => [ qr!^/embed/!, 1 ],
    "archive.org"   => [ qr!^/embed/!, 1 ],
    "audiomack.com" => [ qr!^/embed/!, 1 ],

    "bandcamp.com"                => [ qr!^/EmbeddedPlayer/!, 1 ],
    "player.bilibili.com"         => [ qr!^/player\.html$!,   1 ],
    "blip.tv"                     => [ qr!^/play/!,           1 ],
    "percolate.blogtalkradio.com" => [ qr!^/offsiteplayer$!,  1 ],
    "app.box.com"                 => [ qr!^/embed/s/!,        1 ],

    "chirb.it"                => [ qr!^/wp/!,             1 ],
    "codepen.io"              => [ qr!^/enxaneta/embed/!, 1 ],
    "coub.com"                => [ qr!^/embed/!,          1 ],
    "criticalcommons.org"     => [ qr!^/embed$!,          1 ],
    "www.criticalcommons.org" => [ qr!/embed_view$!,      0 ],

    "www.dailymotion.com" => [ qr!^/embed/video/!,                  1 ],
    "diode.zone"          => [ qr!^/videos/embed/[0-9a-fA-F\-]{36}!, 1 ],
    "dotsub.com"          => [ qr!^/media/!,                        1 ],
    "discordapp.com"      => [ qr!^/widget$!,                       1 ],

    "episodecalendar.com" => [ qr!^/icalendar/!, 0 ],

    "www.flickr.com"     => [ qr!/player/$!, 1 ],
    "www.funnyordie.com" => [ qr!/embed/!,   1 ],

    "embed.gettyimages.com" => [ qr!^/embed/!,                            1 ],
    "getyarn.io"            => [ qr!^/yarn-clip/embed/[0-9a-fA-F\-]{36}!, 1 ],
    "www.goodreads.com"     => [ qr!^/widgets/!,                          1 ],
    "giphy.com"             => [ qr!^/embed/\w+!,                         1 ],

    "maps.google.com"     => [ qr!^/maps!,                    1 ],
    "www.google.com"      => [ qr!^/(calendar/|maps/embed)!,  1 ],
    "calendar.google.com" => [ qr!^/calendar/!,               1 ],

    # drawings do not need to be whitelisted as they are images.
    # forms aren't being allowed for security concerns.
    "docs.google.com"        => [ qr!^/(document|spreadsheets?|presentation)/!, 1 ],
    "books.google.com"       => [ qr!^/ngrams/!,                                1 ],
    "drive.google.com"       => [ qr!^/file/d/[a-zA-Z0-9]+/preview$!,           1 ],
    "player.gimletmedia.com" => [ qr!^/\w+$!,                                   1 ],

    "imgur.com"     => [ qr!^/a/.+?/embed!,     1 ],
    "instagram.com" => [ qr!^/p/.*/embed/$!,    1 ],
    "www.imdb.com"  => [ qr!^/videoembed/\w+$!, 0 ],

    "jsfiddle.net" => [ qr!/embedded/$!, 1 ],

    "www.kickstarter.com" => [ qr!/widget/[a-zA-Z]+\.html$!, 1 ],

    "html5-player.libsyn.com" => [ qr!^/embed/!,          1 ],
    "lichess.org"             => [ qr!/study/embed/!,     1 ],
    "www.loc.gov"             => [ qr!/item/[a-z0-9]+/$!, 1 ],

    "makertube.net"    => [ qr!^/videos/embed/[0-9a-fA-F\-]{36}!, 1 ],
    "mega.nz"          => [ qr!^/embed/!,                         1 ],
    "www.mixcloud.com" => [ qr!^/widget/iframe/$!,                1 ],
    "mixstep.co"       => [ qr!^/embed/!,                         1 ],
    "www.msnbc.com"    => [ qr!^/msnbc/embedded-video/\w+!,       1 ],
    "my.mail.ru"       => [ qr!^/video/embed/\d+!,                1 ],

    "nekocap.com"      => [ qr!^/view/[a-zA-Z0-9]+$!,                  1 ],
    "ext.nicovideo.jp" => [ qr!^/thumb/!,                              0 ],
    "noisetrade.com"   => [ qr!^/service/widgetv2/!,                   1 ],
    "www.npr.org"      => [ qr!^/templates/event/embeddedVideo\.php!,  1 ],

    "onedrive.live.com" => [ qr!^/embed$!, 1 ],

    "player.pbs.org" => [ qr!^/viralplayer/[0-9]+!,        1 ],
    "playmoss.com"   => [ qr!^/embed/!,                    1 ],
    "www.plurk.com"  => [ qr!^/getWidget$!,                1 ],
    "pastebin.com"   => [ qr!^/embed_iframe/\w+$!,         1 ],
    "podomatic.com"  => [ qr!^/embed/html5/episode/\d*!,   1 ],

    "www.random.org"       => [ qr!^/widgets/integers/iframe\.php$!,           1 ],
    "www.redditmedia.com"  => [ qr!^/r/\w+/comments/\w+/\w+/$!,                1 ],
    "www.reverbnation.com" => [ qr!^/widget_code/html_widget/artist_\d+$!,     1 ],
    "rumble.com"           => [ qr!^/embed/[a-zA-Z0-9]+/$!,                    1 ],
    "rutube.ru"            => [ qr!^/play/embed/[0-9]+$!,                      1 ],

    # best guess; language parameter before /player may vary
    "www.sbs.com.au"     => [ qr!/player/embed/!,          0 ],
    "scratch.mit.edu"    => [ qr!^/projects/embed/!,       1 ],
    "www.scribd.com"     => [ qr!^/embeds/!,               1 ],
    "www.slideshare.net" => [ qr!^/slideshow/embed_code/!, 1 ],
    "api.smugmug.com"    => [ qr!^/services/embed/\w+$!,   1 ],
    "w.soundcloud.com"   => [ qr!^/player/!,               1 ],
    "embed.spotify.com"  => [ qr!^/$!,                     1 ],

    # either the bare root path or anything under /embed/; the alternation is
    # grouped so that both branches are anchored at the start of the path
    "open.spotify.com" => [ qr!^/(?:embed/[/\w]+|$)!,            1 ],
    "www.strava.com"   => [ qr!^/activities/\d+/embed/\w+$!,     1 ],
    "streamable.com"   => [ qr!^/[eos]/!,                        1 ],

    "embed.ted.com" => [ qr!^/talks/!, 1 ],

    "vid.me"           => [ qr!^/e/!,         1 ],
    "player.vimeo.com" => [ qr!^/video/\d+$!, 1 ],

    # Videos seemed to use an 11-character identification; may need to be changed
    "vine.co" => [ qr!^/v/[a-zA-Z0-9]{11}/embed/simple$!, 1 ],
    "vk.com"  => [ qr!^/video_ext\.php$!,                 1 ],

    "fast.wistia.com" => [ qr!^/embed/iframe/\w+$!, 1 ],

    # don't think the last part can include caps; amend if necessary
    "video.yandex.ru" => [ qr!^/iframe/[\-\w]+/[a-z0-9]+\.\d{4}/?$!, 1 ],

    "www.zippcast.com" => [ qr!^/videoview\.php$!, 0 ],

);
|
||
|
|
|
||
|
|
# Matchers for sites whose embed URLs need more than a host => path regex
# (subdomains, query strings, fragments). Each value is a sub that is called
# with the parsed URI object and returns ( 1, 1 ) when the URL is an accepted
# embed, or nothing when it is not.
#
# note: when deciding whether a URL is allowed only the values are checked;
# each matcher tests the host itself. The keys are for reference, and they
# are also what 'list_iframe_embed_domains' reports for these sites.
my %complex_match = (
    "youtube.com" => sub {
        my ($uri) = @_;

        ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html)
        return
            unless match_subdomain( "youtube.com", $uri->host )
            || match_subdomain( "youtube-nocookie.com", $uri->host );

        return ( 1, 1 ) if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri->path );
        return;
    },

    "commons.wikimedia.org" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "commons.wikimedia.org";

        return ( 1, 1 )
            if $uri->path =~ m!^/wiki/File:! && $uri->query =~ m/embedplayer=yes/;
        return;
    },

    "turner.com" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "i.cdn.turner.com";

        # the dot before "swf" is escaped so only a real .swf file name matches
        return ( 1, 1 )
            if $uri->path =~ m!/cnn_\d+x\d+_embed\.swf$!
            && $uri->query =~ m/^context=embed&videoId=/;
        return;
    },

    "player.theplatform.com" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "player.theplatform.com";

        return ( 1, 1 )
            if $uri->path =~ m/MSNBCEmbeddedOffSite/ && $uri->query =~ m/^guid=/;
        return;
    },

    "www.facebook.com" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "www.facebook.com";

        # the href parameter must point back at www.facebook.com; its dots are
        # escaped so that no other character is accepted in their place
        return ( 1, 1 )
            if $uri->path eq '/plugins/video.php'
            && $uri->query =~
            m/^(height=\d+&)?href=https%3A%2F%2Fwww\.facebook\.com%2F[^%]+%2Fvideos%2F/;
        return;
    },

    "www.jigsawplanet.com" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "www.jigsawplanet.com";

        return ( 1, 1 ) if $uri->query =~ m/rc=play/;
        return;
    },

    "screen.yahoo.com" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "screen.yahoo.com";

        return ( 1, 1 ) if $uri->query =~ m/format=embed/;
        return;
    },

    "livejournal.com" => sub {
        my ($uri) = @_;
        return unless match_subdomain( "livejournal.com", $uri->host );

        return ( 1, 1 )
            if match_full_path( qr!/\d+\.html!, $uri->path ) && $uri->query =~ m/embed/;
        return;
    },

    "music.yandex.ru" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "music.yandex.ru";

        # the track is identified in the URL fragment, not the path or query
        return ( 1, 1 ) if $uri->fragment =~ m!track/\d+/\d+!;
        return;
    },

    "player.twitch.tv" => sub {
        my ($uri) = @_;
        return unless $uri->host eq "player.twitch.tv";

        return ( 1, 1 ) if $uri->query =~ m/video=v\d+/;
        return;
    },
);
|
||
|
|
|
||
|
|
# Hook 'allow_iframe_embeds': decide whether $embed_url may be used as the
# source of an embedded iframe.
#
# Arguments:
#   $embed_url  the URL to check; network-relative URLs ('//host/path') are
#               treated as http
#   %opts       accepted, but not consulted by this implementation
#
# Returns 0 when the URL is not whitelisted. Otherwise returns a list whose
# first element is 1: for a host found in %host_path_match the second element
# is that entry's "supports https" flag, and for a %complex_match matcher the
# list is exactly what the matcher returned.
LJ::Hooks::register_hook(
    'allow_iframe_embeds',
    sub {
        my ( $embed_url, %opts ) = @_;

        return 0 unless $embed_url;

        # the URI module hates network-relative URIs, eg '//youtube.com'
        if ( substr( $embed_url, 0, 2 ) eq '//' ) {
            $embed_url = 'http:' . $embed_url;
        }

        my $parsed_uri = URI->new($embed_url);

        # a relative URL has no scheme at all; only http(s) is ever allowed
        my $uri_scheme = $parsed_uri->scheme;
        return 0
            unless defined $uri_scheme
            && ( $uri_scheme eq "http" || $uri_scheme eq "https" );

        # every whitelist entry names a host, so a URL without one can't match
        my $uri_host = $parsed_uri->host;
        return 0 unless defined $uri_host && length $uri_host;

        my $uri_path = $parsed_uri->path;    # not including query

        # simple case: exact host name plus a regex for the path
        if ( my $host_details = $host_path_match{$uri_host} ) {
            my ( $path_regex, $supports_https ) = @$host_details;
            return ( 1, $supports_https )
                if $path_regex && ( $uri_path =~ $path_regex );
        }

        # complex case: return the result of the first matcher that accepts
        # the URL, untouched. Filtering the combined results of all matchers
        # for true values would strip a false https flag and could glue the
        # results of several matchers together.
        foreach my $matcher ( values %complex_match ) {
            my @verdict = $matcher->($parsed_uri);
            return @verdict if @verdict && $verdict[0];
        }

        return 0;
    }
);
|
||
|
|
|
||
|
|
# Hook 'list_iframe_embed_domains': returns an arrayref holding every key of
# %host_path_match and %complex_match, sorted by the site's own name label
# (e.g. "google" for "maps.google.com") and then by the full domain name.
LJ::Hooks::register_hook(
    'list_iframe_embed_domains',
    sub {
        my @domains = ( keys %host_path_match, keys %complex_match );

        # Work out each domain's sort label once. It is normally the label
        # just before the last one; for names ending in ".co.xx" / ".com.xx"
        # it is one label further left (e.g. "sbs" for "www.sbs.com.au").
        my %label_for;
        foreach my $domain (@domains) {
            my @labels = split /\./, $domain;
            my $offset = -2;
            $offset = -3 if $domain =~ /\.com?\.\w+$/;
            $label_for{$domain} = $labels[$offset];
        }

        my @ordered =
            sort { $label_for{$a} cmp $label_for{$b} || $a cmp $b } @domains;

        return \@ordered;
    }
);
|
||
|
|
|
||
|
|
1;
|