#!/usr/bin/perl # # This code was forked from the LiveJournal project owned and operated # by Live Journal, Inc. The code has been modified and expanded by # Dreamwidth Studios, LLC. These files were originally licensed under # the terms of the license supplied by Live Journal, Inc, which can # currently be found at: # # http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt # # In accordance with the original license, this code and all its # modifications are provided under the GNU General Public License. # A copy of that license can be found in the LICENSE file included as # part of this distribution. package Apache::LiveJournal; use strict; no warnings 'uninitialized'; use Apache2::Const qw/ :common REDIRECT HTTP_NOT_MODIFIED HTTP_MOVED_PERMANENTLY HTTP_MOVED_TEMPORARILY M_TRACE M_OPTIONS /; use Carp qw/ croak confess /; use Compress::Zlib; use Cwd qw/abs_path/; use Fcntl ':mode'; use Apache::LiveJournal::PalImg; use LJ::ModuleCheck; use LJ::PageStats; use LJ::Protocol; use LJ::S2; use LJ::URI; use DW::Auth; use DW::BlobStore; use DW::Captcha; use DW::Routing; use DW::Template; use DW::VirtualGift; use DW::RateLimit; BEGIN { require "ljlib.pl"; } my %RQ; # per-request data my %REDIR; my ( $TOR_UPDATE_TIME, %TOR_EXITS ); my %FILE_LOOKUP_CACHE; # Mapping of MIME types to image types understood by the blob functions. my %MimeTypeMapd6 = ( 'G' => 'gif', 'J' => 'jpg', 'P' => 'png', ); # redirect data. foreach my $file ( 'redirect.dat', 'redirect-local.dat' ) { open( REDIR, "$LJ::HOME/cgi-bin/$file" ) or next; while () { next unless (/^(\S+)\s+(\S+)/); my ( $src, $dest ) = ( $1, $2 ); $REDIR{$src} = $dest; } close REDIR; } my @req_hosts; # client IP, and/or all proxies, real or claimed # init handler (PostReadRequest) sub handler { my $apache_r = shift; # only perform this once in case of internal redirects if ( $apache_r->is_initial_req ) { $apache_r->push_handlers( PerlCleanupHandler => sub { %RQ = () } ); $apache_r->push_handlers( PerlCleanupHandler => "LJ::end_request" ); if ($LJ::TRUST_X_HEADERS) { # if we're behind a lite mod_proxy front-end, we need to trick future handlers # into thinking they know the real remote IP address. problem is, it's complicated # by the fact that mod_proxy did nothing, requiring mod_proxy_add_forward, then # decided to do X-Forwarded-For, then did X-Forwarded-Host, so we have to deal # with all permutations of versions, hence all the ugliness. Furthermore, we might # be behind other types of proxies and loadbalancers that we trust, but the user # might have their own, so we should distinguish trusted proxies from other IPs. @req_hosts = ( $apache_r->connection->client_ip ); if ( my $forward = $apache_r->headers_in->{'X-Forwarded-For'} ) { my @hosts = split( /\s*,\s*/, $forward ); if (@hosts) { # Completely ignore original client ip here, since it may belong to # the previous pipelined request on this connection to this Apache worker. @req_hosts = keys %{ { map { $_ => 1 } @hosts } }; my $real; if ( ref $LJ::IS_TRUSTED_PROXY eq 'CODE' ) { # Find last IP in X-Forwarded-For that isn't a trusted proxy. do { $real = pop @hosts; } while ( @hosts && $LJ::IS_TRUSTED_PROXY->($real) ); } else { # Trust everything by default, real client IP is first. $real = shift @hosts; @hosts = (); } $apache_r->connection->client_ip($real); } $apache_r->headers_in->{'X-Forwarded-For'} = join( ", ", @hosts ); } # and now, deal with getting the right Host header if ( $_ = $apache_r->headers_in->{'X-Host'} ) { $apache_r->headers_in->{'Host'} = $_; } elsif ( $_ = $apache_r->headers_in->{'X-Forwarded-Host'} ) { $apache_r->headers_in->{'Host'} = $_; } } # reload libraries that might've changed if ( $LJ::IS_DEV_SERVER && LJ::is_enabled('module_reload') ) { my %to_reload; while ( my ( $file, $mod ) = each %LJ::LIB_MOD_TIME ) { my $cur_mod = ( stat($file) )[9]; next if $cur_mod == $mod; $to_reload{$file} = 1; } foreach my $key ( keys %INC ) { my $file = $INC{$key}; delete $INC{$key} if $to_reload{$file}; } foreach my $file ( keys %to_reload ) { print STDERR "[$$] Reloading file: $file.\n"; my %reloaded; local $SIG{__WARN__} = sub { if ( $_[0] =~ m/^Subroutine (\S+) redefined at / ) { warn @_ if ( $reloaded{$1}++ ); } else { warn(@_); } }; my $good = do $file; if ($good) { $LJ::LIB_MOD_TIME{$file} = ( stat($file) )[9]; } else { die "Failed to reload module [$file] due to error: $@\n"; } } } } $apache_r->set_handlers( PerlTransHandler => [ \&trans ] ); return OK; } sub redir { my ( $apache_r, $url, $code ) = @_; $apache_r->content_type("text/html"); $apache_r->headers_out->{Location} = $url; return $code || REDIRECT; } # send the user to the URL for them to get their domain session cookie sub remote_domsess_bounce { return redir( BML::get_request(), LJ::remote_bounce_url(), HTTP_MOVED_TEMPORARILY ); } sub send_concat_res_response { my $apache_r = $_[0]; my $args = $apache_r->args; my $uri = $apache_r->uri; my $dir = ( $LJ::STATDOCS // $LJ::HTDOCS ) . $uri; my $maxdir = ( $LJ::STATDOCS // $LJ::HTDOCS ) . '/max' . $uri; return 404 unless -d $dir || -d $maxdir; # Might contain cache buster "?v=3234234234" at the end; # plus possibly other unique args (caught by the .*) $args =~ s/\?v=.*$//; # Collect each file my ( $body, $size, $mtime, $mime ) = ( '', 0, 0, undef ); foreach my $file ( split /,/, substr( $args, 1 ) ) { my $res = load_file_for_concat("$dir$file") // load_file_for_concat("$maxdir$file"); return 404 unless defined $res; $body .= $res->[0]; $size += $res->[1]; $mtime = $res->[2] if $res->[2] > $mtime; $mime //= $res->[3]; # And catch if they've mixed filetypes, we can't do that return 404 if $mime ne $res->[3]; } # No files? Something went wrong return 404 unless $body; # Got a bunch of files, let's concatenate them into the output $apache_r->status(200); $apache_r->status_line("200 OK"); $apache_r->content_type($mime); $apache_r->headers_out->{"Content-length"} = $size; $apache_r->headers_out->{"Last-Modified"} = LJ::time_to_http($mtime); if ( $apache_r->method eq 'GET' ) { $apache_r->print($body); } return OK; } sub blocked_bot { my $apache_r = shift; $apache_r->status(403); $apache_r->status_line("403 Denied"); $apache_r->content_type("text/html"); my $subject = $LJ::BLOCKED_BOT_SUBJECT || "403 Denied"; my $message = $LJ::BLOCKED_BOT_MESSAGE || "You don't have permission to view this page."; if ($LJ::BLOCKED_BOT_INFO) { my $ip = LJ::get_remote_ip(); my $uniq = LJ::UniqCookie->current_uniq; $message .= " $uniq @ $ip"; } $apache_r->print("

$subject

$message"); return OK; } sub blocked_anon { my $apache_r = shift; $apache_r->status(403); $apache_r->status_line("403 Denied"); $apache_r->content_type("text/html"); my $subject = "403 Denied"; my $message = "You don't have permission to access $LJ::SITENAME. Please first log in."; $apache_r->print("$subject"); $apache_r->print("

$subject

$message"); $apache_r->print(""); return OK; } sub resolve_path_for_uri { my ( $apache_r, $orig_uri, %opts ) = @_; my $uri = $orig_uri; my $redirect_ref = $opts{redirect_url_ref}; if ( $uri !~ m!(\.\.|\%|\.\/)! ) { if ( exists $FILE_LOOKUP_CACHE{$orig_uri} ) { return @{ $FILE_LOOKUP_CACHE{$orig_uri} }; } foreach my $dir ( LJ::get_all_directories('htdocs') ) { # main page my $file = "$dir/$uri"; if ( -e "$file/index.bml" && $uri eq '/' ) { $file .= "index.bml"; $uri .= "/index.bml"; } # /blah/file => /blah/file.bml if ( -e "$file.bml" ) { $file .= ".bml"; $uri .= ".bml"; } # /foo => /foo/ # /foo/ => /foo/index.bml if ( -d $file && -e "$file/index.bml" ) { unless ( $uri =~ m!/$! ) { $$redirect_ref = LJ::create_url( $uri . "/" ); return; } $file .= "index.bml"; $uri .= "index.bml"; } next unless -f $file; $file = abs_path($file); if ($file) { $uri =~ s!^/+!/!; $FILE_LOOKUP_CACHE{$orig_uri} = [ $uri, $file ]; return @{ $FILE_LOOKUP_CACHE{$orig_uri} }; } } } return undef; } # trans does all the things. This is the initial entrypoint for all requests. sub trans { my $apache_r = $_[0]; # include some modern security headers for best practices; we put these on everything # so that no matter how a user reaches us, they get the configs $apache_r->headers_out->{'X-Content-Type-Options'} = 'nosniff'; $apache_r->headers_out->{'Referrer-Policy'} = 'same-origin'; if ( $LJ::PROTOCOL eq 'https' ) { # TODO: Raise HSTS timer here, but I want it low while I test to make sure that # the world doesn't implode; suggested value is 31536000. $apache_r->headers_out->{'Strict-Transport-Security'} = 'max-age=300; includeSubDomains'; } # don't deal with subrequests or OPTIONS return DECLINED if defined $apache_r->main || $apache_r->method_number == M_OPTIONS; my $uri = $apache_r->uri; my $args = $apache_r->args; my $args_wq = $args ? "?$args" : ""; my $host = lc( $apache_r->headers_in->{"Host"} ); my $hostport = ( $host =~ s/(:\d+)$// ) ? $1 : ""; # Allow hosts ending in . to work properly. $host =~ s/\.$//; # disable TRACE (so scripts on non-LJ domains can't invoke # a trace to get the LJ cookies in the echo) return FORBIDDEN if $apache_r->method_number == M_TRACE; # If the configuration says to log statistics and GTop is available, mark # values before the request runs so it can be turned into a delta later if ( my $gtop = LJ::gtop() ) { $apache_r->pnotes->{gtop_cpu} = $gtop->cpu; $apache_r->pnotes->{gtop_mem} = $gtop->proc_mem($$); } LJ::start_request(); S2::set_domain('LJ'); # Apply rate limiting (after start_request so get_remote returns fresh data) my $remote = LJ::get_remote(); my $ip = LJ::get_remote_ip(); # Get the appropriate rate limit based on whether user is logged in my $limit; if ($remote) { $limit = DW::RateLimit->get( "authenticated_requests", rate => "100/60s" # 100 requests per minute ); } else { $limit = DW::RateLimit->get( "anonymous_requests", rate => "30/60s" # 30 requests per minute ); } # Check if rate limit is exceeded if ($limit) { my $result = $limit->check( userid => $remote ? $remote->userid : undef, ip => $remote ? undef : $ip ); if ( $result->{exceeded} ) { $apache_r->status(429); $apache_r->status_line("429 Too Many Requests"); $apache_r->content_type("text/html"); my $retry_after = $result->{time_remaining}; $apache_r->headers_out->{'Retry-After'} = $retry_after; $apache_r->print("

429 Too Many Requests

"); $apache_r->print("

You have made too many requests. Please try again later.

"); if ($retry_after) { $apache_r->print("

Please wait $retry_after seconds before trying again.

"); } return OK; } } my $lang = $LJ::DEFAULT_LANG || $LJ::LANGS[0]; BML::set_language( $lang, \&LJ::Lang::get_text ); if ( $apache_r->is_initial_req ) { # handle uniq cookies # this will ensure that we have a correct cookie value # and also add it to $apache_r->notes LJ::UniqCookie->ensure_cookie_value; # apply sysban block if applicable if ( LJ::UniqCookie->sysban_should_block ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&blocked_bot ); return OK; } } else { # not is_initial_req if ( $apache_r->status == 404 ) { my $ret = DW::Routing->call( uri => "/internal/local/404" ); $ret //= DW::Routing->call( uri => "/internal/404" ); return $ret if defined $ret; } } # let foo.com still work, but redirect to www.foo.com if ( $LJ::DOMAIN_WEB && $apache_r->method eq "GET" && $host eq $LJ::DOMAIN && $LJ::DOMAIN_WEB ne $LJ::DOMAIN ) { my $url = "$LJ::SITEROOT$uri"; $url .= "?" . $args if $args; return redir( $apache_r, $url ); } # handle alternate domains if ( $host ne $LJ::DOMAIN && $host ne $LJ::DOMAIN_WEB && !( $LJ::EMBED_MODULE_DOMAIN && $host =~ /$LJ::EMBED_MODULE_DOMAIN$/ ) ) { my $which_alternate_domain = undef; foreach my $other_host (@LJ::ALTERNATE_DOMAINS) { $which_alternate_domain = $other_host if $host =~ m/\Q$other_host\E$/i; } if ( defined $which_alternate_domain ) { my $root = "$LJ::PROTOCOL://"; $host =~ s/\Q$which_alternate_domain\E$/$LJ::DOMAIN/i; # do $LJ::DOMAIN -> $LJ::DOMAIN_WEB here, to save a redirect. if ( $LJ::DOMAIN_WEB && $host eq $LJ::DOMAIN ) { $host = $LJ::DOMAIN_WEB; } $root .= "$host"; if ( $apache_r->method eq "GET" ) { my $url = "$root$uri"; $url .= "?" . $args if $args; return redir( $apache_r, $url ); } else { return redir( $apache_r, $root ); } } } $remote = LJ::get_remote(); # block on IP address for anonymous users but allow users to log in, # and logged in users to go through # we're not logged in, and we're not in the middle of logging in unless ( $remote || LJ::remote_bounce_url() ) { # allow the user to go through login and subdomain cookie checking paths unless ( $uri =~ m!^(?:/login|/__setdomsess|/misc/get_domain_session)! ) { foreach my $ip (@req_hosts) { if ( LJ::sysban_check( 'noanon_ip', $ip ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&blocked_anon ); return OK; } } } } # check for sysbans on ip address, and block the ip address completely unless ( $LJ::BLOCKED_BOT_URI && index( $uri, $LJ::BLOCKED_BOT_URI ) == 0 ) { foreach my $ip (@req_hosts) { if ( LJ::sysban_check( 'ip', $ip ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&blocked_bot ); return OK; } } if ( LJ::Sysban::tempban_check( ip => \@req_hosts ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&blocked_bot ); return OK; } if ( LJ::Hooks::run_hook( "forbid_request", $apache_r ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&blocked_bot ); return OK; } } # See if this is a concatenated static file request. If so, the args will start with a '?' # which means the original URL was of the form '/foo/??bar'. if ( $args =~ /^\?/ ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&send_concat_res_response ); return OK; } # Normal request processing my %GET = LJ::parse_args( $apache_r->args ); if ( $LJ::IS_DEV_SERVER && $GET{'as'} =~ /^\w{1,25}$/ ) { my $ru = LJ::load_user( $GET{'as'} ); LJ::set_remote($ru); # might be undef, to allow for "view as logged out" } # is this the embed module host if ( $LJ::EMBED_MODULE_DOMAIN && $host =~ /$LJ::EMBED_MODULE_DOMAIN$/ ) { return DW::Routing->call( uri => '/journal/embedcontent' ); } my $journal_view = sub { my $opts = shift; $opts ||= {}; my $orig_user = $opts->{'user'}; $opts->{'user'} = LJ::canonical_username( $opts->{'user'} ); my $remote = LJ::get_remote(); my $u = LJ::load_user($orig_user); # do redirects: # -- uppercase usernames # -- users with hyphens/underscores, except users from external domains if ( $orig_user ne lc($orig_user) || $orig_user =~ /[_-]/ && $u && $u->journal_base !~ m!^$LJ::PROTOCOL://$host!i && $opts->{'vhost'} !~ /^other:/ ) { my $newurl = $uri; # if we came through $opts->{vhost} eq "users" path above, then # the s/// below will not match and there will be a leading /, # so the s/// leaves a leading slash as well so that $newurl is # consistent for the concatenation before redirect $newurl =~ s!^/(users/|community/|~)\Q$orig_user\E!/!; $newurl = $u->journal_base . "$newurl$args_wq" if $u; return redir( $apache_r, $newurl ); } # check if this entry or journal contains adult content if ( LJ::is_enabled('adult_content') ) { # force remote to be checked my $burl = LJ::remote_bounce_url(); return remote_domsess_bounce() if LJ::remote_bounce_url(); my $entry = $opts->{ljentry}; my $poster; my $adult_content = "none"; if ( $u && $entry ) { $adult_content = $entry->adult_content_calculated || $u->adult_content_calculated; $poster = $entry->poster; } elsif ($u) { $adult_content = $u->adult_content_calculated; } # we should show the page (no interstitial) if: # the viewed user is deleted / suspended OR # the entry is specified but invalid OR # the remote user owns the journal we're viewing OR # the remote user posted the entry we're viewing my $should_show_page = ( $u && !$u->is_visible ) || ( $entry && !$entry->valid ) || ( $remote && ( $remote->can_manage($u) || ( $entry && $remote->equals($poster) ) ) ); my %journal_pages = ( read => 1, archive => 1, month => 1, day => 1, tag => 1, entry => 1, reply => 1, lastn => 1, ); my $is_journal_page = !$opts->{mode} || $journal_pages{ $opts->{mode} }; if ( $adult_content ne "none" && $is_journal_page && !$should_show_page ) { my $returl = "$LJ::PROTOCOL://$host" . $apache_r->uri . "$args_wq"; LJ::set_active_journal($u); $apache_r->pnotes->{user} = $u; $apache_r->pnotes->{entry} = $entry if $entry; $apache_r->notes->{returl} = $returl; unless ( DW::Logic::AdultContent->user_confirmed_page( user => $remote, journal => $u, entry => $entry, adult_content => $adult_content ) ) { my $adult_content_handler = sub { $apache_r->handler("perl-script"); $apache_r->notes->{adult_content_type} = $_[0]; $apache_r->push_handlers( PerlHandler => sub { adult_interstitial( $_[0] ) }, ); return OK; }; # logged in users with a defined age of under 18 are blocked from explicit adult content # logged in users with a defined age of under 18 are given a confirmation page for adult concepts depending on their settings # logged in users with a defined age of 18 or older are given confirmation pages for adult content depending on their settings # logged in users without defined ages and logged out users are given confirmation pages for all adult content if ( $adult_content eq "explicit" && $remote && $remote->is_minor ) { return $adult_content_handler->( DW::Logic::AdultContent->adult_interstitial_path( type => 'explicit_blocked' ) ); } else { my $hide_adult_content = $remote ? $remote->hide_adult_content : "concepts"; if ( $adult_content eq "explicit" && $hide_adult_content ne "none" ) { return $adult_content_handler->( DW::Logic::AdultContent->adult_interstitial_path( type => 'explicit' ) ); } elsif ( $adult_content eq "concepts" && $hide_adult_content eq "concepts" ) { return $adult_content_handler->( DW::Logic::AdultContent->adult_interstitial_path( type => 'concepts' ) ); } } } } } if ( $opts->{'mode'} eq "info" ) { my $u = LJ::load_user( $opts->{user} ) or return 404; my $mode = $GET{mode} eq 'full' ? '?mode=full' : ''; return redir( $apache_r, $u->profile_url . $mode ); } if ( $opts->{'mode'} eq "profile" ) { $apache_r->notes->{_journal} = $opts->{user}; # this is the notes field that all other s1/s2 pages use. # so be consistent for people wanting to read it. # _journal above is kinda deprecated, but we'll carry on # its behavior of meaning "whatever the user typed" to be # passed to the profile BML page, whereas this one only # works if journalid exists. if ( my $u = LJ::load_user( $opts->{user} ) ) { $apache_r->notes->{journalid} = $u->{userid}; } return DW::Routing->call( uri => '/profile' ); } if ( $opts->{'mode'} eq "update" ) { my $u = LJ::load_user( $opts->{user} ) or return 404; return redir( $apache_r, "$LJ::SITEROOT/update.bml?usejournal=" . $u->{'user'} ); } %RQ = %$opts; if ( $opts->{mode} eq "data" && $opts->{pathextra} =~ m!^/(\w+)(/.*)?! ) { my $remote = LJ::get_remote(); my $burl = LJ::remote_bounce_url(); return remote_domsess_bounce() if LJ::remote_bounce_url(); my ( $mode, $path ) = ( $1, $2 ); if ( my $handler = LJ::Hooks::run_hook( "data_handler:$mode", $RQ{'user'}, $path ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => $handler ); return OK; } } $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&journal_content ); return OK; }; my $determine_view = sub { my ( $user, $vhost, $uuri ) = @_; my $mode = undef; my $pe; my $ljentry; # if favicon, let filesystem handle it, for now, until # we have per-user favicons. if ( $uuri eq "/favicon.ico" ) { $apache_r->filename( LJ::resolve_file("htdocs/$uuri") ); return OK; } # see if there is a modular handler for this URI my $ret = LJ::URI->handle( $uuri, $apache_r ); $ret = DW::Routing->call( username => $user ) unless defined $ret; return $ret if defined $ret; if ( $uuri =~ m#^/tags(.*)# ) { return redir( $apache_r, "/tag$1" ); } if ( $uuri eq "/__setdomsess" ) { return redir( $apache_r, LJ::Session->setdomsess_handler ); } if ( $uuri =~ m#^/calendar(.*)# ) { return redir( $apache_r, "/archive$1" ); } if ( $uuri =~ m#^/(\d+)(\.html?)$#i ) { return redir( $apache_r, "/$1.html$args_wq" ) unless $2 eq '.html'; my $u = LJ::load_user($user) or return 404; $ljentry = LJ::Entry->new( $u, ditemid => $1 ); if ( $GET{'mode'} eq "reply" || $GET{'replyto'} || $GET{'edit'} ) { $mode = "reply"; } else { $mode = "entry"; } } elsif ( $uuri =~ m#^/(\d\d\d\d/\d\d/\d\d)/([a-z0-9_-]+)\.html$# ) { my $u = LJ::load_user($user) or return 404; # This hack validates that the YYYY/MM/DD given to us is correct. my $date = $1; $ljentry = LJ::Entry->new( $u, slug => $2 ); if ( defined $ljentry ) { my $dt = join( '/', split( '-', substr( $ljentry->eventtime_mysql, 0, 10 ) ) ); return 404 unless $dt eq $date; } if ( $GET{'mode'} eq "reply" || $GET{'replyto'} || $GET{'edit'} ) { $mode = "reply"; } else { $mode = "entry"; } } elsif ( $uuri =~ m#^/(\d\d\d\d)(?:/(\d\d)(?:/(\d\d))?)?(/?)$# ) { my ( $year, $mon, $day, $slash ) = ( $1, $2, $3, $4 ); unless ($slash) { my $u = LJ::load_user($user) or return 404; my $proper = $u->journal_base . "/$year"; $proper .= "/$mon" if defined $mon; $proper .= "/$day" if defined $day; $proper .= "/"; return redir( $apache_r, $proper ); } # the S1 ljviews code looks at $opts->{'pathextra'}, because # that's how it used to do it, when the pathextra was /day[/yyyy/mm/dd] $pe = $uuri; if ( defined $day ) { $mode = "day"; } elsif ( defined $mon ) { $mode = "month"; } else { $mode = "archive"; } } elsif ( $uuri =~ m! /([a-z\_]+)? # optional / (.*) # path extra: /ReadingFilter, for example !x && ( $1 eq "" || defined $LJ::viewinfo{$1} ) ) { ( $mode, $pe ) = ( $1, $2 ); $mode ||= "" unless length $pe; # if no pathextra, then imply 'lastn' # redirect old-style URLs to new versions: if ( $mode =~ /^day|calendar$/ && $pe =~ m!^/\d\d\d\d! ) { my $newuri = $uri; $newuri =~ s!$mode/(\d\d\d\d)!$1!; return redir( $apache_r, LJ::journal_base($user) . $newuri ); } elsif ( $mode eq 'rss' ) { # code 301: moved permanently, update your links. return redir( $apache_r, LJ::journal_base($user) . "/data/rss$args_wq", 301 ); } elsif ( $mode eq 'tag' ) { # tailing slash on here to prevent a second redirect after this one return redir( $apache_r, LJ::journal_base($user) . "$uri/" ) unless $pe; if ( $pe eq '/' ) { # tag list page $mode = 'tag'; $pe = undef; } else { # filtered lastn page $mode = 'lastn'; # prepend /tag so that lastn knows to do tag filtering $pe = "/tag$pe"; } } elsif ( $mode eq 'security' ) { # tailing slash on here to prevent a second redirect after this one return redir( $apache_r, LJ::journal_base($user) . "$uri/" ) unless $pe; # filtered lastn page $mode = 'lastn'; # prepend /security so that lastn knows to do security filtering $pe = "/security$pe"; } } elsif ( ( $vhost eq "users" || $vhost =~ /^other:/ ) && $uuri eq "/robots.txt" ) { $mode = "robots_txt"; } else { my $key = $uuri; $key =~ s!^/!!; my $u = LJ::load_user($user) or return 404; } return undef unless defined $mode; # Now that we know ourselves to be at a sensible URI, redirect renamed # journals. This ensures redirects work sensibly for all valid paths # under a given username, without sprinkling redirects everywhere. my $u = LJ::load_user($user); if ( $u && $u->is_redirect && $u->is_renamed ) { my $renamedto = $u->prop('renamedto'); if ( $renamedto ne '' ) { my $redirect_url = ( $renamedto =~ m!^$LJ::PROTOCOL?://! ) ? $renamedto : LJ::journal_base( $renamedto, vhost => $vhost ) . $uuri . $args_wq; return redir( $apache_r, $redirect_url, 301 ); } } return $journal_view->( { 'vhost' => $vhost, 'mode' => $mode, 'args' => $args, 'pathextra' => $pe, 'user' => $user, 'ljentry' => $ljentry, } ); }; # user or shop domains if ( $host =~ /^(www\.)?([\w\-]{1,25})\.\Q$LJ::USER_DOMAIN\E$/ && $2 ne "www" && # 1xx: info, 2xx: success, 3xx: redirect, 4xx: client err, 5xx: server err # let the main server handle any errors $apache_r->status < 400 ) { # Per bug 3734: users sometimes type 'www.username.USER_DOMAIN'. return redir( $apache_r, "$LJ::PROTOCOL://$2.$LJ::USER_DOMAIN$uri$args_wq" ) if $1 eq 'www.'; my $user = $2; # see if the "user" is really functional code my $func = $LJ::SUBDOMAIN_FUNCTION{$user}; if ( $func eq "normal" ) { # site admin wants this domain to be ignored and treated as if it # were "www" } elsif ( $func eq "cssproxy" ) { return DW::Routing->call( uri => '/extcss' ); } elsif ( $func eq 'support' ) { return redir( $apache_r, "$LJ::SITEROOT/support/" ); } elsif ( $func eq 'shop' ) { return redir( $apache_r, "$LJ::SITEROOT/shop$uri" ); } elsif ( $func eq 'mobile' ) { return redir( $apache_r, "$LJ::SITEROOT/mobile$uri" ); } elsif ( ref $func eq "ARRAY" && $func->[0] eq "changehost" ) { return redir( $apache_r, "$LJ::PROTOCOL://$func->[1]$uri$args_wq" ); } elsif ( $uri =~ m!^/(?:talkscreen|delcomment)\.bml! ) { # these URLs need to always work for the javascript comment management code # (JavaScript can't do cross-domain XMLHttpRequest calls) return DECLINED; } elsif ( $func eq "journal" ) { unless ( $uri =~ m!^/(\w{1,25})(/.*)?$! ) { if ( $uri eq "/favicon.ico" ) { $apache_r->filename( LJ::resolve_file("htdocs/$uri") ); return OK; } my $redir = LJ::Hooks::run_hook( "journal_subdomain_redirect_url", $host, $uri ); return redir( $apache_r, $redir ) if $redir; return 404; } ( $user, $uri ) = ( $1, $2 ); $uri ||= "/"; # redirect them to their canonical URL if on wrong host/prefix if ( my $u = LJ::load_user($user) ) { my $canon_url = $u->journal_base; unless ( $canon_url =~ m!^$LJ::PROTOCOL://$host!i ) { return redir( $apache_r, "$canon_url$uri$args_wq" ); } } my $view = $determine_view->( $user, "safevhost", $uri ); return $view if defined $view; } elsif ($func) { my $code = { 'userpics' => \&userpic_trans, 'files' => \&files_trans, }; return $code->{$func}->($apache_r) if $code->{$func}; return 404; # bogus ljconfig } else { # if this is the shop, manage domain cookie if needed, else just render if ( $user eq 'shop' ) { if ( $uri eq '/__setdomsess' ) { return redir( $apache_r, LJ::Session->setdomsess_handler ); } else { $uri =~ s/\/$//; $uri = "/shop$uri"; } } else { my $view = $determine_view->( $user, "users", $uri ); return $view if defined $view; return 404; } } } # userpic return userpic_trans($apache_r) if $uri =~ m!^/userpic/!; return vgift_trans($apache_r) if $uri =~ m!^/vgift/!; # Attempt to handle a URI given the old-style LJ handler, falling back to # the new style Dreamwidth routing system. my $ret = LJ::URI->handle( $uri, $apache_r ) // DW::Routing->call( uri => $uri ); return $ret if defined $ret; # API role if ( $uri =~ m!^/api/v(\d+)(/.+)$! ) { my $ver = $1 + 0; $ret = DW::Routing->call( apiver => $ver, uri => "/v$ver$2", role => 'api' ); $ret //= DW::Routing->call( uri => "/internal/api/404" ); return $ret if defined $ret; } # now check for BML pages my $redirect_url; my ( $alt_uri, $alt_path ) = resolve_path_for_uri( $apache_r, $uri, redirect_url_ref => \$redirect_url ); return redir( $apache_r, $redirect_url ) if $redirect_url; if ($alt_path) { $apache_r->uri($alt_uri); $apache_r->filename($alt_path); return OK; } # normal (non-domain) journal view if ( $uri =~ m! ^/(users\/|community\/|\~) # users/community/tilde ([^/]+) # potential username (.*)? # rest !x && $uri !~ /\.bml/ ) { my ( $part1, $user, $rest ) = ( $1, $2, $3 ); # get what the username should be my $cuser = LJ::canonical_username($user); return DECLINED unless length($cuser); my $srest = $rest || '/'; # FIXME: skip two redirects and send them right to __setdomsess with the right # cookie-to-be-set arguments. below is the easy/slow route. my $u = LJ::load_user($cuser) or return 404; my $base = $u->journal_base; # If journal_base is path-based on this host (dev container), handle # the request here instead of redirecting to ourselves in a loop. if ( $LJ::IS_DEV_SERVER && $LJ::IS_DEV_CONTAINER && $base =~ m!^$LJ::PROTOCOL://$host\Q$hostport\E/~! ) { my $view = $determine_view->( $cuser, "users", $srest ); return $view if defined $view; return 404; } return redir( $apache_r, "$base$srest$args_wq", correct_url_redirect_code() ); } if ( $uri =~ m!^/palimg/! ) { Apache::LiveJournal::PalImg->load; $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&Apache::LiveJournal::PalImg::handler ); return OK; } # redirected resources if ( $REDIR{$uri} ) { my $new = $REDIR{$uri}; if ( $apache_r->args ) { $new .= ( $new =~ /\?/ ? "&" : "?" ); $new .= $apache_r->args; } return redir( $apache_r, $new, HTTP_MOVED_PERMANENTLY ); } return FORBIDDEN if $uri =~ m!^/userpics!; return DECLINED; } sub userpic_trans { my $apache_r = shift; return 404 unless $apache_r->uri =~ m!^/(?:userpic/)?(\d+)/(\d+)$!; my ( $picid, $userid ) = ( $1, $2 ); # we can safely do this without checking since we never re-use # picture IDs and don't let the contents get modified return HTTP_NOT_MODIFIED if $apache_r->headers_in->{'If-Modified-Since'}; $RQ{'picid'} = $picid; $RQ{'pic-userid'} = $userid; $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&userpic_content ); return OK; } sub userpic_content { my $apache_r = $_[0]; # Load the user object and pic and make sure the picture is viewable my $u = LJ::load_userid( $RQ{'pic-userid'} ); my $pic = LJ::Userpic->get( $u, $RQ{'picid'}, { no_expunged => 1 } ) or return NOT_FOUND; # Must have contents by now, or return 404 my $data = $pic->imagedata; return NOT_FOUND unless $data; # Everything looks good, send it $apache_r->content_type( $pic->mimetype ); $apache_r->headers_out->{"Content-length"} = length $data; $apache_r->headers_out->{"Cache-Control"} = "no-transform"; $apache_r->headers_out->{"Last-Modified"} = LJ::time_to_http( $pic->pictime ); $apache_r->print($data) unless $apache_r->header_only; return OK; } sub files_trans { my $apache_r = shift; return 404 unless $apache_r->uri =~ m!^/(\w{1,25})/(\w+)(/\S+)!; my ( $user, $domain, $rest ) = ( $1, $2, $3 ); if ( my $handler = LJ::Hooks::run_hook( "files_handler:$domain", $user, $rest ) ) { $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => $handler ); return OK; } return 404; } sub vgift_trans { my $apache_r = shift; return 404 unless $apache_r->uri =~ m!^/vgift/(\d+)/(\w+)$!; my ( $picid, $picsize ) = ( $1, $2 ); return 404 unless $picsize =~ /^(?:small|large)$/; # we can safely do this without checking # unless we're using the admin interface return HTTP_NOT_MODIFIED if $apache_r->headers_in->{'If-Modified-Since'} && $apache_r->headers_in->{'Referer'} !~ m!^\Q$LJ::SITEROOT\E$/admin/!; $RQ{picid} = $picid; $RQ{picsize} = $picsize; $apache_r->handler("perl-script"); $apache_r->push_handlers( PerlResponseHandler => \&vgift_content ); return OK; } sub vgift_content { my $apache_r = $_[0]; my $picid = $RQ{picid}; my $picsize = $RQ{picsize}; my $vg = DW::VirtualGift->new($picid); my $mime = $vg->mime_type($picsize); return NOT_FOUND unless $mime; my $size; my $send_headers = sub { $size = $_[0] if @_; $size ||= 0; $apache_r->content_type($mime); $apache_r->headers_out->{"Content-length"} = $size + 0; $apache_r->headers_out->{"Cache-Control"} = "no-transform"; }; my $key = $vg->img_mogkey($picsize); my $data = DW::BlobStore->retrieve( vgifts => $key ); return NOT_FOUND unless $data; $send_headers->( length $$data ); $apache_r->print($$data) unless $apache_r->header_only; return OK; } # load_file_for_concat will retrieve the given file from the disk and return: # (contents, size, mtime, mime). If the file doesn't exist, undef is returned. sub load_file_for_concat { my $fn = $_[0]; # No path traversal return undef if $fn =~ /\.\./; # Specific types only -- we only support loading files for concatenation if # their extension is in this list my $mime; if ( $fn =~ /\.([a-z]+)$/ ) { $mime = { css => 'text/css; charset=utf-8', js => 'application/javascript; charset=utf-8', }->{$1}; } return undef unless $mime; # Verify exists and is regular file my @stat = stat($fn); return undef unless scalar @stat > 0 && S_ISREG( $stat[2] ); my $contents; open FILE, "<$fn" or return undef; { local $/ = undef; $contents = ; } close FILE; # Remove UTF-8 byte-order mark, in case someone set us up the BOM. (It's not # valid after the first position in a file, and can screw up syntax when # concatenating.) $contents =~ s/\A\x{ef}\x{bb}\x{bf}//; # Add a newline, juuuust to be safe. $contents .= "\n"; # If we were handling unicode properly, we'd need to encode before checking # length... but we aren't. my $size = length($contents); return [ $contents, $size, $stat[9], $mime ]; } sub adult_interstitial { my ( $apache_r, %opts ) = @_; my $int_redir = DW::Routing->call( uri => $apache_r->notes->{adult_content_type}, ); if ( defined $int_redir ) { # we got a match; clear the request cache and return DECLINED. LJ::start_request(); return DECLINED; } } sub journal_content { my $apache_r = shift; my $uri = $apache_r->uri; my %GET = LJ::parse_args( $apache_r->args ); if ( $RQ{'mode'} eq "robots_txt" ) { my $u = LJ::load_user( $RQ{'user'} ); return 404 unless $u; $u->preload_props( "opt_blockrobots", "adult_content" ); $apache_r->content_type("text/plain"); my @extra = LJ::Hooks::run_hook( "robots_txt_extra", $u ), (); $apache_r->print($_) foreach @extra; $apache_r->print("User-Agent: *\n"); if ( $u->should_block_robots ) { $apache_r->print("Disallow: /\n"); } return OK; } # handle HTTP digest authentication if ( $GET{'auth'} eq 'digest' || $apache_r->headers_in->{"Authorization"} =~ /^Digest/ ) { my ($res) = DW::Auth->authenticate( digest => 1 ); unless ($res) { $apache_r->status(401); $apache_r->status_line("401 Authentication required"); $apache_r->content_type("text/html"); $apache_r->print("Digest authentication failed."); return OK; } } my $criterr = 0; my $remote = LJ::get_remote( { criterr => \$criterr, } ); return remote_domsess_bounce() if LJ::remote_bounce_url(); # check for faked cookies here, since this is pretty central. if ($criterr) { $apache_r->status(500); $apache_r->status_line("500 Invalid Cookies"); $apache_r->content_type("text/html"); # reset all cookies foreach my $dom ( "", $LJ::DOMAIN, $LJ::COOKIE_DOMAIN ) { DW::Request->get->add_cookie( name => 'ljsession', expires => LJ::time_to_cookie(1), domain => $dom ? $dom : undef, path => '/', httponly => 1 ); } $apache_r->print( "Invalid cookies. Try logging out and then logging back in.\n" ); $apache_r->print("\n") for ( 0 .. 100 ); return OK; } # Before we actually make a journal, let's potentially bounce this user # off to get captchaed if ( DW::Captcha->should_captcha_view($remote) ) { return redir( $apache_r, DW::Captcha->redirect_url ); } # LJ::make_journal() will set this flag for pages that should use the site # skin and therefore need to be processed by siteviews (e.g., style=site) my $handle_with_siteviews = 0; my %headers = (); my $opts = { 'r' => $apache_r, 'headers' => \%headers, 'args' => $RQ{'args'}, 'vhost' => $RQ{'vhost'}, 'pathextra' => $RQ{'pathextra'}, 'header' => { 'If-Modified-Since' => $apache_r->headers_in->{"If-Modified-Since"}, }, 'handle_with_siteviews_ref' => \$handle_with_siteviews, 'siteviews_extra_content' => {}, 'ljentry' => $RQ{'ljentry'}, }; $apache_r->notes->{view} = $RQ{mode}; my $user = $RQ{'user'}; my $html = LJ::make_journal( $user, $RQ{'mode'}, $remote, $opts ); # Allow to add extra http-header or even modify html LJ::Hooks::run_hooks( "after_journal_content_created", $opts, \$html ) unless $handle_with_siteviews; # check for redirects if ( $opts->{internal_redir} ) { my $int_redir = DW::Routing->call( uri => $opts->{internal_redir} ); if ( defined $int_redir ) { # we got a match; clear the request cache and return DECLINED. LJ::start_request(); return DECLINED; } } return redir( $apache_r, $opts->{'redir'} ) if $opts->{'redir'}; return $opts->{'handler_return'} if defined $opts->{'handler_return'}; # if LJ::make_journal() indicated it can't handle the request: # only if HTML is set, otherwise leave it alone so the user # gets "messed up template definition", cause something went wrong. return DW::Template->render_string( $html, $opts->{siteviews_extra_content} ) if $handle_with_siteviews && $html; my $status = $opts->{'status'} || "200 OK"; $opts->{'contenttype'} ||= $opts->{'contenttype'} = "text/html"; if ( $opts->{'contenttype'} =~ m!^text/! && $opts->{'contenttype'} !~ /charset=/ ) { $opts->{'contenttype'} .= "; charset=utf-8"; } # Set to 1 if the code should generate junk to help IE # display a more meaningful error message. my $generate_iejunk = 0; if ( $opts->{'badargs'} ) { # No special information to give to the user, so just let # Apache handle the 404 return 404; } elsif ( $opts->{'badfriendgroup'} ) { # give a real 404 to the journal owner if ( $remote && $remote->{'user'} eq $user ) { return 404; # otherwise be vague with a 403 } else { $status = "403 Forbidden"; $html = "

Invalid Filter

Either this reading filter doesn't exist or you are not authorized to view it. Try checking that you are logged in if you're sure you have the name right.

"; } $generate_iejunk = 1; } elsif ( $opts->{'suspendeduser'} ) { $status = "403 User suspended"; $html = "

Suspended User

" . "

The content at this URL is from a suspended user.

"; $generate_iejunk = 1; } elsif ( $opts->{'suspendedentry'} ) { $status = "403 Entry suspended"; $html = "

Suspended Entry

" . "

The entry at this URL is suspended. You cannot reply to it.

"; $generate_iejunk = 1; } elsif ( $opts->{'readonlyremote'} || $opts->{'readonlyjournal'} ) { $status = "403 Read-only user"; $html = "

Read-Only User

"; $html .= $opts->{'readonlyremote'} ? "

You are read-only. You cannot post comments.

" : "

This journal is read-only. You cannot comment in it.

"; $generate_iejunk = 1; } unless ($html) { $status = "500 Bad Template"; $html = "

Error

User $user has messed up their journal template definition.

"; $generate_iejunk = 1; } $apache_r->status( $status =~ m/^(\d+)/ ); $apache_r->status_line($status); foreach my $hname ( keys %headers ) { if ( ref( $headers{$hname} ) && ref( $headers{$hname} ) eq "ARRAY" ) { foreach ( @{ $headers{$hname} } ) { $apache_r->headers_out->{$hname} = $_; } } else { $apache_r->headers_out->{$hname} = $headers{$hname}; } } $apache_r->content_type( $opts->{'contenttype'} ); $apache_r->headers_out->{"Cache-Control"} = "private, proxy-revalidate"; $html .= ( "\n" x 100 ) if $generate_iejunk; # Parse the page content for any temporary matches # defined in local config if ( my $cb = $LJ::TEMP_PARSE_MAKE_JOURNAL ) { $cb->( \$html ); } # add stuff before my $before_body_close = ""; LJ::Hooks::run_hooks( "insert_html_before_body_close", \$before_body_close ); LJ::Hooks::run_hooks( "insert_html_before_journalctx_body_close", \$before_body_close ); # Insert pagestats HTML and Javascript $before_body_close .= LJ::PageStats->new->render('journal'); $html =~ s!!$before_body_close!i if $before_body_close; my $ctbase = $opts->{'contenttype'}; $ctbase =~ s/;.*//; my $do_gzip = 1; $do_gzip = 0 unless $LJ::GZIP_OKAY{$ctbase}; $do_gzip = 0 if $apache_r->headers_in->{"Accept-Encoding"} !~ /gzip/; my $length = length($html); $do_gzip = 0 if $length < 500; if ($do_gzip) { my $pre_len = $length; $apache_r->notes->{bytes_pregzip} = $pre_len; $html = Compress::Zlib::memGzip($html); $length = length($html); $apache_r->headers_out->{'Content-Encoding'} = 'gzip'; } # Let caches know that Accept-Encoding will change content $apache_r->headers_out->{'Vary'} = 'Accept-Encoding'; $apache_r->headers_out->{"Content-length"} = $length; $apache_r->print($html) unless $apache_r->header_only; return OK; } sub correct_url_redirect_code { if ($LJ::CORRECT_URL_PERM_REDIRECT) { return HTTP_MOVED_PERMANENTLY; } return REDIRECT; } 1;