445 lines
14 KiB
Perl
445 lines
14 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# DW::Controller::Export
|
|
#
|
|
# Pages for exporting journal content.
|
|
#
|
|
# Authors:
|
|
# Mark Smith <mark@dreamwidth.org>
|
|
# Jen Griffin <kareila@livejournal.com>
|
|
#
|
|
# Copyright (c) 2015-2020 by Dreamwidth Studios, LLC.
|
|
#
|
|
# This program is free software; you may redistribute it and/or modify it under
|
|
# the same terms as Perl itself. For a copy of the license, please reference
|
|
# 'perldoc perlartistic' or 'perldoc perlgpl'.
|
|
#
|
|
|
|
package DW::Controller::Export;
|
|
|
|
use v5.10;
|
|
use strict;
|
|
|
|
use DW::Routing;
|
|
use DW::Template;
|
|
use DW::Controller;
|
|
use DW::FormErrors;
|
|
|
|
use DW::Mood;
|
|
use Unicode::MapUTF8;
|
|
|
|
DW::Routing->register_string( '/export', \&index_handler, app => 1 );
|
|
DW::Routing->register_string( '/export_do', \&post_handler, app => 1 );
|
|
|
|
DW::Routing->register_string( '/export_comments', \&comment_handler, app => 1 );
|
|
|
|
sub get_encodings {
|
|
my ( %encodings, %encnames );
|
|
LJ::load_codes( { "encoding" => \%encodings } );
|
|
LJ::load_codes( { "encname" => \%encnames } );
|
|
|
|
my $rv = {};
|
|
foreach my $id ( keys %encodings ) {
|
|
next if lc $encodings{$id} eq 'none';
|
|
$rv->{ $encodings{$id} } = $encnames{$id};
|
|
}
|
|
return $rv;
|
|
}
|
|
|
|
sub index_handler {
|
|
my ( $ok, $rv ) = controller( form_auth => 1, authas => 1 );
|
|
return $rv unless $ok;
|
|
|
|
my @enclist;
|
|
my %e = %{ get_encodings() };
|
|
push @enclist, ( $_ => $e{$_} ) foreach sort { $e{$a} cmp $e{$b} } keys %e;
|
|
|
|
$rv->{encodings} = \@enclist;
|
|
|
|
return DW::Template->render_template( 'export/index.tt', $rv );
|
|
}
|
|
|
|
sub post_handler {
|
|
my ( $ok, $rv ) = controller( form_auth => 1, authas => 1 );
|
|
return $rv unless $ok;
|
|
|
|
my $r = $rv->{r};
|
|
my $post = $r->post_args;
|
|
my $scope = '/export/index.tt';
|
|
my $errors = DW::FormErrors->new;
|
|
|
|
return error_ml('bml.requirepost') unless $r->did_post;
|
|
|
|
my $u = $rv->{u};
|
|
my $dbcr = LJ::get_cluster_reader($u);
|
|
return error_ml('error.nodb') unless $dbcr;
|
|
|
|
my $ok_formats = { csv => 'csv', xml => 'xml' };
|
|
my $format = $ok_formats->{ lc $post->{format} };
|
|
$errors->add( '', "$scope.error.format" ) unless $format;
|
|
|
|
my $encoding;
|
|
|
|
{
|
|
if ( $post->{encid} ) {
|
|
my %encodings;
|
|
LJ::load_codes( { "encoding" => \%encodings } );
|
|
$encoding = $encodings{ $post->{encid} };
|
|
}
|
|
|
|
$encoding ||= $post->{encoding};
|
|
$encoding ||= 'utf-8';
|
|
|
|
if ( lc($encoding) ne "utf-8"
|
|
&& !Unicode::MapUTF8::utf8_supported_charset($encoding) )
|
|
{
|
|
$errors->add( '', "$scope.error.encoding" );
|
|
}
|
|
}
|
|
|
|
return DW::Template->render_template( 'error.tt',
|
|
{ errors => $errors, message => LJ::Lang::ml('bml.badcontent.body') } )
|
|
if $errors->exist;
|
|
|
|
##### figure out what fields we're exporting
|
|
|
|
my @fields;
|
|
my $opts = { format => $format }; # information needed by printing routines
|
|
|
|
foreach my $f (qw(itemid eventtime logtime subject event security allowmask)) {
|
|
push @fields, $f if $post->{"field_${f}"};
|
|
}
|
|
|
|
if ( $post->{field_currents} ) {
|
|
push @fields, ( "current_music", "current_mood" );
|
|
$opts->{currents} = 1;
|
|
}
|
|
|
|
my $year = $post->{year} ? $post->{year} + 0 : 0;
|
|
my $month = $post->{month} ? $post->{month} + 0 : 0;
|
|
|
|
my $sth =
|
|
$dbcr->prepare(
|
|
"SELECT jitemid, anum, eventtime, logtime, security, allowmask FROM log2 "
|
|
. "WHERE journalid=? AND year=? AND month=?" );
|
|
$sth->execute( $u->id, $year, $month );
|
|
|
|
return DW::Template->render_template( 'error.tt', { message => $dbcr->errstr } )
|
|
if $dbcr->err;
|
|
|
|
#### do file-format specific initialization
|
|
|
|
if ( $format eq "csv" ) {
|
|
$r->content_type("text/plain");
|
|
my $filename = sprintf( "%s-%04d-%02d.csv", $u->user, $year, $month );
|
|
$r->header_out_add( 'Content-Disposition' => "attachment; filename=$filename" );
|
|
$r->print( join( ",", @fields ) . "\n" ) if $post->{csv_header};
|
|
}
|
|
|
|
if ( $format eq "xml" ) {
|
|
my $lenc = lc $encoding;
|
|
$r->content_type("text/xml; charset=$lenc");
|
|
$r->print(qq{<?xml version="1.0" encoding='$lenc'?>\n});
|
|
$r->print("<livejournal>\n");
|
|
}
|
|
|
|
$opts->{fields} = \@fields;
|
|
$opts->{encoding} = $encoding;
|
|
$opts->{notranslation} = 1 if $post->{notranslation};
|
|
|
|
my @buffer;
|
|
|
|
while ( my $i = $sth->fetchrow_hashref ) {
|
|
$i->{'ritemid'} = $i->{'jitemid'} || $i->{'itemid'};
|
|
$i->{'itemid'} = $i->{'jitemid'} * 256 + $i->{'anum'} if $i->{'jitemid'};
|
|
push @buffer, $i;
|
|
|
|
# process 20 entries at a time
|
|
|
|
if ( scalar @buffer == 20 ) {
|
|
$r->print($_) foreach @{ _load_buffer( $u, \@buffer, $dbcr, $opts ) };
|
|
@buffer = ();
|
|
}
|
|
|
|
}
|
|
|
|
$r->print($_) foreach @{ _load_buffer( $u, \@buffer, $dbcr, $opts ) };
|
|
|
|
$r->print("</livejournal>\n") if $format eq "xml";
|
|
|
|
return $r->OK;
|
|
}
|
|
|
|
sub _load_buffer {
|
|
my ( $u, $buf, $dbcr, $opts ) = @_;
|
|
my %props;
|
|
|
|
my @ids = map { $_->{ritemid} } @{$buf};
|
|
my $lt = LJ::get_logtext2( $u, @ids );
|
|
LJ::load_log_props2( $dbcr, $u->id, \@ids, \%props );
|
|
|
|
my @result;
|
|
|
|
foreach my $e ( @{$buf} ) {
|
|
$e->{'subject'} = $lt->{ $e->{'ritemid'} }->[0];
|
|
$e->{'event'} = $lt->{ $e->{'ritemid'} }->[1];
|
|
|
|
my $eprops = $props{ $e->{'ritemid'} };
|
|
|
|
# convert to UTF-8 if necessary
|
|
if ( $eprops->{'unknown8bit'} && !$opts->{'notranslation'} ) {
|
|
my $error;
|
|
$e->{'subject'} = LJ::text_convert( $e->{'subject'}, $u, \$error );
|
|
$e->{'event'} = LJ::text_convert( $e->{'event'}, $u, \$error );
|
|
foreach ( keys %{$eprops} ) {
|
|
$eprops->{$_} = LJ::text_convert( $eprops->{$_}, $u, \$error );
|
|
}
|
|
}
|
|
|
|
if ( $opts->{'currents'} ) {
|
|
$e->{'current_music'} = $eprops->{'current_music'};
|
|
$e->{'current_mood'} = $eprops->{'current_mood'};
|
|
if ( $eprops->{current_moodid} ) {
|
|
my $mood = DW::Mood->mood_name( $eprops->{current_moodid} );
|
|
$e->{current_mood} = $mood if $mood;
|
|
}
|
|
}
|
|
|
|
my $entry = _dump_entry( $e, $opts );
|
|
|
|
# now translate this to the chosen encoding but only if this is a
|
|
# Unicode environment. In a pre-Unicode environment the chosen encoding
|
|
# is merely a label.
|
|
|
|
if ( lc( $opts->{'encoding'} ) ne 'utf-8' && !$opts->{'notranslation'} ) {
|
|
$entry = Unicode::MapUTF8::from_utf8(
|
|
{
|
|
-string => $entry,
|
|
-charset => $opts->{'encoding'}
|
|
}
|
|
);
|
|
}
|
|
|
|
push @result, $entry;
|
|
}
|
|
|
|
return \@result;
|
|
}
|
|
|
|
sub _dump_entry {
|
|
my ( $e, $opts ) = @_;
|
|
|
|
my $format = $opts->{format};
|
|
my $entry = "";
|
|
my @vals = ();
|
|
|
|
if ( $format eq "xml" ) {
|
|
$entry .= "<entry>\n";
|
|
}
|
|
|
|
foreach my $f ( @{ $opts->{fields} } ) {
|
|
my $v = $e->{$f} // '';
|
|
if ( $format eq "csv" ) {
|
|
if ( $v =~ /[\"\n\,]/ ) {
|
|
$v =~ s/\"/\"\"/g;
|
|
$v = qq{"$v"};
|
|
}
|
|
}
|
|
if ( $format eq "xml" ) {
|
|
$v = LJ::exml($v);
|
|
}
|
|
push @vals, $v;
|
|
}
|
|
|
|
if ( $format eq "csv" ) {
|
|
$entry .= join( ",", @vals ) . "\n";
|
|
}
|
|
|
|
if ( $format eq "xml" ) {
|
|
foreach my $f ( @{ $opts->{fields} } ) {
|
|
my $v = shift @vals;
|
|
$entry .= "<$f>" . $v . "</$f>\n";
|
|
}
|
|
$entry .= "</entry>\n";
|
|
}
|
|
|
|
return $entry;
|
|
}
|
|
|
|
sub comment_handler {
|
|
my ( $ok, $rv ) = controller( form_auth => 1, authas => 1 );
|
|
return $rv unless $ok;
|
|
|
|
my $r = $rv->{r};
|
|
my $args = $r->get_args;
|
|
my $errors = DW::FormErrors->new;
|
|
|
|
# don't let people hit us with silly GET attacks
|
|
return error_ml('error.invalidform') if $r->header_in('Referer') && !$r->did_post;
|
|
|
|
my $u = $rv->{u};
|
|
my $dbcr = LJ::get_cluster_reader($u);
|
|
return error_ml('error.nodb') unless $dbcr;
|
|
|
|
my $mode = lc( $args->{get} // '' );
|
|
$errors->add( '', "error.unknownmode" ) unless $mode =~ m/^comment_(?:meta|body)$/;
|
|
|
|
return DW::Template->render_template( 'error.tt',
|
|
{ errors => $errors, message => LJ::Lang::ml('bml.badcontent.body') } )
|
|
if $errors->exist;
|
|
|
|
# begin printing results
|
|
$r->content_type("text/xml; charset=utf-8");
|
|
$r->print(qq{<?xml version="1.0" encoding='utf-8'?>\n<livejournal>\n});
|
|
|
|
# startid specified?
|
|
my $maxitems = $mode eq 'comment_meta' ? 10000 : 1000;
|
|
my $numitems = $args->{numitems};
|
|
my $gather = $maxitems;
|
|
|
|
if ( defined $numitems && ( $numitems > 0 ) && ( $numitems <= $maxitems ) ) {
|
|
$gather = $numitems + 0;
|
|
}
|
|
|
|
my $startid = $args->{startid} ? $args->{startid} + 0 : 0;
|
|
my $endid = $startid + $gather;
|
|
|
|
# get metadata
|
|
my $rows = $dbcr->selectall_arrayref(
|
|
'SELECT jtalkid, nodeid, parenttalkid, posterid, state, datepost '
|
|
. "FROM talk2 WHERE nodetype = 'L' AND journalid = ? AND "
|
|
. " jtalkid >= ? AND jtalkid < ?",
|
|
undef, $u->id, $startid, $endid
|
|
);
|
|
|
|
# now let's gather them all together while making a list of posterids
|
|
my %posterids;
|
|
my %comments;
|
|
foreach my $r ( @{ $rows || [] } ) {
|
|
$comments{ $r->[0] } = {
|
|
nodeid => $r->[1],
|
|
parenttalkid => $r->[2],
|
|
posterid => $r->[3],
|
|
state => $r->[4],
|
|
datepost => $r->[5],
|
|
};
|
|
$posterids{ $r->[3] } = 1 if $r->[3]; # don't include 0 (anonymous)
|
|
}
|
|
|
|
# load posterids
|
|
my $us = LJ::load_userids( keys %posterids );
|
|
|
|
my $userid = $u->userid;
|
|
|
|
my $filter = sub {
|
|
my $data = $_[0];
|
|
return unless $data->{posterid};
|
|
return if $data->{posterid} == $userid;
|
|
|
|
# If the poster is suspended, we treat the comment as if it was deleted
|
|
# This comment may have children, so it must still seem to exist.
|
|
$data->{state} = 'D' if $us->{ $data->{posterid} }->is_suspended;
|
|
};
|
|
|
|
# now we have two choices: comments themselves or metadata
|
|
if ( $mode eq 'comment_meta' ) {
|
|
|
|
# meta data is easy :)
|
|
my $max = $dbcr->selectrow_array(
|
|
"SELECT MAX(jtalkid) FROM talk2 WHERE journalid = ? AND nodetype = 'L'",
|
|
undef, $userid );
|
|
$max //= 0;
|
|
$r->print("<maxid>$max</maxid>\n");
|
|
my $nextid = $startid + $gather;
|
|
$r->print("<nextid>$nextid</nextid>\n") unless ( $nextid > $max );
|
|
|
|
# now spit out the metadata
|
|
$r->print("<comments>\n");
|
|
while ( my ( $id, $data ) = each %comments ) {
|
|
$filter->($data);
|
|
|
|
my $ret = "<comment id='$id'";
|
|
$ret .= " posterid='$data->{posterid}'" if $data->{posterid};
|
|
$ret .= " state='$data->{state}'" if $data->{state} ne 'A';
|
|
$ret .= " />\n";
|
|
$r->print($ret);
|
|
}
|
|
|
|
$r->print("</comments>\n<usermaps>\n");
|
|
|
|
# now spit out usermap
|
|
my $ret = '';
|
|
while ( my ( $id, $user ) = each %$us ) {
|
|
$ret .= "<usermap id='$id' user='$user->{user}' />\n";
|
|
}
|
|
$r->print($ret);
|
|
$r->print("</usermaps>\n");
|
|
|
|
# comment data also presented in glorious XML:
|
|
}
|
|
elsif ( $mode eq 'comment_body' ) {
|
|
|
|
# get real comments from startid to a limit of 10k data, however far that takes us
|
|
my @ids = sort { $a <=> $b } keys %comments;
|
|
|
|
# call a load to get comment text
|
|
my $texts = LJ::get_talktext2( $u, @ids );
|
|
|
|
# get props if we need to
|
|
my $props = {};
|
|
LJ::load_talk_props2( $userid, \@ids, $props ) if $args->{props};
|
|
|
|
# now start spitting out data
|
|
$r->print("<comments>\n");
|
|
foreach my $id (@ids) {
|
|
|
|
# get text for this comment
|
|
my $data = $comments{$id};
|
|
my $text = $texts->{$id};
|
|
my ( $subject, $body ) = @{ $text || [] };
|
|
|
|
# only spit out valid UTF8, and make sure it fits in XML, and uncompress it
|
|
LJ::text_uncompress( \$body );
|
|
LJ::text_out( \$subject );
|
|
LJ::text_out( \$body );
|
|
$subject = LJ::exml($subject);
|
|
$body = LJ::exml($body);
|
|
|
|
# setup the date to be GMT and formatted per W3C specs
|
|
my $date = LJ::mysqldate_to_time( $data->{datepost} );
|
|
$date = LJ::time_to_w3c( $date, 'Z' );
|
|
|
|
$filter->($data);
|
|
|
|
# print the data
|
|
my $ret = "<comment id='$id' jitemid='$data->{nodeid}'";
|
|
$ret .= " posterid='$data->{posterid}'" if $data->{posterid};
|
|
$ret .= " state='$data->{state}'" if $data->{state} ne 'A';
|
|
$ret .= " parentid='$data->{parenttalkid}'" if $data->{parenttalkid};
|
|
if ( $data->{state} eq 'D' ) {
|
|
$ret .= " />\n";
|
|
}
|
|
else {
|
|
$ret .= ">\n";
|
|
$ret .= "<subject>$subject</subject>\n" if $subject;
|
|
$ret .= "<body>$body</body>\n" if $body;
|
|
$ret .= "<date>$date</date>\n";
|
|
foreach my $propkey ( keys %{ $props->{$id} || {} } ) {
|
|
$ret .= "<property name='$propkey'>";
|
|
$ret .= LJ::exml( $props->{$id}->{$propkey} );
|
|
$ret .= "</property>\n";
|
|
}
|
|
$ret .= "</comment>\n";
|
|
}
|
|
$r->print($ret);
|
|
}
|
|
$r->print("</comments>\n");
|
|
}
|
|
|
|
# all done
|
|
$r->print("</livejournal>\n");
|
|
|
|
return $r->OK;
|
|
}
|
|
|
|
1;
|