mourningdove/cgi-bin/DW/Worker/ContentImporter/Local/Entries.pm

246 lines
6.7 KiB
Perl
Raw Normal View History

2026-05-24 01:03:05 +00:00
#!/usr/bin/perl
#
# DW::Worker::ContentImporter::Local::Entries
#
# Local data utilities to handle importing of entries into the local site.
#
# Authors:
# Andrea Nall <anall@andreanall.com>
# Mark Smith <mark@dreamwidth.org>
#
# Copyright (c) 2009 by Dreamwidth Studios, LLC.
#
# This program is free software; you may redistribute it and/or modify it under
# the same terms as Perl itself. For a copy of the license, please reference
# 'perldoc perlartistic' or 'perldoc perlgpl'.
#
package DW::Worker::ContentImporter::Local::Entries;
use strict;
use Carp qw/ croak /;
use Encode qw/ encode_utf8 /;
=head1 NAME
DW::Worker::ContentImporter::Local::Entries - Local data utilities for entries
=head1 Entries
These functions are part of the Saving API for entries.
=head2 C<< $class->get_entry_map( $user, $hashref ) >>
Returns a hashref mapping import_source keys to jitemids
=cut
sub get_entry_map {
my ( $class, $u ) = @_;
my $p = LJ::get_prop( log => 'import_source' )
or croak 'unable to load logprop';
my $dbcr = LJ::get_cluster_reader($u)
or croak 'unable to connect to database';
my $sth =
$dbcr->prepare("SELECT jitemid, value FROM logprop2 WHERE journalid = ? AND propid = ?")
or croak 'unable to prepare SQL';
$sth->execute( $u->id, $p->{id} );
croak 'database error: ' . $sth->errstr
if $sth->err;
my %map;
while ( my ( $jitemid, $value ) = $sth->fetchrow_array ) {
while ( exists $map{$value} ) {
$value .= "/x";
}
$map{$value} = $jitemid;
}
return \%map;
}
=head2 C<< $class->get_duplicates_map( $u ) >>
Returns a hashref mapping identifying entry metadata to jitemids.
We assume that an exact match on subject and timestamp will be sufficient
to identify duplicates
=cut
sub get_duplicates_map {
my ( $class, $u ) = @_;
my $dbr = LJ::get_cluster_reader($u)
or croak 'unable to connect to database';
my $sth =
$dbr->prepare( "SELECT l.jitemid, UNIX_TIMESTAMP(l.logtime), lt.subject"
. " FROM log2 l LEFT JOIN logtext2 lt ON ( l.jitemid = lt.jitemid AND l.journalid=lt.journalid )"
. " WHERE l.journalid=?" )
or croak 'unable to prepare SQL';
$sth->execute( $u->id );
croak 'database error: ' . $sth->errstr
if $sth->err;
my %dupes_map;
while ( my ( $id, $logtime, $subject ) = $sth->fetchrow_array ) {
$dupes_map{"$logtime-$subject"} = $id;
}
return \%dupes_map;
}
=head2 C<< $class->post_event( $hashref, $u, $event, $item_errors ) >>
$event is a hashref representation of a single entry, with the followinginx.confng format:
{
# standard event values
subject => 'My Entry',
event => 'I DID STUFF!!!!!',
security => 'usemask',
allowmask => 1,
eventtime => 'yyyy-mm-dd hh:mm:ss',
props => {
heres_a_userprop => "there's a userprop",
and_another_little => "userprop",
}
# the key is a uniquely opaque string that identifies this entry. this must be
# unique across all possible import sources. the permalink may work best.
key => 'some_unique_key',
# a url to this entry's original location
url => 'http://permalink.tld/',
}
$item_errors is an arrayref of errors to be formatted nicely with a link to old and new entries.
Returns (1, $res) on success, (undef, $res) on error.
=cut
sub post_event {
my ( $class, $data, $map, $u, $posteru, $evt, $errors ) = @_;
return if $map->{ $evt->{key} };
my ( $yr, $month, $day, $hr, $min, $sec );
( $yr, $month, $day, $hr, $min, $sec ) = ( $1, $2, $3, $4, $5, $6 )
if $evt->{eventtime} =~ m/(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/;
# Rarely, we will get text that isn't valid UTF-8. If that's the case, shove it through the
# encoder and hope for the best. Don't double-encode if it's already valid, though.
foreach my $key (qw/ subject event /) {
$evt->{$key} = encode_utf8( $evt->{$key} )
unless LJ::text_in( $evt->{$key} );
}
foreach my $prop ( keys %{ $evt->{props} } ) {
$evt->{props}->{$prop} = encode_utf8( $evt->{props}->{$prop} )
unless LJ::text_in( $evt->{props}->{$prop} );
}
my %proto = (
lineendings => 'unix',
subject => $evt->{subject},
event => $evt->{event},
security => $evt->{security},
allowmask => $evt->{allowmask},
year => $yr,
mon => $month,
day => $day,
hour => $hr,
min => $min,
);
my $props = $evt->{props};
# this is a list of props that actually exist on this site
# but have been shown to cause failures importing that entry.
my %bad_props = (
current_coords => 1,
personifi_word_count => 1,
personifi_lang => 1,
personifi_tags => 1,
give_features => 1,
spam_counter => 1,
poster_ip => 1,
uniq => 1,
);
foreach my $prop ( keys %$props ) {
next if $bad_props{$prop};
my $p = LJ::get_prop( "log", $prop )
or next;
next if $p->{ownership} eq 'system';
$proto{"prop_$prop"} = $props->{$prop};
}
# Overwrite these here in case we're importing from an imported journal (hey, it could happen)
$proto{prop_import_source} = $evt->{key};
if ( defined $posteru ) {
delete $proto{prop_opt_backdated};
}
else {
$proto{prop_opt_backdated} = 1;
}
my %res;
LJ::do_request(
{
mode => 'postevent',
user => $posteru ? $posteru->user : $u->user,
usejournal => $posteru ? $u->user : undef,
ver => $LJ::PROTOCOL_VER,
%proto,
},
\%res,
{
u => $posteru || $u,
u_owner => $u,
importer_bypass => 1,
allow_truncated_subject => 1,
}
);
if ( $res{success} eq 'FAIL' ) {
push @$errors, "Failed to post: $res{errmsg}";
return ( undef, \%res );
}
else {
$u->do( "UPDATE log2 SET logtime = ? where journalid = ? and jitemid = ?",
undef, $evt->{logtime}, $u->userid, $res{itemid} );
$map->{ $evt->{key} } = $res{itemid};
return ( 1, \%res );
}
# flow will never get here
}
=head1 AUTHORS
=over
=item Andrea Nall <anall@andreanall.com>
=item Mark Smith <mark@dreamwidth.org>
=back
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2009 by Dreamwidth Studios, LLC.
This program is free software; you may redistribute it and/or modify it under
the same terms as Perl itself. For a copy of the license, please reference
'perldoc perlartistic' or 'perldoc perlgpl'.
=cut
1;