547 lines
14 KiB
Perl
547 lines
14 KiB
Perl
#!/usr/bin/perl
|
|
#
|
|
# Authors:
|
|
# Afuna <coder.dw@afunamatata.com>
|
|
#
|
|
# Copyright (c) 2013 by Dreamwidth Studios, LLC.
|
|
#
|
|
# This program is free software; you may redistribute it and/or modify it under
|
|
# the same terms as Perl itself. For a copy of the license, please reference
|
|
# 'perldoc perlartistic' or 'perldoc perlgpl'.
|
|
|
|
package DW::EmailPost::Base;
|
|
|
|
use strict;
|
|
|
|
require 'ljlib.pl';
|
|
use LJ::Emailpost::Web;
|
|
use DW::Formats;
|
|
|
|
use Encode;
|
|
use MIME::Words ();
|
|
use Unicode::MapUTF8 ();
|
|
|
|
my $workdir = "/tmp";
|
|
|
|
=head1 NAME
|
|
|
|
DW::EmailPost::Base - Basic email posting behavior
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
This is the basic email posting behavior. Subclasses should implement the following:
|
|
|
|
=over
|
|
|
|
=item _find_destination - given a list of email addresses, return one you're interested in (or undef if none)
|
|
|
|
=item _parse_destination - given an auth string taken from the "to:" email header, set the user/journal/validated information
|
|
|
|
=item _process - process the email. ::Base does some of the common cleanup for you. It's up to you to finish the job. Call $self->cleanup_body_final in here
|
|
|
|
=back
|
|
|
|
=cut
|
|
|
|
=head2 C<< $class->new( $mime_entity ) >>
|
|
|
|
Create an instance of DW::EmailPost::Base
|
|
|
|
=cut
|
|
|
|
sub new {
|
|
my ( $class, $mime_entity ) = @_;
|
|
|
|
my $self = bless {
|
|
|
|
_entity => $mime_entity,
|
|
_entity_head => $mime_entity->head,
|
|
|
|
dequeue => 1,
|
|
|
|
}, $class;
|
|
|
|
return $self;
|
|
}
|
|
|
|
=head1 CLASS METHODS
|
|
|
|
=head2 C<< $class->find_destination( $mime_entity ) >>
|
|
|
|
Given a mime entity object, return the scalar $user journal
|
|
(or undef) that this email is destined to post to
|
|
|
|
Subclasses must implement a _find_destination sub
|
|
|
|
=cut
|
|
|
|
sub find_destination {
|
|
my ( $class, $mime_entity ) = @_;
|
|
|
|
my @to_addresses = map { $_->address } Mail::Address->parse( $mime_entity->head->get('To') );
|
|
|
|
return $class->_find_destination(@to_addresses);
|
|
}
|
|
|
|
=head2 C<< $class->should_handle( $mime_entity ) >>
|
|
|
|
Given a mime entity object, return 1 if we're interested in handling this.
|
|
Return 0 if not.
|
|
|
|
=cut
|
|
|
|
sub should_handle {
|
|
my ( $class, $mime_entity ) = @_;
|
|
return $class->find_destination($mime_entity) ? 1 : 0;
|
|
}
|
|
|
|
=head1 INSTANCE METHODS
|
|
|
|
=head2 C<< $self->process >>
|
|
|
|
Process the email. Returns a status message indicating either success or reason for failure
|
|
|
|
This base implementation pulls out subject/body, finds the important metadata from the email
|
|
(such as address, username, etc) and does character decoding.
|
|
|
|
Subclasses must implement a _process sub for subclass-specific handling
|
|
|
|
=cut
|
|
|
|
sub process {
|
|
my ($self) = @_;
|
|
|
|
# pull out the head, and remove extra newlines
|
|
$self->{_entity_head}->unfold;
|
|
|
|
$self->_init_required;
|
|
return unless $self->{from};
|
|
|
|
# left side of "to" address
|
|
$self->{destination} ||= $self->find_destination( $self->{_entity} );
|
|
$self->parse_destination( $self->{destination} ) or return $self->send_error;
|
|
|
|
return $self->send_error("Email gateway access denied for your account type.")
|
|
unless $LJ::T_ALLOW_EMAILPOST || $self->{u}->can_emailpost;
|
|
|
|
# metadata that's not strictly needed, but could be useful later
|
|
$self->_init_optional;
|
|
|
|
# get the body and subject from the email
|
|
# processed character encoding, but not cleaned up further than that
|
|
# will probably need further processing before using as entry/comment text
|
|
$self->_extract_text or return $self->send_error;
|
|
$self->_extract_post_headers or return $self->send_error;
|
|
|
|
return $self->_process;
|
|
}
|
|
|
|
=head2 C<< $self->parse_destination( $auth_string ) >>
|
|
|
|
Given an auth string (lefthand side of "to:" header), set authorization options
|
|
|
|
Must set: u, journal
|
|
|
|
Subclasses must implement a _parse_destination sub
|
|
=cut
|
|
|
|
sub parse_destination {
|
|
my ( $self, $auth_string ) = @_;
|
|
|
|
$self->_parse_destination($auth_string) or return 0;
|
|
|
|
return 0 unless $self->{u} && $self->{u}->is_visible;
|
|
|
|
return 1;
|
|
}
|
|
|
|
=head2 C<< $self->cleanup_body_final >>
|
|
|
|
Final cleanup of the body text: remove signatures, adjust whitespace, etc.
|
|
Subclass should call this when doing _process
|
|
|
|
=cut
|
|
|
|
sub cleanup_body_final {
|
|
my $self = $_[0];
|
|
|
|
$self->{body} =~ s/^(?:\- )?[\-_]{2,}(\s| )*\r?\n.*//ms; # trim sigs
|
|
|
|
my $content_type = $self->{content_type};
|
|
|
|
# respect flowed text
|
|
if ( lc $content_type->{format} eq 'flowed' ) {
|
|
if ( $content_type->{delsp} && lc $content_type->{delsp} eq 'yes' ) {
|
|
$self->{body} =~ s/ \n//g;
|
|
}
|
|
else {
|
|
$self->{body} =~ s/ \n/ /g;
|
|
}
|
|
}
|
|
|
|
# trim off excess whitespace (html cleaner converts to breaks)
|
|
$self->{body} =~ s/\n+$/\n/;
|
|
}
|
|
|
|
# convenience methods
|
|
# discover the from/user to post as/journal to post to
|
|
sub _init_required {
|
|
my $self = $_[0];
|
|
|
|
# from address
|
|
$self->{from} =
|
|
${ ( Mail::Address->parse( $self->{_entity_head}->get('From:') ) )[0] || [] }[1];
|
|
}
|
|
|
|
sub _init_optional {
|
|
my $self = $_[0];
|
|
|
|
my $head = $self->{_entity_head};
|
|
|
|
# The return path should normally not ever be messed up enough to require this,
|
|
# but some mailers nowadays do some very strange things.
|
|
$self->{return_path} = ${ ( Mail::Address->parse( $head->get('Return-Path') ) )[0] || [] }[1];
|
|
|
|
$self->{email_date} = $head->get('Date:');
|
|
}
|
|
|
|
# body / subject / content_type
|
|
sub _extract_text {
|
|
my $self = $_[0];
|
|
|
|
# Use text/plain piece first - if it doesn't exist, then fallback to text/html
|
|
my $tent = $self->get_entity( $self->{_entity} )
|
|
|| $self->get_entity( $self->{_entity}, 'html' );
|
|
$self->{_tent} = $tent;
|
|
|
|
# $self->{content_type}
|
|
$self->_parse_content_type( $tent ? $tent->head->get('Content-type:') : '' );
|
|
|
|
# $self->{body}, $self->{subject}
|
|
$self->_clean_body_and_subject( $tent ? $tent->bodyhandle->as_string : "",
|
|
$self->{_entity_head}->get('Subject:') )
|
|
or return;
|
|
|
|
return 1;
|
|
}
|
|
|
|
# extract any lj-*, post-* headers
|
|
# these are not validated; any error-checking must be done by whatever is using them
|
|
sub _extract_post_headers {
|
|
my $self = $_[0];
|
|
|
|
my ( %post_headers, $amask );
|
|
|
|
# first look for old style lj headers
|
|
while ( $self->{body} =~ s/(?:^|\n)lj-(.+?):\s*(.+?)(?:$|\n)//is ) {
|
|
$post_headers{ lc($1) } = LJ::trim($2);
|
|
}
|
|
|
|
# next look for new style post headers
|
|
# so if both are specified, this value will be retained
|
|
while ( $self->{body} =~ s/(?:^|\n)post-(.+?):\s*(.+?)(?:$|\n)//is ) {
|
|
$post_headers{ lc($1) } = LJ::trim($2);
|
|
}
|
|
|
|
# remove any whitespace between post headers and body
|
|
$self->{body} =~ s/^\s*//;
|
|
|
|
$self->{post_headers} = \%post_headers;
|
|
|
|
return 1;
|
|
}
|
|
|
|
# given a content-type header, return a hash of content-type attributes
|
|
sub _parse_content_type {
|
|
my ( $self, $content_type ) = @_;
|
|
|
|
my %content_type_opts;
|
|
|
|
# Snag charset
|
|
$content_type_opts{_orig} = $content_type;
|
|
|
|
$content_type_opts{charset} = $1
|
|
if $content_type =~ /\bcharset=['\"]?(\S+?)['\"]?[\s\;]/i;
|
|
|
|
$content_type_opts{format} = $1
|
|
if $content_type =~ /\bformat=['\"]?(\S+?)['\"]?[\s\;]/i;
|
|
|
|
$content_type_opts{delsp} = $1
|
|
if $content_type =~ /\bdelsp=['\"]?(\w+?)['\"]?[\s\;]/i;
|
|
|
|
$self->{content_type} = \%content_type_opts;
|
|
}
|
|
|
|
# clean up the body and subject
|
|
sub _clean_body_and_subject {
|
|
my ( $self, $body, $subject ) = @_;
|
|
|
|
my $content_type = $self->{content_type};
|
|
|
|
# set before processing to original version
|
|
$self->{body} = $body;
|
|
$self->{subject} = $subject;
|
|
|
|
# remove leading and trailing whitespace
|
|
$body =~ s/^\s+//;
|
|
$body =~ s/\s+$//;
|
|
|
|
# do utf-8 conversion
|
|
my $body_charset = $content_type->{charset};
|
|
if ( defined($body_charset)
|
|
&& $body_charset !~ /^UTF-?8$/i )
|
|
{ # no charset? assume us-ascii
|
|
|
|
unless ( Unicode::MapUTF8::utf8_supported_charset($body_charset) ) {
|
|
$self->{error} = "Unknown charset encoding type. ($body_charset)";
|
|
return;
|
|
}
|
|
|
|
$body = Unicode::MapUTF8::to_utf8(
|
|
{
|
|
-string => $body,
|
|
-charset => $body_charset,
|
|
}
|
|
);
|
|
}
|
|
|
|
# check subject for rfc-1521 junk
|
|
chomp $subject;
|
|
if ( $subject =~ /^=\?/ ) {
|
|
my @subj_data = MIME::Words::decode_mimewords($subject);
|
|
my ( $string, $subject_charset ) = ( $subj_data[0][0], $subj_data[0][1] );
|
|
if (@subj_data) {
|
|
if ( $subject =~ /utf-8/i ) {
|
|
$subject = $string;
|
|
}
|
|
else {
|
|
unless ( Unicode::MapUTF8::utf8_supported_charset($subject_charset) ) {
|
|
$self->{error} = "Unknown charset encoding type. ($subject_charset)";
|
|
return;
|
|
}
|
|
|
|
$subject = Unicode::MapUTF8::to_utf8(
|
|
{
|
|
-string => $string,
|
|
-charset => $subject_charset,
|
|
}
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
# set after processing to processed version
|
|
$self->{body} = $body;
|
|
$self->{subject} = $subject;
|
|
|
|
return 1;
|
|
}
|
|
|
|
# Convert special email format keywords to the real format IDs used by
|
|
# DW::Formats and LJ::CleanHTML.
|
|
sub _choose_editor {
|
|
my ( $self, $format ) = @_;
|
|
$format = lc($format);
|
|
|
|
# Support email-only short names for the active formats:
|
|
$format = 'markdown_latest' if $format eq 'markdown';
|
|
$format = 'html_casual_latest' if $format eq 'html';
|
|
|
|
$format = DW::Formats::validate($format);
|
|
|
|
# If validate returns '', use an email-specific default.
|
|
$format = DW::Formats::validate('markdown_latest') unless $format;
|
|
|
|
return $format;
|
|
}
|
|
|
|
# By default, returns first plain text entity from email message.
|
|
# Specifying a type will return an array of MIME::Entity handles
|
|
# of that type. (image, application, etc)
|
|
# Specifying a type of 'all' will return all MIME::Entities,
|
|
# regardless of type.
|
|
sub get_entity {
|
|
my ( $self, $entity, $type ) = @_;
|
|
|
|
# old arguments were a hashref
|
|
$type = $type->{type} if ref $type eq "HASH";
|
|
|
|
# default to text
|
|
$type ||= 'text';
|
|
|
|
my $head = $entity->head;
|
|
my $mime_type = $head->mime_type;
|
|
|
|
return $entity if $type eq 'text' && $mime_type eq "text/plain";
|
|
return $entity if $type eq 'html' && $mime_type eq "text/html";
|
|
my @entities;
|
|
|
|
# Only bother looking in messages that advertise attachments
|
|
my $mimeattach_re = qr{ m|^multipart/(?:alternative|signed|mixed|related)$| };
|
|
if ( $mime_type =~ $mimeattach_re ) {
|
|
my $partcount = $entity->parts;
|
|
for ( my $i = 0 ; $i < $partcount ; $i++ ) {
|
|
my $alte = $entity->parts($i);
|
|
|
|
return $alte if $type eq 'text' && $alte->mime_type eq "text/plain";
|
|
return $alte if $type eq 'html' && $alte->mime_type eq "text/html";
|
|
push @entities, $alte if $type eq 'all';
|
|
|
|
if ( $type eq 'image'
|
|
&& $alte->mime_type =~ m#^application/octet-stream# )
|
|
{
|
|
my $alte_head = $alte->head;
|
|
my $filename = $alte_head->recommended_filename;
|
|
push @entities, $alte if $filename =~ /\.(?:gif|png|tiff?|jpe?g)$/;
|
|
}
|
|
push @entities, $alte
|
|
if $alte->mime_type =~ /^$type/
|
|
&& $type ne 'all';
|
|
|
|
# Recursively search through nested MIME for various pieces
|
|
if ( $alte->mime_type =~ $mimeattach_re ) {
|
|
if ( $type =~ /^(?:text|html)$/ ) {
|
|
my $text_entity = $self->get_entity( $entity->parts($i), $type );
|
|
return $text_entity if $text_entity;
|
|
}
|
|
else {
|
|
push @entities, $self->get_entity( $entity->parts($i), $type );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return @entities if $type ne 'text' && scalar @entities;
|
|
return;
|
|
}
|
|
|
|
# sets the error message
|
|
sub err {
|
|
my ( $self, $error, $error_args ) = @_;
|
|
$self->{error} = $error;
|
|
$self->{error_args} = $error_args;
|
|
return;
|
|
}
|
|
|
|
# fires off error notifications, etc
|
|
sub send_error {
|
|
my ( $self, $msg, %opt ) = @_;
|
|
|
|
$msg ||= $self->{error};
|
|
%opt = ( %{ $self->{error_args} || {} }, %opt );
|
|
|
|
my $errbody;
|
|
$errbody .= "There was an error during your email posting:\n\n";
|
|
$errbody .= $msg;
|
|
|
|
if ( $self->{body} ) {
|
|
$errbody .= "\n\n\nOriginal posting follows:\n\n";
|
|
$errbody .= $self->{body};
|
|
}
|
|
|
|
my $err_addr = $self->find_error_address;
|
|
|
|
# Rate limit email to 1/5min/address
|
|
if ( !$opt{nomail}
|
|
&& !$opt{retry}
|
|
&& $err_addr
|
|
&& LJ::MemCache::add( "rate_eperr:$err_addr", 5, 300 ) )
|
|
{
|
|
|
|
LJ::send_mail(
|
|
{
|
|
to => $err_addr,
|
|
from => $LJ::BOGUS_EMAIL,
|
|
fromname => "$LJ::SITENAME Error",
|
|
subject => "$LJ::SITENAME posting error: $self->{subject}",
|
|
body => $errbody
|
|
}
|
|
);
|
|
}
|
|
|
|
$self->{dequeue} = 0 if $opt{retry};
|
|
|
|
$opt{m} = $msg;
|
|
$opt{s} = $self->{subject};
|
|
$opt{e} = 1;
|
|
$self->dblog(%opt) unless $opt{nolog};
|
|
|
|
return ( 0, $msg );
|
|
}
|
|
|
|
sub dblog {
|
|
my ( $self, %info ) = @_;
|
|
return unless $self->{u};
|
|
|
|
%info = ( %info, $self->dblog_opts );
|
|
|
|
chomp $info{s};
|
|
$self->{u}->log_event( 'emailpost', \%info );
|
|
return;
|
|
}
|
|
|
|
=head2 C<< $self->dblog_opts >>
|
|
|
|
Class-specific options
|
|
|
|
=cut
|
|
|
|
sub dblog_opts { (); }
|
|
|
|
=head2 C<< $self->set_error_address >>
|
|
|
|
Given a user object and an email address, discover the appropriate email
|
|
to send any error messages to.
|
|
|
|
Fallback to raw address if no explicit allowed senders.
|
|
|
|
=cut
|
|
|
|
sub find_error_address {
|
|
my ($self) = $_[0];
|
|
return unless $self->{u};
|
|
|
|
my $err_addr;
|
|
my $addrlist = LJ::Emailpost::Web::get_allowed_senders( $self->{u} );
|
|
my $from = $self->{from};
|
|
foreach my $allowed_sender ( keys %$addrlist ) {
|
|
if ( lc $from eq lc $allowed_sender
|
|
&& $addrlist->{$allowed_sender}->{get_errors} )
|
|
{
|
|
$err_addr = $from;
|
|
last;
|
|
}
|
|
}
|
|
|
|
$err_addr ||= $self->{u}->email_raw if $self->{u};
|
|
return $err_addr;
|
|
}
|
|
|
|
=head1 GETTERS / SETTERS
|
|
|
|
=head2 C<< $self->destination( [ $destination ] ) >>
|
|
|
|
Get/set the destination this was sent to (left part of the To:)
|
|
|
|
=cut
|
|
|
|
sub destination {
|
|
my ( $self, $destination ) = @_;
|
|
|
|
$self->{destination} = $destination
|
|
if $destination;
|
|
|
|
return $destination;
|
|
}
|
|
|
|
=head2 C<< $self->dequeue >>
|
|
|
|
Returns whether this email post should be dequeued (1) or retried (0).
|
|
|
|
=cut
|
|
|
|
sub dequeue {
|
|
return $_[0]->{dequeue};
|
|
}
|
|
|
|
1;
|