mourningdove/cgi-bin/DW/EmailPost/Base.pm
2026-05-24 01:03:05 +00:00

547 lines
14 KiB
Perl

#!/usr/bin/perl
#
# Authors:
# Afuna <coder.dw@afunamatata.com>
#
# Copyright (c) 2013 by Dreamwidth Studios, LLC.
#
# This program is free software; you may redistribute it and/or modify it under
# the same terms as Perl itself. For a copy of the license, please reference
# 'perldoc perlartistic' or 'perldoc perlgpl'.
package DW::EmailPost::Base;
use strict;
require 'ljlib.pl';
use LJ::Emailpost::Web;
use DW::Formats;
use Encode;
use MIME::Words ();
use Unicode::MapUTF8 ();
my $workdir = "/tmp";
=head1 NAME
DW::EmailPost::Base - Basic email posting behavior
=head1 SYNOPSIS
This is the basic email posting behavior. Subclasses should implement the following:
=over
=item _find_destination - given a list of email addresses, return one you're interested in (or undef if none)
=item _parse_destination - given an auth string taken from the "to:" email header, set the user/journal/validated information
=item _process - process the email. ::Base does some of the common cleanup for you. It's up to you to finish the job. Call $self->cleanup_body_final in here
=back
=cut
=head2 C<< $class->new( $mime_entity ) >>
Create an instance of DW::EmailPost::Base
=cut
sub new {
my ( $class, $mime_entity ) = @_;
my $self = bless {
_entity => $mime_entity,
_entity_head => $mime_entity->head,
dequeue => 1,
}, $class;
return $self;
}
=head1 CLASS METHODS
=head2 C<< $class->find_destination( $mime_entity ) >>
Given a mime entity object, return the scalar $user journal
(or undef) that this email is destined to post to
Subclasses must implement a _find_destination sub
=cut
sub find_destination {
my ( $class, $mime_entity ) = @_;
my @to_addresses = map { $_->address } Mail::Address->parse( $mime_entity->head->get('To') );
return $class->_find_destination(@to_addresses);
}
=head2 C<< $class->should_handle( $mime_entity ) >>
Given a mime entity object, return 1 if we're interested in handling this.
Return 0 if not.
=cut
sub should_handle {
my ( $class, $mime_entity ) = @_;
return $class->find_destination($mime_entity) ? 1 : 0;
}
=head1 INSTANCE METHODS
=head2 C<< $self->process >>
Process the email. Returns a status message indicating either success or reason for failure
This base implementation pulls out subject/body, finds the important metadata from the email
(such as address, username, etc) and does character decoding.
Subclasses must implement a _process sub for subclass-specific handling
=cut
sub process {
my ($self) = @_;
# pull out the head, and remove extra newlines
$self->{_entity_head}->unfold;
$self->_init_required;
return unless $self->{from};
# left side of "to" address
$self->{destination} ||= $self->find_destination( $self->{_entity} );
$self->parse_destination( $self->{destination} ) or return $self->send_error;
return $self->send_error("Email gateway access denied for your account type.")
unless $LJ::T_ALLOW_EMAILPOST || $self->{u}->can_emailpost;
# metadata that's not strictly needed, but could be useful later
$self->_init_optional;
# get the body and subject from the email
# processed character encoding, but not cleaned up further than that
# will probably need further processing before using as entry/comment text
$self->_extract_text or return $self->send_error;
$self->_extract_post_headers or return $self->send_error;
return $self->_process;
}
=head2 C<< $self->parse_destination( $auth_string ) >>
Given an auth string (lefthand side of "to:" header), set authorization options
Must set: u, journal
Subclasses must implement a _parse_destination sub
=cut
sub parse_destination {
my ( $self, $auth_string ) = @_;
$self->_parse_destination($auth_string) or return 0;
return 0 unless $self->{u} && $self->{u}->is_visible;
return 1;
}
=head2 C<< $self->cleanup_body_final >>
Final cleanup of the body text: remove signatures, adjust whitespace, etc.
Subclass should call this when doing _process
=cut
sub cleanup_body_final {
my $self = $_[0];
$self->{body} =~ s/^(?:\- )?[\-_]{2,}(\s|&nbsp;)*\r?\n.*//ms; # trim sigs
my $content_type = $self->{content_type};
# respect flowed text
if ( lc $content_type->{format} eq 'flowed' ) {
if ( $content_type->{delsp} && lc $content_type->{delsp} eq 'yes' ) {
$self->{body} =~ s/ \n//g;
}
else {
$self->{body} =~ s/ \n/ /g;
}
}
# trim off excess whitespace (html cleaner converts to breaks)
$self->{body} =~ s/\n+$/\n/;
}
# convenience methods
# discover the from/user to post as/journal to post to
sub _init_required {
my $self = $_[0];
# from address
$self->{from} =
${ ( Mail::Address->parse( $self->{_entity_head}->get('From:') ) )[0] || [] }[1];
}
sub _init_optional {
my $self = $_[0];
my $head = $self->{_entity_head};
# The return path should normally not ever be messed up enough to require this,
# but some mailers nowadays do some very strange things.
$self->{return_path} = ${ ( Mail::Address->parse( $head->get('Return-Path') ) )[0] || [] }[1];
$self->{email_date} = $head->get('Date:');
}
# body / subject / content_type
sub _extract_text {
my $self = $_[0];
# Use text/plain piece first - if it doesn't exist, then fallback to text/html
my $tent = $self->get_entity( $self->{_entity} )
|| $self->get_entity( $self->{_entity}, 'html' );
$self->{_tent} = $tent;
# $self->{content_type}
$self->_parse_content_type( $tent ? $tent->head->get('Content-type:') : '' );
# $self->{body}, $self->{subject}
$self->_clean_body_and_subject( $tent ? $tent->bodyhandle->as_string : "",
$self->{_entity_head}->get('Subject:') )
or return;
return 1;
}
# extract any lj-*, post-* headers
# these are not validated; any error-checking must be done by whatever is using them
sub _extract_post_headers {
my $self = $_[0];
my ( %post_headers, $amask );
# first look for old style lj headers
while ( $self->{body} =~ s/(?:^|\n)lj-(.+?):\s*(.+?)(?:$|\n)//is ) {
$post_headers{ lc($1) } = LJ::trim($2);
}
# next look for new style post headers
# so if both are specified, this value will be retained
while ( $self->{body} =~ s/(?:^|\n)post-(.+?):\s*(.+?)(?:$|\n)//is ) {
$post_headers{ lc($1) } = LJ::trim($2);
}
# remove any whitespace between post headers and body
$self->{body} =~ s/^\s*//;
$self->{post_headers} = \%post_headers;
return 1;
}
# given a content-type header, return a hash of content-type attributes
sub _parse_content_type {
my ( $self, $content_type ) = @_;
my %content_type_opts;
# Snag charset
$content_type_opts{_orig} = $content_type;
$content_type_opts{charset} = $1
if $content_type =~ /\bcharset=['\"]?(\S+?)['\"]?[\s\;]/i;
$content_type_opts{format} = $1
if $content_type =~ /\bformat=['\"]?(\S+?)['\"]?[\s\;]/i;
$content_type_opts{delsp} = $1
if $content_type =~ /\bdelsp=['\"]?(\w+?)['\"]?[\s\;]/i;
$self->{content_type} = \%content_type_opts;
}
# clean up the body and subject
sub _clean_body_and_subject {
my ( $self, $body, $subject ) = @_;
my $content_type = $self->{content_type};
# set before processing to original version
$self->{body} = $body;
$self->{subject} = $subject;
# remove leading and trailing whitespace
$body =~ s/^\s+//;
$body =~ s/\s+$//;
# do utf-8 conversion
my $body_charset = $content_type->{charset};
if ( defined($body_charset)
&& $body_charset !~ /^UTF-?8$/i )
{ # no charset? assume us-ascii
unless ( Unicode::MapUTF8::utf8_supported_charset($body_charset) ) {
$self->{error} = "Unknown charset encoding type. ($body_charset)";
return;
}
$body = Unicode::MapUTF8::to_utf8(
{
-string => $body,
-charset => $body_charset,
}
);
}
# check subject for rfc-1521 junk
chomp $subject;
if ( $subject =~ /^=\?/ ) {
my @subj_data = MIME::Words::decode_mimewords($subject);
my ( $string, $subject_charset ) = ( $subj_data[0][0], $subj_data[0][1] );
if (@subj_data) {
if ( $subject =~ /utf-8/i ) {
$subject = $string;
}
else {
unless ( Unicode::MapUTF8::utf8_supported_charset($subject_charset) ) {
$self->{error} = "Unknown charset encoding type. ($subject_charset)";
return;
}
$subject = Unicode::MapUTF8::to_utf8(
{
-string => $string,
-charset => $subject_charset,
}
);
}
}
}
# set after processing to processed version
$self->{body} = $body;
$self->{subject} = $subject;
return 1;
}
# Convert special email format keywords to the real format IDs used by
# DW::Formats and LJ::CleanHTML.
sub _choose_editor {
my ( $self, $format ) = @_;
$format = lc($format);
# Support email-only short names for the active formats:
$format = 'markdown_latest' if $format eq 'markdown';
$format = 'html_casual_latest' if $format eq 'html';
$format = DW::Formats::validate($format);
# If validate returns '', use an email-specific default.
$format = DW::Formats::validate('markdown_latest') unless $format;
return $format;
}
# By default, returns first plain text entity from email message.
# Specifying a type will return an array of MIME::Entity handles
# of that type. (image, application, etc)
# Specifying a type of 'all' will return all MIME::Entities,
# regardless of type.
sub get_entity {
my ( $self, $entity, $type ) = @_;
# old arguments were a hashref
$type = $type->{type} if ref $type eq "HASH";
# default to text
$type ||= 'text';
my $head = $entity->head;
my $mime_type = $head->mime_type;
return $entity if $type eq 'text' && $mime_type eq "text/plain";
return $entity if $type eq 'html' && $mime_type eq "text/html";
my @entities;
# Only bother looking in messages that advertise attachments
my $mimeattach_re = qr{ m|^multipart/(?:alternative|signed|mixed|related)$| };
if ( $mime_type =~ $mimeattach_re ) {
my $partcount = $entity->parts;
for ( my $i = 0 ; $i < $partcount ; $i++ ) {
my $alte = $entity->parts($i);
return $alte if $type eq 'text' && $alte->mime_type eq "text/plain";
return $alte if $type eq 'html' && $alte->mime_type eq "text/html";
push @entities, $alte if $type eq 'all';
if ( $type eq 'image'
&& $alte->mime_type =~ m#^application/octet-stream# )
{
my $alte_head = $alte->head;
my $filename = $alte_head->recommended_filename;
push @entities, $alte if $filename =~ /\.(?:gif|png|tiff?|jpe?g)$/;
}
push @entities, $alte
if $alte->mime_type =~ /^$type/
&& $type ne 'all';
# Recursively search through nested MIME for various pieces
if ( $alte->mime_type =~ $mimeattach_re ) {
if ( $type =~ /^(?:text|html)$/ ) {
my $text_entity = $self->get_entity( $entity->parts($i), $type );
return $text_entity if $text_entity;
}
else {
push @entities, $self->get_entity( $entity->parts($i), $type );
}
}
}
}
return @entities if $type ne 'text' && scalar @entities;
return;
}
# sets the error message
sub err {
my ( $self, $error, $error_args ) = @_;
$self->{error} = $error;
$self->{error_args} = $error_args;
return;
}
# fires off error notifications, etc
sub send_error {
my ( $self, $msg, %opt ) = @_;
$msg ||= $self->{error};
%opt = ( %{ $self->{error_args} || {} }, %opt );
my $errbody;
$errbody .= "There was an error during your email posting:\n\n";
$errbody .= $msg;
if ( $self->{body} ) {
$errbody .= "\n\n\nOriginal posting follows:\n\n";
$errbody .= $self->{body};
}
my $err_addr = $self->find_error_address;
# Rate limit email to 1/5min/address
if ( !$opt{nomail}
&& !$opt{retry}
&& $err_addr
&& LJ::MemCache::add( "rate_eperr:$err_addr", 5, 300 ) )
{
LJ::send_mail(
{
to => $err_addr,
from => $LJ::BOGUS_EMAIL,
fromname => "$LJ::SITENAME Error",
subject => "$LJ::SITENAME posting error: $self->{subject}",
body => $errbody
}
);
}
$self->{dequeue} = 0 if $opt{retry};
$opt{m} = $msg;
$opt{s} = $self->{subject};
$opt{e} = 1;
$self->dblog(%opt) unless $opt{nolog};
return ( 0, $msg );
}
sub dblog {
my ( $self, %info ) = @_;
return unless $self->{u};
%info = ( %info, $self->dblog_opts );
chomp $info{s};
$self->{u}->log_event( 'emailpost', \%info );
return;
}
=head2 C<< $self->dblog_opts >>
Class-specific options
=cut
sub dblog_opts { (); }
=head2 C<< $self->set_error_address >>
Given a user object and an email address, discover the appropriate email
to send any error messages to.
Fallback to raw address if no explicit allowed senders.
=cut
sub find_error_address {
my ($self) = $_[0];
return unless $self->{u};
my $err_addr;
my $addrlist = LJ::Emailpost::Web::get_allowed_senders( $self->{u} );
my $from = $self->{from};
foreach my $allowed_sender ( keys %$addrlist ) {
if ( lc $from eq lc $allowed_sender
&& $addrlist->{$allowed_sender}->{get_errors} )
{
$err_addr = $from;
last;
}
}
$err_addr ||= $self->{u}->email_raw if $self->{u};
return $err_addr;
}
=head1 GETTERS / SETTERS
=head2 C<< $self->destination( [ $destination ] ) >>
Get/set the destination this was sent to (left part of the To:)
=cut
sub destination {
my ( $self, $destination ) = @_;
$self->{destination} = $destination
if $destination;
return $destination;
}
=head2 C<< $self->dequeue >>
Returns whether this email post should be dequeued (1) or retried (0).
=cut
sub dequeue {
return $_[0]->{dequeue};
}
1;