98 lines
2.2 KiB
Perl
98 lines
2.2 KiB
Perl
|
|
#!/usr/bin/perl
|
||
|
|
#
|
||
|
|
# Authors:
|
||
|
|
# Afuna <coder.dw@afunamatata.com>
|
||
|
|
#
|
||
|
|
# Copyright (c) 2013 by Dreamwidth Studios, LLC.
|
||
|
|
#
|
||
|
|
# This program is free software; you may redistribute it and/or modify it under
|
||
|
|
# the same terms as Perl itself. For a copy of the license, please reference
|
||
|
|
# 'perldoc perlartistic' or 'perldoc perlgpl'.
|
||
|
|
|
||
|
|
package DW::CleanEmail;
|
||
|
|
|
||
|
|
use strict;
|
||
|
|
|
||
|
|
=head1 NAME
|
||
|
|
|
||
|
|
DW::CleanEmail - Clean up text from email
|
||
|
|
|
||
|
|
=head1 SYNOPSIS
|
||
|
|
|
||
|
|
=cut
|
||
|
|
|
||
|
|
=head2 C<< $class->nonquoted_text( $text ) >>
|
||
|
|
|
||
|
|
Returns original content from an email body. That is, non-quoted
|
||
|
|
|
||
|
|
=cut
|
||
|
|
|
||
|
|
sub nonquoted_text {
|
||
|
|
my ( $class, $text ) = @_;
|
||
|
|
|
||
|
|
my @lines = split /$/m, $text;
|
||
|
|
|
||
|
|
my $num_lines = 0;
|
||
|
|
|
||
|
|
# remove all quoted lines, nice and easy
|
||
|
|
foreach (@lines) {
|
||
|
|
last if m/^\s*>/;
|
||
|
|
|
||
|
|
# e.g., --- Original Message ---
|
||
|
|
# but this can be in various languages, so not hardcoding the text
|
||
|
|
last if m/^\s*-{3,}[^-]+-{3,}\s*$/;
|
||
|
|
|
||
|
|
# the bogus email we sent the comments as wrapped in <>
|
||
|
|
last if m/<\s?$LJ::BOGUS_EMAIL>/;
|
||
|
|
|
||
|
|
$num_lines++;
|
||
|
|
}
|
||
|
|
|
||
|
|
@lines = splice @lines, 0, $num_lines;
|
||
|
|
|
||
|
|
# go back through the last few lines
|
||
|
|
# look for something that looks like:
|
||
|
|
# On (date), someone wrote:
|
||
|
|
my $max_backtrack = 3;
|
||
|
|
my $backtrack = 0;
|
||
|
|
|
||
|
|
foreach ( reverse @lines ) {
|
||
|
|
$backtrack++;
|
||
|
|
last if $backtrack > $max_backtrack;
|
||
|
|
|
||
|
|
last if m/^\s*On.+wrote:\s*$/i;
|
||
|
|
|
||
|
|
# sometimes that gets split across two lines
|
||
|
|
# so look for date-like things too
|
||
|
|
last if m!^\s*On (?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)!i;
|
||
|
|
last if m!
|
||
|
|
(?:\d{2}/\d{2}/\d{4}) # mm/dd/yyyy
|
||
|
|
| (?:[a-z]{3,4}\s+\d{1,2},\s+\d{4}) # Jan 31, 2013
|
||
|
|
| (?:\d{1,2}\s+[a-z]{3,4}\s+\d{4}) # 31 Jan 2013
|
||
|
|
|
||
|
|
!ix;
|
||
|
|
}
|
||
|
|
|
||
|
|
@lines = splice @lines, 0, $num_lines - $backtrack
|
||
|
|
unless $backtrack > $max_backtrack || $backtrack >= $num_lines;
|
||
|
|
|
||
|
|
return join "", @lines;
|
||
|
|
}
|
||
|
|
|
||
|
|
=head2 C<< $class->reply_subject( $text ) >>
|
||
|
|
|
||
|
|
Clean out "Re:" from the subject and decode HTML entities
|
||
|
|
|
||
|
|
=cut
|
||
|
|
|
||
|
|
sub reply_subject {
|
||
|
|
my ( $class, $subject ) = @_;
|
||
|
|
|
||
|
|
$subject =~ s/^(Re:\s*)*//i;
|
||
|
|
$subject = "Re: $subject" if $subject;
|
||
|
|
|
||
|
|
return LJ::dhtml($subject);
|
||
|
|
}
|
||
|
|
|
||
|
|
1;
|