mourningdove/cgi-bin/LJ/TextUtil.pm

# This code was forked from the LiveJournal project owned and operated
# by Live Journal, Inc. The code has been modified and expanded by
# Dreamwidth Studios, LLC. These files were originally licensed under
# the terms of the license supplied by Live Journal, Inc, which can
# currently be found at:
#
# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt
#
# In accordance with the original license, this code and all its
# modifications are provided under the GNU General Public License.
# A copy of that license can be found in the LICENSE file included as
# part of this distribution.

package LJ;
use strict;

use LJ::ConvUTF8;
use HTML::TokeParser;
use HTML::Entities;
use Carp qw(cluck);
use Encode;

# <LJFUNC>
# name: LJ::trim
# class: text
# des: Removes whitespace from left and right side of a string.
# args: string
# des-string: string to be trimmed
# returns: trimmed string
# </LJFUNC>
sub trim {
    my $a = $_[0];
    return '' unless defined $a;

    $a =~ s/^\s+//;
    $a =~ s/\s+$//;
    return $a;
}

# check argument text for see_request links, and strip any auth args

sub strip_request_auth {
    my $a = $_[0];
    return '' unless defined $a;

    $a =~ s/(see_request\S+?)\&auth=\w+/$1/ig;
    return $a;
}

# <LJFUNC>
# name: LJ::get_urls
# class: text
# des: Returns a list of all referenced URLs from a string.
# args: text
# des-text: Text from which to return extra URLs.
# returns: list of URLs
# </LJFUNC>
sub get_urls {
    return ( $_[0] =~ m!https?://[^\s\"\'\<\>]+!g );
}

# similar to decode_url_string below, but a nicer calling convention.  returns
# a hash of items parsed from the string passed in as the only argument.

# FIXME: This method using \0 is being used in legacy locations
#  however should be factored out ( to Hash::MultiValue )
#  as soon as the need for the legacy use is removed.
sub parse_args {
    my $args = $_[0];
    return unless defined $args;

    my %GET;
    foreach my $pair ( split /&/, $args ) {
        my ( $name, $value ) = split /=/, $pair;

        if ( defined $value ) {
            $value =~ tr/+/ /;
            $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        }
        else {
            $value = '';
        }

        $name =~ tr/+/ /;
        $name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

        $GET{$name} .= $GET{$name} ? "\0$value" : $value;
    }
    return %GET;
}

# <LJFUNC>
# name: LJ::decode_url_string
# class: web
# des: Parse URL-style arg/value pairs into a hash.
# args: buffer, hashref
# des-buffer: Scalar or scalarref of buffer to parse.
# des-hashref: Hashref to populate.
# returns: boolean; true.
# </LJFUNC>
sub decode_url_string {
    my $a       = shift;
    my $buffer  = ref $a ? $a : \$a;
    my $hashref = shift;               # output hash
    my $keyref  = shift;               # array of keys as they were found

    my $pair;
    my @pairs = split( /&/, $$buffer );
    @$keyref = @pairs;
    my ( $name, $value );
    foreach $pair (@pairs) {
        ( $name, $value ) = split( /=/, $pair );
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $name  =~ tr/+/ /;
        $name  =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        $hashref->{$name} .= $hashref->{$name} ? "\0$value" : $value;
    }
    return 1;
}

# args: hashref of key/values
#       arrayref of keys in order (optional)
# returns: urlencoded string
sub encode_url_string {
    my ( $hashref, $keyref ) = @_;

    return join( '&',
        map { LJ::eurl($_) . '=' . LJ::eurl( $hashref->{$_} ) }
            ( ref $keyref ? @$keyref : keys %$hashref ) );
}

# <LJFUNC>
# name: LJ::eurl
# class: text
# des: Escapes a value before it can be put in a URL.  See also [func[LJ::durl]].
# args: string
# des-string: string to be escaped
# returns: string escaped
# </LJFUNC>
sub eurl {
    my $a = $_[0];
    return '' unless defined $a;

    $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
    $a =~ tr/ /+/;
    return $a;
}

# <LJFUNC>
# name: LJ::durl
# class: text
# des: Decodes a value that's URL-escaped.  See also [func[LJ::eurl]].
# args: string
# des-string: string to be decoded
# returns: string decoded
# </LJFUNC>
sub durl {
    my $a = $_[0];
    return '' unless defined $a;

    $a =~ tr/+/ /;
    $a =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    return $a;
}

# <LJFUNC>
# name: LJ::exml
# class: text
# des: Escapes a value before it can be put in XML.
# args: string
# des-string: string to be escaped
# returns: string escaped.
# </LJFUNC>
sub exml {
    my $a = $_[0];
    return '' unless defined $a;

    # fast path for the commmon case:
    return $a unless $a =~ /[&\"\'<>\x00-\x08\x0B\x0C\x0E-\x1F]/;

    # what are those character ranges? XML 1.0 allows:
    # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

    $a =~ s/\&/&amp;/g;
    $a =~ s/\"/&quot;/g;
    $a =~ s/\'/&apos;/g;
    $a =~ s/</&lt;/g;
    $a =~ s/>/&gt;/g;
    $a =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g;
    return $a;
}

# <LJFUNC>
# name: LJ::ehtml
# class: text
# des: Escapes a value before it can be put in HTML.
# args: string
# des-string: string to be escaped
# returns: string escaped.
# </LJFUNC>
sub ehtml {
    my $a = $_[0];
    return '' unless defined $a;

    # fast path for the commmon case:
    return $a unless $a =~ /[&\"\'<>]/;

    # this is faster than doing one substitution with a map:
    $a =~ s/\&/&amp;/g;
    $a =~ s/\"/&quot;/g;
    $a =~ s/\'/&\#39;/g;
    $a =~ s/</&lt;/g;
    $a =~ s/>/&gt;/g;
    return $a;
}
*eall = \&ehtml;    # old BML syntax required eall to also escape BML.  not anymore.

# <LJFUNC>
# name: LJ::dhtml
# class: text
# des: Decodes a value that's HTML-escaped.  See also [func[LJ::ehtml]].
# args: string
# des-string: string to be decoded
# returns: string decoded
# </LJFUNC>
sub dhtml {
    my $a = $_[0];
    return '' unless defined $a;

    return HTML::Entities::decode_entities($a);
}

# <LJFUNC>
# name: LJ::etags
# class: text
# des: Escapes < and > from a string
# args: string
# des-string: string to be escaped
# returns: string escaped.
# </LJFUNC>
sub etags {
    my $a = $_[0];
    return '' unless defined $a;

    # fast path for the commmon case:
    return $a unless $a =~ /[<>]/;

    $a =~ s/</&lt;/g;
    $a =~ s/>/&gt;/g;
    return $a;
}

# <LJFUNC>
# name: LJ::ejs
# class: text
# des: Escapes a string value before it can be put in JavaScript.
# args: string
# des-string: string to be escaped
# returns: string escaped.
# </LJFUNC>
sub ejs {
    my $a = $_[0];
    return '' unless defined $a;

    # use zero-width lookahead to insert a backslash where needed
    $a =~ s/(?=[\"\'\\])/\\/g;
    $a =~ s/&quot;/\\&quot;/g;
    $a =~ s/\r?\n/\\n/gs;
    $a =~ s/\r//gs;
    $a =~ s/\xE2\x80[\xA8\xA9]//gs;
    return $a;
}

# given a string, makes it into a string you can put into javascript,
# including protecting against closing </script> tags in the entry.
# does the double quotes for ya.
sub ejs_string {
    my $str = ejs( $_[0] );
    $str =~ s!</script!</scri\" + \"pt!gi;
    return "\"" . $str . "\"";
}

# changes every char in a string to %XX where XX is the hex value
# this is useful for passing strings to javascript through HTML, because
# javascript's "unescape" function expects strings in this format
sub ejs_all {
    my $a = $_[0];
    return '' unless defined $a;

    $a =~ s/(.)/uc sprintf("%%%02x",ord($1))/eg;
    return $a;
}

# strip all HTML tags from a string
sub strip_html {
    my $str = $_[0];
    return '' unless defined $str;

    $str =~
        s/\<(?:lj(?: site=[^\s]+)? user|user(?: site=[^\s]+)? name)\=['"]?([\w-]+)['"]?[^>]*\>/$1/g;
    $str =~ s/\<([^\<])+\>//g;
    return $str;
}

# <LJFUNC>
# name: LJ::is_ascii
# des: checks if text is pure ASCII.
# args: text
# des-text: text to check for being pure 7-bit ASCII text.
# returns: 1 if text is indeed pure 7-bit, 0 otherwise.
# </LJFUNC>
sub is_ascii {
    my $text = $_[0];
    return 1 unless defined $text;
    return ( $text !~ m/[^\x01-\x7f]/ );
}

# Logs a warning if text has Perl's internal UTF8 flag set, so we can track it
# down later. This is intended for debugging problems on prod that can't be
# reproduced in dev.
sub warn_for_perl_utf8 {
    my $text = $_[0];
    if ( Encode::is_utf8($text) ) {
        cluck("MOJIBAKE ALERT: Found text with Perl UTF8 flag set!");
    }
}

# <LJFUNC>
# name: LJ::is_utf8
# des: check text for UTF-8 validity.
# args: text
# des-text: text to check for UTF-8 validity
# returns: 1 if text is a valid UTF-8 stream, 0 otherwise.
# </LJFUNC>
sub is_utf8 {
    my $text = shift;

    if ( LJ::Hooks::are_hooks("is_utf8") ) {
        return LJ::Hooks::run_hook( "is_utf8", $text );
    }

    require Unicode::CheckUTF8;
    {
        no strict;
        local $^W = 0;
        *stab = *{"main::LJ::"};
        undef $stab{is_utf8};
    }
    *LJ::is_utf8 = \&LJ::is_utf8_wrapper;
    return LJ::is_utf8_wrapper($text);
}

# <LJFUNC>
# name: LJ::is_utf8_wrapper
# des: wraps the check for UTF-8 validity.
# args: text
# des-text: text to check for UTF-8 validity
# returns: 1 if text is a valid UTF-8 stream, a reference, or null; 0 otherwise.
# </LJFUNC>
sub is_utf8_wrapper {
    my $text = $_[0];

    if ( defined $text && !ref $text && $text ) {

        # we need to make sure $text values are treated as strings
        return Unicode::CheckUTF8::is_utf8( '' . $text );
    }
    else {
        # all possible "false" values for $text are valid unicode
        return 1;
    }
}

# <LJFUNC>
# name: LJ::has_too_many
# des: checks if text is too long
# args: text, maxbreaks, maxchars
# des-text: text to check if too long
# des-maxbreaks: maximum number of linebreak
# des-maxchars: maximum number of characters
# returns: true if text has more than maxbreaks HTML linebreaks or more than maxchars characters
# </LJFUNC>
sub has_too_many {
    my ( $text, %opts ) = @_;

    return 1 if exists $opts{chars} && length($text) > $opts{chars};

    if ( exists $opts{linebreaks} ) {

        # - we always call this on HTML, so ignore literal \n.
        # - paragraphs count as two linebreaks.
        # - this is ballpark guessing and ignores MANY things that can add
        # vertical space (<li>s, blockquotes...) -- shrug!
        my @breaks = $text =~ m!</?(?:p|br)[^>]*>!g;
        return 1 if scalar @breaks > $opts{linebreaks};
    }

    return 0;
}

# alternate version of "lc" that handles UTF-8
# args: text string for lowercasing
# returns: lowercase string
sub utf8_lc {
    use Encode;    # Perl 5.8 or higher

    # get the encoded text to work with
    my $text = decode( "UTF-8", $_[0] );

    # return the lowercased text
    return encode( "UTF-8", lc $text );
}

# <LJFUNC>
# name: LJ::text_out
# des: force outgoing text into valid UTF-8.
# args: text
# des-text: reference to text to pass to output. Text if modified in-place.
# returns: nothing.
# </LJFUNC>
sub text_out {
    my $rtext = shift;

    # is this valid UTF-8 already?
    return if LJ::is_utf8($$rtext);

    # no. Blot out all non-ASCII chars
    $$rtext =~ s/[\x00\x80-\xff]/\?/g;
    return;
}

# <LJFUNC>
# name: LJ::text_in
# des: do appropriate checks on input text. Should be called on all
#      user-generated text.
# args: text
# des-text: text to check
# returns: 1 if the text is valid, 0 if not.
# </LJFUNC>
sub text_in {
    my $text = shift;

    if ( ref($text) eq "HASH" ) {
        return !( grep { !LJ::is_utf8($_) } values %{$text} );
    }
    if ( ref($text) eq "ARRAY" ) {
        return !( grep { !LJ::is_utf8($_) } @{$text} );
    }
    return LJ::is_utf8($text);
}

# <LJFUNC>
# name: LJ::text_convert
# des: convert old entries/comments to UTF-8 using user's default encoding.
# args: dbs?, text, u, error
# des-dbs: optional. Deprecated; a master/slave set of database handles.
# des-text: old possibly non-ASCII text to convert
# des-u: user hashref of the journal's owner
# des-error: ref to a scalar variable which is set to 1 on error
#            (when user has no default encoding defined, but
#            text needs to be translated).
# returns: converted text or undef on error
# </LJFUNC>
sub text_convert {
    my ( $text, $u, $error ) = @_;

    # maybe it's pure ASCII?
    return $text if LJ::is_ascii($text);

    # load encoding id->name mapping if it's not loaded yet
    LJ::load_codes( { "encoding" => \%LJ::CACHE_ENCODINGS } )
        unless %LJ::CACHE_ENCODINGS;

    if ( $u->{'oldenc'} == 0
        || not defined $LJ::CACHE_ENCODINGS{ $u->{'oldenc'} } )
    {
        $$error = 1;
        return undef;
    }

    # convert!
    my $name = $LJ::CACHE_ENCODINGS{ $u->{'oldenc'} };
    unless ( LJ::ConvUTF8->supported_charset($name) ) {
        $$error = 1;
        return undef;
    }

    return LJ::ConvUTF8->to_utf8( $name, $text );
}

# <LJFUNC>
# name: LJ::text_length
# des: returns both byte length and character length of a string.
#      The function assumes that its argument is a valid UTF-8 string.
# args: text
# des-text: the string to measure
# returns: a list of two values, (byte_length, char_length).
# </LJFUNC>

sub text_length {
    my $text     = shift;
    my $bl       = length($text);
    my $cl       = 0;
    my $utf_char = "([\x00-\x7f]|[\xc0-\xdf].|[\xe0-\xef]..|[\xf0-\xf7]...)";

    while ( $text =~ m/$utf_char/go ) { $cl++; }
    return ( $bl, $cl );
}

# <LJFUNC>
# name: LJ::text_trim
# des: truncate string according to requirements on byte length, char
#      length, or both. "char length" means number of UTF-8 characters.
# args: text, byte_max, char_max
# des-text: the string to trim
# des-byte_max: maximum allowed length in bytes; if 0, there's no restriction
# des-char_max: maximum allowed length in chars; if 0, there's no restriction
# returns: the truncated string.
# </LJFUNC>
sub text_trim {
    my ( $text, $byte_max, $char_max, $didtrim_ref ) = @_;
    $text = defined $text ? LJ::trim($text) : '';
    return $text unless $byte_max or $char_max;

    my $cur      = 0;
    my $utf_char = "([\x00-\x7f]|[\xc0-\xdf].|[\xe0-\xef]..|[\xf0-\xf7]...)";

    # if we don't have a character limit, assume it's the same as the byte limit.
    # we will never have more characters than bytes, but we might have more bytes
    # than characters, so we can't inherit the other way.
    $char_max ||= $byte_max;

    my $fake_scalar;
    my $ref = ref $didtrim_ref ? $didtrim_ref : \$fake_scalar;

    while ( $text =~ m/$utf_char/gco ) {
        unless ($char_max) {
            $$ref = 1;
            last;
        }
        if ( $byte_max and $cur + length($1) > $byte_max ) {
            $$ref = 1;
            last;
        }
        $cur += length($1);
        $char_max--;
    }

    return LJ::trim( substr( $text, 0, $cur ) );
}

# <LJFUNC>
# name: LJ::text_compress
# des: Compresses a chunk of text, to gzip, if configured for site.  Can compress
#      a scalarref in place, or return a compressed copy.  Won't compress if
#      value is too small, already compressed, or size would grow by compressing.
# args: text
# des-text: either a scalar or scalarref
# returns: nothing if given a scalarref (to compress in-place), or original/compressed value,
#          depending on site config.
# </LJFUNC>
sub text_compress {
    my $text = $_[0];
    my $ref  = ref $text;
    die "Invalid reference" if $ref && $ref ne "SCALAR";

    my $tref    = $ref ? $text : \$text;
    my $pre_len = length($$tref);
    unless ( substr( $$tref, 0, 2 ) eq "\037\213" || $pre_len < 100 ) {
        my $gz = Compress::Zlib::memGzip($$tref);
        if ( length($gz) < $pre_len ) {
            $$tref = $gz;
        }
    }

    return $ref ? undef : $$tref;
}

# <LJFUNC>
# name: LJ::text_uncompress
# des: Uncompresses a chunk of text, from gzip, if configured for site.  Can uncompress
#      a scalarref in place, or return a compressed copy.  Won't uncompress unless
#      it finds the gzip magic number at the beginning of the text.
# args: text
# des-text: either a scalar or scalarref.
# returns: nothing if given a scalarref (to uncompress in-place), or original/uncompressed value,
#          depending on if test was compressed or not
# </LJFUNC>
sub text_uncompress {
    my $text = $_[0];
    my $ref  = ref $text;
    die "Invalid reference" if $ref && $ref ne "SCALAR";
    my $tref = $ref ? $text : \$text;

    # check for gzip's magic number
    if ( substr( $$tref, 0, 2 ) eq "\037\213" ) {
        $$tref = Compress::Zlib::memGunzip($$tref);
    }

    return $ref ? undef : $$tref;
}

# function to trim a string containing HTML.  this will auto-close any
# html tags that were still open when the string was truncated
sub html_trim {
    my ( $text, $char_max, $truncated ) = @_;

    return $text unless $char_max;

    my $p = HTML::TokeParser->new( \$text );
    my @open_tags;    # keep track of what tags are open
    my $out         = '';
    my $content_len = 0;

TOKEN:
    while ( my $token = $p->get_token ) {
        my $type = $token->[0];
        my $tag  = $token->[1];
        my $attr = $token->[2];    # hashref

        if ( $type eq "S" ) {
            my $selfclose;

            # start tag
            $out .= "<$tag";

            # assume tags are properly self-closed
            $selfclose = 1 if lc $tag eq 'input' || lc $tag eq 'br' || lc $tag eq 'img';

            # preserve order of attributes. the original order is
            # in element 4 of $token
            foreach my $attrname ( @{ $token->[3] } ) {
                if ( $attrname eq '/' ) {
                    $selfclose = 1;
                    next;
                }

                # FIXME: neaten
                $attr->{$attrname} = LJ::no_utf8_flag( $attr->{$attrname} );
                $out .= " $attrname=\"" . LJ::ehtml( $attr->{$attrname} ) . "\"";
            }

            $out .= $selfclose ? " />" : ">";

            push @open_tags, $tag unless $selfclose;

        }
        elsif ( $type eq 'T' || $type eq 'D' ) {
            my $content = $token->[1];

            if ( length($content) + $content_len > $char_max ) {

                # truncate and stop parsing
                $content = LJ::text_trim( $content, undef, ( $char_max - $content_len ) );
                $out .= $content;
                $$truncated = 1 if ref $truncated;
                last;
            }

            $content_len += length $content;

            $out .= $content;

        }
        elsif ( $type eq 'C' ) {

            # comment, don't care
            $out .= $token->[1];

        }
        elsif ( $type eq 'E' ) {

            # end tag
            if ( $open_tags[-1] eq $tag ) {
                pop @open_tags;
                $out .= "</$tag>";
            }
        }
    }

    $out .= join( "\n", map { "</$_>" } reverse @open_tags );

    return $out;
}

# takes a number, inserts commas where needed
sub commafy {
    my $number = $_[0];
    return '' unless defined $number;
    return $number unless $number =~ /^\d+$/;

    my $punc = LJ::Lang::ml('number.punctuation') || ",";
    $number =~ s/(?<=\d)(?=(\d\d\d)+(?!\d))/$punc/g;
    return $number;
}

# <LJFUNC>
# name: LJ::html_newlines
# des: Replace newlines with HTML break tags.
# args: text
# returns: text, possibly including HTML break tags.
# </LJFUNC>
sub html_newlines {
    my $text = $_[0];
    return '' unless defined $text;

    $text =~ s/\n/<br \/>/gm;
    return $text;
}

# prepend ">" to each line of text to make a blockquote in markdown
# for when text has multiple lines and prepending ">" to the entire
# text will just convert the first line / paragraph
sub markdown_blockquote {
    my $text = $_[0];
    return '' unless defined $text;

    $text =~ s/(^.*)/\> $1/gm;
    return $text;
}

1;
first! 2026-05-24 01:03:05 +00:00			`# This code was forked from the LiveJournal project owned and operated`
			`# by Live Journal, Inc. The code has been modified and expanded by`
			`# Dreamwidth Studios, LLC. These files were originally licensed under`
			`# the terms of the license supplied by Live Journal, Inc, which can`
			`# currently be found at:`
			`#`
			`# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt`
			`#`
			`# In accordance with the original license, this code and all its`
			`# modifications are provided under the GNU General Public License.`
			`# A copy of that license can be found in the LICENSE file included as`
			`# part of this distribution.`

			`package LJ;`
			`use strict;`

			`use LJ::ConvUTF8;`
			`use HTML::TokeParser;`
			`use HTML::Entities;`
			`use Carp qw(cluck);`
			`use Encode;`

			`# <LJFUNC>`
			`# name: LJ::trim`
			`# class: text`
			`# des: Removes whitespace from left and right side of a string.`
			`# args: string`
			`# des-string: string to be trimmed`
			`# returns: trimmed string`
			`# </LJFUNC>`
			`sub trim {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`$a =~ s/^\s+//;`
			`$a =~ s/\s+$//;`
			`return $a;`
			`}`

			`# check argument text for see_request links, and strip any auth args`

			`sub strip_request_auth {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`$a =~ s/(see_request\S+?)\&auth=\w+/$1/ig;`
			`return $a;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::get_urls`
			`# class: text`
			`# des: Returns a list of all referenced URLs from a string.`
			`# args: text`
			`# des-text: Text from which to return extra URLs.`
			`# returns: list of URLs`
			`# </LJFUNC>`
			`sub get_urls {`
			`return ( $_[0] =~ m!https?://[^\s\"\'\<\>]+!g );`
			`}`

			`# similar to decode_url_string below, but a nicer calling convention. returns`
			`# a hash of items parsed from the string passed in as the only argument.`

			`# FIXME: This method using \0 is being used in legacy locations`
			`# however should be factored out ( to Hash::MultiValue )`
			`# as soon as the need for the legacy use is removed.`
			`sub parse_args {`
			`my $args = $_[0];`
			`return unless defined $args;`

			`my %GET;`
			`foreach my $pair ( split /&/, $args ) {`
			`my ( $name, $value ) = split /=/, $pair;`

			`if ( defined $value ) {`
			`$value =~ tr/+/ /;`
			`$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;`
			`}`
			`else {`
			`$value = '';`
			`}`

			`$name =~ tr/+/ /;`
			`$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;`

			`$GET{$name} .= $GET{$name} ? "\0$value" : $value;`
			`}`
			`return %GET;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::decode_url_string`
			`# class: web`
			`# des: Parse URL-style arg/value pairs into a hash.`
			`# args: buffer, hashref`
			`# des-buffer: Scalar or scalarref of buffer to parse.`
			`# des-hashref: Hashref to populate.`
			`# returns: boolean; true.`
			`# </LJFUNC>`
			`sub decode_url_string {`
			`my $a = shift;`
			`my $buffer = ref $a ? $a : \$a;`
			`my $hashref = shift; # output hash`
			`my $keyref = shift; # array of keys as they were found`

			`my $pair;`
			`my @pairs = split( /&/, $$buffer );`
			`@$keyref = @pairs;`
			`my ( $name, $value );`
			`foreach $pair (@pairs) {`
			`( $name, $value ) = split( /=/, $pair );`
			`$value =~ tr/+/ /;`
			`$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;`
			`$name =~ tr/+/ /;`
			`$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;`
			`$hashref->{$name} .= $hashref->{$name} ? "\0$value" : $value;`
			`}`
			`return 1;`
			`}`

			`# args: hashref of key/values`
			`# arrayref of keys in order (optional)`
			`# returns: urlencoded string`
			`sub encode_url_string {`
			`my ( $hashref, $keyref ) = @_;`

			`return join( '&',`
			`map { LJ::eurl($_) . '=' . LJ::eurl( $hashref->{$_} ) }`
			`( ref $keyref ? @$keyref : keys %$hashref ) );`
			`}`

			`# <LJFUNC>`
			`# name: LJ::eurl`
			`# class: text`
			`# des: Escapes a value before it can be put in a URL. See also [func[LJ::durl]].`
			`# args: string`
			`# des-string: string to be escaped`
			`# returns: string escaped`
			`# </LJFUNC>`
			`sub eurl {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`$a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;`
			`$a =~ tr/ /+/;`
			`return $a;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::durl`
			`# class: text`
			`# des: Decodes a value that's URL-escaped. See also [func[LJ::eurl]].`
			`# args: string`
			`# des-string: string to be decoded`
			`# returns: string decoded`
			`# </LJFUNC>`
			`sub durl {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`$a =~ tr/+/ /;`
			`$a =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;`
			`return $a;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::exml`
			`# class: text`
			`# des: Escapes a value before it can be put in XML.`
			`# args: string`
			`# des-string: string to be escaped`
			`# returns: string escaped.`
			`# </LJFUNC>`
			`sub exml {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`# fast path for the commmon case:`
			`return $a unless $a =~ /[&\"\'<>\x00-\x08\x0B\x0C\x0E-\x1F]/;`

			`# what are those character ranges? XML 1.0 allows:`
			`# #x9 \| #xA \| #xD \| [#x20-#xD7FF] \| [#xE000-#xFFFD] \| [#x10000-#x10FFFF]`

			`$a =~ s/\&/&/g;`
			`$a =~ s/\"/"/g;`
			`$a =~ s/\'/'/g;`
			`$a =~ s/</</g;`
			`$a =~ s/>/>/g;`
			`$a =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g;`
			`return $a;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::ehtml`
			`# class: text`
			`# des: Escapes a value before it can be put in HTML.`
			`# args: string`
			`# des-string: string to be escaped`
			`# returns: string escaped.`
			`# </LJFUNC>`
			`sub ehtml {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`# fast path for the commmon case:`
			`return $a unless $a =~ /[&\"\'<>]/;`

			`# this is faster than doing one substitution with a map:`
			`$a =~ s/\&/&/g;`
			`$a =~ s/\"/"/g;`
			`$a =~ s/\'/&\#39;/g;`
			`$a =~ s/</</g;`
			`$a =~ s/>/>/g;`
			`return $a;`
			`}`
			`*eall = \&ehtml; # old BML syntax required eall to also escape BML. not anymore.`

			`# <LJFUNC>`
			`# name: LJ::dhtml`
			`# class: text`
			`# des: Decodes a value that's HTML-escaped. See also [func[LJ::ehtml]].`
			`# args: string`
			`# des-string: string to be decoded`
			`# returns: string decoded`
			`# </LJFUNC>`
			`sub dhtml {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`return HTML::Entities::decode_entities($a);`
			`}`

			`# <LJFUNC>`
			`# name: LJ::etags`
			`# class: text`
			`# des: Escapes < and > from a string`
			`# args: string`
			`# des-string: string to be escaped`
			`# returns: string escaped.`
			`# </LJFUNC>`
			`sub etags {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`# fast path for the commmon case:`
			`return $a unless $a =~ /[<>]/;`

			`$a =~ s/</</g;`
			`$a =~ s/>/>/g;`
			`return $a;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::ejs`
			`# class: text`
			`# des: Escapes a string value before it can be put in JavaScript.`
			`# args: string`
			`# des-string: string to be escaped`
			`# returns: string escaped.`
			`# </LJFUNC>`
			`sub ejs {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`# use zero-width lookahead to insert a backslash where needed`
			`$a =~ s/(?=[\"\'\\])/\\/g;`
			`$a =~ s/"/\\"/g;`
			`$a =~ s/\r?\n/\\n/gs;`
			`$a =~ s/\r//gs;`
			`$a =~ s/\xE2\x80[\xA8\xA9]//gs;`
			`return $a;`
			`}`

			`# given a string, makes it into a string you can put into javascript,`
			`# including protecting against closing </script> tags in the entry.`
			`# does the double quotes for ya.`
			`sub ejs_string {`
			`my $str = ejs( $_[0] );`
			`$str =~ s!</script!</scri\" + \"pt!gi;`
			`return "\"" . $str . "\"";`
			`}`

			`# changes every char in a string to %XX where XX is the hex value`
			`# this is useful for passing strings to javascript through HTML, because`
			`# javascript's "unescape" function expects strings in this format`
			`sub ejs_all {`
			`my $a = $_[0];`
			`return '' unless defined $a;`

			`$a =~ s/(.)/uc sprintf("%%%02x",ord($1))/eg;`
			`return $a;`
			`}`

			`# strip all HTML tags from a string`
			`sub strip_html {`
			`my $str = $_[0];`
			`return '' unless defined $str;`

			`$str =~`
			`s/\<(?:lj(?: site=[^\s]+)? user\|user(?: site=[^\s]+)? name)\=['"]?([\w-]+)['"]?[^>]*\>/$1/g;`
			`$str =~ s/\<([^\<])+\>//g;`
			`return $str;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::is_ascii`
			`# des: checks if text is pure ASCII.`
			`# args: text`
			`# des-text: text to check for being pure 7-bit ASCII text.`
			`# returns: 1 if text is indeed pure 7-bit, 0 otherwise.`
			`# </LJFUNC>`
			`sub is_ascii {`
			`my $text = $_[0];`
			`return 1 unless defined $text;`
			`return ( $text !~ m/[^\x01-\x7f]/ );`
			`}`

			`# Logs a warning if text has Perl's internal UTF8 flag set, so we can track it`
			`# down later. This is intended for debugging problems on prod that can't be`
			`# reproduced in dev.`
			`sub warn_for_perl_utf8 {`
			`my $text = $_[0];`
			`if ( Encode::is_utf8($text) ) {`
			`cluck("MOJIBAKE ALERT: Found text with Perl UTF8 flag set!");`
			`}`
			`}`

			`# <LJFUNC>`
			`# name: LJ::is_utf8`
			`# des: check text for UTF-8 validity.`
			`# args: text`
			`# des-text: text to check for UTF-8 validity`
			`# returns: 1 if text is a valid UTF-8 stream, 0 otherwise.`
			`# </LJFUNC>`
			`sub is_utf8 {`
			`my $text = shift;`

			`if ( LJ::Hooks::are_hooks("is_utf8") ) {`
			`return LJ::Hooks::run_hook( "is_utf8", $text );`
			`}`

			`require Unicode::CheckUTF8;`
			`{`
			`no strict;`
			`local $^W = 0;`
			`stab = {"main::LJ::"};`
			`undef $stab{is_utf8};`
			`}`
			`*LJ::is_utf8 = \&LJ::is_utf8_wrapper;`
			`return LJ::is_utf8_wrapper($text);`
			`}`

			`# <LJFUNC>`
			`# name: LJ::is_utf8_wrapper`
			`# des: wraps the check for UTF-8 validity.`
			`# args: text`
			`# des-text: text to check for UTF-8 validity`
			`# returns: 1 if text is a valid UTF-8 stream, a reference, or null; 0 otherwise.`
			`# </LJFUNC>`
			`sub is_utf8_wrapper {`
			`my $text = $_[0];`

			`if ( defined $text && !ref $text && $text ) {`

			`# we need to make sure $text values are treated as strings`
			`return Unicode::CheckUTF8::is_utf8( '' . $text );`
			`}`
			`else {`
			`# all possible "false" values for $text are valid unicode`
			`return 1;`
			`}`
			`}`

			`# <LJFUNC>`
			`# name: LJ::has_too_many`
			`# des: checks if text is too long`
			`# args: text, maxbreaks, maxchars`
			`# des-text: text to check if too long`
			`# des-maxbreaks: maximum number of linebreak`
			`# des-maxchars: maximum number of characters`
			`# returns: true if text has more than maxbreaks HTML linebreaks or more than maxchars characters`
			`# </LJFUNC>`
			`sub has_too_many {`
			`my ( $text, %opts ) = @_;`

			`return 1 if exists $opts{chars} && length($text) > $opts{chars};`

			`if ( exists $opts{linebreaks} ) {`

			`# - we always call this on HTML, so ignore literal \n.`
			`# - paragraphs count as two linebreaks.`
			`# - this is ballpark guessing and ignores MANY things that can add`
			`# vertical space (<li>s, blockquotes...) -- shrug!`
			`my @breaks = $text =~ m!</?(?:p\|br)[^>]*>!g;`
			`return 1 if scalar @breaks > $opts{linebreaks};`
			`}`

			`return 0;`
			`}`

			`# alternate version of "lc" that handles UTF-8`
			`# args: text string for lowercasing`
			`# returns: lowercase string`
			`sub utf8_lc {`
			`use Encode; # Perl 5.8 or higher`

			`# get the encoded text to work with`
			`my $text = decode( "UTF-8", $_[0] );`

			`# return the lowercased text`
			`return encode( "UTF-8", lc $text );`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_out`
			`# des: force outgoing text into valid UTF-8.`
			`# args: text`
			`# des-text: reference to text to pass to output. Text if modified in-place.`
			`# returns: nothing.`
			`# </LJFUNC>`
			`sub text_out {`
			`my $rtext = shift;`

			`# is this valid UTF-8 already?`
			`return if LJ::is_utf8($$rtext);`

			`# no. Blot out all non-ASCII chars`
			`$$rtext =~ s/[\x00\x80-\xff]/\?/g;`
			`return;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_in`
			`# des: do appropriate checks on input text. Should be called on all`
			`# user-generated text.`
			`# args: text`
			`# des-text: text to check`
			`# returns: 1 if the text is valid, 0 if not.`
			`# </LJFUNC>`
			`sub text_in {`
			`my $text = shift;`

			`if ( ref($text) eq "HASH" ) {`
			`return !( grep { !LJ::is_utf8($_) } values %{$text} );`
			`}`
			`if ( ref($text) eq "ARRAY" ) {`
			`return !( grep { !LJ::is_utf8($_) } @{$text} );`
			`}`
			`return LJ::is_utf8($text);`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_convert`
			`# des: convert old entries/comments to UTF-8 using user's default encoding.`
			`# args: dbs?, text, u, error`
			`# des-dbs: optional. Deprecated; a master/slave set of database handles.`
			`# des-text: old possibly non-ASCII text to convert`
			`# des-u: user hashref of the journal's owner`
			`# des-error: ref to a scalar variable which is set to 1 on error`
			`# (when user has no default encoding defined, but`
			`# text needs to be translated).`
			`# returns: converted text or undef on error`
			`# </LJFUNC>`
			`sub text_convert {`
			`my ( $text, $u, $error ) = @_;`

			`# maybe it's pure ASCII?`
			`return $text if LJ::is_ascii($text);`

			`# load encoding id->name mapping if it's not loaded yet`
			`LJ::load_codes( { "encoding" => \%LJ::CACHE_ENCODINGS } )`
			`unless %LJ::CACHE_ENCODINGS;`

			`if ( $u->{'oldenc'} == 0`
			`\|\| not defined $LJ::CACHE_ENCODINGS{ $u->{'oldenc'} } )`
			`{`
			`$$error = 1;`
			`return undef;`
			`}`

			`# convert!`
			`my $name = $LJ::CACHE_ENCODINGS{ $u->{'oldenc'} };`
			`unless ( LJ::ConvUTF8->supported_charset($name) ) {`
			`$$error = 1;`
			`return undef;`
			`}`

			`return LJ::ConvUTF8->to_utf8( $name, $text );`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_length`
			`# des: returns both byte length and character length of a string.`
			`# The function assumes that its argument is a valid UTF-8 string.`
			`# args: text`
			`# des-text: the string to measure`
			`# returns: a list of two values, (byte_length, char_length).`
			`# </LJFUNC>`

			`sub text_length {`
			`my $text = shift;`
			`my $bl = length($text);`
			`my $cl = 0;`
			`my $utf_char = "([\x00-\x7f]\|[\xc0-\xdf].\|[\xe0-\xef]..\|[\xf0-\xf7]...)";`

			`while ( $text =~ m/$utf_char/go ) { $cl++; }`
			`return ( $bl, $cl );`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_trim`
			`# des: truncate string according to requirements on byte length, char`
			`# length, or both. "char length" means number of UTF-8 characters.`
			`# args: text, byte_max, char_max`
			`# des-text: the string to trim`
			`# des-byte_max: maximum allowed length in bytes; if 0, there's no restriction`
			`# des-char_max: maximum allowed length in chars; if 0, there's no restriction`
			`# returns: the truncated string.`
			`# </LJFUNC>`
			`sub text_trim {`
			`my ( $text, $byte_max, $char_max, $didtrim_ref ) = @_;`
			`$text = defined $text ? LJ::trim($text) : '';`
			`return $text unless $byte_max or $char_max;`

			`my $cur = 0;`
			`my $utf_char = "([\x00-\x7f]\|[\xc0-\xdf].\|[\xe0-\xef]..\|[\xf0-\xf7]...)";`

			`# if we don't have a character limit, assume it's the same as the byte limit.`
			`# we will never have more characters than bytes, but we might have more bytes`
			`# than characters, so we can't inherit the other way.`
			`$char_max \|\|= $byte_max;`

			`my $fake_scalar;`
			`my $ref = ref $didtrim_ref ? $didtrim_ref : \$fake_scalar;`

			`while ( $text =~ m/$utf_char/gco ) {`
			`unless ($char_max) {`
			`$$ref = 1;`
			`last;`
			`}`
			`if ( $byte_max and $cur + length($1) > $byte_max ) {`
			`$$ref = 1;`
			`last;`
			`}`
			`$cur += length($1);`
			`$char_max--;`
			`}`

			`return LJ::trim( substr( $text, 0, $cur ) );`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_compress`
			`# des: Compresses a chunk of text, to gzip, if configured for site. Can compress`
			`# a scalarref in place, or return a compressed copy. Won't compress if`
			`# value is too small, already compressed, or size would grow by compressing.`
			`# args: text`
			`# des-text: either a scalar or scalarref`
			`# returns: nothing if given a scalarref (to compress in-place), or original/compressed value,`
			`# depending on site config.`
			`# </LJFUNC>`
			`sub text_compress {`
			`my $text = $_[0];`
			`my $ref = ref $text;`
			`die "Invalid reference" if $ref && $ref ne "SCALAR";`

			`my $tref = $ref ? $text : \$text;`
			`my $pre_len = length($$tref);`
			`unless ( substr( $$tref, 0, 2 ) eq "\037\213" \|\| $pre_len < 100 ) {`
			`my $gz = Compress::Zlib::memGzip($$tref);`
			`if ( length($gz) < $pre_len ) {`
			`$$tref = $gz;`
			`}`
			`}`

			`return $ref ? undef : $$tref;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::text_uncompress`
			`# des: Uncompresses a chunk of text, from gzip, if configured for site. Can uncompress`
			`# a scalarref in place, or return a compressed copy. Won't uncompress unless`
			`# it finds the gzip magic number at the beginning of the text.`
			`# args: text`
			`# des-text: either a scalar or scalarref.`
			`# returns: nothing if given a scalarref (to uncompress in-place), or original/uncompressed value,`
			`# depending on if test was compressed or not`
			`# </LJFUNC>`
			`sub text_uncompress {`
			`my $text = $_[0];`
			`my $ref = ref $text;`
			`die "Invalid reference" if $ref && $ref ne "SCALAR";`
			`my $tref = $ref ? $text : \$text;`

			`# check for gzip's magic number`
			`if ( substr( $$tref, 0, 2 ) eq "\037\213" ) {`
			`$$tref = Compress::Zlib::memGunzip($$tref);`
			`}`

			`return $ref ? undef : $$tref;`
			`}`

			`# function to trim a string containing HTML. this will auto-close any`
			`# html tags that were still open when the string was truncated`
			`sub html_trim {`
			`my ( $text, $char_max, $truncated ) = @_;`

			`return $text unless $char_max;`

			`my $p = HTML::TokeParser->new( \$text );`
			`my @open_tags; # keep track of what tags are open`
			`my $out = '';`
			`my $content_len = 0;`

			`TOKEN:`
			`while ( my $token = $p->get_token ) {`
			`my $type = $token->[0];`
			`my $tag = $token->[1];`
			`my $attr = $token->[2]; # hashref`

			`if ( $type eq "S" ) {`
			`my $selfclose;`

			`# start tag`
			`$out .= "<$tag";`

			`# assume tags are properly self-closed`
			`$selfclose = 1 if lc $tag eq 'input' \|\| lc $tag eq 'br' \|\| lc $tag eq 'img';`

			`# preserve order of attributes. the original order is`
			`# in element 4 of $token`
			`foreach my $attrname ( @{ $token->[3] } ) {`
			`if ( $attrname eq '/' ) {`
			`$selfclose = 1;`
			`next;`
			`}`

			`# FIXME: neaten`
			`$attr->{$attrname} = LJ::no_utf8_flag( $attr->{$attrname} );`
			`$out .= " $attrname=\"" . LJ::ehtml( $attr->{$attrname} ) . "\"";`
			`}`

			`$out .= $selfclose ? " />" : ">";`

			`push @open_tags, $tag unless $selfclose;`

			`}`
			`elsif ( $type eq 'T' \|\| $type eq 'D' ) {`
			`my $content = $token->[1];`

			`if ( length($content) + $content_len > $char_max ) {`

			`# truncate and stop parsing`
			`$content = LJ::text_trim( $content, undef, ( $char_max - $content_len ) );`
			`$out .= $content;`
			`$$truncated = 1 if ref $truncated;`
			`last;`
			`}`

			`$content_len += length $content;`

			`$out .= $content;`

			`}`
			`elsif ( $type eq 'C' ) {`

			`# comment, don't care`
			`$out .= $token->[1];`

			`}`
			`elsif ( $type eq 'E' ) {`

			`# end tag`
			`if ( $open_tags[-1] eq $tag ) {`
			`pop @open_tags;`
			`$out .= "</$tag>";`
			`}`
			`}`
			`}`

			`$out .= join( "\n", map { "</$_>" } reverse @open_tags );`

			`return $out;`
			`}`

			`# takes a number, inserts commas where needed`
			`sub commafy {`
			`my $number = $_[0];`
			`return '' unless defined $number;`
			`return $number unless $number =~ /^\d+$/;`

			`my $punc = LJ::Lang::ml('number.punctuation') \|\| ",";`
			`$number =~ s/(?<=\d)(?=(\d\d\d)+(?!\d))/$punc/g;`
			`return $number;`
			`}`

			`# <LJFUNC>`
			`# name: LJ::html_newlines`
			`# des: Replace newlines with HTML break tags.`
			`# args: text`
			`# returns: text, possibly including HTML break tags.`
			`# </LJFUNC>`
			`sub html_newlines {`
			`my $text = $_[0];`
			`return '' unless defined $text;`

			`$text =~ s/\n/<br \/>/gm;`
			`return $text;`
			`}`

			`# prepend ">" to each line of text to make a blockquote in markdown`
			`# for when text has multiple lines and prepending ">" to the entire`
			`# text will just convert the first line / paragraph`
			`sub markdown_blockquote {`
			`my $text = $_[0];`
			`return '' unless defined $text;`

			`$text =~ s/(^.*)/\> $1/gm;`
			`return $text;`
			`}`

			`1;`