otwarchive-symphonyarchive/lib/url_formatter.rb

68 lines
1.4 KiB
Ruby
Raw Permalink Normal View History

2026-03-11 22:22:11 +00:00
require 'uri'
require 'cgi'
require 'addressable/uri'
class UrlFormatter
attr_accessor :url
def initialize(url)
@url = url || ""
end
def original
url
end
# Remove anchors and query parameters, preserve sid parameter for eFiction sites
def minimal (input = url)
uri = Addressable::URI.parse(input)
queries = CGI::parse(uri.query) unless uri.query.nil?
if queries.nil?
input.gsub(/[?#].*$/, "")
else
queries.keep_if { |k, _| ["sid"].include? k }
querystring = "?#{URI.encode_www_form(queries)}" unless queries.empty?
input.gsub(/[?#].*$/, "") << querystring.to_s
end
end
def minimal_no_protocol_no_www
minimal.gsub(%r{^https?://(www\.)?}, "")
end
def no_www
minimal.gsub(%r{^(https?)://www\.}, "\\1://")
end
def with_www
minimal.gsub(%r{^(https?)://}, "\\1://www.")
end
def with_http
minimal.gsub(%r{^https?://}, "").prepend("http://")
end
def with_https
minimal.gsub(%r{^https?://}, "").prepend("https://")
end
def encoded
minimal URI::Parser.new.escape(url)
end
def decoded
URI::Parser.new.unescape(minimal)
end
# Adds http if not present, downcases the host and hyphenates spaces
# Extracted from story parser class
# Returns a Generic::URI
def standardized
uri = URI.parse(url)
uri = URI.parse("http://#{url}") if uri.instance_of?(URI::Generic)
uri.host = uri.host.downcase.tr(" ", "-")
uri
end
end