otwarchive-symphonyarchive/app/models/download_writer.rb

177 lines
5.7 KiB
Ruby
Raw Normal View History

2026-03-11 22:22:11 +00:00
require "open3"
# Writes the downloadable versions of a work to disk: first the HTML
# rendering, then (for ebook formats) a conversion of that HTML by running
# the external web2disk, zip and ebook-convert commands.
class DownloadWriter
  attr_reader :download, :work

  # download - the download object describing what to generate. This class
  #            reads its work, file_type, page_title, chapters, authors and
  #            the various *_path values from it.
  def initialize(download)
    @download = download
    @work = download.work
  end

  # Generate the files for this download: always the HTML version, plus the
  # ebook version when the requested file type is anything other than "html".
  #
  # Returns the download that was passed to the constructor.
  def write
    generate_html_download
    generate_ebook_download unless download.file_type == "html"
    download
  end

  # Render the "downloads/show" template inside the "barebones" layout,
  # outside of a request cycle, using the configured application host.
  #
  # Returns whatever the renderer returns for the rendered template.
  def generate_html
    renderer = ApplicationController.renderer.new(
      http_host: ArchiveConfig.APP_HOST
    )
    renderer.render(
      template: "downloads/show",
      layout: "barebones",
      assigns: {
        work: work,
        page_title: download.page_title,
        chapters: download.chapters
      }
    )
  end

  private

  # Write the HTML version to file, unless the download already exists.
  def generate_html_download
    return if download.exists?

    File.open(download.html_file_path, "w:UTF-8") { |f| f.write(generate_html) }
  end

  # Transform the HTML version into the ebook version by running each of the
  # commands from get_commands in order. Does nothing for file types other
  # than azw3, epub, mobi and pdf, or when the download already exists.
  #
  # A command that does not exit successfully is logged as a warning; the
  # remaining commands are still attempted.
  def generate_ebook_download
    return unless %w[azw3 epub mobi pdf].include?(download.file_type)
    return if download.exists?

    get_commands.each do |cmd|
      # Each command is an argument array, so it is executed directly rather
      # than being interpreted by a shell.
      #
      # capture3 drains stdout and stderr while waiting for the child, so a
      # chatty command cannot block forever on a full pipe (which can happen
      # when waiting on popen3 without reading its streams). The captured
      # output is deliberately discarded.
      _stdout, _stderr, status = Open3.capture3(*cmd)

      # A Process::Status object is always truthy, so the exit status itself
      # has to be inspected to detect a failure.
      next if status.success?

      Rails.logger.warn "Download generation failed: #{cmd}"
    end
  end

  # Get the commands we need to execute, in the order they must run: fetch
  # the HTML and its images, zip them up, then convert the zip with calibre.
  def get_commands
    [get_web2disk_command, get_zip_command, get_calibre_command]
  end

  # Create the format-specific command-line call to calibre/ebook-convert.
  #
  # Returns an array of command-line arguments, starting with the program name.
  def get_calibre_command
    # Add info about first series if any
    series = []
    if meta[:series_title].present?
      series = ["--series", meta[:series_title],
                "--series-index", meta[:series_position]]
    end

    ### Format-specific options
    # epub: don't generate a cover image
    epub = download.file_type == "epub" ? ["--no-default-epub-cover"] : []

    pdf = []
    if download.file_type == "pdf"
      pdf = [
        # pdf: decrease margins from 72pt default
        "--pdf-page-margin-top", "36",
        "--pdf-page-margin-right", "36",
        "--pdf-page-margin-bottom", "36",
        "--pdf-page-margin-left", "36",
        "--pdf-default-font-size", "17",
        # pdf: only include necessary characters when embedding fonts
        "--subset-embedded-fonts"
      ]
    end

    ### CSS options
    # azw3, epub, and mobi get a special stylesheet
    css = []
    if %w[azw3 epub mobi].include?(download.file_type)
      css = ["--extra-css",
             Rails.public_path.join("stylesheets/ebooks.css").to_s]
    end

    [
      "ebook-convert",
      download.zip_path,
      download.file_path,
      "--input-encoding", "utf-8",
      # Prevent it from turning links to endnotes into entries for the table of
      # contents on works with fewer than the specified number of chapters.
      "--toc-threshold", "0",
      "--use-auto-toc",
      "--title", meta[:title],
      "--title-sort", meta[:sortable_title],
      "--authors", meta[:authors],
      "--author-sort", meta[:sortable_authors],
      "--comments", meta[:summary],
      "--tags", meta[:tags],
      "--pubdate", meta[:pubdate],
      "--publisher", ArchiveConfig.APP_NAME,
      "--language", meta[:language],
      # XPaths for detecting chapters are overly specific to make sure we don't grab
      # anything inputted by the user. First path is for single-chapter works,
      # second for multi-chapter, and third for the preface and afterword
      "--chapter", "//h:body/h:div[@id='chapters']/h:h2[@class='toc-heading'] | //h:body/h:div[@id='chapters']/h:div[@class='meta group']/h:h2[@class='heading'] | //h:body/h:div[@id='preface' or @id='afterword']/h:h2[@class='toc-heading']"
    ] + series + css + epub + pdf
  end

  # Grab the HTML file and any images and put them in --base-dir.
  # --max-recursions 0 prevents it from grabbing all the linked pages.
  # --dont-download-stylesheets isn't strictly necessary for us but avoids
  # creating an empty stylesheets directory.
  def get_web2disk_command
    [
      "web2disk",
      "--base-dir", download.assets_path,
      "--max-recursions", "0",
      "--dont-download-stylesheets",
      "file://#{download.html_file_path}"
    ]
  end

  # Zip the directory containing the HTML file and images.
  def get_zip_command
    [
      "zip",
      "-r",
      download.zip_path,
      download.assets_path
    ]
  end

  # A hash of the work data calibre needs. Built on first use and memoized
  # for the lifetime of this writer.
  def meta
    return @metadata if @metadata

    @metadata = {
      title: work.title,
      sortable_title: work.sorted_title,
      # Using ampersands as instructed by Calibre's ebook-convert documentation
      # hides all but the first author name in Books (formerly iBooks). The
      # other authors cannot be used for searching or sorting. Using commas
      # just means Calibre's GUI treats it as one name, e.g. "testy, testy2" is
      # like "Fangirl, Suzy Q", for searching and sorting.
      authors: download.authors,
      sortable_authors: work.authors_to_sort_on,
      # We add "Fanworks" because Books uses the first tag as the category and
      # it would otherwise be the work's rating, which is weird.
      tags: "Fanworks, " + work.tags.pluck(:name).join(", "),
      pubdate: work.revised_at.to_date.to_s,
      summary: work.summary.to_s,
      language: work.language.short
    }

    # Only the first series is recorded; the keys are absent when the work is
    # not part of any series.
    if work.series.exists?
      series = work.series.first
      @metadata[:series_title] = series.title
      @metadata[:series_position] = series.position_of(work).to_s
    end

    @metadata
  end
end