otwarchive-symphonyarchive/app/models/download_writer.rb

require "open3"

# Writes the files for a work download: always the HTML rendering, and for
# ebook formats (azw3, epub, mobi, pdf) a converted file produced by running
# the external web2disk, zip and ebook-convert commands in that order.
class DownloadWriter
  attr_reader :download, :work

  # download - the object describing the requested file. This class reads
  #            its work, file_type, exists?, page_title, chapters, authors,
  #            html_file_path, assets_path, zip_path and file_path.
  def initialize(download)
    @download = download
    @work = download.work
  end

  # Generates the HTML file and, unless the requested type is "html", the
  # ebook file as well.
  #
  # Returns the download object this writer was created with.
  def write
    generate_html_download
    generate_ebook_download unless download.file_type == "html"
    download
  end

  # Renders the "downloads/show" template with the "barebones" layout for
  # this work, outside of a request, using the configured application host.
  #
  # Returns the result of the renderer's render call (written to the HTML
  # file by generate_html_download).
  def generate_html
    renderer = ApplicationController.renderer.new(
      http_host: ArchiveConfig.APP_HOST
    )
    renderer.render(
      template: "downloads/show",
      layout: "barebones",
      assigns: {
        work: work,
        page_title: download.page_title,
        chapters: download.chapters
      }
    )
  end

  private

  # Write the HTML version to file, unless the download already exists.
  def generate_html_download
    return if download.exists?

    File.open(download.html_file_path, "w:UTF-8") { |f| f.write(generate_html) }
  end

  # Transform the HTML version into the ebook version by running each command
  # from get_commands in order. Does nothing for unsupported file types or
  # when the download already exists.
  #
  # A command that exits unsuccessfully is logged as a warning; the remaining
  # commands are still attempted.
  def generate_ebook_download
    return unless %w[azw3 epub mobi pdf].include?(download.file_type)
    return if download.exists?

    # Each command is an argv-style array (program plus separate arguments),
    # which Open3 passes on without building a shell command line, so work
    # metadata such as titles is never interpreted as shell syntax.
    get_commands.each do |cmd|
      # capture3 reads stdout and stderr to completion (we discard both), so
      # a tool that prints a lot of output cannot stall on a full pipe.
      _stdout, _stderr, exit_status = Open3.capture3(*cmd)

      # A Process::Status object is always truthy, so the exit code has to be
      # inspected explicitly. success? is nil when the process did not exit
      # normally (e.g. was killed by a signal), which is also logged.
      next if exit_status.success?

      Rails.logger.warn "Download generation failed: " + cmd.to_s
    end
  end

  # The commands to execute, in order: fetch the HTML and its images into the
  # assets directory, zip that directory, then convert the zip to the
  # requested format.
  def get_commands
    [get_web2disk_command, get_zip_command, get_calibre_command]
  end

  # Create the format-specific command-line call to calibre/ebook-convert.
  #
  # Returns an Array of Strings: the program name, input path, output path,
  # the common options, then any series, CSS, epub and pdf options.
  def get_calibre_command
    # Add info about first series if any
    series = []
    if meta[:series_title].present?
      series = ["--series", meta[:series_title],
                "--series-index", meta[:series_position]]
    end

    ### Format-specific options
    # epub: don't generate a cover image
    epub = download.file_type == "epub" ? ["--no-default-epub-cover"] : []

    pdf = []
    if download.file_type == "pdf"
      pdf = [
        # pdf: decrease margins from 72pt default
        "--pdf-page-margin-top", "36",
        "--pdf-page-margin-right", "36",
        "--pdf-page-margin-bottom", "36",
        "--pdf-page-margin-left", "36",
        "--pdf-default-font-size", "17",
        # pdf: only include necessary characters when embedding fonts
        "--subset-embedded-fonts"
      ]
    end

    ### CSS options
    # azw3, epub, and mobi get a special stylesheet
    css = []
    if %w[azw3 epub mobi].include?(download.file_type)
      css = ["--extra-css",
             Rails.public_path.join("stylesheets/ebooks.css").to_s]
    end

    [
      "ebook-convert",
      download.zip_path,
      download.file_path,
      "--input-encoding", "utf-8",
      # Prevent it from turning links to endnotes into entries for the table of
      # contents on works with fewer than the specified number of chapters.
      "--toc-threshold", "0",
      "--use-auto-toc",
      "--title", meta[:title],
      "--title-sort", meta[:sortable_title],
      "--authors", meta[:authors],
      "--author-sort", meta[:sortable_authors],
      "--comments", meta[:summary],
      "--tags", meta[:tags],
      "--pubdate", meta[:pubdate],
      "--publisher", ArchiveConfig.APP_NAME,
      "--language", meta[:language],
      # XPaths for detecting chapters are overly specific to make sure we don't grab
      # anything inputted by the user. First path is for single-chapter works,
      # second for multi-chapter, and third for the preface and afterword
      "--chapter", "//h:body/h:div[@id='chapters']/h:h2[@class='toc-heading'] | //h:body/h:div[@id='chapters']/h:div[@class='meta group']/h:h2[@class='heading'] | //h:body/h:div[@id='preface' or @id='afterword']/h:h2[@class='toc-heading']"
    ] + series + css + epub + pdf
  end

  # Grab the HTML file and any images and put them in --base-dir.
  # --max-recursions 0 prevents it from grabbing all the linked pages.
  # --dont-download-stylesheets isn't strictly necessary for us but avoids
  # creating an empty stylesheets directory.
  def get_web2disk_command
    [
      "web2disk",
      "--base-dir", download.assets_path,
      "--max-recursions", "0",
      "--dont-download-stylesheets",
      "file://#{download.html_file_path}"
    ]
  end

  # Zip the directory containing the HTML file and images.
  def get_zip_command
    [
      "zip",
      "-r",
      download.zip_path,
      download.assets_path
    ]
  end

  # A hash of the work data calibre needs. Built on first use and memoized in
  # @metadata for later calls. The :series_title and :series_position keys
  # are only present when the work has at least one series; the first series
  # is the one used.
  def meta
    return @metadata if @metadata
    @metadata = {
      title:             work.title,
      sortable_title:    work.sorted_title,
      # Using ampersands as instructed by Calibre's ebook-convert documentation
      # hides all but the first author name in Books (formerly iBooks). The
      # other authors cannot be used for searching or sorting. Using commas
      # just means Calibre's GUI treats it as one name, e.g. "testy, testy2" is
      # like "Fangirl, Suzy Q", for searching and sorting.
      authors:           download.authors,
      sortable_authors:  work.authors_to_sort_on,
      # We add "Fanworks" because Books uses the first tag as the category and
      # it would otherwise be the work's rating, which is weird.
      tags:              "Fanworks, " + work.tags.pluck(:name).join(", "),
      pubdate:           work.revised_at.to_date.to_s,
      summary:           work.summary.to_s,
      language:          work.language.short
    }
    if work.series.exists?
      series = work.series.first
      @metadata[:series_title] = series.title
      @metadata[:series_position] = series.position_of(work).to_s
    end
    @metadata
  end
end
first 2026-03-11 22:22:11 +00:00			`require "open3"`

			`class DownloadWriter`
			`attr_reader :download, :work`

			`def initialize(download)`
			`@download = download`
			`@work = download.work`
			`end`

			`def write`
			`generate_html_download`
			`generate_ebook_download unless download.file_type == "html"`
			`download`
			`end`

			`def generate_html`
			`renderer = ApplicationController.renderer.new(`
			`http_host: ArchiveConfig.APP_HOST`
			`)`
			`renderer.render(`
			`template: "downloads/show",`
			`layout: "barebones",`
			`assigns: {`
			`work: work,`
			`page_title: download.page_title,`
			`chapters: download.chapters`
			`}`
			`)`
			`end`

			`private`

			`# Write the HTML version to file`
			`def generate_html_download`
			`return if download.exists?`

			`File.open(download.html_file_path, "w:UTF-8") { \|f\| f.write(generate_html) }`
			`end`

			`# transform HTML version into ebook version`
			`def generate_ebook_download`
			`return unless %w[azw3 epub mobi pdf].include?(download.file_type)`
			`return if download.exists?`

			`cmds = get_commands`

			`# Make sure the command is sanitary, and use popen3 in order to`
			`# capture and discard the stdin/out info`
			`# See http://stackoverflow.com/a/5970819/469544 for details`
			`cmds.each do \|cmd\|`
			`exit_status = nil`
			`Open3.popen3(*cmd) { \|_stdin, _stdout, _stderr, wait_thread\| exit_status = wait_thread.value }`
			`unless exit_status`
			`Rails.logger.warn "Download generation failed: " + cmd.to_s`
			`end`
			`end`
			`end`

			`# Get the version of the command we need to execute`
			`def get_commands`
			`[get_web2disk_command, get_zip_command, get_calibre_command]`
			`end`

			`# Create the format-specific command-line call to calibre/ebook-convert`
			`def get_calibre_command`
			`# Add info about first series if any`
			`series = []`
			`if meta[:series_title].present?`
			`series = ["--series", meta[:series_title],`
			`"--series-index", meta[:series_position]]`
			`end`

			`### Format-specific options`
			`# epub: don't generate a cover image`
			`epub = download.file_type == "epub" ? ["--no-default-epub-cover"] : []`

			`pdf = []`
			`if download.file_type == "pdf"`
			`pdf = [`
			`# pdf: decrease margins from 72pt default`
			`"--pdf-page-margin-top", "36",`
			`"--pdf-page-margin-right", "36",`
			`"--pdf-page-margin-bottom", "36",`
			`"--pdf-page-margin-left", "36",`
			`"--pdf-default-font-size", "17",`
			`# pdf: only include necessary characters when embedding fonts`
			`"--subset-embedded-fonts"`
			`]`
			`end`

			`### CSS options`
			`# azw3, epub, and mobi get a special stylesheet`
			`css = []`
			`if %w[azw3 epub mobi].include?(download.file_type)`
			`css = ["--extra-css",`
			`Rails.public_path.join("stylesheets/ebooks.css").to_s]`
			`end`

			`[`
			`"ebook-convert",`
			`download.zip_path,`
			`download.file_path,`
			`"--input-encoding", "utf-8",`
			`# Prevent it from turning links to endnotes into entries for the table of`
			`# contents on works with fewer than the specified number of chapters.`
			`"--toc-threshold", "0",`
			`"--use-auto-toc",`
			`"--title", meta[:title],`
			`"--title-sort", meta[:sortable_title],`
			`"--authors", meta[:authors],`
			`"--author-sort", meta[:sortable_authors],`
			`"--comments", meta[:summary],`
			`"--tags", meta[:tags],`
			`"--pubdate", meta[:pubdate],`
			`"--publisher", ArchiveConfig.APP_NAME,`
			`"--language", meta[:language],`
			`# XPaths for detecting chapters are overly specific to make sure we don't grab`
			`# anything inputted by the user. First path is for single-chapter works,`
			`# second for multi-chapter, and third for the preface and afterword`
			`"--chapter", "//h:body/h:div[@id='chapters']/h:h2[@class='toc-heading'] \| //h:body/h:div[@id='chapters']/h:div[@class='meta group']/h:h2[@class='heading'] \| //h:body/h:div[@id='preface' or @id='afterword']/h:h2[@class='toc-heading']"`
			`] + series + css + epub + pdf`
			`end`

			`# Grab the HTML file and any images and put them in --base-dir.`
			`# --max-recursions 0 prevents it from grabbing all the linked pages.`
			`# --dont-download-stylesheets isn't strictly necessary for us but avoids`
			`# creating an empty stylesheets directory.`
			`def get_web2disk_command`
			`[`
			`"web2disk",`
			`"--base-dir", download.assets_path,`
			`"--max-recursions", "0",`
			`"--dont-download-stylesheets",`
			`"file://#{download.html_file_path}"`
			`]`
			`end`

			`# Zip the directory containing the HTML file and images.`
			`def get_zip_command`
			`[`
			`"zip",`
			`"-r",`
			`download.zip_path,`
			`download.assets_path`
			`]`
			`end`

			`# A hash of the work data calibre needs`
			`def meta`
			`return @metadata if @metadata`
			`@metadata = {`
			`title: work.title,`
			`sortable_title: work.sorted_title,`
			`# Using ampersands as instructed by Calibre's ebook-convert documentation`
			`# hides all but the first author name in Books (formerly iBooks). The`
			`# other authors cannot be used for searching or sorting. Using commas`
			`# just means Calibre's GUI treats it as one name, e.g. "testy, testy2" is`
			`# like "Fangirl, Suzy Q", for searching and sorting.`
			`authors: download.authors,`
			`sortable_authors: work.authors_to_sort_on,`
			`# We add "Fanworks" because Books uses the first tag as the category and`
			`# it would otherwise be the work's rating, which is weird.`
			`tags: "Fanworks, " + work.tags.pluck(:name).join(", "),`
			`pubdate: work.revised_at.to_date.to_s,`
			`summary: work.summary.to_s,`
			`language: work.language.short`
			`}`
			`if work.series.exists?`
			`series = work.series.first`
			`@metadata[:series_title] = series.title`
			`@metadata[:series_position] = series.position_of(work).to_s`
			`end`
			`@metadata`
			`end`
			`end`