commit f0b2401e13487a7382c1bb1e531f1e389afab516
parent c45b7960d9e8527bab3ac0b984e814e1c8f32dca
Author: egor-achkasov <eaachkasov@gmail.com>
Date: Thu, 26 Feb 2026 15:38:42 +0000
Add export module
Diffstat:
7 files changed, 161 insertions(+), 157 deletions(-)
diff --git a/src/backend.rs b/src/backend.rs
@@ -1,19 +1,19 @@
use anyhow::{Context, Ok, Result};
use std::result::Result::Ok as StdOk;
-use crate::{config::Config, events::{Event, Reporter}, export, http, post::Post};
+use crate::{config::Config, events::{Event, Reporter}, export::{Export, html}, http, post::Post};
-pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter) -> Result<Post> {
+pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter, exporter: &dyn Export) -> Result<Post> {
let t_total = std::time::Instant::now();
reporter.report(Event::FetchStarted { url: url.to_string() });
let t = std::time::Instant::now();
- let html = http::fetch_with_retry(url, 3, reporter)?;
+ let html_content = http::fetch_with_retry(url, 3, reporter)?;
reporter.report(Event::FetchDone { elapsed_ms: t.elapsed().as_millis() });
reporter.report(Event::ParseStarted);
let t = std::time::Instant::now();
- let posts = Post::parse_posts(&html).context("failed to parse thread HTML")?;
+ let posts = Post::parse_posts(&html_content).context("failed to parse thread HTML")?;
reporter.report(Event::ParseDone {
post_count: posts.len(),
elapsed_ms: t.elapsed().as_millis(),
@@ -21,7 +21,7 @@ pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter) -> Res
let first_post = posts.first().context("thread has no posts")?.clone();
- export::export2html(&posts, config, reporter).context("failed to export thread")?;
+ exporter.export(&posts, config, reporter).context("failed to export thread")?;
reporter.report(Event::ThreadDone {
url: url.to_string(),
@@ -31,7 +31,7 @@ pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter) -> Res
Ok(first_post)
}
-pub fn run(config: &Config, reporter: &dyn Reporter) -> Result<()> {
+pub fn run(config: &Config, reporter: &dyn Reporter, exporter: &dyn Export) -> Result<()> {
let total = config.urls.len();
let mut first_posts: Vec<Post> = Vec::new();
@@ -42,7 +42,7 @@ pub fn run(config: &Config, reporter: &dyn Reporter) -> Result<()> {
total,
});
- match scrape_thread(url, config, reporter) {
+ match scrape_thread(url, config, reporter, exporter) {
StdOk(first_post) => first_posts.push(first_post),
Err(e) => {
reporter.report(Event::ThreadFailed {
@@ -53,7 +53,7 @@ pub fn run(config: &Config, reporter: &dyn Reporter) -> Result<()> {
}
}
- export::write_index_html(&first_posts, config).context("failed to write main index.html")?;
+ html::write_index_html(&first_posts, config).context("failed to write main index.html")?;
Ok(())
}
diff --git a/src/bin/cli/main.rs b/src/bin/cli/main.rs
@@ -1,4 +1,4 @@
-use arhivarch_downloader::{backend, events::Event, config::Config};
+use arhivarch_downloader::{backend, events::Event, config::Config, HtmlExporter};
use clap::Parser;
use anyhow::Result;
@@ -16,7 +16,7 @@ fn main() -> anyhow::Result<()> {
let handle = std::thread::spawn({
let config = config.clone();
- move || backend::run(&config, &tx)
+ move || backend::run(&config, &tx, &HtmlExporter)
});
for event in rx {
diff --git a/src/export.rs b/src/export.rs
@@ -1,146 +0,0 @@
-use crate::{config::Config, events::{Event, Reporter}, http, post::{File, Post}, render};
-
-use anyhow::{Result, Context};
-
-const TEMPLATE: &str = include_str!("../template.html");
-
-/// Write a top-level index.html with one entry per thread (first post + link to thread folder)
-pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
- if first_posts.is_empty() {
- return Ok(());
- }
-
- let posts_html: String = first_posts
- .iter()
- .map(|p| {
- let mut post_html = render::render_post(p, config.files, config.thumb);
- // render_post references thumbnails and images in the same directory,
- // so replace them with links to the thread folder
- config.files.then(|| post_html = post_html.replace(
- "<a href=\"files/",
- &format!("<a href=\"{}/files/", p.id),
- ));
- config.thumb.then(|| post_html = post_html.replace(
- "<img src=\"thumb/",
- &format!("<img src=\"{}/thumb/", p.id),
- ));
- format!("<div><a href=\"{}/index.html\">В тред →</a></div>{}\n", p.id, post_html)
- })
- .collect::<Vec<String>>()
- .join("\n");
-
- let index_html = TEMPLATE.replace("{{posts}}", &posts_html);
- std::fs::write("index.html", index_html)
- .context("failed to write index.html")?;
-
- Ok(())
-}
-
-/// Export the thread to a simple static HTML
-///
-/// Creates a directory as follows:
-/// ./{thread_id}, where {thread_id} is OP ID
-/// If download_files is true, downloads files to ./{thread_id}/files
-/// If download_thumbnails is true, downloads thumbnails to ./{thread_id}/thumb
-///
-/// WARNING: If the directory already exists, it will be overwritten
-pub fn export2html(posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()> {
- if posts.is_empty() {
- anyhow::bail!("No posts to export");
- }
-
- let dir = format!("{}", posts[0].id);
- std::fs::create_dir_all(&dir)?;
-
- let posts_html: String = posts
- .iter()
- .map(|p| render::render_post(p, config.files, config.thumb))
- .collect::<Vec<String>>()
- .join("\n");
-
- if config.files {
- download_assets(
- &posts,
- &format!("{}/files", dir),
- "files",
- |f| &f.url,
- config.resume,
- reporter,
- )?;
- }
- if config.thumb {
- download_assets(
- &posts,
- &format!("{}/thumb", dir),
- "thumbnails",
- |f| &f.url_thumb,
- config.resume,
- reporter,
- )?;
- }
-
- let index_html = TEMPLATE.replace("{{posts}}", &posts_html);
- std::fs::write(format!("{}/index.html", dir), index_html)?;
-
- Ok(())
-}
-
-fn download_assets(
- posts: &[Post],
- dest_dir: &str,
- label: &str,
- url_of: impl Fn(&File) -> &str,
- skip_if_exists: bool,
- reporter: &dyn Reporter,
-) -> Result<()> {
- std::fs::create_dir_all(dest_dir)
- .with_context(|| format!("Failed to create directory {}", dest_dir))?;
-
- let t = std::time::Instant::now();
- reporter.report(Event::DownloadBatchStarted {
- label: label.to_string(),
- total_posts: posts.len(),
- });
-
- for (i, post) in posts.iter().enumerate() {
- for f in &post.files {
- let url = url_of(f);
- let filename = url.split('/').last().unwrap_or("").to_string();
- let path = format!("{}/{}", dest_dir, filename);
- if skip_if_exists && std::path::Path::new(&path).exists() {
- continue;
- }
- let mut result = Err(anyhow::anyhow!("no attempts"));
- for attempt in 1..=3u32 {
- result = http::download(url, &path);
- if result.is_ok() { break; }
- let e = result.as_ref().unwrap_err();
- reporter.report(Event::DownloadAssetFailed {
- label: label.to_string(),
- filename: filename.clone(),
- attempt,
- error: e.to_string(),
- });
- std::thread::sleep(std::time::Duration::from_secs(3));
- }
- if result.is_err() {
- reporter.report(Event::DownloadAssetSkipped {
- label: label.to_string(),
- filename: filename.clone(),
- });
- }
- }
- reporter.report(Event::DownloadBatchProgress {
- label: label.to_string(),
- done: i + 1,
- total: posts.len(),
- });
- }
-
- reporter.report(Event::DownloadBatchDone {
- label: label.to_string(),
- elapsed_ms: t.elapsed().as_millis(),
- });
-
- Ok(())
-}
diff --git a/src/export/html/mod.rs b/src/export/html/mod.rs
@@ -0,0 +1,142 @@
+use crate::{config::Config, events::{Event, Reporter}, http, post::{File, Post}};
+use anyhow::{Result, Context};
+use super::Export;
+
+mod render;
+
+const TEMPLATE: &str = include_str!("../../../template.html");
+/// Exporter that writes each thread as a static HTML page (see its `Export` impl).
+pub struct HtmlExporter;
+
+impl Export for HtmlExporter {
+    /// Export one thread as a static HTML page in `./{thread_id}/`, where
+    /// `{thread_id}` is the ID of the first post (the OP).
+    ///
+    /// With `config.files`, files are downloaded to `./{thread_id}/files`; with
+    /// `config.thumb`, thumbnails are downloaded to `./{thread_id}/thumb`.
+    /// An existing `./{thread_id}/index.html` is overwritten.
+    ///
+    /// # Errors
+    /// Fails when `posts` is empty, or when a directory or the page cannot be written.
+    fn export(&self, posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()> {
+        if posts.is_empty() {
+            anyhow::bail!("No posts to export");
+        }
+
+        let dir = posts[0].id.to_string();
+        std::fs::create_dir_all(&dir)
+            .with_context(|| format!("Failed to create directory {}", dir))?;
+
+        let posts_html: String = posts
+            .iter()
+            .map(|p| render::render_post(p, config.files, config.thumb))
+            .collect::<Vec<String>>()
+            .join("\n");
+
+        if config.files {
+            let dest = format!("{}/files", dir);
+            download_assets(posts, &dest, "files", |f| &f.url, config.resume, reporter)?;
+        }
+        if config.thumb {
+            let dest = format!("{}/thumb", dir);
+            download_assets(posts, &dest, "thumbnails", |f| &f.url_thumb, config.resume, reporter)?;
+        }
+
+        let index_path = format!("{}/index.html", dir);
+        let index_html = TEMPLATE.replace("{{posts}}", &posts_html);
+        std::fs::write(&index_path, index_html)
+            .with_context(|| format!("Failed to write {}", index_path))?;
+
+        Ok(())
+    }
+}
+
+/// Write a top-level `index.html` with one entry per thread: the thread's first
+/// post plus a link to the thread folder. Does nothing when `first_posts` is empty.
+pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
+    if first_posts.is_empty() {
+        return Ok(());
+    }
+
+    let posts_html: String = first_posts
+        .iter()
+        .map(|p| {
+            let mut post_html = render::render_post(p, config.files, config.thumb);
+            // The rendered post links to `files/` and `thumb/` relative to the thread
+            // folder; seen from the top-level index those live under `{id}/`.
+            if config.files {
+                let prefixed = format!("<a href=\"{}/files/", p.id);
+                post_html = post_html.replace("<a href=\"files/", &prefixed);
+            }
+            if config.thumb {
+                let prefixed = format!("<img src=\"{}/thumb/", p.id);
+                post_html = post_html.replace("<img src=\"thumb/", &prefixed);
+            }
+            format!("<div><a href=\"{}/index.html\">В тред →</a></div>{}\n", p.id, post_html)
+        })
+        .collect::<Vec<String>>()
+        .join("\n");
+
+    let index_html = TEMPLATE.replace("{{posts}}", &posts_html);
+    std::fs::write("index.html", index_html).context("failed to write index.html")
+}
+
+/// Download the URL picked by `url_of` for every file of every post into `dest_dir`,
+/// tagging reported events with `label`. With `skip_if_exists`, paths that already exist are
+/// not re-fetched. Persistently failing downloads are skipped; only creating `dest_dir` can fail.
+fn download_assets(
+    posts: &[Post],
+    dest_dir: &str,
+    label: &str,
+    url_of: impl Fn(&File) -> &str,
+    skip_if_exists: bool,
+    reporter: &dyn Reporter,
+) -> Result<()> {
+    const MAX_ATTEMPTS: u32 = 3;
+    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(3);
+
+    std::fs::create_dir_all(dest_dir)
+        .with_context(|| format!("Failed to create directory {}", dest_dir))?;
+    let t = std::time::Instant::now();
+    reporter.report(Event::DownloadBatchStarted {
+        label: label.to_string(),
+        total_posts: posts.len(),
+    });
+
+    for (i, post) in posts.iter().enumerate() {
+        for f in &post.files {
+            let url = url_of(f);
+            let filename = url.rsplit('/').next().unwrap_or("").to_string();
+            let path = format!("{}/{}", dest_dir, filename);
+            if skip_if_exists && std::path::Path::new(&path).exists() { continue; }
+            let fetched = (1..=MAX_ATTEMPTS).any(|attempt| match http::download(url, &path) {
+                Ok(_) => true,
+                Err(e) => {
+                    reporter.report(Event::DownloadAssetFailed {
+                        label: label.to_string(),
+                        filename: filename.clone(),
+                        attempt,
+                        error: e.to_string(),
+                    });
+                    // Back off only when another attempt follows.
+                    if attempt < MAX_ATTEMPTS { std::thread::sleep(RETRY_DELAY); }
+                    false
+                }
+            });
+            if !fetched {
+                reporter.report(Event::DownloadAssetSkipped { label: label.to_string(), filename });
+            }
+        }
+        reporter.report(Event::DownloadBatchProgress {
+            label: label.to_string(),
+            done: i + 1,
+            total: posts.len(),
+        });
+    }
+
+    reporter.report(Event::DownloadBatchDone {
+        label: label.to_string(),
+        elapsed_ms: t.elapsed().as_millis(),
+    });
+    Ok(())
+}
diff --git a/src/render.rs b/src/export/html/render.rs
diff --git a/src/export/mod.rs b/src/export/mod.rs
@@ -0,0 +1,8 @@
+use crate::{config::Config, events::Reporter, post::Post};
+use anyhow::Result;
+
+pub mod html;
+/// Export backend invoked once per scraped thread with that thread's posts.
+pub trait Export {
+ fn export(&self, posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()>;
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -3,7 +3,7 @@ pub mod events;
pub mod backend;
pub mod post;
pub mod http;
-pub mod render;
pub mod export;
pub use events::{Reporter, NullReporter};
+pub use export::html::HtmlExporter;