commit 70a26e088ac34b471c3b4705d52f6793dbb138c4
parent 552a60776429c2f61e0cf188d48334e445176d27
Author: egor-achkasov <eaachkasov@gmail.com>
Date: Wed, 25 Feb 2026 16:00:24 +0000
Refactor towards a lib-first architecture
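
Move the scraping pipeline out of the binary and into a library crate: backend.rs
now drives fetching, parsing and exporting, and reports progress by sending Event
values over an mpsc channel instead of printing to the console. The binary keeps
only argument parsing (cli.rs) and a render_event loop that turns those events back
into the previous console output, so other frontends can reuse the same backend.

A rough sketch of how another frontend could drive the library after this change
(Config, the Event variants and backend::run are the pieces added below; the
example URL and the failures-only logging are illustrative, not part of this
commit):

    use std::sync::mpsc;
    use arhivarch_downloader::{backend, config::Config, events::Event};

    fn run_quiet() -> anyhow::Result<()> {
        // Hypothetical input; any thread URL accepted by the scraper works here.
        let config = Config {
            urls: vec!["https://example.org/thread/123".to_string()],
            thumb: false,
            files: false,
            resume: false,
        };
        let (tx, rx) = mpsc::channel::<Event>();
        // Run the backend on its own thread so events can be consumed as they arrive.
        let handle = std::thread::spawn({
            let config = config.clone();
            move || backend::run(&config, tx)
        });
        // Report only failed threads; ignore all progress events.
        for event in rx {
            if let Event::ThreadFailed { url, error } = event {
                eprintln!("{url}: {error}");
            }
        }
        handle.join().expect("backend thread panicked")
    }
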
Diffstat:
| A | src/backend.rs | | | 81 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/cli.rs | | | 54 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | src/config.rs | | | 7 | +++++++ |
| A | src/events.rs | | | 23 | +++++++++++++++++++++++ |
| M | src/export.rs | | | 51 | +++++++++++++++++++++++++++++++++++++-------------- |
| A | src/lib.rs | | | 6 | ++++++ |
| M | src/main.rs | | | 127 | ++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
| D | src/parse_args.rs | | | 59 | ----------------------------------------------------------- |
8 files changed, 273 insertions(+), 135 deletions(-)
diff --git a/src/backend.rs b/src/backend.rs
@@ -0,0 +1,81 @@
+use std::sync::mpsc::Sender;
+
+use anyhow::{Context, Ok, Result};
+use std::result::Result::Ok as StdOk;
+
+use crate::{config::Config, events::Event, export, post::Post};
+
+pub fn fetch_with_retry(url: &str, attempts: u32, tx: &Sender<Event>) -> Result<String> {
+ for attempt in 1..=attempts {
+ match reqwest::blocking::get(url).and_then(|r| r.text()) {
+ StdOk(text) => return Ok(text),
+ Err(e) => {
+ tx.send(Event::FetchRetrying {
+ url: url.to_string(),
+ attempt,
+ max_attempts: attempts,
+ error: e.to_string(),
+ }).ok();
+ if attempt < attempts {
+ std::thread::sleep(std::time::Duration::from_secs(3));
+ }
+ }
+ }
+ }
+ anyhow::bail!("failed to get thread after {attempts} attempts")
+}
+
+pub fn scrape_thread(url: &str, config: &Config, tx: &Sender<Event>) -> Result<Post> {
+ let t_total = std::time::Instant::now();
+
+ tx.send(Event::FetchStarted { url: url.to_string() }).ok();
+ let t = std::time::Instant::now();
+ let html = fetch_with_retry(url, 3, tx)?;
+ tx.send(Event::FetchDone { elapsed_ms: t.elapsed().as_millis() }).ok();
+
+ tx.send(Event::ParseStarted).ok();
+ let t = std::time::Instant::now();
+ let posts = Post::parse_posts(&html).context("failed to parse thread HTML")?;
+ tx.send(Event::ParseDone {
+ post_count: posts.len(),
+ elapsed_ms: t.elapsed().as_millis(),
+ }).ok();
+
+ let first_post = posts.first().context("thread has no posts")?.clone();
+
+ export::export2html(&posts, config, tx).context("failed to export thread")?;
+
+ tx.send(Event::ThreadDone {
+ url: url.to_string(),
+ elapsed_ms: t_total.elapsed().as_millis(),
+ }).ok();
+
+ Ok(first_post)
+}
+
+pub fn run(config: &Config, tx: Sender<Event>) -> Result<()> {
+ let total = config.urls.len();
+ let mut first_posts: Vec<Post> = Vec::new();
+
+ for (i, url) in config.urls.iter().enumerate() {
+ tx.send(Event::ThreadStarted {
+ url: url.clone(),
+ index: i + 1,
+ total,
+ }).ok();
+
+ match scrape_thread(url, config, &tx) {
+ StdOk(first_post) => first_posts.push(first_post),
+ Err(e) => {
+ tx.send(Event::ThreadFailed {
+ url: url.clone(),
+ error: format!("{:#}", e),
+ }).ok();
+ }
+ }
+ }
+
+ export::write_index_html(&first_posts, config).context("failed to write main index.html")?;
+
+ Ok(())
+}
diff --git a/src/cli.rs b/src/cli.rs
@@ -0,0 +1,54 @@
+use clap::Parser;
+use anyhow::Result;
+
+use std::path::PathBuf;
+
+use arhivarch_downloader::config::Config;
+
+pub fn parse_args() -> Result<Config> {
+ #[derive(Parser)]
+ #[command(about, long_about)]
+ struct Cli {
+ /// URL to download
+ url: Option<String>,
+
+ /// Path to a text file containing a list of URLs (one per line)
+ #[arg(short = 'l', long = "list")]
+ list: Option<PathBuf>,
+
+ /// Download thumbnail images, default: false
+ #[arg(short = 't', long = "thumb", default_value_t = false)]
+ thumb: bool,
+
+ /// Download files (images, videos, gifs, etc.), default: false
+ #[arg(short = 'f', long = "files", default_value_t = false)]
+ files: bool,
+
+ /// Resume downloading files and thumbnails instead of overwriting them. Has no effect unless -t or -f is set, default: false
+ #[arg(short = 'r', long = "resume", default_value_t = false)]
+ resume: bool
+ }
+ let cli = Cli::parse();
+
+ let mut urls = Vec::new();
+ // [URL]
+ if let Some(url) = cli.url {
+ urls.push(url);
+ }
+ // [List]
+ if let Some(list) = cli.list {
+ for line in std::fs::read_to_string(list)?.lines() {
+ urls.push(line.to_string());
+ }
+ }
+ if urls.is_empty() {
+ anyhow::bail!("No URLs provided");
+ }
+
+ Ok(Config {
+ urls,
+ thumb: cli.thumb,
+ files: cli.files,
+ resume: cli.resume,
+ })
+}
diff --git a/src/config.rs b/src/config.rs
@@ -0,0 +1,7 @@
+#[derive(Debug, Clone)]
+pub struct Config {
+ pub urls: Vec<String>,
+ pub thumb: bool,
+ pub files: bool,
+ pub resume: bool,
+}
diff --git a/src/events.rs b/src/events.rs
@@ -0,0 +1,23 @@
+#[derive(Debug, Clone)]
+pub enum Event {
+ // Thread-level lifecycle
+ ThreadStarted { url: String, index: usize, total: usize },
+ ThreadDone { url: String, elapsed_ms: u128 },
+ ThreadFailed { url: String, error: String },
+
+ // HTTP fetch
+ FetchStarted { url: String },
+ FetchDone { elapsed_ms: u128 },
+ FetchRetrying { url: String, attempt: u32, max_attempts: u32, error: String },
+
+ // HTML parsing
+ ParseStarted,
+ ParseDone { post_count: usize, elapsed_ms: u128 },
+
+ // Asset downloading
+ DownloadBatchStarted { label: String, total_posts: usize },
+ DownloadBatchProgress { label: String, done: usize, total: usize },
+ DownloadAssetFailed { label: String, filename: String, attempt: u32, error: String },
+ DownloadAssetSkipped { label: String, filename: String },
+ DownloadBatchDone { label: String, elapsed_ms: u128 },
+}
diff --git a/src/export.rs b/src/export.rs
@@ -1,4 +1,6 @@
-use crate::{parse_args::Config, post::Post};
+use std::sync::mpsc::Sender;
+
+use crate::{config::Config, events::Event, post::Post};
use anyhow::{Result, Context};
@@ -39,7 +41,7 @@ fn render_text_to_html(text: &str) -> String {
}
/// Write a top-level index.html with one entry per thread (first post + link to thread folder)
-pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
+pub(crate) fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
if first_posts.is_empty() {
return Ok(());
}
@@ -78,7 +80,7 @@ pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
/// If download_thumbnails is true, downloads thumbnails to ./{thread_id}/thumb
///
/// WARNING: If the directory already exists, it will be overwritten
-pub fn export2html(posts: &[Post], config: &Config) -> Result<()> {
+pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -> Result<()> {
if posts.is_empty() {
anyhow::bail!("No posts to export");
}
@@ -99,6 +101,7 @@ pub fn export2html(posts: &[Post], config: &Config) -> Result<()> {
"files",
|f| &f.url,
config.resume,
+ tx,
)?;
}
if config.thumb {
@@ -108,6 +111,7 @@ pub fn export2html(posts: &[Post], config: &Config) -> Result<()> {
"thumbnails",
|f| &f.url_thumb,
config.resume,
+ tx,
)?;
}
@@ -213,38 +217,57 @@ fn download_assets(
label: &str,
url_of: impl Fn(&crate::file::File) -> &str,
skip_if_exists: bool,
+ tx: &Sender<Event>,
) -> Result<()> {
- use std::io::Write;
-
std::fs::create_dir_all(dest_dir)
.with_context(|| format!("Failed to create directory {}", dest_dir))?;
+
let t = std::time::Instant::now();
- print!("\tDownloading {}... post 0 / {}", label, posts.len());
- std::io::stdout().flush().ok();
+ tx.send(Event::DownloadBatchStarted {
+ label: label.to_string(),
+ total_posts: posts.len(),
+ }).ok();
+
for (i, post) in posts.iter().enumerate() {
for f in &post.files {
let url = url_of(f);
- let filename = url.split('/').last().unwrap_or("");
+ let filename = url.split('/').last().unwrap_or("").to_string();
let path = format!("{}/{}", dest_dir, filename);
if skip_if_exists && std::path::Path::new(&path).exists() {
continue;
}
let mut result = Err(anyhow::anyhow!("no attempts"));
- for _ in 0..3 {
+ for attempt in 1..=3u32 {
result = download(url, &path);
if result.is_ok() { break; }
let e = result.as_ref().unwrap_err();
- println!("\r\tFailed to download {} {}: {}\n\t-> Waiting 3 seconds...", label, filename, e);
+ tx.send(Event::DownloadAssetFailed {
+ label: label.to_string(),
+ filename: filename.clone(),
+ attempt,
+ error: e.to_string(),
+ }).ok();
std::thread::sleep(std::time::Duration::from_secs(3));
}
if result.is_err() {
- println!("\tSkipping {} {} after 3 failed attempts.", label, filename);
+ tx.send(Event::DownloadAssetSkipped {
+ label: label.to_string(),
+ filename: filename.clone(),
+ }).ok();
}
}
- print!("\r\tDownloading {}... post {} / {}", label, i + 1, posts.len());
- std::io::stdout().flush().ok();
+ tx.send(Event::DownloadBatchProgress {
+ label: label.to_string(),
+ done: i + 1,
+ total: posts.len(),
+ }).ok();
}
- println!(" Done ({} ms)", t.elapsed().as_millis());
+
+ tx.send(Event::DownloadBatchDone {
+ label: label.to_string(),
+ elapsed_ms: t.elapsed().as_millis(),
+ }).ok();
+
Ok(())
}
diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1,6 @@
+pub mod config;
+pub mod events;
+pub mod backend;
+pub mod post;
+pub mod file;
+pub(crate) mod export;
diff --git a/src/main.rs b/src/main.rs
@@ -1,77 +1,80 @@
-mod parse_args;
-mod post;
-mod file;
-mod export;
-
-use parse_args::{Config, parse_args};
-use post::Post;
-
-use anyhow::{Context, Ok, Result};
-use std::result::Result::Ok as StdOk;
-
-fn fetch_with_retry(url: &str, attempts: u32) -> Result<String> {
- for attempt in 1..=attempts {
- match reqwest::blocking::get(url).and_then(|r| r.text()) {
- StdOk(text) => return Ok(text),
- Err(e) => {
- eprintln!("\n\tHTTP request failed for {url}: {e}");
- if attempt < attempts {
- eprintln!("\tWaiting 3 seconds...");
- std::thread::sleep(std::time::Duration::from_secs(3));
- }
- }
- }
+mod cli;
+
+use arhivarch_downloader::{backend, events::Event};
+use std::sync::mpsc;
+
+fn main() -> anyhow::Result<()> {
+ let config = cli::parse_args().unwrap_or_else(|e| {
+ eprintln!("Error: {}", e);
+ std::process::exit(1);
+ });
+
+ let (tx, rx) = mpsc::channel::<Event>();
+
+ let handle = std::thread::spawn({
+ let config = config.clone();
+ move || backend::run(&config, tx)
+ });
+
+ for event in rx {
+ render_event(&event);
}
- anyhow::bail!("failed to get thread after {attempts} attempts")
+
+ handle.join().unwrap()
}
-fn scrape_thread(url: &str, config: &Config) -> Result<Post> {
+fn render_event(event: &Event) {
use std::io::Write;
- let t_total = std::time::Instant::now();
+ match event {
+ Event::ThreadStarted { url, index, total } =>
+ println!("Processing {} ({} / {}):", url, index, total),
- print!("\tGetting thread...");
- std::io::stdout().flush().ok();
- let t = std::time::Instant::now();
- let html = fetch_with_retry(url, 3)?;
- println!(" Done ({} ms)", t.elapsed().as_millis());
+ Event::ThreadDone { url, elapsed_ms } =>
+ println!("Done processing {} ({} ms)", url, elapsed_ms),
- print!("\tParsing posts...");
- std::io::stdout().flush().ok();
- let t = std::time::Instant::now();
- let posts = Post::parse_posts(&html)
- .context("failed to parse thread HTML")?;
- println!(" Done ({} ms)", t.elapsed().as_millis());
+ Event::ThreadFailed { url, error } =>
+ eprintln!("Error processing {}: {}", url, error),
- let first_post = posts.first().context("thread has no posts")?.clone();
+ Event::FetchStarted { .. } => {
+ print!("\tGetting thread...");
+ std::io::stdout().flush().ok();
+ }
- export::export2html(&posts, &config)
- .context("failed to export thread")?;
+ Event::FetchDone { elapsed_ms } =>
+ println!(" Done ({} ms)", elapsed_ms),
- println!("Done processing {} ({} ms)", url, t_total.elapsed().as_millis());
- Ok(first_post)
-}
+ Event::FetchRetrying { url, attempt, max_attempts, error } => {
+ eprintln!("\n\tHTTP request failed for {}: {}", url, error);
+ if attempt < max_attempts {
+ eprintln!("\tWaiting 3 seconds...");
+ }
+ }
+ Event::ParseStarted => {
+ print!("\tParsing posts...");
+ std::io::stdout().flush().ok();
+ }
-fn main() -> Result<()> {
- let config = parse_args()
- .unwrap_or_else(|e| {
- eprintln!("Error: {}", e);
- std::process::exit(1);
- });
-
- let mut first_posts: Vec<Post> = Vec::new();
- let mut i = 1;
- for url in &config.urls {
- println!("Processing {} ({} / {}):", url, i, config.urls.len());
- i += 1;
- match scrape_thread(url, &config) {
- StdOk(first_post) => first_posts.push(first_post),
- Err(e) => eprintln!("Error processing {}: {:#}", url, e),
+ Event::ParseDone { elapsed_ms, .. } =>
+ println!(" Done ({} ms)", elapsed_ms),
+
+ Event::DownloadBatchStarted { label, total_posts } => {
+ print!("\tDownloading {}... post 0 / {}", label, total_posts);
+ std::io::stdout().flush().ok();
}
- }
- export::write_index_html(&first_posts, &config)
- .context("failed to write main index.html")?;
+ Event::DownloadBatchProgress { label, done, total } => {
+ print!("\r\tDownloading {}... post {} / {}", label, done, total);
+ std::io::stdout().flush().ok();
+ }
+
+ Event::DownloadAssetFailed { label, filename, error, .. } =>
+ println!("\r\tFailed to download {} {}: {}\n\t-> Waiting 3 seconds...", label, filename, error),
- Ok(())
+ Event::DownloadAssetSkipped { label, filename } =>
+ println!("\tSkipping {} {} after 3 failed attempts.", label, filename),
+
+ Event::DownloadBatchDone { elapsed_ms, .. } =>
+ println!(" Done ({} ms)", elapsed_ms),
+ }
}
diff --git a/src/parse_args.rs b/src/parse_args.rs
@@ -1,59 +0,0 @@
-use clap::Parser;
-use anyhow::Result;
-
-use std::path::PathBuf;
-
-pub struct Config{
- pub urls: Vec<String>,
- pub thumb: bool,
- pub files: bool,
- pub resume: bool,
-}
-
-pub fn parse_args() -> Result<Config> {
- #[derive(Parser)]
- #[command(about, long_about)]
- struct Cli {
- /// URL to download
- url: Option<String>,
-
- /// Path to a text file containing a list of URLs (one per line)
- #[arg(short = 'l', long = "list")]
- list: Option<PathBuf>,
-
- /// Download thumbnail images, default: false
- #[arg(short = 't', long = "thumb", default_value_t = false)]
- thumb: bool,
-
- /// Download files (images, videos, gifs, etc), default: false
- #[arg(short = 'f', long = "files", default_value_t = false)]
- files: bool,
-
- /// Resume files and thumbnails downloading instead of overwriting. Useless if neither -t nor -f are set, default: false
- #[arg(short = 'r', long = "resume", default_value_t = false)]
- resume: bool
- }
- let cli = Cli::parse();
-
- let mut urls = Vec::new();
- // [URL]
- if let Some(url) = cli.url {
- urls.push(url);
- }
- // [List]
- if let Some(list) = cli.list {
- for line in std::fs::read_to_string(list)?.lines() {
- urls.push(line.to_string());
- }
- }
- if urls.is_empty() {
- anyhow::bail!("No URLs provided");
- }
-
- Ok(Config {
- urls,
- thumb: cli.thumb,
- files: cli.files,
- resume: cli.resume,
- })
-}