commit c45b7960d9e8527bab3ac0b984e814e1c8f32dca
parent 207a68d04aca44e318f2b4474bc76c937066836c
Author: egor-achkasov <eaachkasov@gmail.com>
Date: Thu, 26 Feb 2026 15:18:18 +0000
Refactor to lib+bin arch
Diffstat:
| M | Cargo.toml | | | 4 | ++++ |
| M | src/backend.rs | | | 56 | +++++++++++++++++--------------------------------------- |
| A | src/bin/cli/main.rs | | | 131 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | src/cli.rs | | | 54 | ------------------------------------------------------ |
| M | src/events.rs | | | 22 | ++++++++++++++++++++++ |
| M | src/export.rs | | | 178 | ++++++++++--------------------------------------------------------------------- |
| D | src/file.rs | | | 22 | ---------------------- |
| A | src/http.rs | | | 35 | +++++++++++++++++++++++++++++++++++ |
| M | src/lib.rs | | | 7 | +++++-- |
| D | src/main.rs | | | 80 | ------------------------------------------------------------------------------- |
| M | src/post.rs | | | 68 | ++++++++++++++++++++++++++++++++++++++++++++------------------------ |
| A | src/render.rs | | | 125 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
12 files changed, 404 insertions(+), 378 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -4,6 +4,10 @@ description = "Download threads from arhivach."
version = "0.1.0"
edition = "2024"
+[[bin]]
+name = "arhivach-downloader-cli"
+path = "src/bin/cli/main.rs"
+
[dependencies]
anyhow = "1.0.102"
clap = { version = "4.5.57", features = ["derive"] }
diff --git a/src/backend.rs b/src/backend.rs
@@ -1,76 +1,54 @@
-use std::sync::mpsc::Sender;
-
use anyhow::{Context, Ok, Result};
use std::result::Result::Ok as StdOk;
-use crate::{config::Config, events::Event, export, post::Post};
-
-pub fn fetch_with_retry(url: &str, attempts: u32, tx: &Sender<Event>) -> Result<String> {
- for attempt in 1..=attempts {
- match reqwest::blocking::get(url).and_then(|r| r.text()) {
- StdOk(text) => return Ok(text),
- Err(e) => {
- tx.send(Event::FetchRetrying {
- url: url.to_string(),
- attempt,
- max_attempts: attempts,
- error: e.to_string(),
- }).ok();
- if attempt < attempts {
- std::thread::sleep(std::time::Duration::from_secs(3));
- }
- }
- }
- }
- anyhow::bail!("failed to get thread after {attempts} attempts")
-}
+use crate::{config::Config, events::{Event, Reporter}, export, http, post::Post};
-pub fn scrape_thread(url: &str, config: &Config, tx: &Sender<Event>) -> Result<Post> {
+pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter) -> Result<Post> {
let t_total = std::time::Instant::now();
- tx.send(Event::FetchStarted { url: url.to_string() }).ok();
+ reporter.report(Event::FetchStarted { url: url.to_string() });
let t = std::time::Instant::now();
- let html = fetch_with_retry(url, 3, tx)?;
- tx.send(Event::FetchDone { elapsed_ms: t.elapsed().as_millis() }).ok();
+ let html = http::fetch_with_retry(url, 3, reporter)?;
+ reporter.report(Event::FetchDone { elapsed_ms: t.elapsed().as_millis() });
- tx.send(Event::ParseStarted).ok();
+ reporter.report(Event::ParseStarted);
let t = std::time::Instant::now();
let posts = Post::parse_posts(&html).context("failed to parse thread HTML")?;
- tx.send(Event::ParseDone {
+ reporter.report(Event::ParseDone {
post_count: posts.len(),
elapsed_ms: t.elapsed().as_millis(),
- }).ok();
+ });
let first_post = posts.first().context("thread has no posts")?.clone();
- export::export2html(&posts, config, tx).context("failed to export thread")?;
+ export::export2html(&posts, config, reporter).context("failed to export thread")?;
- tx.send(Event::ThreadDone {
+ reporter.report(Event::ThreadDone {
url: url.to_string(),
elapsed_ms: t_total.elapsed().as_millis(),
- }).ok();
+ });
Ok(first_post)
}
-pub fn run(config: &Config, tx: Sender<Event>) -> Result<()> {
+pub fn run(config: &Config, reporter: &dyn Reporter) -> Result<()> {
let total = config.urls.len();
let mut first_posts: Vec<Post> = Vec::new();
for (i, url) in config.urls.iter().enumerate() {
- tx.send(Event::ThreadStarted {
+ reporter.report(Event::ThreadStarted {
url: url.clone(),
index: i + 1,
total,
- }).ok();
+ });
- match scrape_thread(url, config, &tx) {
+ match scrape_thread(url, config, reporter) {
StdOk(first_post) => first_posts.push(first_post),
Err(e) => {
- tx.send(Event::ThreadFailed {
+ reporter.report(Event::ThreadFailed {
url: url.clone(),
error: format!("{:#}", e),
- }).ok();
+ });
}
}
}
diff --git a/src/bin/cli/main.rs b/src/bin/cli/main.rs
@@ -0,0 +1,131 @@
+use arhivarch_downloader::{backend, events::Event, config::Config};
+
+use clap::Parser;
+use anyhow::Result;
+
+use std::path::PathBuf;
+use std::sync::mpsc;
+
+fn main() -> anyhow::Result<()> {
+ let config = parse_args().unwrap_or_else(|e| {
+ eprintln!("Error: {}", e);
+ std::process::exit(1);
+ });
+
+ let (tx, rx) = mpsc::channel::<Event>();
+
+ let handle = std::thread::spawn({
+ let config = config.clone();
+ move || backend::run(&config, &tx)
+ });
+
+ for event in rx {
+ render_event(&event);
+ }
+
+ handle.join().unwrap()
+}
+
+pub fn parse_args() -> Result<Config> {
+ #[derive(Parser)]
+ #[command(about, long_about)]
+ struct Cli {
+ /// URL to download
+ url: Option<String>,
+
+ /// Path to a text file containing a list of URLs (one per line)
+ #[arg(short = 'l', long = "list")]
+ list: Option<PathBuf>,
+
+ /// Download thumbnail images, default: false
+ #[arg(short = 't', long = "thumb", default_value_t = false)]
+ thumb: bool,
+
+ /// Download files (images, videos, gifs, etc), default: false
+ #[arg(short = 'f', long = "files", default_value_t = false)]
+ files: bool,
+
+ /// Resume files and thumbnails downloading instead of overwriting. Useless if neither -t nor -f are set, default: false
+ #[arg(short = 'r', long = "resume", default_value_t = false)]
+ resume: bool
+ }
+ let cli = Cli::parse();
+
+ let mut urls = Vec::new();
+ // [URL]
+ if let Some(url) = cli.url {
+ urls.push(url);
+ }
+ // [List]
+ if let Some(list) = cli.list {
+ for line in std::fs::read_to_string(list)?.lines() {
+ urls.push(line.to_string());
+ }
+ }
+ if urls.is_empty() {
+ anyhow::bail!("No URLs provided");
+ }
+
+ Ok(Config {
+ urls,
+ thumb: cli.thumb,
+ files: cli.files,
+ resume: cli.resume,
+ })
+}
+
+fn render_event(event: &Event) {
+ use std::io::Write;
+ match event {
+ Event::ThreadStarted { url, index, total } =>
+ println!("Processing {} ({} / {}):", url, index, total),
+
+ Event::ThreadDone { url, elapsed_ms } =>
+ println!("Done processing {} ({} ms)", url, elapsed_ms),
+
+ Event::ThreadFailed { url, error } =>
+ eprintln!("Error processing {}: {}", url, error),
+
+ Event::FetchStarted { .. } => {
+ print!("\tGetting thread...");
+ std::io::stdout().flush().ok();
+ }
+
+ Event::FetchDone { elapsed_ms } =>
+ println!(" Done ({} ms)", elapsed_ms),
+
+ Event::FetchRetrying { url, attempt, max_attempts, error } => {
+ eprintln!("\n\tHTTP request failed for {}: {}", url, error);
+ if attempt < max_attempts {
+ eprintln!("\tWaiting 3 seconds...");
+ }
+ }
+
+ Event::ParseStarted => {
+ print!("\tParsing posts...");
+ std::io::stdout().flush().ok();
+ }
+
+ Event::ParseDone { elapsed_ms, .. } =>
+ println!(" Done ({} ms)", elapsed_ms),
+
+ Event::DownloadBatchStarted { label, total_posts } => {
+ print!("\tDownloading {}... post 0 / {}", label, total_posts);
+ std::io::stdout().flush().ok();
+ }
+
+ Event::DownloadBatchProgress { label, done, total } => {
+ print!("\r\tDownloading {}... post {} / {}", label, done, total);
+ std::io::stdout().flush().ok();
+ }
+
+ Event::DownloadAssetFailed { label, filename, error, .. } =>
+ println!("\r\tFailed to download {} {}: {}\n\t-> Waiting 3 seconds...", label, filename, error),
+
+ Event::DownloadAssetSkipped { label, filename } =>
+ println!("\tSkipping {} {} after 3 failed attempts.", label, filename),
+
+ Event::DownloadBatchDone { elapsed_ms, .. } =>
+ println!(" Done ({} ms)", elapsed_ms),
+ }
+}
diff --git a/src/cli.rs b/src/cli.rs
@@ -1,54 +0,0 @@
-use clap::Parser;
-use anyhow::Result;
-
-use std::path::PathBuf;
-
-use arhivarch_downloader::config::Config;
-
-pub fn parse_args() -> Result<Config> {
- #[derive(Parser)]
- #[command(about, long_about)]
- struct Cli {
- /// URL to download
- url: Option<String>,
-
- /// Path to a text file containing a list of URLs (one per line)
- #[arg(short = 'l', long = "list")]
- list: Option<PathBuf>,
-
- /// Download thumbnail images, default: false
- #[arg(short = 't', long = "thumb", default_value_t = false)]
- thumb: bool,
-
- /// Download files (images, videos, gifs, etc), default: false
- #[arg(short = 'f', long = "files", default_value_t = false)]
- files: bool,
-
- /// Resume files and thumbnails downloading instead of overwriting. Useless if neither -t nor -f are set, default: false
- #[arg(short = 'r', long = "resume", default_value_t = false)]
- resume: bool
- }
- let cli = Cli::parse();
-
- let mut urls = Vec::new();
- // [URL]
- if let Some(url) = cli.url {
- urls.push(url);
- }
- // [List]
- if let Some(list) = cli.list {
- for line in std::fs::read_to_string(list)?.lines() {
- urls.push(line.to_string());
- }
- }
- if urls.is_empty() {
- anyhow::bail!("No URLs provided");
- }
-
- Ok(Config {
- urls,
- thumb: cli.thumb,
- files: cli.files,
- resume: cli.resume,
- })
-}
diff --git a/src/events.rs b/src/events.rs
@@ -21,3 +21,25 @@ pub enum Event {
DownloadAssetSkipped { label: String, filename: String },
DownloadBatchDone { label: String, elapsed_ms: u128 },
}
+
+use std::sync::mpsc;
+
+/// Sink for progress events emitted by the library.
+/// Implement this to connect the library to any frontend.
+pub trait Reporter: Send + Sync {
+ fn report(&self, event: Event);
+}
+
+/// Convenience impl: an `mpsc::Sender<Event>` can be used directly as a `Reporter`; send errors are ignored.
+impl Reporter for mpsc::Sender<Event> {
+ fn report(&self, event: Event) {
+ self.send(event).ok();
+ }
+}
+
+/// No-op reporter — useful in tests or when progress output is not needed.
+pub struct NullReporter;
+
+impl Reporter for NullReporter {
+ fn report(&self, _event: Event) {}
+}
diff --git a/src/export.rs b/src/export.rs
@@ -1,47 +1,11 @@
-use std::sync::mpsc::Sender;
-
-use crate::{config::Config, events::Event, post::Post};
+use crate::{config::Config, events::{Event, Reporter}, http, post::{File, Post}, render};
use anyhow::{Result, Context};
-const TEMPLATE: &'static str = include_str!("../template.html");
-
-fn html_escape(s: &str) -> String {
- s.replace('&', "&amp;")
- .replace('<', "&lt;")
- .replace('>', "&gt;")
- .replace('"', "&quot;")
-}
-
-/// Converts plain post text to HTML.
-/// - `>>id` → reply link anchor
-/// - Lines starting with `>` (not `>>digit`) → greentext span
-/// - `\n` → `<br>`
-fn render_text_to_html(text: &str) -> String {
- static RE_REPLY: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
- regex::Regex::new(r"&gt;&gt;(\d+)").unwrap()
- });
-
- let lines: Vec<String> = text.split('\n').map(|line| {
- let escaped = html_escape(line);
- // Greentext: starts with > but not >>digit
- let processed = if escaped.starts_with("&gt;") && !escaped.starts_with("&gt;&gt;") {
- format!("<span class=\"quote\">{}</span>", escaped)
- } else {
- escaped
- };
- // Reply links: >>id
- RE_REPLY.replace_all(&processed, |caps: &regex::Captures| {
- let id = &caps[1];
- format!("<a href=\"#post{}\" class=\"reply-link\">&gt;&gt;{}</a>", id, id)
- }).into_owned()
- }).collect();
-
- lines.join("<br>\n")
-}
+const TEMPLATE: &str = include_str!("../template.html");
/// Write a top-level index.html with one entry per thread (first post + link to thread folder)
-pub(crate) fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
+pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> {
if first_posts.is_empty() {
return Ok(());
}
@@ -49,7 +13,7 @@ pub(crate) fn write_index_html(first_posts: &[Post], config: &Config) -> Result<
let posts_html: String = first_posts
.iter()
.map(|p| {
- let mut post_html = render_post(p, config.files, config.thumb);
+ let mut post_html = render::render_post(p, config.files, config.thumb);
// render_post references thumbnails and images in the same directory,
// so replace them with links to the thread folder
config.files.then(|| post_html = post_html.replace(
@@ -80,7 +44,7 @@ pub(crate) fn write_index_html(first_posts: &[Post], config: &Config) -> Result<
/// If download_thumbnails is true, downloads thumbnails to ./{thread_id}/thumb
///
/// WARNING: If the directory already exists, it will be overwritten
-pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -> Result<()> {
+pub fn export2html(posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()> {
if posts.is_empty() {
anyhow::bail!("No posts to export");
}
@@ -90,7 +54,7 @@ pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -
let posts_html: String = posts
.iter()
- .map(|p| render_post(p, config.files, config.thumb))
+ .map(|p| render::render_post(p, config.files, config.thumb))
.collect::<Vec<String>>()
.join("\n");
@@ -101,7 +65,7 @@ pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -
"files",
|f| &f.url,
config.resume,
- tx,
+ reporter,
)?;
}
if config.thumb {
@@ -111,7 +75,7 @@ pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -
"thumbnails",
|f| &f.url_thumb,
config.resume,
- tx,
+ reporter,
)?;
}
@@ -121,112 +85,22 @@ pub(crate) fn export2html(posts: &[Post], config: &Config, tx: &Sender<Event>) -
Ok(())
}
-fn render_post(post: &Post, download_files: bool, download_thumbnails: bool) -> String {
- let mut html = format!("<div class=\"post\" id=\"post{}\">\n", post.id);
-
- html.push_str(" <div class=\"post-head\">\n");
-
- // Subject
- if let Some(ref subject) = post.subject {
- html.push_str(&format!(
- " <span class=\"post-subject\">{}</span>\n",
- html_escape(subject)
- ));
- }
-
- // Name /w mailto/sage
- let name = post.name.as_deref().unwrap_or("Аноним");
- let name_display = if let Some(ref mailto) = post.mailto {
- format!("[{}] {}", mailto, name)
- } else {
- name.to_string()
- };
- html.push_str(&format!(
- " <span class=\"post-name\">{}</span>\n",
- html_escape(&name_display)
- ));
-
- // Time, num, id
- html.push_str(&format!(" <span class=\"post-time\">{}</span>\n", html_escape(&post.time)));
- html.push_str(&format!(" <span class=\"post-num\">{}</span>\n", html_escape(&post.num)));
- html.push_str(&format!(
- " <span class=\"post-id\"><a href=\"#post{0}\">№{0}</a></span>\n",
- post.id
- ));
-
- html.push_str(" </div>\n");
-
- // Images
- html.push_str(&render_images(&post.files, download_files, download_thumbnails));
-
- // Body
- html.push_str(" <div class=\"post-body\">\n");
- if !post.text.is_empty() {
- html.push_str(" ");
- html.push_str(&render_text_to_html(&post.text));
- html.push('\n');
- }
- html.push_str(" </div>\n");
-
- html.push_str("</div>\n");
- html
-}
-
-fn render_images(
- files: &[crate::file::File],
- download_files: bool,
- download_thumbnails: bool,
-) -> String {
- if files.is_empty() {
- return String::new();
- }
-
- let mut html = String::from(" <div class=\"post-images\">\n");
- for file in files {
- let href = if download_files && !file.url.is_empty() {
- format!("files/{}", file.url.split('/').last().unwrap_or(""))
- } else {
- file.url.clone()
- };
-
- let thumb_filename = file.url_thumb.split('/').last().unwrap_or("").to_string();
- let img_src = if download_thumbnails && !file.url_thumb.is_empty() {
- format!("thumb/{}", thumb_filename)
- } else {
- file.url_thumb.clone()
- };
-
- html.push_str(&format!(
- " <div class=\"post-image\">\n <a href=\"{}\" target=\"_blank\" title=\"{}\">\n <img src=\"{}\" alt=\"\" loading=\"lazy\">\n </a>\n <div class=\"post-image-info\">{} (<a href=\"{}\" target=\"_blank\" class=\"post-image-link\">o</a>, <a href=\"{}\" target=\"_blank\" class=\"post-image-link\">t</a>)</div>\n </div>\n",
- html_escape(&href),
- html_escape(&file.name_orig),
- html_escape(&img_src),
- html_escape(&file.name_orig),
- html_escape(&file.url),
- html_escape(&file.url_thumb),
- ));
- }
- html.push_str(" </div>\n");
- html
-}
-
-
fn download_assets(
posts: &[Post],
dest_dir: &str,
label: &str,
- url_of: impl Fn(&crate::file::File) -> &str,
+ url_of: impl Fn(&File) -> &str,
skip_if_exists: bool,
- tx: &Sender<Event>,
+ reporter: &dyn Reporter,
) -> Result<()> {
std::fs::create_dir_all(dest_dir)
.with_context(|| format!("Failed to create directory {}", dest_dir))?;
let t = std::time::Instant::now();
- tx.send(Event::DownloadBatchStarted {
+ reporter.report(Event::DownloadBatchStarted {
label: label.to_string(),
total_posts: posts.len(),
- }).ok();
+ });
for (i, post) in posts.iter().enumerate() {
for f in &post.files {
@@ -238,45 +112,35 @@ fn download_assets(
}
let mut result = Err(anyhow::anyhow!("no attempts"));
for attempt in 1..=3u32 {
- result = download(url, &path);
+ result = http::download(url, &path);
if result.is_ok() { break; }
let e = result.as_ref().unwrap_err();
- tx.send(Event::DownloadAssetFailed {
+ reporter.report(Event::DownloadAssetFailed {
label: label.to_string(),
filename: filename.clone(),
attempt,
error: e.to_string(),
- }).ok();
+ });
std::thread::sleep(std::time::Duration::from_secs(3));
}
if result.is_err() {
- tx.send(Event::DownloadAssetSkipped {
+ reporter.report(Event::DownloadAssetSkipped {
label: label.to_string(),
filename: filename.clone(),
- }).ok();
+ });
}
}
- tx.send(Event::DownloadBatchProgress {
+ reporter.report(Event::DownloadBatchProgress {
label: label.to_string(),
done: i + 1,
total: posts.len(),
- }).ok();
+ });
}
- tx.send(Event::DownloadBatchDone {
+ reporter.report(Event::DownloadBatchDone {
label: label.to_string(),
elapsed_ms: t.elapsed().as_millis(),
- }).ok();
-
- Ok(())
-}
+ });
-fn download(url: &str, path: &str) -> Result<()> {
- let bytes = reqwest::blocking::get(url)
- .with_context(|| format!("HTTP GET failed for {}", url))?
- .bytes()
- .context("failed to read response body")?;
- std::fs::write(path, &bytes)
- .with_context(|| format!("failed to write {}", path))?;
Ok(())
}
diff --git a/src/file.rs b/src/file.rs
@@ -1,21 +0,0 @@
-#[derive(Debug, Clone)]
-pub struct File {
- /// original name, "videolol.mp4"
- pub name_orig: String,
- /// timestampname, "17699100670710.mp4"
- pub name_timestamp: String,
- /// thumbnail url, "https://arhivach.vc/storage/t/aeaa7825f8d8ffe3f07f242a59b7761c.thumb"
- pub url_thumb: String,
- /// url, "https://i.arhivach.vc/storage/a/ea/aeaa7825f8d8ffe3f07f242a59b7761c.mp4"
- pub url: String,
-}
-
-impl std::fmt::Display for File {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(
- f,
- "{} [{}]\n url: {}\n thumb: {}",
- self.name_orig, self.name_timestamp, self.url, self.url_thumb
- )
- }
-}
-\ No newline at end of file
diff --git a/src/http.rs b/src/http.rs
@@ -0,0 +1,35 @@
+use anyhow::{Context, Result};
+
+use crate::events::{Event, Reporter};
+
+/// GET a URL, trying up to `attempts` times in total and reporting each failure via `reporter`.
+pub fn fetch_with_retry(url: &str, attempts: u32, reporter: &dyn Reporter) -> Result<String> {
+ for attempt in 1..=attempts {
+ match reqwest::blocking::get(url).and_then(|r| r.text()) {
+ Ok(text) => return Ok(text),
+ Err(e) => {
+ reporter.report(Event::FetchRetrying {
+ url: url.to_string(),
+ attempt,
+ max_attempts: attempts,
+ error: e.to_string(),
+ });
+ if attempt < attempts {
+ std::thread::sleep(std::time::Duration::from_secs(3));
+ }
+ }
+ }
+ }
+ anyhow::bail!("failed to get thread after {attempts} attempts")
+}
+
+/// Download a single URL and write it to `path`.
+pub fn download(url: &str, path: &str) -> Result<()> {
+ let bytes = reqwest::blocking::get(url)
+ .with_context(|| format!("HTTP GET failed for {}", url))?
+ .bytes()
+ .context("failed to read response body")?;
+ std::fs::write(path, &bytes)
+ .with_context(|| format!("failed to write {}", path))?;
+ Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -2,5 +2,8 @@ pub mod config;
pub mod events;
pub mod backend;
pub mod post;
-pub mod file;
-pub(crate) mod export;
+pub mod http;
+pub mod render;
+pub mod export;
+
+pub use events::{Reporter, NullReporter};
diff --git a/src/main.rs b/src/main.rs
@@ -1,80 +0,0 @@
-mod cli;
-
-use arhivarch_downloader::{backend, events::Event};
-use std::sync::mpsc;
-
-fn main() -> anyhow::Result<()> {
- let config = cli::parse_args().unwrap_or_else(|e| {
- eprintln!("Error: {}", e);
- std::process::exit(1);
- });
-
- let (tx, rx) = mpsc::channel::<Event>();
-
- let handle = std::thread::spawn({
- let config = config.clone();
- move || backend::run(&config, tx)
- });
-
- for event in rx {
- render_event(&event);
- }
-
- handle.join().unwrap()
-}
-
-fn render_event(event: &Event) {
- use std::io::Write;
- match event {
- Event::ThreadStarted { url, index, total } =>
- println!("Processing {} ({} / {}):", url, index, total),
-
- Event::ThreadDone { url, elapsed_ms } =>
- println!("Done processing {} ({} ms)", url, elapsed_ms),
-
- Event::ThreadFailed { url, error } =>
- eprintln!("Error processing {}: {}", url, error),
-
- Event::FetchStarted { .. } => {
- print!("\tGetting thread...");
- std::io::stdout().flush().ok();
- }
-
- Event::FetchDone { elapsed_ms } =>
- println!(" Done ({} ms)", elapsed_ms),
-
- Event::FetchRetrying { url, attempt, max_attempts, error } => {
- eprintln!("\n\tHTTP request failed for {}: {}", url, error);
- if attempt < max_attempts {
- eprintln!("\tWaiting 3 seconds...");
- }
- }
-
- Event::ParseStarted => {
- print!("\tParsing posts...");
- std::io::stdout().flush().ok();
- }
-
- Event::ParseDone { elapsed_ms, .. } =>
- println!(" Done ({} ms)", elapsed_ms),
-
- Event::DownloadBatchStarted { label, total_posts } => {
- print!("\tDownloading {}... post 0 / {}", label, total_posts);
- std::io::stdout().flush().ok();
- }
-
- Event::DownloadBatchProgress { label, done, total } => {
- print!("\r\tDownloading {}... post {} / {}", label, done, total);
- std::io::stdout().flush().ok();
- }
-
- Event::DownloadAssetFailed { label, filename, error, .. } =>
- println!("\r\tFailed to download {} {}: {}\n\t-> Waiting 3 seconds...", label, filename, error),
-
- Event::DownloadAssetSkipped { label, filename } =>
- println!("\tSkipping {} {} after 3 failed attempts.", label, filename),
-
- Event::DownloadBatchDone { elapsed_ms, .. } =>
- println!(" Done ({} ms)", elapsed_ms),
- }
-}
diff --git a/src/post.rs b/src/post.rs
@@ -1,7 +1,38 @@
-use crate::file::File;
-
use anyhow::{Context, Result};
+const BASE_URL: &str = "https://arhivach.vc";
+
+#[derive(Debug, Clone)]
+pub struct File {
+ /// original name, "videolol.mp4"
+ pub name_orig: String,
+ /// timestampname, "17699100670710.mp4"
+ pub name_timestamp: String,
+ /// thumbnail url, "https://arhivach.vc/storage/t/aeaa7825f8d8ffe3f07f242a59b7761c.thumb"
+ pub url_thumb: String,
+ /// url, "https://i.arhivach.vc/storage/a/ea/aeaa7825f8d8ffe3f07f242a59b7761c.mp4"
+ pub url: String,
+}
+
+impl std::fmt::Display for File {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "{} [{}]\n url: {}\n thumb: {}",
+ self.name_orig, self.name_timestamp, self.url, self.url_thumb
+ )
+ }
+}
+
+struct PostHead {
+ subject: Option<String>,
+ name: Option<String>,
+ mailto: Option<String>,
+ time: String,
+ num: String,
+ id: u32,
+}
+
/// Represents a single post in a thread
#[derive(Debug, Clone)]
pub struct Post {
@@ -59,7 +90,7 @@ impl Post {
.select(&SEL_POST_HEAD)
.next()
.context("missing post_head")?;
- let (subject, name, mailto, time, num, id) = Post::parse_post_head(post_head)?;
+ let head = Post::parse_post_head(post_head)?;
let post_comment = node
.select(&SEL_POST_IMAGE_BLOCK)
@@ -68,12 +99,12 @@ impl Post {
let (files, text) = Post::parse_post_comment(post_comment)?;
Ok(Post {
- subject,
- name,
- mailto,
- time,
- num,
- id,
+ subject: head.subject,
+ name: head.name,
+ mailto: head.mailto,
+ time: head.time,
+ num: head.num,
+ id: head.id,
files,
text,
})
@@ -96,18 +127,7 @@ impl Post {
/// </span>
/// </div>
/// ```
- fn parse_post_head(
- post_head: scraper::ElementRef
- ) -> Result<
- (
- Option<String>, // subject
- Option<String>, // name
- Option<String>, // mailto
- String, // time
- String, // num
- u32 // id
- )
- > {
+ fn parse_post_head(post_head: scraper::ElementRef) -> Result<PostHead> {
static SEL_SPAN_POST_ID_A_HREF: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
|| scraper::Selector::parse("span.post_id a[href]").unwrap()
);
@@ -166,7 +186,7 @@ impl Post {
.text()
.collect::<String>();
- Ok((subject, name, mailto, time, num, id))
+ Ok(PostHead { subject, name, mailto, time, num, id })
}
/// Parses the sapn post_comment element from a post element
@@ -248,7 +268,7 @@ impl Post {
let url_thumb = if url_thumb.is_empty() {
String::new()
} else {
- format!("https://arhivach.vc{}", url_thumb)
+ format!("{BASE_URL}{url_thumb}")
};
// url
@@ -262,7 +282,7 @@ impl Post {
} else if url.is_empty() {
String::new()
} else {
- format!("https://arhivach.vc{}", url)
+ format!("{BASE_URL}{url}")
};
File {
diff --git a/src/render.rs b/src/render.rs
@@ -0,0 +1,125 @@
+use crate::post::{File, Post};
+
+fn html_escape(s: &str) -> String {
+ s.replace('&', "&amp;")
+ .replace('<', "&lt;")
+ .replace('>', "&gt;")
+ .replace('"', "&quot;")
+}
+
+/// Converts plain post text to HTML.
+/// - `>>id` → reply link anchor
+/// - Lines starting with `>` (not `>>digit`) → greentext span
+/// - `\n` → `<br>`
+pub fn render_text_to_html(text: &str) -> String {
+ static RE_REPLY: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
+ regex::Regex::new(r"&gt;&gt;(\d+)").unwrap()
+ });
+
+ let lines: Vec<String> = text.split('\n').map(|line| {
+ let escaped = html_escape(line);
+ // Greentext: starts with > but not >>digit
+ let processed = if escaped.starts_with("&gt;") && !escaped.starts_with("&gt;&gt;") {
+ format!("<span class=\"quote\">{}</span>", escaped)
+ } else {
+ escaped
+ };
+ // Reply links: >>id
+ RE_REPLY.replace_all(&processed, |caps: &regex::Captures| {
+ let id = &caps[1];
+ format!("<a href=\"#post{}\" class=\"reply-link\">&gt;&gt;{}</a>", id, id)
+ }).into_owned()
+ }).collect();
+
+ lines.join("<br>\n")
+}
+
+/// Renders a single post to an HTML fragment string.
+pub fn render_post(post: &Post, download_files: bool, download_thumbnails: bool) -> String {
+ let mut html = format!("<div class=\"post\" id=\"post{}\">\n", post.id);
+
+ html.push_str(" <div class=\"post-head\">\n");
+
+ // Subject
+ if let Some(ref subject) = post.subject {
+ html.push_str(&format!(
+ " <span class=\"post-subject\">{}</span>\n",
+ html_escape(subject)
+ ));
+ }
+
+ // Name /w mailto/sage
+ let name = post.name.as_deref().unwrap_or("Аноним");
+ let name_display = if let Some(ref mailto) = post.mailto {
+ format!("[{}] {}", mailto, name)
+ } else {
+ name.to_string()
+ };
+ html.push_str(&format!(
+ " <span class=\"post-name\">{}</span>\n",
+ html_escape(&name_display)
+ ));
+
+ // Time, num, id
+ html.push_str(&format!(" <span class=\"post-time\">{}</span>\n", html_escape(&post.time)));
+ html.push_str(&format!(" <span class=\"post-num\">{}</span>\n", html_escape(&post.num)));
+ html.push_str(&format!(
+ " <span class=\"post-id\"><a href=\"#post{0}\">№{0}</a></span>\n",
+ post.id
+ ));
+
+ html.push_str(" </div>\n");
+
+ // Images
+ html.push_str(&render_images(&post.files, download_files, download_thumbnails));
+
+ // Body
+ html.push_str(" <div class=\"post-body\">\n");
+ if !post.text.is_empty() {
+ html.push_str(" ");
+ html.push_str(&render_text_to_html(&post.text));
+ html.push('\n');
+ }
+ html.push_str(" </div>\n");
+
+ html.push_str("</div>\n");
+ html
+}
+
+fn render_images(
+ files: &[File],
+ download_files: bool,
+ download_thumbnails: bool,
+) -> String {
+ if files.is_empty() {
+ return String::new();
+ }
+
+ let mut html = String::from(" <div class=\"post-images\">\n");
+ for file in files {
+ let href = if download_files && !file.url.is_empty() {
+ format!("files/{}", file.url.split('/').last().unwrap_or(""))
+ } else {
+ file.url.clone()
+ };
+
+ let thumb_filename = file.url_thumb.split('/').last().unwrap_or("").to_string();
+ let img_src = if download_thumbnails && !file.url_thumb.is_empty() {
+ format!("thumb/{}", thumb_filename)
+ } else {
+ file.url_thumb.clone()
+ };
+
+ html.push_str(&format!(
+ " <div class=\"post-image\">\n <a href=\"{}\" target=\"_blank\" title=\"{}\">\n <img src=\"{}\" alt=\"\" loading=\"lazy\">\n </a>\n <div class=\"post-image-info\">{} (<a href=\"{}\" target=\"_blank\" class=\"post-image-link\">o</a>, <a href=\"{}\" target=\"_blank\" class=\"post-image-link\">t</a>)</div>\n </div>\n",
+ html_escape(&href),
+ html_escape(&file.name_orig),
+ html_escape(&img_src),
+ html_escape(&file.name_orig),
+ html_escape(&file.url),
+ html_escape(&file.url_thumb),
+ ));
+ }
+ html.push_str(" </div>\n");
+ html
+}