arhivach-downloader

Download arhivach.vc threads
git clone https://git.ea.contact/arhivach-downloader
Log | Files | Refs | README

commit 766cc139ec89a1ac4d6c4da3bbbb5398fe493856
parent 6815f58962d8337ca8590c4af9218ded1ff11752
Author: egor-achkasov <eaachkasov@gmail.com>
Date:   Mon,  9 Mar 2026 01:02:45 +0000

Complete refactor to separate front and back; encapsulate export

Diffstat:
MCargo.toml | 3+++
MREADME.md | 32++++++++++++++++----------------
Dsrc/backend.rs | 59-----------------------------------------------------------
Msrc/bin/cli/main.rs | 153+++++++++++++++++++++++++++++++++++++++----------------------------------------
Dsrc/config.rs | 7-------
Dsrc/events.rs | 45---------------------------------------------
Dsrc/export/html/mod.rs | 142-------------------------------------------------------------------------------
Dsrc/export/html/render.rs | 139-------------------------------------------------------------------------------
Dsrc/export/mod.rs | 8--------
Dsrc/http.rs | 35-----------------------------------
Dsrc/lib.rs | 9---------
Asrc/lib/config.rs | 12++++++++++++
Asrc/lib/download.rs | 25+++++++++++++++++++++++++
Asrc/lib/event.rs | 29+++++++++++++++++++++++++++++
Asrc/lib/export/html/mod.rs | 27+++++++++++++++++++++++++++
Asrc/lib/export/html/render.rs | 140+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rsrc/export/html/template.html -> src/lib/export/html/template.html | 0
Asrc/lib/export/mod.rs | 35+++++++++++++++++++++++++++++++++++
Asrc/lib/lib.rs | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/lib/post.rs | 374+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/post.rs | 374-------------------------------------------------------------------------------
21 files changed, 833 insertions(+), 911 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -4,6 +4,9 @@ description = "Download threads from arhivach." version = "0.1.0" edition = "2024" +[lib] +path = "src/lib/lib.rs" + [[bin]] name = "arhivach-downloader-cli" path = "src/bin/cli/main.rs" diff --git a/README.md b/README.md @@ -9,28 +9,28 @@ Download threads from arhivach.vc and save them locally for offline access or pr `arhivach-downloader --help`: ``` -Download threads from arhivach. - -Usage: arhivarch-downloader.exe [OPTIONS] [URL] +Usage: arhivarch-downloader-cli.exe [OPTIONS] <URL> Arguments: - [URL] URL to download + <URL> URL to download Options: - -l, --list <LIST> Path to a text file containing a list of URLs (one per line) - -t, --thumb Download thumbnail images, default: false - -f, --files Download files (images, videos, gifs, etc), default: false - -r, --resume Resume files and thumbnails downloading instead of overwriting. Useless if neither -t nor -f are set, default: false - -h, --help Print help + -d, --dir <DIR> Path to download directory [default: .] + -e, --exporter <EXPORTER> Exporter [default: html] [possible values: html] + -t, --thumb Download thumbnail images, default: false + -f, --files Download files (images, videos, gifs, etc), default: false + -r, --resume Resume files and thumbnails downloading instead of overwriting. Useless if neither -t nor -f are set, default: false + -R, --retries <RETRIES> Download retries in case of a error [default: 3] + -h, --help Print help ``` -Each thread will be downloaded in a directory named by an OP №. Contents: -- index.html -- the thread. Open it with your web browser. -- files directory (if -f (--files) argument is given) -- all the files original attached to posts. Might be heavy if there are many videos. -- thumb directory (if -t (--thumb) argument is given) -- all the thumbnails needed to render file previews in the thread. 
+Creates a subdirectory named after the arhivach thread id (the number after `/thread/` in the URL) inside the download directory, and saves the thread there. Contents: +- `index.html` — the thread. Open it with your web browser. +- `files/` (if `-f`/`--files` is given) — original files attached to posts. May be large if there are many videos. +- `thumb/` (if `-t`/`--thumb` is given) — thumbnails needed to render file previews in the thread. -Main index.html will be created in the current directory to feature the first posts of the downloaded threads. +Use `-r`/`--resume` to skip files and thumbnails that are already downloaded. -Note that you may pass an URL directly as an argument, pass a path to a text file with URLs via -f, or both. +Use `-d`/`--dir` to specify where to create the thread directory (defaults to the current directory). -Use -r (--resume) to skip downloading files and thumbnails that are already there. +Use `-R`/`--retries` to control how many times a failed download is retried (default: 3). 
diff --git a/src/backend.rs b/src/backend.rs @@ -1,59 +0,0 @@ -use anyhow::{Context, Ok, Result}; -use std::result::Result::Ok as StdOk; - -use crate::{config::Config, events::{Event, Reporter}, export::{Export, html}, http, post::Post}; - -pub fn scrape_thread(url: &str, config: &Config, reporter: &dyn Reporter, exporter: &dyn Export) -> Result<Post> { - let t_total = std::time::Instant::now(); - - reporter.report(Event::FetchStarted { url: url.to_string() }); - let t = std::time::Instant::now(); - let html_content = http::fetch_with_retry(url, 3, reporter)?; - reporter.report(Event::FetchDone { elapsed_ms: t.elapsed().as_millis() }); - - reporter.report(Event::ParseStarted); - let t = std::time::Instant::now(); - let posts = Post::parse_posts(&html_content).context("failed to parse thread HTML")?; - reporter.report(Event::ParseDone { - post_count: posts.len(), - elapsed_ms: t.elapsed().as_millis(), - }); - - let first_post = posts.first().context("thread has no posts")?.clone(); - - exporter.export(&posts, config, reporter).context("failed to export thread")?; - - reporter.report(Event::ThreadDone { - url: url.to_string(), - elapsed_ms: t_total.elapsed().as_millis(), - }); - - Ok(first_post) -} - -pub fn run(config: &Config, reporter: &dyn Reporter, exporter: &dyn Export) -> Result<()> { - let total = config.urls.len(); - let mut first_posts: Vec<Post> = Vec::new(); - - for (i, url) in config.urls.iter().enumerate() { - reporter.report(Event::ThreadStarted { - url: url.clone(), - index: i + 1, - total, - }); - - match scrape_thread(url, config, reporter, exporter) { - StdOk(first_post) => first_posts.push(first_post), - Err(e) => { - reporter.report(Event::ThreadFailed { - url: url.clone(), - error: format!("{:#}", e), - }); - } - } - } - - html::write_index_html(&first_posts, config).context("failed to write main index.html")?; - - Ok(()) -} diff --git a/src/bin/cli/main.rs b/src/bin/cli/main.rs @@ -1,41 +1,47 @@ -use arhivarch_downloader::{backend, 
events::Event, config::Config, HtmlExporter}; +use arhivarch_downloader::config::Config; +use arhivarch_downloader::event::Event; +use arhivarch_downloader::export::{html::HtmlExporter, ExporterKind}; -use clap::Parser; -use anyhow::Result; +use clap::{Parser, ValueEnum}; use std::path::PathBuf; -use std::sync::mpsc; -fn main() -> anyhow::Result<()> { - let config = parse_args().unwrap_or_else(|e| { - eprintln!("Error: {}", e); - std::process::exit(1); - }); - - let (tx, rx) = mpsc::channel::<Event>(); +#[derive(Clone, ValueEnum)] +enum ExporterArg { + Html, +} +use std::sync::mpsc::channel; +fn main() -> anyhow::Result<()> { + let config = parse_args(); + let (tx, rx) = channel::<Event>(); let handle = std::thread::spawn({ let config = config.clone(); - move || backend::run(&config, &tx, &HtmlExporter) + move || arhivarch_downloader::run(&config, tx) }); for event in rx { render_event(&event); } - handle.join().unwrap() + let _ = handle.join().map_err(|e| anyhow::anyhow!("{:?}", e))?; + Ok(()) } -pub fn parse_args() -> Result<Config> { +pub fn parse_args() -> Config { #[derive(Parser)] #[command(about, long_about)] struct Cli { /// URL to download - url: Option<String>, + url: String, + + /// Path to download directory + #[arg(short = 'd', long = "dir", value_name = "DIR", default_value = ".", value_hint = clap::ValueHint::DirPath)] + dir: PathBuf, - /// Path to a text file containing a list of URLs (one per line) - #[arg(short = 'l', long = "list")] - list: Option<PathBuf>, + /// Exporter + #[arg(short = 'e', long = "exporter", value_name = "EXPORTER", default_value = "html")] + exporter: ExporterArg, /// Download thumbnail images, default: false #[arg(short = 't', long = "thumb", default_value_t = false)] @@ -47,85 +53,78 @@ pub fn parse_args() -> Result<Config> { /// Resume files and thumbnails downloading instead of overwriting. 
Useless if neither -t nor -f are set, default: false #[arg(short = 'r', long = "resume", default_value_t = false)] - resume: bool - } - let cli = Cli::parse(); + resume: bool, - let mut urls = Vec::new(); - // [URL] - if let Some(url) = cli.url { - urls.push(url); - } - // [List] - if let Some(list) = cli.list { - for line in std::fs::read_to_string(list)?.lines() { - urls.push(line.to_string()); - } - } - if urls.is_empty() { - anyhow::bail!("No URLs provided"); + /// Download retries in case of a error + #[arg(short = 'R', long = "retries", default_value_t = 3)] + download_retries: u32, } + let cli = Cli::parse(); - Ok(Config { - urls, + Config { + url: cli.url, + dir: cli.dir, + exporter: match cli.exporter { + ExporterArg::Html => ExporterKind::Html(HtmlExporter), + }, thumb: cli.thumb, files: cli.files, resume: cli.resume, - }) + download_retries: cli.download_retries, + } } fn render_event(event: &Event) { use std::io::Write; match event { - Event::ThreadStarted { url, index, total } => - println!("Processing {} ({} / {}):", url, index, total), - - Event::ThreadDone { url, elapsed_ms } => - println!("Done processing {} ({} ms)", url, elapsed_ms), - - Event::ThreadFailed { url, error } => - eprintln!("Error processing {}: {}", url, error), - - Event::FetchStarted { .. 
} => { - print!("\tGetting thread..."); + Event::GetStarted => { + print!("Fetching thread..."); std::io::stdout().flush().ok(); } - - Event::FetchDone { elapsed_ms } => - println!(" Done ({} ms)", elapsed_ms), - - Event::FetchRetrying { url, attempt, max_attempts, error } => { - eprintln!("\n\tHTTP request failed for {}: {}", url, error); - if attempt < max_attempts { - eprintln!("\tWaiting 3 seconds..."); - } + Event::GetDone => + println!(" Done."), + Event::GetFailed { error } => + eprintln!("\nFailed to fetch thread: {}", error), + + Event::DownloadAllStarted => + println!("Downloading stuff..."), + Event::DownloadAllDone => + println!("All downloads complete."), + Event::DownloadAllFailed { error } => + eprintln!("Download failed: {}", error), + + Event::DownloadStarted { index, max_index } => { + print!("\r\tDownloading {} / {}...", index, max_index); + std::io::stdout().flush().ok(); } - - Event::ParseStarted => { - print!("\tParsing posts..."); + Event::DownloadDone { index, max_index } => { + println!("\r\tDownloading {} / {}... Done.", index, max_index); + } + Event::DownloadFailed { url, error } => + eprintln!("\r\tFailed to download {}: {}", url, error), + Event::DownloadSkipped { index, max_index } => + println!("\r\tDownloading {} / {}... Skipped.", index, max_index), + + Event::DownloadFilesStarted => { + println!("Downloading files..."); std::io::stdout().flush().ok(); } - - Event::ParseDone { elapsed_ms, .. } => - println!(" Done ({} ms)", elapsed_ms), - - Event::DownloadBatchStarted { label, total_posts } => { - print!("\tDownloading {}... post 0 / {}", label, total_posts); + Event::DownloadFilesDone => + println!("Done."), + Event::DownloadThumbStarted => { + println!("Downloading thumbnails..."); std::io::stdout().flush().ok(); } + Event::DownloadThumbDone => + println!("Done."), - Event::DownloadBatchProgress { label, done, total } => { - print!("\r\tDownloading {}... 
post {} / {}", label, done, total); + Event::ExportStarted => { + print!("Exporting..."); std::io::stdout().flush().ok(); } - - Event::DownloadAssetFailed { label, filename, error, .. } => - println!("\r\tFailed to download {} {}: {}\n\t-> Waiting 3 seconds...", label, filename, error), - - Event::DownloadAssetSkipped { label, filename } => - println!("\tSkipping {} {} after 3 failed attempts.", label, filename), - - Event::DownloadBatchDone { elapsed_ms, .. } => - println!(" Done ({} ms)", elapsed_ms), + Event::ExportDone => + println!(" Done."), + Event::ExportFailed { error } => + eprintln!("\nExport failed: {}", error), } } diff --git a/src/config.rs b/src/config.rs @@ -1,7 +0,0 @@ -#[derive(Debug, Clone)] -pub struct Config { - pub urls: Vec<String>, - pub thumb: bool, - pub files: bool, - pub resume: bool, -} diff --git a/src/events.rs b/src/events.rs @@ -1,45 +0,0 @@ -#[derive(Debug, Clone)] -pub enum Event { - // Thread-level lifecycle - ThreadStarted { url: String, index: usize, total: usize }, - ThreadDone { url: String, elapsed_ms: u128 }, - ThreadFailed { url: String, error: String }, - - // HTTP fetch - FetchStarted { url: String }, - FetchDone { elapsed_ms: u128 }, - FetchRetrying { url: String, attempt: u32, max_attempts: u32, error: String }, - - // HTML parsing - ParseStarted, - ParseDone { post_count: usize, elapsed_ms: u128 }, - - // Asset downloading - DownloadBatchStarted { label: String, total_posts: usize }, - DownloadBatchProgress { label: String, done: usize, total: usize }, - DownloadAssetFailed { label: String, filename: String, attempt: u32, error: String }, - DownloadAssetSkipped { label: String, filename: String }, - DownloadBatchDone { label: String, elapsed_ms: u128 }, -} - -use std::sync::mpsc; - -/// Sink for progress events emitted by the library. -/// Implement this to connect the library to any frontend. 
-pub trait Reporter: Send + Sync { - fn report(&self, event: Event); -} - -/// Blanket impl: mpsc::Sender<Event> is already a valid Reporter. -impl Reporter for mpsc::Sender<Event> { - fn report(&self, event: Event) { - self.send(event).ok(); - } -} - -/// No-op reporter — useful in tests or when progress output is not needed. -pub struct NullReporter; - -impl Reporter for NullReporter { - fn report(&self, _event: Event) {} -} diff --git a/src/export/html/mod.rs b/src/export/html/mod.rs @@ -1,142 +0,0 @@ -use crate::{config::Config, events::{Event, Reporter}, http, post::{File, Post}}; -use anyhow::{Result, Context}; -use super::Export; - -mod render; - -const TEMPLATE: &str = include_str!("template.html"); - -pub struct HtmlExporter; - -impl Export for HtmlExporter { - fn export(&self, posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()> { - if posts.is_empty() { - anyhow::bail!("No posts to export"); - } - - let dir = format!("{}", posts[0].id); - std::fs::create_dir_all(&dir)?; - - let posts_html: String = posts - .iter() - .map(|p| render::render_post(p, config.files, config.thumb)) - .collect::<Vec<String>>() - .join("\n"); - - if config.files { - download_assets( - &posts, - &format!("{}/files", dir), - "files", - |f| &f.url, - config.resume, - reporter, - )?; - } - if config.thumb { - download_assets( - &posts, - &format!("{}/thumb", dir), - "thumbnails", - |f| &f.url_thumb, - config.resume, - reporter, - )?; - } - - let index_html = TEMPLATE.replace("{{posts}}", &posts_html); - std::fs::write(format!("{}/index.html", dir), index_html)?; - - Ok(()) - } -} - -/// Write a top-level index.html with one entry per thread (first post + link to thread folder) -pub fn write_index_html(first_posts: &[Post], config: &Config) -> Result<()> { - if first_posts.is_empty() { - return Ok(()); - } - - let posts_html: String = first_posts - .iter() - .map(|p| { - let mut post_html = render::render_post(p, config.files, config.thumb); - config.files.then(|| 
post_html = post_html.replace( - "<a href=\"files/", - &format!("<a href=\"{}/files/", p.id), - )); - config.thumb.then(|| post_html = post_html.replace( - "<img src=\"thumb/", - &format!("<img src=\"{}/thumb/", p.id), - )); - format!("<div><a href=\"{}/index.html\">В тред &rarr;</a></div>{}\n", p.id, post_html) - }) - .collect::<Vec<String>>() - .join("\n"); - - let index_html = TEMPLATE.replace("{{posts}}", &posts_html); - std::fs::write("index.html", index_html) - .context("failed to write index.html")?; - - Ok(()) -} - -fn download_assets( - posts: &[Post], - dest_dir: &str, - label: &str, - url_of: impl Fn(&File) -> &str, - skip_if_exists: bool, - reporter: &dyn Reporter, -) -> Result<()> { - std::fs::create_dir_all(dest_dir) - .with_context(|| format!("Failed to create directory {}", dest_dir))?; - - let t = std::time::Instant::now(); - reporter.report(Event::DownloadBatchStarted { - label: label.to_string(), - total_posts: posts.len(), - }); - - for (i, post) in posts.iter().enumerate() { - for f in &post.files { - let url = url_of(f); - let filename = url.split('/').last().unwrap_or("").to_string(); - let path = format!("{}/{}", dest_dir, filename); - if skip_if_exists && std::path::Path::new(&path).exists() { - continue; - } - let mut result = Err(anyhow::anyhow!("no attempts")); - for attempt in 1..=3u32 { - result = http::download(url, &path); - if result.is_ok() { break; } - let e = result.as_ref().unwrap_err(); - reporter.report(Event::DownloadAssetFailed { - label: label.to_string(), - filename: filename.clone(), - attempt, - error: e.to_string(), - }); - std::thread::sleep(std::time::Duration::from_secs(3)); - } - if result.is_err() { - reporter.report(Event::DownloadAssetSkipped { - label: label.to_string(), - filename: filename.clone(), - }); - } - } - reporter.report(Event::DownloadBatchProgress { - label: label.to_string(), - done: i + 1, - total: posts.len(), - }); - } - - reporter.report(Event::DownloadBatchDone { - label: label.to_string(), - 
elapsed_ms: t.elapsed().as_millis(), - }); - - Ok(()) -} diff --git a/src/export/html/render.rs b/src/export/html/render.rs @@ -1,139 +0,0 @@ -use crate::thread::{File, Post}; - -fn html_escape(s: &str) -> String { - s.replace('&', "&amp;") - .replace('<', "&lt;") - .replace('>', "&gt;") - .replace('"', "&quot;") -} - -/// Converts plain post text to HTML. -/// - `>>id` → reply link anchor -/// - Lines starting with `>` (not `>>digit`) → greentext span -/// - `\n` → `<br>` -pub fn render_text_to_html(text: &str) -> String { - let needle = "&gt;&gt;"; - - let lines: Vec<String> = text.split('\n').map(|line| { - let escaped = html_escape(line); - - // Replace >>id with reply link anchors - let mut processed = String::with_capacity(escaped.len()); - let mut rest = escaped.as_str(); - while let Some(pos) = rest.find(needle) { - processed.push_str(&rest[..pos]); - let after = &rest[pos + needle.len()..]; - let digit_end = after.find(|c: char| !c.is_ascii_digit()).unwrap_or(after.len()); - if digit_end > 0 { - let id = &after[..digit_end]; - processed.push_str(&format!("<a href=\"#post{id}\" class=\"reply-link\">&gt;&gt;{id}</a>")); - rest = &after[digit_end..]; - } else { - processed.push_str(needle); - rest = after; - } - } - processed.push_str(rest); - - // Wrap in greentext span if line starts with > but not >>digit - let is_greentext = escaped.starts_with("&gt;") - && !escaped.strip_prefix(needle).is_some_and(|s| s.starts_with(|c: char| c.is_ascii_digit())); - if is_greentext { - format!("<span class=\"quote\">{processed}</span>") - } else { - processed - } - }).collect(); - - lines.join("<br>\n") -} - -/// Renders a single post to an HTML fragment string. 
-pub fn render_post(post: &Post, download_files: bool, download_thumbnails: bool) -> String { - let mut html = format!("<div class=\"post\" id=\"post{}\">\n", post.id); - - html.push_str(" <div class=\"post-head\">\n"); - - // Subject - if let Some(ref subject) = post.subject { - html.push_str(&format!( - " <span class=\"post-subject\">{}</span>\n", - html_escape(subject) - )); - } - - // Name /w mailto/sage - let name = post.name.as_deref().unwrap_or("Аноним"); - let name_display = if let Some(ref mailto) = post.mailto { - format!("[{}] {}", mailto, name) - } else { - name.to_string() - }; - html.push_str(&format!( - " <span class=\"post-name\">{}</span>\n", - html_escape(&name_display) - )); - - // Time, num, id - html.push_str(&format!(" <span class=\"post-time\">{}</span>\n", html_escape(&post.time))); - html.push_str(&format!(" <span class=\"post-num\">{}</span>\n", html_escape(&post.num))); - html.push_str(&format!( - " <span class=\"post-id\"><a href=\"#post{0}\">№{0}</a></span>\n", - post.id - )); - - html.push_str(" </div>\n"); - - // Images - html.push_str(&render_images(&post.files, download_files, download_thumbnails)); - - // Body - html.push_str(" <div class=\"post-body\">\n"); - if !post.text.is_empty() { - html.push_str(" "); - html.push_str(&render_text_to_html(&post.text)); - html.push('\n'); - } - html.push_str(" </div>\n"); - - html.push_str("</div>\n"); - html -} - -fn render_images( - files: &[File], - download_files: bool, - download_thumbnails: bool, -) -> String { - if files.is_empty() { - return String::new(); - } - - let mut html = String::from(" <div class=\"post-images\">\n"); - for file in files { - let href = if download_files && !file.url.is_empty() { - format!("files/{}", file.url.split('/').last().unwrap_or("")) - } else { - file.url.clone() - }; - - let thumb_filename = file.url_thumb.split('/').last().unwrap_or("").to_string(); - let img_src = if download_thumbnails && !file.url_thumb.is_empty() { - format!("thumb/{}", 
thumb_filename) - } else { - file.url_thumb.clone() - }; - - html.push_str(&format!( - " <div class=\"post-image\">\n <a href=\"{}\" target=\"_blank\" title=\"{}\">\n <img src=\"{}\" alt=\"\" loading=\"lazy\">\n </a>\n <div class=\"post-image-info\">{} (<a href=\"{}\" target=\"_blank\" class=\"post-image-link\">o</a>, <a href=\"{}\" target=\"_blank\" class=\"post-image-link\">t</a>)</div>\n </div>\n", - html_escape(&href), - html_escape(&file.name_orig), - html_escape(&img_src), - html_escape(&file.name_orig), - html_escape(&file.url), - html_escape(&file.url_thumb), - )); - } - html.push_str(" </div>\n"); - html -} diff --git a/src/export/mod.rs b/src/export/mod.rs @@ -1,8 +0,0 @@ -use crate::{config::Config, events::Reporter, post::Post}; -use anyhow::Result; - -pub mod html; - -pub trait Export { - fn export(&self, posts: &[Post], config: &Config, reporter: &dyn Reporter) -> Result<()>; -} diff --git a/src/http.rs b/src/http.rs @@ -1,35 +0,0 @@ -use anyhow::{Context, Result}; - -use crate::events::{Event, Reporter}; - -/// GET a URL with up to `attempts` retries, reporting each failure via `reporter`. -pub fn fetch_with_retry(url: &str, attempts: u32, reporter: &dyn Reporter) -> Result<String> { - for attempt in 1..=attempts { - match reqwest::blocking::get(url).and_then(|r| r.text()) { - Ok(text) => return Ok(text), - Err(e) => { - reporter.report(Event::FetchRetrying { - url: url.to_string(), - attempt, - max_attempts: attempts, - error: e.to_string(), - }); - if attempt < attempts { - std::thread::sleep(std::time::Duration::from_secs(3)); - } - } - } - } - anyhow::bail!("failed to get thread after {attempts} attempts") -} - -/// Download a single URL and write it to `path`. -pub fn download(url: &str, path: &str) -> Result<()> { - let bytes = reqwest::blocking::get(url) - .with_context(|| format!("HTTP GET failed for {}", url))? 
- .bytes() - .context("failed to read response body")?; - std::fs::write(path, &bytes) - .with_context(|| format!("failed to write {}", path))?; - Ok(()) -} diff --git a/src/lib.rs b/src/lib.rs @@ -1,9 +0,0 @@ -pub mod config; -pub mod events; -pub mod backend; -pub mod post; -pub mod http; -pub mod export; - -pub use events::{Reporter, NullReporter}; -pub use export::html::HtmlExporter; diff --git a/src/lib/config.rs b/src/lib/config.rs @@ -0,0 +1,12 @@ +use crate::export::ExporterKind; + +#[derive(Clone)] +pub struct Config { + pub url: String, + pub dir: std::path::PathBuf, + pub exporter: ExporterKind, + pub thumb: bool, + pub files: bool, + pub resume: bool, + pub download_retries: u32, +} diff --git a/src/lib/download.rs b/src/lib/download.rs @@ -0,0 +1,25 @@ +use anyhow::{anyhow, Result}; + +/// Downloads a URL, retrying up to `tries` times. +/// +/// # Errors +/// Returns an error if all attempts fail or `tries` is 0. +pub fn download(url: &str, tries: u32) -> Result<reqwest::blocking::Response> { + static CLIENT: std::sync::LazyLock<reqwest::blocking::Client> = + std::sync::LazyLock::new(reqwest::blocking::Client::new); + + for attempt in 0..tries { + if attempt > 0 { + std::thread::sleep(std::time::Duration::from_millis(500 * 2u64.pow(attempt))); + } + let response = CLIENT.get(url).send()?; + if response.status().is_success() { + return Ok(response); + } + if response.status().is_client_error() { + return Err(anyhow!("client error: {}", response.status())); + } + } + + Err(anyhow!("failed to download {} after {} tries", url, tries)) +} diff --git a/src/lib/event.rs b/src/lib/event.rs @@ -0,0 +1,29 @@ +#[derive(Debug, Clone)] +pub enum Event { + // Thread retrieval + GetStarted, + GetDone, + GetFailed { error: String }, + + // Files download + DownloadAllStarted, + DownloadAllDone, + DownloadAllFailed { error: String }, + + // File download + DownloadStarted { index: usize, max_index: usize }, + DownloadDone { index: usize, max_index: usize }, + 
DownloadSkipped { index: usize, max_index: usize }, + DownloadFailed { url: String, error: String }, + + // Files and thumbnails download + DownloadFilesStarted, + DownloadFilesDone, + DownloadThumbStarted, + DownloadThumbDone, + + // Thread export + ExportStarted, + ExportDone, + ExportFailed { error: String }, +} diff --git a/src/lib/export/html/mod.rs b/src/lib/export/html/mod.rs @@ -0,0 +1,27 @@ +use crate::{config::Config, post::Post}; +use anyhow::Result; +use super::Exporter; + +mod render; + +const TEMPLATE: &str = include_str!("template.html"); + +#[derive(Clone)] +pub struct HtmlExporter; + +impl Exporter for HtmlExporter { + fn export(&self, posts: &[Post], config: &Config) -> Result<()> { + anyhow::ensure!(!posts.is_empty(), "No posts to export"); + + std::fs::create_dir_all(&config.dir)?; + let posts_html = posts + .iter() + .map(|p| render::render_post(p, config.files, config.thumb)) + .collect::<Vec<String>>() + .join("\n"); + let index_html = TEMPLATE.replace("{{posts}}", &posts_html); + std::fs::write(config.dir.join("index.html"), index_html)?; + + Ok(()) + } +} diff --git a/src/lib/export/html/render.rs b/src/lib/export/html/render.rs @@ -0,0 +1,140 @@ +use crate::post::{File, Post}; + +/// Renders a single post to an HTML fragment string. 
+/// If download_files or download_thumbnails is true, the links will be converted to local paths +pub fn render_post(post: &Post, download_files: bool, download_thumbnails: bool) -> String { + let mut html = format!("<div class=\"post\" id=\"post{}\">\n", post.id); + + html.push_str(" <div class=\"post-head\">\n"); + + // Subject + if let Some(ref subject) = post.subject { + html.push_str(&format!( + " <span class=\"post-subject\">{}</span>\n", + html_escape(subject) + )); + } + + // Name /w mailto/sage + let name = post.name.as_deref().unwrap_or("Аноним"); + let name_display = if let Some(ref mailto) = post.mailto { + format!("[{}] {}", mailto, name) + } else { + name.to_string() + }; + html.push_str(&format!( + " <span class=\"post-name\">{}</span>\n", + html_escape(&name_display) + )); + + // Time, num, id + html.push_str(&format!(" <span class=\"post-time\">{}</span>\n", html_escape(&post.time))); + html.push_str(&format!(" <span class=\"post-num\">{}</span>\n", html_escape(&post.num))); + html.push_str(&format!( + " <span class=\"post-id\"><a href=\"#post{0}\">№{0}</a></span>\n", + post.id + )); + + html.push_str(" </div>\n"); + + // Images + html.push_str(&render_images(&post.files, download_files, download_thumbnails)); + + // Body + html.push_str(" <div class=\"post-body\">\n"); + if !post.text.is_empty() { + html.push_str(" "); + html.push_str(&render_text_to_html(&post.text)); + html.push('\n'); + } + html.push_str(" </div>\n"); + + html.push_str("</div>\n"); + html +} + +fn html_escape(s: &str) -> String { + s.replace('&', "&amp;") + .replace('<', "&lt;") + .replace('>', "&gt;") + .replace('"', "&quot;") +} + +/// Converts plain post text to HTML. 
+/// - `>>id` → reply link anchor +/// - Lines starting with `>` (not `>>digit`) → greentext span +/// - `\n` → `<br>` +fn render_text_to_html(text: &str) -> String { + let needle = "&gt;&gt;"; + + let lines: Vec<String> = text.split('\n').map(|line| { + let escaped = html_escape(line); + + // Replace >>id with reply link anchors + let mut processed = String::with_capacity(escaped.len()); + let mut rest = escaped.as_str(); + while let Some(pos) = rest.find(needle) { + processed.push_str(&rest[..pos]); + let after = &rest[pos + needle.len()..]; + let digit_end = after.find(|c: char| !c.is_ascii_digit()).unwrap_or(after.len()); + if digit_end > 0 { + let id = &after[..digit_end]; + processed.push_str(&format!("<a href=\"#post{id}\" class=\"reply-link\">&gt;&gt;{id}</a>")); + rest = &after[digit_end..]; + } else { + processed.push_str(needle); + rest = after; + } + } + processed.push_str(rest); + + // Wrap in greentext span if line starts with > but not >>digit + let is_greentext = escaped.starts_with("&gt;") + && !escaped.strip_prefix(needle).is_some_and(|s| s.starts_with(|c: char| c.is_ascii_digit())); + if is_greentext { + format!("<span class=\"quote\">{processed}</span>") + } else { + processed + } + }).collect(); + + lines.join("<br>\n") +} + +fn render_images( + files: &[File], + download_files: bool, + download_thumbnails: bool, +) -> String { + if files.is_empty() { + return String::new(); + } + + let mut html = String::from(" <div class=\"post-images\">\n"); + for file in files { + let href = if download_files && !file.url.is_empty() { + format!("files/{}", file.url.split('/').last().unwrap_or("")) + } else { + file.url.clone() + }; + + let thumb_filename = file.url_thumb.split('/').last().unwrap_or("").to_string(); + let img_src = if download_thumbnails && !file.url_thumb.is_empty() { + format!("thumb/{}", thumb_filename) + } else { + file.url_thumb.clone() + }; + + html.push_str(&format!( + " <div class=\"post-image\">\n <a href=\"{}\" target=\"_blank\" 
title=\"{}\">\n <img src=\"{}\" alt=\"\" loading=\"lazy\">\n </a>\n <div class=\"post-image-info\">{} (<a href=\"{}\" target=\"_blank\" class=\"post-image-link\">o</a>, <a href=\"{}\" target=\"_blank\" class=\"post-image-link\">t</a>)</div>\n </div>\n", + html_escape(&href), + html_escape(&file.name_orig), + html_escape(&img_src), + html_escape(&file.name_orig), + html_escape(&file.url), + html_escape(&file.url_thumb), + )); + } + html.push_str(" </div>\n"); + html +} diff --git a/src/export/html/template.html b/src/lib/export/html/template.html diff --git a/src/lib/export/mod.rs b/src/lib/export/mod.rs @@ -0,0 +1,35 @@ +pub mod html; + +use super::{config::Config, post::Post}; + +use anyhow::Result; + +use std::str::FromStr; + +#[derive(Clone)] +pub enum ExporterKind { + Html(html::HtmlExporter), +} + +pub trait Exporter { + fn export(&self, posts: &[Post], config: &Config) -> Result<()>; +} + +impl Exporter for ExporterKind { + fn export(&self, posts: &[Post], config: &Config) -> Result<()> { + match self { + ExporterKind::Html(html) => html.export(posts, config), + } + } +} + +impl FromStr for ExporterKind { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result<ExporterKind> { + match s.to_lowercase().as_str() { + "html" => Ok(ExporterKind::Html(html::HtmlExporter {})), + _ => anyhow::bail!("unknown exporter: {}", s), + } + } +} diff --git a/src/lib/lib.rs b/src/lib/lib.rs @@ -0,0 +1,96 @@ +pub mod config; +pub mod event; +pub mod export; + +mod download; +mod post; + +use crate::post::{Post, File}; +use crate::export::Exporter; + +use anyhow::{Result, Context}; + +use std::sync::mpsc::Sender; + +pub const BASE_URL: &str = "https://arhivach.vc"; + +pub fn run(config: &config::Config, tx: Sender<event::Event>) -> Result<()> { + tx.send(event::Event::GetStarted)?; + let html = download::download(&config.url, config.download_retries)?.text()?; + let posts = Post::parse_posts(&html) + .inspect_err(|e| { let _ = tx.send(event::Event::GetFailed { error: 
format!("{:#}", e) }); }) + .context("failed to parse posts")?; + tx.send(event::Event::GetDone)?; + + tx.send(event::Event::DownloadAllStarted)?; + run_download(&posts, &config, tx.clone()) + .inspect_err(|e| { let _ = tx.send(event::Event::DownloadAllFailed { error: format!("{:#}", e) }); }) + .context("failed to download files")?; + tx.send(event::Event::DownloadAllDone)?; + + tx.send(event::Event::ExportStarted)?; + config.exporter.export(&posts, config) + .inspect_err(|e| { let _ = tx.send(event::Event::ExportFailed { error: format!("{:#}", e) }); }) + .context("failed to export")?; + tx.send(event::Event::ExportDone)?; + + Ok(()) +} + +/// Download files and thumbnails. Send DownloadStarted, DownloadDone and DownloadFailed events +fn run_download(posts: &[Post], config: &config::Config, tx: Sender<event::Event>) -> Result<()> { + std::fs::create_dir_all(&config.dir)?; + + let download_item = |url: &str, filepath: &std::path::PathBuf| -> Result<()> { + let result = download::download(url, config.download_retries)?; + anyhow::ensure!(result.status().is_success(), "failed to download {}: {}", url, result.status()); + let bytes = result.bytes()?; + anyhow::ensure!(!bytes.is_empty(), "empty file: {}", url); + std::fs::write(filepath, bytes)?; + Ok(()) + }; + + let download_section = | + subdir: &str, + get_url: fn(&File) -> (&str, &str), + | -> Result<()> { + let dir = config.dir.join(subdir); + std::fs::create_dir_all(&dir)?; + + let mut index: usize = 1; + let max_index: usize = posts.iter().map(|p| p.files.len()).sum(); + for f in posts.iter().flat_map(|p| &p.files) { + tx.send(event::Event::DownloadStarted { index, max_index })?; + let (url, fallback) = get_url(f); + let filename = url.rsplit("/").next().unwrap_or(fallback).trim(); + let filepath = dir.join(filename); + if config.resume && filepath.exists() { + tx.send(event::Event::DownloadSkipped { index, max_index })?; + index += 1; + continue + } + match download_item(url, &filepath) { + Ok(()) => 
tx.send(event::Event::DownloadDone{ index, max_index })?,
+ Err(e) => tx.send(event::Event::DownloadFailed {
+ url: url.to_string(),
+ error: format!("{:#}", e)
+ })?
+ };
+ index += 1;
+ }
+ Ok(())
+ };
+
+ if config.files {
+ tx.send(event::Event::DownloadFilesStarted)?;
+ download_section("files", |f| (&f.url, &f.name_timestamp))?;
+ tx.send(event::Event::DownloadFilesDone)?;
+ }
+ if config.thumb {
+ tx.send(event::Event::DownloadThumbStarted)?;
+ download_section("thumb", |f| (&f.url_thumb, &f.name_timestamp))?;
+ tx.send(event::Event::DownloadThumbDone)?;
+ }
+
+ Ok(())
+}
diff --git a/src/lib/post.rs b/src/lib/post.rs
@@ -0,0 +1,374 @@
+use super::BASE_URL;
+
+use anyhow::{Context, Result};
+
+#[derive(Debug, Clone)]
+pub struct File {
+ /// original name, "videolol.mp4"
+ pub name_orig: String,
+ /// timestamp name, "17699100670710.mp4"
+ pub name_timestamp: String,
+ /// thumbnail url, "https://arhivach.vc/storage/t/aeaa7825f8d8ffe3f07f242a59b7761c.thumb"
+ pub url_thumb: String,
+ /// url, "https://i.arhivach.vc/storage/a/ea/aeaa7825f8d8ffe3f07f242a59b7761c.mp4"
+ pub url: String,
+}
+
+impl std::fmt::Display for File {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "{} [{}]\n url: {}\n thumb: {}",
+ self.name_orig, self.name_timestamp, self.url, self.url_thumb
+ )
+ }
+}
+
+struct PostHead {
+ subject: Option<String>,
+ name: Option<String>,
+ mailto: Option<String>,
+ time: String,
+ num: String,
+ id: u32,
+}
+
+/// Represents a single post in a thread
+#[derive(Debug, Clone)]
+pub struct Post {
+ /// Empty if None
+ pub subject: Option<String>,
+ /// "Аноним" if none
+ pub name: Option<String>,
+ /// "mailto:sage"
+ pub mailto: Option<String>,
+ /// "01/02/26 Вск 03:13:12"
+ pub time: String,
+ /// "#5"
+ pub num: String,
+ /// "329281515"
+ pub id: u32,
+ pub files: Vec<File>,
+ /// Post text
+ pub text: String,
+}
+
+impl Post {
+ pub fn parse_posts(
+ html: &str,
+ ) -> Result<Vec<Post>> {
+ let mut posts = 
Vec::new(); + + let document = scraper::Html::parse_document(html); + let selector = scraper::Selector::parse(r#"div.post"#).unwrap(); + for node in document.select(&selector) { + let post = Post::parse_post(node)?; + posts.push(post); + } + + Ok(posts) + } + + /// Parse div class="post" + /// + /// Example element: + /// ```html + /// <div class="post" id="post329274763" postid="329274763"> + /// <div class="post_head">...</div> (see parse_post_head function) + /// <span class="post_comment">...</span> (see parse_post_comment function) + /// </div> + /// ``` + fn parse_post(node: scraper::ElementRef) -> Result<Post> { + static SEL_POST_HEAD: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("div.post_head").unwrap() + ); + static SEL_POST_IMAGE_BLOCK: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("span.post_comment").unwrap() + ); + + let post_head = node + .select(&SEL_POST_HEAD) + .next() + .context("missing post_head")?; + let head = Post::parse_post_head(post_head)?; + + let post_comment = node + .select(&SEL_POST_IMAGE_BLOCK) + .next() + .context("missing post_comment")?; + let (files, text) = Post::parse_post_comment(post_comment)?; + + Ok(Post { + subject: head.subject, + name: head.name, + mailto: head.mailto, + time: head.time, + num: head.num, + id: head.id, + files, + text, + }) + } + + /// Parses the post_head element + /// + /// Returns (subject, name, mailto, time, num, id) + /// Returns error if no time, num or id is found or if id is not a number + /// + /// Example element: + /// ```html + /// <div class="post_head"> + /// <span class="poster_name" title="">Аноним</span>&nbsp; + /// <span class="post_time">01/02/26 Вск 04:27:32</span>&nbsp; + /// <span class="post_num">#77</span>&nbsp; + /// <span class="post_id"> + /// <a style="position:absolute;margin-top:-50px;" id="329274763"></a> + /// <a href="#329274763">№329274763</a> + /// </span> &nbsp; + 
/// </div> + /// ``` + fn parse_post_head(post_head: scraper::ElementRef) -> Result<PostHead> { + static SEL_SPAN_POST_ID_A_HREF: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("span.post_id a[href]").unwrap() + ); + static SEL_H1_POST_SUBJECT: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("h1.post_subject").unwrap() + ); + static SEL_SPAN_POSTER_NAME: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("span.poster_name").unwrap() + ); + static SEL_A_POST_MAIL: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("a.post_mail").unwrap() + ); + static SEL_SPAN_POST_TIME: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("span.post_time").unwrap() + ); + static SEL_SPAN_POST_NUM: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("span.post_num").unwrap() + ); + + let id: u32 = post_head + .select(&SEL_SPAN_POST_ID_A_HREF) + .next() + .and_then(|el| el.value().attr("href")) + .and_then(|href| href.strip_prefix('#')) + .context("missing post id")? + .parse()?; + + let subject = post_head + .select(&SEL_H1_POST_SUBJECT) + .next() + .map(|el| el.text().collect::<String>()); + + let name = post_head + .select(&SEL_SPAN_POSTER_NAME) + .next() + .map(|el| el.text().collect::<String>()) + .and_then(|n| if n == "Аноним" { None } else { Some(n) }); + + let mailto = post_head + .select(&SEL_A_POST_MAIL) + .next() + .and_then(|el| el.value().attr("title")) + .map(|s| s.to_string()); + + let time = post_head + .select(&SEL_SPAN_POST_TIME) + .next() + .context("missing post_time")? + .text() + .collect::<String>(); + + let num = post_head + .select(&SEL_SPAN_POST_NUM) + .next() + .context("missing post_num")? 
+ .text()
+ .collect::<String>();
+
+ Ok(PostHead { subject, name, mailto, time, num, id })
+ }
+
+ /// Parses the span post_comment element from a post element
+ ///
+ /// Returns (files, text)
+ ///
+ /// Example element:
+ /// <span class="post_comment">
+ /// <div class="post_image_block" ...>...</div> (see parse_post_image_block function) (can appear 0 to multiple times)
+ /// <div class="post_comment_body">...</div> (see parse_post_comment_body function)
+ /// </span>
+ fn parse_post_comment(
+ node: scraper::ElementRef,
+ ) -> Result<(Vec<File>, String)> {
+ static SEL_POST_IMAGE_BLOCK: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
+ || scraper::Selector::parse("div.post_image_block").unwrap()
+ );
+ static SEL_POST_COMMENT_BODY: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
+ || scraper::Selector::parse("div.post_comment_body").unwrap()
+ );
+
+ // TODO handle the errors instead of propagating them upward. Change the return type to non-Result
+ let files: Vec<File> = node
+ .select(&SEL_POST_IMAGE_BLOCK)
+ .map(Post::parse_post_image_block)
+ .collect();
+ let text = Post::parse_post_comment_body(node
+ .select(&SEL_POST_COMMENT_BODY)
+ .next()
+ .context("missing post_comment_body")?);
+ Ok((files, text))
+ }
+
+ /// Parses "post_image_block" element
+ /// Returns File
+ ///
+ /// Example element:
+ /// ```html
+ /// <div class="post_image_block" id="pib_77_2" pib="77_2" title="537.4 Кб, 946 x 946
+ /// image.png
+ /// 17699092523481.png">
+ /// <a class="expand_image" onclick="expand_local('77_2','/storage/a/cc/acc7f5856bc60ad3bdbd4dc7027e33f9.png','946','946',event); return false;" href="#">
+ /// <div class="post_image" id="thumb_77_2">
+ /// <img src="/storage/t/acc7f5856bc60ad3bdbd4dc7027e33f9.png" alt="" loading="lazy"> // thumbnail path
+ /// </div>
+ /// </a>
+ /// <a href="/storage/a/cc/acc7f5856bc60ad3bdbd4dc7027e33f9.png" target="_blank" class="img_filename">image.png</a> // can also be 
https://i.arhivach.vc/... if it's a video + /// </div> + /// ``` + fn parse_post_image_block(pib: scraper::ElementRef) -> File { + static SEL_POST_IMAGE_IMG: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse(".post_image img").unwrap() + ); + static SEL_A_IMG_FILENAME: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( + || scraper::Selector::parse("a.img_filename").unwrap() + ); + + // Title example: + // 402.2 Кб, 800 x 532 + // image.png <- name_orig + // 17699142349880.png <- name_timestamp + let title = pib.value().attr("title").unwrap_or(""); + let title_lines: Vec<&str> = title.lines().collect(); + let name_orig = title_lines + .get(1) + .map(|s| s.to_string()) + .unwrap_or("unnamed".to_string()); + let name_timestamp = title_lines + .get(2) + .map(|s| s.to_string()) + .unwrap_or("unnamed".to_string()); + + // url_thumb + let url_thumb = pib + .select(&SEL_POST_IMAGE_IMG) + .next() + .and_then(|el| el.value().attr("src")) + .unwrap_or(""); // /storage/t/83c2fe5ba9a8469d9eeef4af124e3b52.thumb + let url_thumb = if url_thumb.is_empty() { + String::new() + } else { + format!("{BASE_URL}{url_thumb}") + }; + + // url + let url = pib + .select(&SEL_A_IMG_FILENAME) + .next() + .and_then(|el| el.value().attr("href")) + .unwrap_or(""); + let url = if url.starts_with("http") { // is `https://i.arhivach.vc/...`? + url.to_string() + } else if url.is_empty() { + String::new() + } else { + format!("{BASE_URL}{url}") + }; + + File { + name_orig, + name_timestamp, + url_thumb, + url, + } + } + + /// Parses the post text from `div.post_comment_body` + /// + /// Returns post text: + /// - References are plaintext (e.g. >>329274789) + /// - `<br>` is replaced with \n + /// - `<span class="unkfunc">` (greentext) is replaced with >text + /// + /// If the text contains a reference (e.g. 
>>329274789) it looks like this in the element: + /// ```html + /// <div class="post_comment_body"> + /// <a href="#329274893" class="post-reply-link" data-thread="329273515" data-num="329274893">&gt;&gt;329274893</a> // This will be replaced with >>329274893 + /// <br> + /// <span class="unkfunc">&gt;greentext1</span> + /// <br> + /// text1 + /// </div> + /// ``` + /// + /// This example returns: + /// ```text + /// >>329274893 + /// >greentext1 + /// text1 + /// ``` + fn parse_post_comment_body(node: scraper::ElementRef) -> String { + use scraper::node::Node; + + let mut result = String::new(); + for child in node.children() { + match child.value() { + Node::Text(text) => result.push_str(&text.text), + Node::Element(el) if el.name() == "br" => result.push('\n'), + Node::Element(_) => { + if let Some(el_ref) = scraper::ElementRef::wrap(child) { + result.push_str(&el_ref.text().collect::<String>()); + } + } + _ => {} + } + } + result.trim().to_string() + } +} + +impl std::fmt::Display for Post { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Header line + let name = self.name.as_deref().unwrap_or("Аноним"); + let mailto = self.mailto.as_deref().unwrap_or(""); + + if !mailto.is_empty() { + write!(f, "{} ({})", name, mailto)?; + } else { + write!(f, "{}", name)?; + } + + write!(f, " {} {} ID:{}", self.time, self.num, self.id)?; + + // Subject + if let Some(ref subject) = self.subject { + write!(f, "\n{}", subject)?; + } + + // Files + if !self.files.is_empty() { + write!(f, "\n[Files: {}]", self.files.len())?; + for file in &self.files { + write!(f, "\n - {}", file)?; + } + } + + // Post text + if !self.text.is_empty() { + write!(f, "\n{}", self.text)?; + } + + Ok(()) + } +} diff --git a/src/post.rs b/src/post.rs @@ -1,374 +0,0 @@ -use anyhow::{Context, Result}; - -const BASE_URL: &str = "https://arhivach.vc"; - -#[derive(Debug, Clone)] -pub struct File { - /// original name, "videolol.mp4" - pub name_orig: String, - /// timestampname, 
"17699100670710.mp4" - pub name_timestamp: String, - /// thumbnail url, "https://arhivach.vc/storage/t/aeaa7825f8d8ffe3f07f242a59b7761c.thumb" - pub url_thumb: String, - /// url, "https://i.arhivach.vc/storage/a/ea/aeaa7825f8d8ffe3f07f242a59b7761c.mp4" - pub url: String, -} - -impl std::fmt::Display for File { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{} [{}]\n url: {}\n thumb: {}", - self.name_orig, self.name_timestamp, self.url, self.url_thumb - ) - } -} - -struct PostHead { - subject: Option<String>, - name: Option<String>, - mailto: Option<String>, - time: String, - num: String, - id: u32, -} - -/// Represents a single post in a thread -#[derive(Debug, Clone)] -pub struct Post { - /// Empty if None - pub subject: Option<String>, - /// "Аноним" if none - pub name: Option<String>, - /// "mailto:sage" - pub mailto: Option<String>, - /// "01/02/26 Вск 03:13:12" - pub time: String, - /// "#5" - pub num: String, - /// "329281515" - pub id: u32, - pub files: Vec<File>, - /// Post text - pub text: String, -} - -impl Post { - pub fn parse_posts( - html: &str, - ) -> Result<Vec<Post>> { - let mut posts = Vec::new(); - - let document = scraper::Html::parse_document(html); - let selector = scraper::Selector::parse(r#"div.post"#).unwrap(); - for node in document.select(&selector) { - let post = Post::parse_post(node)?; - posts.push(post); - } - - Ok(posts) - } - - /// Parse div class="post" - /// - /// Example element: - /// ```html - /// <div class="post" id="post329274763" postid="329274763"> - /// <div class="post_head">...</div> (see parse_post_head function) - /// <span class="post_comment">...</span> (see parse_post_comment function) - /// </div> - /// ``` - fn parse_post(node: scraper::ElementRef) -> Result<Post> { - static SEL_POST_HEAD: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("div.post_head").unwrap() - ); - static SEL_POST_IMAGE_BLOCK: 
std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("span.post_comment").unwrap() - ); - - let post_head = node - .select(&SEL_POST_HEAD) - .next() - .context("missing post_head")?; - let head = Post::parse_post_head(post_head)?; - - let post_comment = node - .select(&SEL_POST_IMAGE_BLOCK) - .next() - .context("missing post_comment")?; - let (files, text) = Post::parse_post_comment(post_comment)?; - - Ok(Post { - subject: head.subject, - name: head.name, - mailto: head.mailto, - time: head.time, - num: head.num, - id: head.id, - files, - text, - }) - } - - /// Parses the post_head element - /// - /// Returns (subject, name, mailto, time, num, id) - /// Returns error if no time, num or id is found or if id is not a number - /// - /// Example element: - /// ```html - /// <div class="post_head"> - /// <span class="poster_name" title="">Аноним</span>&nbsp; - /// <span class="post_time">01/02/26 Вск 04:27:32</span>&nbsp; - /// <span class="post_num">#77</span>&nbsp; - /// <span class="post_id"> - /// <a style="position:absolute;margin-top:-50px;" id="329274763"></a> - /// <a href="#329274763">№329274763</a> - /// </span> &nbsp; - /// </div> - /// ``` - fn parse_post_head(post_head: scraper::ElementRef) -> Result<PostHead> { - static SEL_SPAN_POST_ID_A_HREF: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("span.post_id a[href]").unwrap() - ); - static SEL_H1_POST_SUBJECT: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("h1.post_subject").unwrap() - ); - static SEL_SPAN_POSTER_NAME: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("span.poster_name").unwrap() - ); - static SEL_A_POST_MAIL: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("a.post_mail").unwrap() - ); - static SEL_SPAN_POST_TIME: std::sync::LazyLock<scraper::Selector> = 
std::sync::LazyLock::new( - || scraper::Selector::parse("span.post_time").unwrap() - ); - static SEL_SPAN_POST_NUM: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("span.post_num").unwrap() - ); - - let id: u32 = post_head - .select(&SEL_SPAN_POST_ID_A_HREF) - .next() - .and_then(|el| el.value().attr("href")) - .and_then(|href| href.strip_prefix('#')) - .context("missing post id")? - .parse()?; - - let subject = post_head - .select(&SEL_H1_POST_SUBJECT) - .next() - .map(|el| el.text().collect::<String>()); - - let name = post_head - .select(&SEL_SPAN_POSTER_NAME) - .next() - .map(|el| el.text().collect::<String>()) - .and_then(|n| if n == "Аноним" { None } else { Some(n) }); - - let mailto = post_head - .select(&SEL_A_POST_MAIL) - .next() - .and_then(|el| el.value().attr("title")) - .map(|s| s.to_string()); - - let time = post_head - .select(&SEL_SPAN_POST_TIME) - .next() - .context("missing post_time")? - .text() - .collect::<String>(); - - let num = post_head - .select(&SEL_SPAN_POST_NUM) - .next() - .context("missing post_num")? 
- .text() - .collect::<String>(); - - Ok(PostHead { subject, name, mailto, time, num, id }) - } - - /// Parses the sapn post_comment element from a post element - /// - /// Returns (files, text) - /// - /// Example element: - /// <span class="post_comment"> - /// <div class="post_image_block" ...>...</div> (see parse_post_image_block function) (can appear 0 to multiple times) - /// <div class="post_comment_body">...</div> (see parse_post_comment_body function) - /// </span> - fn parse_post_comment( - node: scraper::ElementRef, - ) -> Result<(Vec<File>, String)> { - static SEL_POST_IMAGE_BLOCK: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("div.post_image_block").unwrap() - ); - static SEL_POST_COMMENT_BODY: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("div.post_comment_body").unwrap() - ); - - // TODO handle the errors instead of propagating them upper. Change the return type to non-Result - let files: Vec<File> = node - .select(&SEL_POST_IMAGE_BLOCK) - .map(Post::parse_post_image_block) - .collect(); - let text = Post::parse_post_comment_body(node - .select(&SEL_POST_COMMENT_BODY) - .next() - .context("missing post_comment_body")?); - Ok((files, text)) - } - - /// Parses "post_image_block" element - /// Returns File - /// - /// Example element: - /// ```html - /// <div class="post_image_block" id="pib_77_2" pib="77_2" title="537.4 Кб, 946 x 946 - /// image.png - /// 17699092523481.png"> - /// <a class="expand_image" onclick="expand_local('77_2','/storage/a/cc/acc7f5856bc60ad3bdbd4dc7027e33f9.png','946','946',event); return false;" href="#"> - /// <div class="post_image" id="thumb_77_2"> - /// <img src="/storage/t/acc7f5856bc60ad3bdbd4dc7027e33f9.png" alt="" loading="lazy"> // thumbnail path - /// </div> - /// </a> - /// <a href="/storage/a/cc/acc7f5856bc60ad3bdbd4dc7027e33f9.png" target="_blank" class="img_filename">image.png</a> // can also be 
https://i.arhivach.vc/... if it's a video - /// </div> - /// ``` - fn parse_post_image_block(pib: scraper::ElementRef) -> File { - static SEL_POST_IMAGE_IMG: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse(".post_image img").unwrap() - ); - static SEL_A_IMG_FILENAME: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new( - || scraper::Selector::parse("a.img_filename").unwrap() - ); - - // Title example: - // 402.2 Кб, 800 x 532 - // image.png <- name_orig - // 17699142349880.png <- name_timestamp - let title = pib.value().attr("title").unwrap_or(""); - let title_lines: Vec<&str> = title.lines().collect(); - let name_orig = title_lines - .get(1) - .map(|s| s.to_string()) - .unwrap_or("unnamed".to_string()); - let name_timestamp = title_lines - .get(2) - .map(|s| s.to_string()) - .unwrap_or("unnamed".to_string()); - - // url_thumb - let url_thumb = pib - .select(&SEL_POST_IMAGE_IMG) - .next() - .and_then(|el| el.value().attr("src")) - .unwrap_or(""); // /storage/t/83c2fe5ba9a8469d9eeef4af124e3b52.thumb - let url_thumb = if url_thumb.is_empty() { - String::new() - } else { - format!("{BASE_URL}{url_thumb}") - }; - - // url - let url = pib - .select(&SEL_A_IMG_FILENAME) - .next() - .and_then(|el| el.value().attr("href")) - .unwrap_or(""); - let url = if url.starts_with("http") { // is `https://i.arhivach.vc/...`? - url.to_string() - } else if url.is_empty() { - String::new() - } else { - format!("{BASE_URL}{url}") - }; - - File { - name_orig, - name_timestamp, - url_thumb, - url, - } - } - - /// Parses the post text from `div.post_comment_body` - /// - /// Returns post text: - /// - References are plaintext (e.g. >>329274789) - /// - `<br>` is replaced with \n - /// - `<span class="unkfunc">` (greentext) is replaced with >text - /// - /// If the text contains a reference (e.g. 
>>329274789) it looks like this in the element: - /// ```html - /// <div class="post_comment_body"> - /// <a href="#329274893" class="post-reply-link" data-thread="329273515" data-num="329274893">&gt;&gt;329274893</a> // This will be replaced with >>329274893 - /// <br> - /// <span class="unkfunc">&gt;greentext1</span> - /// <br> - /// text1 - /// </div> - /// ``` - /// - /// This example returns: - /// ```text - /// >>329274893 - /// >greentext1 - /// text1 - /// ``` - fn parse_post_comment_body(node: scraper::ElementRef) -> String { - use scraper::node::Node; - - let mut result = String::new(); - for child in node.children() { - match child.value() { - Node::Text(text) => result.push_str(&text.text), - Node::Element(el) if el.name() == "br" => result.push('\n'), - Node::Element(_) => { - if let Some(el_ref) = scraper::ElementRef::wrap(child) { - result.push_str(&el_ref.text().collect::<String>()); - } - } - _ => {} - } - } - result.trim().to_string() - } -} - -impl std::fmt::Display for Post { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // Header line - let name = self.name.as_deref().unwrap_or("Аноним"); - let mailto = self.mailto.as_deref().unwrap_or(""); - - if !mailto.is_empty() { - write!(f, "{} ({})", name, mailto)?; - } else { - write!(f, "{}", name)?; - } - - write!(f, " {} {} ID:{}", self.time, self.num, self.id)?; - - // Subject - if let Some(ref subject) = self.subject { - write!(f, "\n{}", subject)?; - } - - // Files - if !self.files.is_empty() { - write!(f, "\n[Files: {}]", self.files.len())?; - for file in &self.files { - write!(f, "\n - {}", file)?; - } - } - - // Post text - if !self.text.is_empty() { - write!(f, "\n{}", self.text)?; - } - - Ok(()) - } -}