commit 90a47f47d40d2af5be72a86f068660e6eced45da
parent 0458300db0e2dadd557187538424b7cde8c42082
Author: egor-achkasov <eaachkasov@gmail.com>
Date: Sun, 22 Feb 2026 19:36:47 +0000
Improve info and error output
Diffstat:
| M | Cargo.lock | | | 79 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | Cargo.toml | | | 1 | + |
| M | src/export.rs | | | 75 | +++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
| M | src/main.rs | | | 35 | +++++++++++++++++++++++++++++------ |
| M | src/post.rs | | | 2 | ++ |
5 files changed, 160 insertions(+), 32 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -78,6 +78,7 @@ dependencies = [
"scraper",
"tokio",
"tracing",
+ "tracing-subscriber",
]
[[package]]
@@ -733,6 +734,12 @@ dependencies = [
]
[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
name = "libc"
version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -783,6 +790,15 @@ dependencies = [
]
[[package]]
+name = "matchers"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
+dependencies = [
+ "regex-automata",
+]
+
+[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -829,6 +845,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
+name = "nu-ansi-term"
+version = "0.50.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1333,6 +1358,15 @@ dependencies = [
]
[[package]]
+name = "sharded-slab"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1485,6 +1519,15 @@ dependencies = [
]
[[package]]
+name = "thread_local"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
name = "tinystr"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1627,6 +1670,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
+dependencies = [
+ "log",
+ "once_cell",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
+dependencies = [
+ "matchers",
+ "nu-ansi-term",
+ "once_cell",
+ "regex-automata",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
]
[[package]]
@@ -1684,6 +1757,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
+name = "valuable"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+
+[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
@@ -12,3 +12,4 @@ reqwest = { version = "0.12", features = ["blocking"] }
scraper = "0.25.0"
tokio = { version = "1.49.0", features = ["macros", "rt", "rt-multi-thread"] }
tracing = "0.1.44"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
diff --git a/src/export.rs b/src/export.rs
@@ -1,7 +1,7 @@
use crate::post::Post;
-use crate::file::File;
use anyhow::{Result, Context};
+use tracing::debug;
fn html_escape(s: &str) -> String {
s.replace('&', "&")
@@ -54,47 +54,27 @@ pub async fn export2html(
anyhow::bail!("No posts to export");
}
- // Create directories
let dir = format!("{}", posts[0].id);
std::fs::create_dir_all(&dir)?;
- // Render the thread
let posts_html: String = posts
.iter()
.map(|p| render_post(p, download_files, download_thumbnails))
.collect::<Vec<String>>()
.join("\n");
- // Download files
- async fn download_helper(
- base_dir: &str,
- subdir: &str,
- posts: &[Post],
- get_url: fn(&File) -> &str,
- ) -> Result<()>{
- let dir = format!("{}/{}", base_dir, subdir);
- std::fs::create_dir_all(&dir)
- .with_context(|| format!("Failed to create directory {}", dir))?;
- for (f, filename) in posts.iter().flat_map(|p| &p.files)
- .filter_map(|f| f.url.split('/').last().map(|name| (f, name)))
- {
- let path = format!("{}/{}", dir, filename);
- download(get_url(f), &path).await
- .with_context(|| format!("Failed to download file {}", path))?;
- }
- Ok(())
- }
+
if download_files {
- download_helper(&dir, "files", &posts, |f| &f.url).await?;
+ download_assets(&posts, &format!("{}/files", dir), "files", |f| &f.url).await?;
}
+
if download_thumbnails {
- download_helper(&dir, "thumb", &posts, |f| &f.url_thumb).await?;
+ download_assets(&posts, &format!("{}/thumb", dir), "thumbnails", |f| &f.url_thumb).await?;
}
- // Insert the posts html into a template and write as index.html
let template = std::fs::read_to_string("template.html")?
.replace("{{posts}}", &posts_html);
std::fs::write(format!("{}/index.html", dir), template)?;
-
+
Ok(())
}
@@ -192,6 +172,49 @@ fn render_images(
}
+/// Downloads one asset per file of every post into `dest_dir`, reporting progress on stdout.
+///
+/// * `posts` - posts whose `files` are walked in order.
+/// * `dest_dir` - directory the assets are written to; created if it does not exist.
+/// * `label` - human-readable asset kind, used only in progress and log messages.
+/// * `url_of` - selects which URL of a file to fetch; the on-disk file name is the
+///   last `/`-separated segment of that URL.
+///
+/// Each download is tried up to `MAX_ATTEMPTS` times with `RETRY_DELAY` between tries.
+/// A file that still fails is reported and skipped, so a broken asset never aborts
+/// the export.
+///
+/// # Errors
+///
+/// Returns an error only when `dest_dir` cannot be created.
+async fn download_assets(
+    posts: &[Post],
+    dest_dir: &str,
+    label: &str,
+    url_of: impl Fn(&crate::file::File) -> &str,
+) -> Result<()> {
+    use std::io::Write;
+
+    /// Total number of tries per file before it is skipped.
+    const MAX_ATTEMPTS: u32 = 3;
+    /// Pause between two consecutive tries of the same file.
+    const RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(3);
+
+    std::fs::create_dir_all(dest_dir)
+        .with_context(|| format!("Failed to create directory {}", dest_dir))?;
+    let started = std::time::Instant::now();
+    print!("\tDownloading {}... post 0 / {}", label, posts.len());
+    std::io::stdout().flush().ok();
+    for (i, post) in posts.iter().enumerate() {
+        for f in &post.files {
+            let url = url_of(f);
+            let filename = url.rsplit('/').next().unwrap_or("");
+            // An empty URL, or one ending in '/', has no usable file name: writing to
+            // "<dest_dir>/" can never succeed, so skip it instead of retrying.
+            if filename.is_empty() {
+                println!("\r\tSkipping {} with no file name in URL \"{}\".", label, url);
+                continue;
+            }
+            let path = format!("{}/{}", dest_dir, filename);
+            debug!(url = %url, %path, "Downloading {}", label);
+            let mut downloaded = false;
+            for attempt in 1..=MAX_ATTEMPTS {
+                match download(url, &path).await {
+                    Ok(()) => {
+                        downloaded = true;
+                        break;
+                    }
+                    // `{:#}` prints the whole anyhow context chain, not just the outermost message.
+                    Err(e) if attempt < MAX_ATTEMPTS => {
+                        println!(
+                            "\r\tFailed to download {} {}: {:#}\n\t-> Waiting {} seconds...",
+                            label,
+                            filename,
+                            e,
+                            RETRY_DELAY.as_secs()
+                        );
+                        tokio::time::sleep(RETRY_DELAY).await;
+                    }
+                    // Last attempt: there is no further wait, so do not announce one.
+                    Err(e) => {
+                        println!("\r\tFailed to download {} {}: {:#}", label, filename, e);
+                    }
+                }
+            }
+            if !downloaded {
+                println!(
+                    "\tSkipping {} {} after {} failed attempts.",
+                    label, filename, MAX_ATTEMPTS
+                );
+            }
+        }
+        // '\r' rewinds to the start of the line so the counter is updated in place.
+        print!("\r\tDownloading {}... post {} / {}", label, i + 1, posts.len());
+        std::io::stdout().flush().ok();
+    }
+    println!(" Done ({} ms)", started.elapsed().as_millis());
+    Ok(())
+}
+
+
async fn download(url: &str, path: &str) -> Result<()> {
let bytes = reqwest::get(url).await
.with_context(|| format!("HTTP GET failed for {}", url))?
diff --git a/src/main.rs b/src/main.rs
@@ -9,32 +9,55 @@ use post::Post;
use anyhow::{Context, Ok, Result};
async fn scrape_thread(url: &str, config: &Config) -> Result<()> {
+ use std::io::Write;
+ let t_total = std::time::Instant::now();
+
+ print!("\tGetting thread...");
+ std::io::stdout().flush().ok();
+ let t = std::time::Instant::now();
let html = reqwest::get(url).await
.with_context(|| format!("HTTP GET failed for {url}"))?
.text().await
.context("failed to read response body")?;
+ println!(" Done ({} ms)", t.elapsed().as_millis());
+
+ print!("\tParsing posts...");
+ std::io::stdout().flush().ok();
+ let t = std::time::Instant::now();
let posts = Post::parse_posts(&html)
.context("failed to parse thread HTML")?;
+ println!(" Done ({} ms)", t.elapsed().as_millis());
+
export::export2html(posts, config.files, config.thumb).await
.context("failed to export thread")?;
+
+ println!("Done processing {} ({} ms)", url, t_total.elapsed().as_millis());
Ok(())
}
#[tokio::main]
-async fn main() -> Result<()>{
+async fn main() -> Result<()> {
+ tracing_subscriber::fmt()
+ .without_time()
+ .with_target(false)
+ .with_env_filter(
+ tracing_subscriber::EnvFilter::try_from_default_env()
+ .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("warn"))
+ )
+ .init();
+
let config = parse_args()
.unwrap_or_else(|e| {
- eprintln!("Error parsing arguments: {}", e);
+ eprintln!("Error: {}", e);
std::process::exit(1);
});
- for (i, url) in config.urls.iter().enumerate() {
- println!("Processing: {} ({} / {})", url, i + 1, config.urls.len());
+ for url in &config.urls {
+ println!("Processing {}:", url);
scrape_thread(url, &config).await
- .unwrap_or_else(|e| eprintln!("Error processing {}: {}", url, e));
+ .unwrap_or_else(|e| eprintln!("Error processing {}: {:#}", url, e));
}
- println!("Done");
Ok(())
}
diff --git a/src/post.rs b/src/post.rs
@@ -1,6 +1,7 @@
use crate::file::File;
use anyhow::{Context, Result};
+use tracing::debug;
/// Represents a single post in a thread
#[derive(Debug, Clone)]
@@ -35,6 +36,7 @@ impl Post {
posts.push(post);
}
+ debug!("Parsed {} posts", posts.len());
Ok(posts)
}