commit 0458300db0e2dadd557187538424b7cde8c42082
parent d3652bbd77e194b6a8194e4b9e635d77cb0f9205
Author: egor-achkasov <eaachkasov@gmail.com>
Date: Sun, 22 Feb 2026 00:03:28 +0000
Use anyhow for results
Diffstat:
6 files changed, 78 insertions(+), 46 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -62,14 +62,22 @@ dependencies = [
]
[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
name = "archivarch-downloader"
version = "0.1.0"
dependencies = [
+ "anyhow",
"clap",
"regex",
"reqwest",
"scraper",
"tokio",
+ "tracing",
]
[[package]]
@@ -1597,10 +1605,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"pin-project-lite",
+ "tracing-attributes",
"tracing-core",
]
[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
name = "tracing-core"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,8 +5,10 @@ version = "0.1.0"
edition = "2024"
[dependencies]
+anyhow = "1.0.102"
clap = { version = "4.5.57", features = ["derive"] }
regex = "1.12.3"
reqwest = { version = "0.12", features = ["blocking"] }
scraper = "0.25.0"
tokio = { version = "1.49.0", features = ["macros", "rt", "rt-multi-thread"] }
+tracing = "0.1.44"
diff --git a/src/export.rs b/src/export.rs
@@ -1,4 +1,7 @@
use crate::post::Post;
+use crate::file::File;
+
+use anyhow::{Result, Context};
fn html_escape(s: &str) -> String {
s.replace('&', "&amp;")
@@ -46,9 +49,9 @@ pub async fn export2html(
posts: Vec<Post>,
download_files: bool,
download_thumbnails: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
+) -> Result<()> {
if posts.is_empty() {
- return Err("No posts to export".into());
+ anyhow::bail!("No posts to export");
}
// Create directories
@@ -62,26 +65,29 @@ pub async fn export2html(
.collect::<Vec<String>>()
.join("\n");
// Download files
- if download_files {
- let dir = format!("{}/files", dir);
- std::fs::create_dir_all(&dir)?;
+ async fn download_helper(
+ base_dir: &str,
+ subdir: &str,
+ posts: &[Post],
+ get_url: fn(&File) -> &str,
+ ) -> Result<()>{
+ let dir = format!("{}/{}", base_dir, subdir);
+ std::fs::create_dir_all(&dir)
+ .with_context(|| format!("Failed to create directory {}", dir))?;
for (f, filename) in posts.iter().flat_map(|p| &p.files)
.filter_map(|f| f.url.split('/').last().map(|name| (f, name)))
{
let path = format!("{}/{}", dir, filename);
- download(&f.url, &path).await?;
+ download(get_url(f), &path).await
+ .with_context(|| format!("Failed to download file {}", path))?;
}
+ Ok(())
+ }
+ if download_files {
+ download_helper(&dir, "files", &posts, |f| &f.url).await?;
}
- // Download thumbnails
if download_thumbnails {
- let dir = format!("{}/thumb", dir);
- std::fs::create_dir_all(&dir)?;
- for (f, filename) in posts.iter().flat_map(|p| &p.files)
- .filter_map(|f| f.url_thumb.split('/').last().map(|name| (f, name)))
- {
- let path = format!("{}/{}", dir, filename);
- download(&f.url_thumb, &path).await?;
- }
+ download_helper(&dir, "thumb", &posts, |f| &f.url_thumb).await?;
}
// Insert the posts html into a template and write as index.html
@@ -186,8 +192,12 @@ fn render_images(
}
-async fn download(url: &str, path: &str) -> Result<(), Box<dyn std::error::Error>> {
- let bytes = reqwest::get(url).await?.bytes().await?;
- std::fs::write(path, &bytes)?;
+async fn download(url: &str, path: &str) -> Result<()> {
+ let bytes = reqwest::get(url).await
+ .with_context(|| format!("HTTP GET failed for {}", url))?
+ .bytes().await
+ .context("failed to read response body")?;
+ std::fs::write(path, &bytes)
+ .with_context(|| format!("failed to write {}", path))?;
Ok(())
}
diff --git a/src/main.rs b/src/main.rs
@@ -6,26 +6,23 @@ mod export;
use parse_args::{Config, parse_args};
use post::Post;
-async fn scrape_thread(url: &str, config: &Config) -> Result<(), Box<dyn std::error::Error>> {
- // Validate URL (expect https?://arhivach\.vc/thread/\d{7}/?)
- let is_valid = matches!(
- url.trim().trim_end_matches('/').split('/').collect::<Vec<_>>().as_slice(),
- ["https:" | "http:", "", "arhivach.vc", "thread", _]
- );
- if !is_valid {
- return Err("invalid URL".into());
- }
-
- let html = reqwest::get(url).await?.text().await?;
- let posts = Post::parse_posts(&html)?;
- export::export2html(posts, config.files, config.thumb).await?;
-
+use anyhow::{Context, Ok, Result};
+
+async fn scrape_thread(url: &str, config: &Config) -> Result<()> {
+ let html = reqwest::get(url).await
+ .with_context(|| format!("HTTP GET failed for {url}"))?
+ .text().await
+ .context("failed to read response body")?;
+ let posts = Post::parse_posts(&html)
+ .context("failed to parse thread HTML")?;
+ export::export2html(posts, config.files, config.thumb).await
+ .context("failed to export thread")?;
Ok(())
}
#[tokio::main]
-async fn main() {
+async fn main() -> Result<()>{
let config = parse_args()
.unwrap_or_else(|e| {
eprintln!("Error parsing arguments: {}", e);
@@ -39,4 +36,5 @@ async fn main() {
}
println!("Done");
+ Ok(())
}
diff --git a/src/parse_args.rs b/src/parse_args.rs
@@ -1,4 +1,5 @@
use clap::Parser;
+use anyhow::Result;
use std::path::PathBuf;
@@ -8,7 +9,7 @@ pub struct Config{
pub files: bool
}
-pub fn parse_args() -> Result<Config, Box<dyn std::error::Error>> {
+pub fn parse_args() -> Result<Config> {
#[derive(Parser)]
#[command(about, long_about)]
struct Cli {
@@ -41,7 +42,7 @@ pub fn parse_args() -> Result<Config, Box<dyn std::error::Error>> {
}
}
if urls.is_empty() {
- return Err("No URLs provided".into());
+ anyhow::bail!("No URLs provided");
}
Ok(Config {
diff --git a/src/post.rs b/src/post.rs
@@ -1,5 +1,7 @@
use crate::file::File;
+use anyhow::{Context, Result};
+
/// Represents a single post in a thread
#[derive(Debug, Clone)]
pub struct Post {
@@ -23,7 +25,7 @@ pub struct Post {
impl Post {
pub fn parse_posts(
html: &str,
- ) -> Result<Vec<Post>, Box<dyn std::error::Error>> {
+ ) -> Result<Vec<Post>> {
let mut posts = Vec::new();
let document = scraper::Html::parse_document(html);
@@ -45,7 +47,7 @@ impl Post {
/// <span class="post_comment">...</span> (see parse_post_comment function)
/// </div>
/// ```
- fn parse_post(node: scraper::ElementRef) -> Result<Post, Box<dyn std::error::Error>> {
+ fn parse_post(node: scraper::ElementRef) -> Result<Post> {
static SEL_POST_HEAD: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
|| scraper::Selector::parse("div.post_head").unwrap()
);
@@ -56,13 +58,13 @@ impl Post {
let post_head = node
.select(&SEL_POST_HEAD)
.next()
- .ok_or("missing post_head")?;
+ .context("missing post_head")?;
let (subject, name, mailto, time, num, id) = Post::parse_post_head(post_head)?;
let post_comment = node
.select(&SEL_POST_IMAGE_BLOCK)
.next()
- .ok_or("missing post_comment")?;
+ .context("missing post_comment")?;
let (files, text) = Post::parse_post_comment(post_comment)?;
Ok(Post {
@@ -104,8 +106,7 @@ impl Post {
String, // time
String, // num
u32 // id
- ),
- Box<dyn std::error::Error>
+ )
> {
static SEL_SPAN_POST_ID_A_HREF: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
|| scraper::Selector::parse("span.post_id a[href]").unwrap()
@@ -131,7 +132,7 @@ impl Post {
.next()
.and_then(|el| el.value().attr("href"))
.and_then(|href| href.strip_prefix('#'))
- .ok_or("missing post id")?
+ .context("missing post id")?
.parse()?;
let subject = post_head
@@ -154,14 +155,14 @@ impl Post {
let time = post_head
.select(&SEL_SPAN_POST_TIME)
.next()
- .ok_or("missing post_time")?
+ .context("missing post_time")?
.text()
.collect::<String>();
let num = post_head
.select(&SEL_SPAN_POST_NUM)
.next()
- .ok_or("missing post_num")?
+ .context("missing post_num")?
.text()
.collect::<String>();
@@ -179,7 +180,7 @@ impl Post {
/// </span>
fn parse_post_comment(
node: scraper::ElementRef,
- ) -> Result<(Vec<File>, String), Box<dyn std::error::Error>> {
+ ) -> Result<(Vec<File>, String)> {
static SEL_POST_IMAGE_BLOCK: std::sync::LazyLock<scraper::Selector> = std::sync::LazyLock::new(
|| scraper::Selector::parse("div.post_image_block").unwrap()
);
@@ -195,7 +196,7 @@ impl Post {
let text = Post::parse_post_comment_body(node
.select(&SEL_POST_COMMENT_BODY)
.next()
- .ok_or("missing post_comment_body")?);
+ .context("missing post_comment_body")?);
Ok((files, text))
}