downloads-khinsider-com-dl

Download all songs at once from downloads.khinsider.com
git clone https://git.ea.contact/downloads-khinsider-com-dl
Log | Files | Refs | README

lib.rs (6673B)


      1 pub mod config;
      2 pub mod event;
      3 
      4 use config::Config;
      5 use event::Event;
      6 
      7 use anyhow::{Context, Result};
      8 use reqwest::Url;
      9 
     10 use std::process::exit;
     11 use std::sync::atomic::{AtomicUsize, Ordering};
     12 use std::sync::mpsc::Sender;
     13 
     14 static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
     15 
     16 pub async fn run(config: Config, tx: Sender<Event>) -> Result<()> {
     17     let client = reqwest::Client::new();
     18 
     19     tx.send(Event::GetPageStarted)?;
     20     let main_page = client.get(&config.url).send().await.context("Failed to get main page")?;
     21     tx.send(Event::GetPageCompleted)?;
     22 
     23     let (name, image_urls, track_urls) = parse_page(main_page, config.images).await?;
     24     let dest_dir = std::path::Path::new(&name).to_path_buf();
     25     std::fs::create_dir_all(&dest_dir)?;
     26 
     27     let image_count = if config.images { image_urls.len() } else { 0 };
     28     tx.send(Event::TotalDownloads(track_urls.len() + image_count))?;
     29 
     30     let mut joinset = tokio::task::JoinSet::new();
     31 
     32     for url in track_urls {
     33         joinset.spawn(download(client.clone(), url, dest_dir.clone(), tx.clone(), config.flac));
     34     }
     35 
     36     if config.images {
     37         let dest_dir = std::path::Path::new(&name).join("images");
     38         std::fs::create_dir_all(&dest_dir)?;
     39         for url in image_urls {
     40             joinset.spawn(download(client.clone(), url, dest_dir.clone(), tx.clone(), false));
     41         }
     42     }
     43 
     44     while let Some(result) = joinset.join_next().await {
     45         result.context("download task panicked")??;
     46     }
     47 
     48     Ok(())
     49 }
     50 
     51 async fn parse_page(
     52     main_page: reqwest::Response,
     53     images: bool,
     54 ) -> Result<(String, Vec<Url>, Vec<Url>)> {
     55     let base_url = main_page.url().clone();
     56     let html = main_page.text().await.context("Failed to read page body")?;
     57     let document = scraper::Html::parse_document(&html);
     58 
     59     let name = {
     60         let sel = scraper::Selector::parse("#pageContent h2").unwrap();
     61         document
     62             .select(&sel)
     63             .next()
     64             .ok_or_else(|| anyhow::anyhow!("Album name element not found"))?
     65             .text()
     66             .collect::<String>()
     67             .trim()
     68             .to_string()
     69     };
     70 
     71     let image_urls = if images {
     72         let sel = scraper::Selector::parse(
     73             "#pageContent table:first-of-type tr td div:first-of-type a"
     74         ).unwrap();
     75         document
     76             .select(&sel)
     77             .filter_map(|el| el.value().attr("href"))
     78             .filter_map(|href| base_url.join(href).ok())
     79             .collect()
     80     } else {
     81         Vec::new()
     82     };
     83 
     84     let track_sel = scraper::Selector::parse("#songlist tbody tr td:nth-child(5) a").unwrap();
     85     let track_urls = document
     86         .select(&track_sel)
     87         .filter_map(|el| el.value().attr("href"))
     88         .filter_map(|href| base_url.join(href).ok())
     89         .collect();
     90 
     91     Ok((name, image_urls, track_urls))
     92 }
     93 
     94 fn percent_decode(s: &str) -> String {
     95     let s = s.as_bytes();
     96     let mut bytes = Vec::with_capacity(s.len());
     97     let mut i = 0;
     98     while i < s.len() {
     99         if s[i] == b'%' && i + 2 < s.len() {
    100             if let (Some(h), Some(l)) = (
    101                 (s[i + 1] as char).to_digit(16),
    102                 (s[i + 2] as char).to_digit(16),
    103             ) {
    104                 bytes.push((h << 4 | l) as u8);
    105                 i += 3;
    106                 continue;
    107             }
    108         }
    109         bytes.push(s[i]);
    110         i += 1;
    111     }
    112     String::from_utf8_lossy(&bytes).into_owned()
    113 }
    114 
    115 async fn resolve_flac_url(client: &reqwest::Client, track_page_url: &Url) -> Result<Url> {
    116     let html = client.get(track_page_url.clone()).send().await?.text().await?;
    117     let document = scraper::Html::parse_document(&html);
    118     let sel = scraper::Selector::parse(
    119         "#pageContent > p:nth-child(10) > a"
    120     ).map_err(|e| anyhow::anyhow!("Failed to parse FLAC link selector: {e}"))?;
    121     let href = document
    122         .select(&sel)
    123         .next()
    124         .ok_or_else(|| anyhow::anyhow!("FLAC link not found on page: {}", track_page_url))?
    125         .value()
    126         .attr("href")
    127         .ok_or_else(|| anyhow::anyhow!("FLAC link has no href on page: {}", track_page_url))?;
    128     track_page_url.join(href).context("Failed to parse FLAC URL")
    129 }
    130 
    131 async fn download(
    132     client: reqwest::Client,
    133     url: Url,
    134     dest_dir: std::path::PathBuf,
    135     tx: Sender<Event>,
    136     flac: bool,
    137 ) -> Result<()> {
    138     let id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
    139     let mut name = url
    140         .path_segments()
    141         .and_then(|s| s.last())
    142         .map(|s| percent_decode(&percent_decode(s)))
    143         .unwrap_or_else(|| url.to_string());
    144     if flac && name.ends_with(".mp3") {
    145         name.truncate(name.len() - 3);
    146         name.push_str("flac");
    147     }
    148 
    149     tx.send(Event::DlStarted { id, name })?;
    150 
    151     let download_url = if flac {
    152         match resolve_flac_url(&client, &url).await {
    153             Ok(u) => u,
    154             Err(e) => {
    155                 tx.send(Event::DlFailed { id, error: e })?;
    156                 return Ok(());
    157             }
    158         }
    159     } else {
    160         url.clone()
    161     };
    162 
    163     let mut response = match client.get(download_url.clone()).send().await {
    164         Ok(r) => r,
    165         Err(e) => {
    166             tx.send(Event::DlFailed { id, error: e.into() })?;
    167             return Ok(());
    168         }
    169     };
    170 
    171     let total: Option<usize> = response.content_length().and_then(|l| l.try_into().ok());
    172     let mut downloaded: usize = 0;
    173     let mut file_bytes = Vec::new();
    174 
    175     loop {
    176         match response.chunk().await {
    177             Ok(Some(chunk)) => {
    178                 downloaded += chunk.len();
    179                 file_bytes.extend_from_slice(&chunk);
    180                 let _ = tx.send(Event::DlProgress { id, downloaded, total });
    181             }
    182             Ok(None) => break,
    183             Err(e) => {
    184                 tx.send(Event::DlFailed { id, error: e.into() })?;
    185                 return Ok(());
    186             }
    187         }
    188     }
    189 
    190     // khinsider double-encodes URLs (%20 → %2520), so decode twice
    191     let filename = match download_url.path_segments().and_then(|s| s.last()) {
    192         Some(s) => percent_decode(&percent_decode(s)),
    193         None => {
    194             tx.send(Event::DlFailed {
    195                 id,
    196                 error: anyhow::anyhow!("Failed to get filename from url"),
    197             })?;
    198             return Ok(());
    199         }
    200     };
    201 
    202     match tokio::fs::write(dest_dir.join(&filename), &file_bytes).await {
    203         Err(e) => tx.send(Event::DlFailed { id, error: e.into() })?,
    204         Ok(()) => tx.send(Event::DlCompleted { id })?,
    205     };
    206 
    207     Ok(())
    208 }