lib.rs (6673B)
1 pub mod config; 2 pub mod event; 3 4 use config::Config; 5 use event::Event; 6 7 use anyhow::{Context, Result}; 8 use reqwest::Url; 9 10 use std::process::exit; 11 use std::sync::atomic::{AtomicUsize, Ordering}; 12 use std::sync::mpsc::Sender; 13 14 static NEXT_ID: AtomicUsize = AtomicUsize::new(0); 15 16 pub async fn run(config: Config, tx: Sender<Event>) -> Result<()> { 17 let client = reqwest::Client::new(); 18 19 tx.send(Event::GetPageStarted)?; 20 let main_page = client.get(&config.url).send().await.context("Failed to get main page")?; 21 tx.send(Event::GetPageCompleted)?; 22 23 let (name, image_urls, track_urls) = parse_page(main_page, config.images).await?; 24 let dest_dir = std::path::Path::new(&name).to_path_buf(); 25 std::fs::create_dir_all(&dest_dir)?; 26 27 let image_count = if config.images { image_urls.len() } else { 0 }; 28 tx.send(Event::TotalDownloads(track_urls.len() + image_count))?; 29 30 let mut joinset = tokio::task::JoinSet::new(); 31 32 for url in track_urls { 33 joinset.spawn(download(client.clone(), url, dest_dir.clone(), tx.clone(), config.flac)); 34 } 35 36 if config.images { 37 let dest_dir = std::path::Path::new(&name).join("images"); 38 std::fs::create_dir_all(&dest_dir)?; 39 for url in image_urls { 40 joinset.spawn(download(client.clone(), url, dest_dir.clone(), tx.clone(), false)); 41 } 42 } 43 44 while let Some(result) = joinset.join_next().await { 45 result.context("download task panicked")??; 46 } 47 48 Ok(()) 49 } 50 51 async fn parse_page( 52 main_page: reqwest::Response, 53 images: bool, 54 ) -> Result<(String, Vec<Url>, Vec<Url>)> { 55 let base_url = main_page.url().clone(); 56 let html = main_page.text().await.context("Failed to read page body")?; 57 let document = scraper::Html::parse_document(&html); 58 59 let name = { 60 let sel = scraper::Selector::parse("#pageContent h2").unwrap(); 61 document 62 .select(&sel) 63 .next() 64 .ok_or_else(|| anyhow::anyhow!("Album name element not found"))? 65 .text() 66 .collect::<String>() 67 .trim() 68 .to_string() 69 }; 70 71 let image_urls = if images { 72 let sel = scraper::Selector::parse( 73 "#pageContent table:first-of-type tr td div:first-of-type a" 74 ).unwrap(); 75 document 76 .select(&sel) 77 .filter_map(|el| el.value().attr("href")) 78 .filter_map(|href| base_url.join(href).ok()) 79 .collect() 80 } else { 81 Vec::new() 82 }; 83 84 let track_sel = scraper::Selector::parse("#songlist tbody tr td:nth-child(5) a").unwrap(); 85 let track_urls = document 86 .select(&track_sel) 87 .filter_map(|el| el.value().attr("href")) 88 .filter_map(|href| base_url.join(href).ok()) 89 .collect(); 90 91 Ok((name, image_urls, track_urls)) 92 } 93 94 fn percent_decode(s: &str) -> String { 95 let s = s.as_bytes(); 96 let mut bytes = Vec::with_capacity(s.len()); 97 let mut i = 0; 98 while i < s.len() { 99 if s[i] == b'%' && i + 2 < s.len() { 100 if let (Some(h), Some(l)) = ( 101 (s[i + 1] as char).to_digit(16), 102 (s[i + 2] as char).to_digit(16), 103 ) { 104 bytes.push((h << 4 | l) as u8); 105 i += 3; 106 continue; 107 } 108 } 109 bytes.push(s[i]); 110 i += 1; 111 } 112 String::from_utf8_lossy(&bytes).into_owned() 113 } 114 115 async fn resolve_flac_url(client: &reqwest::Client, track_page_url: &Url) -> Result<Url> { 116 let html = client.get(track_page_url.clone()).send().await?.text().await?; 117 let document = scraper::Html::parse_document(&html); 118 let sel = scraper::Selector::parse( 119 "#pageContent > p:nth-child(10) > a" 120 ).map_err(|e| anyhow::anyhow!("Failed to parse FLAC link selector: {e}"))?; 121 let href = document 122 .select(&sel) 123 .next() 124 .ok_or_else(|| anyhow::anyhow!("FLAC link not found on page: {}", track_page_url))? 125 .value() 126 .attr("href") 127 .ok_or_else(|| anyhow::anyhow!("FLAC link has no href on page: {}", track_page_url))?; 128 track_page_url.join(href).context("Failed to parse FLAC URL") 129 } 130 131 async fn download( 132 client: reqwest::Client, 133 url: Url, 134 dest_dir: std::path::PathBuf, 135 tx: Sender<Event>, 136 flac: bool, 137 ) -> Result<()> { 138 let id = NEXT_ID.fetch_add(1, Ordering::Relaxed); 139 let mut name = url 140 .path_segments() 141 .and_then(|s| s.last()) 142 .map(|s| percent_decode(&percent_decode(s))) 143 .unwrap_or_else(|| url.to_string()); 144 if flac && name.ends_with(".mp3") { 145 name.truncate(name.len() - 3); 146 name.push_str("flac"); 147 } 148 149 tx.send(Event::DlStarted { id, name })?; 150 151 let download_url = if flac { 152 match resolve_flac_url(&client, &url).await { 153 Ok(u) => u, 154 Err(e) => { 155 tx.send(Event::DlFailed { id, error: e })?; 156 return Ok(()); 157 } 158 } 159 } else { 160 url.clone() 161 }; 162 163 let mut response = match client.get(download_url.clone()).send().await { 164 Ok(r) => r, 165 Err(e) => { 166 tx.send(Event::DlFailed { id, error: e.into() })?; 167 return Ok(()); 168 } 169 }; 170 171 let total: Option<usize> = response.content_length().and_then(|l| l.try_into().ok()); 172 let mut downloaded: usize = 0; 173 let mut file_bytes = Vec::new(); 174 175 loop { 176 match response.chunk().await { 177 Ok(Some(chunk)) => { 178 downloaded += chunk.len(); 179 file_bytes.extend_from_slice(&chunk); 180 let _ = tx.send(Event::DlProgress { id, downloaded, total }); 181 } 182 Ok(None) => break, 183 Err(e) => { 184 tx.send(Event::DlFailed { id, error: e.into() })?; 185 return Ok(()); 186 } 187 } 188 } 189 190 // khinsider double-encodes URLs (%20 → %2520), so decode twice 191 let filename = match download_url.path_segments().and_then(|s| s.last()) { 192 Some(s) => percent_decode(&percent_decode(s)), 193 None => { 194 tx.send(Event::DlFailed { 195 id, 196 error: anyhow::anyhow!("Failed to get filename from url"), 197 })?; 198 return Ok(()); 199 } 200 }; 201 202 match tokio::fs::write(dest_dir.join(&filename), &file_bytes).await { 203 Err(e) => tx.send(Event::DlFailed { id, error: e.into() })?, 204 Ok(()) => tx.send(Event::DlCompleted { id })?, 205 }; 206 207 Ok(()) 208 }