diff --git a/src/stackexchange/api.rs b/src/stackexchange/api.rs index 897ef0b..cd50296 100644 --- a/src/stackexchange/api.rs +++ b/src/stackexchange/api.rs @@ -9,8 +9,7 @@ use crate::error::Result; use crate::tui::markdown; /// StackExchange API v2.2 URL -// TODO why not https? -const SE_API_URL: &str = "http://api.stackexchange.com"; +const SE_API_URL: &str = "https://api.stackexchange.com"; const SE_API_VERSION: &str = "2.2"; /// Filter generated to include only the fields needed to populate @@ -80,6 +79,10 @@ impl Api { header::ACCEPT, header::HeaderValue::from_static("application/json"), ); + headers.insert( + header::USER_AGENT, + header::HeaderValue::from_static(super::USER_AGENT), + ); let client = Client::builder().default_headers(headers).build().unwrap(); Api { client, api_key } } @@ -91,15 +94,17 @@ impl Api { let endpoint = format!("questions/{ids}", ids = ids.join(";")); let url = stackexchange_url(&endpoint); log::debug!("Fetching questions from: {url}"); - let qs = self + let qs_rsp = self .client .get(url) .query(&self.get_default_se_opts()) .query(&[("site", site), ("pagesize", &total)]) .send() - .await? - .json::<ResponseWrapper<Question<String>>>() - .await? + .await?; + let status_code = qs_rsp.status(); + let body = qs_rsp.text().await?; + log::debug!("Stack exchange returned status {status_code} and body {body}"); + let qs = serde_json::from_str::<ResponseWrapper<Question<String>>>(&body)? 
.items .into_iter() .filter(|q| !q.answers.is_empty()) diff --git a/src/stackexchange/mod.rs b/src/stackexchange/mod.rs index 1002201..1a66cdf 100644 --- a/src/stackexchange/mod.rs +++ b/src/stackexchange/mod.rs @@ -7,3 +7,7 @@ pub mod scraper; pub use api::{Answer, Id, Question}; pub use local_storage::{LocalStorage, SiteMap}; pub use search::Search; + +/// Mock user agent +const USER_AGENT: &str = + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0"; diff --git a/src/stackexchange/search.rs b/src/stackexchange/search.rs index 102eb8e..8b40615 100644 --- a/src/stackexchange/search.rs +++ b/src/stackexchange/search.rs @@ -16,11 +16,6 @@ use super::scraper::{DuckDuckGo, Google, ScrapedData, Scraper}; /// Limit on concurrent requests (gets passed to `buffer_unordered`) const CONCURRENT_REQUESTS_LIMIT: usize = 8; -/// Mock user agent to get real DuckDuckGo results -// TODO copy other user agents and use random one each time -const USER_AGENT: &str = - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0"; - /// This structure provides methods to search queries and get StackExchange /// questions/answers in return. // TODO this really needs a better name... @@ -105,13 +100,13 @@ impl Search { let url = scraper.get_url(&self.query, self.site_map.values()); let html = Client::new() .get(url) - .header(header::USER_AGENT, USER_AGENT) + .header(header::USER_AGENT, super::USER_AGENT) .send() .await? .text() .await?; let data = scraper.parse(&html, self.site_map.as_ref(), self.config.limit)?; - log::trace!("Scraped question IDs: {:#?}", &data.question_ids); + log::debug!("Scraped question IDs: {:#?}", &data.question_ids); self.parallel_questions(data).await }