mirror of
https://github.com/redlib-org/redlib.git
synced 2025-06-07 07:07:48 +00:00
List post duplicates (resolves #574).
This commit is contained in:
parent
fade305f90
commit
e579b97442
21 changed files with 695 additions and 250 deletions
228
src/duplicates.rs
Normal file
228
src/duplicates.rs
Normal file
|
@ -0,0 +1,228 @@
|
|||
// Handler for post duplicates.
|
||||
|
||||
use crate::client::json;
|
||||
use crate::server::RequestExt;
|
||||
use crate::subreddit::{can_access_quarantine, quarantine};
|
||||
use crate::utils::{error, filter_posts, get_filters, parse_post, template, Post, Preferences};
|
||||
|
||||
use askama::Template;
|
||||
use hyper::{Body, Request, Response};
|
||||
use serde_json::Value;
|
||||
use std::borrow::ToOwned;
|
||||
use std::collections::HashSet;
|
||||
use std::vec::Vec;
|
||||
|
||||
/// DuplicatesParams contains the parameters in the URL.
|
||||
struct DuplicatesParams {
|
||||
before: String,
|
||||
after: String,
|
||||
sort: String,
|
||||
}
|
||||
|
||||
/// DuplicatesTemplate defines an Askama template for rendering duplicate
|
||||
/// posts.
|
||||
#[derive(Template)]
|
||||
#[template(path = "duplicates.html")]
|
||||
struct DuplicatesTemplate {
|
||||
/// params contains the relevant request parameters.
|
||||
params: DuplicatesParams,
|
||||
|
||||
/// post is the post whose ID is specified in the reqeust URL. Note that
|
||||
/// this is not necessarily the "original" post.
|
||||
post: Post,
|
||||
|
||||
/// duplicates is the list of posts that, per Reddit, are duplicates of
|
||||
/// Post above.
|
||||
duplicates: Vec<Post>,
|
||||
|
||||
/// prefs are the user preferences.
|
||||
prefs: Preferences,
|
||||
|
||||
/// url is the request URL.
|
||||
url: String,
|
||||
|
||||
/// num_posts_filtered counts how many posts were filtered from the
|
||||
/// duplicates list.
|
||||
num_posts_filtered: u64,
|
||||
|
||||
/// all_posts_filtered is true if every duplicate was filtered. This is an
|
||||
/// edge case but can still happen.
|
||||
all_posts_filtered: bool,
|
||||
}
|
||||
|
||||
/// Make the GET request to Reddit. It assumes `req` is the appropriate Reddit
|
||||
/// REST endpoint for enumerating post duplicates.
|
||||
pub async fn item(req: Request<Body>) -> Result<Response<Body>, String> {
|
||||
let path: String = format!("{}.json?{}&raw_json=1", req.uri().path(), req.uri().query().unwrap_or_default());
|
||||
let sub = req.param("sub").unwrap_or_default();
|
||||
let quarantined = can_access_quarantine(&req, &sub);
|
||||
|
||||
// Log the request in debugging mode
|
||||
#[cfg(debug_assertions)]
|
||||
dbg!(req.param("id").unwrap_or_default());
|
||||
|
||||
// Send the GET, and await JSON.
|
||||
match json(path, quarantined).await {
|
||||
// Process response JSON.
|
||||
Ok(response) => {
|
||||
let filters = get_filters(&req);
|
||||
let post = parse_post(&response[0]["data"]["children"][0]).await;
|
||||
let (duplicates, num_posts_filtered, all_posts_filtered) = parse_duplicates(&response[1], &filters).await;
|
||||
|
||||
// These are the values for the "before=", "after=", and "sort="
|
||||
// query params, respectively.
|
||||
let mut before: String = String::new();
|
||||
let mut after: String = String::new();
|
||||
let mut sort: String = String::new();
|
||||
|
||||
// FIXME: We have to perform a kludge to work around a Reddit API
|
||||
// bug.
|
||||
//
|
||||
// The JSON object in "data" will never contain a "before" value so
|
||||
// it is impossible to use it to determine our position in a
|
||||
// listing. We'll make do by getting the ID of the first post in
|
||||
// the listing, setting that as our "before" value, and ask Reddit
|
||||
// to give us a batch of duplicate posts up to that post.
|
||||
//
|
||||
// Likewise, if we provide a "before" request in the GET, the
|
||||
// result won't have an "after" in the JSON, in addition to missing
|
||||
// the "before." So we will have to use the final post in the list
|
||||
// of duplicates.
|
||||
//
|
||||
// That being said, we'll also need to capture the value of the
|
||||
// "sort=" parameter as well, so we will need to inspect the
|
||||
// query key-value pairs anyway.
|
||||
let l = duplicates.len();
|
||||
if l > 0 {
|
||||
// This gets set to true if "before=" is one of the GET params.
|
||||
let mut have_before: bool = false;
|
||||
|
||||
// This gets set to true if "after=" is one of the GET params.
|
||||
let mut have_after: bool = false;
|
||||
|
||||
// Inspect the query key-value pairs. We will need to record
|
||||
// the value of "sort=", along with checking to see if either
|
||||
// one of "before=" or "after=" are given.
|
||||
//
|
||||
// If we're in the middle of the batch (evidenced by the
|
||||
// presence of a "before=" or "after=" parameter in the GET),
|
||||
// then use the first post as the "before" reference.
|
||||
//
|
||||
// We'll do this iteratively. Better than with .map_or()
|
||||
// since a closure will continue to operate on remaining
|
||||
// elements even after we've determined one of "before=" or
|
||||
// "after=" (or both) are in the GET request.
|
||||
//
|
||||
// In practice, here should only ever be one of "before=" or
|
||||
// "after=" and never both.
|
||||
let query_str = req.uri().query().unwrap_or_default().to_string();
|
||||
|
||||
if !query_str.is_empty() {
|
||||
for param in query_str.split('&') {
|
||||
let kv: Vec<&str> = param.split('=').collect();
|
||||
if kv.len() < 2 {
|
||||
// Reject invalid query parameter.
|
||||
continue;
|
||||
}
|
||||
|
||||
let key: &str = kv[0];
|
||||
match key {
|
||||
"before" => have_before = true,
|
||||
"after" => have_after = true,
|
||||
"sort" => {
|
||||
let val: &str = kv[1];
|
||||
match val {
|
||||
"new" | "num_comments" => sort = val.to_string(),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if have_after {
|
||||
before = "t3_".to_owned();
|
||||
before.push_str(&duplicates[0].id);
|
||||
}
|
||||
|
||||
// Address potentially missing "after". If "before=" is in the
|
||||
// GET, then "after" will be null in the JSON (see FIXME
|
||||
// above).
|
||||
if have_before {
|
||||
// The next batch will need to start from one after the
|
||||
// last post in the current batch.
|
||||
after = "t3_".to_owned();
|
||||
after.push_str(&duplicates[l - 1].id);
|
||||
|
||||
// Here is where things get terrible. Notice that we
|
||||
// haven't set `before`. In order to do so, we will
|
||||
// need to know if there is a batch that exists before
|
||||
// this one, and doing so requires actually fetching the
|
||||
// previous batch. In other words, we have to do yet one
|
||||
// more GET to Reddit. There is no other way to determine
|
||||
// whether or not to define `before`.
|
||||
//
|
||||
// We'll mitigate that by requesting at most one duplicate.
|
||||
let new_path: String = format!(
|
||||
"{}.json?before=t3_{}&sort={}&limit=1&raw_json=1",
|
||||
req.uri().path(),
|
||||
&duplicates[0].id,
|
||||
if sort.is_empty() { "num_comments".to_string() } else { sort.clone() }
|
||||
);
|
||||
match json(new_path, true).await {
|
||||
Ok(response) => {
|
||||
if !response[1]["data"]["children"].as_array().unwrap_or(&Vec::new()).is_empty() {
|
||||
before = "t3_".to_owned();
|
||||
before.push_str(&duplicates[0].id);
|
||||
}
|
||||
}
|
||||
Err(msg) => {
|
||||
// Abort entirely if we couldn't get the previous
|
||||
// batch.
|
||||
return error(req, msg).await;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
after = response[1]["data"]["after"].as_str().unwrap_or_default().to_string();
|
||||
}
|
||||
}
|
||||
let url = req.uri().to_string();
|
||||
|
||||
template(DuplicatesTemplate {
|
||||
params: DuplicatesParams { before, after, sort },
|
||||
post,
|
||||
duplicates,
|
||||
prefs: Preferences::new(req),
|
||||
url,
|
||||
num_posts_filtered,
|
||||
all_posts_filtered,
|
||||
})
|
||||
}
|
||||
|
||||
// Process error.
|
||||
Err(msg) => {
|
||||
if msg == "quarantined" {
|
||||
let sub = req.param("sub").unwrap_or_default();
|
||||
quarantine(req, sub)
|
||||
} else {
|
||||
error(req, msg).await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DUPLICATES
|
||||
async fn parse_duplicates(json: &serde_json::Value, filters: &HashSet<String>) -> (Vec<Post>, u64, bool) {
|
||||
let post_duplicates: &Vec<Value> = &json["data"]["children"].as_array().map_or(Vec::new(), ToOwned::to_owned);
|
||||
let mut duplicates: Vec<Post> = Vec::new();
|
||||
|
||||
// Process each post and place them in the Vec<Post>.
|
||||
for val in post_duplicates.iter() {
|
||||
let post: Post = parse_post(val).await;
|
||||
duplicates.push(post);
|
||||
}
|
||||
|
||||
let (num_posts_filtered, all_posts_filtered) = filter_posts(&mut duplicates, filters);
|
||||
(duplicates, num_posts_filtered, all_posts_filtered)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue