Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding a scaled sort, to boost smaller communities. #3907

Merged
merged 11 commits into from
Sep 6, 2023
2 changes: 1 addition & 1 deletion crates/apub/src/activities/create_or_update/post.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl ActivityHandler for CreateOrUpdatePage {
PostLike::like(&mut context.pool(), &like_form).await?;

// Calculate initial hot_rank for post
PostAggregates::update_hot_rank(&mut context.pool(), post.id).await?;
PostAggregates::update_ranks(&mut context.pool(), post.id).await?;

Ok(())
}
Expand Down
27 changes: 23 additions & 4 deletions crates/db_schema/src/aggregates/post_aggregates.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
use crate::{
aggregates::structs::PostAggregates,
newtypes::PostId,
schema::post_aggregates,
utils::{functions::hot_rank, get_conn, DbPool},
schema::{community_aggregates, post, post_aggregates},
utils::{
functions::{hot_rank, scaled_rank},
get_conn,
DbPool,
},
};
use diesel::{result::Error, ExpressionMethods, QueryDsl};
use diesel::{result::Error, ExpressionMethods, JoinOnDsl, QueryDsl};
use diesel_async::RunQueryDsl;

impl PostAggregates {
Expand All @@ -16,9 +20,19 @@ impl PostAggregates {
.await
}

pub async fn update_hot_rank(pool: &mut DbPool<'_>, post_id: PostId) -> Result<Self, Error> {
pub async fn update_ranks(pool: &mut DbPool<'_>, post_id: PostId) -> Result<Self, Error> {
let conn = &mut get_conn(pool).await?;

// Diesel can't update based on a join, which is necessary for the scaled_rank
// https://github.com/diesel-rs/diesel/issues/1478
// Just select the users_active_month manually for now, since it's a single post anyway
let users_active_month = community_aggregates::table
Copy link
Member Author

@dessalines dessalines Aug 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can probably move this query down into the update statement, to avoid the round-trip cost.

edit: tried and failed at this.

.select(community_aggregates::users_active_month)
.inner_join(post::table.on(community_aggregates::community_id.eq(post::community_id)))
.filter(post::id.eq(post_id))
.first::<i64>(conn)
.await?;

diesel::update(post_aggregates::table)
.filter(post_aggregates::post_id.eq(post_id))
.set((
Expand All @@ -27,6 +41,11 @@ impl PostAggregates {
post_aggregates::score,
post_aggregates::newest_comment_time_necro,
)),
post_aggregates::scaled_rank.eq(scaled_rank(
post_aggregates::score,
post_aggregates::published,
users_active_month,
)),
))
.get_result::<Self>(conn)
.await
Expand Down
12 changes: 7 additions & 5 deletions crates/db_schema/src/aggregates/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ pub struct CommentAggregates {
pub published: DateTime<Utc>,
/// The total number of children in this comment branch.
pub child_count: i32,
pub hot_rank: i32,
pub hot_rank: f64,
pub controversy_rank: f64,
}

#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(Queryable, Associations, Identifiable, TS))]
#[cfg_attr(feature = "full", diesel(table_name = community_aggregates))]
#[cfg_attr(
Expand All @@ -55,7 +55,7 @@ pub struct CommunityAggregates {
pub users_active_month: i64,
/// The number of users with any activity in the last year.
pub users_active_half_year: i64,
pub hot_rank: i32,
pub hot_rank: f64,
}

#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)]
Expand Down Expand Up @@ -95,11 +95,13 @@ pub struct PostAggregates {
pub featured_community: bool,
/// If the post is featured on the site / to local.
pub featured_local: bool,
pub hot_rank: i32,
pub hot_rank_active: i32,
pub hot_rank: f64,
pub hot_rank_active: f64,
pub community_id: CommunityId,
pub creator_id: PersonId,
pub controversy_rank: f64,
/// A rank that amplifies smaller communities
pub scaled_rank: f64,
}

#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
Expand Down
2 changes: 2 additions & 0 deletions crates/db_schema/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use ts_rs::TS;
)]
#[cfg_attr(feature = "full", DbValueStyle = "verbatim")]
#[cfg_attr(feature = "full", ts(export))]
// TODO add the controversial and scaled rankings to the doc below
/// The post sort types. See here for descriptions: https://join-lemmy.org/docs/en/users/03-votes-and-ranking.html
pub enum SortType {
#[default]
Expand All @@ -75,6 +76,7 @@ pub enum SortType {
TopSixMonths,
TopNineMonths,
Controversial,
Scaled,
}
dessalines marked this conversation as resolved.
Show resolved Hide resolved

#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy)]
Expand Down
21 changes: 17 additions & 4 deletions crates/db_schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ diesel::table! {
downvotes -> Int8,
published -> Timestamptz,
child_count -> Int4,
hot_rank -> Int4,
hot_rank -> Float8,
controversy_rank -> Float8,
}
}
Expand Down Expand Up @@ -198,7 +198,7 @@ diesel::table! {
users_active_week -> Int8,
users_active_month -> Int8,
users_active_half_year -> Int8,
hot_rank -> Int4,
hot_rank -> Float8,
}
}

Expand Down Expand Up @@ -299,6 +299,16 @@ diesel::table! {
}
}

diesel::table! {
image_upload (id) {
id -> Int4,
local_user_id -> Int4,
pictrs_alias -> Text,
pictrs_delete_token -> Text,
published -> Timestamptz,
}
}

diesel::table! {
instance (id) {
id -> Int4,
Expand Down Expand Up @@ -683,11 +693,12 @@ diesel::table! {
newest_comment_time -> Timestamptz,
featured_community -> Bool,
featured_local -> Bool,
hot_rank -> Int4,
hot_rank_active -> Int4,
hot_rank -> Float8,
hot_rank_active -> Float8,
community_id -> Int4,
creator_id -> Int4,
controversy_rank -> Float8,
scaled_rank -> Float8,
}
}

Expand Down Expand Up @@ -893,6 +904,7 @@ diesel::joinable!(custom_emoji_keyword -> custom_emoji (custom_emoji_id));
diesel::joinable!(email_verification -> local_user (local_user_id));
diesel::joinable!(federation_allowlist -> instance (instance_id));
diesel::joinable!(federation_blocklist -> instance (instance_id));
diesel::joinable!(image_upload -> local_user (local_user_id));
diesel::joinable!(local_site -> site (site_id));
diesel::joinable!(local_site_rate_limit -> local_site (local_site_id));
diesel::joinable!(local_user -> person (person_id));
Expand Down Expand Up @@ -967,6 +979,7 @@ diesel::allow_tables_to_appear_in_same_query!(
email_verification,
federation_allowlist,
federation_blocklist,
image_upload,
instance,
language,
local_site,
Expand Down
8 changes: 6 additions & 2 deletions crates/db_schema/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ pub fn naive_now() -> DateTime<Utc> {

pub fn post_to_comment_sort_type(sort: SortType) -> CommentSortType {
match sort {
SortType::Active | SortType::Hot => CommentSortType::Hot,
SortType::Active | SortType::Hot | SortType::Scaled => CommentSortType::Hot,
SortType::New | SortType::NewComments | SortType::MostComments => CommentSortType::New,
SortType::Old => CommentSortType::Old,
SortType::Controversial => CommentSortType::Controversial,
Expand Down Expand Up @@ -384,7 +384,11 @@ pub mod functions {
use diesel::sql_types::{BigInt, Text, Timestamptz};

sql_function! {
fn hot_rank(score: BigInt, time: Timestamptz) -> Integer;
fn hot_rank(score: BigInt, time: Timestamptz) -> Double;
}

sql_function! {
fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double;
}

sql_function! {
Expand Down
2 changes: 1 addition & 1 deletion crates/db_views/src/comment_report_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ mod tests {
downvotes: 0,
published: agg.published,
child_count: 0,
hot_rank: 1728,
hot_rank: 0.1728,
controversy_rank: 0.0,
},
my_vote: None,
Expand Down
2 changes: 1 addition & 1 deletion crates/db_views/src/comment_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ mod tests {
downvotes: 0,
published: agg.published,
child_count: 5,
hot_rank: 1728,
hot_rank: 0.1728,
controversy_rank: 0.0,
},
}
Expand Down
8 changes: 6 additions & 2 deletions crates/db_views/src/post_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,9 @@ fn queries<'a>() -> Queries<
SortType::Hot => query
.then_order_by(post_aggregates::hot_rank.desc())
.then_order_by(post_aggregates::published.desc()),
SortType::Scaled => query
.then_order_by(post_aggregates::scaled_rank.desc())
.then_order_by(post_aggregates::published.desc()),
SortType::Controversial => query.then_order_by(post_aggregates::controversy_rank.desc()),
SortType::New => query.then_order_by(post_aggregates::published.desc()),
SortType::Old => query.then_order_by(post_aggregates::published.asc()),
Expand Down Expand Up @@ -1154,9 +1157,10 @@ mod tests {
newest_comment_time: inserted_post.published,
featured_community: false,
featured_local: false,
hot_rank: 1728,
hot_rank_active: 1728,
hot_rank: 0.1728,
hot_rank_active: 0.1728,
controversy_rank: 0.0,
scaled_rank: 0.3621,
community_id: inserted_post.community_id,
creator_id: inserted_post.creator_id,
},
Expand Down
2 changes: 1 addition & 1 deletion crates/db_views_actor/src/community_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ fn queries<'a>() -> Queries<
}

match options.sort.unwrap_or(Hot) {
Hot | Active => query = query.order_by(community_aggregates::hot_rank.desc()),
Hot | Active | Scaled => query = query.order_by(community_aggregates::hot_rank.desc()),
NewComments | TopDay | TopTwelveHour | TopSixHour | TopHour => {
query = query.order_by(community_aggregates::users_active_day.desc())
}
Expand Down
87 changes: 87 additions & 0 deletions migrations/2023-08-23-182533_scaled_rank/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
-- Down migration, part 1: remove the scaled_rank function and turn the
-- hot_rank columns back into integers.
-- Remove the scaled_rank() SQL function.
DROP FUNCTION scaled_rank;

-- Convert each hot_rank column back to integer and restore 1728 as its default.
-- NOTE(review): 1728 appears to be the integer hot_rank of a score-1 item at
-- age zero, i.e. floor(10000 * log(4) / power(2, 1.8)) -- confirm.
ALTER TABLE community_aggregates
ALTER COLUMN hot_rank TYPE integer,
ALTER COLUMN hot_rank SET DEFAULT 1728;

ALTER TABLE comment_aggregates
ALTER COLUMN hot_rank TYPE integer,
ALTER COLUMN hot_rank SET DEFAULT 1728;

-- post_aggregates carries two hot-rank columns; both are reverted.
ALTER TABLE post_aggregates
ALTER COLUMN hot_rank TYPE integer,
ALTER COLUMN hot_rank SET DEFAULT 1728,
ALTER COLUMN hot_rank_active TYPE integer,
ALTER COLUMN hot_rank_active SET DEFAULT 1728;

-- Change back to integer version
-- The existing function is dropped first; the integer-returning version is
-- created by the statement that follows.
DROP FUNCTION hot_rank (numeric, published timestamp with time zone);

-- Integer-returning hot_rank, restored by this down migration.
--   score:     the score to rank by
--   published: timestamp the age is measured from
-- Returns floor(10000 * log(greatest(1, score + 3)) / power(hours_diff + 2, 1.8))
-- as an integer, where hours_diff is the number of hours between `published`
-- and now(); returns 0 when `published` is not in the past.
-- NOTE(review): the function is declared IMMUTABLE although its body reads
-- now(); presumably kept to match the pre-migration definition -- confirm.
CREATE OR REPLACE FUNCTION hot_rank (score numeric, published timestamp with time zone)
RETURNS integer
AS $$
DECLARE
-- Age in hours: seconds elapsed since `published`, divided by 3600.
hours_diff numeric := EXTRACT(EPOCH FROM (now() - published)) / 3600;
BEGIN
IF (hours_diff > 0) THEN
-- greatest(1, ...) keeps the logarithm's argument at 1 or above, so the
-- numerator is never negative; the +2 keeps the divisor away from zero.
RETURN floor(10000 * log(greatest (1, score + 3)) / power((hours_diff + 2), 1.8))::integer;
ELSE
-- If the post is from the future, set the hot score to 0. Otherwise a post
-- could be gamed to always stay on top, even with only 1 vote, by giving
-- it a future publish time.
RETURN 0;
END IF;
END;
$$
LANGUAGE plpgsql
IMMUTABLE PARALLEL SAFE;

-- Down migration, part 3: drop the scaled_rank column and remove the
-- 'Scaled' value from sort_type_enum.
ALTER TABLE post_aggregates
DROP COLUMN scaled_rank;

-- The following code is necessary because postgres can't remove
-- a single enum value.
-- The default is dropped first so the column's type can be swapped below.
ALTER TABLE local_user
ALTER default_sort_type DROP DEFAULT;

-- Move users who chose 'Scaled' to 'Hot', since 'Scaled' will not exist in
-- the recreated enum.
UPDATE
local_user
SET
default_sort_type = 'Hot'
WHERE
default_sort_type = 'Scaled';

-- rename the old enum
ALTER TYPE sort_type_enum RENAME TO sort_type_enum__;

-- create the new enum
-- (this list has no 'Scaled' entry)
CREATE TYPE sort_type_enum AS ENUM (
'Active',
'Hot',
'New',
'Old',
'TopDay',
'TopWeek',
'TopMonth',
'TopYear',
'TopAll',
'MostComments',
'NewComments',
'TopHour',
'TopSixHour',
'TopTwelveHour',
'TopThreeMonths',
'TopSixMonths',
'TopNineMonths'
);

-- alter all your enum columns
-- Values are cast through text because the old and new enum are distinct types.
ALTER TABLE local_user
ALTER COLUMN default_sort_type TYPE sort_type_enum
USING default_sort_type::text::sort_type_enum;

-- Restore 'Active' as the column default now that the new type is in place.
ALTER TABLE local_user
ALTER default_sort_type SET DEFAULT 'Active';

-- drop the old enum
DROP TYPE sort_type_enum__;

Loading