Add trending links (#16917)

* Add trending links

* Add overriding specific links trendability

* Add link type to preview cards and only trend articles

Change trends review notifications from being sent every 5 minutes to being sent every 2 hours

Change threshold from 5 unique accounts to 15 unique accounts

* Fix tests
This commit is contained in:
Eugen Rochko 2021-11-25 13:07:38 +01:00 committed by GitHub
parent 46e62fc4b3
commit 6e50134a42
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
97 changed files with 2071 additions and 722 deletions

80
app/models/trends/base.rb Normal file
View file

@ -0,0 +1,80 @@
# frozen_string_literal: true
class Trends::Base
include Redisable
class_attribute :default_options
attr_reader :options
# @param [Hash] options
# @option options [Integer] :threshold Minimum amount of uses by unique accounts to begin calculating the score
# @option options [Integer] :review_threshold Minimum rank (lower = better) before requesting a review
# @option options [ActiveSupport::Duration] :max_score_cooldown For this amount of time, the peak score (if bigger than current score) is decayed-from
# @option options [ActiveSupport::Duration] :max_score_halflife How quickly a peak score decays
def initialize(options = {})
@options = self.class.default_options.merge(options)
end
def register(_status)
raise NotImplementedError
end
def add(*)
raise NotImplementedError
end
def refresh(*)
raise NotImplementedError
end
def request_review
raise NotImplementedError
end
def get(*)
raise NotImplementedError
end
def score(id)
redis.zscore("#{key_prefix}:all", id) || 0
end
def rank(id)
redis.zrevrank("#{key_prefix}:allowed", id)
end
def currently_trending_ids(allowed, limit)
redis.zrevrange(allowed ? "#{key_prefix}:allowed" : "#{key_prefix}:all", 0, limit.positive? ? limit - 1 : limit).map(&:to_i)
end
protected
def key_prefix
raise NotImplementedError
end
def recently_used_ids(at_time = Time.now.utc)
redis.smembers(used_key(at_time)).map(&:to_i)
end
def record_used_id(id, at_time = Time.now.utc)
redis.sadd(used_key(at_time), id)
redis.expire(used_key(at_time), 1.day.seconds)
end
def trim_older_items
redis.zremrangebyscore("#{key_prefix}:all", '-inf', '(1')
redis.zremrangebyscore("#{key_prefix}:allowed", '-inf', '(1')
end
def score_at_rank(rank)
redis.zrevrange("#{key_prefix}:allowed", 0, rank, with_scores: true).last&.last || 0
end
private
def used_key(at_time)
"#{key_prefix}:used:#{at_time.beginning_of_day.to_i}"
end
end

View file

@ -0,0 +1,98 @@
# frozen_string_literal: true
class Trends::History
include Enumerable
class Aggregate
include Redisable
def initialize(prefix, id, date_range)
@days = date_range.map { |date| Day.new(prefix, id, date.to_time(:utc)) }
end
def uses
redis.mget(*@days.map { |day| day.key_for(:uses) }).map(&:to_i).sum
end
def accounts
redis.pfcount(*@days.map { |day| day.key_for(:accounts) })
end
end
class Day
include Redisable
EXPIRE_AFTER = 14.days.seconds
def initialize(prefix, id, day)
@prefix = prefix
@id = id
@day = day.beginning_of_day
end
attr_reader :day
def accounts
redis.pfcount(key_for(:accounts))
end
def uses
redis.get(key_for(:uses))&.to_i || 0
end
def add(account_id)
redis.pipelined do
redis.incrby(key_for(:uses), 1)
redis.pfadd(key_for(:accounts), account_id)
redis.expire(key_for(:uses), EXPIRE_AFTER)
redis.expire(key_for(:accounts), EXPIRE_AFTER)
end
end
def as_json
{ day: day.to_i.to_s, accounts: accounts.to_s, uses: uses.to_s }
end
def key_for(suffix)
case suffix
when :accounts
"#{key_prefix}:#{suffix}"
when :uses
key_prefix
end
end
def key_prefix
"activity:#{@prefix}:#{@id}:#{day.to_i}"
end
end
def initialize(prefix, id)
@prefix = prefix
@id = id
end
def get(date)
Day.new(@prefix, @id, date)
end
def add(account_id, at_time = Time.now.utc)
Day.new(@prefix, @id, at_time).add(account_id)
end
def aggregate(date_range)
Aggregate.new(@prefix, @id, date_range)
end
def each(&block)
if block_given?
(0...7).map { |i| block.call(get(i.days.ago)) }
else
to_enum(:each)
end
end
def as_json(*)
map(&:as_json)
end
end

117
app/models/trends/links.rb Normal file
View file

@ -0,0 +1,117 @@
# frozen_string_literal: true
class Trends::Links < Trends::Base
PREFIX = 'trending_links'
self.default_options = {
threshold: 15,
review_threshold: 10,
max_score_cooldown: 2.days.freeze,
max_score_halflife: 8.hours.freeze,
}
def register(status, at_time = Time.now.utc)
original_status = status.reblog? ? status.reblog : status
return unless original_status.public_visibility? && status.public_visibility? &&
!original_status.account.silenced? && !status.account.silenced? &&
!original_status.spoiler_text?
original_status.preview_cards.each do |preview_card|
add(preview_card, status.account_id, at_time) if preview_card.appropriate_for_trends?
end
end
def add(preview_card, account_id, at_time = Time.now.utc)
preview_card.history.add(account_id, at_time)
record_used_id(preview_card.id, at_time)
end
def get(allowed, limit)
preview_card_ids = currently_trending_ids(allowed, limit)
preview_cards = PreviewCard.where(id: preview_card_ids).index_by(&:id)
preview_card_ids.map { |id| preview_cards[id] }.compact
end
def refresh(at_time = Time.now.utc)
preview_cards = PreviewCard.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq)
calculate_scores(preview_cards, at_time)
trim_older_items
end
def request_review
preview_cards = PreviewCard.where(id: currently_trending_ids(false, -1))
preview_cards_requiring_review = preview_cards.filter_map do |preview_card|
next unless would_be_trending?(preview_card.id) && !preview_card.trendable? && preview_card.requires_review_notification?
if preview_card.provider.nil?
preview_card.provider = PreviewCardProvider.create(domain: preview_card.domain, requested_review_at: Time.now.utc)
else
preview_card.provider.touch(:requested_review_at)
end
preview_card
end
return if preview_cards_requiring_review.empty?
User.staff.includes(:account).find_each do |user|
AdminMailer.new_trending_links(user.account, preview_cards_requiring_review).deliver_later! if user.allows_trending_tag_emails?
end
end
protected
def key_prefix
PREFIX
end
private
def calculate_scores(preview_cards, at_time)
preview_cards.each do |preview_card|
expected = preview_card.history.get(at_time - 1.day).accounts.to_f
expected = 1.0 if expected.zero?
observed = preview_card.history.get(at_time).accounts.to_f
max_time = preview_card.max_score_at
max_score = preview_card.max_score
max_score = 0 if max_time.nil? || max_time < (at_time - options[:max_score_cooldown])
score = begin
if expected > observed || observed < options[:threshold]
0
else
((observed - expected)**2) / expected
end
end
if score > max_score
max_score = score
max_time = at_time
# Not interested in triggering any callbacks for this
preview_card.update_columns(max_score: max_score, max_score_at: max_time)
end
decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
if decaying_score.zero?
redis.zrem("#{PREFIX}:all", preview_card.id)
redis.zrem("#{PREFIX}:allowed", preview_card.id)
else
redis.zadd("#{PREFIX}:all", decaying_score, preview_card.id)
if preview_card.trendable?
redis.zadd("#{PREFIX}:allowed", decaying_score, preview_card.id)
else
redis.zrem("#{PREFIX}:allowed", preview_card.id)
end
end
end
end
def would_be_trending?(id)
score(id) > score_at_rank(options[:review_threshold] - 1)
end
end

111
app/models/trends/tags.rb Normal file
View file

@ -0,0 +1,111 @@
# frozen_string_literal: true
class Trends::Tags < Trends::Base
PREFIX = 'trending_tags'
self.default_options = {
threshold: 15,
review_threshold: 10,
max_score_cooldown: 2.days.freeze,
max_score_halflife: 4.hours.freeze,
}
def register(status, at_time = Time.now.utc)
original_status = status.reblog? ? status.reblog : status
return unless original_status.public_visibility? && status.public_visibility? &&
!original_status.account.silenced? && !status.account.silenced?
original_status.tags.each do |tag|
add(tag, status.account_id, at_time) if tag.usable?
end
end
def add(tag, account_id, at_time = Time.now.utc)
tag.history.add(account_id, at_time)
record_used_id(tag.id, at_time)
end
def refresh(at_time = Time.now.utc)
tags = Tag.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq)
calculate_scores(tags, at_time)
trim_older_items
end
def get(allowed, limit)
tag_ids = currently_trending_ids(allowed, limit)
tags = Tag.where(id: tag_ids).index_by(&:id)
tag_ids.map { |id| tags[id] }.compact
end
def request_review
tags = Tag.where(id: currently_trending_ids(false, -1))
tags_requiring_review = tags.filter_map do |tag|
next unless would_be_trending?(tag.id) && !tag.trendable? && tag.requires_review_notification?
tag.touch(:requested_review_at)
tag
end
return if tags_requiring_review.empty?
User.staff.includes(:account).find_each do |user|
AdminMailer.new_trending_tags(user.account, tags_requiring_review).deliver_later! if user.allows_trending_tag_emails?
end
end
protected
def key_prefix
PREFIX
end
private
def calculate_scores(tags, at_time)
tags.each do |tag|
expected = tag.history.get(at_time - 1.day).accounts.to_f
expected = 1.0 if expected.zero?
observed = tag.history.get(at_time).accounts.to_f
max_time = tag.max_score_at
max_score = tag.max_score
max_score = 0 if max_time.nil? || max_time < (at_time - options[:max_score_cooldown])
score = begin
if expected > observed || observed < options[:threshold]
0
else
((observed - expected)**2) / expected
end
end
if score > max_score
max_score = score
max_time = at_time
# Not interested in triggering any callbacks for this
tag.update_columns(max_score: max_score, max_score_at: max_time)
end
decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
if decaying_score.zero?
redis.zrem("#{PREFIX}:all", tag.id)
redis.zrem("#{PREFIX}:allowed", tag.id)
else
redis.zadd("#{PREFIX}:all", decaying_score, tag.id)
if tag.trendable?
redis.zadd("#{PREFIX}:allowed", decaying_score, tag.id)
else
redis.zrem("#{PREFIX}:allowed", tag.id)
end
end
end
end
def would_be_trending?(id)
score(id) > score_at_rank(options[:review_threshold] - 1)
end
end