switch (back) to postgres fts engine for fast search & timeline filters
parent
4c8591fbea
commit
f03960382b
|
@ -7,7 +7,7 @@ module FilterHelper
|
|||
|
||||
status = status.reblog if status.reblog?
|
||||
|
||||
if Status.where(id: status.id).regex_filtered_by_account(receiver_id).exists?
|
||||
if Status.where(id: status.id).search_filtered_by_account(receiver_id).exists?
|
||||
redis.sadd("filtered_statuses:#{receiver_id}", status.id)
|
||||
return true
|
||||
end
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
require 'sixarm_ruby_unaccent'
|
||||
|
||||
# Helpers for turning a user-supplied search string into a regular-expression
# pattern (the `\y` word-boundary escapes it emits are PostgreSQL regex syntax).
module SearchHelper
  # Normalizes +query+ and expands the supported shorthand into a regex pattern.
  #
  # Steps, in order:
  # 1. Lower-case and unaccent the query, drop every character that is not a
  #    word character, a space or punctuation, and collapse runs of spaces.
  # 2. If the query contains a colon, split on the first one:
  #    * `tag:` / `tags:` -> `^tag (a|b|...)`, one alternative per
  #      whitespace-separated term;
  #    * `subj:` / `text:` / `desc:` -> `^<prefix> .*<rest>`;
  #    * any other prefix leaves the query untouched.
  # 3. Rewrite every double-quoted term `"foo"` as `\yfoo\y`.
  #
  # @param query [String, nil] raw user input
  # @return [String] the expanded pattern, or '' when the input is blank
  #   (before or after normalization)
  def expand_search_query(query)
    return '' if query.blank?

    query = query.downcase.unaccent.gsub(/[^\p{Word} [:punct:]]/, '').gsub(/ +/, ' ').strip
    return '' if query.blank?

    if query.include?(':')
      prefix, rest = query.split(':', 2)

      if %w(tag tags).include?(prefix)
        query = "^tag (#{rest.split.join('|')})"
      elsif %w(subj text desc).include?(prefix)
        query = "^#{prefix} .*#{rest}"
      end
    end

    # Match each quoted term on its own. A greedy `.*` would swallow everything
    # between the first and the last quote, merging `"a" "b"` into one span.
    query.gsub(/"([^"]*)"/, '\\y\1\\y')
  end
end
|
|
@ -3,7 +3,6 @@
|
|||
class Bangtags
|
||||
include ModerationHelper
|
||||
include ServiceAccountHelper
|
||||
include SearchHelper
|
||||
|
||||
attr_reader :status, :account
|
||||
|
||||
|
@ -764,7 +763,7 @@ class Bangtags
|
|||
q = cmd[1..-1].join.strip
|
||||
next if q.blank?
|
||||
begin
|
||||
data = @account.statuses.regex(expand_search_query(q))
|
||||
data = @account.statuses.search(q.unaccent)
|
||||
.reorder(:created_at)
|
||||
.pluck(:created_at)
|
||||
.map { |d| d.strftime('%Y-%m') }
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
# == Schema Information
|
||||
#
|
||||
# Table name: normalized_statuses
|
||||
#
|
||||
# id :bigint(8) not null, primary key
|
||||
# status_id :bigint(8)
|
||||
# text :text
|
||||
#
|
||||
|
||||
# Companion record of a Status, linked through `status_id`; at most one row
# may exist per status (enforced by the uniqueness validation below).
class NormalizedStatus < ApplicationRecord
  belongs_to :status, inverse_of: :normalized_status

  validates :status_id, uniqueness: true
end
|
|
@ -30,6 +30,7 @@
|
|||
# edited :boolean
|
||||
# boostable :boolean
|
||||
# reject_replies :boolean
|
||||
# tsv :tsvector
|
||||
#
|
||||
|
||||
class Status < ApplicationRecord
|
||||
|
@ -82,7 +83,6 @@ class Status < ApplicationRecord
|
|||
has_one :status_stat, inverse_of: :status
|
||||
has_one :poll, inverse_of: :status, dependent: :destroy
|
||||
has_one :destructing_status, inverse_of: :status, dependent: :destroy
|
||||
has_one :normalized_status, inverse_of: :status, dependent: :destroy
|
||||
has_one :imported_status, inverse_of: :status, dependent: :destroy
|
||||
has_one :sharekey, inverse_of: :status, dependent: :destroy
|
||||
|
||||
|
@ -118,10 +118,10 @@ class Status < ApplicationRecord
|
|||
scope :mention_not_excluded_by_account, ->(account) { left_outer_joins(:mentions).where('mentions.account_id IS NULL OR mentions.account_id NOT IN (?)', account.excluded_from_timeline_account_ids) }
|
||||
scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
|
||||
|
||||
scope :like, ->(needle) { joins(:normalized_status).select('statuses.*').where('normalized_statuses.text LIKE f_normalize(?)', needle) }
|
||||
scope :regex, ->(needle) { joins(:normalized_status).select('statuses.*').where('normalized_statuses.text ~ f_normalize(?)', needle) }
|
||||
scope :regex_filtered_by_account, ->(account_id) { joins(:normalized_status).select('statuses.*').where('normalized_statuses.text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account_id) }
|
||||
scope :regex_not_filtered_by_account, ->(account_id) { joins(:normalized_status).select('statuses.*').where('normalized_statuses.text !~ ALL(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account_id) }
|
||||
# Full-text search scopes over the `tsv` column. Every query is parsed with the
# same 'fedi' text search configuration as the other scopes here, so the query
# is tokenized consistently between them.

# Statuses whose `tsv` matches the web-search style query +needle+.
scope :search, ->(needle) { where("tsv @@ websearch_to_tsquery('fedi', ?)", needle) }
# Complement of `search` (rows for which the match is false).
scope :search_not, ->(needle) { where.not("tsv @@ websearch_to_tsquery('fedi', ?)", needle) }
# Statuses matching at least one of the account's custom filter phrases; the
# phrases are OR-ed together by the `tsquery_union` aggregate.
scope :search_filtered_by_account, ->(account_id) { where("tsv @@ (SELECT tsquery_union(websearch_to_tsquery('fedi', phrase)) FROM custom_filters WHERE account_id = ?)", account_id) }
# Statuses matching none of the account's custom filter phrases.
# `IS NOT TRUE` is used instead of `NOT (...)`: when the account has no filters
# the aggregate yields NULL, and `NOT NULL` would be NULL and exclude every row.
scope :search_not_filtered_by_account, ->(account_id) { where("(tsv @@ (SELECT tsquery_union(websearch_to_tsquery('fedi', phrase)) FROM custom_filters WHERE account_id = ?)) IS NOT TRUE", account_id) }
|
||||
|
||||
scope :not_missing_media_desc, -> { left_outer_joins(:media_attachments).select('statuses.*').where('media_attachments.id IS NULL OR media_attachments.description IS NOT NULL') }
|
||||
|
||||
|
@ -362,8 +362,6 @@ class Status < ApplicationRecord
|
|||
after_save :process_bangtags, if: :local?
|
||||
|
||||
class << self
|
||||
include SearchHelper
|
||||
|
||||
def search_for(term, account = nil, limit = 33, offset = 0)
|
||||
return none if account.nil?
|
||||
if term.start_with?('me:')
|
||||
|
@ -371,12 +369,13 @@ class Status < ApplicationRecord
|
|||
query = account.statuses
|
||||
else
|
||||
query = Status.where(account_id: account.id)
|
||||
.or(Status.where(account_id: account.following, visibility: [:private, :local, :unlisted]))
|
||||
.or(Status.where(visibility: [:local, :public]))
|
||||
.or(Status.where(account_id: account.following, visibility: [:private, :unlisted]))
|
||||
.or(Status.where(id: account.mentions.select(:status_id)))
|
||||
end
|
||||
return none if term.blank? || term.length < 3
|
||||
return none if term.blank?
|
||||
query = query.without_reblogs
|
||||
.regex(expand_search_query(term))
|
||||
.search(term.unaccent)
|
||||
.offset(offset).limit(limit)
|
||||
apply_timeline_filters(query, account, true)
|
||||
rescue ActiveRecord::StatementInvalid
|
||||
|
@ -583,9 +582,9 @@ class Status < ApplicationRecord
|
|||
query = query.mention_not_excluded_by_account(account)
|
||||
unless account.custom_filters.nil?
|
||||
if account.user.invert_filters
|
||||
query = query.regex_filtered_by_account(account.id)
|
||||
query = query.search_filtered_by_account(account.id)
|
||||
else
|
||||
query = query.regex_not_filtered_by_account(account.id)
|
||||
query = query.search_not_filtered_by_account(account.id)
|
||||
end
|
||||
end
|
||||
query = query.not_missing_media_desc if account.filter_undescribed?
|
||||
|
|
|
@ -45,7 +45,7 @@ en:
|
|||
setting_skin: Reskins the selected Mastodon flavour
|
||||
setting_theme: Affects how Mastodon looks when you're logged in from any device.
|
||||
username: Your username will be unique on %{domain}
|
||||
phrase_html: "<code>"thing"</code> - match whole words<br/><code>tags: "tag1" tag2 ...</code> - match tags (don't include <code>#</code>)<br/><code>subj: thing</code> - match subject or CW</code><br/><code>text: thing</code> - match text<br/><code>desc: thing</code> - match media descriptions"
|
||||
phrase_html: "<strong>Examples</strong><br>Containing any terms: <code>this OR that</code><br>Containing all terms: <code>this that</code>, <code>this AND that</code><br>Containing an exact term: <code>"this thing"</code><br>Grouping: <code>this OR ("this thing" AND "that thing")</code>"
|
||||
featured_tag:
|
||||
name: 'You might want to use one of these:'
|
||||
imports:
|
||||
|
@ -113,7 +113,7 @@ en:
|
|||
note: Bio
|
||||
otp_attempt: Two-factor code
|
||||
password: Password
|
||||
phrase: Regular expression
|
||||
phrase: Filter query
|
||||
setting_advanced_layout: Enable advanced web interface
|
||||
setting_aggregate_reblogs: Group repeats in timelines
|
||||
setting_auto_play_gif: Auto-play animated GIFs
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
# Moves the normalized search text out of `statuses.normalized_text` into a
# dedicated `normalized_statuses` table (one row per status).
class CreateNormalizedStatuses < ActiveRecord::Migration[5.2]
  # Creates the table, copies the existing text over and drops the old column
  # together with its trigram index.
  def up
    create_table :normalized_statuses do |t|
      t.references :status, null: false, foreign_key: {on_delete: :cascade}, index: {unique: true}
      t.text :text
    end

    safety_assured do
      remove_index :statuses, name: 'index_statuses_on_normalized_text_trgm'
      execute 'INSERT INTO normalized_statuses (status_id, text) SELECT id, normalized_text FROM statuses'
      remove_column :statuses, :normalized_text
    end
  end

  # Restores `statuses.normalized_text` from `normalized_statuses` and drops
  # the table.
  #
  # NOTE(review): the trigram index removed from `statuses` in `up` is not
  # recreated here; its definition is not visible in this migration.
  def down
    safety_assured do
      # The column must exist before it can be back-filled.
      add_column :statuses, :normalized_text, :text, null: false, default: ''

      # Join on status_id (the table has no column mirroring statuses.id under
      # another name) and coalesce NULL text, since the target column is NOT NULL.
      execute "UPDATE statuses SET normalized_text = COALESCE(s.text, '') FROM (SELECT status_id, text FROM normalized_statuses) AS s WHERE statuses.id = s.status_id"

      # This migration never creates the index on normalized_statuses, so only
      # remove it when some other migration has left it in place.
      if index_name_exists?(:normalized_statuses, 'index_statuses_on_normalized_text_trgm')
        remove_index :normalized_statuses, name: 'index_statuses_on_normalized_text_trgm'
      end

      drop_table :normalized_statuses
    end
  end
end
|
|
@ -0,0 +1,42 @@
|
|||
# Replaces the regex-based search (the `normalized_statuses` table and the
# `f_normalize` / `f_unaccent` functions) with PostgreSQL full-text search on a
# generated `statuses.tsv` column.
class MigrateBackToFts < ActiveRecord::Migration[5.2]
  TRGM_INDEX_NAME = 'index_statuses_on_normalized_text_trgm'.freeze

  # Drops the old search objects and creates:
  # * `f_strip_mentions(text)` - removes `<span>` tags and `>@name<` runs;
  # * `tsquery_union(tsquery)` - aggregate built on `tsquery_or`;
  # * the `fedi` text search configuration - a copy of `simple` whose word
  #   mappings run the `unaccent` dictionary before `simple`;
  # * `statuses.tsv` - stored generated tsvector over spoiler text and text.
  #
  # NOTE(review): the `unaccent` dictionary presumably comes from the unaccent
  # extension being installed already - confirm for fresh databases.
  def up
    if table_exists? :normalized_statuses
      # The index may be absent (this migration does not create it); dropping
      # the table removes any remaining indexes anyway.
      if index_name_exists?(:normalized_statuses, TRGM_INDEX_NAME)
        remove_index :normalized_statuses, name: TRGM_INDEX_NAME
      end
      drop_table :normalized_statuses
    end

    safety_assured do
      execute <<-SQL.squish
        DROP FUNCTION IF EXISTS public.f_normalize;
        DROP FUNCTION IF EXISTS public.f_unaccent;

        CREATE OR REPLACE FUNCTION public.f_strip_mentions(text)
          RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE STRICT AS
          $func$
            SELECT regexp_replace(
              regexp_replace($1, '</?span>', '', 'g'),
              '>@[^[:space:]]+<', '><', 'g'
            )
          $func$;

        CREATE OR REPLACE AGGREGATE tsquery_union(tsquery) (
          SFUNC = tsquery_or,
          STYPE = tsquery,
          PARALLEL = SAFE
        );

        CREATE TEXT SEARCH CONFIGURATION fedi ( COPY = simple );

        ALTER TEXT SEARCH CONFIGURATION fedi
          ALTER MAPPING FOR hword, hword_part, word
          WITH unaccent, simple;

        ALTER TABLE statuses
          ADD COLUMN tsv tsvector
          GENERATED ALWAYS AS (
            to_tsvector('fedi', f_strip_mentions(spoiler_text || ' ' || text))
          ) STORED;
      SQL
    end
  end

  # The dropped table contents and function bodies cannot be rebuilt from
  # here. Fail loudly instead of letting a rollback silently succeed, which
  # would leave `tsv` and `fedi` in place and make a later `up` fail.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end
|
|
@ -0,0 +1,15 @@
|
|||
# Adds a GIN index on `statuses.tsv` for full-text search.
class AddIndexToTsv < ActiveRecord::Migration[5.2]
  # CREATE/DROP INDEX CONCURRENTLY cannot run inside a transaction block.
  disable_ddl_transaction!

  # Builds the index without blocking writes to `statuses`.
  def up
    safety_assured do
      execute 'CREATE INDEX CONCURRENTLY statuses_text_vector_idx ON statuses USING GIN(tsv)'
    end
  end

  # Removes the index. PostgreSQL's DROP INDEX takes only the index name (an
  # `ON <table>` clause is a syntax error); IF EXISTS keeps the rollback usable
  # when the index is already gone.
  def down
    safety_assured do
      execute 'DROP INDEX CONCURRENTLY IF EXISTS statuses_text_vector_idx'
    end
  end
end
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +0,0 @@
|
|||
# Test fabricator for NormalizedStatus: no status attached by default and a
# placeholder text value.
Fabricator(:normalized_status) do
  status(nil)
  text('MyText')
end
|
|
@ -1,5 +0,0 @@
|
|||
require 'rails_helper'
|
||||
|
||||
# Placeholder model spec: it only registers a pending example.
RSpec.describe(NormalizedStatus, type: :model) do
  pending("add some examples to (or delete) #{__FILE__}")
end
|
|
@ -424,7 +424,7 @@ const startWorker = (workerId) => {
|
|||
}
|
||||
|
||||
const queries = [
|
||||
client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM normalized_statuses WHERE status_id = $3 AND text ${req.invertFilters ? '!~' : '~'} ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = $1)) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)),
|
||||
client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM statuses WHERE id = $3 ${req.invertFilters ? 'AND NOT' : 'AND'} tsv @@ (SELECT tsquery_union(websearch_to_tsquery(phrase)) FROM custom_filters WHERE account_id = $1) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)),
|
||||
];
|
||||
|
||||
if (accountDomain) {
|
||||
|
|
Loading…
Reference in New Issue