move normalized text into own table

master
multiple creatures 2019-12-11 20:04:53 -06:00
parent 3dc62460b0
commit 9a435494c2
11 changed files with 86 additions and 17 deletions

View File

@ -7,7 +7,7 @@ module FilterHelper
status = status.reblog if status.reblog?
if Status.where(id: status.id).where("statuses.normalized_text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))", receiver_id).exists?
if Status.where(id: status.id).regex_filtered_by_account(receiver_id).exists?
redis.sadd("filtered_statuses:#{receiver_id}", status.id)
return true
end

View File

@ -717,7 +717,7 @@ class Bangtags
q = cmd[1..-1].join.strip
next if q.blank?
begin
data = @account.statuses.where('normalized_text ~ ?', expand_search_query(q))
data = @account.statuses.regex(expand_search_query(q))
.reorder(:created_at)
.pluck(:created_at)
.map { |d| d.strftime('%Y-%m') }

View File

@ -0,0 +1,12 @@
# == Schema Information
#
# Table name: normalized_statuses
#
# id :bigint(8) not null, primary key
# status_id :bigint(8)
# text :text
#
class NormalizedStatus < ApplicationRecord
belongs_to :status, inverse_of: :normalized_status
end

View File

@ -33,7 +33,6 @@
# origin :string
# boostable :boolean
# reject_replies :boolean
# normalized_text :text default(""), not null
#
class Status < ApplicationRecord
@ -86,6 +85,7 @@ class Status < ApplicationRecord
has_one :status_stat, inverse_of: :status
has_one :poll, inverse_of: :status, dependent: :destroy
has_one :destructing_status, inverse_of: :status, dependent: :destroy
has_one :normalized_status, inverse_of: :status, dependent: :destroy
validates :uri, uniqueness: true, presence: true, unless: :local?
validates :text, presence: true, unless: -> { with_media? || reblog? }
@ -119,7 +119,11 @@ class Status < ApplicationRecord
scope :mention_not_excluded_by_account, ->(account) { left_outer_joins(:mentions).where('mentions.account_id IS NULL OR mentions.account_id NOT IN (?)', account.excluded_from_timeline_account_ids) }
scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
scope :not_string_filtered_by_account, ->(account) { where("statuses.normalized_text !~ ALL(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))", account.id) }
scope :like, ->(needle) { joins(:normalized_status).where('normalized_statuses.text LIKE f_normalize(?)', needle) }
scope :regex, ->(needle) { joins(:normalized_status).where('normalized_statuses.text ~ f_normalize(?)', needle) }
scope :regex_filtered_by_account, ->(account) { joins(:normalized_status).where('normalized_statuses.text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account.id) }
scope :regex_not_filtered_by_account, ->(account) { joins(:normalized_status).where('normalized_statuses.text !~ ALL(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account.id) }
scope :not_missing_media_desc, -> { left_outer_joins(:media_attachments).where('media_attachments.id IS NULL OR media_attachments.description IS NOT NULL') }
scope :tagged_with_all, ->(tags) {
@ -340,10 +344,9 @@ class Status < ApplicationRecord
before_validation :infer_reject_replies
after_create :set_poll_id
after_create :update_normalized_text
after_create :process_bangtags, if: :local?
before_save :update_normalized_text
class << self
include SearchHelper
@ -359,7 +362,7 @@ class Status < ApplicationRecord
end
return none if term.blank? || term.length < 3
query = query.without_reblogs
.where('normalized_text ~ ?', expand_search_query(term))
.regex(expand_search_query(term))
.offset(offset).limit(limit)
apply_timeline_filters(query, account, true)
rescue ActiveRecord::StatementInvalid
@ -554,7 +557,7 @@ class Status < ApplicationRecord
query = query.in_chosen_languages(account) if account.chosen_languages.present?
query = query.reply_not_excluded_by_account(account) unless tag_timeline
query = query.mention_not_excluded_by_account(account)
query = query.not_string_filtered_by_account(account) if account.custom_filters.present?
query = query.regex_not_filtered_by_account(account) if account.custom_filters.present?
query = query.not_missing_media_desc if account.filter_undescribed?
query.merge(account_silencing_filter(account))
end
@ -630,9 +633,13 @@ class Status < ApplicationRecord
end
def update_normalized_text
return if destroyed?
return unless (normalized_text.blank? && !text.blank?) || saved_change_to_text?
self.normalized_text = normalize_status(self)
return if destroyed? || text.blank? || !(text_changed? || saved_change_to_text?)
normalized_text = normalize_status(self)
if self.normalized_status.nil?
self.create_normalized_status(text: normalized_text)
else
self.normalized_status.update_attributes(text: normalized_text)
end
end
def set_conversation

View File

@ -22,7 +22,6 @@ class MigrateToNewSearchImpl < ActiveRecord::Migration[5.2]
end
def down
#raise ActiveRecord::IrreversibleMigration
true
raise ActiveRecord::IrreversibleMigration
end
end

View File

@ -0,0 +1,23 @@
class CreateNormalizedStatuses < ActiveRecord::Migration[5.2]
def up
create_table :normalized_statuses do |t|
t.references :status, foreign_key: true
t.text :text
end
safety_assured do
remove_index :statuses, name: 'index_statuses_on_normalized_text_trgm'
execute 'INSERT INTO normalized_statuses (status_id, text) SELECT id, normalized_text FROM statuses'
remove_column :statuses, :normalized_text
end
end
def down
safety_assured do
execute 'UPDATE statuses SET normalized_text = s.text FROM (SELECT status_id, text FROM normalized_statuses) AS s WHERE statuses.id = s.id'
remove_index :normalized_statuses, name: 'index_statuses_on_normalized_text_trgm'
drop_table :normalized_statuses
add_column :statuses, :normalized_text, :text, null: false, default: ''
end
end
end

View File

@ -0,0 +1,13 @@
class AddIndexToNormalizedStatuses < ActiveRecord::Migration[5.2]
disable_ddl_transaction!
def up
safety_assured do
execute 'CREATE INDEX CONCURRENTLY IF NOT EXISTS index_statuses_on_normalized_text_trgm ON normalized_statuses USING GIN (text gin_trgm_ops)'
end
end
def down
remove_index :normalized_statuses, name: 'index_statuses_on_normalized_text_trgm'
end
end

View File

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2019_11_18_102858) do
ActiveRecord::Schema.define(version: 2019_12_12_002705) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_trgm"
@ -432,6 +432,13 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do
t.index ["target_account_id"], name: "index_mutes_on_target_account_id"
end
create_table "normalized_statuses", force: :cascade do |t|
t.bigint "status_id"
t.text "text"
t.index ["status_id"], name: "index_normalized_statuses_on_status_id"
t.index ["text"], name: "index_statuses_on_normalized_text_trgm", opclass: :gin_trgm_ops, using: :gin
end
create_table "notifications", force: :cascade do |t|
t.bigint "activity_id", null: false
t.string "activity_type", null: false
@ -689,13 +696,11 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do
t.string "origin"
t.boolean "boostable"
t.boolean "reject_replies"
t.text "normalized_text", default: "", null: false
t.index ["account_id", "id", "visibility", "updated_at"], name: "index_statuses_20180106", order: { id: :desc }
t.index ["account_id", "id", "visibility"], name: "index_statuses_on_account_id_and_id_and_visibility", order: { id: :desc }, where: "(visibility = ANY (ARRAY[0, 1, 2, 4]))"
t.index ["in_reply_to_account_id"], name: "index_statuses_on_in_reply_to_account_id"
t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id"
t.index ["network"], name: "index_statuses_on_network", where: "network"
t.index ["normalized_text"], name: "index_statuses_on_normalized_text_trgm", opclass: :gin_trgm_ops, using: :gin
t.index ["origin"], name: "index_statuses_on_origin", unique: true
t.index ["reblog_of_id", "account_id"], name: "index_statuses_on_reblog_of_id_and_account_id"
t.index ["uri"], name: "index_statuses_on_uri", unique: true
@ -860,6 +865,7 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do
add_foreign_key "mentions", "statuses", on_delete: :cascade
add_foreign_key "mutes", "accounts", column: "target_account_id", name: "fk_eecff219ea", on_delete: :cascade
add_foreign_key "mutes", "accounts", name: "fk_b8d8daf315", on_delete: :cascade
add_foreign_key "normalized_statuses", "statuses"
add_foreign_key "notifications", "accounts", column: "from_account_id", name: "fk_fbd6b0bf9e", on_delete: :cascade
add_foreign_key "notifications", "accounts", name: "fk_c141c8ee55", on_delete: :cascade
add_foreign_key "oauth_access_grants", "oauth_applications", column: "application_id", name: "fk_34d54b0a33", on_delete: :cascade

View File

@ -0,0 +1,4 @@
Fabricator(:normalized_status) do
status nil
text "MyText"
end

View File

@ -0,0 +1,5 @@
require 'rails_helper'
RSpec.describe NormalizedStatus, type: :model do
pending "add some examples to (or delete) #{__FILE__}"
end

View File

@ -417,7 +417,7 @@ const startWorker = (workerId) => {
}
const queries = [
client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM statuses WHERE id = $3 AND normalized_text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = $1)) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)),
client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM normalized_statuses WHERE status_id = $3 AND text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = $1)) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)),
];
if (accountDomain) {