Moved to using a normalized text column for searches. Admins using an FTS-enabled version of Monsterfork will need to apply the migration from `dist/search.sql` then run `bundle exec rails monsterfork:index_statuses`.
parent
487c945d16
commit
1132af1515
2
Gemfile
2
Gemfile
|
@ -153,3 +153,5 @@ gem 'concurrent-ruby', require: false
|
|||
gem "ruby-bbcode", "~> 2.0"
|
||||
|
||||
gem "sun_calc", "~> 0.1.0"
|
||||
|
||||
gem "sixarm_ruby_unaccent", "~> 1.2"
|
||||
|
|
|
@ -579,6 +579,7 @@ GEM
|
|||
json (>= 1.8, < 3)
|
||||
simplecov-html (~> 0.10.0)
|
||||
simplecov-html (0.10.2)
|
||||
sixarm_ruby_unaccent (1.2.0)
|
||||
sprockets (3.7.2)
|
||||
concurrent-ruby (~> 1.0)
|
||||
rack (> 1, < 3)
|
||||
|
@ -763,6 +764,7 @@ DEPENDENCIES
|
|||
simple-navigation (~> 4.0)
|
||||
simple_form (~> 4.1)
|
||||
simplecov (~> 0.16)
|
||||
sixarm_ruby_unaccent (~> 1.2)
|
||||
sprockets-rails (~> 3.2)
|
||||
stackprof
|
||||
stoplight (~> 2.1.3)
|
||||
|
|
|
@ -21,7 +21,7 @@ module FilterHelper
|
|||
return false if filters.empty?
|
||||
|
||||
status = status.reblog if status.reblog?
|
||||
status_text = Formatter.instance.plaintext(status)
|
||||
status_text = status.normalized_text
|
||||
spoiler_text = status.spoiler_text
|
||||
tags = status.tags.pluck(:name).join("\n")
|
||||
descs = status.media_attachments.map { |a| a.description }.join("\n").strip
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
module SearchHelper
|
||||
require 'sixarm_ruby_unaccent'
|
||||
|
||||
module SearchHelper
  # Turns a user-supplied search term into the regular expression that is
  # matched against the normalized status text.
  #
  # The term is lower-cased and stripped of accents (String#unaccent comes
  # from sixarm_ruby_unaccent), then every "quoted phrase" is rewritten to
  # \yphrase\y so it is wrapped in \y markers (word-boundary escapes in
  # PostgreSQL regular expressions).
  #
  # The capture is lazy on purpose: with a greedy `.*`, a query such as
  # `"foo" and "bar"` would be treated as one single phrase spanning from the
  # first quote to the last one.
  #
  # NOTE(review): down-casing the whole term also lower-cases regex class
  # escapes typed by the user (e.g. \S becomes \s) -- confirm that is intended.
  #
  # @param query [String] raw search term
  # @return [String] pattern suitable for the `~` operator
  def expand_search_query(query)
    query.downcase.unaccent.gsub(/"(.*?)"/, '\\y\1\\y')
  end
end
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# coding: utf-8
|
||||
require 'htmlentities'
|
||||
require 'sixarm_ruby_unaccent'
|
||||
|
||||
module TextHelper
  # Reduces an HTML fragment to lower-case, accent-free plain text.
  #
  # Steps, in order: lower-case the input, turn opening block-level tags and
  # <br>/<hr> into line breaks, drop every remaining tag, decode HTML
  # entities, collapse runs of spaces / non-breaking spaces, normalise line
  # endings to "\n", trim whitespace around line breaks, squeeze blank lines,
  # and finally strip accents and surrounding whitespace.
  #
  # @param html [String] markup to normalise
  # @return [String] normalised plain text
  def normalize_text(html)
    # Markup removal happens before entity decoding, so an escaped "&lt;b&gt;"
    # survives as literal text instead of being stripped as a tag.
    without_markup = html.downcase
                         .gsub(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n")
                         .gsub(/<[bh]r[\/ ]*>/, "\n")
                         .gsub(/<\/?[^>]*>/, '')

    decoded = HTMLEntities.new.decode(without_markup)

    # Non-breaking spaces (with any adjacent blanks) and space runs become one space.
    single_spaced = decoded.gsub(/[ \t]*\302\240+[ \t]*/, ' ')
                           .gsub(/ +/, ' ')

    # Unify line endings, trim blanks around them, then squeeze empty lines.
    compact = single_spaced.gsub(/\r\n?/, "\n")
                           .gsub(/\n[ \t]+/, "\n")
                           .gsub(/[ \t]+\n/, "\n")
                           .gsub(/\n\n+/, "\n")

    compact.unaccent_via_split_map.strip
  end

  # Builds the normalised text for a status: its spoiler text, a line break,
  # then its body. Local statuses are rendered through Formatter first; for
  # all others the stored text is used as-is.
  #
  # @param status [#local?, #spoiler_text, #text] status to normalise
  # @return [String] normalised plain text
  def normalize_status(status)
    combined =
      if status.local?
        "#{status.spoiler_text}\n#{Formatter.instance.format(status)}"
      else
        "#{status.spoiler_text}\n#{status.text}"
      end

    normalize_text(combined)
  end
end
|
|
@ -720,7 +720,7 @@ class Bangtags
|
|||
q = cmd[1..-1].join.strip
|
||||
next if q.blank?
|
||||
begin
|
||||
data = @account.statuses.where('text ~* ?', expand_search_query(q))
|
||||
data = @account.statuses.where('normalized_text ~ ?', expand_search_query(q))
|
||||
.reorder(:created_at)
|
||||
.pluck(:created_at)
|
||||
.map { |d| d.strftime('%Y-%m') }
|
||||
|
|
|
@ -31,9 +31,9 @@
|
|||
# edited :boolean
|
||||
# imported :boolean
|
||||
# origin :string
|
||||
# tsv :tsvector
|
||||
# boostable :boolean
|
||||
# reject_replies :boolean
|
||||
# normalized_text :text default(""), not null
|
||||
#
|
||||
|
||||
class Status < ApplicationRecord
|
||||
|
@ -43,6 +43,7 @@ class Status < ApplicationRecord
|
|||
include Streamable
|
||||
include Cacheable
|
||||
include StatusThreadingConcern
|
||||
include TextHelper
|
||||
|
||||
# match both with and without U+FE0F (the emoji variation selector)
|
||||
LOCAL_ONLY_TOKENS = /(?:#!|\u{1f441}\ufe0f?)\u200b?\z/
|
||||
|
@ -324,6 +325,7 @@ class Status < ApplicationRecord
|
|||
around_create Mastodon::Snowflake::Callbacks
|
||||
|
||||
before_create :set_locality
|
||||
before_create :update_normalized_text
|
||||
|
||||
before_validation :prepare_contents, if: :local?
|
||||
before_validation :set_reblog
|
||||
|
@ -334,6 +336,9 @@ class Status < ApplicationRecord
|
|||
|
||||
after_create :set_poll_id
|
||||
after_create :process_bangtags, if: :local?
|
||||
after_create :update_normalized_text
|
||||
|
||||
after_update :update_normalized_text
|
||||
|
||||
class << self
|
||||
include SearchHelper
|
||||
|
@ -350,7 +355,7 @@ class Status < ApplicationRecord
|
|||
end
|
||||
return none if term.blank? || term.length < 3
|
||||
query = query.without_reblogs
|
||||
.where('text ~* ?', expand_search_query(term))
|
||||
.where('normalized_text ~ ?', expand_search_query(term))
|
||||
.offset(offset).limit(limit)
|
||||
apply_timeline_filters(query, account, true)
|
||||
rescue ActiveRecord::StatementInvalid
|
||||
|
@ -618,6 +623,12 @@ class Status < ApplicationRecord
|
|||
Bangtags.new(self).process
|
||||
end
|
||||
|
||||
# Recomputes the search-normalised copy of this status' text.
#
# Does nothing unless the normalised text is still missing while there is
# text to normalise, or the text was changed by the last save.
#
# The cached rendering is evicted using this record's own `id`; the previous
# `status.id` referred to a name that is not defined in this method.
#
# This method is registered both as a before_create and as an
# after_create/after_update callback. A plain attribute assignment made after
# the row has been written is never saved, so once the record is persisted
# the value is written directly with update_column (which skips callbacks and
# therefore cannot re-trigger this hook).
def update_normalized_text
  return unless (normalized_text.blank? && !text.blank?) || saved_change_to_text?

  # A record that has not been inserted yet has no id, hence nothing cached.
  Rails.cache.delete("formatted_status:#{id}") unless id.nil?

  normalized = normalize_status(self)

  if persisted?
    update_column(:normalized_text, normalized)
  else
    self.normalized_text = normalized
  end
end
|
||||
|
||||
def set_conversation
|
||||
self.thread = thread.reblog if thread&.reblog?
|
||||
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
# Adds the `normalized_text` column to `statuses`: a non-null text column
# that defaults to the empty string.
class AddNormalizedTextToStatuses < ActiveRecord::Migration[5.2]
  def change
    change_table :statuses do |t|
      t.text :normalized_text, default: '', null: false
    end
  end
end
|
|
@ -10,10 +10,12 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(version: 2019_10_27_182731) do
|
||||
ActiveRecord::Schema.define(version: 2019_11_16_233416) do
|
||||
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_trgm"
|
||||
enable_extension "plpgsql"
|
||||
enable_extension "unaccent"
|
||||
|
||||
create_table "account_conversations", force: :cascade do |t|
|
||||
t.bigint "account_id"
|
||||
|
@ -697,9 +699,9 @@ ActiveRecord::Schema.define(version: 2019_10_27_182731) do
|
|||
t.boolean "edited"
|
||||
t.boolean "imported"
|
||||
t.string "origin"
|
||||
t.tsvector "tsv"
|
||||
t.boolean "boostable"
|
||||
t.boolean "reject_replies"
|
||||
t.text "normalized_text", default: "", null: false
|
||||
t.index ["account_id", "id", "visibility", "updated_at"], name: "index_statuses_20180106", order: { id: :desc }
|
||||
t.index ["account_id", "id", "visibility"], name: "index_statuses_on_account_id_and_id_and_visibility", order: { id: :desc }, where: "(visibility = ANY (ARRAY[0, 1, 2, 4]))"
|
||||
t.index ["in_reply_to_account_id"], name: "index_statuses_on_in_reply_to_account_id"
|
||||
|
@ -707,7 +709,8 @@ ActiveRecord::Schema.define(version: 2019_10_27_182731) do
|
|||
t.index ["network"], name: "index_statuses_on_network", where: "network"
|
||||
t.index ["origin"], name: "index_statuses_on_origin", unique: true
|
||||
t.index ["reblog_of_id", "account_id"], name: "index_statuses_on_reblog_of_id_and_account_id"
|
||||
t.index ["tsv"], name: "tsv_idx", using: :gin
|
||||
t.index ["spoiler_text"], name: "index_statuses_on_spoiler_text_trgm", opclass: :gin_trgm_ops, using: :gin
|
||||
t.index ["text"], name: "index_statuses_on_text_trgm", opclass: :gin_trgm_ops, using: :gin
|
||||
t.index ["uri"], name: "index_statuses_on_uri", unique: true
|
||||
end
|
||||
|
||||
|
|
|
@ -12,7 +12,11 @@ DROP TRIGGER IF EXISTS tsvectorupdate ON statuses;
|
|||
DROP FUNCTION IF EXISTS tsv_update_trigger;
|
||||
DROP INDEX IF EXISTS tsv_idx;
|
||||
ALTER TABLE statuses DROP COLUMN IF EXISTS tsv;
|
||||
DROP INDEX IF EXISTS index_statuses_on_text_trgm;
|
||||
DROP INDEX IF EXISTS index_statuses_on_spoiler_text_trgm;
|
||||
|
||||
-- Create new trigram indexes --
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS index_statuses_on_text_trgm ON statuses USING GIN (text gin_trgm_ops);
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS index_statuses_on_spoiler_text_trgm ON statuses USING GIN (spoiler_text gin_trgm_ops);
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS index_statuses_on_normalized_text_trgm ON statuses USING GIN (normalized_text gin_trgm_ops);
|
||||
|
||||
-- Compact tables ---
|
||||
VACUUM ANALYZE;
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
namespace :monsterfork do
  desc '(Re-)Index statuses for search.'
  # Walks every status in batches and rewrites its `normalized_text` column
  # from the current status contents, logging progress once per batch.
  task index_statuses: :environment do
    # Brings normalize_status into scope for the block below.
    include TextHelper

    indexed = 0
    total = Status.count

    Status.find_in_batches do |statuses|
      # Report the absolute range covered by this batch; the upper bound is
      # the running total, not the size of the batch.
      first = indexed + 1
      indexed += statuses.size
      ActiveRecord::Base.logger.info("Indexing statuses #{first}-#{indexed} of #{total}.")

      statuses.each do |s|
        # Written with update_column inside a silenced logger block.
        ActiveRecord::Base.logger.silence { s.update_column(:normalized_text, normalize_status(s)) }
      end
    end
  end
end
|
Loading…
Reference in New Issue