PERF: refactor incoming links table

This commit is contained in:
Sam 2014-08-04 12:35:55 +10:00
parent 44a6fb0f0a
commit 22768a4b68
5 changed files with 121 additions and 13 deletions

View File

@ -0,0 +1,16 @@
# ActiveRecord model for the incoming_domains table; it declares no
# behaviour of its own (columns are listed in the schema annotation
# that follows the class).
class IncomingDomain < ActiveRecord::Base; end
# == Schema Information
#
# Table name: incoming_domains
#
# id :integer not null, primary key
# name :string(100) not null
# https :boolean default(FALSE), not null
# port :integer not null
#
# Indexes
#
# index_incoming_domains_on_name_and_https_and_port (name,https,port) UNIQUE
#

View File

@ -89,20 +89,17 @@ end
#
# Table name: incoming_links
#
# id :integer not null, primary key
# referer :string(1000)
# domain :string(100)
# topic_id :integer
# post_number :integer
# created_at :datetime
# user_id :integer
# ip_address :inet
# current_user_id :integer
# post_id :integer not null
# id :integer not null, primary key
# topic_id :integer
# created_at :datetime
# user_id :integer
# ip_address :inet
# current_user_id :integer
# post_id :integer not null
# incoming_referer_id :integer
#
# Indexes
#
# index_incoming_links_on_created_at_and_domain (created_at,domain)
# index_incoming_links_on_created_at_and_user_id (created_at,user_id)
# index_incoming_links_on_post_id (post_id)
#

View File

@ -0,0 +1,16 @@
# ActiveRecord model for the incoming_referers table; it declares no
# behaviour of its own (columns are listed in the schema annotation
# that follows the class).
class IncomingReferer < ActiveRecord::Base; end
# == Schema Information
#
# Table name: incoming_referers
#
# id :integer not null, primary key
# url :string(1000) not null
# path :string(1000) not null
# incoming_domain_id :integer not null
#
# Indexes
#
# index_incoming_referers_on_path_and_incoming_domain_id (path,incoming_domain_id) UNIQUE
#

View File

@ -101,6 +101,7 @@ end
#
# Indexes
#
# post_timings_summary (topic_id,post_number)
# post_timings_unique (topic_id,post_number,user_id) UNIQUE
# index_post_timings_on_user_id (user_id)
# post_timings_summary (topic_id,post_number)
# post_timings_unique (topic_id,post_number,user_id) UNIQUE
#

View File

@ -0,0 +1,78 @@
# Normalizes incoming link referers: the free-text incoming_links.referer /
# incoming_links.domain columns are replaced by a reference to a row in
# incoming_referers, which in turn references a row in incoming_domains.
class IncomingLinkNormalization < ActiveRecord::Migration
  # Builds incoming_referers and incoming_domains from the distinct values of
  # incoming_links.referer, points every link at its referer row, then drops
  # the columns that were normalized away.
  def up
    remove_column :incoming_links, :post_number
    remove_column :incoming_links, :domain
    add_column :incoming_links, :incoming_referer_id, :integer

    # domain/port/https only live here temporarily; they are moved to
    # incoming_domains and removed again further down.
    create_table :incoming_referers do |t|
      t.string :url, limit: 1000, null: false
      t.string :domain, limit: 100, null: false
      t.string :path, limit: 1000, null: false
      t.integer :port, null: false
      t.boolean :https, null: false
      t.integer :incoming_domain_id
    end

    # start the shuffle
    #
    # regexp_matches captures: a[1] = 's' for https, a[2] = host,
    # a[4] = explicit port (if any), a[5] = everything after host/port.
    #
    # The port is matched with [0-9]+ instead of \d+: this SQL lives in a
    # double-quoted Ruby string, where "\d" collapses to a plain "d", so the
    # previous pattern never matched a numeric port and ":8080" ended up at
    # the front of path with the port defaulted to 80/443.
    #
    # NOTE(review): a[4]::integer raises if a referer carries a port with more
    # digits than a Postgres integer can hold - confirm no such rows exist.
    execute "INSERT INTO incoming_referers(url, https, domain, port, path)
             SELECT referer,
                    CASE WHEN a[1] = 's' THEN true ELSE false END,
                    a[2] as domain,
                    CASE WHEN a[1] = 's' THEN
                      COALESCE(a[4]::integer, 443)::integer
                    ELSE
                      COALESCE(a[4]::integer, 80)::integer
                    END,
                    COALESCE(a[5], '') path
             FROM
             (
               SELECT referer, regexp_matches(referer, 'http(s)?://([^/:]+)(:([0-9]+))?(.*)') a
               FROM
               (
                 SELECT DISTINCT referer
                 FROM incoming_links WHERE referer ~ '^https?://.+'
               ) Z
             ) X
             WHERE a[2] IS NOT NULL"

    execute "UPDATE incoming_links l
             SET incoming_referer_id = r.id
             FROM incoming_referers r
             WHERE r.url = l.referer"

    create_table :incoming_domains do |t|
      t.string :name, limit: 100, null: false
      t.boolean :https, null: false, default: false
      t.integer :port, null: false
    end

    # shuffle part 2
    #
    execute "INSERT INTO incoming_domains(name, port, https)
             SELECT DISTINCT domain, port, https
             FROM incoming_referers"

    execute "UPDATE incoming_referers l
             SET incoming_domain_id = d.id
             FROM incoming_domains d
             WHERE d.name = l.domain AND d.https = l.https AND d.port = l.port"

    # With ports parsed, distinct URLs can normalize to the same
    # (path, incoming_domain_id) pair, e.g. http://host/x and http://host:80/x.
    # Repoint links at the lowest-id referer of each pair and delete the rest,
    # otherwise the unique index below cannot be created.
    execute "UPDATE incoming_links l
             SET incoming_referer_id = k.keep_id
             FROM incoming_referers r
             JOIN (
               SELECT path, incoming_domain_id, MIN(id) AS keep_id
               FROM incoming_referers
               GROUP BY path, incoming_domain_id
             ) k ON k.path = r.path AND k.incoming_domain_id = r.incoming_domain_id
             WHERE l.incoming_referer_id = r.id AND r.id <> k.keep_id"

    execute "DELETE FROM incoming_referers r
             USING incoming_referers k
             WHERE k.path = r.path
               AND k.incoming_domain_id = r.incoming_domain_id
               AND k.id < r.id"

    remove_column :incoming_referers, :domain
    remove_column :incoming_referers, :port
    remove_column :incoming_referers, :https

    change_column :incoming_referers, :incoming_domain_id, :integer, null: false

    add_index :incoming_referers, [:path, :incoming_domain_id], unique: true
    add_index :incoming_domains, [:name, :https, :port], unique: true

    remove_column :incoming_links, :referer
  end

  # The dropped referer/domain/post_number data cannot be restored.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end