# frozen_string_literal: true

require_relative "base"
class BulkImport :: DiscourseMerger < BulkImport :: Base
# SQL expression for the current timestamp. `||=` keeps a value that is
# already defined for NOW instead of re-assigning it.
NOW ||= "now()"

# Lower-cased model names for which copy_model records the source id through
# create_custom_fields. Frozen so the shared list cannot be mutated.
CUSTOM_FIELDS = %w[category group post topic user].freeze
# Environment variables read by this script:
#
# DB_NAME: name of the database being merged into the current local db
#          (defaults to "dd_demo")
# DB_HOST: hostname of the database being merged (defaults to "localhost")
# DB_PASS: password used to access the Discourse databases as the postgres user
#          (defaults to "import_password"; used for both connections)
# UPLOADS_PATH: absolute path of the directory containing the "original"
#               and "optimized" dirs. e.g. /home/discourse/other-site/public/uploads/default
#               (uploads are only copied when this is set)
# SOURCE_BASE_URL: (required) base url of the site being merged. e.g. https://meta.discourse.org
# SOURCE_CDN: (optional) base url of the CDN of the site being merged.
#             e.g. https://discourse-cdn-sjc1.com/business4
# Opens the local connection, prepares the source connection settings,
# verifies both databases are on the same migration version and initialises
# the id-mapping tables owned by this class.
#
# Raises RuntimeError when SOURCE_BASE_URL is missing or when the two
# schema_migrations versions differ.
def initialize
  db_password = ENV["DB_PASS"] || "import_password"
  local_db = ActiveRecord::Base.connection_db_config.configuration_hash
  @raw_connection =
    PG.connect(
      dbname: local_db[:database],
      host: "localhost",
      port: local_db[:port],
      user: "postgres",
      password: db_password,
    )

  # Consumed lazily by source_raw_connection.
  @source_db_config = {
    dbname: ENV["DB_NAME"] || "dd_demo",
    host: ENV["DB_HOST"] || "localhost",
    user: "postgres",
    password: db_password,
  }

  raise "SOURCE_BASE_URL missing!" unless ENV["SOURCE_BASE_URL"]

  @source_base_url = ENV["SOURCE_BASE_URL"]
  @uploads_path = ENV["UPLOADS_PATH"]
  @uploader = ImportScripts::Uploader.new
  @source_cdn = ENV["SOURCE_CDN"] if ENV["SOURCE_CDN"]

  # Rows are copied column-for-column, so both schemas must be identical.
  local_version = @raw_connection.exec("select max(version) from schema_migrations")
  local_version = local_version.first["max"]
  source_version = source_raw_connection.exec("select max(version) from schema_migrations")
  source_version = source_version.first["max"]

  if local_version != source_version
    raise "DB schema mismatch. Databases must be at the same migration version. Local is #{local_version}, other is #{source_version}"
  end

  @encoder = PG::TextEncoder::CopyRow.new

  @merged_user_ids = []
  @tags = {}
  @tag_groups = {}
  @uploads = {}
  @post_actions = {}
  @notifications = {}
  @badge_groupings = {}
  @badges = {}
  @email_tokens = {}

  @auto_group_ids = Group::AUTO_GROUPS.values

  # add your authorized extensions here:
  SiteSetting.authorized_extensions = %w[jpg jpeg png gif].join("|")

  @sequences = {}
end
# Runs the import and always closes both database connections afterwards,
# whether `run` succeeds or raises.
def start
  run
ensure
  @raw_connection&.close
  # Close through the instance variable: the source_raw_connection accessor
  # would open a fresh connection just to close it if none existed yet.
  @source_raw_connection&.close
end
# Runs every merge step in dependency order, then the fix-up passes.
# @last_user_id and @last_topic_id are not assigned in this file; they are
# presumably prepared by the base class before this is called (verify).
def execute
  # Remember where newly copied rows start so the fix-up passes can limit
  # themselves to imported users / topics.
  @first_new_user_id = @last_user_id + 1
  @first_new_topic_id = @last_topic_id + 1

  copy_users
  copy_user_stuff
  copy_groups
  copy_categories
  copy_topics
  copy_posts
  copy_tags
  copy_uploads if @uploads_path
  copy_everything_else
  copy_badges

  fix_user_columns
  fix_category_descriptions
  fix_topic_links
end
# Lazily opens (and memoizes) the connection to the database being merged,
# using the settings stored in @source_db_config.
def source_raw_connection
  @source_raw_connection ||= PG.connect(@source_db_config)
end
# Copies users (source ids > 0) into the local users table.
#
# * A source user whose primary email already exists locally is merged:
#   only the old id => existing id mapping is recorded, no row is written.
# * Any other user is inserted under a fresh id; a username that is already
#   taken gets a "_1" suffix which is incremented until it is unique.
#
# Finally the source ids of the inserted users are handed to
# create_custom_fields.
def copy_users
  puts "", "merging users..."

  imported_ids = []

  usernames_lower = User.unscoped.pluck(:username_lower).to_set

  columns = User.columns.map(&:name)
  sql = "COPY users (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN"

  # The primary email is selected as an extra trailing column; it is removed
  # from the row again before the row is written.
  select_sql =
    "SELECT #{columns.map { |c| "u.\"#{c}\"" }.join(", ")}, e.email " \
      "FROM users u INNER JOIN user_emails e ON (u.id = e.user_id AND e.primary = TRUE) " \
      "WHERE u.id > 0"

  @raw_connection.copy_data(sql, @encoder) do
    source_raw_connection
      .exec(select_sql)
      .each do |row|
        old_user_id = row["id"]&.to_i

        if existing = UserEmail.where(email: row.delete("email")).first&.user
          # Merge these users
          @users[old_user_id] = existing.id
          @merged_user_ids << old_user_id
          next
        end

        # New user
        unless usernames_lower.add?(row["username_lower"])
          username = row["username"] + "_1"
          username.next! until usernames_lower.add?(username.downcase)
          row["username"] = username
          row["username_lower"] = row["username"].downcase
        end

        row["id"] = (@last_user_id += 1)
        @users[old_user_id] = row["id"]

        @raw_connection.put_copy_data row.values
        imported_ids << old_user_id
      end
  end

  @sequences[User.sequence_name] = @last_user_id + 1 if @last_user_id

  create_custom_fields("user", "id", imported_ids) do |old_user_id|
    { value: old_user_id, record_id: user_id_from_imported_id(old_user_id) }
  end
end
# Copies the per-user tables. Rows of merged users are skipped everywhere.
# EmailToken ids are tracked in @email_tokens because
# process_email_change_request remaps them later.
def copy_user_stuff
  copy_model(
    EmailToken,
    skip_if_merged: true,
    is_a_user_model: true,
    skip_processing: true,
    mapping: @email_tokens,
  )

  # Copied without a process_* hook.
  [
    UserEmail,
    UserStat,
    UserOption,
    UserProfile,
    UserVisit,
    UserSearchData,
    GivenDailyLike,
    UserSecondFactor,
  ].each { |c| copy_model(c, skip_if_merged: true, is_a_user_model: true, skip_processing: true) }

  # Copied through their process_* hook (when the class defines one).
  [UserAssociatedAccount, Oauth2UserInfo, SingleSignOnRecord, EmailChangeRequest].each do |c|
    copy_model(c, skip_if_merged: true, is_a_user_model: true)
  end
end
# Copies the non-automatic groups (recording old => new ids in @groups) and
# then the group memberships, skipping memberships of merged users.
def copy_groups
  group_columns = Group.columns.map { |c| "\"#{c.name}\"" }.join(", ")

  copy_model(
    Group,
    mapping: @groups,
    skip_processing: true,
    select_sql: "SELECT #{group_columns} FROM groups WHERE automatic = false",
  )

  copy_model(GroupUser, skip_if_merged: true)
end
# Copies categories from the source database.
#
# * A source category whose /c/<parent-slug>/<slug> path equals that of a
#   local category with the same slug is mapped onto it (no row written).
# * A category whose slug exists locally under a different path is imported
#   with "-<source id>" appended to the slug.
# * Everything else is inserted under a fresh id.
def copy_categories
  puts "merging categories..."

  columns = Category.columns.map(&:name)
  imported_ids = []
  last_id = Category.unscoped.maximum(:id) || 1

  sql = "COPY categories (#{columns.map { |c| "\"#{c}\"" }.join(", ")}) FROM STDIN"

  # using ORDER BY id to import categories in order of creation.
  # this assumes parent categories were created prior to child categories
  # and have a lower category id.
  #
  # without this definition, categories import in different orders in subsequent imports
  # and can potentially mess up parent/child structure
  select_sql = <<~SQL
    SELECT concat('/c/', x.parent_slug, '/', x.slug) as path,
           #{columns.map { |c| "c.\"#{c}\"" }.join(", ")}
      FROM categories c
     INNER JOIN (
             SELECT c1.id AS id,
                    c2.slug AS parent_slug,
                    c1.slug AS slug
               FROM categories c1
               LEFT OUTER JOIN categories c2 ON c1.parent_category_id = c2.id
           ) x ON c.id = x.id
     ORDER BY c.id
  SQL

  @raw_connection.copy_data(sql, @encoder) do
    source_raw_connection
      .exec(select_sql)
      .each do |row|
        # "path" is a helper column only; drop it so the row matches `columns`.
        source_category_path = row.delete("path")&.squeeze("/")

        existing = Category.where(slug: row["slug"]).first
        parent_slug = existing&.parent_category&.slug
        if existing && source_category_path == "/c/#{parent_slug}/#{existing.slug}".squeeze("/")
          @categories[row["id"].to_i] = existing.id
          next
        elsif existing
          # if not the exact path as the source,
          # we still need to avoid a unique index conflict on the slug when importing
          # if that's the case, we'll append the imported id
          row["slug"] = "#{row["slug"]}-#{row["id"]}"
        end

        old_user_id = row["user_id"].to_i
        row["user_id"] = user_id_from_imported_id(old_user_id) || -1 if old_user_id >= 1

        if row["parent_category_id"]
          row["parent_category_id"] = category_id_from_imported_id(row["parent_category_id"])
        end

        old_id = row["id"].to_i
        row["id"] = (last_id += 1)
        imported_ids << old_id
        @categories[old_id] = row["id"]

        @raw_connection.put_copy_data(row.values)
      end
  end

  @sequences[Category.sequence_name] = last_id + 1

  create_custom_fields("category", "id", imported_ids) do |imported_id|
    { record_id: category_id_from_imported_id(imported_id), value: imported_id }
  end
end
# Points each mapped category's topic_id at the local copy of its
# description topic, for every category whose current topic_id has a mapping.
#
# NOTE(review): @categories also holds source categories that were merged
# onto existing local ones; their topic_id is already a local id — confirm
# that passing it through topic_id_from_imported_id is intended.
def fix_category_descriptions
  puts "updating category description topic ids..."

  @categories.each_value do |new_id|
    next unless new_id.present?

    category = Category.find(new_id)
    description_topic_id = topic_id_from_imported_id(category.topic_id)
    next unless description_topic_id

    category.topic_id = description_topic_id
    category.save!
  end
end
# Copies topics (through process_topic, recording ids in @topics) and then
# the tables hanging off topics, which are copied without a process_* hook.
def copy_topics
  copy_model(Topic, mapping: @topics)

  [
    TopicAllowedGroup,
    TopicAllowedUser,
    TopicEmbed,
    TopicSearchData,
    TopicTimer,
    TopicUser,
    TopicViewItem,
  ].each { |k| copy_model(k, skip_processing: true) }
end
# Copies posts (ids recorded in @posts), post actions (ids recorded in
# @post_actions) and the tables that reference posts.
def copy_posts
  copy_model(Post, skip_processing: true, mapping: @posts)
  copy_model(PostAction, mapping: @post_actions)

  # These go through their process_* hook.
  [PostReply, TopicLink, UserAction, QuotedPost].each { |k| copy_model(k) }

  [PostStat, IncomingEmail, PostDetail, PostRevision].each do |k|
    copy_model(k, skip_processing: true)
  end
end
# Copies tags. A source tag for which Tag.where_name finds a local tag is
# mapped onto it; other tags are inserted under a fresh id. @tags is keyed
# by the source id as a String. Afterwards the tag-related tables are copied.
def copy_tags
  puts "merging tags..."

  columns = Tag.columns.map(&:name)
  quoted_columns = columns.map { |c| "\"#{c}\"" }.join(", ")
  last_id = Tag.unscoped.maximum(:id) || 1

  sql = "COPY tags (#{quoted_columns}) FROM STDIN"
  @raw_connection.copy_data(sql, @encoder) do
    source_raw_connection
      .exec("SELECT #{quoted_columns} FROM tags")
      .each do |row|
        old_id = row["id"]

        if existing = Tag.where_name(row["name"]).first
          @tags[old_id.to_s] = existing.id
          next
        end

        row["id"] = (last_id += 1)
        @tags[old_id.to_s] = row["id"]

        @raw_connection.put_copy_data(row.values)
      end
  end

  @sequences[Tag.sequence_name] = last_id + 1

  [TagUser, TopicTag, CategoryTag, CategoryTagStat].each { |k| copy_model(k) }
  copy_model(TagGroup, mapping: @tag_groups)
  [TagGroupMembership, CategoryTagGroup].each { |k| copy_model(k, skip_processing: true) }

  # Permissions that reference automatic groups are not copied.
  col_list = TagGroupPermission.columns.map { |c| "\"#{c.name}\"" }.join(", ")
  copy_model(
    TagGroupPermission,
    skip_processing: true,
    select_sql:
      "SELECT #{col_list} FROM tag_group_permissions WHERE group_id NOT IN (#{@auto_group_ids.join(", ")})",
  )
end
# Copies the upload files from @uploads_path into public/uploads/default,
# then the upload rows, then PostUpload / UserAvatar, and finally remaps
# uploaded_avatar_id on the newly imported users.
#
# NOTE(review): an upload whose sha1 already exists locally is skipped
# without being mapped to the existing upload, so rows referencing it lose
# that reference — confirm this is intended.
def copy_uploads
  puts ""
  print "copying uploads..."

  FileUtils.cp_r(
    File.join(@uploads_path, "."),
    File.join(Rails.root, "public", "uploads", "default"),
  )

  columns = Upload.columns.map(&:name)
  quoted_columns = columns.map { |c| "\"#{c}\"" }.join(", ")
  last_id = Upload.unscoped.maximum(:id) || 1
  sql = "COPY uploads (#{quoted_columns}) FROM STDIN"

  @raw_connection.copy_data(sql, @encoder) do
    source_raw_connection
      .exec("SELECT #{quoted_columns} FROM uploads")
      .each do |row|
        next if Upload.where(sha1: row["sha1"]).exists?

        # Resolve the owner BEFORE allocating an id: a row skipped for an
        # unmapped user must not leave an @uploads entry pointing at an
        # upload id that is never written.
        old_user_id = row["user_id"].to_i
        if old_user_id >= 1
          row["user_id"] = user_id_from_imported_id(old_user_id)
          next if row["user_id"].nil?
        end

        # make sure to get a backup with uploads then convert them to local.
        # when the backup is restored to a site with s3 uploads, it will upload the items
        # to the bucket
        rel_filename = row["url"].gsub(%r{^/uploads/[^/]+/}, "")
        # assumes if coming from amazonaws.com that we want to remove everything
        # but the text after the last `/`, which should leave us the filename
        rel_filename = rel_filename.gsub(%r{^//[^/]+\.amazonaws\.com/\S+/}, "")
        absolute_filename = File.join(@uploads_path, rel_filename)

        if (old_id = row["id"])
          row["id"] = (last_id += 1)
          @uploads[old_id.to_s] = row["id"]
        end

        row["url"] = "/uploads/default/#{rel_filename}" if File.exist?(absolute_filename)

        @raw_connection.put_copy_data(row.values)
      end
  end

  @sequences[Upload.sequence_name] = last_id + 1

  puts ""

  copy_model(PostUpload)
  copy_model(UserAvatar)

  # Users have a column "uploaded_avatar_id" which needs to be mapped now.
  User
    .where("id >= ?", @first_new_user_id)
    .find_each do |u|
      next unless u.uploaded_avatar_id

      u.uploaded_avatar_id = upload_id_from_imported_id(u.uploaded_avatar_id)
      u.save! unless u.uploaded_avatar_id.nil?
    end
end
# Copies the remaining tables. Notification ids are recorded in
# @notifications. CategoryGroup and GroupHistory rows that reference
# automatic groups are left out.
def copy_everything_else
  [PostTiming, UserArchivedMessage, UnsubscribeKey, GroupMention].each do |k|
    copy_model(k, skip_processing: true)
  end

  [UserHistory, UserWarning, GroupArchivedMessage].each { |k| copy_model(k) }

  copy_model(Notification, mapping: @notifications)

  [CategoryGroup, GroupHistory].each do |k|
    col_list = k.columns.map { |c| "\"#{c.name}\"" }.join(", ")
    copy_model(
      k,
      select_sql:
        "SELECT #{col_list} FROM #{k.table_name} WHERE group_id NOT IN (#{@auto_group_ids.join(", ")})",
    )
  end
end
# Copies badge groupings, then badges (a source badge whose name exists
# locally is mapped onto the local badge), then user badges.
def copy_badges
  copy_model(BadgeGrouping, mapping: @badge_groupings, skip_processing: true)

  puts "merging badges..."

  columns = Badge.columns.map(&:name)
  quoted_columns = columns.map { |c| "\"#{c}\"" }.join(", ")
  last_id = Badge.unscoped.maximum(:id) || 1

  sql = "COPY badges (#{quoted_columns}) FROM STDIN"
  @raw_connection.copy_data(sql, @encoder) do
    source_raw_connection
      .exec("SELECT #{quoted_columns} FROM badges")
      .each do |row|
        old_id = row["id"]

        if existing = Badge.where(name: row["name"]).first
          @badges[old_id.to_s] = existing.id
          next
        end

        row["id"] = (last_id += 1)
        @badges[old_id.to_s] = row["id"]

        if (old_grouping_id = row["badge_grouping_id"])
          # copy_model keys @badge_groupings by Integer source id, while the
          # value read from the row is not an Integer here; try both forms so
          # the lookup does not silently come back nil.
          row["badge_grouping_id"] = @badge_groupings[old_grouping_id.to_i] ||
            @badge_groupings[old_grouping_id.to_s]
        end

        @raw_connection.put_copy_data(row.values)
      end
  end

  @sequences[Badge.sequence_name] = last_id + 1

  copy_model(UserBadge, is_a_user_model: true)
end
def copy_model (
klass ,
skip_if_merged : false ,
is_a_user_model : false ,
skip_processing : false ,
mapping : nil ,
select_sql : nil
)
2018-06-20 03:11:02 +08:00
puts " copying #{ klass . table_name } ... "
2018-06-16 05:11:58 +08:00
columns = klass . columns . map ( & :name )
has_custom_fields = CUSTOM_FIELDS . include? ( klass . name . downcase )
imported_ids = [ ]
last_id = columns . include? ( " id " ) ? ( klass . unscoped . maximum ( :id ) || 1 ) : nil
sql = " COPY #{ klass . table_name } ( #{ columns . map { | c | " \" #{ c } \" " } . join ( " , " ) } ) FROM STDIN "
@raw_connection . copy_data ( sql , @encoder ) do
source_raw_connection
. exec (
select_sql ||
" SELECT #{ columns . map { | c | " \" #{ c } \" " } . join ( " , " ) } FROM #{ klass . table_name } " ,
2023-01-07 19:53:14 +08:00
)
2018-06-16 05:11:58 +08:00
. each do | row |
if row [ " user_id " ]
old_user_id = row [ " user_id " ] . to_i
next if skip_if_merged && @merged_user_ids . include? ( old_user_id )
if is_a_user_model
next if old_user_id < 1
next if user_id_from_imported_id ( old_user_id ) . nil?
end
if old_user_id > = 1
row [ " user_id " ] = user_id_from_imported_id ( old_user_id )
if is_a_user_model && row [ " user_id " ] . nil?
raise " user_id nil for user id ' #{ old_user_id } ' "
2023-01-07 19:53:14 +08:00
end
2018-06-16 05:11:58 +08:00
next if row [ " user_id " ] . nil? # associated record for a deleted user
end
end
row [ " group_id " ] = group_id_from_imported_id ( row [ " group_id " ] ) if row [ " group_id " ]
row [ " category_id " ] = category_id_from_imported_id ( row [ " category_id " ] ) if row [
2018-07-13 05:41:16 +08:00
" category_id "
2023-01-07 19:53:14 +08:00
]
2018-06-16 05:11:58 +08:00
if row [ " topic_id " ] && klass != Category
row [ " topic_id " ] = topic_id_from_imported_id ( row [ " topic_id " ] )
next if row [ " topic_id " ] . nil?
2023-01-07 19:53:14 +08:00
end
2018-06-16 05:11:58 +08:00
if row [ " post_id " ]
row [ " post_id " ] = post_id_from_imported_id ( row [ " post_id " ] )
next if row [ " post_id " ] . nil?
end
row [ " tag_id " ] = tag_id_from_imported_id ( row [ " tag_id " ] ) if row [ " tag_id " ]
row [ " tag_group_id " ] = tag_group_id_from_imported_id ( row [ " tag_group_id " ] ) if row [
" tag_group_id "
2023-01-07 19:53:14 +08:00
]
2018-06-16 05:11:58 +08:00
row [ " upload_id " ] = upload_id_from_imported_id ( row [ " upload_id " ] ) if row [ " upload_id " ]
row [ " deleted_by_id " ] = user_id_from_imported_id ( row [ " deleted_by_id " ] ) if row [
" deleted_by_id "
2023-01-07 19:53:14 +08:00
]
2018-06-20 03:11:02 +08:00
row [ " badge_id " ] = badge_id_from_imported_id ( row [ " badge_id " ] ) if row [ " badge_id " ]
2023-01-07 19:53:14 +08:00
2021-01-08 23:31:39 +08:00
old_id = row [ " id " ] . to_i
2018-06-16 05:11:58 +08:00
if old_id && last_id
row [ " id " ] = ( last_id += 1 )
imported_ids << old_id if has_custom_fields
mapping [ old_id ] = row [ " id " ] if mapping
end
if skip_processing
@raw_connection . put_copy_data ( row . values )
2023-01-07 19:53:14 +08:00
else
2018-06-16 05:11:58 +08:00
process_method_name = " process_ #{ klass . name . underscore } "
2023-01-07 19:53:14 +08:00
2018-06-16 05:11:58 +08:00
processed =
2023-01-07 19:53:14 +08:00
(
2018-06-16 05:11:58 +08:00
if respond_to? ( process_method_name )
send ( process_method_name , HashWithIndifferentAccess . new ( row ) )
2023-01-07 19:53:14 +08:00
else
row
end
)
2018-06-16 05:11:58 +08:00
@raw_connection . put_copy_data columns . map { | c | processed [ c ] } if processed
end
end
end
@sequences [ klass . sequence_name ] = last_id + 1 if last_id
if has_custom_fields
id_mapping_method_name = " #{ klass . name . downcase } _id_from_imported_id " . freeze
return unless respond_to? ( id_mapping_method_name )
create_custom_fields ( klass . name . downcase , " id " , imported_ids ) do | imported_id |
{ record_id : send ( id_mapping_method_name , imported_id ) , value : imported_id }
end
end
end
# copy_model hook for topics. Drops (returns nil for) topics that have no
# category unless they are private messages, and remaps the user columns,
# falling back to -1 when a user has no mapping.
def process_topic(topic)
  return nil if topic["category_id"].nil? && topic["archetype"] != Archetype.private_message

  topic["last_post_user_id"] = user_id_from_imported_id(topic["last_post_user_id"]) || -1
  topic["featured_user1_id"] = user_id_from_imported_id(topic["featured_user1_id"]) || -1
  topic["featured_user2_id"] = user_id_from_imported_id(topic["featured_user2_id"]) || -1
  topic["featured_user3_id"] = user_id_from_imported_id(topic["featured_user3_id"]) || -1
  topic["featured_user4_id"] = user_id_from_imported_id(topic["featured_user4_id"]) || -1

  topic
end
# Hook for posts: remaps the user columns (falling back to -1 when a user
# has no mapping) and records the post's topic in @topic_id_by_post_id.
def process_post(post)
  post["last_editor_id"] = user_id_from_imported_id(post["last_editor_id"]) || -1
  post["reply_to_user_id"] = user_id_from_imported_id(post["reply_to_user_id"]) || -1
  post["locked_by_id"] = user_id_from_imported_id(post["locked_by_id"]) || -1

  # String keys, like every other hook; equivalent to the symbol form on the
  # HashWithIndifferentAccess that copy_model passes in.
  @topic_id_by_post_id[post["id"]] = post["topic_id"]

  post
end
# copy_model hook for post replies: remaps reply_post_id when it is set.
def process_post_reply(post_reply)
  if post_reply["reply_post_id"]
    post_reply["reply_post_id"] = post_id_from_imported_id(post_reply["reply_post_id"])
  end

  post_reply
end
# copy_model hook for quoted posts: remaps quoted_post_id and drops the row
# (returns nil) when the quoted post is missing or has no mapping.
def process_quoted_post(quoted_post)
  if quoted_post["quoted_post_id"]
    quoted_post["quoted_post_id"] = post_id_from_imported_id(quoted_post["quoted_post_id"])
  end
  return nil if quoted_post["quoted_post_id"].nil?

  quoted_post
end
# Topic-link hook: remaps link_topic_id / link_post_id, drops the row when
# there is no (mapped) link_topic_id and rewrites a source-site topic URL so
# it carries the new topic id.
#
# NOTE: this class defines `process_topic_link` a second time further down;
# Ruby keeps the later definition, so this one is shadowed.
def process_topic_link(topic_link)
  old_topic_id = topic_link["link_topic_id"]

  topic_link["link_topic_id"] = topic_id_from_imported_id(old_topic_id) if old_topic_id
  if topic_link["link_post_id"]
    topic_link["link_post_id"] = post_id_from_imported_id(topic_link["link_post_id"])
  end

  return nil if topic_link["link_topic_id"].nil?

  # The base URL and id are escaped so "." only matches a literal dot, and
  # (?!\d) stops id 12 from matching inside /123.
  r =
    %r{\A#{Regexp.escape(@source_base_url.to_s)}/t/([^/]+)/#{Regexp.escape(old_topic_id.to_s)}(?!\d)(.*)}
  if m = r.match(topic_link["url"])
    topic_link["url"] = "#{@source_base_url}/t/#{m[1]}/#{topic_link["link_topic_id"]}#{m[2]}"
  end

  topic_link
end
# copy_model hook for post actions: drops rows without a post_id and remaps
# the related post and the acting-user columns.
def process_post_action(post_action)
  return nil unless post_action["post_id"].present?

  post_action["related_post_id"] = post_id_from_imported_id(post_action["related_post_id"])
  post_action["deferred_by_id"] = user_id_from_imported_id(post_action["deferred_by_id"])
  post_action["agreed_by_id"] = user_id_from_imported_id(post_action["agreed_by_id"])
  post_action["disagreed_by_id"] = user_id_from_imported_id(post_action["disagreed_by_id"])

  post_action
end
# copy_model hook for user actions: remaps every target / acting reference
# that is set. The row is always kept.
def process_user_action(user_action)
  if user_action["target_topic_id"]
    user_action["target_topic_id"] = topic_id_from_imported_id(user_action["target_topic_id"])
  end
  if user_action["target_post_id"]
    user_action["target_post_id"] = post_id_from_imported_id(user_action["target_post_id"])
  end
  if user_action["target_user_id"]
    user_action["target_user_id"] = user_id_from_imported_id(user_action["target_user_id"])
  end
  if user_action["acting_user_id"]
    user_action["acting_user_id"] = user_id_from_imported_id(user_action["acting_user_id"])
  end
  if user_action["queued_post_id"]
    user_action["queued_post_id"] = post_id_from_imported_id(user_action["queued_post_id"])
  end

  user_action
end
# copy_model hook for tag groups: remaps parent_tag_id when it is set.
def process_tag_group(tag_group)
  if tag_group["parent_tag_id"]
    tag_group["parent_tag_id"] = tag_id_from_imported_id(tag_group["parent_tag_id"])
  end

  tag_group
end
# copy_model hook for category groups: drops rows whose category or group
# is missing after remapping.
def process_category_group(category_group)
  return nil if category_group["category_id"].nil? || category_group["group_id"].nil?

  category_group
end
# copy_model hook for group memberships: drops rows that pair an automatic
# group with a user id listed in @merged_user_ids, and rows whose user id
# is below 1.
def process_group_user(group_user)
  user_id = group_user["user_id"].to_i

  if @auto_group_ids.include?(group_user["group_id"].to_i) && @merged_user_ids.include?(user_id)
    return nil
  end
  return nil if user_id < 1

  group_user
end
# copy_model hook for group histories: remaps acting_user_id and
# target_user_id when they are set.
def process_group_history(group_history)
  if group_history["acting_user_id"]
    group_history["acting_user_id"] = user_id_from_imported_id(group_history["acting_user_id"])
  end
  if group_history["target_user_id"]
    group_history["target_user_id"] = user_id_from_imported_id(group_history["target_user_id"])
  end

  group_history
end
# copy_model hook for group archived messages: keeps only rows that still
# have both a topic and a group after remapping.
def process_group_archived_message(gam)
  return nil unless gam["topic_id"].present? && gam["group_id"].present?

  gam
end
# copy_model hook for topic links. An earlier `def process_topic_link`
# exists above in this class; as the later definition, this is the one Ruby
# keeps and the one copy_model calls.
#
# Remaps link_topic_id / link_post_id and, when the linked topic has a
# mapping, rewrites a source-site topic URL so it carries the new topic id
# (the earlier definition does this too, but it is shadowed). Rows are never
# dropped here, so links without a topic are kept as they are.
def process_topic_link(topic_link)
  old_topic_id = topic_link["link_topic_id"]

  topic_link["link_topic_id"] = topic_id_from_imported_id(old_topic_id) if old_topic_id
  if topic_link["link_post_id"]
    topic_link["link_post_id"] = post_id_from_imported_id(topic_link["link_post_id"])
  end

  if (new_topic_id = topic_link["link_topic_id"])
    # The base URL and id are escaped so "." only matches a literal dot, and
    # (?!\d) stops id 12 from matching inside /123.
    pattern =
      %r{\A#{Regexp.escape(@source_base_url.to_s)}/t/([^/]+)/#{Regexp.escape(old_topic_id.to_s)}(?!\d)(.*)}
    if (m = pattern.match(topic_link["url"]))
      topic_link["url"] = "#{@source_base_url}/t/#{m[1]}/#{new_topic_id}#{m[2]}"
    end
  end

  topic_link
end
# copy_model hook for user avatars: remaps both upload references and drops
# the row when neither of them survives.
def process_user_avatar(user_avatar)
  if user_avatar["custom_upload_id"]
    user_avatar["custom_upload_id"] = upload_id_from_imported_id(user_avatar["custom_upload_id"])
  end
  if user_avatar["gravatar_upload_id"]
    user_avatar["gravatar_upload_id"] = upload_id_from_imported_id(user_avatar["gravatar_upload_id"])
  end

  unless user_avatar["custom_upload_id"].present? || user_avatar["gravatar_upload_id"].present?
    return nil
  end

  user_avatar
end
# copy_model hook for user histories: remaps acting_user_id and
# target_user_id when they are set.
def process_user_history(user_history)
  if user_history["acting_user_id"]
    user_history["acting_user_id"] = user_id_from_imported_id(user_history["acting_user_id"])
  end
  if user_history["target_user_id"]
    user_history["target_user_id"] = user_id_from_imported_id(user_history["target_user_id"])
  end

  user_history
end
# copy_model hook for user warnings: remaps created_by_id when it is set.
def process_user_warning(user_warning)
  if user_warning["created_by_id"]
    user_warning["created_by_id"] = user_id_from_imported_id(user_warning["created_by_id"])
  end

  user_warning
end
# copy_model hook for post uploads. Drops the row when the upload has no
# mapping, when the same (post, upload) pair was already seen in this run,
# or when that pair already exists locally.
def process_post_upload(post_upload)
  return nil unless post_upload["upload_id"].present?

  post_id = post_upload["post_id"]
  upload_id = post_upload["upload_id"]

  # Pairs seen so far in this run, keyed by post id.
  @imported_post_uploads ||= {}
  return nil if @imported_post_uploads[post_id]&.include?(upload_id)

  @imported_post_uploads[post_id] ||= []
  @imported_post_uploads[post_id] << upload_id

  return nil if PostUpload.where(post_id: post_id, upload_id: upload_id).exists?

  post_upload
end
# copy_model hook for notifications: remaps post_action_id when it is set.
def process_notification(notification)
  if notification["post_action_id"]
    notification["post_action_id"] = post_action_id_from_imported_id(notification["post_action_id"])
  end

  notification
end
# copy_model hook for OAuth2 user infos: drops the row when a local record
# with the same uid and provider already exists.
def process_oauth2_user_info(r)
  return nil if Oauth2UserInfo.where(uid: r["uid"], provider: r["provider"]).exists?

  r
end
# copy_model hook for associated accounts: drops the row when a local record
# with the same provider_uid and provider_name already exists.
#
# The row is read through the provider_uid / provider_name keys — the same
# names the query filters on. The previous "uid" / "provider" keys belong to
# Oauth2UserInfo rows, so the duplicate check compared against nil.
def process_user_associated_account(r)
  if UserAssociatedAccount.where(
       provider_uid: r["provider_uid"],
       provider_name: r["provider_name"],
     ).exists?
    return nil
  end

  r
end
# copy_model hook for SSO records: drops the row when a local record with
# the same external_id already exists.
def process_single_sign_on_record(r)
  return nil if SingleSignOnRecord.where(external_id: r["external_id"]).exists?

  r
end
# copy_model hook for user badges: remaps granted_by_id and notification_id,
# then drops the row when the user already holds that badge locally.
def process_user_badge(user_badge)
  if user_badge["granted_by_id"]
    user_badge["granted_by_id"] = user_id_from_imported_id(user_badge["granted_by_id"])
  end
  if user_badge["notification_id"]
    user_badge["notification_id"] = notification_id_from_imported_id(user_badge["notification_id"])
  end

  if UserBadge.where(user_id: user_badge["user_id"], badge_id: user_badge["badge_id"]).exists?
    return nil
  end

  user_badge
end
# copy_model hook for email change requests: remaps the old / new email
# token references when they are set.
def process_email_change_request(ecr)
  if ecr["old_email_token_id"]
    ecr["old_email_token_id"] = email_token_id_from_imported_id(ecr["old_email_token_id"])
  end
  if ecr["new_email_token_id"]
    ecr["new_email_token_id"] = email_token_id_from_imported_id(ecr["new_email_token_id"])
  end

  ecr
end
# copy_model hook for tag users: drops the row when the same tag / user
# pair already exists locally.
def process_tag_user(x)
  return nil if TagUser.where(tag_id: x["tag_id"], user_id: x["user_id"]).exists?

  x
end
# copy_model hook for topic tags: drops the row when the same topic / tag
# pair already exists locally.
def process_topic_tag(x)
  return nil if TopicTag.where(topic_id: x["topic_id"], tag_id: x["tag_id"]).exists?

  x
end
# copy_model hook for category tags: drops the row when the same category /
# tag pair already exists locally.
def process_category_tag(x)
  return nil if CategoryTag.where(category_id: x["category_id"], tag_id: x["tag_id"]).exists?

  x
end
# copy_model hook for category tag stats: drops the row when stats for the
# same category / tag pair already exist locally.
def process_category_tag_stat(x)
  return nil if CategoryTagStat.where(category_id: x["category_id"], tag_id: x["tag_id"]).exists?

  x
end
# Ids below 1 (including nil, whose to_i is 0) are returned unchanged; any
# other id is resolved by the superclass implementation.
def user_id_from_imported_id(id)
  if id.to_i >= 1
    super(id)
  else
    id
  end
end
# Ids listed in @auto_group_ids are returned unchanged; any other id is
# resolved by the superclass implementation.
def group_id_from_imported_id(id)
  return id if @auto_group_ids.include?(id&.to_i)

  super(id)
end
# Returns the local tag id stored in @tags for a source tag id, or nil.
# The lookup key is the String form of the id.
def tag_id_from_imported_id(id)
  key = id.to_s
  @tags[key]
end
# Returns the local tag group id for a source tag group id, or nil.
#
# @tag_groups is filled by copy_model, which stores the Integer source id as
# the key; the String form is still accepted as well.
def tag_group_id_from_imported_id(id)
  @tag_groups[id.to_i] || @tag_groups[id.to_s]
end
# Returns the local upload id stored in @uploads for a source upload id, or
# nil. The lookup key is the String form of the id.
def upload_id_from_imported_id(id)
  key = id.to_s
  @uploads[key]
end
# Returns the local post action id for a source post action id, or nil.
#
# @post_actions is filled by copy_model, which stores the Integer source id
# as the key; the String form is still accepted as well.
def post_action_id_from_imported_id(id)
  @post_actions[id.to_i] || @post_actions[id.to_s]
end
# Returns the local badge id stored in @badges for a source badge id, or
# nil. The lookup key is the String form of the id.
def badge_id_from_imported_id(id)
  key = id.to_s
  @badges[key]
end
# Returns the local notification id for a source notification id, or nil.
#
# @notifications is filled by copy_model, which stores the Integer source id
# as the key; the String form is still accepted as well.
def notification_id_from_imported_id(id)
  @notifications[id.to_i] || @notifications[id.to_s]
end
# Returns the local email token id for a source email token id, or nil.
#
# @email_tokens is filled by copy_model, which stores the Integer source id
# as the key; the String form is still accepted as well.
def email_token_id_from_imported_id(id)
  @email_tokens[id.to_i] || @email_tokens[id.to_s]
end
# Sets every sequence recorded in @sequences to its stored value, printing
# each statement before it is executed on the local connection.
def fix_primary_keys
  @sequences.each do |sequence_name, val|
    sql = "SELECT setval('#{sequence_name}', #{val})"
    puts sql
    @raw_connection.exec(sql)
  end
end
# Remaps approved_by_id, primary_group_id and seen_notification_id on the
# newly imported users (id >= @first_new_user_id). A column is only updated
# when its current value has a mapping; users with nothing to update are
# skipped.
def fix_user_columns
  puts "updating foreign keys in the users table..."

  User
    .where("id >= ?", @first_new_user_id)
    .find_each do |u|
      assignments = []

      if new_approved_by_id = user_id_from_imported_id(u.approved_by_id)
        assignments << "approved_by_id = #{new_approved_by_id}"
      end
      if new_primary_group_id = group_id_from_imported_id(u.primary_group_id)
        assignments << "primary_group_id = #{new_primary_group_id}"
      end
      if new_notification_id = notification_id_from_imported_id(u.seen_notification_id)
        assignments << "seen_notification_id = #{new_notification_id}"
      end

      next if assignments.empty?

      @raw_connection.exec("UPDATE users SET #{assignments.join(", ")} WHERE id = #{u.id}")
    end
end
# Rewrites source-site topic URLs inside the raw and cooked text of posts in
# newly imported topics (topic_id >= @first_new_topic_id) so that they carry
# the new topic id. Progress is printed every 200 topics.
#
# NOTE(review): this is a plain substring replace, so ".../t/<slug>/12"
# also matches the start of ".../t/<slug>/123" — confirm that is acceptable.
def fix_topic_links
  puts "updating topic links in posts..."

  update_count = 0
  total = @topics.size
  current = 0

  @topics.each do |old_topic_id, new_topic_id|
    current += 1
    if current % 200 == 0
      percent = (current * 100) / total
      puts "#{current} (#{percent}%) completed. #{update_count} rows updated."
    end

    topic = Topic.find_by_id(new_topic_id)
    next unless topic

    replace_arg = [
      "#{@source_base_url}/t/#{topic.slug}/#{old_topic_id}",
      "#{@source_base_url}/t/#{topic.slug}/#{new_topic_id}",
    ]

    # Same statement for both text columns; only rows that actually change
    # are updated, so cmd_tuples counts real rewrites.
    %w[raw cooked].each do |column|
      result = @raw_connection.async_exec(<<~SQL, replace_arg)
        UPDATE posts
           SET #{column} = replace(#{column}, $1, $2)
         WHERE NOT #{column} IS NULL
           AND topic_id >= #{@first_new_topic_id}
           AND #{column} <> replace(#{column}, $1, $2)
      SQL
      update_count += result.cmd_tuples
    end
  end

  puts "updated #{update_count} rows"
end
end
# Script entry point: build the merger and run it.
merger = BulkImport::DiscourseMerger.new
merger.start