From 5bba959cd5ee19bc08ea9573488e6dd64e62ad12 Mon Sep 17 00:00:00 2001
From: Quangbuu Le
Date: Mon, 24 Jul 2017 19:49:22 +0700
Subject: [PATCH] FIX: vBulletin bulk importer: emails and stats

---
Review notes (this section is not part of the commit message):

* process_user_stat now derives topic_reply_count after post_count and
  topic_count have been defaulted. The original revision subtracted them
  first, which raises NoMethodError when either is nil, and its later
  `topic_reply_count ||= 0` could never take effect. The result is
  clamped at 0.
* @last_user_email_id falls back to 0 when user_emails has no rows.
* Stray `;` removed in process_user_email; the create_user_emails and
  create_user_stats one-liners now match their siblings.
* Hunk headers and the diffstat below were recomputed for the amended
  line counts. The blob ids on the `index` lines are those of the
  original revision.
* Line breaks and indentation were restored from a whitespace-collapsed
  copy; if a context line does not match, apply with
  `git apply --ignore-whitespace`.

 script/bulk_import/base.rb      | 58 ++++++++++++++++++++++++++++-
 script/bulk_import/vbulletin.rb | 74 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 3 deletions(-)

diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb
index ce29ac3d990..8f50c85794e 100644
--- a/script/bulk_import/base.rb
+++ b/script/bulk_import/base.rb
@@ -93,6 +93,7 @@ class BulkImport::Base
 
     puts "Loading users indexes..."
     @last_user_id = User.unscoped.maximum(:id)
+    @last_user_email_id = UserEmail.unscoped.maximum(:id) || 0
     @emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email").to_set
     @usernames_lower = User.unscoped.pluck(:username_lower).to_set
     @mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
@@ -143,6 +144,17 @@ class BulkImport::Base
     suspended_at suspended_till last_emailed_at created_at updated_at
   }
 
+  USER_EMAIL_COLUMNS ||= %i{
+    id user_id email primary created_at updated_at
+  }
+
+  USER_STAT_COLUMNS ||= %i{
+    user_id topics_entered time_read days_visited posts_read_count
+    likes_given likes_received topic_reply_count new_since read_faq
+    first_post_created_at post_count topic_count bounce_score
+    reset_bounce_score_after
+  }
+
   USER_PROFILE_COLUMNS ||= %i{
     user_id location website bio_raw bio_cooked views
   }
@@ -185,6 +197,8 @@ class BulkImport::Base
     end
   end
 
+  def create_user_emails(rows, &block); create_records(rows, "user_email", USER_EMAIL_COLUMNS, &block); end
+  def create_user_stats(rows, &block); create_records(rows, "user_stat", USER_STAT_COLUMNS, &block); end
   def create_user_profiles(rows, &block); create_records(rows, "user_profile", USER_PROFILE_COLUMNS, &block); end
   def create_group_users(rows, &block); create_records(rows, "group_user", GROUP_USER_COLUMNS, &block); end
   def create_categories(rows, &block); create_records(rows, "category", CATEGORY_COLUMNS, &block); end
@@ -246,6 +260,46 @@ class BulkImport::Base
     user
   end
 
+  # Completes a user_emails row: assigns the next id, looks up :user_id in
+  # @users by :imported_user_id, marks the address as primary and replaces a
+  # missing, invalid or already-used address with a random one.
+  def process_user_email(user_email)
+    user_email[:id] = @last_user_email_id += 1
+    user_email[:user_id] = @users[user_email[:imported_user_id].to_s]
+    user_email[:primary] = true
+    user_email[:created_at] ||= NOW
+    user_email[:updated_at] ||= user_email[:created_at]
+    user_email[:email] ||= random_email
+    user_email[:email].downcase!
+
+    # unique email
+    user_email[:email] = random_email until user_email[:email] =~ EmailValidator.email_regex && @emails.add?(user_email[:email])
+
+    user_email
+  end
+
+  # Completes a user_stats row: looks up :user_id in @users by
+  # :imported_user_id and defaults every counter the importer left unset.
+  def process_user_stat(user_stat)
+    user_stat[:user_id] = @users[user_stat[:imported_user_id].to_s]
+    user_stat[:topics_entered] ||= 0
+    user_stat[:time_read] ||= 0
+    user_stat[:days_visited] ||= 0
+    user_stat[:posts_read_count] ||= 0
+    user_stat[:likes_given] ||= 0
+    user_stat[:likes_received] ||= 0
+    user_stat[:new_since] ||= NOW
+    user_stat[:post_count] ||= 0
+    user_stat[:topic_count] ||= 0
+    user_stat[:bounce_score] ||= 0
+
+    # Derived only after the counts above are defaulted, so a missing count
+    # cannot raise on nil; clamped at 0 in case topic_count exceeds post_count.
+    user_stat[:topic_reply_count] = [user_stat[:post_count] - user_stat[:topic_count], 0].max
+
+    user_stat
+  end
+
   def process_user_profile(user_profile)
     user_profile[:bio_raw] = (user_profile[:bio_raw].presence || "").scrub.strip.presence
     user_profile[:bio_cooked] = pre_cook(user_profile[:bio_raw]) if user_profile[:bio_raw].present?
@@ -428,10 +482,10 @@ class BulkImport::Base
 
   def create_records(rows, name, columns)
     start = Time.now
-
     imported_ids = []
     process_method_name = "process_#{name}"
-    sql = "COPY #{name.pluralize} (#{columns.join(",")}) FROM STDIN"
+    # Column names are double-quoted because "primary" (user_emails) is a reserved word in PostgreSQL.
+    sql = "COPY #{name.pluralize} (#{columns.map { |c| "\"#{c}\"" }.join(",")}) FROM STDIN"
 
     @raw_connection.copy_data(sql, @encoder) do
       rows.each do |row|
diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb
index 3d33616b2d6..5d641cf3b18 100644
--- a/script/bulk_import/vbulletin.rb
+++ b/script/bulk_import/vbulletin.rb
@@ -18,6 +18,17 @@ class BulkImport::VBulletin < BulkImport::Base
 
     @client = Mysql2::Client.new(host: host, username: username, password: password, database: database)
     @client.query_options.merge!(as: :array, cache_rows: false)
+
+    # True when the vBulletin `user` table has post_thanks_* columns; they
+    # feed likes_given / likes_received in import_user_stats.
+    @has_post_thanks = mysql_query(<<-SQL
+        SELECT `COLUMN_NAME`
+          FROM `INFORMATION_SCHEMA`.`COLUMNS`
+         WHERE `TABLE_SCHEMA`='#{database}'
+           AND `TABLE_NAME`='user'
+           AND `COLUMN_NAME` LIKE 'post_thanks_%'
+    SQL
+    ).to_a.any?
   end
 
   def execute
@@ -25,6 +36,9 @@ class BulkImport::VBulletin < BulkImport::Base
     import_users
     import_group_users
 
+    import_user_emails
+    import_user_stats
+
     import_user_passwords
     import_user_salts
     import_user_profiles
@@ -73,7 +87,6 @@ class BulkImport::VBulletin < BulkImport::Base
       u = {
         imported_id: row[0],
         username: row[1],
-        email: row[2],
         created_at: Time.zone.at(row[3]),
         date_of_birth: parse_birthday(row[4]),
         primary_group_id: group_id_from_imported_id(row[6]),
@@ -87,6 +100,65 @@ class BulkImport::VBulletin < BulkImport::Base
     end
   end
 
+  # Creates a user_emails row for every vBulletin user whose userid is above
+  # @last_imported_user_id, using joindate as the row's created_at.
+  def import_user_emails
+    puts "Importing user emails..."
+
+    users = mysql_stream <<-SQL
+        SELECT user.userid, email, joindate
+          FROM user
+         WHERE user.userid > #{@last_imported_user_id}
+      ORDER BY user.userid
+    SQL
+
+    create_user_emails(users) do |row|
+      {
+        imported_id: row[0],
+        imported_user_id: row[0],
+        email: row[1],
+        created_at: Time.zone.at(row[2])
+      }
+    end
+  end
+
+  # Creates a user_stats row for every vBulletin user whose userid is above
+  # @last_imported_user_id; like counts are filled only if @has_post_thanks.
+  def import_user_stats
+    puts "Importing user stats..."
+
+    users = mysql_stream <<-SQL
+              SELECT user.userid, joindate, posts, COUNT(thread.threadid) AS threads, post.dateline
+                     #{", post_thanks_user_amount, post_thanks_thanked_times" if @has_post_thanks}
+                FROM user
+     LEFT OUTER JOIN post ON post.postid = user.lastpostid
+     LEFT OUTER JOIN thread ON user.userid = thread.postuserid
+               WHERE user.userid > #{@last_imported_user_id}
+            GROUP BY user.userid
+            ORDER BY user.userid
+    SQL
+
+    create_user_stats(users) do |row|
+      user = {
+        imported_id: row[0],
+        imported_user_id: row[0],
+        new_since: Time.zone.at(row[1]),
+        post_count: row[2],
+        topic_count: row[3],
+        # NOTE(review): row[4] is post.dateline of user.lastpostid, which by its name
+        # is the user's latest post rather than the first - confirm this is intended.
+        first_post_created_at: row[4] && Time.zone.at(row[4])
+      }
+
+      if @has_post_thanks
+        user[:likes_given] = row[5]
+        user[:likes_received] = row[6]
+      end
+
+      user
+    end
+  end
+
   def import_group_users
     puts "Importing group users..."
 