2019-05-03 06:17:27 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
require "csv"
|
2015-03-19 03:30:42 +08:00
|
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
2014-08-12 00:44:17 +08:00
|
|
|
|
2021-10-27 20:22:37 +08:00
|
|
|
# NOTE: this importer expects a text file obtained through Vanilla Porter
|
|
|
|
# user documentation: https://meta.discourse.org/t/how-to-migrate-import-from-vanilla-to-discourse/27273
|
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
class ImportScripts::Vanilla < ImportScripts::Base
|
|
|
|
def initialize
|
|
|
|
super
|
|
|
|
|
|
|
|
@vanilla_file = ARGV[0]
|
|
|
|
if @vanilla_file.blank?
|
|
|
|
raise ArgumentError.new(
|
|
|
|
"Vanilla file argument missing. Provide full path to vanilla csv file.",
|
|
|
|
)
|
2023-01-07 19:53:14 +08:00
|
|
|
end
|
2014-10-17 22:58:19 +08:00
|
|
|
|
|
|
|
@use_lastest_activity_as_user_bio = true if ARGV.include?("use-latest-activity-as-user-bio")
|
2014-08-12 00:44:17 +08:00
|
|
|
end
|
|
|
|
|
|
|
|
  # Runs the import pipeline in dependency order: users and categories must
  # exist before the topics/posts that reference them, and public content is
  # imported before private conversations.
  def execute
    check_file_exist
    parse_file

    import_users
    import_categories

    import_topics
    import_posts

    import_private_topics
    import_private_posts
  end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
def check_file_exist
|
|
|
|
unless File.exist?(@vanilla_file)
|
|
|
|
raise ArgumentError.new("File does not exist: #{@vanilla_file}")
|
2023-01-07 19:53:14 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
end
|
|
|
|
|
|
|
|
def parse_file
|
|
|
|
puts "parsing file..."
|
|
|
|
file = read_file
|
2018-06-07 13:28:18 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
# TODO: parse header & validate version number
|
|
|
|
header = file.readline
|
2018-06-07 13:28:18 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
until file.eof?
|
|
|
|
line = file.readline
|
|
|
|
next if line.blank?
|
|
|
|
next if line.start_with?("//")
|
2018-06-07 13:28:18 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
if m = /^Table: (\w+)/.match(line)
|
|
|
|
# extract table name
|
|
|
|
table = m[1].underscore.pluralize
|
|
|
|
# read the data until an empty line
|
|
|
|
data = []
|
|
|
|
# first line is the table definition, turn that into a proper csv header
|
|
|
|
data << file.readline.split(",").map { |c| c.split(":")[0].underscore }.join(",")
|
|
|
|
until (line = file.readline).blank?
|
|
|
|
data << line.strip
|
|
|
|
end
|
|
|
|
# PERF: don't parse useless tables
|
2014-10-17 22:58:19 +08:00
|
|
|
useless_tables = ["user_meta"]
|
|
|
|
useless_tables << "activities" unless @use_lastest_activity_as_user_bio
|
|
|
|
next if useless_tables.include?(table)
|
2014-08-12 00:44:17 +08:00
|
|
|
# parse the data
|
|
|
|
puts "parsing #{table}..."
|
|
|
|
parsed_data =
|
|
|
|
CSV
|
|
|
|
.parse(data.join("\n"), headers: true, header_converters: :symbol)
|
|
|
|
.map { |row| row.to_hash }
|
|
|
|
instance_variable_set("@#{table}".to_sym, parsed_data)
|
|
|
|
end
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def read_file
|
|
|
|
puts "reading file..."
|
|
|
|
string =
|
|
|
|
File
|
|
|
|
.read(@vanilla_file)
|
|
|
|
.gsub("\\N", "")
|
|
|
|
.gsub(/\\$\n/m, "\\n")
|
|
|
|
.gsub("\\,", ",")
|
|
|
|
.gsub(/(?<!\\)\\"/, '""')
|
|
|
|
.gsub(/\\\\\\"/, '\\""')
|
|
|
|
StringIO.new(string)
|
|
|
|
end
|
2014-08-20 17:38:35 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
def import_users
|
|
|
|
puts "", "importing users..."
|
|
|
|
|
2014-10-29 15:43:10 +08:00
|
|
|
admin_role_id = @roles.select { |r| r[:name] == "Administrator" }.first[:role_id]
|
|
|
|
moderator_role_id = @roles.select { |r| r[:name] == "Moderator" }.first[:role_id]
|
2014-08-20 17:38:35 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
activities = (@activities || []).reject { |a| a[:activity_user_id] != a[:regarding_user_id] }
|
|
|
|
|
2014-10-17 22:58:19 +08:00
|
|
|
create_users(@users) do |user|
|
|
|
|
next if user[:name] == "[Deleted User]"
|
2014-08-20 17:38:35 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
if @use_lastest_activity_as_user_bio
|
|
|
|
last_activity = activities.select { |a| user[:user_id] == a[:activity_user_id] }.last
|
|
|
|
bio_raw = last_activity.try(:[], :story) || ""
|
2018-06-07 13:28:18 +08:00
|
|
|
else
|
2014-08-12 00:44:17 +08:00
|
|
|
bio_raw = user[:discovery_text]
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
|
|
|
|
u = {
|
2014-08-12 00:44:17 +08:00
|
|
|
id: user[:user_id],
|
|
|
|
email: user[:email],
|
2014-08-22 16:11:12 +08:00
|
|
|
username: user[:name],
|
2014-08-12 00:44:17 +08:00
|
|
|
created_at: parse_date(user[:date_inserted]),
|
2014-08-20 17:38:35 +08:00
|
|
|
bio_raw: clean_up(bio_raw),
|
2014-08-14 04:17:16 +08:00
|
|
|
avatar_url: user[:photo],
|
2014-08-12 00:44:17 +08:00
|
|
|
moderator:
|
|
|
|
@user_roles
|
|
|
|
.select { |ur| ur[:user_id] == user[:user_id] }
|
|
|
|
.map { |ur| ur[:role_id] }
|
|
|
|
.include?(moderator_role_id),
|
|
|
|
admin:
|
|
|
|
@user_roles
|
|
|
|
.select { |ur| ur[:user_id] == user[:user_id] }
|
|
|
|
.map { |ur| ur[:role_id] }
|
|
|
|
.include?(admin_role_id),
|
2018-06-07 13:28:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
u
|
2014-08-12 00:44:17 +08:00
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def import_categories
|
2014-08-20 17:38:35 +08:00
|
|
|
puts "", "importing categories..."
|
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
# save some information about the root category
|
|
|
|
@root_category = @categories.select { |c| c[:category_id] == "-1" }.first
|
|
|
|
@root_category_created_at = parse_date(@root_category[:date_inserted])
|
|
|
|
|
|
|
|
# removes root category
|
|
|
|
@categories.reject! { |c| c[:category_id] == "-1" }
|
|
|
|
|
|
|
|
# adds root's child categories
|
|
|
|
first_level_categories = @categories.select { |c| c[:parent_category_id] == "-1" }
|
|
|
|
if first_level_categories.count > 0
|
|
|
|
puts "", "importing first-level categories..."
|
|
|
|
create_categories(first_level_categories) { |category| import_category(category) }
|
|
|
|
|
|
|
|
# adds other categories
|
|
|
|
second_level_categories = @categories.select { |c| c[:parent_category_id] != "-1" }
|
|
|
|
if second_level_categories.count > 0
|
|
|
|
puts "", "importing second-level categories..."
|
|
|
|
create_categories(second_level_categories) { |category| import_category(category) }
|
|
|
|
end
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def import_category(category)
|
|
|
|
c = {
|
|
|
|
id: category[:category_id],
|
|
|
|
name: category[:name],
|
2014-08-14 04:17:16 +08:00
|
|
|
user_id:
|
|
|
|
user_id_from_imported_user_id(category[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
|
2014-08-12 00:44:17 +08:00
|
|
|
position: category[:sort].to_i,
|
|
|
|
created_at: parse_category_date(category[:date_inserted]),
|
|
|
|
description: clean_up(category[:description]),
|
|
|
|
}
|
|
|
|
if category[:parent_category_id] != "-1"
|
2015-03-13 04:15:02 +08:00
|
|
|
c[:parent_category_id] = category_id_from_imported_category_id(category[:parent_category_id])
|
2014-08-12 00:44:17 +08:00
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
c
|
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def parse_category_date(date)
|
|
|
|
date == "0000-00-00 00:00:00" ? @root_category_created_at : parse_date(date)
|
|
|
|
end
|
|
|
|
|
|
|
|
def import_topics
|
|
|
|
puts "", "importing topics..."
|
2018-06-07 13:28:18 +08:00
|
|
|
|
2014-08-12 00:44:17 +08:00
|
|
|
create_posts(@discussions) do |discussion|
|
|
|
|
{
|
|
|
|
id: "discussion#" + discussion[:discussion_id],
|
2014-08-14 04:17:16 +08:00
|
|
|
user_id:
|
|
|
|
user_id_from_imported_user_id(discussion[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
|
2014-08-12 00:44:17 +08:00
|
|
|
title: discussion[:name],
|
2015-03-13 04:15:02 +08:00
|
|
|
category: category_id_from_imported_category_id(discussion[:category_id]),
|
2014-08-12 00:44:17 +08:00
|
|
|
raw: clean_up(discussion[:body]),
|
|
|
|
created_at: parse_date(discussion[:date_inserted]),
|
|
|
|
}
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def import_posts
|
|
|
|
puts "", "importing posts..."
|
|
|
|
|
|
|
|
create_posts(@comments) do |comment|
|
|
|
|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment[:discussion_id])
|
|
|
|
|
|
|
|
{
|
|
|
|
id: "comment#" + comment[:comment_id],
|
2014-08-14 04:17:16 +08:00
|
|
|
user_id:
|
|
|
|
user_id_from_imported_user_id(comment[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
|
2014-08-12 00:44:17 +08:00
|
|
|
topic_id: t[:topic_id],
|
|
|
|
raw: clean_up(comment[:body]),
|
|
|
|
created_at: parse_date(comment[:date_inserted]),
|
|
|
|
}
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
  # Imports Vanilla conversations as Discourse private-message topics.
  # The first message of each conversation becomes the topic body; the other
  # participants (resolved by email against already-imported users) become
  # the PM's target usernames. Conversations with no first message, or whose
  # participants couldn't be resolved, are skipped.
  def import_private_topics
    puts "", "importing private topics..."

    create_posts(@conversations) do |conversation|
      next if conversation[:first_message_id].blank?

      # list all other user ids in the conversation (everyone but the sender)
      user_ids_in_conversation =
        @user_conversations
          .select do |uc|
            uc[:conversation_id] == conversation[:conversation_id] &&
              uc[:user_id] != conversation[:insert_user_id]
          end
          .map { |uc| uc[:user_id] }

      # retrieve their emails from the parsed export
      user_emails_in_conversation =
        @users.select { |u| user_ids_in_conversation.include?(u[:user_id]) }.map { |u| u[:email] }

      # retrieve their usernames from the database (only users that were
      # actually imported will match)
      target_usernames =
        User
          .joins(:user_emails)
          .where(user_emails: { email: user_emails_in_conversation })
          .pluck(:username)

      # a PM needs at least one recipient
      next if target_usernames.blank?

      user = find_user_by_import_id(conversation[:insert_user_id]) || Discourse.system_user
      first_message =
        @conversation_messages
          .select { |cm| cm[:message_id] == conversation[:first_message_id] }
          .first

      {
        id: "conversation#" + conversation[:conversation_id],
        user_id: user.id,
        title: "Private message from #{user.username}",
        target_usernames: target_usernames,
        raw: clean_up(first_message[:body]),
        created_at: parse_date(conversation[:date_inserted]),
      }
    end
  end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
def import_private_posts
|
|
|
|
puts "", "importing private posts..."
|
|
|
|
|
|
|
|
first_message_ids = Set.new(@conversations.map { |c| c[:first_message_id] }.to_a)
|
|
|
|
@conversation_messages.reject! { |cm| first_message_ids.include?(cm[:message_id]) }
|
|
|
|
|
|
|
|
create_posts(@conversation_messages) do |message|
|
|
|
|
unless t = topic_lookup_from_imported_post_id("conversation#" + message[:conversation_id])
|
|
|
|
next
|
2023-01-07 19:53:14 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
{
|
|
|
|
archetype: Archetype.private_message,
|
|
|
|
id: "message#" + message[:message_id],
|
2014-08-14 04:17:16 +08:00
|
|
|
user_id:
|
|
|
|
user_id_from_imported_user_id(message[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
|
2014-08-12 00:44:17 +08:00
|
|
|
topic_id: t[:topic_id],
|
|
|
|
raw: clean_up(message[:body]),
|
|
|
|
created_at: parse_date(message[:date_inserted]),
|
|
|
|
}
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
end
|
2014-08-12 00:44:17 +08:00
|
|
|
|
|
|
|
  # Parses a Vanilla timestamp ("YYYY-MM-DD HH:MM:SS") into a DateTime.
  # Raises Date::Error on malformed input.
  # NOTE(review): DateTime is considered legacy; Time.strptime would be the
  # modern choice, but downstream callers may rely on the DateTime class —
  # confirm before changing.
  def parse_date(date)
    DateTime.strptime(date, "%Y-%m-%d %H:%M:%S")
  end
|
|
|
|
|
|
|
|
def clean_up(raw)
|
2014-08-20 17:38:35 +08:00
|
|
|
return "" if raw.blank?
|
|
|
|
raw
|
|
|
|
.gsub("\\n", "\n")
|
|
|
|
.gsub(%r{</?pre\s*>}i, "\n```\n")
|
|
|
|
.gsub(%r{</?code\s*>}i, "`")
|
|
|
|
.gsub("<", "<")
|
|
|
|
.gsub(">", ">")
|
2014-08-12 00:44:17 +08:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# kick off the import when this script is executed directly
ImportScripts::Vanilla.new.perform
|