Various migration scripts define a normalize_raw method to do custom processing of post contents before storing them in Post#raw and other fields. These methods normally do not handle nil input, yet nil values are a relatively common occurrence in data dumps. Because normalize_raw is called from many points in a migration script, the current experience is that the script fails repeatedly at different places, forcing you to fix the data or patch in workarounds and restart each time. This PR generalizes nil handling by returning a "<missing>" string instead.

Pros: no more repeated crashes and restarts; consistent behaviour across scripts.
Cons: it might hide data issues. On the other hand, we cannot print a warning from that method, because it is called from inside loops and would flood the console.

* FIX: zendesk import script: support nil inputs in normalize_raw
* FIX: return '<missing>' instead of empty string; do it for all methods
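For reference, here is a minimal sketch of the guard this change introduces. The "<missing>" fallback is from the commit itself; the method body is illustrative only and does not correspond to any particular importer:

# Illustrative sketch only -- the general shape of normalize_raw after this change.
def normalize_raw(raw)
  # nil or blank bodies are common in data dumps; return a placeholder instead
  # of crashing mid-import (printing a warning here would flood the console,
  # since this method is called from inside import loops).
  return "<missing>" if raw.blank?

  raw = raw.dup
  # ... importer-specific cleanup of markup, quoting, attachments, etc. ...
  raw
end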
# frozen_string_literal: true

# Jive importer
require "nokogiri"
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

class ImportScripts::Jive < ImportScripts::Base
  BATCH_SIZE = 1000
  CATEGORY_IDS = [2023, 2003, 2004, 2042, 2036, 2029] # categories that should be imported

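  # Preload the id -> post_number mapping for every existing post so that
  # replies imported later can point at the right post_number in their topic.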
  def initialize(path)
    @path = path
    super()
    @bbcode_to_md = true

    puts "loading post mappings..."
    @post_number_map = {}
    Post
      .pluck(:id, :post_number)
      .each { |post_id, post_number| @post_number_map[post_id] = post_number }
  end

  def created_post(post)
    @post_number_map[post.id] = post.post_number
    super
  end

  def execute
    import_users
    import_groups
    import_group_members
    import_categories
    import_posts

    # Topic.update_all(closed: true)
  end

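  # Thin wrapper around a parsed CSV row: the header row defines one reader
  # method per column, so subsequent rows can be accessed by column name
  # (row.userid, row.subject, ...).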
  class RowResolver
    def load(row)
      @row = row
    end

    def self.create(cols)
      Class.new(RowResolver).new(cols)
    end

    def initialize(cols)
      cols.each_with_index { |col, idx| self.class.public_send(:define_method, col) { @row[idx] } }
    end
  end

  def load_user_batch!(users, offset, total)
    if users.length > 0
      create_users(users, offset: offset, total: total) { |user| user }
      users.clear
    end
  end

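  # Hand-rolled CSV reader for the Jive export format: backslash escapes are
  # rewritten as standard CSV doubled quotes, and physical lines are buffered
  # until the double quotes balance so multi-line fields parse as one record.
  # The first record is the header and seeds the RowResolver; every other
  # record is yielded to the caller.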
  def csv_parse(name)
    filename = "#{@path}/#{name}.csv"
    first = true
    row = nil

    current_row = +""
    double_quote_count = 0

    File
      .open(filename)
      .each_line do |line|
        line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
        line.strip!

        current_row << "\n" unless current_row.empty?
        current_row << line

        double_quote_count += line.scan('"').count

        next if double_quote_count % 2 == 1

        raw =
          begin
            CSV.parse(current_row)
          rescue CSV::MalformedCSVError => e
            puts e.message
            puts "*" * 100
            puts "Bad row skipped, line is: #{line}"
            puts
            puts current_row
            puts
            puts "double quote count is : #{double_quote_count}"
            puts "*" * 100

            current_row = ""
            double_quote_count = 0
            next
          end[
            0
          ]

        if first
          row = RowResolver.create(raw)

          current_row = ""
          double_quote_count = 0
          first = false
          next
        end

        row.load(raw)

        yield row

        current_row = ""
        double_quote_count = 0
      end
  end

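  # Number of data rows in a table's CSV file (line count minus the header).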
  def total_rows(table)
    File.foreach("#{@path}/#{table}.csv").inject(0) { |c, line| c + 1 } - 1
  end

  def import_groups
    puts "", "importing groups..."

    rows = []
    csv_parse("groups") { |row| rows << { id: row.groupid, name: row.name } }

    create_groups(rows) { |row| row }
  end

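  # Builds user hashes from users.csv, substituting a fake email when the
  # export has none, and hands them to create_users in BATCH_SIZE chunks.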
  def import_users
    puts "", "creating users"

    count = 0
    users = []

    total = total_rows("users")

    csv_parse("users") do |row|
      id = row.userid

      email = "#{row.email}"

      # fake it
      email = fake_email if row.email.blank? || row.email !~ /@/

      name = "#{row.firstname} #{row.lastname}"
      username = row.username
      created_at = DateTime.parse(row.creationdate)
      last_seen_at = DateTime.parse(row.lastloggedin)
      is_activated = row.userenabled

      username = name if username == "NULL"
      username = email.split("@")[0] if username.blank?
      name = email.split("@")[0] if name.blank?

      users << {
        id: id,
        email: email,
        name: name,
        username: username,
        created_at: created_at,
        last_seen_at: last_seen_at,
        active: is_activated.to_i == 1,
        approved: true,
      }

      count += 1
      load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
    end

    load_user_batch! users, count, total
  end

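  # Joins previously imported users to previously imported groups, skipping
  # rows whose user or group was not imported.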
  def import_group_members
    puts "", "importing group members..."

    csv_parse("group_members") do |row|
      user_id = user_id_from_imported_user_id(row.userid)
      group_id = group_id_from_imported_group_id(row.groupid)

      GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id
    end
  end

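  # Only communities whitelisted in CATEGORY_IDS become categories; the
  # community id is appended to the name to keep category names unique.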
  def import_categories
    rows = []

    csv_parse("communities") do |row|
      next unless CATEGORY_IDS.include?(row.communityid.to_i)
      rows << { id: row.communityid, name: "#{row.name} (#{row.communityid})" }
    end

    create_categories(rows) { |row| row }
  end

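  # Cleans up a Jive message body: blank/nil bodies become "<missing>" (they
  # are common in data dumps), a fixed number of leading and trailing wrapper
  # characters is trimmed, and inline "jive-image" <img> tags are removed via
  # Nokogiri before the fragment is re-serialized.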
  def normalize_raw!(raw)
    return "<missing>" if raw.blank?

    raw = raw.dup
    raw = raw[5..-6]

    doc = Nokogiri::HTML5.fragment(raw)
    doc.css("img").each { |img| img.remove if img["class"] == "jive-image" }

    raw = doc.to_html
    raw = raw[4..-1]

    raw
  end

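  # Maps a batch of imported rows onto create_posts attributes. The first post
  # of a topic carries its title and category; later posts are attached to the
  # topic via topic_lookup_from_imported_post_id and, when the parent post is
  # known, get a reply_to_post_number from @post_number_map.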
  def import_post_batch!(posts, topics, offset, total)
    create_posts(posts, total: total, offset: offset) do |post|
      mapped = {}

      mapped[:id] = post[:id]
      mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1
      mapped[:raw] = post[:body]
      mapped[:created_at] = post[:created_at]

      topic = topics[post[:topic_id]]

      unless topic
        p "MISSING TOPIC #{post[:topic_id]}"
        p post
        next
      end

      if topic[:post_id]
        parent = topic_lookup_from_imported_post_id(topic[:post_id])
        next unless parent

        mapped[:topic_id] = parent[:topic_id]

        reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
        if reply_to_post_id
          reply_to_post_number = @post_number_map[reply_to_post_id]
          if reply_to_post_number && reply_to_post_number > 1
            mapped[:reply_to_post_number] = reply_to_post_number
          end
        end
      else
        mapped[:category] = category_id_from_imported_category_id(topic[:category_id])
        mapped[:title] = post[:title]
        topic[:post_id] = post[:id]
      end

      next if topic[:deleted] || post[:deleted]

      mapped
    end

    posts.clear
  end

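  # Two passes over messages.csv: the first collects root messages (those with
  # no parentmessageid) into topic_map, uploading any referenced images and
  # attachments and rewriting their URLs into the body; the second turns child
  # messages into reply rows. Both end up going through import_post_batch! in
  # BATCH_SIZE chunks.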
  def import_posts
    puts "", "creating topics and posts"

    topic_map = {}
    thread_map = {}

    csv_parse("messages") do |thread|
      next unless CATEGORY_IDS.include?(thread.containerid.to_i)

      if !thread.parentmessageid
        # topic

        thread_map[thread.threadid] = thread.messageid

        #IMAGE UPLOADER
        if thread.imagecount
          Dir.foreach(
            "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
          ) do |item|
            next if item == (".") || item == ("..") || item == (".DS_Store")
            photo_path =
              "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
            upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
            if upload.persisted?
              puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
              thread.body.gsub!(item, upload.url)
            else
              puts "Error: Image upload is not successful for #{photo_path}!"
            end
          end
        end

        #ATTACHMENT UPLOADER
        if thread.attachmentcount
          Dir.foreach(
            "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
          ) do |item|
            next if item == (".") || item == ("..") || item == (".DS_Store")
            attach_path =
              "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
            upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
            if upload.persisted?
              puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
              thread.body.gsub!(item, upload.url)
              thread.body << "<br/><br/> #{attachment_html(upload, item)}"
            else
              puts "Error: Attachment upload is not successful for #{attach_path}!"
            end
          end
        end

        topic_map[thread.messageid] = {
          id: thread.messageid,
          topic_id: thread.messageid,
          category_id: thread.containerid,
          user_id: thread.userid,
          title: thread.subject,
          body: normalize_raw!(thread.body || thread.subject || "<missing>"),
          created_at: DateTime.parse(thread.creationdate),
        }
      end
    end

    total = total_rows("messages")
    posts = []
    count = 0

    topic_map.each do |_, topic|
      posts << topic if topic[:body]
      count += 1
    end

    csv_parse("messages") do |thread|
      # post

      next unless CATEGORY_IDS.include?(thread.containerid.to_i)

      if thread.parentmessageid
        #IMAGE UPLOADER
        if thread.imagecount
          Dir.foreach(
            "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
          ) do |item|
            next if item == (".") || item == ("..") || item == (".DS_Store")
            photo_path =
              "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
            upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
            if upload.persisted?
              puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
              thread.body.gsub!(item, upload.url)
            else
              puts "Error: Image upload is not successful for #{photo_path}!"
            end
          end
        end

        #ATTACHMENT UPLOADER
        if thread.attachmentcount
          Dir.foreach(
            "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
          ) do |item|
            next if item == (".") || item == ("..") || item == (".DS_Store")
            attach_path =
              "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
            upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
            if upload.persisted?
              puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
              thread.body.gsub!(item, upload.url)
              thread.body << "<br/><br/> #{attachment_html(upload, item)}"
            else
              puts "Error: Attachment upload is not successful for #{attach_path}!"
            end
          end
        end

        row = {
          id: thread.messageid,
          topic_id: thread_map["#{thread.threadid}"],
          user_id: thread.userid,
          title: thread.subject,
          body: normalize_raw!(thread.body),
          created_at: DateTime.parse(thread.creationdate),
        }
        posts << row
        count += 1

        if posts.length > 0 && posts.length % BATCH_SIZE == 0
          import_post_batch!(posts, topic_map, count - posts.length, total)
        end
      end
    end

    import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
  end
end

unless ARGV[0] && Dir.exist?(ARGV[0])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/jive.rb DIRNAME", ""
  exit 1
end

ImportScripts::Jive.new(ARGV[0]).perform