mirror of
https://github.com/discourse/discourse.git
synced 2024-11-27 11:29:59 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
411 lines
11 KiB
Ruby
411 lines
11 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Jive importer
|
|
require 'nokogiri'
|
|
require 'csv'
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::Jive < ImportScripts::Base
|
|
|
|
BATCH_SIZE = 1000
|
|
CATEGORY_IDS = [2023, 2003, 2004, 2042, 2036, 2029] # categories that should be imported
|
|
|
|
def initialize(path)
|
|
@path = path
|
|
super()
|
|
@bbcode_to_md = true
|
|
|
|
puts "loading post mappings..."
|
|
@post_number_map = {}
|
|
Post.pluck(:id, :post_number).each do |post_id, post_number|
|
|
@post_number_map[post_id] = post_number
|
|
end
|
|
end
|
|
|
|
def created_post(post)
|
|
@post_number_map[post.id] = post.post_number
|
|
super
|
|
end
|
|
|
|
def execute
|
|
import_users
|
|
import_groups
|
|
import_group_members
|
|
import_categories
|
|
import_posts
|
|
|
|
# Topic.update_all(closed: true)
|
|
end
|
|
|
|
class RowResolver
|
|
def load(row)
|
|
@row = row
|
|
end
|
|
|
|
def self.create(cols)
|
|
Class.new(RowResolver).new(cols)
|
|
end
|
|
|
|
def initialize(cols)
|
|
cols.each_with_index do |col, idx|
|
|
self.class.public_send(:define_method, col) do
|
|
@row[idx]
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def load_user_batch!(users, offset, total)
|
|
if users.length > 0
|
|
create_users(users, offset: offset, total: total) do |user|
|
|
user
|
|
end
|
|
users.clear
|
|
end
|
|
end
|
|
|
|
def csv_parse(name)
|
|
filename = "#{@path}/#{name}.csv"
|
|
first = true
|
|
row = nil
|
|
|
|
current_row = ""
|
|
double_quote_count = 0
|
|
|
|
File.open(filename).each_line do |line|
|
|
|
|
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
|
|
line.strip!
|
|
|
|
current_row << "\n" unless current_row.empty?
|
|
current_row << line
|
|
|
|
double_quote_count += line.scan('"').count
|
|
|
|
if double_quote_count % 2 == 1
|
|
next
|
|
end
|
|
|
|
raw = begin
|
|
CSV.parse(current_row)
|
|
rescue CSV::MalformedCSVError => e
|
|
puts e.message
|
|
puts "*" * 100
|
|
puts "Bad row skipped, line is: #{line}"
|
|
puts
|
|
puts current_row
|
|
puts
|
|
puts "double quote count is : #{double_quote_count}"
|
|
puts "*" * 100
|
|
|
|
current_row = ""
|
|
double_quote_count = 0
|
|
next
|
|
end[0]
|
|
|
|
if first
|
|
row = RowResolver.create(raw)
|
|
|
|
current_row = ""
|
|
double_quote_count = 0
|
|
first = false
|
|
next
|
|
end
|
|
|
|
row.load(raw)
|
|
|
|
yield row
|
|
|
|
current_row = ""
|
|
double_quote_count = 0
|
|
end
|
|
end
|
|
|
|
def total_rows(table)
|
|
File.foreach("#{@path}/#{table}.csv").inject(0) { |c, line| c + 1 } - 1
|
|
end
|
|
|
|
def import_groups
|
|
puts "", "importing groups..."
|
|
|
|
rows = []
|
|
csv_parse("groups") do |row|
|
|
rows << { id: row.groupid, name: row.name }
|
|
end
|
|
|
|
create_groups(rows) do |row|
|
|
row
|
|
end
|
|
end
|
|
|
|
def import_users
|
|
puts "", "creating users"
|
|
|
|
count = 0
|
|
users = []
|
|
|
|
total = total_rows("users")
|
|
|
|
csv_parse("users") do |row|
|
|
|
|
id = row.userid
|
|
|
|
email = "#{row.email}"
|
|
|
|
# fake it
|
|
if row.email.blank? || row.email !~ /@/
|
|
email = SecureRandom.hex << "@domain.com"
|
|
end
|
|
|
|
name = "#{row.firstname} #{row.lastname}"
|
|
username = row.username
|
|
created_at = DateTime.parse(row.creationdate)
|
|
last_seen_at = DateTime.parse(row.lastloggedin)
|
|
is_activated = row.userenabled
|
|
|
|
username = name if username == "NULL"
|
|
username = email.split("@")[0] if username.blank?
|
|
name = email.split("@")[0] if name.blank?
|
|
|
|
users << {
|
|
id: id,
|
|
email: email,
|
|
name: name,
|
|
username: username,
|
|
created_at: created_at,
|
|
last_seen_at: last_seen_at,
|
|
active: is_activated.to_i == 1,
|
|
approved: true
|
|
}
|
|
|
|
count += 1
|
|
if count % BATCH_SIZE == 0
|
|
load_user_batch! users, count - users.length, total
|
|
end
|
|
|
|
end
|
|
|
|
load_user_batch! users, count, total
|
|
end
|
|
|
|
def import_group_members
|
|
puts "", "importing group members..."
|
|
|
|
csv_parse("group_members") do |row|
|
|
user_id = user_id_from_imported_user_id(row.userid)
|
|
group_id = group_id_from_imported_group_id(row.groupid)
|
|
|
|
if user_id && group_id
|
|
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
rows = []
|
|
|
|
csv_parse("communities") do |row|
|
|
next unless CATEGORY_IDS.include?(row.communityid.to_i)
|
|
rows << { id: row.communityid, name: "#{row.name} (#{row.communityid})" }
|
|
end
|
|
|
|
create_categories(rows) do |row|
|
|
row
|
|
end
|
|
end
|
|
|
|
def normalize_raw!(raw)
|
|
raw = raw.dup
|
|
raw = raw[5..-6]
|
|
|
|
doc = Nokogiri::HTML.fragment(raw)
|
|
doc.css('img').each do |img|
|
|
img.remove if img['class'] == "jive-image"
|
|
end
|
|
|
|
raw = doc.to_html
|
|
raw = raw[4..-1]
|
|
|
|
raw
|
|
end
|
|
|
|
def import_post_batch!(posts, topics, offset, total)
|
|
create_posts(posts, total: total, offset: offset) do |post|
|
|
|
|
mapped = {}
|
|
|
|
mapped[:id] = post[:id]
|
|
mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1
|
|
mapped[:raw] = post[:body]
|
|
mapped[:created_at] = post[:created_at]
|
|
|
|
topic = topics[post[:topic_id]]
|
|
|
|
unless topic
|
|
p "MISSING TOPIC #{post[:topic_id]}"
|
|
p post
|
|
next
|
|
end
|
|
|
|
unless topic[:post_id]
|
|
mapped[:category] = category_id_from_imported_category_id(topic[:category_id])
|
|
mapped[:title] = post[:title]
|
|
topic[:post_id] = post[:id]
|
|
else
|
|
parent = topic_lookup_from_imported_post_id(topic[:post_id])
|
|
next unless parent
|
|
|
|
mapped[:topic_id] = parent[:topic_id]
|
|
|
|
reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
|
|
if reply_to_post_id
|
|
reply_to_post_number = @post_number_map[reply_to_post_id]
|
|
if reply_to_post_number && reply_to_post_number > 1
|
|
mapped[:reply_to_post_number] = reply_to_post_number
|
|
end
|
|
end
|
|
end
|
|
|
|
next if topic[:deleted] || post[:deleted]
|
|
|
|
mapped
|
|
end
|
|
|
|
posts.clear
|
|
end
|
|
|
|
def import_posts
|
|
puts "", "creating topics and posts"
|
|
|
|
topic_map = {}
|
|
thread_map = {}
|
|
|
|
csv_parse("messages") do |thread|
|
|
|
|
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
|
|
|
|
if !thread.parentmessageid
|
|
# topic
|
|
|
|
thread_map[thread.threadid] = thread.messageid
|
|
|
|
#IMAGE UPLOADER
|
|
if thread.imagecount
|
|
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
|
|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
|
|
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
|
|
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
|
|
if upload.persisted?
|
|
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
|
|
thread.body.gsub!(item, upload.url)
|
|
else
|
|
puts "Error: Image upload is not successful for #{photo_path}!"
|
|
end
|
|
end
|
|
end
|
|
|
|
#ATTACHMENT UPLOADER
|
|
if thread.attachmentcount
|
|
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
|
|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
|
|
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
|
|
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
|
|
if upload.persisted?
|
|
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
|
|
thread.body.gsub!(item, upload.url)
|
|
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
|
|
else
|
|
puts "Error: Attachment upload is not successful for #{attach_path}!"
|
|
end
|
|
end
|
|
end
|
|
|
|
topic_map[thread.messageid] = {
|
|
id: thread.messageid,
|
|
topic_id: thread.messageid,
|
|
category_id: thread.containerid,
|
|
user_id: thread.userid,
|
|
title: thread.subject,
|
|
body: normalize_raw!(thread.body || thread.subject || "<missing>"),
|
|
created_at: DateTime.parse(thread.creationdate),
|
|
}
|
|
|
|
end
|
|
end
|
|
|
|
total = total_rows("messages")
|
|
posts = []
|
|
count = 0
|
|
|
|
topic_map.each do |_, topic|
|
|
posts << topic if topic[:body]
|
|
count += 1
|
|
end
|
|
|
|
csv_parse("messages") do |thread|
|
|
# post
|
|
|
|
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
|
|
|
|
if thread.parentmessageid
|
|
|
|
#IMAGE UPLOADER
|
|
if thread.imagecount
|
|
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
|
|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
|
|
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
|
|
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
|
|
if upload.persisted?
|
|
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
|
|
thread.body.gsub!(item, upload.url)
|
|
else
|
|
puts "Error: Image upload is not successful for #{photo_path}!"
|
|
end
|
|
end
|
|
end
|
|
|
|
#ATTACHMENT UPLOADER
|
|
if thread.attachmentcount
|
|
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
|
|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
|
|
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
|
|
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
|
|
if upload.persisted?
|
|
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
|
|
thread.body.gsub!(item, upload.url)
|
|
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
|
|
else
|
|
puts "Error: Attachment upload is not successful for #{attach_path}!"
|
|
end
|
|
end
|
|
end
|
|
|
|
row = {
|
|
id: thread.messageid,
|
|
topic_id: thread_map["#{thread.threadid}"],
|
|
user_id: thread.userid,
|
|
title: thread.subject,
|
|
body: normalize_raw!(thread.body),
|
|
created_at: DateTime.parse(thread.creationdate)
|
|
}
|
|
posts << row
|
|
count += 1
|
|
|
|
if posts.length > 0 && posts.length % BATCH_SIZE == 0
|
|
import_post_batch!(posts, topic_map, count - posts.length, total)
|
|
end
|
|
end
|
|
end
|
|
|
|
import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
|
|
end
|
|
|
|
end
|
|
|
|
unless ARGV[0] && Dir.exist?(ARGV[0])
|
|
puts "", "Usage:", "", "bundle exec ruby script/import_scripts/jive.rb DIRNAME", ""
|
|
exit 1
|
|
end
|
|
|
|
ImportScripts::Jive.new(ARGV[0]).perform
|