discourse/script/import_scripts/jive.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

411 lines
11 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
2015-10-31 12:28:48 +08:00
# Jive importer
require 'nokogiri'
require 'csv'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Jive < ImportScripts::Base
BATCH_SIZE = 1000
2017-07-28 09:20:09 +08:00
CATEGORY_IDS = [2023, 2003, 2004, 2042, 2036, 2029] # categories that should be imported
2015-10-31 12:28:48 +08:00
def initialize(path)
@path = path
super()
@bbcode_to_md = true
puts "loading post mappings..."
@post_number_map = {}
Post.pluck(:id, :post_number).each do |post_id, post_number|
@post_number_map[post_id] = post_number
end
end
def created_post(post)
@post_number_map[post.id] = post.post_number
super
end
def execute
import_users
import_groups
import_group_members
import_categories
import_posts
# Topic.update_all(closed: true)
end
class RowResolver
def load(row)
@row = row
end
def self.create(cols)
Class.new(RowResolver).new(cols)
end
def initialize(cols)
2017-07-28 09:20:09 +08:00
cols.each_with_index do |col, idx|
2019-05-07 09:27:05 +08:00
self.class.public_send(:define_method, col) do
2015-10-31 12:28:48 +08:00
@row[idx]
end
end
end
end
def load_user_batch!(users, offset, total)
if users.length > 0
create_users(users, offset: offset, total: total) do |user|
user
end
users.clear
end
end
def csv_parse(name)
filename = "#{@path}/#{name}.csv"
first = true
row = nil
current_row = +""
2015-10-31 12:28:48 +08:00
double_quote_count = 0
File.open(filename).each_line do |line|
2017-07-28 09:20:09 +08:00
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
2015-10-31 12:28:48 +08:00
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
if double_quote_count % 2 == 1
next
end
raw = begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
next
end[0]
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
def total_rows(table)
2017-07-28 09:20:09 +08:00
File.foreach("#{@path}/#{table}.csv").inject(0) { |c, line| c + 1 } - 1
2015-10-31 12:28:48 +08:00
end
def import_groups
puts "", "importing groups..."
rows = []
2016-01-14 14:54:10 +08:00
csv_parse("groups") do |row|
2017-07-28 09:20:09 +08:00
rows << { id: row.groupid, name: row.name }
2015-10-31 12:28:48 +08:00
end
create_groups(rows) do |row|
row
end
end
def import_users
puts "", "creating users"
count = 0
users = []
2016-01-14 14:54:10 +08:00
total = total_rows("users")
2015-10-31 12:28:48 +08:00
2016-01-14 14:54:10 +08:00
csv_parse("users") do |row|
2015-10-31 12:28:48 +08:00
id = row.userid
2016-01-14 14:54:10 +08:00
email = "#{row.email}"
2015-10-31 12:28:48 +08:00
# fake it
if row.email.blank? || row.email !~ /@/
email = fake_email
2015-10-31 12:28:48 +08:00
end
name = "#{row.firstname} #{row.lastname}"
username = row.username
created_at = DateTime.parse(row.creationdate)
last_seen_at = DateTime.parse(row.lastloggedin)
is_activated = row.userenabled
username = name if username == "NULL"
username = email.split("@")[0] if username.blank?
name = email.split("@")[0] if name.blank?
users << {
id: id,
email: email,
name: name,
username: username,
created_at: created_at,
last_seen_at: last_seen_at,
active: is_activated.to_i == 1,
2016-01-14 14:54:10 +08:00
approved: true
2015-10-31 12:28:48 +08:00
}
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users, count - users.length, total
end
end
load_user_batch! users, count, total
end
def import_group_members
puts "", "importing group members..."
csv_parse("group_members") do |row|
user_id = user_id_from_imported_user_id(row.userid)
group_id = group_id_from_imported_group_id(row.groupid)
if user_id && group_id
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
end
end
def import_categories
rows = []
2016-01-14 14:54:10 +08:00
csv_parse("communities") do |row|
2015-10-31 12:28:48 +08:00
next unless CATEGORY_IDS.include?(row.communityid.to_i)
2017-07-28 09:20:09 +08:00
rows << { id: row.communityid, name: "#{row.name} (#{row.communityid})" }
2015-10-31 12:28:48 +08:00
end
create_categories(rows) do |row|
row
end
end
def normalize_raw!(raw)
raw = raw.dup
raw = raw[5..-6]
doc = Nokogiri::HTML.fragment(raw)
doc.css('img').each do |img|
img.remove if img['class'] == "jive-image"
end
raw = doc.to_html
raw = raw[4..-1]
raw
end
def import_post_batch!(posts, topics, offset, total)
2017-07-28 09:20:09 +08:00
create_posts(posts, total: total, offset: offset) do |post|
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
mapped = {}
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
mapped[:id] = post[:id]
mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1
mapped[:raw] = post[:body]
mapped[:created_at] = post[:created_at]
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
topic = topics[post[:topic_id]]
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
unless topic
p "MISSING TOPIC #{post[:topic_id]}"
p post
next
end
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
unless topic[:post_id]
mapped[:category] = category_id_from_imported_category_id(topic[:category_id])
mapped[:title] = post[:title]
topic[:post_id] = post[:id]
else
parent = topic_lookup_from_imported_post_id(topic[:post_id])
next unless parent
mapped[:topic_id] = parent[:topic_id]
reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
if reply_to_post_id
reply_to_post_number = @post_number_map[reply_to_post_id]
if reply_to_post_number && reply_to_post_number > 1
mapped[:reply_to_post_number] = reply_to_post_number
2015-10-31 12:28:48 +08:00
end
end
2017-07-28 09:20:09 +08:00
end
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
next if topic[:deleted] || post[:deleted]
2015-10-31 12:28:48 +08:00
2017-07-28 09:20:09 +08:00
mapped
end
2015-10-31 12:28:48 +08:00
posts.clear
end
def import_posts
puts "", "creating topics and posts"
topic_map = {}
thread_map = {}
2016-01-14 14:54:10 +08:00
csv_parse("messages") do |thread|
2015-10-31 12:28:48 +08:00
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if !thread.parentmessageid
# topic
thread_map[thread.threadid] = thread.messageid
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
2017-07-28 09:20:09 +08:00
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
2017-07-28 09:20:09 +08:00
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
2017-07-28 09:20:09 +08:00
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
2017-07-28 09:20:09 +08:00
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
2017-07-28 09:20:09 +08:00
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
2017-07-28 09:20:09 +08:00
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
2015-10-31 12:28:48 +08:00
topic_map[thread.messageid] = {
id: thread.messageid,
topic_id: thread.messageid,
category_id: thread.containerid,
user_id: thread.userid,
title: thread.subject,
body: normalize_raw!(thread.body || thread.subject || "<missing>"),
created_at: DateTime.parse(thread.creationdate),
}
end
end
2016-01-14 14:54:10 +08:00
total = total_rows("messages")
2015-10-31 12:28:48 +08:00
posts = []
count = 0
topic_map.each do |_, topic|
posts << topic if topic[:body]
2017-07-28 09:20:09 +08:00
count += 1
2015-10-31 12:28:48 +08:00
end
2016-01-14 14:54:10 +08:00
csv_parse("messages") do |thread|
2015-10-31 12:28:48 +08:00
# post
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if thread.parentmessageid
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
2017-07-28 09:20:09 +08:00
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
2017-07-28 09:20:09 +08:00
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
2017-07-28 09:20:09 +08:00
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
2017-07-28 09:20:09 +08:00
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
2017-07-28 09:20:09 +08:00
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
2017-07-28 09:20:09 +08:00
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
2015-10-31 12:28:48 +08:00
row = {
id: thread.messageid,
topic_id: thread_map["#{thread.threadid}"],
user_id: thread.userid,
title: thread.subject,
body: normalize_raw!(thread.body),
created_at: DateTime.parse(thread.creationdate)
}
posts << row
2017-07-28 09:20:09 +08:00
count += 1
2015-10-31 12:28:48 +08:00
if posts.length > 0 && posts.length % BATCH_SIZE == 0
import_post_batch!(posts, topic_map, count - posts.length, total)
end
end
end
import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])
puts "", "Usage:", "", "bundle exec ruby script/import_scripts/jive.rb DIRNAME", ""
exit 1
end
ImportScripts::Jive.new(ARGV[0]).perform