# Source: discourse/script/import_scripts/getsatisfaction.rb

# getsatisfaction importer
#
# pre-req: you will get a bunch of CSV files, be sure to rename them all so
#
# - users.csv is the users table export (it may come from getsatisfaction as Users-Table 1.csv
# - replies.csv is the reply table export
# - topics.csv is the topics table export
#
#
#
# note, the importer will import all topics into a new category called 'Old Forum' and optionally close all the topics
#
require 'csv'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'reverse_markdown' # gem 'reverse_markdown'
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb
# Importer for GetSatisfaction CSV exports.
#
# Reads users.csv, topics.csv and replies.csv from the directory given on
# the command line and imports everything into a single "Old Forum"
# category. Topic URLs are remembered so permalinks can be created at the
# end of the run.
class ImportScripts::GetSatisfaction < ImportScripts::Base

  BATCH_SIZE = 1000

  # path - directory containing users.csv / topics.csv / replies.csv
  def initialize(path)
    @path = path
    super()
    @bbcode_to_md = true

    # imported topic id (Integer) => original GetSatisfaction URL
    @topic_slug = {}

    # Map existing post ids to post numbers so replies can be threaded to
    # the correct post within a topic (see created_post below).
    puts "loading post mappings..."
    @post_number_map = {}
    Post.pluck(:id, :post_number).each do |post_id, post_number|
      @post_number_map[post_id] = post_number
    end
  end

  # Hook invoked by the base importer after each post is created; keeps the
  # post-number map current so later replies can reference this post.
  def created_post(post)
    @post_number_map[post.id] = post.post_number
    super
  end

  def execute
    c = Category.find_by(name: 'Old Forum') ||
      Category.create!(name: 'Old Forum', user: Discourse.system_user)

    import_users
    import_posts(c)
    create_permalinks

    # uncomment if you want to close all the topics
    # Topic.where(category: c).update_all(closed: true)
  end

  # Builds lightweight row objects whose reader methods are named after the
  # CSV header columns, so a data row can be accessed as row.email,
  # row.nick, etc.
  class RowResolver
    def load(row)
      @row = row
    end

    # cols - the header row; a new anonymous subclass is created per file so
    # column readers from different CSVs never collide.
    def self.create(cols)
      Class.new(RowResolver).new(cols)
    end

    def initialize(cols)
      cols.each_with_index do |col, idx|
        self.class.send(:define_method, col) do
          @row[idx]
        end
      end
    end
  end

  def load_user_batch!(users, offset, total)
    if users.length > 0
      create_users(users, offset: offset, total: total) do |user|
        user
      end
      users.clear
    end
  end

  # Streams a semicolon-separated CSV file line by line and yields a
  # RowResolver for every data row; the first row is treated as the header.
  # Lines that do not parse as CSV are reported and skipped (note: the
  # accumulator is reset in the rescue, so records spanning multiple lines
  # are effectively discarded rather than reassembled).
  def csv_parse(name)
    filename = "#{@path}/#{name}.csv"
    first = true
    row = nil
    current_row = ""
    # NOTE(review): tracked but never incremented anywhere, so the error
    # report below always prints 0. Kept for output compatibility.
    double_quote_count = 0

    # In case of Excel export file, I converted it to CSV and used:
    # CSV.open(filename, encoding:'iso-8859-1:utf-8').each do |raw|

    # File.foreach closes the handle when iteration ends (the previous
    # File.open(...).each_line leaked the file descriptor).
    File.foreach(filename) do |line|
      line.strip!

      current_row << "\n" unless current_row.empty?
      current_row << line

      raw =
        begin
          CSV.parse(current_row, col_sep: ";")
        rescue CSV::MalformedCSVError => e
          puts e.message
          puts "*" * 100
          puts "Bad row skipped, line is: #{line}"
          puts
          puts current_row
          puts
          puts "double quote count is : #{double_quote_count}"
          puts "*" * 100
          current_row = ""
          double_quote_count = 0
          next
        end[0]

      if first
        row = RowResolver.create(raw)
        current_row = ""
        double_quote_count = 0
        first = false
        next
      end

      row.load(raw)
      yield row

      current_row = ""
      double_quote_count = 0
    end
  end

  # Number of data rows (excluding the header line) in the given CSV export.
  def total_rows(table)
    # In case of Excel export file, I converted it to CSV and used:
    # CSV.foreach("#{@path}/#{table}.csv", encoding:'iso-8859-1:utf-8').inject(0) {|c, line| c+1} - 1
    File.foreach("#{@path}/#{table}.csv").inject(0) { |c, line| c + 1 } - 1
  end

  def import_users
    puts "", "creating users"

    count = 0
    users = []
    total = total_rows("users")

    csv_parse("users") do |row|
      # Suspended accounts are not migrated.
      if row.suspended_at
        puts "skipping suspended user"
        p row
        next
      end

      id = row.user_id
      email = row.email

      # Discourse requires an email address; fabricate a unique one when
      # the export has none (or an obviously invalid one).
      if row.email.blank? || row.email !~ /@/
        email = SecureRandom.hex << "@domain.com"
      end

      name = row.real_name
      username = row.nick
      created_at = DateTime.parse(row.m_created)

      username = name if username == "NULL"
      username = email.split("@")[0] if username.blank?
      name = email.split("@")[0] if name.blank?

      users << {
        id: id,
        email: email,
        name: name,
        username: username,
        created_at: created_at,
        active: false
      }

      count += 1
      if count % BATCH_SIZE == 0
        load_user_batch! users, count - users.length, total
      end
    end

    # Flush the final, partially-filled batch.
    load_user_batch! users, count, total
  end

  def import_categories
    rows = []
    csv_parse("categories") do |row|
      rows << { id: row.id, name: row.name, description: row.description }
    end

    create_categories(rows) do |row|
      row
    end
  end

  # Converts a GetSatisfaction HTML body into Markdown. Code sections are
  # hoisted out behind random placeholders first so the whitespace
  # rewriting below cannot mangle them, then restored as fenced blocks.
  def normalize_raw!(raw)
    return "<missing>" if raw.nil?
    raw = raw.dup

    # hoist code
    hoisted = {}
    raw.gsub!(/(<pre>\s*)?<code>(.*?)<\/code>(\s*<\/pre>)?/mi) do
      code = $2
      hoist = SecureRandom.hex
      # tidy code, wow, this is impressively crazy
      # NOTE(review): the exact space-run lengths in these literals were
      # mangled by a whitespace-collapsing copy; reconstructed from the
      # comments below — confirm against a pristine copy of the script.
      code.gsub!(/  (\s*)/, "\n\\1")
      code.gsub!(/^\s*\n$/, "\n")
      code.gsub!(/\n+/m, "\n")
      code.strip!
      hoisted[hoist] = code
      hoist
    end

    # impressive seems to be using tripple space as a <p> unless hoisted
    # in this case double space works best ... so odd
    raw.gsub!("  ", "\n\n")

    hoisted.each do |hoist, code|
      raw.gsub!(hoist, "\n```\n" << code << "\n```\n")
    end

    raw = CGI.unescapeHTML(raw)
    raw = ReverseMarkdown.convert(raw)
    raw
  end

  # Creates one batch of posts. `topics` maps imported topic ids to topic
  # hashes; the first post seen for a topic becomes its OP, later posts are
  # threaded as replies via the post-number map.
  def import_post_batch!(posts, topics, offset, total)
    create_posts(posts, total: total, offset: offset) do |post|
      mapped = {}

      mapped[:id] = post[:id]
      mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1
      mapped[:raw] = post[:body]
      mapped[:created_at] = post[:created_at]

      topic = topics[post[:topic_id]]

      unless topic
        p "MISSING TOPIC #{post[:topic_id]}"
        p post
        next
      end

      unless topic[:post_id]
        # First post seen for this topic becomes the topic's OP.
        mapped[:title] = post[:title] || "Topic title missing"
        topic[:post_id] = post[:id]
        mapped[:category] = post[:category]
      else
        parent = topic_lookup_from_imported_post_id(topic[:post_id])
        next unless parent

        mapped[:topic_id] = parent[:topic_id]

        reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
        if reply_to_post_id
          reply_to_post_number = @post_number_map[reply_to_post_id]
          # Replying to post 1 is implicit in Discourse, so only set an
          # explicit reply target for later posts.
          if reply_to_post_number && reply_to_post_number > 1
            mapped[:reply_to_post_number] = reply_to_post_number
          end
        end
      end

      next if topic[:deleted] || post[:deleted]

      mapped
    end

    posts.clear
  end

  def import_posts(category)
    puts "", "creating topics and posts"

    topic_map = {}

    csv_parse("topics") do |topic|
      @topic_slug[topic.id.to_i] = topic.url
      topic_map[topic.id] = {
        id: topic.id,
        topic_id: topic.id,
        title: topic.subject,
        deleted: topic.removed == "1",
        closed: true,
        body: normalize_raw!(topic.additional_detail || topic.subject || "<missing>"),
        created_at: DateTime.parse(topic.created_at),
        user_id: topic.UserId,
        category: category.name
      }
    end

    total = total_rows("replies")

    posts = []
    count = 0

    # a bit lazy: topics are queued as the first "post" of themselves
    topic_map.each do |_, topic|
      posts << topic if topic[:body]
    end

    csv_parse("replies") do |row|
      unless row.created_at
        puts "NO CREATION DATE FOR POST"
        p row
        next
      end

      posts << {
        id: row.id,
        topic_id: row.topic_id,
        reply_id: row.parent_id,
        user_id: row.UserId,
        body: normalize_raw!(row.content),
        created_at: DateTime.parse(row.created_at)
      }

      count += 1

      if posts.length > 0 && posts.length % BATCH_SIZE == 0
        import_post_batch!(posts, topic_map, count - posts.length, total)
      end
    end

    import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
  end

  # Creates Permalink records mapping original GetSatisfaction topic URLs
  # to the freshly imported Discourse topics.
  def create_permalinks
    puts '', 'Creating Permalinks...', ''

    Topic.listable_topics.find_each do |topic|
      tcf = topic.first_post.custom_fields
      if tcf && tcf["import_id"]
        slug = @topic_slug[tcf["import_id"].to_i]
        # Topics that were not part of this import have no slug; skipping
        # them avoids a NoMethodError on nil below.
        next unless slug
        # TODO: replace "http://community.example.com/" with the URL of your community
        slug = slug.gsub("http://community.example.com/", "")
        Permalink.create(url: slug, topic_id: topic.id)
      end
    end
  end
end
# Entry point: the single argument must be an existing directory holding
# the renamed CSV exports (users.csv, topics.csv, replies.csv).
if ARGV[0].nil? || !Dir.exist?(ARGV[0])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME", ""
  exit 1
end

ImportScripts::GetSatisfaction.new(ARGV[0]).perform