2019-05-03 06:17:27 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2013-12-13 04:35:55 +08:00
|
|
|
require "nokogiri"
|
2015-07-01 09:16:52 +08:00
|
|
|
require "optparse"
|
|
|
|
require File.expand_path(File.dirname(__FILE__) + "/base")
|
|
|
|
|
|
|
|
# Imports a Disqus XML export: every thread becomes an embedded topic in
# IMPORT_CATEGORY and every comment that survives DisqusSAX#normalize becomes
# a post in that topic.
class ImportScripts::Disqus < ImportScripts::Base
  # CHANGE THESE BEFORE RUNNING THE IMPORTER

  IMPORT_FILE = File.expand_path("~/import/site/export.xml")
  IMPORT_CATEGORY = "Front page"

  # Typographic characters that are replaced by plain ASCII in topic titles.
  TITLE_REPLACEMENTS = { "“" => '"', "”" => '"', "’" => "'", "—" => "--", "–" => "-" }.freeze
  TITLE_REPLACEMENT_PATTERN = Regexp.union(TITLE_REPLACEMENTS.keys)

  # Validates the configuration, parses IMPORT_FILE with DisqusSAX and
  # normalizes the parsed threads before handing over to the base class.
  # Aborts the process when the export file or the target category is missing.
  def initialize
    abort("File '#{IMPORT_FILE}' not found") unless File.exist?(IMPORT_FILE)

    @category = Category.where(name: IMPORT_CATEGORY).first
    abort("Category #{IMPORT_CATEGORY} not found") if @category.blank?

    @parser = DisqusSAX.new
    doc = Nokogiri::XML::SAX::Parser.new(@parser, Encoding::UTF_8)
    doc.parse_file(IMPORT_FILE)
    @parser.normalize

    super
  end

  # Runs the import: users first, so that topics and posts can be attributed.
  def execute
    import_users
    import_topics_and_posts
  end

  # Creates one user per distinct author e-mail found on the parsed posts
  # (spam and deleted posts excluded) and on the parsed threads.
  def import_users
    puts "", "importing users..."

    by_email = {}

    @parser.posts.each_value do |p|
      next if p[:is_spam] == "true" || p[:is_deleted] == "true"
      by_email[p[:author_email]] = { name: p[:author_name], username: p[:author_username] }
    end

    @parser.threads.each_value do |t|
      by_email[t[:author_email]] = { name: t[:author_name], username: t[:author_username] }
    end

    create_users(by_email.keys) do |email|
      user = by_email[email]
      { id: email, email: email, username: user[:username], name: user[:name], merge: true }
    end
  end

  # Creates an embedded topic for every parsed thread and, when that topic has
  # at most one post so far, imports the thread's comments into it.
  def import_topics_and_posts
    puts "", "importing topics..."

    @parser.threads.each_value do |t|
      title = plain_title(t[:title])
      comments = t[:posts] || []

      puts "Creating #{title}... (#{comments.size} posts)"

      first_post = import_embedded_topic(t, title)
      next unless first_post.present? && first_post.topic.posts_count <= 1

      # The topic id is captured once; the original code reassigned the
      # topic's first post inside the loop and kept reading topic_id from
      # whatever create_post had returned last.
      topic_id = first_post.topic_id
      comments.each { |comment| import_comment(topic_id, comment) }
    end
  end

  private

  # Returns +title+ with curly quotes and dashes replaced by ASCII
  # equivalents, or nil when the thread has no title. The parsed title itself
  # is left untouched.
  def plain_title(title)
    title&.gsub(TITLE_REPLACEMENT_PATTERN, TITLE_REPLACEMENTS)
  end

  # Imports the page behind the thread's link as an embedded topic and moves
  # it into the configured category.
  #
  # Returns the post returned by TopicEmbed.import_remote, or nil when the
  # remote fetch fails with an HTTP error or no post is returned.
  def import_embedded_topic(thread, title)
    author = find_existing_user(thread[:author_email], thread[:author_username])

    begin
      post = TopicEmbed.import_remote(thread[:link], title: title, user: author)
      post.topic.update_column(:category_id, @category.id) if post
      post
    rescue OpenURI::HTTPError
      nil
    end
  end

  # Creates one post in +topic_id+ for a parsed Disqus comment. Comments whose
  # author cannot be matched to an existing user are skipped.
  def import_comment(topic_id, comment)
    author = find_existing_user(comment[:author_email] || "", comment[:author_username])
    return if author.blank?

    attrs = {
      user_id: author.id,
      topic_id: topic_id,
      raw: comment[:cooked],
      cooked: comment[:cooked],
      # Time.parse keeps the time of day; Date.parse reduced every comment to
      # midnight of its day.
      created_at: Time.parse(comment[:created_at]),
    }

    parent = @parser.posts[comment[:parent_id]] if comment[:parent_id]
    if parent && parent[:discourse_number]
      attrs[:reply_to_post_number] = parent[:discourse_number]
    end

    created = create_post(attrs, comment[:id])
    # create_post lives in the base class (not visible here); only record the
    # post number when it actually handed back something post-like, so that a
    # failed comment does not abort the whole import.
    comment[:discourse_number] = created.post_number if created.respond_to?(:post_number)
  end

  # Looks up a user by (case-insensitive) username and aborts the process when
  # no such user exists.
  def get_post_as_user(username)
    user = User.find_by_username_lower(username.downcase)
    abort("No user found named: '#{username}'") if user.nil?
    user
  end
end
|
2013-12-13 04:35:55 +08:00
|
|
|
|
|
|
|
# SAX handler that collects the threads and posts of a Disqus XML export.
#
# After parsing, +threads+ maps a thread's dsq:id to a hash with :id, :posts
# and the recorded THREAD_FIELDS; +posts+ maps a post's dsq:id to a hash with
# :id, optionally :parent_id, :cooked and the recorded POST_FIELDS. All
# recorded values are strings. +users+ is initialized but never filled by this
# class.
class DisqusSAX < Nokogiri::XML::SAX::Document
  attr_accessor :posts, :threads, :users

  # Field => chain of element names that must all be open for character data
  # to be recorded into that field of the current post.
  POST_FIELDS = {
    author_email: %w[author email],
    author_name: %w[author name],
    author_username: %w[author username],
    author_anonymous: %w[author isAnonymous],
    created_at: %w[createdAt],
    is_deleted: %w[isDeleted],
    is_spam: %w[isSpam],
  }.freeze

  # Same as POST_FIELDS, for the thread currently being parsed.
  THREAD_FIELDS = {
    link: %w[link],
    title: %w[title],
    created_at: %w[createdAt],
    author_email: %w[author email],
    author_name: %w[author name],
    author_username: %w[author username],
    author_anonymous: %w[author isAnonymous],
  }.freeze

  def initialize
    @inside = {}
    @posts = {}
    @threads = {}
    @users = {}
  end

  # Opens a post or thread record, links a post to its thread or parent, and
  # marks +name+ as an open element.
  def start_element(name, attrs = [])
    hashed = attrs.to_h

    case name
    when "post"
      @post = { id: hashed["dsq:id"] }
    when "thread"
      id = hashed["dsq:id"]
      if @post
        # A <thread> inside a <post> is a reference to an already parsed
        # thread; a reference to an unknown thread is ignored instead of
        # raising NoMethodError on nil.
        thread = @threads[id]
        thread[:posts] << @post if thread
      else
        @thread = { id: id, posts: [] }
      end
    when "parent"
      @post[:parent_id] = hashed["dsq:id"] if @post
    end

    @inside[name] = true
  end

  # Stores the finished post or thread under its id and marks +name+ as closed.
  def end_element(name)
    case name
    when "post"
      @posts[@post[:id]] = @post
      @post = nil
    when "thread"
      # Only a top-level <thread> closes a thread record; inside a post the
      # element is just a reference.
      if @post.nil?
        @threads[@thread[:id]] = @thread
        @thread = nil
      end
    end

    @inside[name] = false
  end

  # Appends character data to whichever post/thread fields are currently open.
  def characters(str)
    POST_FIELDS.each { |field, path| record(@post, field, str, *path) }
    THREAD_FIELDS.each { |field, path| record(@thread, field, str, *path) }
  end

  # The CDATA body of <message> becomes the post's :cooked content.
  def cdata_block(str)
    record(@post, :cooked, str, "message")
  end

  # Appends +str+ to target[sym] when +target+ exists and every element named
  # in +params+ is currently open.
  def record(target, sym, str, *params)
    return if target.nil?
    return unless inside?(*params)

    # The buffer must be unfrozen: under `# frozen_string_literal: true` a
    # plain "" literal is frozen and `<<` would raise FrozenError.
    (target[sym] ||= +"") << str
  end

  # True when every element named in +params+ is currently open.
  def inside?(*params)
    params.all? { |element| @inside[element] }
  end

  # Cleans up the parsed data before import:
  # 1. drops spam and deleted posts from every thread,
  # 2. removes threads that are left without posts,
  # 3. merges threads sharing a title into the first thread with that title.
  def normalize
    @threads.each_value do |t|
      t[:posts].reject! { |p| p[:is_spam] == "true" || p[:is_deleted] == "true" }
    end

    # Filtering happens before this check so that a thread consisting only of
    # spam/deleted posts is removed as well. delete_if is used rather than
    # deleting from the hash inside an `each` over it.
    @threads.delete_if { |_id, t| t[:posts].empty? }

    # Merge any threads that have the same title
    by_title = {}
    @threads.delete_if do |_id, t|
      canonical = by_title[t[:title]]
      if canonical
        canonical[:posts].concat(t[:posts])
        true
      else
        by_title[t[:title]] = t
        false
      end
    end
  end
end
|
|
|
|
|
2016-08-18 03:10:25 +08:00
|
|
|
# Script entry point: build the importer (this parses the export file) and run it.
importer = ImportScripts::Disqus.new
importer.perform
|