mirror of
https://github.com/discourse/discourse.git
synced 2024-12-21 15:24:14 +08:00
511 lines
14 KiB
Ruby
511 lines
14 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "nokogiri"
|
|
require "htmlentities"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
# https://developers.jivesoftware.com/api/v3/cloud/rest/index.html
|
|
|
|
class ImportScripts::JiveApi < ImportScripts::Base
|
|
USER_COUNT = 1000
|
|
POST_COUNT = 100
|
|
STAFF_GUARDIAN = Guardian.new(Discourse.system_user)
|
|
|
|
TO_IMPORT = [
|
|
#############################
|
|
# WHOLE CATEGORY OF CONTENT #
|
|
#############################
|
|
# Announcement & News
|
|
{
|
|
jive_object: {
|
|
type: 37,
|
|
id: 1004,
|
|
},
|
|
filters: {
|
|
created_after: 1.year.ago,
|
|
type: "post",
|
|
},
|
|
category_id: 7,
|
|
},
|
|
# Questions & Answers / General Discussions
|
|
{
|
|
jive_object: {
|
|
type: 14,
|
|
id: 2006,
|
|
},
|
|
filters: {
|
|
created_after: 6.months.ago,
|
|
type: "discussion",
|
|
},
|
|
category: Proc.new { |c| c["question"] ? 5 : 21 },
|
|
},
|
|
# Anywhere beta
|
|
{
|
|
jive_object: {
|
|
type: 14,
|
|
id: 2052,
|
|
},
|
|
filters: {
|
|
created_after: 6.months.ago,
|
|
type: "discussion",
|
|
},
|
|
category_id: 22,
|
|
},
|
|
# Tips & Tricks
|
|
{ jive_object: { type: 37, id: 1284 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1319 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1177 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1165 }, filters: { type: "post" }, category_id: 6 },
|
|
# Ambassadors
|
|
{
|
|
jive_object: {
|
|
type: 700,
|
|
id: 1001,
|
|
},
|
|
filters: {
|
|
type: "discussion",
|
|
},
|
|
authenticated: true,
|
|
category_id: 8,
|
|
},
|
|
# Experts
|
|
{
|
|
jive_object: {
|
|
type: 700,
|
|
id: 1034,
|
|
},
|
|
filters: {
|
|
type: "discussion",
|
|
},
|
|
authenticated: true,
|
|
category_id: 15,
|
|
},
|
|
# Feature Requests
|
|
{ jive_object: { type: 14, id: 2015 }, filters: { type: "idea" }, category_id: 31 },
|
|
####################
|
|
# SELECTED CONTENT #
|
|
####################
|
|
# Announcement & News
|
|
{
|
|
jive_object: {
|
|
type: 37,
|
|
id: 1004,
|
|
},
|
|
filters: {
|
|
entities: {
|
|
38 => [1345, 1381, 1845, 2046, 2060, 2061],
|
|
},
|
|
},
|
|
category_id: 7,
|
|
},
|
|
# Problem Solving
|
|
{
|
|
jive_object: {
|
|
type: 14,
|
|
id: 2006,
|
|
},
|
|
filters: {
|
|
entities: {
|
|
2 => [
|
|
116_685,
|
|
160_745,
|
|
177_010,
|
|
223_482,
|
|
225_036,
|
|
233_228,
|
|
257_882,
|
|
285_103,
|
|
292_297,
|
|
345_243,
|
|
363_250,
|
|
434_546,
|
|
],
|
|
},
|
|
},
|
|
category_id: 10,
|
|
},
|
|
# General Discussions
|
|
{
|
|
jive_object: {
|
|
type: 14,
|
|
id: 2006,
|
|
},
|
|
filters: {
|
|
entities: {
|
|
2 => [178_203, 188_350, 312_734],
|
|
},
|
|
},
|
|
category_id: 21,
|
|
},
|
|
# Questions & Answers
|
|
{
|
|
jive_object: {
|
|
type: 14,
|
|
id: 2006,
|
|
},
|
|
filters: {
|
|
entities: {
|
|
2 => [418_811],
|
|
},
|
|
},
|
|
category_id: 5,
|
|
},
|
|
]
|
|
|
|
def initialize
|
|
super
|
|
@base_uri = ENV["BASE_URI"]
|
|
@username = ENV["USERNAME"]
|
|
@password = ENV["PASSWORD"]
|
|
@htmlentities = HTMLEntities.new
|
|
end
|
|
|
|
def execute
|
|
update_existing_users
|
|
import_users
|
|
import_contents
|
|
import_bookmarks
|
|
mark_topics_as_solved
|
|
end
|
|
|
|
def update_existing_users
|
|
puts "", "updating existing users..."
|
|
|
|
# we just need to do this once
|
|
return if User.human_users.limit(101).count > 100
|
|
|
|
User.human_users.find_each do |user|
|
|
people = get("people/email/#{user.email}?fields=initialLogin,-resources", true)
|
|
if people && people["initialLogin"].present?
|
|
created_at = DateTime.parse(people["initialLogin"])
|
|
user.update_columns(created_at: created_at) if user.created_at > created_at
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users..."
|
|
|
|
imported_users = 0
|
|
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
|
|
|
|
loop do
|
|
users =
|
|
get(
|
|
"people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}",
|
|
true,
|
|
)
|
|
create_users(users["list"], offset: imported_users) do |user|
|
|
{
|
|
id: user["id"],
|
|
created_at: user["initialLogin"],
|
|
email: user["emails"].find { |email| email["primary"] }["value"],
|
|
username: user["mentionName"],
|
|
name: user["displayName"],
|
|
avatar_url: user["thumbnailUrl"],
|
|
}
|
|
end
|
|
|
|
break if users["list"].size < USER_COUNT || users.dig("links", "next").blank?
|
|
imported_users += users["list"].size
|
|
break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_contents
|
|
puts "", "importing contents..."
|
|
|
|
TO_IMPORT.each do |to_import|
|
|
puts Time.now
|
|
entity = to_import[:jive_object]
|
|
places =
|
|
get(
|
|
"places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})",
|
|
to_import[:authenticated],
|
|
)
|
|
import_place_contents(places["list"][0], to_import) if places && places["list"].present?
|
|
end
|
|
end
|
|
|
|
def import_place_contents(place, to_import)
|
|
puts "", "importing contents for '#{place["name"]}'..."
|
|
|
|
start_index = 0
|
|
|
|
if to_import.dig(:filters, :entities).present?
|
|
path = "contents"
|
|
entities =
|
|
to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
|
|
filters = "filter=entityDescriptor(#{entities.join(",")})"
|
|
else
|
|
path = "places/#{place["placeID"]}/contents"
|
|
filters = +"filter=status(published)"
|
|
if to_import[:filters]
|
|
if to_import[:filters][:type].present?
|
|
filters << "&filter=type(#{to_import[:filters][:type]})"
|
|
end
|
|
if to_import[:filters][:created_after].present?
|
|
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})"
|
|
end
|
|
end
|
|
end
|
|
|
|
loop do
|
|
contents =
|
|
get(
|
|
"#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
|
|
to_import[:authenticated],
|
|
)
|
|
contents["list"].each do |content|
|
|
content_id = content["contentID"].presence || "#{content["type"]}_#{content["id"]}"
|
|
|
|
custom_fields = { import_id: content_id }
|
|
custom_fields[:import_permalink] = content["permalink"] if content["permalink"].present?
|
|
|
|
topic = {
|
|
id: content_id,
|
|
created_at: content["published"],
|
|
title: @htmlentities.decode(content["subject"]),
|
|
raw: process_raw(content["content"]["text"]),
|
|
user_id:
|
|
user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
views: content["viewCount"],
|
|
custom_fields: custom_fields,
|
|
}
|
|
|
|
if to_import[:category]
|
|
topic[:category] = to_import[:category].call(content)
|
|
else
|
|
topic[:category] = to_import[:category_id]
|
|
end
|
|
|
|
post_id = post_id_from_imported_post_id(topic[:id])
|
|
parent_post = post_id ? Post.unscoped.find_by(id: post_id) : create_post(topic, topic[:id])
|
|
|
|
if parent_post&.id && parent_post.topic_id
|
|
resources = content["resources"]
|
|
if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
|
|
import_likes(resources["likes"]["ref"], parent_post.id)
|
|
end
|
|
if content["replyCount"].to_i > 0
|
|
if resources.dig("comments", "ref").present?
|
|
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import)
|
|
end
|
|
if resources.dig("messages", "ref").present?
|
|
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
break if contents["list"].size < POST_COUNT || contents.dig("links", "next").blank?
|
|
break unless start_index = contents["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_likes(url, post_id)
|
|
start_index = 0
|
|
|
|
loop do
|
|
likes = get("#{url}?&count=#{USER_COUNT}&startIndex=#{start_index}", true)
|
|
break if likes["error"]
|
|
likes["list"].each do |like|
|
|
next unless user_id = user_id_from_imported_user_id(like["id"])
|
|
PostActionCreator.like(User.find(user_id), Post.find(post_id))
|
|
end
|
|
|
|
break if likes["list"].size < USER_COUNT || likes.dig("links", "next").blank?
|
|
break unless start_index = likes["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_comments(url, topic_id, to_import)
|
|
start_index = 0
|
|
|
|
loop do
|
|
comments =
|
|
get(
|
|
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
|
|
to_import[:authenticated],
|
|
)
|
|
break if comments["error"]
|
|
comments["list"].each do |comment|
|
|
next if post_id_from_imported_post_id(comment["id"])
|
|
|
|
post = {
|
|
id: comment["id"],
|
|
created_at: comment["published"],
|
|
topic_id: topic_id,
|
|
user_id:
|
|
user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
raw: process_raw(comment["content"]["text"]),
|
|
custom_fields: {
|
|
import_id: comment["id"],
|
|
},
|
|
}
|
|
|
|
if (parent_post_id = comment["parentID"]).to_i > 0
|
|
if parent = topic_lookup_from_imported_post_id(parent_post_id)
|
|
post[:reply_to_post_number] = parent[:post_number]
|
|
end
|
|
end
|
|
|
|
if created_post = create_post(post, post[:id])
|
|
if comment["likeCount"].to_i > 0 && comment.dig("resources", "likes", "ref").present?
|
|
import_likes(comment["resources"]["likes"]["ref"], created_post.id)
|
|
end
|
|
end
|
|
end
|
|
|
|
break if comments["list"].size < POST_COUNT || comments.dig("links", "next").blank?
|
|
break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_messages(url, topic_id, to_import)
|
|
start_index = 0
|
|
|
|
loop do
|
|
messages =
|
|
get(
|
|
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
|
|
to_import[:authenticated],
|
|
)
|
|
break if messages["error"]
|
|
messages["list"].each do |message|
|
|
next if post_id_from_imported_post_id(message["id"])
|
|
|
|
post = {
|
|
id: message["id"],
|
|
created_at: message["published"],
|
|
topic_id: topic_id,
|
|
user_id:
|
|
user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
raw: process_raw(message["content"]["text"]),
|
|
custom_fields: {
|
|
import_id: message["id"],
|
|
},
|
|
}
|
|
post[:custom_fields][:is_accepted_answer] = true if message["answer"]
|
|
|
|
if (parent_post_id = message["parentID"].to_i) > 0
|
|
if parent = topic_lookup_from_imported_post_id(parent_post_id)
|
|
post[:reply_to_post_number] = parent[:post_number]
|
|
end
|
|
end
|
|
|
|
if created_post = create_post(post, post[:id])
|
|
if message["likeCount"].to_i > 0 && message.dig("resources", "likes", "ref").present?
|
|
import_likes(message["resources"]["likes"]["ref"], created_post.id)
|
|
end
|
|
end
|
|
end
|
|
|
|
break if messages["list"].size < POST_COUNT || messages.dig("links", "next").blank?
|
|
break unless start_index = messages["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def create_post(options, import_id)
|
|
post = super(options, import_id)
|
|
if Post === post
|
|
add_post(import_id, post)
|
|
add_topic(post)
|
|
end
|
|
post
|
|
end
|
|
|
|
def import_bookmarks
|
|
puts "", "importing bookmarks..."
|
|
|
|
start_index = 0
|
|
fields =
|
|
"fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
|
|
filter = "&filter=creationDate(null,2016-01-01T00:00:00Z)"
|
|
|
|
loop do
|
|
favorites =
|
|
get(
|
|
"contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
|
|
)
|
|
bookmarks_to_create =
|
|
favorites["list"]
|
|
.map do |favorite|
|
|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
|
|
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
|
|
{ user_id: user_id, post_id: post_id }
|
|
end
|
|
.flatten
|
|
|
|
create_bookmarks(bookmarks_to_create) { |row| row }
|
|
|
|
break if favorites["list"].size < POST_COUNT || favorites.dig("links", "next").blank?
|
|
break unless start_index = favorites["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def process_raw(raw)
|
|
doc = Nokogiri::HTML5.fragment(raw)
|
|
|
|
# convert emoticon
|
|
doc
|
|
.css("span.emoticon-inline")
|
|
.each do |span|
|
|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
|
|
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
|
|
end
|
|
|
|
# convert mentions
|
|
doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }
|
|
|
|
# fix links
|
|
doc
|
|
.css("a[href]")
|
|
.each do |a|
|
|
if a["href"]["#{@base_uri}/docs/DOC-"]
|
|
a["href"] = a["href"][%r{#{Regexp.escape(@base_uri)}/docs/DOC-\d+}]
|
|
elsif a["href"][@base_uri]
|
|
a.replace(a.inner_html)
|
|
end
|
|
end
|
|
|
|
html = doc.at(".jive-rendered-content").to_html
|
|
|
|
HtmlToMarkdown.new(html, keep_img_tags: true).to_markdown
|
|
end
|
|
|
|
def mark_topics_as_solved
|
|
puts "", "Marking topics as solved..."
|
|
|
|
DB.exec <<~SQL
|
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
|
FROM post_custom_fields pcf
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
WHERE pcf.name = 'is_accepted_answer'
|
|
SQL
|
|
end
|
|
|
|
def get(url_or_path, authenticated = false)
|
|
tries ||= 3
|
|
|
|
command = %w[curl --silent]
|
|
command << "--user \"#{@username}:#{@password}\"" if !!authenticated
|
|
command << (
|
|
if url_or_path.start_with?("http")
|
|
"\"#{url_or_path}\""
|
|
else
|
|
"\"#{@base_uri}/api/core/v3/#{url_or_path}\""
|
|
end
|
|
)
|
|
|
|
puts command.join(" ") if ENV["VERBOSE"] == "1"
|
|
|
|
JSON.parse `#{command.join(" ")}`
|
|
rescue StandardError
|
|
retry if (tries -= 1) >= 0
|
|
end
|
|
end
|
|
|
|
ImportScripts::JiveApi.new.perform
|