mirror of
https://github.com/discourse/discourse.git
synced 2024-11-27 20:33:41 +08:00
5f64fd0a21
Introduce new patterns for direct sql that are safe and fast. MiniSql is not prone to memory bloat that can happen with direct PG usage. It also has an extremely fast materializer and very a convenient API - DB.exec(sql, *params) => runs sql returns row count - DB.query(sql, *params) => runs sql returns usable objects (not a hash) - DB.query_hash(sql, *params) => runs sql returns an array of hashes - DB.query_single(sql, *params) => runs sql and returns a flat one dimensional array - DB.build(sql) => returns a sql builder See more at: https://github.com/discourse/mini_sql
354 lines
13 KiB
Ruby
354 lines
13 KiB
Ruby
require "nokogiri"
|
|
require "htmlentities"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
# https://developers.jivesoftware.com/api/v3/cloud/rest/index.html
|
|
|
|
class ImportScripts::JiveApi < ImportScripts::Base
|
|
|
|
USER_COUNT ||= 1000
|
|
POST_COUNT ||= 100
|
|
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
|
|
|
|
TO_IMPORT ||= [
|
|
#############################
|
|
# WHOLE CATEGORY OF CONTENT #
|
|
#############################
|
|
|
|
# Announcement & News
|
|
{ jive_object: { type: 37, id: 1004 }, filters: { created_after: 1.year.ago, type: "post" }, category_id: 7 },
|
|
# Questions & Answers / General Discussions
|
|
{ jive_object: { type: 14, id: 2006 }, filters: { created_after: 6.months.ago, type: "discussion" }, category: Proc.new { |c| c["question"] ? 5 : 21 } },
|
|
# Anywhere beta
|
|
{ jive_object: { type: 14, id: 2052 }, filters: { created_after: 6.months.ago, type: "discussion" }, category_id: 22 },
|
|
# Tips & Tricks
|
|
{ jive_object: { type: 37, id: 1284 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1319 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1177 }, filters: { type: "post" }, category_id: 6 },
|
|
{ jive_object: { type: 37, id: 1165 }, filters: { type: "post" }, category_id: 6 },
|
|
# Ambassadors
|
|
{ jive_object: { type: 700, id: 1001 }, filters: { type: "discussion" }, authenticated: true, category_id: 8 },
|
|
# Experts
|
|
{ jive_object: { type: 700, id: 1034 }, filters: { type: "discussion" }, authenticated: true, category_id: 15 },
|
|
# Feature Requests
|
|
{ jive_object: { type: 14, id: 2015 }, filters: { type: "idea" }, category_id: 31 },
|
|
|
|
####################
|
|
# SELECTED CONTENT #
|
|
####################
|
|
|
|
# Announcement & News
|
|
{ jive_object: { type: 37, id: 1004 }, filters: { entities: { 38 => [1345, 1381, 1845, 2046, 2060, 2061] } }, category_id: 7 },
|
|
# Problem Solving
|
|
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [116685, 160745, 177010, 223482, 225036, 233228, 257882, 285103, 292297, 345243, 363250, 434546] } }, category_id: 10 },
|
|
# General Discussions
|
|
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [178203, 188350, 312734] } }, category_id: 21 },
|
|
# Questions & Answers
|
|
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [418811] } }, category_id: 5 },
|
|
]
|
|
|
|
def initialize
|
|
super
|
|
@base_uri = ENV["BASE_URI"]
|
|
@username = ENV["USERNAME"]
|
|
@password = ENV["PASSWORD"]
|
|
@htmlentities = HTMLEntities.new
|
|
end
|
|
|
|
def execute
|
|
update_existing_users
|
|
import_users
|
|
import_contents
|
|
import_bookmarks
|
|
mark_topics_as_solved
|
|
end
|
|
|
|
def update_existing_users
|
|
puts "", "updating existing users..."
|
|
|
|
# we just need to do this once
|
|
return if User.human_users.limit(101).count > 100
|
|
|
|
User.human_users.find_each do |user|
|
|
people = get("people/email/#{user.email}?fields=initialLogin,-resources", true)
|
|
if people && people["initialLogin"].present?
|
|
created_at = DateTime.parse(people["initialLogin"])
|
|
if user.created_at > created_at
|
|
user.update_columns(created_at: created_at)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users..."
|
|
|
|
imported_users = 0
|
|
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
|
|
|
|
loop do
|
|
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
|
|
create_users(users["list"], offset: imported_users) do |user|
|
|
{
|
|
id: user["id"],
|
|
created_at: user["initialLogin"],
|
|
email: user["emails"].find { |email| email["primary"] }["value"],
|
|
username: user["mentionName"],
|
|
name: user["displayName"],
|
|
avatar_url: user["thumbnailUrl"],
|
|
}
|
|
end
|
|
|
|
break if users["list"].size < USER_COUNT || users.dig("links", "next").blank?
|
|
imported_users += users["list"].size
|
|
break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_contents
|
|
puts "", "importing contents..."
|
|
|
|
TO_IMPORT.each do |to_import|
|
|
puts Time.now
|
|
entity = to_import[:jive_object]
|
|
places = get("places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})", to_import[:authenticated])
|
|
import_place_contents(places["list"][0], to_import) if places && places["list"].present?
|
|
end
|
|
end
|
|
|
|
def import_place_contents(place, to_import)
|
|
puts "", "importing contents for '#{place["name"]}'..."
|
|
|
|
start_index = 0
|
|
|
|
if to_import.dig(:filters, :entities).present?
|
|
path = "contents"
|
|
entities = to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
|
|
filters = "filter=entityDescriptor(#{entities.join(",")})"
|
|
else
|
|
path = "places/#{place["placeID"]}/contents"
|
|
filters = "filter=status(published)"
|
|
if to_import[:filters]
|
|
filters << "&filter=type(#{to_import[:filters][:type]})" if to_import[:filters][:type].present?
|
|
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})" if to_import[:filters][:created_after].present?
|
|
end
|
|
end
|
|
|
|
loop do
|
|
contents = get("#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
|
|
contents["list"].each do |content|
|
|
content_id = content["contentID"].presence || "#{content["type"]}_#{content["id"]}"
|
|
|
|
custom_fields = { import_id: content_id }
|
|
custom_fields[:import_permalink] = content["permalink"] if content["permalink"].present?
|
|
|
|
topic = {
|
|
id: content_id,
|
|
created_at: content["published"],
|
|
title: @htmlentities.decode(content["subject"]),
|
|
raw: process_raw(content["content"]["text"]),
|
|
user_id: user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
views: content["viewCount"],
|
|
custom_fields: custom_fields,
|
|
}
|
|
|
|
if to_import[:category]
|
|
topic[:category] = to_import[:category].call(content)
|
|
else
|
|
topic[:category] = to_import[:category_id]
|
|
end
|
|
|
|
post_id = post_id_from_imported_post_id(topic[:id])
|
|
parent_post = post_id ? Post.unscoped.find_by(id: post_id) : create_post(topic, topic[:id])
|
|
|
|
if parent_post&.id && parent_post&.topic_id
|
|
resources = content["resources"]
|
|
import_likes(resources["likes"]["ref"], parent_post.id) if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
|
|
if content["replyCount"].to_i > 0
|
|
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import) if resources.dig("comments", "ref").present?
|
|
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import) if resources.dig("messages", "ref").present?
|
|
end
|
|
end
|
|
end
|
|
|
|
break if contents["list"].size < POST_COUNT || contents.dig("links", "next").blank?
|
|
break unless start_index = contents["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_likes(url, post_id)
|
|
start_index = 0
|
|
|
|
loop do
|
|
likes = get("#{url}?&count=#{USER_COUNT}&startIndex=#{start_index}", true)
|
|
break if likes["error"]
|
|
likes["list"].each do |like|
|
|
next unless user_id = user_id_from_imported_user_id(like["id"])
|
|
next if PostAction.exists?(user_id: user_id, post_id: post_id, post_action_type_id: PostActionType.types[:like])
|
|
PostAction.act(User.find(user_id), Post.find(post_id), PostActionType.types[:like])
|
|
end
|
|
|
|
break if likes["list"].size < USER_COUNT || likes.dig("links", "next").blank?
|
|
break unless start_index = likes["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_comments(url, topic_id, to_import)
|
|
start_index = 0
|
|
|
|
loop do
|
|
comments = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
|
|
break if comments["error"]
|
|
comments["list"].each do |comment|
|
|
next if post_id_from_imported_post_id(comment["id"])
|
|
|
|
post = {
|
|
id: comment["id"],
|
|
created_at: comment["published"],
|
|
topic_id: topic_id,
|
|
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
raw: process_raw(comment["content"]["text"]),
|
|
custom_fields: { import_id: comment["id"] },
|
|
}
|
|
|
|
if (parent_post_id = comment["parentID"]).to_i > 0
|
|
if parent = topic_lookup_from_imported_post_id(parent_post_id)
|
|
post[:reply_to_post_number] = parent[:post_number]
|
|
end
|
|
end
|
|
|
|
if created_post = create_post(post, post[:id])
|
|
if comment["likeCount"].to_i > 0 && comment.dig("resources", "likes", "ref").present?
|
|
import_likes(comment["resources"]["likes"]["ref"], created_post.id)
|
|
end
|
|
end
|
|
end
|
|
|
|
break if comments["list"].size < POST_COUNT || comments.dig("links", "next").blank?
|
|
break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def import_messages(url, topic_id, to_import)
|
|
start_index = 0
|
|
|
|
loop do
|
|
messages = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
|
|
break if messages["error"]
|
|
messages["list"].each do |message|
|
|
next if post_id_from_imported_post_id(message["id"])
|
|
|
|
post = {
|
|
id: message["id"],
|
|
created_at: message["published"],
|
|
topic_id: topic_id,
|
|
user_id: user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
|
raw: process_raw(message["content"]["text"]),
|
|
custom_fields: { import_id: message["id"] },
|
|
}
|
|
post[:custom_fields][:is_accepted_answer] = true if message["answer"]
|
|
|
|
if (parent_post_id = message["parentID"].to_i) > 0
|
|
if parent = topic_lookup_from_imported_post_id(parent_post_id)
|
|
post[:reply_to_post_number] = parent[:post_number]
|
|
end
|
|
end
|
|
|
|
if created_post = create_post(post, post[:id])
|
|
if message["likeCount"].to_i > 0 && message.dig("resources", "likes", "ref").present?
|
|
import_likes(message["resources"]["likes"]["ref"], created_post.id)
|
|
end
|
|
end
|
|
end
|
|
|
|
break if messages["list"].size < POST_COUNT || messages.dig("links", "next").blank?
|
|
break unless start_index = messages["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def create_post(options, import_id)
|
|
post = super(options, import_id)
|
|
if Post === post
|
|
add_post(import_id, post)
|
|
add_topic(post)
|
|
end
|
|
post
|
|
end
|
|
|
|
def import_bookmarks
|
|
puts "", "importing bookmarks..."
|
|
|
|
start_index = 0
|
|
fields = "fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
|
|
filter = "&filter=creationDate(null,2016-01-01T00:00:00Z)"
|
|
|
|
loop do
|
|
favorites = get("contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
|
|
favorites["list"].each do |favorite|
|
|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
|
|
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
|
|
next if PostAction.exists?(user_id: user_id, post_id: post_id, post_action_type_id: PostActionType.types[:bookmark])
|
|
PostAction.act(User.find(user_id), Post.find(post_id), PostActionType.types[:bookmark])
|
|
end
|
|
|
|
break if favorites["list"].size < POST_COUNT || favorites.dig("links", "next").blank?
|
|
break unless start_index = favorites["links"]["next"][/startIndex=(\d+)/, 1]
|
|
end
|
|
end
|
|
|
|
def process_raw(raw)
|
|
doc = Nokogiri::HTML.fragment(raw)
|
|
|
|
# convert emoticon
|
|
doc.css("span.emoticon-inline").each do |span|
|
|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
|
|
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
|
|
end
|
|
|
|
# convert mentions
|
|
doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }
|
|
|
|
# fix links
|
|
doc.css("a[href]").each do |a|
|
|
if a["href"]["#{@base_uri}/docs/DOC-"]
|
|
a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
|
|
elsif a["href"][@base_uri]
|
|
a.replace(a.inner_html)
|
|
end
|
|
end
|
|
|
|
html = doc.at(".jive-rendered-content").to_html
|
|
|
|
HtmlToMarkdown.new(html, keep_img_tags: true).to_markdown
|
|
end
|
|
|
|
def mark_topics_as_solved
|
|
puts "", "Marking topics as solved..."
|
|
|
|
DB.exec <<~SQL
|
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
|
FROM post_custom_fields pcf
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
WHERE pcf.name = 'is_accepted_answer'
|
|
SQL
|
|
end
|
|
|
|
def get(url_or_path, authenticated = false)
|
|
tries ||= 3
|
|
|
|
command = ["curl", "--silent"]
|
|
command << "--user \"#{@username}:#{@password}\"" if !!authenticated
|
|
command << (url_or_path.start_with?("http") ? "\"#{url_or_path}\"" : "\"#{@base_uri}/api/core/v3/#{url_or_path}\"")
|
|
|
|
puts command.join(" ") if ENV["VERBOSE"] == "1"
|
|
|
|
JSON.parse `#{command.join(" ")}`
|
|
rescue
|
|
retry if (tries -= 1) >= 0
|
|
end
|
|
|
|
end
|
|
|
|
ImportScripts::JiveApi.new.perform
|