update JIVE API importer

This commit is contained in:
Régis Hanol 2017-04-24 22:03:12 +02:00
parent d5630d6160
commit 29ddb3a611

View File

@ -1,9 +1,12 @@
require "nokogiri"
require "htmlentities" require "htmlentities"
require_relative "./../../lib/html_to_markdown.rb"
require File.expand_path(File.dirname(__FILE__) + "/base.rb") require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::JiveApi < ImportScripts::Base class ImportScripts::JiveApi < ImportScripts::Base
COUNT ||= 100 USER_COUNT ||= 1000
POST_COUNT ||= 100
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user) STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
def initialize def initialize
@ -26,10 +29,10 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing users..." puts "", "importing users..."
imported_users = 0 imported_users = 0
start_index = [0, Math.floor(PostCustomField.where(name: "import_id").count / COUNT.to_f) - COUNT].max start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
loop do loop do
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{COUNT}&startIndex=#{start_index}", true) users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
create_users(users["list"], offset: imported_users) do |user| create_users(users["list"], offset: imported_users) do |user|
{ {
id: user["id"], id: user["id"],
@ -41,7 +44,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
} }
end end
break if users["list"].size < COUNT || users["links"].blank? || users["links"]["next"].blank? break if users["list"].size < USER_COUNT || users["links"].blank? || users["links"]["next"].blank?
imported_users += users["list"].size imported_users += users["list"].size
break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1] break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
end end
@ -51,25 +54,23 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing discussions & questions..." puts "", "importing discussions & questions..."
start_index = 0 start_index = 0
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources" fields = "fields=published,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources"
filter = "&filter=creationDate(null,2017-01-01T00:00:00Z)" filter = "&filter=creationDate(null,2017-01-01T00:00:00Z)"
loop do loop do
discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}") discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
discussions["list"].each do |discussion| discussions["list"].each do |discussion|
topic = { topic = {
id: discussion["contentID"], id: discussion["contentID"],
created_at: discussion["published"], created_at: discussion["published"],
title: @htmlentities.decode(discussion["subject"]), title: @htmlentities.decode(discussion["subject"]),
raw: discussion["content"]["text"], raw: process_raw(discussion["content"]["text"]),
user_id: user_id_from_imported_user_id(discussion["author"]["id"]) || Discourse::SYSTEM_USER_ID, user_id: user_id_from_imported_user_id(discussion["author"]["id"]) || Discourse::SYSTEM_USER_ID,
# category: discussion["question"] ? 26 : 21, # category: discussion["question"] ? 5 : 21,
views: discussion["viewCount"], views: discussion["viewCount"],
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: discussion["contentID"] }, custom_fields: { import_id: discussion["contentID"] },
post_create_action: proc do |post| post_create_action: proc do |post|
tags = discussion["tags"].compact.map(&:strip).select(&:present?) DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, ["legacy"])
DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, tags) unless tags.empty?
end end
} }
@ -79,7 +80,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
import_comments(discussion["contentID"], parent_post.topic_id) if parent_post import_comments(discussion["contentID"], parent_post.topic_id) if parent_post
end end
break if discussions["list"].size < COUNT || discussions["links"].blank? || discussions["links"]["next"].blank? break if discussions["list"].size < POST_COUNT || discussions["links"].blank? || discussions["links"]["next"].blank?
break unless start_index = discussions["links"]["next"][/startIndex=(\d+)/, 1] break unless start_index = discussions["links"]["next"][/startIndex=(\d+)/, 1]
end end
end end
@ -89,7 +90,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
fields = "fields=published,author.id,content.text,parent,answer,-resources,-author.resources" fields = "fields=published,author.id,content.text,parent,answer,-resources,-author.resources"
loop do loop do
comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{COUNT}&startIndex=#{start_index}") comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{POST_COUNT}&startIndex=#{start_index}")
comments["list"].each do |comment| comments["list"].each do |comment|
next if post_id_from_imported_post_id(comment["id"]) next if post_id_from_imported_post_id(comment["id"])
@ -98,8 +99,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
created_at: comment["published"], created_at: comment["published"],
topic_id: topic_id, topic_id: topic_id,
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID, user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: comment["content"]["text"], raw: process_raw(comment["content"]["text"]),
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: comment["id"] }, custom_fields: { import_id: comment["id"] },
} }
post[:custom_fields][:is_accepted_answer] = true if comment["answer"] post[:custom_fields][:is_accepted_answer] = true if comment["answer"]
@ -113,7 +113,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
create_post(post, post[:id]) create_post(post, post[:id])
end end
break if comments["list"].size < COUNT || comments["links"].blank? || comments["links"]["next"].blank? break if comments["list"].size < POST_COUNT || comments["links"].blank? || comments["links"]["next"].blank?
break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1] break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
end end
end end
@ -122,37 +122,61 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing blog posts..." puts "", "importing blog posts..."
start_index = 0 start_index = 0
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources" fields = "fields=published,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources"
filter = "&filter=creationDate(null,2016-05-01T00:00:00Z)" filter = "&filter=creationDate(null,2016-05-01T00:00:00Z)"
loop do loop do
posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}") posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
posts["list"].each do |post| posts["list"].each do |post|
next if post_id_from_imported_post_id(post["contentID"]) next if post_id_from_imported_post_id(post["contentID"])
pp = { pp = {
id: post["contentID"], id: post["contentID"],
created_at: post["published"], created_at: post["published"],
title: @htmlentities.decode(post["subject"]), title: @htmlentities.decode(post["subject"]),
raw: post["content"]["text"], raw: process_raw(post["content"]["text"]),
user_id: user_id_from_imported_user_id(post["author"]["id"]) || Discourse::SYSTEM_USER_ID, user_id: user_id_from_imported_user_id(post["author"]["id"]) || Discourse::SYSTEM_USER_ID,
# category: 7, category: 7,
views: post["viewCount"], views: post["viewCount"],
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: post["contentID"], import_permalink: post["permalink"] }, custom_fields: { import_id: post["contentID"], import_permalink: post["permalink"] },
post_create_action: proc do |p| post_create_action: proc do |p|
tags = post["tags"].compact.map(&:strip).select(&:present?) DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, ["legacy"])
DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, tags) unless tags.empty?
end end
} }
create_post(pp, pp[:id]) create_post(pp, pp[:id])
end end
break if posts["list"].size < COUNT || posts["links"].blank? || posts["links"]["next"].blank? break if posts["list"].size < POST_COUNT || posts["links"].blank? || posts["links"]["next"].blank?
break unless start_index = posts["links"]["next"][/startIndex=(\d+)/, 1] break unless start_index = posts["links"]["next"][/startIndex=(\d+)/, 1]
end end
end end
# Converts a Jive HTML body into Markdown.
#
# The HTML is parsed as a fragment, Jive-specific markup (inline emoticons
# and profile-link mentions) is rewritten in place, and the result is handed
# to HtmlToMarkdown.
#
# @param raw [String] HTML body; callers pass the "content.text" field of an
#   API item
# @return the value of HtmlToMarkdown#to_markdown (presumably a String of
#   Markdown -- confirm against lib/html_to_markdown.rb)
def process_raw(raw)
  doc = Nokogiri::HTML.fragment(raw)

  # convert emoticon
  doc.css("span.emoticon-inline").each do |span|
    # the emoticon name is read from a class token of the form "emoticon_<name>"
    name = span["class"][/emoticon_(\w+)/, 1]&.downcase
    # keep it as ":name:" only when Emoji.exists? accepts the name; drop the span otherwise
    name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
  end

  # convert mentions
  doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }

  # fix links
  # doc.css("a[href]").each do |a|
  # if a["href"]["#{@base_uri}/docs/DOC-"]
  # a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
  # elsif a["href"][@base_uri]
  # a.replace(a.inner_html)
  # end
  # end

  # `at` returns nil when nothing matches; a body without the
  # ".jive-rendered-content" wrapper would otherwise raise NoMethodError on
  # `nil.to_html` and abort the import. Fall back to the whole fragment.
  content = doc.at(".jive-rendered-content") || doc

  HtmlToMarkdown.new(content.to_html).to_markdown
end
def mark_topics_as_solved def mark_topics_as_solved
puts "", "Marking topics as solved..." puts "", "Marking topics as solved..."