update JIVE API importer

This commit is contained in:
Régis Hanol 2017-04-24 22:03:12 +02:00
parent d5630d6160
commit 29ddb3a611

View File

@ -1,9 +1,12 @@
require "nokogiri"
require "htmlentities"
require_relative "./../../lib/html_to_markdown.rb"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::JiveApi < ImportScripts::Base
COUNT ||= 100
USER_COUNT ||= 1000
POST_COUNT ||= 100
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
def initialize
@ -26,10 +29,10 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing users..."
imported_users = 0
start_index = [0, Math.floor(PostCustomField.where(name: "import_id").count / COUNT.to_f) - COUNT].max
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
loop do
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{COUNT}&startIndex=#{start_index}", true)
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
create_users(users["list"], offset: imported_users) do |user|
{
id: user["id"],
@ -41,7 +44,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
}
end
break if users["list"].size < COUNT || users["links"].blank? || users["links"]["next"].blank?
break if users["list"].size < USER_COUNT || users["links"].blank? || users["links"]["next"].blank?
imported_users += users["list"].size
break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
end
@ -51,25 +54,23 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing discussions & questions..."
start_index = 0
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources"
fields = "fields=published,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources"
filter = "&filter=creationDate(null,2017-01-01T00:00:00Z)"
loop do
discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}")
discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
discussions["list"].each do |discussion|
topic = {
id: discussion["contentID"],
created_at: discussion["published"],
title: @htmlentities.decode(discussion["subject"]),
raw: discussion["content"]["text"],
raw: process_raw(discussion["content"]["text"]),
user_id: user_id_from_imported_user_id(discussion["author"]["id"]) || Discourse::SYSTEM_USER_ID,
# category: discussion["question"] ? 26 : 21,
# category: discussion["question"] ? 5 : 21,
views: discussion["viewCount"],
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: discussion["contentID"] },
post_create_action: proc do |post|
tags = discussion["tags"].compact.map(&:strip).select(&:present?)
DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, tags) unless tags.empty?
DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, ["legacy"])
end
}
@ -79,7 +80,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
import_comments(discussion["contentID"], parent_post.topic_id) if parent_post
end
break if discussions["list"].size < COUNT || discussions["links"].blank? || discussions["links"]["next"].blank?
break if discussions["list"].size < POST_COUNT || discussions["links"].blank? || discussions["links"]["next"].blank?
break unless start_index = discussions["links"]["next"][/startIndex=(\d+)/, 1]
end
end
@ -89,7 +90,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
fields = "fields=published,author.id,content.text,parent,answer,-resources,-author.resources"
loop do
comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{COUNT}&startIndex=#{start_index}")
comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{POST_COUNT}&startIndex=#{start_index}")
comments["list"].each do |comment|
next if post_id_from_imported_post_id(comment["id"])
@ -98,8 +99,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
created_at: comment["published"],
topic_id: topic_id,
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: comment["content"]["text"],
cook_method: Post.cook_methods[:raw_html],
raw: process_raw(comment["content"]["text"]),
custom_fields: { import_id: comment["id"] },
}
post[:custom_fields][:is_accepted_answer] = true if comment["answer"]
@ -113,7 +113,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
create_post(post, post[:id])
end
break if comments["list"].size < COUNT || comments["links"].blank? || comments["links"]["next"].blank?
break if comments["list"].size < POST_COUNT || comments["links"].blank? || comments["links"]["next"].blank?
break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
end
end
@ -122,37 +122,61 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing blog posts..."
start_index = 0
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources"
fields = "fields=published,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources"
filter = "&filter=creationDate(null,2016-05-01T00:00:00Z)"
loop do
posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}")
posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
posts["list"].each do |post|
next if post_id_from_imported_post_id(post["contentID"])
pp = {
id: post["contentID"],
created_at: post["published"],
title: @htmlentities.decode(post["subject"]),
raw: post["content"]["text"],
raw: process_raw(post["content"]["text"]),
user_id: user_id_from_imported_user_id(post["author"]["id"]) || Discourse::SYSTEM_USER_ID,
# category: 7,
category: 7,
views: post["viewCount"],
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: post["contentID"], import_permalink: post["permalink"] },
post_create_action: proc do |p|
tags = post["tags"].compact.map(&:strip).select(&:present?)
DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, tags) unless tags.empty?
DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, ["legacy"])
end
}
create_post(pp, pp[:id])
end
break if posts["list"].size < COUNT || posts["links"].blank? || posts["links"]["next"].blank?
break if posts["list"].size < POST_COUNT || posts["links"].blank? || posts["links"]["next"].blank?
break unless start_index = posts["links"]["next"][/startIndex=(\d+)/, 1]
end
end
# Converts a chunk of Jive HTML into Markdown via HtmlToMarkdown.
#
# Before conversion the markup is cleaned up:
# * inline emoticon spans become ":name:" when Emoji recognises the name,
#   and are removed otherwise;
# * profile links are replaced by "@" followed by the link's text content.
#
# Only the ".jive-rendered-content" element is converted when it exists. When
# the markup has no such wrapper, the whole fragment is converted instead of
# calling #to_html on nil (which would raise NoMethodError).
#
# @param raw [String] HTML to convert (callers pass the "content.text" field)
# @return the value returned by HtmlToMarkdown#to_markdown
def process_raw(raw)
  doc = Nokogiri::HTML.fragment(raw)

  # convert emoticon
  doc.css("span.emoticon-inline").each do |span|
    # the emoticon name is encoded in the class attribute as "emoticon_<name>"
    name = span["class"][/emoticon_(\w+)/, 1]&.downcase
    name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
  end

  # convert mentions
  doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }

  # fix links
  # doc.css("a[href]").each do |a|
  #   if a["href"]["#{@base_uri}/docs/DOC-"]
  #     a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
  #   elsif a["href"][@base_uri]
  #     a.replace(a.inner_html)
  #   end
  # end

  # `at` returns nil when nothing matches; fall back to the full fragment
  content = doc.at(".jive-rendered-content") || doc

  HtmlToMarkdown.new(content.to_html).to_markdown
end
def mark_topics_as_solved
puts "", "Marking topics as solved..."