# frozen_string_literal: true

# Zendesk importer
#
# This one uses their API.

require "open-uri"
require "reverse_markdown"
require_relative "base"
require_relative "base/generic_database"

# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
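#
# The import runs in two phases: everything is first fetched from the Zendesk
# REST API into a local staging database (see base/generic_database), then
# created in Discourse from that staged copy.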
class ImportScripts::ZendeskApi < ImportScripts::Base
  BATCH_SIZE = 1000

  HTTP_ERRORS = [
    EOFError,
    Errno::ECONNRESET,
    Errno::EINVAL,
    Net::HTTPBadResponse,
    Net::HTTPHeaderSyntaxError,
    Net::ProtocolError,
    Timeout::Error,
    OpenURI::HTTPError,
    OpenSSL::SSL::SSLError,
  ]

  MAX_RETRIES = 5
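
  # Both "replace-me" placeholders below must be set before running the script:
  # IMAGE_DOWNLOAD_PATH is a scratch file path that each downloaded image is
  # written to before being uploaded, and SUBDOMAIN is the Zendesk subdomain
  # used to recognize images hosted there.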
  IMAGE_DOWNLOAD_PATH = "replace-me"

  SUBDOMAIN = "replace-me"

  def initialize(source_url, path, auth_email, auth_token)
    super()

    @source_url = source_url
    @path = path
    @auth_email = auth_email
    @auth_token = auth_token
    @db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
  end

  def execute
    fetch_from_api

    import_categories
    import_users
    import_topics
    import_posts
    import_likes
  end
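
  # Phase 1: stage all remote content in the local database. Posts are sorted
  # by creation date afterwards so that replies are imported in chronological
  # order.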
  def fetch_from_api
    fetch_categories
    fetch_topics
    fetch_posts
    fetch_users

    @db.sort_posts_by_created_at
  end

  def fetch_categories
    puts "", "fetching categories..."

    get_from_api("/api/v2/community/topics.json", "topics", show_status: true) do |row|
      @db.insert_category(
        id: row["id"],
        name: row["name"],
        description: row["description"],
        position: row["position"],
        url: row["html_url"],
      )
    end
  end

  def fetch_topics
    puts "", "fetching topics..."

    get_from_api("/api/v2/community/posts.json", "posts", show_status: true) do |row|
      if row["vote_count"] > 0
        like_user_ids = fetch_likes("/api/v2/community/posts/#{row["id"]}/votes.json")
      end

      @db.insert_topic(
        id: row["id"],
        title: row["title"],
        raw: row["details"],
        category_id: row["topic_id"],
        closed: row["closed"],
        user_id: row["author_id"],
        created_at: row["created_at"],
        url: row["html_url"],
        like_user_ids: like_user_ids,
      )
    end
  end

  def fetch_posts
    puts "", "fetching posts..."
    current_count = 0
    total_count = @db.count_topics
    start_time = Time.now
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      break if rows.empty?

      rows.each do |topic_row|
        get_from_api(
          "/api/v2/community/posts/#{topic_row["id"]}/comments.json",
          "comments",
        ) do |row|
          if row["vote_count"] > 0
            like_user_ids =
              fetch_likes(
                "/api/v2/community/posts/#{topic_row["id"]}/comments/#{row["id"]}/votes.json",
              )
          end

          @db.insert_post(
            id: row["id"],
            raw: row["body"],
            topic_id: topic_row["id"],
            user_id: row["author_id"],
            created_at: row["created_at"],
            url: row["html_url"],
            like_user_ids: like_user_ids,
          )
        end

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end

  def fetch_users
    puts "", "fetching users..."

    user_ids = @db.execute_sql(<<~SQL).map { |row| row["user_id"] }
      SELECT user_id FROM topic
      UNION
      SELECT user_id FROM post
      UNION
      SELECT user_id FROM like
    SQL

    current_count = 0
    total_count = user_ids.size
    start_time = Time.now
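
    # show_many accepts a comma-separated list of user IDs, so the staged IDs
    # are fetched in batches of 50 per request.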
    while !user_ids.empty?
      get_from_api(
        "/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(",")}",
        "users",
      ) do |row|
        @db.insert_user(
          id: row["id"],
          email: row["email"],
          name: row["name"],
          created_at: row["created_at"],
          last_seen_at: row["last_login_at"],
          active: row["active"],
          avatar_path: row["photo"].present? ? row["photo"]["content_url"] : nil,
        )

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end
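
  # Only upvotes (value == 1) are staged, so that they can later become
  # Discourse likes.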
  def fetch_likes(url)
    user_ids = []

    get_from_api(url, "votes") do |row|
      user_ids << row["user_id"] if row["id"].present? && row["value"] == 1
    end

    user_ids
  end

  def import_categories
    puts "", "creating categories"
    rows = @db.fetch_categories

    create_categories(rows) do |row|
      {
        id: row["id"],
        name: row["name"],
        description: row["description"],
        position: row["position"],
        post_create_action:
          proc do |category|
            url = remove_domain(row["url"])
            Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url)
          end,
      }
    end
  end

  def import_users
    puts "", "creating users"
    total_count = @db.count_users
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_users(last_id)
      break if rows.empty?

      next if all_records_exist?(:users, rows.map { |row| row["id"] })

      create_users(rows, total: total_count, offset: offset) do |row|
        {
          id: row["id"],
          email: row["email"],
          name: row["name"],
          created_at: row["created_at"],
          last_seen_at: row["last_seen_at"],
          active: row["active"] == 1,
          post_create_action:
            proc do |user|
              if row["avatar_path"].present?
                begin
                  UserAvatar.import_url_for_user(row["avatar_path"], user)
                rescue StandardError
                  nil
                end
              end
            end,
        }
      end
    end
  end

  def import_topics
    puts "", "creating topics"
    total_count = @db.count_topics
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      break if rows.empty?

      next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) })

      create_posts(rows, total: total_count, offset: offset) do |row|
        {
          id: import_topic_id(row["id"]),
          title: row["title"].present? ? row["title"].strip[0...255] : "Topic title missing",
          raw:
            normalize_raw(
              row["raw"],
              user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
            ),
          category: category_id_from_imported_category_id(row["category_id"]),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          created_at: row["created_at"],
          closed: row["closed"] == 1,
          post_create_action:
            proc do |post|
              url = remove_domain(row["url"])
              Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
            end,
        }
      end
    end
  end
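
  # Zendesk "posts" become Discourse topics and Zendesk "comments" become
  # Discourse posts, but both share the same import ID lookup, so topic IDs are
  # prefixed to keep the two namespaces from colliding.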
  def import_topic_id(topic_id)
    "T#{topic_id}"
  end
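
  # Staged posts were sorted by created_at during fetching, so replies are
  # created in chronological order within each topic.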
  def import_posts
    puts "", "creating posts"
    total_count = @db.count_posts
    last_row_id = 0

    batches do |offset|
      rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
      break if rows.empty?

      create_posts(rows, total: total_count, offset: offset) do |row|
        topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"]))

        if topic.nil?
          p "MISSING TOPIC #{row["topic_id"]}"
          p row
          next
        end

        {
          id: row["id"],
          raw:
            normalize_raw(
              row["raw"],
              user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
            ),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          topic_id: topic[:topic_id],
          created_at: row["created_at"],
          post_create_action:
            proc do |post|
              url = remove_domain(row["url"])
              Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url)
            end,
        }
      end
    end
  end
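
  # A staged like references either a topic (a Zendesk post) or a post (a
  # Zendesk comment); both resolve to a Discourse post through the import ID
  # lookup.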
  def import_likes
    puts "", "importing likes..."
    start_time = Time.now
    current_count = 0
    total_count = @db.count_likes
    last_row_id = 0

    batches do |offset|
      rows, last_row_id = @db.fetch_likes(last_row_id)
      break if rows.empty?

      rows.each do |row|
        import_id = row["topic_id"] ? import_topic_id(row["topic_id"]) : row["post_id"]
        post = Post.find_by(id: post_id_from_imported_post_id(import_id)) if import_id
        user = User.find_by(id: user_id_from_imported_user_id(row["user_id"]))

        if post && user
          begin
            PostActionCreator.like(user, post)
          rescue => e
            puts "error acting on post #{e}"
          end
        else
          puts "Skipping Like from #{row["user_id"]} on topic #{row["topic_id"]} / post #{row["post_id"]}"
        end

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end
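
  # Converts Zendesk HTML to Markdown and re-uploads any images hosted on the
  # Zendesk subdomain so the imported posts reference local uploads instead.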
  def normalize_raw(raw, user_id)
    # Note: single-quoted '\n' removes literal backslash-n sequences from the
    # API payload, not actual newlines.
    raw = raw.gsub('\n', "")
    raw = ReverseMarkdown.convert(raw)

    # Process images. After the ReverseMarkdown conversion they look like
    # ![](https://<sub-domain>.zendesk.com/hc/user_images/<hash>.<image-format>)
    raw.gsub!(%r{!\[\]\((https://#{SUBDOMAIN}\.zendesk\.com/hc/user_images/([^).]+\.[^)]+))\)}i) do
      image_url = $1
      filename = $2
      attempts = 0

      begin
        URI
          .parse(image_url)
          .open do |image|
            # IMAGE_DOWNLOAD_PATH is a scratch file; each download overwrites
            # it before the image is uploaded.
            File.open(IMAGE_DOWNLOAD_PATH, "wb") { |file| file.write(image.read) }
          end
      rescue *HTTP_ERRORS
        if attempts < MAX_RETRIES
          attempts += 1
          sleep(2)
          retry
        else
          puts "Error downloading image"
        end
        next
      end

      upl_obj = create_upload(user_id, IMAGE_DOWNLOAD_PATH, filename)

      if upl_obj&.persisted?
        html_for_upload(upl_obj, filename)
      else
        # Fail fast: continuing would silently drop the image from the post.
        puts "Error creating image upload"
        exit
      end
    end

    raw
  end

  def remove_domain(url)
    url.sub(@source_url, "")
  end

  def permalink_exists?(url)
    Permalink.find_by(url: url)
  end
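
  # Memoized HTTP client for the Zendesk host; TLS is enabled when the source
  # URL uses https.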
  def connection
    @_connection ||=
      begin
        connect_uri = URI.parse(@source_url)

        http = Net::HTTP.new(connect_uri.host, connect_uri.port)
        http.open_timeout = 30
        http.read_timeout = 30
        http.use_ssl = connect_uri.scheme == "https"

        http
      end
  end
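
  # Zendesk API token authentication: HTTP basic auth with "<email>/token" as
  # the username and the API token as the password.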
  def authorization
    @_authorization ||=
      begin
        auth_str = "#{@auth_email}/token:#{@auth_token}"
        "Basic #{Base64.strict_encode64(auth_str)}"
      end
  end
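
  # Fetches every page of a Zendesk collection and yields each record in the
  # named array. Zendesk returns the follow-up URL in "next_page" (null on the
  # last page), so the loop ends when the URLs run out.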
  def get_from_api(path, array_name, show_status: false)
    url = "#{@source_url}#{path}"
    start_time = Time.now

    while url
      get = Net::HTTP::Get.new(url)
      get["User-Agent"] = "Discourse Zendesk Importer"
      get["Authorization"] = authorization

      retry_count = 0

      while retry_count < MAX_RETRIES
        begin
          response = connection.request(get)
          puts("Retry successful!") if retry_count > 0
          break
        rescue => e
          puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
          sleep(20)
          retry_count += 1
        end
      end

      json = JSON.parse(response.body)

      json[array_name].each { |row| yield row }

      url = json["next_page"]

      if show_status
        if json["page"] && json["page_count"]
          print_status(json["page"], json["page_count"], start_time)
        else
          print "."
        end
      end
    end
  end
end

unless ARGV.length == 4 && Dir.exist?(ARGV[1])
  puts "",
       "Usage:",
       "",
       "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN",
       ""
  exit 1
end

ImportScripts::ZendeskApi.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform