discourse/script/import_scripts/zendesk_api.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

447 lines
12 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
# Zendesk importer
#
# This one uses their API.
require 'open-uri'
require 'reverse_markdown'
require_relative 'base'
require_relative 'base/generic_database'
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
class ImportScripts::ZendeskApi < ImportScripts::Base
BATCH_SIZE = 1000
HTTP_ERRORS = [
EOFError,
Errno::ECONNRESET,
Errno::EINVAL,
Net::HTTPBadResponse,
Net::HTTPHeaderSyntaxError,
Net::ProtocolError,
Timeout::Error,
OpenURI::HTTPError,
OpenSSL::SSL::SSLError
]
MAX_RETRIES = 5
IMAGE_DOWNLOAD_PATH = "replace-me"
SUBDOMAIN = "replace-me"
def initialize(source_url, path, auth_email, auth_token)
super()
@source_url = source_url
@path = path
@auth_email = auth_email
@auth_token = auth_token
@db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
end
def execute
fetch_from_api
import_categories
import_users
import_topics
import_posts
import_likes
end
def fetch_from_api
fetch_categories
fetch_topics
fetch_posts
fetch_users
@db.sort_posts_by_created_at
end
def fetch_categories
puts '', 'fetching categories...'
get_from_api('/api/v2/community/topics.json', 'topics', show_status: true) do |row|
@db.insert_category(
id: row['id'],
name: row['name'],
description: row['description'],
position: row['position'],
url: row['html_url']
)
end
end
def fetch_topics
puts '', 'fetching topics...'
get_from_api('/api/v2/community/posts.json', 'posts', show_status: true) do |row|
if row['vote_count'] > 0
like_user_ids = fetch_likes("/api/v2/community/posts/#{row['id']}/votes.json")
end
@db.insert_topic(
id: row['id'],
title: row['title'],
raw: row['details'],
category_id: row['topic_id'],
closed: row['closed'],
user_id: row['author_id'],
created_at: row['created_at'],
url: row['html_url'],
like_user_ids: like_user_ids
)
end
end
def fetch_posts
puts '', 'fetching posts...'
current_count = 0
total_count = @db.count_topics
start_time = Time.now
last_id = ''
batches do |offset|
rows, last_id = @db.fetch_topics(last_id)
break if rows.empty?
rows.each do |topic_row|
get_from_api("/api/v2/community/posts/#{topic_row['id']}/comments.json", 'comments') do |row|
if row['vote_count'] > 0
like_user_ids = fetch_likes("/api/v2/community/posts/#{topic_row['id']}/comments/#{row['id']}/votes.json")
end
@db.insert_post(
id: row['id'],
raw: row['body'],
topic_id: topic_row['id'],
user_id: row['author_id'],
created_at: row['created_at'],
url: row['html_url'],
like_user_ids: like_user_ids
)
end
current_count += 1
print_status(current_count, total_count, start_time)
end
end
end
def fetch_users
puts '', 'fetching users...'
user_ids = @db.execute_sql(<<~SQL).map { |row| row['user_id'] }
SELECT user_id FROM topic
UNION
SELECT user_id FROM post
UNION
SELECT user_id FROM like
SQL
current_count = 0
total_count = user_ids.size
start_time = Time.now
while !user_ids.empty?
get_from_api("/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(',')}", 'users') do |row|
@db.insert_user(
id: row['id'],
email: row['email'],
name: row['name'],
created_at: row['created_at'],
last_seen_at: row['last_login_at'],
active: row['active'],
avatar_path: row['photo'].present? ? row['photo']['content_url'] : nil
)
current_count += 1
print_status(current_count, total_count, start_time)
end
end
end
def fetch_likes(url)
user_ids = []
get_from_api(url, 'votes') do |row|
if row['id'].present? && row['value'] == 1
user_ids << row['user_id']
end
end
user_ids
end
def import_categories
puts "", "creating categories"
rows = @db.fetch_categories
create_categories(rows) do |row|
{
id: row['id'],
name: row['name'],
description: row['description'],
position: row['position'],
post_create_action: proc do |category|
url = remove_domain(row['url'])
Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url)
end
}
end
end
def import_users
puts "", "creating users"
total_count = @db.count_users
last_id = ''
batches do |offset|
rows, last_id = @db.fetch_users(last_id)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['id'] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['id'],
email: row['email'],
name: row['name'],
created_at: row['created_at'],
last_seen_at: row['last_seen_at'],
active: row['active'] == 1,
post_create_action: proc do |user|
if row['avatar_path'].present?
UserAvatar.import_url_for_user(row['avatar_path'], user) rescue nil
end
end
}
end
end
end
def import_topics
puts "", "creating topics"
total_count = @db.count_topics
last_id = ''
batches do |offset|
rows, last_id = @db.fetch_topics(last_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) })
create_posts(rows, total: total_count, offset: offset) do |row|
{
id: import_topic_id(row['id']),
title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing",
raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id),
category: category_id_from_imported_category_id(row['category_id']),
user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
created_at: row['created_at'],
closed: row['closed'] == 1,
post_create_action: proc do |post|
url = remove_domain(row['url'])
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
end
}
end
end
end
def import_topic_id(topic_id)
"T#{topic_id}"
end
def import_posts
puts "", "creating posts"
total_count = @db.count_posts
last_row_id = 0
batches do |offset|
rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
break if rows.empty?
create_posts(rows, total: total_count, offset: offset) do |row|
topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id']))
if topic.nil?
p "MISSING TOPIC #{row['topic_id']}"
p row
next
end
{
id: row['id'],
raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id),
user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
topic_id: topic[:topic_id],
created_at: row['created_at'],
post_create_action: proc do |post|
url = remove_domain(row['url'])
Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url)
end
}
end
end
end
def import_likes
puts "", "importing likes..."
start_time = Time.now
current_count = 0
total_count = @db.count_likes
last_row_id = 0
batches do |offset|
rows, last_row_id = @db.fetch_likes(last_row_id)
break if rows.empty?
rows.each do |row|
import_id = row['topic_id'] ? import_topic_id(row['topic_id']) : row['post_id']
post = Post.find_by(id: post_id_from_imported_post_id(import_id)) if import_id
user = User.find_by(id: user_id_from_imported_user_id(row['user_id']))
if post && user
begin
PostActionCreator.like(user, post) if user && post
rescue => e
puts "error acting on post #{e}"
end
else
puts "Skipping Like from #{row['user_id']} on topic #{row['topic_id']} / post #{row['post_id']}"
end
current_count += 1
print_status(current_count, total_count, start_time)
end
end
end
def normalize_raw(raw, user_id)
raw = raw.gsub('\n', '')
raw = ReverseMarkdown.convert(raw)
# Process images, after the ReverseMarkdown they look like
# ![](https://<sub-domain>.zendesk.com/<hash>.<image-format>)
raw.gsub!(/!\[\]\((https:\/\/#{SUBDOMAIN}\.zendesk\.com\/hc\/user_images\/([^).]+\.[^)]+))\)/i) do
image_url = $1
filename = $2
attempts = 0
begin
2021-10-27 16:39:28 +08:00
URI.parse(image_url).open do |image|
# IMAGE_DOWNLOAD_PATH is whatever image, it will be replaced with the downloaded image
File.open(IMAGE_DOWNLOAD_PATH, "wb") do |file|
file.write(image.read)
end
end
rescue *HTTP_ERRORS => e
if attempts < MAX_RETRIES
attempts += 1
sleep(2)
retry
else
puts "Error downloading image"
end
next
end
upl_obj = create_upload(user_id, IMAGE_DOWNLOAD_PATH, filename)
if upl_obj&.persisted?
html = html_for_upload(upl_obj, filename)
html
else
puts "Error creating image upload"
"![](#{$1})"
exit
end
end
raw
end
def remove_domain(url)
url.sub(@source_url, "")
end
def permalink_exists?(url)
Permalink.find_by(url: url)
end
def connection
@_connection ||= begin
connect_uri = URI.parse(@source_url)
http = Net::HTTP.new(connect_uri.host, connect_uri.port)
http.open_timeout = 30
http.read_timeout = 30
http.use_ssl = connect_uri.scheme == "https"
http
end
end
def authorization
@_authorization ||= begin
auth_str = "#{@auth_email}/token:#{@auth_token}"
"Basic #{Base64.strict_encode64(auth_str)}"
end
end
def get_from_api(path, array_name, show_status: false)
url = "#{@source_url}#{path}"
start_time = Time.now
while url
get = Net::HTTP::Get.new(url)
get['User-Agent'] = 'Discourse Zendesk Importer'
get['Authorization'] = authorization
retry_count = 0
begin
while retry_count < 5
begin
response = connection.request(get)
puts("Retry successful!") if retry_count > 0
break
rescue => e
puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
sleep(20)
retry_count += 1
end
end
end
json = JSON.parse(response.body)
json[array_name].each do |row|
yield row
end
url = json['next_page']
if show_status
if json['page'] && json['page_count']
print_status(json['page'], json['page_count'], start_time)
else
print '.'
end
end
end
end
end
unless ARGV.length == 4 && Dir.exist?(ARGV[1])
puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", ""
exit 1
end
ImportScripts::ZendeskApi.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform