mirror of
https://github.com/discourse/discourse.git
synced 2024-11-24 20:26:35 +08:00
30990006a9
This reduces chances of errors where consumers of strings mutate inputs and reduces memory usage of the app. Test suite passes now, but there may be some stuff left, so we will run a few sites on a branch prior to merging
309 lines
8.0 KiB
Ruby
309 lines
8.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Zendesk importer
#
# This importer fetches its data through the Zendesk API.
|
|
|
|
require 'reverse_markdown'
|
|
require_relative 'base'
|
|
require_relative 'base/generic_database'
|
|
|
|
# Call it like this:
#   RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
|
|
class ImportScripts::ZendeskApi < ImportScripts::Base
|
|
BATCH_SIZE = 1000
|
|
|
|
# Stores the connection settings and opens the intermediate database.
#
# @param source_url [String] base URL of the Zendesk site; it is prefixed to
#   every API path and stripped from imported URLs when building permalinks
# @param path [String] location handed to ImportScripts::GenericDatabase
# @param auth_email [String] e-mail part of the "email/token:TOKEN" credentials
# @param auth_token [String] API token part of the credentials
def initialize(source_url, path, auth_email, auth_token)
  super()

  @source_url = source_url
  @path = path
  @auth_email = auth_email
  @auth_token = auth_token
  # NOTE(review): `recreate: true` presumably starts from an empty database on
  # every run - confirm against GenericDatabase.
  @db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
end
|
|
|
|
# Runs the import: first downloads everything from the API into the
# intermediate database, then creates categories, users, topics and posts
# from it, in that order.
def execute
  fetch_from_api

  import_categories
  import_users
  import_topics
  import_posts
end
|
|
|
|
# Downloads categories, topics, posts and users from the Zendesk API into the
# intermediate database and finally asks the database to sort the posts by
# creation date.
def fetch_from_api
  fetch_categories_from_api
  fetch_topics_from_api
  fetch_posts_from_api
  fetch_users_from_api

  @db.sort_posts_by_created_at
end

# Stores every Zendesk community "topic" as a category row.
def fetch_categories_from_api
  puts '', 'fetching categories...'

  get_from_api('/api/v2/community/topics.json', 'topics') do |row|
    @db.insert_category(
      id: row['id'],
      name: row['name'],
      description: row['description'],
      position: row['position'],
      url: row['html_url']
    )
  end
end

# Stores every Zendesk community "post" as a topic row.
def fetch_topics_from_api
  puts '', 'fetching topics...'

  get_from_api('/api/v2/community/posts.json', 'posts') do |row|
    @db.insert_topic(
      id: row['id'],
      title: row['title'],
      raw: row['details'],
      category_id: row['topic_id'],
      closed: row['closed'],
      user_id: row['author_id'],
      created_at: row['created_at'],
      url: row['html_url']
    )
  end
end

# Walks the stored topics in batches and stores the comments of each one as
# post rows.
def fetch_posts_from_api
  puts '', 'fetching posts...'
  total_count = @db.count_topics
  start_time = Time.now
  last_id = ''

  batches do |offset|
    rows, last_id = @db.fetch_topics(last_id)
    break if rows.empty?

    print_status(offset, total_count, start_time)

    rows.each do |topic_row|
      get_from_api("/api/v2/community/posts/#{topic_row['id']}/comments.json", 'comments', show_status: false) do |row|
        @db.insert_post(
          id: row['id'],
          raw: row['body'],
          topic_id: topic_row['id'],
          user_id: row['author_id'],
          created_at: row['created_at'],
          url: row['html_url']
        )
      end
    end
  end
end

# Collects the author ids referenced by the stored topics and posts and
# fetches those users 50 ids per request.
def fetch_users_from_api
  puts '', 'fetching users...'

  topic_authors = @db.execute_sql("SELECT user_id FROM topic")
  post_authors = @db.execute_sql("SELECT user_id FROM post")

  # Rows without an author yield nil; they must be dropped because sorting a
  # mix of nil and integers raises and a blank id would corrupt the query.
  user_ids = (topic_authors + post_authors).map { |h| h['user_id']&.to_i }
  user_ids = user_ids.compact.uniq.sort

  total_users = user_ids.size
  start_time = Time.now
  ids_per_request = 50

  user_ids.each_slice(ids_per_request).with_index do |ids, index|
    print_status(index * ids_per_request, total_users, start_time)

    get_from_api("/api/v2/users/show_many.json?ids=#{ids.join(',')}", 'users', show_status: false) do |row|
      @db.insert_user(
        id: row['id'],
        email: row['email'],
        name: row['name'],
        created_at: row['created_at'],
        last_seen_at: row['last_login_at'],
        active: row['active']
      )
    end
  end
end
|
|
|
|
# Creates a category for every category row in the intermediate database and,
# once a category exists, a permalink from its old Zendesk path to it.
def import_categories
  puts "", "creating categories"
  rows = @db.fetch_categories

  create_categories(rows) do |row|
    {
      id: row['id'],
      name: row['name'],
      description: row['description'],
      position: row['position'],
      post_create_action: proc do |category|
        # Keep the old URL working unless a permalink for it already exists.
        url = remove_domain(row['url'])
        Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url)
      end
    }
  end
end
|
|
|
|
# Creates users from the intermediate database, one batch at a time. Batches
# whose users have all been imported already are skipped.
def import_users
  puts "", "creating users"
  total_count = @db.count_users
  last_id = ''

  batches do |offset|
    rows, last_id = @db.fetch_users(last_id)
    break if rows.empty?

    next if all_records_exist?(:users, rows.map { |row| row['id'] })

    create_users(rows, total: total_count, offset: offset) do |row|
      {
        id: row['id'],
        email: row['email'],
        name: row['name'],
        created_at: row['created_at'],
        last_seen_at: row['last_seen_at'],
        # The stored flag is compared against 1, so any other value means inactive.
        active: row['active'] == 1
      }
    end
  end
end
|
|
|
|
# Creates the first post of every topic from the intermediate database, one
# batch at a time, and a permalink from the old Zendesk path to the new topic.
# Batches whose topics have all been imported already are skipped.
def import_topics
  puts "", "creating topics"
  total_count = @db.count_topics
  last_id = ''

  batches do |offset|
    rows, last_id = @db.fetch_topics(last_id)
    break if rows.empty?

    next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) })

    create_posts(rows, total: total_count, offset: offset) do |row|
      {
        id: import_topic_id(row['id']),
        # Titles are trimmed and cut to 255 characters; blank ones get a placeholder.
        title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing",
        raw: normalize_raw(row['raw']),
        category: category_id_from_imported_category_id(row['category_id']),
        # Authors that were not imported fall back to the system user.
        user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
        created_at: row['created_at'],
        closed: row['closed'] == 1,
        post_create_action: proc do |post|
          url = remove_domain(row['url'])
          Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
        end
      }
    end
  end
end
|
|
|
|
# Builds the import id used for a topic's first post by prefixing the Zendesk
# id with "T".
#
# @param topic_id [Object] Zendesk id of the topic
# @return [String]
def import_topic_id(topic_id)
  "T#{topic_id}"
end
|
|
|
|
# Creates the replies from the intermediate database in their sorted order,
# one batch at a time, and a permalink from each old Zendesk path to the new
# post.
def import_posts
  puts "", "creating posts"
  total_count = @db.count_posts
  last_row_id = 0

  batches do |offset|
    rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
    break if rows.empty?

    create_posts(rows, total: total_count, offset: offset) do |row|
      topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id']))

      if topic.nil?
        p "MISSING TOPIC #{row['topic_id']}"
        p row
        # Hands nil to create_posts for this row.
        # NOTE(review): presumably create_posts skips nil results - confirm.
        next
      end

      {
        id: row['id'],
        raw: normalize_raw(row['raw']),
        # Authors that were not imported fall back to the system user.
        user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
        topic_id: topic[:topic_id],
        created_at: row['created_at'],
        post_create_action: proc do |post|
          url = remove_domain(row['url'])
          Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url)
        end
      }
    end
  end
end
|
|
|
|
# Converts the HTML of a Zendesk post or comment to Markdown.
#
# @param raw [String, nil] HTML body; nil is treated as an empty body so that
#   a single row without content cannot abort the whole import
# @return [String] whatever ReverseMarkdown.convert returns for the cleaned HTML
def normalize_raw(raw)
  # '\n' in single quotes is the two-character sequence backslash + "n", not a
  # line break. NOTE(review): presumably the API leaves such escaped newlines
  # in the body - confirm that real newlines are meant to survive.
  html = raw.to_s.gsub('\n', '')
  ReverseMarkdown.convert(html)
end
|
|
|
|
# Strips the first occurrence of the configured source URL from +url+, leaving
# the part that is used for permalinks.
#
# @param url [String] URL reported by Zendesk
# @return [String]
def remove_domain(url)
  url.sub(@source_url, "")
end
|
|
|
|
# Tells whether a permalink with exactly this URL is already stored.
#
# @param url [String]
# @return [Boolean]
def permalink_exists?(url)
  # exists? answers with true/false and avoids loading the record.
  Permalink.exists?(url: url)
end
|
|
|
|
# Returns the memoized Net::HTTP object for the host of the source URL. The
# object is configured with 30 second open/read timeouts and uses TLS when the
# source URL has the "https" scheme.
#
# @return [Net::HTTP]
def connection
  @_connection ||= begin
    connect_uri = URI.parse(@source_url)

    http = Net::HTTP.new(connect_uri.host, connect_uri.port)
    http.open_timeout = 30
    http.read_timeout = 30
    http.use_ssl = connect_uri.scheme == "https"

    http
  end
end
|
|
|
|
# Returns the memoized value for the Authorization header: HTTP Basic
# credentials built from "<auth_email>/token:<auth_token>".
#
# @return [String]
def authorization
  @_authorization ||= begin
    auth_str = "#{@auth_email}/token:#{@auth_token}"
    "Basic #{Base64.strict_encode64(auth_str)}"
  end
end
|
|
|
|
# Requests +path+ from the API and yields every element of the +array_name+
# array of the JSON response, following "next_page" links until none is left.
#
# @param path [String] API path that is appended to the source URL
# @param array_name [String] key of the array inside each JSON response
# @param show_status [Boolean] print progress after every page
# @param max_retries [Integer] attempts per page before giving up
# @param retry_delay [Numeric] seconds to wait between attempts
# @yieldparam row [Hash] one element of the response array
# @raise [RuntimeError] when a page could not be fetched after +max_retries+ attempts
# @raise [KeyError] when a response does not contain +array_name+
def get_from_api(path, array_name, show_status: true, max_retries: 5, retry_delay: 20)
  url = "#{@source_url}#{path}"
  start_time = Time.now

  while url
    get = Net::HTTP::Get.new(url)
    get['User-Agent'] = 'Discourse Zendesk Importer'
    get['Authorization'] = authorization

    response = request_with_retries(get, url, max_retries, retry_delay)
    json = JSON.parse(response.body)

    json.fetch(array_name).each { |row| yield row }

    url = json['next_page']

    if show_status
      if json['page'] && json['page_count']
        print_status(json['page'], json['page_count'], start_time)
      else
        print '.'
      end
    end
  end
end

# Sends +request+ and returns the response. Transport errors and non-2xx
# responses are retried after +retry_delay+ seconds; the last failure is
# raised once +max_retries+ attempts have been made.
def request_with_retries(request, url, max_retries, retry_delay)
  attempts = 0

  begin
    response = connection.request(request)

    # An error response carries no data array, so treat it like a failed request.
    unless (200..299).cover?(response.code.to_i)
      raise "unexpected HTTP status #{response.code}"
    end

    puts("Retry successful!") if attempts > 0
    response
  rescue => e
    attempts += 1

    if attempts >= max_retries
      raise "Request failed #{url} after #{attempts} attempts. #{e.class.name} #{e.message}"
    end

    puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
    sleep(retry_delay)
    retry
  end
end
|
|
end
|
|
|
|
# Command line entry point: requires exactly four arguments, the second of
# which must be an existing directory; otherwise prints the usage and exits
# with status 1.
unless ARGV.length == 4 && Dir.exist?(ARGV[1])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", ""
  exit 1
end

ImportScripts::ZendeskApi.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform