mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 16:02:46 +08:00
FEATURE: Zendesk importer that uses its API to get data
This commit is contained in:
parent
83bf641292
commit
15f657309a
|
@ -873,7 +873,7 @@ class ImportScripts::Base
|
|||
@start_times.fetch(key) { |k| @start_times[k] = Time.now }
|
||||
end
|
||||
|
||||
def batches(batch_size)
|
||||
def batches(batch_size = 1000)
|
||||
offset = 0
|
||||
loop do
|
||||
yield offset
|
||||
|
|
|
@ -125,6 +125,10 @@ module ImportScripts
|
|||
@db.execute(sql)
|
||||
end
|
||||
|
||||
def get_first_value(sql)
|
||||
@db.get_first_value(sql)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def configure_database
|
||||
|
|
306
script/import_scripts/zendesk_api.rb
Normal file
306
script/import_scripts/zendesk_api.rb
Normal file
|
@ -0,0 +1,306 @@
|
|||
# Zendesk importer
|
||||
#
|
||||
# This one uses their API.
|
||||
|
||||
require 'reverse_markdown'
|
||||
require_relative 'base'
|
||||
require_relative 'base/generic_database'
|
||||
|
||||
# Call it like this:
|
||||
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
|
||||
class ImportScripts::Zendesk < ImportScripts::Base
|
||||
BATCH_SIZE = 1000
|
||||
|
||||
def initialize(source_url, path, auth_email, auth_token)
|
||||
super()
|
||||
|
||||
@source_url = source_url
|
||||
@path = path
|
||||
@auth_email = auth_email
|
||||
@auth_token = auth_token
|
||||
@db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
|
||||
end
|
||||
|
||||
def execute
|
||||
fetch_from_api
|
||||
|
||||
import_categories
|
||||
import_users
|
||||
import_topics
|
||||
import_posts
|
||||
end
|
||||
|
||||
def fetch_from_api
|
||||
puts '', 'fetching categories...'
|
||||
|
||||
get_from_api('/api/v2/community/topics.json', 'topics') do |row|
|
||||
@db.insert_category(
|
||||
id: row['id'],
|
||||
name: row['name'],
|
||||
description: row['description'],
|
||||
position: row['position'],
|
||||
url: row['html_url']
|
||||
)
|
||||
end
|
||||
|
||||
puts '', 'fetching topics...'
|
||||
|
||||
get_from_api('/api/v2/community/posts.json', 'posts') do |row|
|
||||
@db.insert_topic(
|
||||
id: row['id'],
|
||||
title: row['title'],
|
||||
raw: row['details'],
|
||||
category_id: row['topic_id'],
|
||||
closed: row['closed'],
|
||||
user_id: row['author_id'],
|
||||
created_at: row['created_at'],
|
||||
url: row['html_url']
|
||||
)
|
||||
end
|
||||
|
||||
puts '', 'fetching posts...'
|
||||
total_count = @db.count_topics
|
||||
start_time = Time.now
|
||||
last_id = ''
|
||||
|
||||
batches do |offset|
|
||||
rows, last_id = @db.fetch_topics(last_id)
|
||||
break if rows.empty?
|
||||
|
||||
print_status(offset, total_count, start_time)
|
||||
|
||||
rows.each do |topic_row|
|
||||
get_from_api("/api/v2/community/posts/#{topic_row['id']}/comments.json", 'comments', show_status: false) do |row|
|
||||
@db.insert_post(
|
||||
id: row['id'],
|
||||
raw: row['body'],
|
||||
topic_id: topic_row['id'],
|
||||
user_id: row['author_id'],
|
||||
created_at: row['created_at'],
|
||||
url: row['html_url']
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
puts '', 'fetching users...'
|
||||
|
||||
results = @db.execute_sql("SELECT user_id FROM topic")
|
||||
user_ids = results.map { |h| h['user_id']&.to_i }
|
||||
results = @db.execute_sql("SELECT user_id FROM post")
|
||||
user_ids += results.map { |h| h['user_id']&.to_i }
|
||||
user_ids.uniq!
|
||||
user_ids.sort!
|
||||
|
||||
total_users = user_ids.size
|
||||
start_time = Time.now
|
||||
|
||||
while !user_ids.empty?
|
||||
print_status(total_users - user_ids.size, total_users, start_time)
|
||||
get_from_api("/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(',')}", 'users', show_status: false) do |row|
|
||||
@db.insert_user(
|
||||
id: row['id'],
|
||||
email: row['email'],
|
||||
name: row['name'],
|
||||
created_at: row['created_at'],
|
||||
last_seen_at: row['last_login_at'],
|
||||
active: row['active']
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@db.sort_posts_by_created_at
|
||||
end
|
||||
|
||||
def import_categories
|
||||
puts "", "creating categories"
|
||||
rows = @db.fetch_categories
|
||||
|
||||
create_categories(rows) do |row|
|
||||
{
|
||||
id: row['id'],
|
||||
name: row['name'],
|
||||
description: row['description'],
|
||||
position: row['position'],
|
||||
post_create_action: proc do |category|
|
||||
url = remove_domain(row['url'])
|
||||
Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url)
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_users
|
||||
puts "", "creating users"
|
||||
total_count = @db.count_users
|
||||
last_id = ''
|
||||
|
||||
batches do |offset|
|
||||
rows, last_id = @db.fetch_users(last_id)
|
||||
break if rows.empty?
|
||||
|
||||
next if all_records_exist?(:users, rows.map { |row| row['id'] })
|
||||
|
||||
create_users(rows, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
id: row['id'],
|
||||
email: row['email'],
|
||||
name: row['name'],
|
||||
created_at: row['created_at'],
|
||||
last_seen_at: row['last_seen_at'],
|
||||
active: row['active'] == 1
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_topics
|
||||
puts "", "creating topics"
|
||||
total_count = @db.count_topics
|
||||
last_id = ''
|
||||
|
||||
batches do |offset|
|
||||
rows, last_id = @db.fetch_topics(last_id)
|
||||
break if rows.empty?
|
||||
|
||||
next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) })
|
||||
|
||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
id: import_topic_id(row['id']),
|
||||
title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing",
|
||||
raw: normalize_raw(row['raw']),
|
||||
category: category_id_from_imported_category_id(row['category_id']),
|
||||
user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
|
||||
created_at: row['created_at'],
|
||||
closed: row['closed'] == 1,
|
||||
post_create_action: proc do |post|
|
||||
url = remove_domain(row['url'])
|
||||
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_topic_id(topic_id)
|
||||
"T#{topic_id}"
|
||||
end
|
||||
|
||||
def import_posts
|
||||
puts "", "creating posts"
|
||||
total_count = @db.count_posts
|
||||
last_row_id = 0
|
||||
|
||||
batches do |offset|
|
||||
rows, last_row_id = @db.fetch_posts(last_row_id)
|
||||
break if rows.empty?
|
||||
|
||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||
topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id']))
|
||||
|
||||
if topic.nil?
|
||||
p "MISSING TOPIC #{row['topic_id']}"
|
||||
p row
|
||||
next
|
||||
end
|
||||
|
||||
{
|
||||
id: row['id'],
|
||||
raw: normalize_raw(row['raw']),
|
||||
user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
|
||||
topic_id: topic[:topic_id],
|
||||
created_at: row['created_at'],
|
||||
post_create_action: proc do |post|
|
||||
url = remove_domain(row['url'])
|
||||
Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url)
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_raw(raw)
|
||||
raw = raw.gsub('\n', '')
|
||||
raw = ReverseMarkdown.convert(raw)
|
||||
raw
|
||||
end
|
||||
|
||||
def remove_domain(url)
|
||||
url.sub(@source_url, "")
|
||||
end
|
||||
|
||||
def permalink_exists?(url)
|
||||
Permalink.find_by(url: url)
|
||||
end
|
||||
|
||||
def connection
|
||||
@_connection ||= begin
|
||||
connect_uri = URI.parse(@source_url)
|
||||
|
||||
http = Net::HTTP.new(connect_uri.host, connect_uri.port)
|
||||
http.open_timeout = 30
|
||||
http.read_timeout = 30
|
||||
http.use_ssl = connect_uri.scheme == "https"
|
||||
|
||||
http
|
||||
end
|
||||
end
|
||||
|
||||
def authorization
|
||||
@_authorization ||= begin
|
||||
auth_str = "#{@auth_email}/token:#{@auth_token}"
|
||||
"Basic #{Base64.strict_encode64(auth_str)}"
|
||||
end
|
||||
end
|
||||
|
||||
def get_from_api(path, array_name, show_status: true)
|
||||
url = "#{@source_url}#{path}"
|
||||
start_time = Time.now
|
||||
|
||||
while url
|
||||
get = Net::HTTP::Get.new(url)
|
||||
get['User-Agent'] = 'Discourse Zendesk Importer'
|
||||
get['Authorization'] = authorization
|
||||
|
||||
retry_count = 0
|
||||
|
||||
begin
|
||||
while retry_count < 5
|
||||
begin
|
||||
response = connection.request(get)
|
||||
puts("Retry successful!") if retry_count > 0
|
||||
break
|
||||
rescue => e
|
||||
puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
|
||||
sleep(20)
|
||||
retry_count += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
json = JSON.parse(response.body)
|
||||
|
||||
json[array_name].each do |row|
|
||||
yield row
|
||||
end
|
||||
|
||||
url = json['next_page']
|
||||
|
||||
if show_status
|
||||
if json['page'] && json['page_count']
|
||||
print_status(json['page'], json['page_count'], start_time)
|
||||
else
|
||||
print '.'
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
unless ARGV.length == 4 && Dir.exist?(ARGV[1])
|
||||
puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", ""
|
||||
exit 1
|
||||
end
|
||||
|
||||
ImportScripts::Zendesk.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform
|
Loading…
Reference in New Issue
Block a user