# discourse/script/import_scripts/zendesk_api.rb

# frozen_string_literal: true

# Zendesk importer
#
# Fetches community topics, posts, comments and their authors through the
# Zendesk API and imports them into Discourse.

require 'reverse_markdown'
require_relative 'base'
require_relative 'base/generic_database'

# Call it like this:
#   RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
class ImportScripts::ZendeskApi < ImportScripts::Base
  BATCH_SIZE = 1000

  # Remembers the connection settings and opens the intermediate database.
  #
  # @param source_url [String] base URL of the Zendesk site; every API path is appended to it
  # @param path [String] location handed to ImportScripts::GenericDatabase
  # @param auth_email [String] e-mail address used for API authentication
  # @param auth_token [String] API token that belongs to auth_email
  def initialize(source_url, path, auth_email, auth_token)
    super()

    @source_url, @path = source_url, path
    @auth_email, @auth_token = auth_email, auth_token

    @db = ImportScripts::GenericDatabase.new(path, batch_size: BATCH_SIZE, recreate: true)
  end

  # Runs the whole import: first downloads everything from Zendesk, then
  # creates categories, users, topics and posts, in exactly that order.
  # Returns whatever import_posts returns.
  def execute
    fetch_from_api

    import_steps = %i[import_categories import_users import_topics import_posts]
    import_steps.map { |step| send(step) }.last
  end

  # Downloads the community data from the Zendesk API into the intermediate
  # database (@db). Zendesk community "topics" are stored as categories,
  # community "posts" as topics and their "comments" as posts; afterwards the
  # authors of those topics and posts are fetched as users. The last step
  # calls @db.sort_posts_by_created_at.
  def fetch_from_api
    fetch_categories_from_api
    fetch_topics_from_api
    fetch_posts_from_api
    fetch_users_from_api

    @db.sort_posts_by_created_at
  end

  # Stores every Zendesk community topic as a category row.
  def fetch_categories_from_api
    puts '', 'fetching categories...'

    get_from_api('/api/v2/community/topics.json', 'topics') do |row|
      @db.insert_category(
        id: row['id'],
        name: row['name'],
        description: row['description'],
        position: row['position'],
        url: row['html_url']
      )
    end
  end

  # Stores every Zendesk community post as a topic row.
  def fetch_topics_from_api
    puts '', 'fetching topics...'

    get_from_api('/api/v2/community/posts.json', 'posts') do |row|
      @db.insert_topic(
        id: row['id'],
        title: row['title'],
        raw: row['details'],
        category_id: row['topic_id'],
        closed: row['closed'],
        user_id: row['author_id'],
        created_at: row['created_at'],
        url: row['html_url']
      )
    end
  end

  # Walks the stored topics in batches and stores the comments of each one as post rows.
  def fetch_posts_from_api
    puts '', 'fetching posts...'
    total_count = @db.count_topics
    start_time = Time.now
    last_id = ''

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      break if rows.empty?

      print_status(offset, total_count, start_time)

      rows.each do |topic_row|
        comments_path = "/api/v2/community/posts/#{topic_row['id']}/comments.json"

        get_from_api(comments_path, 'comments', show_status: false) do |row|
          @db.insert_post(
            id: row['id'],
            raw: row['body'],
            topic_id: topic_row['id'],
            user_id: row['author_id'],
            created_at: row['created_at'],
            url: row['html_url']
          )
        end
      end
    end
  end

  # Fetches the authors of all stored topics and posts, 50 ids per request.
  def fetch_users_from_api
    puts '', 'fetching users...'

    user_ids = @db.execute_sql("SELECT user_id FROM topic").map { |row| row['user_id']&.to_i }
    user_ids += @db.execute_sql("SELECT user_id FROM post").map { |row| row['user_id']&.to_i }
    # Rows without an author yield nil; drop them, otherwise sorting raises
    # ArgumentError (nil cannot be compared with an Integer).
    user_ids = user_ids.compact.uniq.sort

    total_users = user_ids.size
    start_time = Time.now
    ids_per_request = 50

    user_ids.each_slice(ids_per_request).with_index do |ids, index|
      print_status(index * ids_per_request, total_users, start_time)

      get_from_api("/api/v2/users/show_many.json?ids=#{ids.join(',')}", 'users', show_status: false) do |row|
        @db.insert_user(
          id: row['id'],
          email: row['email'],
          name: row['name'],
          created_at: row['created_at'],
          last_seen_at: row['last_login_at'],
          active: row['active']
        )
      end
    end
  end

  # Creates one category per stored category row. After a category has been
  # created, a permalink from its old Zendesk path to the new category is
  # added unless one with that URL is already present.
  def import_categories
    puts "", "creating categories"

    create_categories(@db.fetch_categories) do |category_row|
      add_permalink = proc do |category|
        old_path = remove_domain(category_row['url'])
        Permalink.create(url: old_path, category_id: category.id) unless permalink_exists?(old_path)
      end

      {
        id: category_row['id'],
        name: category_row['name'],
        description: category_row['description'],
        position: category_row['position'],
        post_create_action: add_permalink
      }
    end
  end

  # Creates users from the stored user rows, one batch at a time. A batch is
  # passed over when all_records_exist? reports that every id in it is known.
  def import_users
    puts "", "creating users"
    total_count = @db.count_users
    cursor = ''

    batches do |offset|
      user_rows, cursor = @db.fetch_users(cursor)
      break if user_rows.empty?

      user_ids = user_rows.map { |user_row| user_row['id'] }

      unless all_records_exist?(:users, user_ids)
        create_users(user_rows, total: total_count, offset: offset) do |user_row|
          {
            id: user_row['id'],
            email: user_row['email'],
            name: user_row['name'],
            created_at: user_row['created_at'],
            last_seen_at: user_row['last_seen_at'],
            # the stored flag is compared with the integer 1
            active: user_row['active'] == 1
          }
        end
      end
    end
  end

  # Creates the first post of every stored topic, batch by batch. Topic ids
  # are passed through import_topic_id before they are used as import ids.
  # A missing title is replaced by a placeholder and an unknown author by the
  # system user. After creation a permalink from the old Zendesk path to the
  # new topic is added unless one with that URL is already present.
  def import_topics
    puts "", "creating topics"
    total_count = @db.count_topics
    cursor = ''

    batches do |offset|
      topic_rows, cursor = @db.fetch_topics(cursor)
      break if topic_rows.empty?

      imported_ids = topic_rows.map { |topic_row| import_topic_id(topic_row['id']) }

      unless all_records_exist?(:posts, imported_ids)
        create_posts(topic_rows, total: total_count, offset: offset) do |topic_row|
          title = topic_row['title']

          add_permalink = proc do |post|
            old_path = remove_domain(topic_row['url'])
            Permalink.create(url: old_path, topic_id: post.topic.id) unless permalink_exists?(old_path)
          end

          {
            id: import_topic_id(topic_row['id']),
            title: title.present? ? title.strip[0...255] : "Topic title missing",
            raw: normalize_raw(topic_row['raw']),
            category: category_id_from_imported_category_id(topic_row['category_id']),
            user_id: user_id_from_imported_user_id(topic_row['user_id']) || Discourse.system_user.id,
            created_at: topic_row['created_at'],
            closed: topic_row['closed'] == 1,
            post_create_action: add_permalink
          }
        end
      end
    end
  end

  # Builds the import id of a topic by prefixing the Zendesk id with "T".
  # import_topics and import_posts both go through this method to build and
  # look up topic import ids.
  def import_topic_id(topic_id)
    format('T%s', topic_id)
  end

  # Creates posts from the stored comment rows, batch by batch. A row whose
  # topic cannot be looked up is printed to stdout and the block yields nil
  # for it. Unknown authors are replaced by the system user. After creation a
  # permalink from the old Zendesk path to the new post is added unless one
  # with that URL is already present.
  def import_posts
    puts "", "creating posts"
    total_count = @db.count_posts
    cursor = 0

    batches do |offset|
      post_rows, cursor = @db.fetch_sorted_posts(cursor)
      break if post_rows.empty?

      create_posts(post_rows, total: total_count, offset: offset) do |post_row|
        topic = topic_lookup_from_imported_post_id(import_topic_id(post_row['topic_id']))

        if topic.nil?
          p "MISSING TOPIC #{post_row['topic_id']}"
          p post_row
          nil
        else
          add_permalink = proc do |post|
            old_path = remove_domain(post_row['url'])
            Permalink.create(url: old_path, post_id: post.id) unless permalink_exists?(old_path)
          end

          {
            id: post_row['id'],
            raw: normalize_raw(post_row['raw']),
            user_id: user_id_from_imported_user_id(post_row['user_id']) || Discourse.system_user.id,
            topic_id: topic[:topic_id],
            created_at: post_row['created_at'],
            post_create_action: add_permalink
          }
        end
      end
    end
  end

  # Converts a Zendesk body into the text used for the Discourse post by
  # running it through ReverseMarkdown.
  #
  # @param raw [String, nil] body as stored in the intermediate database
  # @return whatever ReverseMarkdown.convert returns for the cleaned input
  def normalize_raw(raw)
    # A record without a body must not abort the whole import with a
    # NoMethodError, so nil is treated like an empty string.
    #
    # NOTE(review): '\n' is single-quoted, so this removes the two-character
    # sequence backslash + "n", not real newline characters. Kept as is —
    # confirm which one was intended.
    html = raw.to_s.gsub('\n', '')
    ReverseMarkdown.convert(html)
  end

  # Returns url with the first occurrence of the configured source URL
  # removed; URLs that do not contain it are returned unchanged.
  def remove_domain(url)
    site_root = @source_url
    url.sub(site_root, "")
  end

  # Tells whether a permalink with exactly this URL is already stored.
  #
  # Uses an existence query so the predicate returns true/false instead of
  # loading and returning a whole Permalink record.
  #
  # @param url [String] permalink URL to look for
  # @return [Boolean]
  def permalink_exists?(url)
    Permalink.exists?(url: url)
  end

  # Returns the memoized Net::HTTP object for the source URL. It is built on
  # first use with 30 second open and read timeouts, and with SSL enabled
  # when the source URL uses the https scheme.
  def connection
    return @_connection if @_connection

    source_uri = URI.parse(@source_url)

    @_connection = Net::HTTP.new(source_uri.host, source_uri.port).tap do |http|
      http.open_timeout = 30
      http.read_timeout = 30
      http.use_ssl = source_uri.scheme == "https"
    end
  end

  # Returns the memoized value for the Authorization header: "Basic "
  # followed by the strict Base64 encoding of "<email>/token:<token>".
  def authorization
    return @_authorization if @_authorization

    credentials = [@auth_email, @auth_token].join('/token:')
    @_authorization = "Basic #{Base64.strict_encode64(credentials)}"
  end

  # Fetches every page of a Zendesk API collection and yields each record.
  # Pagination follows the "next_page" URL of each response until it is
  # missing or null.
  #
  # @param path [String] API path appended to @source_url for the first request
  # @param array_name [String] key of the JSON response that holds the records
  # @param show_status [Boolean] print progress after every page
  # @param max_attempts [Integer] how often one page is requested before giving up
  # @param retry_delay [Numeric] seconds to wait between two attempts
  # @yieldparam row [Object] one element of the parsed records array
  # @raise [StandardError] the error of the last attempt once max_attempts is used up
  # @raise [KeyError] when a response does not contain array_name
  def get_from_api(path, array_name, show_status: true, max_attempts: 5, retry_delay: 20)
    url = "#{@source_url}#{path}"
    start_time = Time.now

    while url
      response = request_with_retries(url, max_attempts, retry_delay)
      json = JSON.parse(response.body)

      # fetch names the missing key instead of failing later with
      # "undefined method `each' for nil".
      json.fetch(array_name).each { |row| yield row }

      url = json['next_page']

      next unless show_status

      if json['page'] && json['page_count']
        print_status(json['page'], json['page_count'], start_time)
      else
        print '.'
      end
    end
  end

  # Sends one GET request and returns the response. Network errors and
  # responses outside the 2xx range (for example 429 when rate limited) count
  # as failed attempts; the error of the last attempt is re-raised so the
  # caller never continues with a missing response.
  def request_with_retries(url, max_attempts, retry_delay)
    request = Net::HTTP::Get.new(url)
    request['User-Agent'] = 'Discourse Zendesk Importer'
    request['Authorization'] = authorization

    attempt = 0

    begin
      attempt += 1
      response = connection.request(request)

      unless response.code.to_i.between?(200, 299)
        raise "unexpected HTTP status #{response.code}"
      end

      puts("Retry successful!") if attempt > 1
      response
    rescue => e
      raise if attempt >= max_attempts

      puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
      sleep(retry_delay)
      retry
    end
  end
end

# Command line entry point: exactly four arguments are required and the
# second one must be an existing directory; otherwise print the usage text
# and exit with status 1.
if ARGV.length != 4 || !Dir.exist?(ARGV[1])
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN", ""
  exit 1
end

source_url, dirname, auth_email, auth_token = ARGV
ImportScripts::ZendeskApi.new(source_url, dirname, auth_email, auth_token).perform