discourse/script/import_scripts/zendesk_api.rb

# frozen_string_literal: true

# Zendesk importer
#
# This one uses their API.

require "open-uri"
require "reverse_markdown"
require_relative "base"
require_relative "base/generic_database"

# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
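#
# Before running, replace the IMAGE_DOWNLOAD_PATH and SUBDOMAIN placeholders below:
# IMAGE_DOWNLOAD_PATH is a writable scratch file that downloaded images are saved to,
# and SUBDOMAIN is the Zendesk subdomain used to match hosted image URLs.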
class ImportScripts::ZendeskApi < ImportScripts::Base
  BATCH_SIZE = 1000
  HTTP_ERRORS = [
    EOFError,
    Errno::ECONNRESET,
    Errno::EINVAL,
    Net::HTTPBadResponse,
    Net::HTTPHeaderSyntaxError,
    Net::ProtocolError,
    Timeout::Error,
    OpenURI::HTTPError,
    OpenSSL::SSL::SSLError,
  ]
  MAX_RETRIES = 5
  IMAGE_DOWNLOAD_PATH = "replace-me"
  SUBDOMAIN = "replace-me"

  def initialize(source_url, path, auth_email, auth_token)
    super()

    @source_url = source_url
    @path = path
    @auth_email = auth_email
    @auth_token = auth_token
    @db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true)
  end

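  # The import runs in two phases: everything is first fetched from the Zendesk API
  # into a local intermediate database, then imported into Discourse from there.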
  def execute
    fetch_from_api

    import_categories
    import_users
    import_topics
    import_posts
    import_likes
  end

  def fetch_from_api
    fetch_categories
    fetch_topics
    fetch_posts
    fetch_users

    @db.sort_posts_by_created_at
  end

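  # Zendesk community "topics" become Discourse categories.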
  def fetch_categories
    puts "", "fetching categories..."

    get_from_api("/api/v2/community/topics.json", "topics", show_status: true) do |row|
      @db.insert_category(
        id: row["id"],
        name: row["name"],
        description: row["description"],
        position: row["position"],
        url: row["html_url"],
      )
    end
  end

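  # Zendesk community "posts" become Discourse topics. When a post has votes, the
  # voters' user IDs are stored so they can be imported as likes later.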
  def fetch_topics
    puts "", "fetching topics..."

    get_from_api("/api/v2/community/posts.json", "posts", show_status: true) do |row|
      if row["vote_count"] > 0
        like_user_ids = fetch_likes("/api/v2/community/posts/#{row["id"]}/votes.json")
      end

      @db.insert_topic(
        id: row["id"],
        title: row["title"],
        raw: row["details"],
        category_id: row["topic_id"],
        closed: row["closed"],
        user_id: row["author_id"],
        created_at: row["created_at"],
        url: row["html_url"],
        like_user_ids: like_user_ids,
      )
    end
  end

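  # The comments on each Zendesk post become replies in the corresponding Discourse topic.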
  def fetch_posts
    puts "", "fetching posts..."

    current_count = 0
    total_count = @db.count_topics
    start_time = Time.now
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      break if rows.empty?

      rows.each do |topic_row|
        get_from_api(
          "/api/v2/community/posts/#{topic_row["id"]}/comments.json",
          "comments",
        ) do |row|
          if row["vote_count"] > 0
            like_user_ids =
              fetch_likes(
                "/api/v2/community/posts/#{topic_row["id"]}/comments/#{row["id"]}/votes.json",
              )
          end

          @db.insert_post(
            id: row["id"],
            raw: row["body"],
            topic_id: topic_row["id"],
            user_id: row["author_id"],
            created_at: row["created_at"],
            url: row["html_url"],
            like_user_ids: like_user_ids,
          )
        end

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end

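  # Only users who wrote a topic or post, or cast a vote, are fetched, in batches
  # of 50 through the show_many endpoint.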
  def fetch_users
    puts "", "fetching users..."

    user_ids = @db.execute_sql(<<~SQL).map { |row| row["user_id"] }
      SELECT user_id FROM topic
      UNION
      SELECT user_id FROM post
      UNION
      SELECT user_id FROM like
    SQL

    current_count = 0
    total_count = user_ids.size
    start_time = Time.now

    while !user_ids.empty?
      get_from_api(
        "/api/v2/users/show_many.json?ids=#{user_ids.shift(50).join(",")}",
        "users",
      ) do |row|
        @db.insert_user(
          id: row["id"],
          email: row["email"],
          name: row["name"],
          created_at: row["created_at"],
          last_seen_at: row["last_login_at"],
          active: row["active"],
          avatar_path: row["photo"].present? ? row["photo"]["content_url"] : nil,
        )

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end

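  # Returns the IDs of users whose vote has value == 1 (an upvote); other votes are ignored.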
  def fetch_likes(url)
    user_ids = []

    get_from_api(url, "votes") do |row|
      user_ids << row["user_id"] if row["id"].present? && row["value"] == 1
    end

    user_ids
  end

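  # A permalink is created for each imported category so existing Zendesk URLs keep working.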
  def import_categories
    puts "", "creating categories"
    rows = @db.fetch_categories

    create_categories(rows) do |row|
      {
        id: row["id"],
        name: row["name"],
        description: row["description"],
        position: row["position"],
        post_create_action:
          proc do |category|
            url = remove_domain(row["url"])
            Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url)
          end,
      }
    end
  end

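  # Avatars are imported from the stored avatar_path when one is present; failures
  # are silently ignored.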
  def import_users
    puts "", "creating users"
    total_count = @db.count_users
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_users(last_id)
      break if rows.empty?
      next if all_records_exist?(:users, rows.map { |row| row["id"] })

      create_users(rows, total: total_count, offset: offset) do |row|
        {
          id: row["id"],
          email: row["email"],
          name: row["name"],
          created_at: row["created_at"],
          last_seen_at: row["last_seen_at"],
          active: row["active"] == 1,
          post_create_action:
            proc do |user|
              if row["avatar_path"].present?
                begin
                  UserAvatar.import_url_for_user(row["avatar_path"], user)
                rescue StandardError
                  nil
                end
              end
            end,
        }
      end
    end
  end

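  # Topics use the "T<id>" import ID (see import_topic_id) so they cannot collide
  # with comment IDs, since topics and replies share the imported-post lookup table.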
  def import_topics
    puts "", "creating topics"
    total_count = @db.count_topics
    last_id = ""

    batches do |offset|
      rows, last_id = @db.fetch_topics(last_id)
      break if rows.empty?
      next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row["id"]) })

      create_posts(rows, total: total_count, offset: offset) do |row|
        {
          id: import_topic_id(row["id"]),
          title: row["title"].present? ? row["title"].strip[0...255] : "Topic title missing",
          raw:
            normalize_raw(
              row["raw"],
              user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
            ),
          category: category_id_from_imported_category_id(row["category_id"]),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          created_at: row["created_at"],
          closed: row["closed"] == 1,
          post_create_action:
            proc do |post|
              url = remove_domain(row["url"])
              Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
            end,
        }
      end
    end
  end

  def import_topic_id(topic_id)
    "T#{topic_id}"
  end

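  # Replies are imported in created_at order; rows whose topic was never imported
  # are reported and skipped.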
  def import_posts
    puts "", "creating posts"
    total_count = @db.count_posts
    last_row_id = 0

    batches do |offset|
      rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
      break if rows.empty?

      create_posts(rows, total: total_count, offset: offset) do |row|
        topic = topic_lookup_from_imported_post_id(import_topic_id(row["topic_id"]))

        if topic.nil?
          p "MISSING TOPIC #{row["topic_id"]}"
          p row
          next
        end

        {
          id: row["id"],
          raw:
            normalize_raw(
              row["raw"],
              user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
            ),
          user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id,
          topic_id: topic[:topic_id],
          created_at: row["created_at"],
          post_create_action:
            proc do |post|
              url = remove_domain(row["url"])
              Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url)
            end,
        }
      end
    end
  end

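  # Votes collected during the fetch phase are replayed as likes: a vote on a
  # Zendesk post maps to the topic's first Discourse post, and a vote on a
  # comment maps to the corresponding reply.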
  def import_likes
    puts "", "importing likes..."
    start_time = Time.now
    current_count = 0
    total_count = @db.count_likes
    last_row_id = 0

    batches do |offset|
      rows, last_row_id = @db.fetch_likes(last_row_id)
      break if rows.empty?

      rows.each do |row|
        import_id = row["topic_id"] ? import_topic_id(row["topic_id"]) : row["post_id"]
        post = Post.find_by(id: post_id_from_imported_post_id(import_id)) if import_id
        user = User.find_by(id: user_id_from_imported_user_id(row["user_id"]))

        if post && user
          begin
            PostActionCreator.like(user, post)
          rescue => e
            puts "error acting on post #{e}"
          end
        else
          puts "Skipping Like from #{row["user_id"]} on topic #{row["topic_id"]} / post #{row["post_id"]}"
        end

        current_count += 1
        print_status(current_count, total_count, start_time)
      end
    end
  end

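  # Converts a Zendesk HTML body to Markdown and re-uploads images hosted on the
  # Zendesk subdomain, replacing their references with the new upload markup.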
  def normalize_raw(raw, user_id)
    return "<missing>" if raw.blank?

    raw = raw.gsub('\n', "")
    raw = ReverseMarkdown.convert(raw)

    # Process images. After the ReverseMarkdown conversion they look like:
    # ![](https://<sub-domain>.zendesk.com/<hash>.<image-format>)
    raw.gsub!(%r{!\[\]\((https://#{SUBDOMAIN}\.zendesk\.com/hc/user_images/([^).]+\.[^)]+))\)}i) do
      image_url = $1
      filename = $2
      attempts = 0

      begin
        URI
          .parse(image_url)
          .open do |image|
            # IMAGE_DOWNLOAD_PATH is a scratch file; it is overwritten with each downloaded image
            File.open(IMAGE_DOWNLOAD_PATH, "wb") { |file| file.write(image.read) }
          end
      rescue *HTTP_ERRORS => e
        if attempts < MAX_RETRIES
          attempts += 1
          sleep(2)
          retry
        else
          puts "Error downloading image"
        end

        next
      end

      upl_obj = create_upload(user_id, IMAGE_DOWNLOAD_PATH, filename)

      if upl_obj&.persisted?
        html_for_upload(upl_obj, filename)
      else
        puts "Error creating image upload"
        exit
      end
    end

    raw
  end

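  # Strips the Zendesk base URL so permalinks are stored as relative paths.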
  def remove_domain(url)
    url.sub(@source_url, "")
  end

  def permalink_exists?(url)
    Permalink.find_by(url: url)
  end

  def connection
    @_connection ||=
      begin
        connect_uri = URI.parse(@source_url)

        http = Net::HTTP.new(connect_uri.host, connect_uri.port)
        http.open_timeout = 30
        http.read_timeout = 30
        http.use_ssl = connect_uri.scheme == "https"

        http
      end
  end

  def authorization
    @_authorization ||=
      begin
        auth_str = "#{@auth_email}/token:#{@auth_token}"
        "Basic #{Base64.strict_encode64(auth_str)}"
      end
  end

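  # Performs a GET against the Zendesk API and yields every element of the named
  # array in the response, following "next_page" links until pagination is exhausted.
  # Each request is attempted up to 5 times, pausing 20 seconds between attempts.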
  def get_from_api(path, array_name, show_status: false)
    url = "#{@source_url}#{path}"
    start_time = Time.now

    while url
      get = Net::HTTP::Get.new(url)
      get["User-Agent"] = "Discourse Zendesk Importer"
      get["Authorization"] = authorization

      retry_count = 0

      while retry_count < 5
        begin
          response = connection.request(get)
          puts("Retry successful!") if retry_count > 0
          break
        rescue => e
          puts "Request failed #{url}. Waiting and will retry. #{e.class.name} #{e.message}"
          sleep(20)
          retry_count += 1
        end
      end

      json = JSON.parse(response.body)
      json[array_name].each { |row| yield row }
      url = json["next_page"]

      if show_status
        if json["page"] && json["page_count"]
          print_status(json["page"], json["page_count"], start_time)
        else
          print "."
        end
      end
    end
  end
end

unless ARGV.length == 4 && Dir.exist?(ARGV[1])
  puts "",
       "Usage:",
       "",
       "bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN",
       ""
  exit 1
end

ImportScripts::ZendeskApi.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform