mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 08:49:06 +08:00
DEV: Slack import script (#22386)
It's a very simple import script and currently imports only the following content: * Users * Messages as Discourse topics/posts * Attachments Each channel can be mapped to a category and tags. It uses regular expressions to convert formatted messages ("rich text") into Markdown used by Discourse. In the future we could convert the `blocks` attribute from each message into Markdown instead of applying regular expressions on the `text` attribute.
This commit is contained in:
parent
ce0ddfba8c
commit
e09ce99884
|
@ -4,7 +4,7 @@ require "sqlite3"
|
||||||
|
|
||||||
module ImportScripts
|
module ImportScripts
|
||||||
class GenericDatabase
|
class GenericDatabase
|
||||||
def initialize(directory, batch_size:, recreate: false, numeric_keys: false)
|
def initialize(directory, batch_size: 1000, recreate: false, numeric_keys: false)
|
||||||
filename = "#{directory}/index.db"
|
filename = "#{directory}/index.db"
|
||||||
File.delete(filename) if recreate && File.exist?(filename)
|
File.delete(filename) if recreate && File.exist?(filename)
|
||||||
|
|
||||||
|
@ -42,8 +42,8 @@ module ImportScripts
|
||||||
def insert_user(user)
|
def insert_user(user)
|
||||||
@db.execute(<<-SQL, prepare(user))
|
@db.execute(<<-SQL, prepare(user))
|
||||||
INSERT OR REPLACE
|
INSERT OR REPLACE
|
||||||
INTO user (id, email, username, name, bio, avatar_path, created_at, last_seen_at, active)
|
INTO user (id, email, username, name, bio, avatar_path, created_at, last_seen_at, active, staged, admin)
|
||||||
VALUES (:id, :email, :username, :name, :bio, :avatar_path, :created_at, :last_seen_at, :active)
|
VALUES (:id, :email, :username, :name, :bio, :avatar_path, :created_at, :last_seen_at, :active, :staged, :admin)
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -61,8 +61,8 @@ module ImportScripts
|
||||||
|
|
||||||
@db.transaction do
|
@db.transaction do
|
||||||
@db.execute(<<-SQL, prepare(topic))
|
@db.execute(<<-SQL, prepare(topic))
|
||||||
INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url, upload_count)
|
INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url, upload_count, tags)
|
||||||
VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url, :upload_count)
|
VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url, :upload_count, :tags)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
attachments&.each do |attachment|
|
attachments&.each do |attachment|
|
||||||
|
@ -150,6 +150,28 @@ module ImportScripts
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Recomputes `user.last_seen_at` from the `post` table: each user's value
# becomes the newest `created_at` among that user's posts.
#
# Users without any post keep their current `last_seen_at` (COALESCE falls
# back to the existing column value) instead of being reset to NULL.
#
# NOTE(review): only the `post` table is consulted; rows in `topic` are not
# taken into account -- confirm that this is intended.
def calculate_user_last_seen_at_dates
  @db.execute <<~SQL
    UPDATE user
    SET last_seen_at = COALESCE(
      (
        SELECT MAX(created_at)
        FROM post
        WHERE post.user_id = user.id
      ),
      last_seen_at
    )
  SQL
end
|
||||||
|
|
||||||
|
# Recomputes `user.created_at` from the `post` table: each user's value
# becomes the oldest `created_at` among that user's posts.
#
# Users without any post keep their current `created_at` (COALESCE falls back
# to the existing column value) instead of being reset to NULL.
#
# NOTE(review): only the `post` table is consulted; rows in `topic` are not
# taken into account -- confirm that this is intended.
def calculate_user_created_at_dates
  @db.execute <<~SQL
    UPDATE user
    SET created_at = COALESCE(
      (
        SELECT MIN(created_at)
        FROM post
        WHERE post.user_id = user.id
      ),
      created_at
    )
  SQL
end
|
||||||
|
|
||||||
def fetch_categories
|
def fetch_categories
|
||||||
@db.execute(<<-SQL)
|
@db.execute(<<-SQL)
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -325,6 +347,48 @@ module ImportScripts
|
||||||
@db.get_first_value(sql)
|
@db.get_first_value(sql)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Promotes orphaned posts to topics.
#
# For every `topic_id` that posts refer to but that has no row in `topic`, the
# earliest post (ordered by `created_at`) is re-inserted as the topic itself and
# removed from `post`. Its uploads are moved from `post_upload` to
# `topic_upload` and its likes are re-pointed at the topic.
#
# ROW_NUMBER() is used instead of RANK() so that exactly one post per topic is
# promoted even when several posts share the same `created_at`; with RANK()
# all tied posts would be turned into the same topic id and all but one lost.
#
# @yieldparam topic [Hash] the post row (string keys) with "topic_id" renamed
#   to "id" and without "post_number" / "reply_to_post_id"
# @yieldreturn [Hash] attributes to insert into the `topic` table
def create_missing_topics
  posts = @db.execute(<<~SQL)
    WITH missing_topics AS (
      SELECT *, ROW_NUMBER() OVER (PARTITION BY topic_id ORDER BY created_at, id) AS post_number
      FROM post p
      WHERE NOT EXISTS (SELECT 1 FROM topic t WHERE t.id = p.topic_id)
    )
    SELECT *
    FROM missing_topics
    WHERE post_number = 1
  SQL

  posts.each do |post|
    topic = post.except("post_number", "reply_to_post_id")
    topic["id"] = topic.delete("topic_id")
    topic = yield(topic)

    @db.execute(<<~SQL, prepare(topic))
      INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url, upload_count, tags)
      VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url, :upload_count, :tags)
    SQL

    # The post now lives on as the topic, so drop the original row (once).
    @db.execute("DELETE FROM post WHERE id = ?", post["id"])

    @db.execute(<<~SQL, topic_id: topic["id"], post_id: post["id"])
      INSERT OR REPLACE INTO topic_upload (topic_id, path)
      SELECT :topic_id, path
      FROM post_upload
      WHERE post_id = :post_id
    SQL

    @db.execute("DELETE FROM post_upload WHERE post_id = ?", post["id"])

    @db.execute(<<~SQL, topic_id: topic["id"], post_id: post["id"])
      UPDATE like
      SET topic_id = :topic_id,
          post_id = NULL
      WHERE post_id = :post_id
    SQL
  end
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def configure_database
|
def configure_database
|
||||||
|
@ -382,7 +446,9 @@ module ImportScripts
|
||||||
avatar_path TEXT,
|
avatar_path TEXT,
|
||||||
created_at DATETIME,
|
created_at DATETIME,
|
||||||
last_seen_at DATETIME,
|
last_seen_at DATETIME,
|
||||||
active BOOLEAN NOT NULL DEFAULT true
|
active BOOLEAN NOT NULL DEFAULT true,
|
||||||
|
staged BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
admin BOOLEAN NOT NULL DEFAULT false
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
@ -400,7 +466,8 @@ module ImportScripts
|
||||||
user_id #{key_data_type} NOT NULL,
|
user_id #{key_data_type} NOT NULL,
|
||||||
created_at DATETIME,
|
created_at DATETIME,
|
||||||
url TEXT,
|
url TEXT,
|
||||||
upload_count INTEGER DEFAULT 0
|
upload_count INTEGER DEFAULT 0,
|
||||||
|
tags JSON
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
@ -486,6 +553,8 @@ module ImportScripts
|
||||||
hash[key] = value ? 1 : 0
|
hash[key] = value ? 1 : 0
|
||||||
elsif value.is_a?(Date)
|
elsif value.is_a?(Date)
|
||||||
hash[key] = value.to_s
|
hash[key] = value.to_s
|
||||||
|
elsif value.is_a?(Time)
|
||||||
|
hash[key] = value.utc.iso8601
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
299
script/import_scripts/slack.rb
Normal file
299
script/import_scripts/slack.rb
Normal file
|
@ -0,0 +1,299 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "colored2"
|
||||||
|
require_relative "base"
|
||||||
|
require_relative "base/generic_database"
|
||||||
|
|
||||||
|
# Minimal Slack importer
|
||||||
|
# It imports users and messages into Discourse topics/posts
|
||||||
|
|
||||||
|
# Call it like this:
|
||||||
|
# IMPORT=1 bundle install
|
||||||
|
# IMPORT=1 bundle exec ruby script/import_scripts/slack.rb "PATH_TO_SLACK_EXPORT"
|
||||||
|
#
|
||||||
|
# You will need to create a channel-mapping.json file in order to map Slack channels to Discourse categories and tags.
|
||||||
|
# The import script will create a template for you if you don't have one.
|
||||||
|
|
||||||
|
class ImportScripts::Slack < ImportScripts::Base
|
||||||
|
TITLE_TIMEZONE = "America/Los_Angeles"
|
||||||
|
|
||||||
|
# @param base_path [String] directory of the extracted Slack export; the
#   intermediate SQLite database is (re)created inside this directory
# @raise [ArgumentError] if +base_path+ is missing or is not a directory
def initialize(base_path)
  # Fail fast with a clear message instead of passing a bad path through to
  # the database setup (e.g. when the script is started without an argument).
  if base_path.to_s.empty? || !File.directory?(base_path)
    raise ArgumentError, "Slack export directory not found: #{base_path.inspect}"
  end

  super()
  @base_path = base_path
  @db = ImportScripts::GenericDatabase.new(@base_path, recreate: true)
end
|
||||||
|
|
||||||
|
# Runs the import: loads the channel mapping, reads the export into the
# intermediate database and then runs the import steps in a fixed order
# (users, categories, topics, posts).
def execute
  @channel_mapping = load_channel_mapping
  read_json_files

  %i[import_users import_categories import_topics import_posts].each { |step| send(step) }
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Reads the Slack export (users.json plus one directory per mapped channel)
# into the intermediate database and then derives per-user dates and the post
# order from the stored rows.
def read_json_files
  puts "", "Reading JSON files..."

  store_slack_users
  @channel_mapping.each { |mapping| store_channel_messages(mapping) }

  @db.calculate_user_created_at_dates
  @db.calculate_user_last_seen_at_dates
  @db.sort_posts_by_created_at
end

# Stores every user from users.json that is not flagged as deleted.
def store_slack_users
  json_from_file("users.json").each do |user|
    next if user[:deleted]

    @db.insert_user(
      id: user[:id],
      email: user.dig(:profile, :email),
      name: user[:real_name],
      staged: !user[:is_email_confirmed],
      admin: user[:is_admin],
      avatar_path: user.dig(:profile, :image_original),
    )
  end
end

# Stores one mapped channel: the channel becomes a category, a message whose
# `ts` equals its thread id becomes a topic, every other message becomes a post.
# Afterwards, posts whose thread starter is not part of the export are turned
# into topics.
#
# @param mapping [Hash] one entry of the channel mapping with the keys
#   :slack_channel, :discourse_category and :discourse_tags
# @raise [RuntimeError] if a message has a type other than "message"
def store_channel_messages(mapping)
  channel = mapping[:slack_channel]
  tags = Oj.dump(mapping[:discourse_tags])

  @db.insert_category(id: channel, name: mapping[:discourse_category])

  json_from_directory(channel).each do |message|
    raise "Unknown type: #{message[:type]}" if message[:type] != "message"

    topic_id = message[:thread_ts] || message[:ts]
    raw = message[:text].presence || "No text"
    attachments = message[:files]&.map { |file| file[:url_private_download] }

    if message[:ts] == topic_id
      @db.insert_topic(
        id: topic_id,
        title: thread_title_for(message[:ts]),
        raw: raw,
        category_id: channel,
        created_at: message[:ts],
        user_id: message[:user],
        tags: tags,
        attachments: attachments,
      )
    else
      @db.insert_post(
        # Not every message carries a client_msg_id; fall back to the
        # message timestamp so the post still gets an id.
        id: message[:client_msg_id] || message[:ts],
        raw: raw,
        topic_id: topic_id,
        created_at: message[:ts],
        user_id: message[:user],
        attachments: attachments,
      )
    end
  end

  @db.create_missing_topics do |topic|
    topic[:title] = thread_title_for(topic["created_at"])
    topic[:category_id] = channel
    topic[:tags] = tags
    topic
  end
end

# Builds the generated topic title from a Slack timestamp (seconds since the
# epoch), rendered in TITLE_TIMEZONE.
def thread_title_for(timestamp)
  created_at = Time.at(timestamp.to_f).in_time_zone(TITLE_TIMEZONE)
  "Thread starting at #{created_at.iso8601}"
end
|
||||||
|
|
||||||
|
# Creates one Discourse category for every category row stored in the
# intermediate database.
def import_categories
  puts "", "Creating categories..."

  categories = @db.fetch_categories
  create_categories(categories) do |category|
    { id: category["id"], name: category["name"] }
  end
end
|
||||||
|
|
||||||
|
# Creates Discourse users from the rows stored in the intermediate database,
# batch by batch. Batches whose ids were all imported before are skipped.
# Avatars are imported on a best-effort basis after each user was created.
def import_users
  puts "", "Creating users..."
  total_count = @db.count_users
  last_id = ""

  batches do |offset|
    rows, last_id = @db.fetch_users(last_id)
    break if rows.empty?

    next if all_records_exist?(:users, rows.map { |row| row["id"] })

    create_users(rows, total: total_count, offset: offset) do |row|
      {
        id: row["id"],
        email: row["email"].presence || fake_email,
        name: row["name"],
        created_at: row["created_at"],
        last_seen_at: row["last_seen_at"],
        active: row["active"] == 1,
        staged: row["staged"] == 1,
        admin: row["admin"] == 1,
        merge: true,
        post_create_action:
          proc do |user|
            if row["avatar_path"].present?
              begin
                UserAvatar.import_url_for_user(row["avatar_path"], user)
              rescue StandardError => e
                # A broken avatar must not abort the import, but it should not
                # vanish without a trace either.
                puts "Could not import avatar for Slack user #{row["id"]}: #{e.message}"
                nil
              end
            end
          end,
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Creates Discourse topics from the topic rows of the intermediate database,
# batch by batch. Batches whose ids were all imported before are skipped.
# The system user is used as author when the Slack user is unknown.
def import_topics
  puts "", "Creating topics..."
  topic_total = @db.count_topics
  last_id = ""

  batches do |offset|
    topics, last_id = @db.fetch_topics(last_id)
    break if topics.empty?

    imported_ids = topics.map { |topic| topic["id"] }
    next if all_records_exist?(:posts, imported_ids)

    create_posts(topics, total: topic_total, offset: offset) do |topic|
      author_id = user_id_from_imported_user_id(topic["user_id"]) || Discourse.system_user.id
      uploads = topic["upload_count"] > 0 ? @db.fetch_topic_attachments(topic["id"]) : nil
      title = topic["title"].present? ? topic["title"].strip[0...255] : "Topic title missing"

      {
        id: topic["id"],
        title: title,
        raw: to_markdown(topic["raw"], uploads, author_id),
        category: category_id_from_imported_category_id(topic["category_id"]),
        user_id: author_id,
        created_at: Time.at(topic["created_at"].to_f),
        tags: Oj.load(topic["tags"]),
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Creates Discourse posts (thread replies) from the sorted post rows of the
# intermediate database, batch by batch. Batches whose ids were all imported
# before are skipped. The system user is used as author when the Slack user
# is unknown.
def import_posts
  puts "", "creating posts"
  total_count = @db.count_posts
  last_row_id = 0

  batches do |offset|
    rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
    break if rows.empty?

    next if all_records_exist?(:posts, rows.map { |row| row["id"] })

    create_posts(rows, total: total_count, offset: offset) do |row|
      topic = topic_lookup_from_imported_post_id(row["topic_id"])

      # Without this guard a single reply whose topic failed to import would
      # abort the whole run with a NoMethodError on nil.
      # NOTE(review): relies on create_posts treating a nil block result as
      # "skip this row" -- confirm against ImportScripts::Base.
      if topic.nil?
        puts "", "Skipping post #{row["id"]}: topic #{row["topic_id"]} was not imported"
        next
      end

      user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id
      attachments = @db.fetch_post_attachments(row["id"]) if row["upload_count"] > 0

      {
        id: row["id"],
        raw: to_markdown(row["raw"], attachments, user_id),
        user_id: user_id,
        topic_id: topic[:topic_id],
        created_at: Time.at(row["created_at"].to_f),
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Parses the JSON file at +relative_path+, resolved against the export directory.
def json_from_file(relative_path)
  load_json(File.join(@base_path, relative_path))
end
|
||||||
|
|
||||||
|
# Returns an Enumerator over the items of every *.json file found below
# +directory+ (resolved against the export directory).
#
# @raise [RuntimeError] if the directory does not exist
def json_from_directory(directory)
  channel_path = File.join(@base_path, directory)
  raise "Directory #{channel_path} does not exist" unless File.directory?(channel_path)

  Enumerator.new do |yielder|
    # The paths are streamed from `find` rather than collected with Dir[],
    # which would allocate an array holding the path of every file.
    IO.popen(["find", channel_path, "-name", "*.json"]) do |find_output|
      find_output.each_line(chomp: true) do |json_path|
        load_json(json_path).each { |item| yielder << item }
      end
    end
  end
end
|
||||||
|
|
||||||
|
# Reads and parses the JSON file at +path+ in Oj's strict mode, with symbol keys.
#
# @raise [RuntimeError] if the file does not exist
def load_json(path)
  unless File.exist?(path)
    raise "File #{path} does not exist"
  end

  contents = File.read(path)
  Oj.load(contents, { mode: :strict, symbol_keys: true })
end
|
||||||
|
|
||||||
|
# Loads channel-mapping.json from the export directory. If the file is missing,
# a template is written, an error is printed and the process exits with status 1.
def load_channel_mapping
  mapping_path = File.join(@base_path, "channel-mapping.json")
  return load_json(mapping_path) if File.exist?(mapping_path)

  create_channel_mapping_file(mapping_path)
  puts "", "ERROR: channel-mapping.json is missing".red
  puts "An example file has been created at #{mapping_path}".red, "Please edit it and try again.".red
  exit 1
end
|
||||||
|
|
||||||
|
# Writes a mapping template to +mapping_file_path+: one entry per directory
# directly below the export directory, mapping the channel to a category of
# the same name with no tags.
def create_channel_mapping_file(mapping_file_path)
  channel_directories = Dir[File.join(@base_path, "/*/")]

  template =
    channel_directories.map do |directory|
      name = File.basename(directory)
      { slack_channel: name, discourse_category: name, discourse_tags: [] }
    end

  File.write(mapping_file_path, Oj.dump(template, indent: 4))
end
|
||||||
|
|
||||||
|
# Converts the text of a Slack message into Markdown and appends the markup of
# its uploaded attachments.
#
# @param text [String, nil] raw Slack message text; the argument itself is not modified
# @param attachments [Array<Hash>, nil] rows with a "path" entry holding the download URL
# @param user_id [Integer] id of the user the uploads are created for
# @return [String] the converted text
def to_markdown(text, attachments, user_id)
  # Work on a copy so that the caller's (possibly frozen) string stays untouched.
  text = text.to_s.dup

  # Emoji skin tones
  text.gsub!(/::skin-tone-(\d):/, ':t\1:')

  # Mentions
  # The mentioned id gets its own local: reusing `user_id` here would overwrite
  # the method argument and attribute the uploads below to the wrong user.
  text.gsub!(/<@(\w+)>/) do
    mentioned_id = Regexp.last_match(1)
    username = @lookup.find_username_by_import_id(mentioned_id)
    username ? "@#{username}" : "`@#{mentioned_id}`"
  end

  # Links
  # `>` is excluded from the URL part so that a labelled link cannot swallow a
  # bare `<https://...>` link that precedes it.
  text.gsub!(%r{<(https?://[^|>]+?)\|([^>]+?)>}, '[\2](\1)')
  text.gsub!(%r{<(https?://[^>]+?)>}, '\1')

  # Code blocks
  text.gsub!(/```(.+?)```/m, "```\n\\1\n```")

  # Images and files
  attachments&.each do |attachment|
    upload_markdown = download_file(attachment["path"], user_id)
    # download_file returns nil when nothing could be downloaded
    text << "\n#{upload_markdown}" if upload_markdown
  end

  text
end
|
||||||
|
|
||||||
|
# Downloads the file at +url+, stores it as an upload for +user_id+ and returns
# the markup that embeds it.
#
# @param url [String] download URL of the Slack attachment
# @param user_id [Integer] id of the user the upload is created for
# @return [String, nil] markup for the upload; nil if nothing was downloaded or
#   the upload could not be saved
def download_file(url, user_id)
  filename = File.basename(URI.parse(url).path)

  # Attachments are not necessarily images, so allow whichever of the two
  # configured limits is larger.
  max_size_kb = [SiteSetting.max_image_size_kb, SiteSetting.max_attachment_size_kb].max

  tempfile =
    FileHelper.download(
      url,
      max_file_size: max_size_kb.kilobytes,
      tmp_file_name: "slack-attachment",
      follow_redirect: true,
    )

  return unless tempfile

  upload = UploadCreator.new(tempfile, filename, origin: url).create_for(user_id)
  # Only an upload that was actually saved can be referenced from a post.
  html_for_upload(upload, filename) if upload&.persisted?
ensure
  tempfile.close! if tempfile && tempfile.respond_to?(:close!)
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Script entry point: the first command line argument is passed to the importer
# as the path of the Slack export.
ImportScripts::Slack.new(ARGV[0]).perform
|
Loading…
Reference in New Issue
Block a user