mirror of
https://github.com/discourse/discourse.git
synced 2025-01-25 02:27:46 +08:00
5f64fd0a21
Introduce new patterns for direct sql that are safe and fast. MiniSql is not prone to memory bloat that can happen with direct PG usage. It also has an extremely fast materializer and very a convenient API - DB.exec(sql, *params) => runs sql returns row count - DB.query(sql, *params) => runs sql returns usable objects (not a hash) - DB.query_hash(sql, *params) => runs sql returns an array of hashes - DB.query_single(sql, *params) => runs sql and returns a flat one dimensional array - DB.build(sql) => returns a sql builder See more at: https://github.com/discourse/mini_sql
256 lines
6.9 KiB
Ruby
256 lines
6.9 KiB
Ruby
# cf. https://github.com/rails-sqlserver/tiny_tds#install
|
|
require "tiny_tds"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::StackOverflow < ImportScripts::Base
|
|
|
|
BATCH_SIZE ||= 1000
|
|
|
|
def initialize
|
|
super
|
|
|
|
@client = TinyTds::Client.new(
|
|
host: ENV["DB_HOST"],
|
|
username: ENV["DB_USERNAME"],
|
|
password: ENV["DB_PASSWORD"],
|
|
database: ENV["DB_NAME"],
|
|
)
|
|
end
|
|
|
|
def execute
|
|
SiteSetting.tagging_enabled = true
|
|
|
|
# TODO: import_groups
|
|
import_users
|
|
import_posts
|
|
import_likes
|
|
mark_topics_as_solved
|
|
end
|
|
|
|
def import_users
|
|
puts "", "Importing users..."
|
|
|
|
last_user_id = -1
|
|
total = query("SELECT COUNT(*) count FROM Users WHERE Id > 0").first["count"]
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
users = query(<<~SQL
|
|
SELECT TOP #{BATCH_SIZE}
|
|
Id
|
|
, UserTypeId
|
|
, CreationDate
|
|
, LastLoginDate
|
|
, LastLoginIP
|
|
, Email
|
|
, DisplayName
|
|
, WebsiteUrl
|
|
, RealName
|
|
, Location
|
|
, Birthday
|
|
, ProfileImageUrl
|
|
FROM Users
|
|
WHERE Id > 0
|
|
AND Id > #{last_user_id}
|
|
ORDER BY Id
|
|
SQL
|
|
).to_a
|
|
|
|
break if users.empty?
|
|
|
|
last_user_id = users[-1]["Id"]
|
|
user_ids = users.map { |u| u["Id"] }
|
|
|
|
next if all_records_exist?(:users, user_ids)
|
|
|
|
create_users(users, total: total, offset: offset) do |u|
|
|
{
|
|
id: u["Id"],
|
|
admin: u["UserTypeId"] == 4,
|
|
created_at: u["CreationDate"],
|
|
last_seen_at: u["LastLoginDate"],
|
|
ip_address: u["LastLoginIP"],
|
|
email: u["Email"],
|
|
username: u["DisplayName"],
|
|
website: u["WebsiteUrl"],
|
|
name: u["RealName"],
|
|
location: u["Location"],
|
|
date_of_birth: u["Birthday"],
|
|
post_create_action: proc do |user|
|
|
if u["ProfileImageUrl"].present?
|
|
UserAvatar.import_url_for_user(u["ProfileImageUrl"], user) rescue nil
|
|
end
|
|
end
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_posts
|
|
puts "", "Importing posts..."
|
|
|
|
last_post_id = -1
|
|
total = query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] +
|
|
query("SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))").first["count"]
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
posts = query(<<~SQL
|
|
SELECT TOP #{BATCH_SIZE}
|
|
Id
|
|
, PostTypeId
|
|
, CreationDate
|
|
, Body
|
|
, OwnerUserId AS UserId
|
|
, Title
|
|
, Tags
|
|
, DeletionDate
|
|
, ParentId
|
|
, IsAcceptedAnswer
|
|
, CASE WHEN (ClosedDate IS NOT NULL OR LockedDate IS NOT NULL) THEN 1 ELSE 0 END AS Closed
|
|
FROM Posts
|
|
WHERE PostTypeId IN (1,2,3)
|
|
AND Id > #{last_post_id}
|
|
ORDER BY Id
|
|
SQL
|
|
).to_a
|
|
|
|
break if posts.empty?
|
|
|
|
last_post_id = posts[-1]["Id"]
|
|
post_ids = posts.map { |p| p["Id"] }
|
|
|
|
comments = query(<<~SQL
|
|
SELECT CONCAT('Comment-', Id) AS Id
|
|
, PostId AS ParentId
|
|
, Text
|
|
, CreationDate
|
|
, UserId
|
|
FROM PostComments
|
|
WHERE PostId IN (#{post_ids.join(",")})
|
|
ORDER BY Id
|
|
SQL
|
|
).to_a
|
|
|
|
posts_and_comments = (posts + comments).sort_by { |p| p["CreationDate"] }
|
|
post_and_comment_ids = posts_and_comments.map { |p| p["Id"] }
|
|
|
|
next if all_records_exist?(:posts, post_and_comment_ids)
|
|
|
|
create_posts(posts_and_comments, total: total, offset: offset) do |p|
|
|
raw = p["Body"].present? ? HtmlToMarkdown.new(p["Body"]).to_markdown : p["Text"]
|
|
|
|
post = {
|
|
id: p["Id"],
|
|
created_at: p["CreationDate"],
|
|
raw: raw,
|
|
user_id: user_id_from_imported_user_id(p["UserId"]) || -1,
|
|
}
|
|
|
|
if p["Title"].present?
|
|
post[:wiki] = p["PostTypeId"] = 3
|
|
post[:title] = p["Title"]
|
|
post[:tags] = p["Tags"].split("|")
|
|
post[:deleted_at] = p["DeletionDate"]
|
|
post[:closed] = p["Closed"] == 1
|
|
elsif t = topic_lookup_from_imported_post_id(p["ParentId"])
|
|
post[:custom_fields] = { is_accepted_answer: true } if p["IsAcceptedAnswer"]
|
|
post[:topic_id] = t[:topic_id]
|
|
post[:reply_to_post_number] = t[:post_number]
|
|
else
|
|
puts "", "", "#{p["Id"]} was not imported", "", ""
|
|
next
|
|
end
|
|
|
|
post
|
|
end
|
|
end
|
|
end
|
|
|
|
LIKE ||= PostActionType.types[:like]
|
|
|
|
def import_likes
|
|
puts "", "Importing post likes..."
|
|
|
|
last_like_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
likes = query(<<~SQL
|
|
SELECT TOP #{BATCH_SIZE}
|
|
Id
|
|
, PostId
|
|
, UserId
|
|
, CreationDate
|
|
FROM Posts2Votes
|
|
WHERE VoteTypeId = 2
|
|
AND DeletionDate IS NULL
|
|
AND Id > #{last_like_id}
|
|
ORDER BY Id
|
|
SQL
|
|
).to_a
|
|
|
|
break if likes.empty?
|
|
|
|
last_like_id = likes[-1]["Id"]
|
|
|
|
likes.each do |l|
|
|
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
|
next unless post_id = post_id_from_imported_post_id(l["PostId"])
|
|
next unless user = User.find_by(id: user_id)
|
|
next unless post = Post.find_by(id: post_id)
|
|
PostAction.act(user, post, LIKE) rescue nil
|
|
end
|
|
end
|
|
|
|
puts "", "Importing comment likes..."
|
|
|
|
last_like_id = -1
|
|
total = query("SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL").first["count"]
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
likes = query(<<~SQL
|
|
SELECT TOP #{BATCH_SIZE}
|
|
Id
|
|
, CONCAT('Comment-', PostCommentId) AS PostCommentId
|
|
, UserId
|
|
, CreationDate
|
|
FROM Comments2Votes
|
|
WHERE VoteTypeId = 2
|
|
AND DeletionDate IS NULL
|
|
AND Id > #{last_like_id}
|
|
ORDER BY Id
|
|
SQL
|
|
).to_a
|
|
|
|
break if likes.empty?
|
|
|
|
last_like_id = likes[-1]["Id"]
|
|
|
|
likes.each do |l|
|
|
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
|
next unless post_id = post_id_from_imported_post_id(l["PostCommentId"])
|
|
next unless user = User.find_by(id: user_id)
|
|
next unless post = Post.find_by(id: post_id)
|
|
PostAction.act(user, post, LIKE) rescue nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def mark_topics_as_solved
|
|
puts "", "Marking topics as solved..."
|
|
|
|
DB.exec <<~SQL
|
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
|
FROM post_custom_fields pcf
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
WHERE pcf.name = 'is_accepted_answer'
|
|
SQL
|
|
end
|
|
|
|
def query(sql)
|
|
@client.execute(sql)
|
|
end
|
|
|
|
end
|
|
|
|
ImportScripts::StackOverflow.new.perform
|