mirror of
https://github.com/discourse/discourse.git
synced 2025-04-03 05:39:41 +08:00
Improve IPBoard3 importer
- fix encoding issues
- close topics
- don't import queued & not approved topics
- don't import deleted posts
This commit is contained in:
parent
2f65393706
commit
49400337b6
@ -16,6 +16,8 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
password: ENV["DB_PW"],
|
password: ENV["DB_PW"],
|
||||||
database: ENV["DB_NAME"],
|
database: ENV["DB_NAME"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@client.query("SET character_set_results = binary")
|
||||||
end
|
end
|
||||||
|
|
||||||
def execute
|
def execute
|
||||||
@ -23,6 +25,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
import_categories
|
import_categories
|
||||||
import_topics
|
import_topics
|
||||||
import_posts
|
import_posts
|
||||||
|
close_topics
|
||||||
import_personal_topics
|
import_personal_topics
|
||||||
import_personal_posts
|
import_personal_posts
|
||||||
end
|
end
|
||||||
@ -59,11 +62,10 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
break if users.empty?
|
break if users.empty?
|
||||||
|
|
||||||
last_user_id = users[-1]["id"]
|
last_user_id = users[-1]["id"]
|
||||||
user_ids = users.map { |u| u["id"] }
|
|
||||||
|
|
||||||
next if all_records_exist?(:users, user_ids)
|
|
||||||
|
|
||||||
create_users(users, total: total_users, offset: offset) do |u|
|
create_users(users, total: total_users, offset: offset) do |u|
|
||||||
|
next if user_id_from_imported_user_id(u["id"])
|
||||||
|
%W{name email title pp_about_me}.each { |k| u[k]&.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "") }
|
||||||
next if u["name"].blank? && !Email.is_valid?(u["email"])
|
next if u["name"].blank? && !Email.is_valid?(u["email"])
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -111,19 +113,21 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
child_categories = categories.select { |c| c["parent_id"] != -1 }
|
child_categories = categories.select { |c| c["parent_id"] != -1 }
|
||||||
|
|
||||||
create_categories(parent_categories) do |c|
|
create_categories(parent_categories) do |c|
|
||||||
|
next if category_id_from_imported_category_id(c["id"])
|
||||||
{
|
{
|
||||||
id: c["id"],
|
id: c["id"],
|
||||||
name: c["name"],
|
name: c["name"].encode("utf-8", "utf-8"),
|
||||||
description: clean_up(c["description"]),
|
description: clean_up(c["description"]),
|
||||||
position: c["position"],
|
position: c["position"],
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
create_categories(child_categories) do |c|
|
create_categories(child_categories) do |c|
|
||||||
|
next if category_id_from_imported_category_id(c["id"])
|
||||||
{
|
{
|
||||||
id: c["id"],
|
id: c["id"],
|
||||||
parent_category_id: category_id_from_imported_category_id(c["parent_id"]),
|
parent_category_id: category_id_from_imported_category_id(c["parent_id"]),
|
||||||
name: c["name"],
|
name: c["name"].encode("utf-8", "utf-8"),
|
||||||
description: clean_up(c["description"]),
|
description: clean_up(c["description"]),
|
||||||
position: c["position"],
|
position: c["position"],
|
||||||
}
|
}
|
||||||
@ -133,13 +137,18 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
def import_topics
|
def import_topics
|
||||||
puts "", "importing topics..."
|
puts "", "importing topics..."
|
||||||
|
|
||||||
|
@closed_topic_ids = []
|
||||||
|
|
||||||
last_topic_id = -1
|
last_topic_id = -1
|
||||||
total_topics = mysql_query(<<~SQL
|
total_topics = mysql_query(<<~SQL
|
||||||
SELECT COUNT(*) count
|
SELECT COUNT(*) count
|
||||||
FROM topics
|
FROM topics
|
||||||
JOIN posts ON tid = topic_id
|
JOIN posts ON tid = topic_id
|
||||||
WHERE tdelete_time = 0
|
WHERE tdelete_time = 0
|
||||||
|
AND pdelete_time = 0
|
||||||
AND new_topic = 1
|
AND new_topic = 1
|
||||||
|
AND approved = 1
|
||||||
|
AND queued = 0
|
||||||
SQL
|
SQL
|
||||||
).first["count"]
|
).first["count"]
|
||||||
|
|
||||||
@ -147,6 +156,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
topics = mysql_query(<<~SQL
|
topics = mysql_query(<<~SQL
|
||||||
SELECT tid id
|
SELECT tid id
|
||||||
, title
|
, title
|
||||||
|
, state
|
||||||
, starter_id
|
, starter_id
|
||||||
, start_date
|
, start_date
|
||||||
, views
|
, views
|
||||||
@ -156,7 +166,10 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
FROM topics
|
FROM topics
|
||||||
JOIN posts ON tid = topic_id
|
JOIN posts ON tid = topic_id
|
||||||
WHERE tdelete_time = 0
|
WHERE tdelete_time = 0
|
||||||
|
AND pdelete_time = 0
|
||||||
AND new_topic = 1
|
AND new_topic = 1
|
||||||
|
AND approved = 1
|
||||||
|
AND queued = 0
|
||||||
AND tid > #{last_topic_id}
|
AND tid > #{last_topic_id}
|
||||||
ORDER BY tid
|
ORDER BY tid
|
||||||
LIMIT #{BATCH_SIZE}
|
LIMIT #{BATCH_SIZE}
|
||||||
@ -166,17 +179,16 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
break if topics.empty?
|
break if topics.empty?
|
||||||
|
|
||||||
last_topic_id = topics[-1]["id"]
|
last_topic_id = topics[-1]["id"]
|
||||||
topic_ids = topics.map { |t| "t-#{t["id"]}" }
|
|
||||||
|
|
||||||
next if all_records_exist?(:posts, topic_ids)
|
|
||||||
|
|
||||||
create_posts(topics, total: total_topics, offset: offset) do |t|
|
create_posts(topics, total: total_topics, offset: offset) do |t|
|
||||||
|
@closed_topic_ids << "t-#{t["id"]}" if t["state"] != "open"
|
||||||
|
next if post_id_from_imported_post_id("t-#{t["id"]}")
|
||||||
created_at = Time.zone.at(t["start_date"])
|
created_at = Time.zone.at(t["start_date"])
|
||||||
user_id = user_id_from_imported_user_id(t["starter_id"]) || -1
|
user_id = user_id_from_imported_user_id(t["starter_id"]) || -1
|
||||||
|
|
||||||
{
|
{
|
||||||
id: "t-#{t["id"]}",
|
id: "t-#{t["id"]}",
|
||||||
title: CGI.unescapeHTML(t["title"]),
|
title: CGI.unescapeHTML(t["title"].encode("utf-8", "utf-8")),
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
created_at: created_at,
|
created_at: created_at,
|
||||||
views: t["views"],
|
views: t["views"],
|
||||||
@ -192,7 +204,14 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
puts "", "importing posts..."
|
puts "", "importing posts..."
|
||||||
|
|
||||||
last_post_id = -1
|
last_post_id = -1
|
||||||
total_posts = mysql_query("SELECT COUNT(*) count FROM posts WHERE new_topic = 0").first["count"]
|
total_posts = mysql_query(<<~SQL
|
||||||
|
SELECT COUNT(*) count
|
||||||
|
FROM posts
|
||||||
|
WHERE new_topic = 0
|
||||||
|
AND pdelete_time = 0
|
||||||
|
AND queued = 0
|
||||||
|
SQL
|
||||||
|
).first["count"]
|
||||||
|
|
||||||
batches(BATCH_SIZE) do |offset|
|
batches(BATCH_SIZE) do |offset|
|
||||||
posts = mysql_query(<<~SQL
|
posts = mysql_query(<<~SQL
|
||||||
@ -201,9 +220,10 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
, post_date
|
, post_date
|
||||||
, post
|
, post
|
||||||
, topic_id
|
, topic_id
|
||||||
, pdelete_time
|
|
||||||
FROM posts
|
FROM posts
|
||||||
WHERE new_topic = 0
|
WHERE new_topic = 0
|
||||||
|
AND pdelete_time = 0
|
||||||
|
AND queued = 0
|
||||||
AND pid > #{last_post_id}
|
AND pid > #{last_post_id}
|
||||||
ORDER BY pid
|
ORDER BY pid
|
||||||
LIMIT #{BATCH_SIZE}
|
LIMIT #{BATCH_SIZE}
|
||||||
@ -213,11 +233,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
break if posts.empty?
|
break if posts.empty?
|
||||||
|
|
||||||
last_post_id = posts[-1]["id"]
|
last_post_id = posts[-1]["id"]
|
||||||
post_ids = posts.map { |p| p["id"] }
|
|
||||||
|
|
||||||
next if all_records_exist?(:posts, post_ids)
|
|
||||||
|
|
||||||
create_posts(posts, total: total_posts, offset: offset) do |p|
|
create_posts(posts, total: total_posts, offset: offset) do |p|
|
||||||
|
next if post_id_from_imported_post_id(p["id"])
|
||||||
next unless t = topic_lookup_from_imported_post_id("t-#{p["topic_id"]}")
|
next unless t = topic_lookup_from_imported_post_id("t-#{p["topic_id"]}")
|
||||||
user_id = user_id_from_imported_user_id(p["author_id"]) || -1
|
user_id = user_id_from_imported_user_id(p["author_id"]) || -1
|
||||||
|
|
||||||
@ -227,12 +245,31 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
created_at: Time.zone.at(p["post_date"]),
|
created_at: Time.zone.at(p["post_date"]),
|
||||||
raw: clean_up(p["post"], user_id),
|
raw: clean_up(p["post"], user_id),
|
||||||
topic_id: t[:topic_id],
|
topic_id: t[:topic_id],
|
||||||
deleted_at: p["pdelete_time"] > 0 ? Time.zone.at(p["pdelete_time"]) : nil,
|
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Closes the imported topics that were not open on the source forum.
#
# Reads @closed_topic_ids, which import_topics initializes and fills with
# import ids of the form "t-<tid>" for every topic whose state is not "open",
# so this must run after import_topics. Prints how many ids were collected,
# then issues a single UPDATE that sets topics.closed = true for each topic
# that owns a post whose 'import_id' custom field matches one of those ids.
def close_topics
|
||||||
|
puts "", "closing #{@closed_topic_ids.size} topics..."
|
||||||
|
|
||||||
|
sql = <<~SQL
|
||||||
|
WITH closed_topic_ids AS (
|
||||||
|
SELECT t.id AS topic_id
|
||||||
|
FROM post_custom_fields pcf
|
||||||
|
JOIN posts p ON p.id = pcf.post_id
|
||||||
|
JOIN topics t ON t.id = p.topic_id
|
||||||
|
WHERE pcf.name = 'import_id'
|
||||||
|
AND pcf.value IN (?)
|
||||||
|
)
|
||||||
|
UPDATE topics
|
||||||
|
SET closed = true
|
||||||
|
WHERE id IN (SELECT topic_id FROM closed_topic_ids)
|
||||||
|
SQL
|
||||||
|
|
||||||
|
Topic.exec_sql(sql, @closed_topic_ids)
|
||||||
|
end
|
||||||
|
|
||||||
def import_personal_topics
|
def import_personal_topics
|
||||||
puts "", "import personal topics..."
|
puts "", "import personal topics..."
|
||||||
|
|
||||||
@ -268,11 +305,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
break if personal_topics.empty?
|
break if personal_topics.empty?
|
||||||
|
|
||||||
last_personal_topic_id = personal_topics[-1]["id"]
|
last_personal_topic_id = personal_topics[-1]["id"]
|
||||||
personal_topic_ids = personal_topics.map { |pt| "pt-#{pt["id"]}" }
|
|
||||||
|
|
||||||
next if all_records_exist?(:posts, personal_topic_ids)
|
|
||||||
|
|
||||||
create_posts(personal_topics, total: total_personal_topics, offset: offset) do |pt|
|
create_posts(personal_topics, total: total_personal_topics, offset: offset) do |pt|
|
||||||
|
next if post_id_from_imported_post_id("pt-#{pt["id"]}")
|
||||||
user_id = user_id_from_imported_user_id(pt["mt_starter_id"]) || -1
|
user_id = user_id_from_imported_user_id(pt["mt_starter_id"]) || -1
|
||||||
|
|
||||||
user_ids = [pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
|
user_ids = [pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
|
||||||
@ -284,7 +319,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
archetype: Archetype.private_message,
|
archetype: Archetype.private_message,
|
||||||
id: "pt-#{pt["id"]}",
|
id: "pt-#{pt["id"]}",
|
||||||
created_at: Time.zone.at(pt["mt_date"]),
|
created_at: Time.zone.at(pt["mt_date"]),
|
||||||
title: CGI.unescapeHTML(pt["mt_title"]),
|
title: CGI.unescapeHTML(pt["mt_title"].encode("utf-8", "utf-8")),
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
target_usernames: User.where(id: user_ids).pluck(:username),
|
target_usernames: User.where(id: user_ids).pluck(:username),
|
||||||
raw: clean_up(pt["msg_post"], user_id),
|
raw: clean_up(pt["msg_post"], user_id),
|
||||||
@ -317,11 +352,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
break if personal_posts.empty?
|
break if personal_posts.empty?
|
||||||
|
|
||||||
last_personal_post_id = personal_posts[-1]["id"]
|
last_personal_post_id = personal_posts[-1]["id"]
|
||||||
personal_post_ids = personal_posts.map { |pp| "pp-#{pp["id"]}" }
|
|
||||||
|
|
||||||
next if all_records_exist?(:posts, personal_post_ids)
|
|
||||||
|
|
||||||
create_posts(personal_posts, total: total_personal_posts, offset: offset) do |pp|
|
create_posts(personal_posts, total: total_personal_posts, offset: offset) do |pp|
|
||||||
|
next if post_id_from_imported_post_id("pp-#{pp["id"]}")
|
||||||
next unless t = topic_lookup_from_imported_post_id("pt-#{pp["msg_topic_id"]}")
|
next unless t = topic_lookup_from_imported_post_id("pt-#{pp["msg_topic_id"]}")
|
||||||
user_id = user_id_from_imported_user_id(pp["msg_author_id"]) || -1
|
user_id = user_id_from_imported_user_id(pp["msg_author_id"]) || -1
|
||||||
|
|
||||||
@ -337,6 +370,8 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
end
|
end
|
||||||
|
|
||||||
def clean_up(raw, user_id = -1)
|
def clean_up(raw, user_id = -1)
|
||||||
|
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
|
||||||
|
|
||||||
raw.gsub!(/<(.+)> <\/\1>/, "\n\n")
|
raw.gsub!(/<(.+)> <\/\1>/, "\n\n")
|
||||||
|
|
||||||
doc = Nokogiri::HTML.fragment(raw)
|
doc = Nokogiri::HTML.fragment(raw)
|
||||||
@ -344,7 +379,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
|
|||||||
doc.css("blockquote.ipsBlockquote").each do |bq|
|
doc.css("blockquote.ipsBlockquote").each do |bq|
|
||||||
post_id = post_id_from_imported_post_id(bq["data-cid"])
|
post_id = post_id_from_imported_post_id(bq["data-cid"])
|
||||||
if post = Post.find_by(id: post_id)
|
if post = Post.find_by(id: post_id)
|
||||||
bq.replace %{<p>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]</p>}
|
bq.replace %{<br>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]<br>}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user