discourse/lib/onebox/engine/pubmed_onebox.rb

# frozen_string_literal: true

module Onebox
  module Engine
    class PubmedOnebox
      include Engine
      include LayoutSupport

      # Registers the URL pattern this engine handles. The dots in the host are
      # escaped so they match only a literal ".", and \A anchors at the start of
      # the string rather than at the start of any line.
      matches_regexp(%r{\Ahttps?://(?:(?:\w)+\.)?(www\.ncbi\.nlm\.nih)\.gov(?:/)?/pubmed/\d+})

      private

      # Fetches the article record for @url and memoizes the parsed result.
      #
      # Resolves the relative reference "?report=xml&format=text" against @url,
      # opens the resulting URI, collects the text of every <pre> element in the
      # response, and re-parses that text as XML inside a synthetic <root>
      # wrapper.
      #
      # @return [Nokogiri::XML::Document] the re-parsed record
      def xml
        return @xml if defined?(@xml)

        # Block form of #open: the response IO is closed as soon as it has been
        # parsed instead of staying open until it is garbage collected.
        page = URI.join(@url, "?report=xml&format=text").open { |io| Nokogiri.XML(io) }
        record = page.xpath("//pre").text
        @xml = Nokogiri.XML("<root>#{record}</root>")
      end

      # Builds the human-readable author line for the article.
      #
      # Pairs the text of each <Initials> element with the <LastName> element at
      # the same position and joins the names as "A One, B Two and C Three".
      #
      # @return [String] the formatted names, or "" when there are no initials
      def authors
        initials = xml.css("Initials").map(&:content)
        last_names = xml.css("LastName").map(&:content)

        # zip pads with nil when there are fewer last names than initials; drop
        # the nil so a lopsided record renders what it has instead of raising.
        # NOTE(review): pairing is purely positional, so a name lacking either
        # element would shift the entries after it - confirm against real records.
        names = initials.zip(last_names).map { |pair| pair.compact.join(" ") }

        *leading, final = names
        return final.to_s if leading.empty?

        "#{leading.join(", ")} and #{final}"
      end

      # Formats the publication date from the text inside <PubDate>.
      #
      # Every whitespace-separated token found in the children of <PubDate> is
      # kept; four-digit tokens (the year) are moved to the end so the result
      # reads e.g. "Oct 2013" or "Oct 15 2013".
      #
      # @return [String] the tokens joined by single spaces, "" when none exist
      def date
        # Splitting a whitespace-only text node yields [], so the indentation
        # between child elements disappears without a separate filter.
        tokens = xml.css("PubDate").children.flat_map { |node| node.content.split }

        # A plain reverse string sort would place a day of month after the year
        # ("Oct 2013 15"), so keep source order for everything except the year.
        # NOTE(review): assumes month precedes day in the record - confirm.
        years, rest = tokens.partition { |token| token.match?(/\A\d{4}\z/) }
        (rest + years).join(" ")
      end

      # Collects the article fields into a hash keyed by :title, :authors,
      # :journal, :abstract, :date, :link and :pmid.
      # NOTE(review): presumably consumed by the LayoutSupport template - confirm.
      #
      # @return [Hash] the field values; :link is @url and :pmid comes from #match
      def data
        # NodeSet#text glues all matches together with no separator, which runs
        # the sections of a multi-part abstract into each other; join them with
        # a space instead. A single <AbstractText> is returned unchanged.
        abstract = xml.css("AbstractText").map(&:text).join(" ")

        {
          title: xml.css("ArticleTitle").text,
          authors: authors,
          journal: xml.css("Title").text,
          abstract: abstract,
          date: date,
          link: @url,
          pmid: match[:pmid],
        }
      end

      # Extracts the PubMed id from @url, memoizing a successful match.
      #
      # Accepts an optional second slash before "pubmed/", as the pattern this
      # engine is registered with does, so a URL with a doubled slash no longer
      # yields nil here.
      #
      # @return [MatchData, nil] match exposing the :pmid capture, or nil when
      #   @url does not contain the expected host and path
      def match
        @match ||= @url.match(%r{www\.ncbi\.nlm\.nih\.gov//?pubmed/(?<pmid>[0-9]+)})
      end
    end
  end
end