# discourse-url-to-article/lib/url_to_article/article_extractor.rb
# frozen_string_literal: true
# Uses only Nokogiri (already bundled with Discourse) — no extra gems needed.
require "nokogiri"
require "net/http"
require "uri"
require "timeout"
module UrlToArticle
class ArticleExtractor
# CSS selectors for page chrome and boilerplate that should never appear
# in the extracted article: scripts/styles, navigation, ads, cookie
# banners, share widgets, comment sections, and ARIA landmarks for
# non-content regions. Matching nodes are removed by clean_node!.
NOISE_SELECTORS = %w[
script style noscript iframe nav footer header
.navigation .nav .menu .sidebar .widget .ad .advertisement
.cookie-banner .cookie-notice .popup .modal .overlay
.social-share .share-buttons .related-posts .comments
#comments #sidebar #navigation #footer #header
[role=navigation] [role=banner] [role=contentinfo]
[aria-label=navigation] [aria-label=footer]
].freeze
# Candidate selectors for the main article container, ordered from most
# to least specific. find_content_node takes the first match that has a
# substantial amount of text, so the order of this list encodes priority
# — do not reorder casually.
ARTICLE_SELECTORS = %w[
article[class*=content]
article[class*=post]
article[class*=article]
article
[role=main]
main
.post-content
.article-content
.entry-content
.article-body
.story-body
.post-body
.content-body
.page-content
#article-body
#post-content
#main-content
].freeze
# Value object returned by #extract. All fields are plain strings or nil;
# keyword_init so callers construct it with named fields.
Result = Struct.new(:title, :byline, :site_name, :description, :markdown, :url, keyword_init: true)
# Convenience entry point: build an extractor for +url+ and run it,
# returning a Result.
def self.extract(url)
new(url).extract
end
# Keep both the raw URL string (echoed back in Result and used to
# resolve relative links) and its parsed URI (used for fetching and
# host-based metadata fallbacks). Raises URI::InvalidURIError on junk.
def initialize(url)
  @uri = URI.parse(url)
  @url = url
end
# Fetch the page, pull metadata, isolate the article body, and render it
# to markdown. Returns a Result.
#
# Metadata is deliberately read *before* clean_node! runs: the content
# node is part of the same document tree, and cleaning removes elements
# (e.g. .author) that the byline extraction may need.
def extract
  doc = Nokogiri::HTML(fetch_html)

  meta = {
    title: extract_title(doc),
    byline: extract_byline(doc),
    site_name: extract_site_name(doc),
    description: extract_description(doc),
  }

  content = find_content_node(doc)
  clean_node!(content)

  Result.new(
    url: @url,
    markdown: truncate(node_to_markdown(content)),
    **meta,
  )
end
private
# ------------------------------------------------------------------ #
# HTTP fetch
# ------------------------------------------------------------------ #
# Fetch @url and return the response body as a UTF-8 string.
#
# Follows up to 3 redirects. A Location header may be a *relative*
# reference, so it is resolved against the URI that issued the redirect
# (URI.parse alone would produce a host-less URI and break do_get).
# Raises RuntimeError ("HTTP <code>") on any non-2xx final response;
# Timeout::Error if the whole operation exceeds the site setting.
def fetch_html
  Timeout.timeout(SiteSetting.url_to_article_fetch_timeout) do
    response = do_get(@uri)

    3.times do
      break unless response.is_a?(Net::HTTPRedirection) && response["location"]
      # URI.join handles both absolute and relative Location values.
      @uri = URI.join(@uri.to_s, response["location"])
      response = do_get(@uri)
    end

    raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

    # Tag the body as UTF-8 and replace invalid byte sequences so the
    # downstream HTML parse doesn't raise on mis-declared encodings.
    response.body.to_s.force_encoding(Encoding::UTF_8).scrub
  end
end
# Perform a single GET against +uri+ and return the raw Net::HTTPResponse
# (redirects are not followed here — fetch_html handles those).
#
# Uses the block form of Net::HTTP#start so the TCP connection is always
# finished when the request completes; calling #request on an unstarted
# connection would open an implicit keep-alive socket that never closes.
def do_get(uri)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == "https"
  http.open_timeout = 5
  http.read_timeout = SiteSetting.url_to_article_fetch_timeout

  req = Net::HTTP::Get.new(uri.request_uri)
  req["User-Agent"] = "Mozilla/5.0 (compatible; Discourse/url-to-article)"
  req["Accept"] = "text/html,application/xhtml+xml"
  req["Accept-Language"] = "en-US,en;q=0.9"

  http.start { http.request(req) }
end
# ------------------------------------------------------------------ #
# Metadata extraction
# ------------------------------------------------------------------ #
# Best-available title: og:title, then twitter:title, then the first
# <h1> — each only if present (non-blank, via ActiveSupport). Falls back
# to the <title> tag's text, or the host name as a last resort.
def extract_title(doc)
  og = doc.at_css('meta[property="og:title"]')&.attr("content")
  tw = doc.at_css('meta[name="twitter:title"]')&.attr("content")
  h1 = doc.at_css("h1")&.text

  picked = [og, tw, h1].find(&:present?)
  return picked.strip if picked

  doc.at_css("title")&.text&.strip || @uri.host
end
# Author name from the first non-blank source, checked in priority order:
# <meta name=author>, rel=author, .author, any *byline* class, <address>.
# Returns nil when no source yields a non-blank string.
def extract_byline(doc)
  sources = [
    doc.at_css('meta[name="author"]')&.attr("content"),
    doc.at_css('[rel="author"]')&.text,
    doc.at_css(".author")&.text,
    doc.at_css('[class*="byline"]')&.text,
    doc.at_css("address")&.text,
  ]

  sources.each do |candidate|
    next if candidate.nil?
    name = candidate.strip
    return name unless name.empty?
  end

  nil
end
# Site name from og:site_name, else the host with a leading "www."
# stripped.
def extract_site_name(doc)
  og = doc.at_css('meta[property="og:site_name"]')&.attr("content")
  return og.strip if og

  @uri.host.sub(/^www\./, "")
end
# Page summary: og:description first, then the plain meta description.
# Returns nil when neither meta tag carries content.
def extract_description(doc)
  ['meta[property="og:description"]', 'meta[name="description"]'].each do |sel|
    content = doc.at_css(sel)&.attr("content")
    return content.strip if content
  end

  nil
end
# ------------------------------------------------------------------ #
# Content node selection
# ------------------------------------------------------------------ #
# Locate the main article container: the first ARTICLE_SELECTORS match
# with a substantial amount of text (> 200 chars) wins. Falls back to
# heuristic scoring when no selector produces a good candidate.
def find_content_node(doc)
  matched =
    ARTICLE_SELECTORS
      .lazy
      .filter_map { |sel| doc.at_css(sel) }
      .find { |candidate| candidate.text.strip.length > 200 }

  matched || score_and_pick(doc)
end
# Readability-style fallback: score every div/section/td by text length
# minus a penalty per descendant element (markup-dense nodes are likely
# chrome, not prose). Candidates under 150 chars of text are ignored.
# Ties keep the earliest candidate. Falls back to <body>, then the
# document itself.
def score_and_pick(doc)
  best_node = nil
  best_score = nil

  doc.css("div, section, td").each do |candidate|
    length = candidate.text.strip.length
    next if length < 150

    descendant_count = candidate.css("*").size.to_f
    score = length - (descendant_count * 3)

    if best_score.nil? || score > best_score
      best_score = score
      best_node = candidate
    end
  end

  best_node || doc.at_css("body") || doc
end
# ------------------------------------------------------------------ #
# Node cleaning
# ------------------------------------------------------------------ #
# Destructively strip noise from the chosen content node: boilerplate
# elements, inline-hidden elements, empty wrappers, and all attributes
# except a small allow-list. Also rewrites relative img/a URLs to be
# absolute against @url. No-op when node is nil.
def clean_node!(node)
return unless node
# Drop known chrome/boilerplate (see NOISE_SELECTORS).
NOISE_SELECTORS.each { |sel| node.css(sel).each(&:remove) }
# Remove anything hidden via inline style — it was never meant to be read.
node.css("[style]").each do |el|
el.remove if el["style"] =~ /display\s*:\s*none|visibility\s*:\s*hidden/i
end
# Prune wrappers that have no text AND no embedded media.
node.css("span, div, p, section").each do |el|
el.remove if el.text.strip.empty? && el.css("img, video, audio, iframe").empty?
end
# Per-tag attribute allow-list; anything not listed here is stripped.
allowed_attrs = {
"a" => %w[href title],
"img" => %w[src alt title],
"td" => %w[colspan rowspan],
"th" => %w[colspan rowspan scope],
"ol" => %w[start],
"li" => %w[value],
"code" => %w[class],
"pre" => %w[class],
}
node.css("*").each do |el|
tag = el.name.downcase
permitted = allowed_attrs[tag] || []
# Nokogiri's #attributes returns a fresh Hash, so removing attributes
# while iterating its keys is safe here.
el.attributes.each_key { |attr| el.remove_attribute(attr) unless permitted.include?(attr) }
# Absolutize relative image sources against the article URL.
# NOTE(review): when URI.join fails the attribute is assigned nil —
# verify Nokogiri drops the attribute rather than serializing it empty.
if tag == "img" && el["src"] && !el["src"].to_s.start_with?("http", "//", "data:")
el["src"] = (URI.join(@url, el["src"]).to_s rescue nil)
end
# Same for relative link targets (fragments and mailto: left alone).
if tag == "a" && el["href"] && !el["href"].to_s.start_with?("http", "//", "#", "mailto:")
el["href"] = (URI.join(@url, el["href"]).to_s rescue nil)
end
end
end
# ------------------------------------------------------------------ #
# HTML → Markdown (zero external dependencies)
# ------------------------------------------------------------------ #
# Render the cleaned content node as markdown, then normalize: trim the
# ends and collapse runs of 3+ newlines to a single blank line. Returns
# "" for a nil node.
def node_to_markdown(node)
  return "" if node.nil?

  raw = convert_node(node, context: {})
  raw.strip.gsub(/\n{3,}/, "\n\n")
end
# Dispatch on Nokogiri node type: text nodes have their whitespace runs
# collapsed to single spaces, elements go through convert_element,
# documents recurse into their children, and anything else (comments,
# CDATA, …) renders as "".
def convert_node(node, context:)
  return "" if node.nil?

  type = node.node_type
  if type == Nokogiri::XML::Node::TEXT_NODE
    node.text.gsub(/[[:space:]]+/, " ")
  elsif type == Nokogiri::XML::Node::ELEMENT_NODE
    convert_element(node, context: context)
  elsif type == Nokogiri::XML::Node::DOCUMENT_NODE
    node.children.map { |child| convert_node(child, context: context) }.join
  else
    ""
  end
end
# Concatenated markdown of all of +node+'s children.
def children_md(node, context)
  node.children.reduce(+"") { |buffer, child| buffer << convert_node(child, context: context) }
end
# Map one HTML element to its markdown form, recursing into children via
# children_md. Block-level output is padded with blank lines (later
# collapsed by node_to_markdown); unknown tags fall through to their
# children's markdown; non-content tags (scripts, form controls, …)
# render as "".
# NOTE(review): context[:pre] suppresses inline backticks for <code>,
# but no visible caller ever sets :pre (the "pre" branch uses node.text
# directly) — confirm whether that flag is set elsewhere or is dead.
def convert_element(node, context:) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
tag = node.name.downcase
case tag
when "h1" then "\n\n# #{children_md(node, context).strip}\n\n"
when "h2" then "\n\n## #{children_md(node, context).strip}\n\n"
when "h3" then "\n\n### #{children_md(node, context).strip}\n\n"
when "h4" then "\n\n#### #{children_md(node, context).strip}\n\n"
when "h5" then "\n\n##### #{children_md(node, context).strip}\n\n"
when "h6" then "\n\n###### #{children_md(node, context).strip}\n\n"
when "p" then "\n\n#{children_md(node, context).strip}\n\n"
when "br" then " \n"
when "hr" then "\n\n---\n\n"
when "strong", "b" then "**#{children_md(node, context).strip}**"
when "em", "i" then "*#{children_md(node, context).strip}*"
when "del", "s" then "~~#{children_md(node, context).strip}~~"
when "code"
context[:pre] ? children_md(node, context) : "`#{children_md(node, context)}`"
when "pre"
# Fenced block; language sniffed from a `language-xyz` class on the
# inner <code>. Uses raw node.text to preserve code whitespace exactly.
lang = node.at_css("code")&.attr("class")&.match(/language-(\w+)/)&.[](1) || ""
"\n\n```#{lang}\n#{node.text}\n```\n\n"
when "blockquote"
# Prefix every line of the quoted body with "> ".
quoted = children_md(node, context).strip.gsub(/^/, "> ")
"\n\n#{quoted}\n\n"
when "a"
href = node["href"].to_s.strip
text = children_md(node, context).strip
href.empty? ? text : "[#{text}](#{href})"
when "img"
src = node["src"].to_s.strip
alt = node["alt"].to_s.strip
src.empty? ? "" : "![#{alt}](#{src})"
when "ul"
# "> li" selects only direct children, so nested lists are handled
# by recursion rather than flattened.
items = node.css("> li").map do |li|
"- #{children_md(li, context).strip}"
end.join("\n")
"\n\n#{items}\n\n"
when "ol"
# Honors the list's start attribute (kept by clean_node!'s allow-list).
start = (node["start"] || 1).to_i
items = node.css("> li").each_with_index.map do |li, idx|
"#{start + idx}. #{children_md(li, context).strip}"
end.join("\n")
"\n\n#{items}\n\n"
when "table"
convert_table(node, context: context)
when "figure"
img_el = node.at_css("img")
cap = node.at_css("figcaption")&.text&.strip
img_md = img_el ? convert_element(img_el, context: context) : ""
cap_md = cap ? "\n*#{cap}*" : ""
"\n\n#{img_md}#{cap_md}\n\n"
when "script", "style", "noscript", "button", "input",
"select", "textarea", "iframe", "object", "embed",
"head", "link", "meta"
""
else
children_md(node, context)
end
end
# Render an HTML <table> as a GitHub-style markdown table. The first row
# is treated as the header regardless of th/td usage; returns "" for a
# table with no rows.
def convert_table(table, context:)
  rows = table.css("tr")
  return "" if rows.empty?

  md_rows = rows.map do |row|
    cells = row.css("th, td").map do |cell|
      # Markdown tables are strictly one line per row: collapse any
      # newlines/whitespace runs from block content inside the cell,
      # and escape pipes so cell text can't break the columns.
      children_md(cell, context).strip.gsub(/\s+/, " ").gsub("|", "\\|")
    end
    "| #{cells.join(" | ")} |"
  end

  cols = rows.first.css("th, td").size
  sep = "| #{Array.new(cols, "---").join(" | ")} |"

  body_rows = md_rows.drop(1)
  table_md = +"\n\n#{md_rows.first}\n#{sep}"
  # Skip the body line entirely for header-only tables (avoids a stray
  # blank line in the output).
  table_md << "\n#{body_rows.join("\n")}" unless body_rows.empty?
  table_md << "\n\n"
end
# ------------------------------------------------------------------ #
# Helpers
# ------------------------------------------------------------------ #
# Cap the markdown at the configured maximum length, appending a notice
# when content was cut. Text at or under the limit is returned unchanged.
def truncate(text)
  limit = SiteSetting.url_to_article_max_content_length
  return text unless text.length > limit

  "#{text[0...limit]}\n\n*[Content truncated — visit the original article for the full text.]*"
end
end
end