Files
discourse-url-to-article/app/controllers/url_to_article/articles_controller.rb

64 lines
1.9 KiB
Ruby
Raw Normal View History

2026-03-18 11:10:07 -04:00
# frozen_string_literal: true
module UrlToArticle
  # JSON endpoint that hands a caller-supplied URL to ArticleExtractor and
  # returns the extracted article fields.
  #
  # Filters declared below run before +extract+: the plugin must be enabled
  # via SiteSetting.url_to_article_enabled and the +url+ parameter must pass
  # validate_url!.
  class ArticlesController < ::ApplicationController
    # Macro provided by the host application; presumably rejects anonymous
    # callers — confirm against the host app's ApplicationController.
    requires_login

    before_action :ensure_enabled!
    before_action :validate_url!

    # Runs ArticleExtractor on @url (assigned by validate_url!) and renders
    # title, byline, site_name, description, markdown and url as JSON.
    #
    # Any StandardError raised in the body is logged at warn level and
    # rendered as a 422 JSON error that includes the exception message.
    def extract
      result = ArticleExtractor.extract(@url)

      render json: {
        title: result.title,
        byline: result.byline,
        site_name: result.site_name,
        description: result.description,
        markdown: result.markdown,
        url: result.url,
      }
    rescue => e
      Rails.logger.warn("[url-to-article] Extraction failed for #{@url}: #{e.message}")
      render json: { error: "Could not extract article: #{e.message}" }, status: :unprocessable_entity
    end

    private

    # Raises Discourse::NotFound unless the plugin's site setting is on.
    def ensure_enabled!
      raise Discourse::NotFound unless SiteSetting.url_to_article_enabled
    end

    # Validates the required +url+ parameter and stores it in @url.
    #
    # Renders a JSON error (and returns early) when the value:
    # * cannot be parsed as a URI, or has no host          -> 400
    # * uses a scheme other than http/https                -> 400
    # * has a host matching the configured blocklist       -> 403
    # * has a host outside a non-empty configured allowlist -> 403
    #
    # A missing +url+ parameter is left to params.require to report.
    def validate_url!
      raw = params.require(:url)

      begin
        uri = URI.parse(raw)
      rescue URI::InvalidURIError
        return render json: { error: "Invalid URL" }, status: :bad_request
      end

      unless %w[http https].include?(uri.scheme)
        return render json: { error: "Only http/https URLs are supported" }, status: :bad_request
      end

      # URI#host keeps the caller's casing and is nil/empty for inputs such
      # as "http:foo" or "http:///x". Host names are case-insensitive, so
      # compare in lower case, and refuse URLs that carry no host at all
      # instead of letting them skip the domain checks below.
      host = uri.host.to_s.downcase
      return render json: { error: "Invalid URL" }, status: :bad_request if host.empty?

      # Admin-configured blocklist. Kept as a substring match: over-blocking
      # is the safe direction, and existing entries may rely on it.
      # NOTE(review): this only inspects the host text; nothing here resolves
      # DNS, so a public name pointing at a private address is not caught —
      # confirm ArticleExtractor guards against that.
      blocked_domains = url_to_article_domain_list(SiteSetting.url_to_article_blocked_domains)
      if blocked_domains.any? { |domain| host.include?(domain) }
        return render json: { error: "Domain not allowed" }, status: :forbidden
      end

      # Optional allowlist: only enforced when at least one entry is set.
      allowed_domains = url_to_article_domain_list(SiteSetting.url_to_article_allowed_domains)
      if allowed_domains.any? && allowed_domains.none? { |domain| url_to_article_host_in_domain?(host, domain) }
        return render json: { error: "Domain not in allowlist" }, status: :forbidden
      end

      @url = raw
    end

    # Splits a comma-separated setting value into trimmed, lower-cased,
    # non-empty entries. A nil setting yields an empty list.
    def url_to_article_domain_list(setting)
      setting.to_s.split(",").map { |entry| entry.strip.downcase }.reject(&:empty?)
    end

    # True when +host+ is +domain+ itself or one of its subdomains.
    #
    # The match is anchored on a label boundary so that an entry of
    # "example.com" does not also admit "evilexample.com". Entries written
    # with a leading dot (".example.com") keep plain suffix matching, which
    # is already boundary-anchored.
    def url_to_article_host_in_domain?(host, domain)
      return host.end_with?(domain) if domain.start_with?(".")

      host == domain || host.end_with?(".#{domain}")
    end
  end
end