# frozen_string_literal: true

module UrlToArticle
  # JSON endpoint that hands a user-supplied URL to ArticleExtractor and
  # returns the extracted article fields.
  #
  # Every request passes through two filters before reaching an action:
  # +ensure_enabled!+ (feature toggle) and +validate_url!+ (URL sanity and
  # domain policy). +validate_url!+ stores the accepted URL in +@url+.
  class ArticlesController < ::ApplicationController
    requires_login
    before_action :ensure_enabled!
    before_action :validate_url!

    # Schemes accepted by +validate_url!+.
    ALLOWED_SCHEMES = %w[http https].freeze

    # Extracts the article at +@url+ and renders its fields as JSON.
    #
    # Any StandardError raised during extraction or rendering is logged as a
    # warning and reported to the client as a 422 with the error message.
    def extract
      result = ArticleExtractor.extract(@url)

      render json: {
        title: result.title,
        byline: result.byline,
        site_name: result.site_name,
        description: result.description,
        markdown: result.markdown,
        url: result.url,
      }
    rescue => e
      Rails.logger.warn("[url-to-article] Extraction failed for #{@url}: #{e.message}")
      render json: { error: "Could not extract article: #{e.message}" }, status: :unprocessable_entity
    end

    private

    # Raises Discourse::NotFound unless the url_to_article_enabled site
    # setting is truthy.
    def ensure_enabled!
      raise Discourse::NotFound unless SiteSetting.url_to_article_enabled
    end

    # Validates the required +url+ parameter and stores it in +@url+.
    #
    # Renders an error (which halts the filter chain) when the URL:
    # * is not a string or cannot be parsed       -> 400 "Invalid URL"
    # * has no host                               -> 400 "Invalid URL"
    # * uses a scheme other than http/https       -> 400
    # * has a host matching a blocked domain      -> 403
    # * is outside a non-empty allowlist          -> 403
    def validate_url!
      raw = params.require(:url)

      # `url[]=x` or `url[k]=v` arrive as Array/Parameters; URI.parse is only
      # meaningful for strings, so reject anything else up front.
      return render_url_error("Invalid URL", :bad_request) unless raw.is_a?(String)

      begin
        uri = URI.parse(raw)
      rescue URI::InvalidURIError
        return render_url_error("Invalid URL", :bad_request)
      end

      unless ALLOWED_SCHEMES.include?(uri.scheme)
        return render_url_error("Only http/https URLs are supported", :bad_request)
      end

      # Host names are case-insensitive and "example.com." is the same host as
      # "example.com"; normalise so neither spelling slips past the lists.
      host = uri.host.to_s.downcase.chomp(".")

      # A URL such as "http:///path" parses but has no host; it must not pass
      # the domain checks by default.
      return render_url_error("Invalid URL", :bad_request) if host.empty?

      # Configured denylist. This only compares the host *name* against the
      # site setting; it does not resolve DNS or inspect IP ranges.
      # NOTE(review): protection against private/loopback targets therefore
      # has to live in the fetching code — confirm ArticleExtractor does this.
      # Matching stays substring-based (as before) so existing partial
      # entries keep blocking what they blocked.
      blocked_domains = domain_list(SiteSetting.url_to_article_blocked_domains)
      if blocked_domains.any? { |domain| host.include?(domain) }
        return render_url_error("Domain not allowed", :forbidden)
      end

      # Optional allowlist: when non-empty, the host must be a listed domain
      # or one of its subdomains.
      allowed_domains = domain_list(SiteSetting.url_to_article_allowed_domains)
      if allowed_domains.any? && allowed_domains.none? { |domain| host_within_domain?(host, domain) }
        return render_url_error("Domain not in allowlist", :forbidden)
      end

      @url = raw
    end

    # Splits a comma-separated setting value into trimmed, lower-cased,
    # non-empty entries. A nil value yields an empty list.
    def domain_list(setting_value)
      setting_value.to_s.split(",").map { |entry| entry.strip.downcase }.reject(&:empty?)
    end

    # True when +host+ equals +domain+ or is a subdomain of it. The match is
    # anchored on a label boundary so "evilexample.com" does not satisfy an
    # entry of "example.com". A leading dot on the entry is ignored.
    def host_within_domain?(host, domain)
      domain = domain.delete_prefix(".")
      return false if domain.empty?

      host == domain || host.end_with?(".#{domain}")
    end

    # Renders +message+ as a JSON error body with the given HTTP +status+.
    def render_url_error(message, status)
      render json: { error: message }, status: status
    end
  end
end