# frozen_string_literal: true

module UrlToArticle
  # JSON endpoint that extracts article content from a caller-supplied URL.
  #
  # The filters declared below (`requires_login`, `ensure_enabled!`,
  # `validate_url!`) run before the action. `validate_url!` is what assigns the
  # `@url` that `extract` reads.
  class ArticlesController < ::ApplicationController
    requires_login
    before_action :ensure_enabled!
    before_action :validate_url!

    # Runs `ArticleExtractor.extract` on the validated URL and renders the
    # extracted fields as a JSON object.
    #
    # Any StandardError raised along the way is logged at warn level and
    # reported to the client as a 422 carrying the exception message.
    def extract
      result = ArticleExtractor.extract(@url)

      render json: {
        title: result.title,
        byline: result.byline,
        site_name: result.site_name,
        description: result.description,
        markdown: result.markdown,
        url: result.url,
      }
    rescue => e
      Rails.logger.warn("[url-to-article] Extraction failed for #{@url}: #{e.message}")
      render json: { error: "Could not extract article: #{e.message}" }, status: :unprocessable_entity
    end

    private

    # Raises Discourse::NotFound unless the `url_to_article_enabled` site
    # setting is truthy.
    def ensure_enabled!
      raise Discourse::NotFound unless SiteSetting.url_to_article_enabled
    end

    # Validates the required `url` parameter and stores it, unmodified, in
    # `@url`. On failure it renders a JSON error and returns without setting
    # `@url`:
    #
    # * 400 - not parseable as a URI, scheme other than http/https, or no host
    # * 403 - host matches the configured blocklist, or an allowlist is
    #         configured and the host is not covered by it
    #
    # Host comparisons are case-insensitive.
    def validate_url!
      raw = params.require(:url)

      begin
        uri = URI.parse(raw)
      rescue URI::InvalidURIError
        return render_url_error("Invalid URL", :bad_request)
      end

      unless %w[http https].include?(uri.scheme)
        return render_url_error("Only http/https URLs are supported", :bad_request)
      end

      # URI.parse keeps the host's original case and returns nil for URLs such
      # as "http:/path"; normalise once so neither can slip past the checks.
      host = uri.host.to_s.downcase
      return render_url_error("Invalid URL", :bad_request) if host.empty?

      # Blocklist entries are matched as substrings of the host, so an entry
      # may be a domain or an address fragment. This only consults the
      # configured list: it does not resolve DNS or classify IP ranges, so it
      # is not a complete SSRF defence on its own.
      blocked_domains = domain_list(SiteSetting.url_to_article_blocked_domains)
      if blocked_domains.any? { |entry| host.include?(entry) }
        return render_url_error("Domain not allowed", :forbidden)
      end

      # Optional allowlist: enforced only when at least one entry is configured.
      allowed_domains = domain_list(SiteSetting.url_to_article_allowed_domains)
      if allowed_domains.any? && allowed_domains.none? { |entry| host_matches_domain?(host, entry) }
        return render_url_error("Domain not in allowlist", :forbidden)
      end

      @url = raw
    end

    # Splits a comma-separated setting value into trimmed, lower-cased,
    # non-empty entries. A nil value yields an empty list.
    def domain_list(setting_value)
      setting_value.to_s.split(",").map { |entry| entry.strip.downcase }.reject(&:empty?)
    end

    # True when `host` is `domain` itself or one of its subdomains. A bare
    # suffix test would let "notexample.com" satisfy "example.com", so the
    # match must fall on a label boundary. Entries written with a leading dot
    # (".example.com") already carry that boundary and match subdomains only.
    def host_matches_domain?(host, domain)
      return host.end_with?(domain) if domain.start_with?(".")

      host == domain || host.end_with?(".#{domain}")
    end

    # Renders `{ error: message }` as JSON with the given HTTP status.
    def render_url_error(message, status)
      render json: { error: message }, status: status
    end
  end
end
|