|
2 | 2 |
|
3 | 3 | require 'uri'
|
4 | 4 | require_relative 'auth'
|
| 5 | +require_relative 'xml_builder' |
5 | 6 |
|
6 | 7 | module Html2rss
|
7 | 8 | module Web
|
@@ -87,50 +88,64 @@ def generate_feed_from_stable_id(feed_id, token_data)
|
87 | 88 | end
|
88 | 89 |
|
# Generates a feed for +url+ using +strategy+ and swaps in a warning feed
# when the generated feed contains no items.
#
# @param url [String] page the feed is generated from
# @param strategy [String] strategy name forwarded to call_strategy
# @return [Object] the feed returned by call_strategy, or the result of
#   create_empty_feed_warning when the serialized feed has no <item> element
def generate_feed_content(url, strategy = 'ssrf_filter')
  feed_content = call_strategy(url, strategy)

  # A feed without items might be a content extraction issue, so return an
  # explanatory warning feed instead. Every object responds to #to_s, so the
  # feed is serialized without a respond_to? guard. The pattern also accepts
  # item tags that carry attributes or whitespace before the closing '>'.
  return create_empty_feed_warning(url, strategy) unless feed_content.to_s.match?(/<item[\s>]/)

  feed_content
end
| 104 | + |
# Builds the warning feed handed back when no items were found for +url+.
#
# @param url [String] page the feed was requested for
# @param strategy [String] strategy name that produced the empty feed
# @return [Object] whatever XmlBuilder.build_empty_feed_warning returns
def create_empty_feed_warning(url, strategy)
  # The title lookup is best-effort: extract_site_title yields nil on failure.
  title = extract_site_title(url)

  XmlBuilder.build_empty_feed_warning(url: url, strategy: strategy, site_title: title)
end
|
92 | 113 |
|
# rubocop:disable Metrics/MethodLength
# Assembles the auto-source configuration for +url+ and passes it to
# Html2rss.feed.
#
# @param url [String] page the feed is generated from
# @param strategy [String, Symbol] strategy name, converted with #to_sym
# @return [Object] whatever Html2rss.feed returns for the configuration
def call_strategy(url, strategy)
  stylesheet = { href: '/rss.xsl', type: 'text/xsl' }
  strategy_name = strategy.to_sym
  channel = {
    url: url,
    title: extract_channel_title(url)
  }
  config = {
    stylesheets: [stylesheet],
    strategy: strategy_name,
    channel: channel,
    # Left empty on purpose: auto source configuration for automatic
    # content detection on the page.
    auto_source: {}
  }

  Html2rss.feed(config)
end
# rubocop:enable Metrics/MethodLength
106 | 132 |
|
107 |
| - def error_feed(message) |
108 |
| - sanitized_message = Auth.sanitize_xml(message) |
109 |
| - build_rss_feed('Error', "Failed to generate auto-source feed: #{sanitized_message}", sanitized_message) |
# Derives the channel title for +url+, falling back to 'RSS Feed' when the
# titleized value is nil or false. Errors raised by the lookup propagate.
#
# @param url [String] page the channel title is derived from
# @return [Object] the titleized channel value, or 'RSS Feed'
def extract_channel_title(url)
  titleized = Html2rss::Url.for_channel(url).channel_titleized
  titleized || 'RSS Feed'
end
|
111 | 136 |
|
112 |
| - def access_denied_feed(url) |
113 |
| - sanitized_url = Auth.sanitize_xml(url) |
114 |
| - title = 'Access Denied' |
115 |
| - description = 'This URL is not allowed for public auto source generation.' |
116 |
| - item_description = "URL '#{sanitized_url}' is not in the allowed list for public auto source." |
117 |
| - build_rss_feed(title, description, item_description) |
# Best-effort lookup of the titleized site name for +url+.
#
# @param url [String] page the title is derived from
# @return [Object, nil] the titleized channel value, or nil when the lookup
#   raises a StandardError
def extract_site_title(url)
  channel_url = Html2rss::Url.for_channel(url)
  channel_url.channel_titleized
rescue StandardError
  # The title is optional here, so any lookup failure simply yields nil.
  nil
end
| 142 | + |
# Delegates to XmlBuilder to build the feed that reports +message+.
#
# @param message [String] error text forwarded as the +message:+ keyword
# @return [Object] whatever XmlBuilder.build_error_feed returns
def error_feed(message)
  XmlBuilder.build_error_feed(
    message: message
  )
end
|
119 | 146 |
|
120 |
| - def build_rss_feed(title, description, item_description) |
121 |
| - <<~RSS |
122 |
| - <?xml version="1.0" encoding="UTF-8"?> |
123 |
| - <rss version="2.0"> |
124 |
| - <channel> |
125 |
| - <title>#{title}</title> |
126 |
| - <description>#{description}</description> |
127 |
| - <item> |
128 |
| - <title>#{title}</title> |
129 |
| - <description>#{item_description}</description> |
130 |
| - </item> |
131 |
| - </channel> |
132 |
| - </rss> |
133 |
| - RSS |
# Delegates to XmlBuilder to build the access-denied feed for +url+.
#
# @param url [String] the URL forwarded as the only positional argument
# @return [Object] whatever XmlBuilder.build_access_denied_feed returns
def access_denied_feed(url)
  denied_url = url
  XmlBuilder.build_access_denied_feed(denied_url)
end
|
135 | 150 | end
|
136 | 151 | end
|
|
0 commit comments