rss-reader/src/import.rs

// Module for handling OPML feed list imports
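// Uploads are validated synchronously; fetching and storing the feeds then
// runs as a background job tracked in the shared job store.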
use std::io::Cursor;

use quick_xml::de::from_reader;
use rocket::data::ToByteUnit;
use rocket::http::Status;
use rocket::serde::json::Json;
use rocket::serde::{Deserialize, Serialize};
use rocket::{post, Data, State};
use rocket_db_pools::Connection;
use tokio::spawn;
use tracing::error;
use uuid::Uuid;

use crate::feed_utils::fetch_feed;
use crate::feeds::Feed;
use crate::jobs::{JobStatus, SharedJobStore};
use crate::user::AuthenticatedUser;
use crate::Db;
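
/// JSON response returned by the OPML import endpoint. `job_id` is present
/// only when a background import job was started.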
#[derive(Debug, Serialize, Deserialize)]
#[serde(crate = "rocket::serde")]
pub struct ImportResponse {
    success: bool,
    message: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    job_id: Option<Uuid>,
}
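
/// Root element of a parsed OPML document. A minimal sketch of the input this
/// deserializer accepts (attribute names map to the `@`-prefixed serde renames
/// below; the URL is illustrative):
///
/// ```xml
/// <opml version="2.0">
///   <body>
///     <outline text="Tech">
///       <outline type="rss" text="Example feed"
///                xmlUrl="https://example.com/feed.xml"/>
///     </outline>
///   </body>
/// </opml>
/// ```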
#[derive(Debug, Deserialize)]
#[serde(crate = "rocket::serde")]
#[serde(rename = "opml")]
struct Opml {
#[serde(rename = "body")]
body: OpmlBody,
}
#[derive(Debug, Deserialize)]
#[serde(crate = "rocket::serde")]
struct OpmlBody {
#[serde(rename = "outline", default)]
outlines: Vec<OpmlOutline>,
}
#[derive(Debug, Deserialize)]
#[serde(crate = "rocket::serde")]
struct OpmlOutline {
#[serde(rename = "@type", default)]
outline_type: Option<String>,
#[serde(rename = "@text", default)]
text: Option<String>,
#[serde(rename = "@title", default)]
title: Option<String>,
#[serde(rename = "@xmlUrl", default)]
xml_url: Option<String>,
#[serde(rename = "@htmlUrl", default)]
html_url: Option<String>,
#[serde(rename = "outline", default)]
outlines: Vec<OpmlOutline>,
}
impl OpmlOutline {
fn is_feed(&self) -> bool {
self.xml_url.is_some()
}
fn get_title(&self) -> Option<String> {
self.title.clone().or_else(|| self.text.clone())
}
}

/// Import feeds from an OPML file
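///
/// The body is read as a raw data stream, so a multipart form upload arrives
/// with its framing intact and is stripped manually below. A sketch of a
/// request, assuming the route is mounted at the application root and
/// authentication is supplied separately:
///
/// ```text
/// curl -F "file=@feeds.opml" https://example.com/import/opml
/// ```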
#[post("/import/opml", data = "<file>")]
pub async fn import_opml(
    db: Connection<Db>,
    user: AuthenticatedUser,
    file: Data<'_>,
    job_store: &State<SharedJobStore>,
) -> Result<Json<ImportResponse>, Status> {
    // Limit the upload to 1 MiB and read the raw bytes
    let file_data = file.open(1.mebibytes()).into_bytes().await.map_err(|e| {
        error!("Failed to read OPML file: {e}");
        Status::BadRequest
    })?;
    if !file_data.is_complete() {
        error!("OPML file too large");
        return Err(Status::PayloadTooLarge);
    }
    let bytes = file_data.value;

    // The upload may arrive wrapped in multipart/form-data framing. Skip past
    // the part headers (terminated by a blank CRLF line) to reach the XML
    // payload.
    let content_start = bytes
        .windows(4)
        .position(|window| window == b"\r\n\r\n")
        .map(|pos| pos + 4)
        .unwrap_or(0);
    // Trim the trailing boundary line by cutting at the last CRLF.
    let content_end = bytes
        .windows(2)
        .rposition(|window| window == b"\r\n")
        .unwrap_or(bytes.len());

    // Extract just the XML content
    let xml_content = &bytes[content_start..content_end];
    let cursor = Cursor::new(xml_content);

    // Parse the OPML to validate its format
    let opml: Opml = from_reader(cursor).map_err(|e| {
        let preview = String::from_utf8_lossy(&xml_content[..xml_content.len().min(100)]);
        error!("Failed to parse OPML: {e}. File starts with: {preview}");
        Status::UnprocessableEntity
    })?;

    // Extract the feeds to import
    let mut feeds_to_import = Vec::new();
    extract_feeds(&opml.body.outlines, String::new(), &mut feeds_to_import);
    if feeds_to_import.is_empty() {
        return Ok(Json(ImportResponse {
            success: false,
            message: "No valid feeds found in OPML file".to_string(),
            job_id: None,
        }));
    }

    // Create a background job to track progress
    let job_id = {
        let mut store = job_store.write().await;
        store.create_job("opml_import".to_string())
    };

    // Launch the import as a detached background task
    let job_store = (*job_store).clone();
    let user_id = user.user_id;
    let feeds_len = feeds_to_import.len();
    spawn(async move {
        import_feeds_job(feeds_to_import, db, user_id, job_id, job_store).await;
    });

    Ok(Json(ImportResponse {
        success: true,
        message: format!("OPML file validated. Importing {feeds_len} feeds in the background."),
        job_id: Some(job_id),
    }))
}
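
/// Background task driving an OPML import: fetches each extracted feed,
/// stores it for the importing user, and records per-feed progress in the
/// shared job store under `job_id`.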
async fn import_feeds_job(
    feeds: Vec<(String, Option<String>, Option<String>)>,
    mut db: Connection<Db>,
    user_id: Uuid,
    job_id: Uuid,
    job_store: SharedJobStore,
) {
    let total_feeds = feeds.len();
    let mut imported_count = 0;

    // Record the initial job status
    {
        let mut store = job_store.write().await;
        store.update_job_status(
            job_id,
            JobStatus::InProgress {
                completed: 0,
                total: total_feeds,
            },
        );
    }

    for (url, title, category) in feeds {
        // Update progress before processing each feed
        {
            let mut store = job_store.write().await;
            store.update_job_status(
                job_id,
                JobStatus::InProgress {
                    completed: imported_count,
                    total: total_feeds,
                },
            );
        }

        // Malformed URLs are skipped silently
        if let Ok(parsed_url) = url::Url::parse(&url) {
            match fetch_feed(&parsed_url).await {
                Ok(feed_data) => {
                    // Prefer the fetched feed's own title, then the OPML
                    // title, then the host name
                    let name = feed_data
                        .title
                        .map(|t| t.content)
                        .or(title)
                        .unwrap_or_else(|| {
                            parsed_url
                                .host_str()
                                .map(|s| s.to_string())
                                .unwrap_or_else(|| "<Unknown>".to_string())
                        });
                    let mut feed = Feed::new(name, parsed_url, user_id);
                    if let Some(cat) = category {
                        feed.categorization = vec![cat];
                    }
                    if feed.write_to_database(&mut **db).await.is_ok() {
                        imported_count += 1;
                    }
                }
                Err(_) => {
                    error!("Failed to fetch or parse feed from {url}");
                }
            }
        }
    }

    // Record the final job status
    {
        let mut store = job_store.write().await;
        store.update_job_status(
            job_id,
            JobStatus::Completed {
                success_count: imported_count,
            },
        );
    }
}
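
/// Recursively flatten OPML outlines into `(xml_url, title, category)`
/// tuples. A feed's category is the title of its nearest enclosing folder;
/// deeper nesting is collapsed to that single category.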
fn extract_feeds(
    outlines: &[OpmlOutline],
    current_category: String,
    feeds: &mut Vec<(String, Option<String>, Option<String>)>,
) {
    for outline in outlines {
        if outline.is_feed() {
            if let Some(url) = &outline.xml_url {
                feeds.push((
                    url.clone(),
                    outline.get_title(),
                    if current_category.is_empty() {
                        None
                    } else {
                        Some(current_category.clone())
                    },
                ));
            }
        } else {
            // This is a category/folder
            let new_category = outline
                .get_title()
                .unwrap_or_else(|| "Uncategorized".to_string());
            extract_feeds(&outline.outlines, new_category, feeds);
        }
    }
}