From 3b7a762125402d327d51efc41eebf744905a2617 Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Tue, 5 Sep 2023 18:26:49 +0100 Subject: [PATCH 1/2] feat: [#261] store original infohashes When you upload a torrent, the infohash might change if the `info` dictionary contains non-standard fields because we remove them. That leads to a different infohash. We store the original infohash in a new table so that we can know if the torrent was previously uploaded. If we do not store the original infohash we could reject uploads producing the same canonical infohash. Still, there is no way for the user to ask if a torrent exists with a given original infohash. They only would be able to interact with the API with the canonical infohash. Sometimes it's useful to use the original infohash, for instance, if you are importing torrents from an external source and you want to check if the original torrent (with the original infohash) was already uploaded. --- .github/workflows/coverage.yaml | 1 + ...7_torrust_multiple_original_infohashes.sql | 46 +++++++ ...7_torrust_multiple_original_infohashes.sql | 48 ++++++++ src/app.rs | 7 +- src/common.rs | 7 +- src/databases/database.rs | 13 ++ src/databases/mysql.rs | 82 ++++++++++--- src/databases/sqlite.rs | 80 +++++++++++-- src/errors.rs | 6 + src/models/torrent_file.rs | 19 +-- src/services/torrent.rs | 113 ++++++++++++++++-- src/tracker/service.rs | 7 +- .../databases/sqlite_v2_0_0.rs | 2 +- src/utils/parse_torrent.rs | 5 +- src/web/api/v1/contexts/torrent/handlers.rs | 4 +- 15 files changed, 390 insertions(+), 50 deletions(-) create mode 100644 migrations/mysql/20230905091837_torrust_multiple_original_infohashes.sql create mode 100644 migrations/sqlite3/20230905091837_torrust_multiple_original_infohashes.sql diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 1a0dfeef..828f2fd6 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -58,6 +58,7 @@ jobs: name: Upload 
Coverage Report uses: codecov/codecov-action@v3 with: + token: ${{ secrets.CODECOV_TOKEN }} files: ${{ steps.coverage.outputs.report }} verbose: true fail_ci_if_error: true diff --git a/migrations/mysql/20230905091837_torrust_multiple_original_infohashes.sql b/migrations/mysql/20230905091837_torrust_multiple_original_infohashes.sql new file mode 100644 index 00000000..e11a1052 --- /dev/null +++ b/migrations/mysql/20230905091837_torrust_multiple_original_infohashes.sql @@ -0,0 +1,46 @@ +-- Step 1: Create a new table with all infohashes +CREATE TABLE torrust_torrent_info_hashes ( + info_hash CHAR(40) NOT NULL, + canonical_info_hash CHAR(40) NOT NULL, + original_is_known BOOLEAN NOT NULL, + PRIMARY KEY(info_hash), + FOREIGN KEY(canonical_info_hash) REFERENCES torrust_torrents(info_hash) ON DELETE CASCADE +); + +-- Step 2: Create one record for each torrent with only the canonical infohash. +-- The original infohash is NULL so we do not know if it was the same. +-- This happens if the uploaded torrent was uploaded before introducing +-- the feature to store the original infohash +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, FALSE + FROM torrust_torrents + WHERE original_info_hash IS NULL; + +-- Step 3: Create one record for each torrent with the same original and +-- canonical infohashes. +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash IS NOT NULL + AND info_hash = original_info_hash; + +-- Step 4: Create two records for each torrent with a different original and +-- canonical infohashes. One record with the same original and canonical +-- infohashes and one record with the original infohash and the canonical +-- one. 
+-- Insert the canonical infohash +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash IS NOT NULL + AND info_hash != original_info_hash; +-- Insert the original infohash pointing to the canonical +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT original_info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash IS NOT NULL + AND info_hash != original_info_hash; + +-- Step 5: Delete the `torrust_torrents::original_info_hash` column +ALTER TABLE torrust_torrents DROP COLUMN original_info_hash; + diff --git a/migrations/sqlite3/20230905091837_torrust_multiple_original_infohashes.sql b/migrations/sqlite3/20230905091837_torrust_multiple_original_infohashes.sql new file mode 100644 index 00000000..31585d83 --- /dev/null +++ b/migrations/sqlite3/20230905091837_torrust_multiple_original_infohashes.sql @@ -0,0 +1,48 @@ +-- Step 1: Create a new table with all infohashes +CREATE TABLE IF NOT EXISTS torrust_torrent_info_hashes ( + info_hash TEXT NOT NULL, + canonical_info_hash TEXT NOT NULL, + original_is_known BOOLEAN NOT NULL, + PRIMARY KEY(info_hash), + FOREIGN KEY(canonical_info_hash) REFERENCES torrust_torrents (info_hash) ON DELETE CASCADE +); + +-- Step 2: Create one record for each torrent with only the canonical infohash. +-- The original infohash is NULL so we do not know if it was the same. +-- This happens if the uploaded torrent was uploaded before introducing +-- the feature to store the original infohash +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, FALSE + FROM torrust_torrents + WHERE original_info_hash is NULL; + +-- Step 3: Create one record for each torrent with the same original and +-- canonical infohashes. 
+INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash is NOT NULL + AND info_hash = original_info_hash; + +-- Step 4: Create two records for each torrent with a different original and +-- canonical infohashes. One record with the same original and canonical +-- infohashes and one record with the original infohash and the canonical +-- one. +-- Insert the canonical infohash +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash is NOT NULL + AND info_hash != original_info_hash; +-- Insert the original infohash pointing to the canonical +INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) +SELECT original_info_hash, info_hash, TRUE + FROM torrust_torrents + WHERE original_info_hash is NOT NULL + AND info_hash != original_info_hash; + +-- Step 5: Delete the `torrust_torrents::original_info_hash` column +-- SQLite 2021-03-12 (3.35.0) supports DROP COLUMN +-- https://www.sqlite.org/lang_altertable.html#alter_table_drop_column +ALTER TABLE torrust_torrents DROP COLUMN original_info_hash; + diff --git a/src/app.rs b/src/app.rs index fce0cfe5..614dda02 100644 --- a/src/app.rs +++ b/src/app.rs @@ -12,8 +12,8 @@ use crate::services::authentication::{DbUserAuthenticationRepository, JsonWebTok use crate::services::category::{self, DbCategoryRepository}; use crate::services::tag::{self, DbTagRepository}; use crate::services::torrent::{ - DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoRepository, DbTorrentListingGenerator, - DbTorrentRepository, DbTorrentTagRepository, + DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoHashRepository, DbTorrentInfoRepository, + DbTorrentListingGenerator, DbTorrentRepository, DbTorrentTagRepository, }; use 
crate::services::user::{self, DbBannedUserList, DbUserProfileRepository, DbUserRepository}; use crate::services::{proxy, settings, torrent}; @@ -68,6 +68,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running let user_authentication_repository = Arc::new(DbUserAuthenticationRepository::new(database.clone())); let user_profile_repository = Arc::new(DbUserProfileRepository::new(database.clone())); let torrent_repository = Arc::new(DbTorrentRepository::new(database.clone())); + let torrent_info_hash_repository = Arc::new(DbTorrentInfoHashRepository::new(database.clone())); let torrent_info_repository = Arc::new(DbTorrentInfoRepository::new(database.clone())); let torrent_file_repository = Arc::new(DbTorrentFileRepository::new(database.clone())); let torrent_announce_url_repository = Arc::new(DbTorrentAnnounceUrlRepository::new(database.clone())); @@ -92,6 +93,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running user_repository.clone(), category_repository.clone(), torrent_repository.clone(), + torrent_info_hash_repository.clone(), torrent_info_repository.clone(), torrent_file_repository.clone(), torrent_announce_url_repository.clone(), @@ -135,6 +137,7 @@ pub async fn run(configuration: Configuration, api_version: &Version) -> Running user_authentication_repository, user_profile_repository, torrent_repository, + torrent_info_hash_repository, torrent_info_repository, torrent_file_repository, torrent_announce_url_repository, diff --git a/src/common.rs b/src/common.rs index 0af991a2..09255678 100644 --- a/src/common.rs +++ b/src/common.rs @@ -7,8 +7,8 @@ use crate::services::authentication::{DbUserAuthenticationRepository, JsonWebTok use crate::services::category::{self, DbCategoryRepository}; use crate::services::tag::{self, DbTagRepository}; use crate::services::torrent::{ - DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoRepository, DbTorrentListingGenerator, - DbTorrentRepository, 
DbTorrentTagRepository, + DbTorrentAnnounceUrlRepository, DbTorrentFileRepository, DbTorrentInfoHashRepository, DbTorrentInfoRepository, + DbTorrentListingGenerator, DbTorrentRepository, DbTorrentTagRepository, }; use crate::services::user::{self, DbBannedUserList, DbUserProfileRepository, DbUserRepository}; use crate::services::{proxy, settings, torrent}; @@ -34,6 +34,7 @@ pub struct AppData { pub user_authentication_repository: Arc, pub user_profile_repository: Arc, pub torrent_repository: Arc, + pub torrent_info_hash_repository: Arc, pub torrent_info_repository: Arc, pub torrent_file_repository: Arc, pub torrent_announce_url_repository: Arc, @@ -69,6 +70,7 @@ impl AppData { user_authentication_repository: Arc, user_profile_repository: Arc, torrent_repository: Arc, + torrent_info_hash_repository: Arc, torrent_info_repository: Arc, torrent_file_repository: Arc, torrent_announce_url_repository: Arc, @@ -101,6 +103,7 @@ impl AppData { user_authentication_repository, user_profile_repository, torrent_repository, + torrent_info_hash_repository, torrent_info_repository, torrent_file_repository, torrent_announce_url_repository, diff --git a/src/databases/database.rs b/src/databases/database.rs index 72d15c18..6b5e8983 100644 --- a/src/databases/database.rs +++ b/src/databases/database.rs @@ -12,6 +12,7 @@ use crate::models::torrent_file::{DbTorrentInfo, Torrent, TorrentFile}; use crate::models::torrent_tag::{TagId, TorrentTag}; use crate::models::tracker_key::TrackerKey; use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile}; +use crate::services::torrent::OriginalInfoHashes; /// Database tables to be truncated when upgrading from v1.0.0 to v2.0.0. /// They must be in the correct order to avoid foreign key errors. 
@@ -87,6 +88,7 @@ pub enum Error { TorrentNotFound, TorrentAlreadyExists, // when uploading an already uploaded info_hash TorrentTitleAlreadyExists, + TorrentInfoHashNotFound, } /// Get the Driver of the Database from the Connection String @@ -229,6 +231,17 @@ pub trait Database: Sync + Send { )) } + /// Returns the list of original infohashes for a canonical infohash. + /// + /// When you upload a torrent the infohash might change because the Index + /// removes the non-standard fields in the `info` dictionary. That makes the + /// infohash change. The canonical infohash is the resulting infohash. + /// This function returns the original infohashes of a canonical infohash. + /// The relationship is 1 canonical infohash -> N original infohashes. + async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result; + + async fn insert_torrent_info_hash(&self, original: &InfoHash, canonical: &InfoHash) -> Result<(), Error>; + /// Get torrent's info as `DbTorrentInfo` from `torrent_id`. 
async fn get_torrent_info_from_id(&self, torrent_id: i64) -> Result; diff --git a/src/databases/mysql.rs b/src/databases/mysql.rs index cb7b3317..8f342f21 100644 --- a/src/databases/mysql.rs +++ b/src/databases/mysql.rs @@ -17,6 +17,7 @@ use crate::models::torrent_file::{DbTorrentAnnounceUrl, DbTorrentFile, DbTorrent use crate::models::torrent_tag::{TagId, TorrentTag}; use crate::models::tracker_key::TrackerKey; use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile}; +use crate::services::torrent::{DbTorrentInfoHash, OriginalInfoHashes}; use crate::utils::clock; use crate::utils::hex::from_bytes; @@ -250,9 +251,8 @@ impl Database for Mysql { .map(|v| i64::try_from(v.last_insert_id()).expect("last ID is larger than i64")) .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("Duplicate entry") { - // Example error message when you try to insert a duplicate category: - // Error: Duplicate entry 'category name SAMPLE_NAME' for key 'torrust_categories.name' + log::error!("DB error: {:?}", err); + if err.message().contains("Duplicate entry") && err.message().contains("name") { database::Error::CategoryAlreadyExists } else { database::Error::Error @@ -425,7 +425,8 @@ impl Database for Mysql { title: &str, description: &str, ) -> Result { - let info_hash = torrent.info_hash(); + let info_hash = torrent.info_hash_hex(); + let canonical_info_hash = torrent.canonical_info_hash(); // open pool connection let mut conn = self.pool.acquire().await.map_err(|_| database::Error::Error)?; @@ -444,7 +445,7 @@ impl Database for Mysql { let private = torrent.info.private.unwrap_or(0); // add torrent - let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, original_info_hash, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())") + let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, 
info_hash, size, name, pieces, piece_length, private, root_hash, `source`, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, UTC_TIMESTAMP())") .bind(uploader_id) .bind(category_id) .bind(info_hash.to_lowercase()) @@ -455,16 +456,14 @@ impl Database for Mysql { .bind(private) .bind(root_hash) .bind(torrent.info.source.clone()) - .bind(original_info_hash.to_hex_string()) - .execute(&self.pool) + .execute(&mut tx) .await .map(|v| i64::try_from(v.last_insert_id()).expect("last ID is larger than i64")) .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("info_hash") { + log::error!("DB error: {:?}", err); + if err.message().contains("Duplicate entry") && err.message().contains("info_hash") { database::Error::TorrentAlreadyExists - } else if err.message().contains("title") { - database::Error::TorrentTitleAlreadyExists } else { database::Error::Error } @@ -472,6 +471,27 @@ impl Database for Mysql { _ => database::Error::Error })?; + // add torrent canonical infohash + + let insert_info_hash_result = + query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)") + .bind(original_info_hash.to_hex_string()) + .bind(canonical_info_hash.to_hex_string()) + .bind(true) + .execute(&mut tx) + .await + .map(|_| ()) + .map_err(|err| { + log::error!("DB error: {:?}", err); + database::Error::Error + }); + + // rollback transaction on error + if let Err(e) = insert_info_hash_result { + drop(tx.rollback().await); + return Err(e); + } + let insert_torrent_files_result = if let Some(length) = torrent.info.length { query("INSERT INTO torrust_torrent_files (md5sum, torrent_id, length) VALUES (?, ?, ?)") .bind(torrent.info.md5sum.clone()) @@ -549,9 +569,8 @@ impl Database for Mysql { .await .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("info_hash") { - database::Error::TorrentAlreadyExists - } else if err.message().contains("title") { + log::error!("DB error: 
{:?}", err); + if err.message().contains("Duplicate entry") && err.message().contains("title") { database::Error::TorrentTitleAlreadyExists } else { database::Error::Error @@ -573,6 +592,40 @@ impl Database for Mysql { } } + async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result { + let db_info_hashes = query_as::<_, DbTorrentInfoHash>( + "SELECT info_hash, canonical_info_hash, original_is_known FROM torrust_torrent_info_hashes WHERE canonical_info_hash = ?", + ) + .bind(canonical.to_hex_string()) + .fetch_all(&self.pool) + .await + .map_err(|err| database::Error::ErrorWithText(err.to_string()))?; + + let info_hashes: Vec = db_info_hashes + .into_iter() + .map(|db_info_hash| { + InfoHash::from_str(&db_info_hash.info_hash) + .unwrap_or_else(|_| panic!("Invalid info-hash in database: {}", db_info_hash.info_hash)) + }) + .collect(); + + Ok(OriginalInfoHashes { + canonical_info_hash: *canonical, + original_info_hashes: info_hashes, + }) + } + + async fn insert_torrent_info_hash(&self, info_hash: &InfoHash, canonical: &InfoHash) -> Result<(), database::Error> { + query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)") + .bind(info_hash.to_hex_string()) + .bind(canonical.to_hex_string()) + .bind(true) + .execute(&self.pool) + .await + .map(|_| ()) + .map_err(|err| database::Error::ErrorWithText(err.to_string())) + } + async fn get_torrent_info_from_id(&self, torrent_id: i64) -> Result { query_as::<_, DbTorrentInfo>( "SELECT torrent_id, info_hash, name, pieces, piece_length, private, root_hash FROM torrust_torrents WHERE torrent_id = ?", @@ -678,7 +731,8 @@ impl Database for Mysql { .await .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("UNIQUE") { + log::error!("DB error: {:?}", err); + if err.message().contains("Duplicate entry") && err.message().contains("title") { database::Error::TorrentTitleAlreadyExists } else { database::Error::Error diff 
--git a/src/databases/sqlite.rs b/src/databases/sqlite.rs index 14aa8808..d183cd80 100644 --- a/src/databases/sqlite.rs +++ b/src/databases/sqlite.rs @@ -17,6 +17,7 @@ use crate::models::torrent_file::{DbTorrentAnnounceUrl, DbTorrentFile, DbTorrent use crate::models::torrent_tag::{TagId, TorrentTag}; use crate::models::tracker_key::TrackerKey; use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile}; +use crate::services::torrent::{DbTorrentInfoHash, OriginalInfoHashes}; use crate::utils::clock; use crate::utils::hex::from_bytes; @@ -240,7 +241,8 @@ impl Database for Sqlite { .map(|v| v.last_insert_rowid()) .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("UNIQUE") { + log::error!("DB error: {:?}", err); + if err.message().contains("UNIQUE") && err.message().contains("name") { database::Error::CategoryAlreadyExists } else { database::Error::Error @@ -413,7 +415,8 @@ impl Database for Sqlite { title: &str, description: &str, ) -> Result { - let info_hash = torrent.info_hash(); + let info_hash = torrent.info_hash_hex(); + let canonical_info_hash = torrent.canonical_info_hash(); // open pool connection let mut conn = self.pool.acquire().await.map_err(|_| database::Error::Error)?; @@ -432,7 +435,7 @@ impl Database for Sqlite { let private = torrent.info.private.unwrap_or(0); // add torrent - let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, original_info_hash, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%S',DATETIME('now', 'utc')))") + let torrent_id = query("INSERT INTO torrust_torrents (uploader_id, category_id, info_hash, size, name, pieces, piece_length, private, root_hash, `source`, date_uploaded) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%S',DATETIME('now', 'utc')))") .bind(uploader_id) .bind(category_id) .bind(info_hash.to_lowercase()) @@ 
-443,16 +446,14 @@ impl Database for Sqlite { .bind(private) .bind(root_hash) .bind(torrent.info.source.clone()) - .bind(original_info_hash.to_hex_string()) - .execute(&self.pool) + .execute(&mut tx) .await .map(|v| v.last_insert_rowid()) .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("info_hash") { + log::error!("DB error: {:?}", err); + if err.message().contains("UNIQUE") && err.message().contains("info_hash") { database::Error::TorrentAlreadyExists - } else if err.message().contains("title") { - database::Error::TorrentTitleAlreadyExists } else { database::Error::Error } @@ -460,6 +461,27 @@ impl Database for Sqlite { _ => database::Error::Error })?; + // add torrent canonical infohash + + let insert_info_hash_result = + query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)") + .bind(original_info_hash.to_hex_string()) + .bind(canonical_info_hash.to_hex_string()) + .bind(true) + .execute(&mut tx) + .await + .map(|_| ()) + .map_err(|err| { + log::error!("DB error: {:?}", err); + database::Error::Error + }); + + // rollback transaction on error + if let Err(e) = insert_info_hash_result { + drop(tx.rollback().await); + return Err(e); + } + let insert_torrent_files_result = if let Some(length) = torrent.info.length { query("INSERT INTO torrust_torrent_files (md5sum, torrent_id, length) VALUES (?, ?, ?)") .bind(torrent.info.md5sum.clone()) @@ -537,9 +559,8 @@ impl Database for Sqlite { .await .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("info_hash") { - database::Error::TorrentAlreadyExists - } else if err.message().contains("title") { + log::error!("DB error: {:?}", err); + if err.message().contains("UNIQUE") && err.message().contains("title") { database::Error::TorrentTitleAlreadyExists } else { database::Error::Error @@ -561,6 +582,40 @@ impl Database for Sqlite { } } + async fn get_torrent_original_info_hashes(&self, canonical: 
&InfoHash) -> Result { + let db_info_hashes = query_as::<_, DbTorrentInfoHash>( + "SELECT info_hash, canonical_info_hash, original_is_known FROM torrust_torrent_info_hashes WHERE canonical_info_hash = ?", + ) + .bind(canonical.to_hex_string()) + .fetch_all(&self.pool) + .await + .map_err(|err| database::Error::ErrorWithText(err.to_string()))?; + + let info_hashes: Vec = db_info_hashes + .into_iter() + .map(|db_info_hash| { + InfoHash::from_str(&db_info_hash.info_hash) + .unwrap_or_else(|_| panic!("Invalid info-hash in database: {}", db_info_hash.info_hash)) + }) + .collect(); + + Ok(OriginalInfoHashes { + canonical_info_hash: *canonical, + original_info_hashes: info_hashes, + }) + } + + async fn insert_torrent_info_hash(&self, original: &InfoHash, canonical: &InfoHash) -> Result<(), database::Error> { + query("INSERT INTO torrust_torrent_info_hashes (info_hash, canonical_info_hash, original_is_known) VALUES (?, ?, ?)") + .bind(original.to_hex_string()) + .bind(canonical.to_hex_string()) + .bind(true) + .execute(&self.pool) + .await + .map(|_| ()) + .map_err(|err| database::Error::ErrorWithText(err.to_string())) + } + async fn get_torrent_info_from_id(&self, torrent_id: i64) -> Result { query_as::<_, DbTorrentInfo>( "SELECT torrent_id, info_hash, name, pieces, piece_length, private, root_hash FROM torrust_torrents WHERE torrent_id = ?", @@ -666,7 +721,8 @@ impl Database for Sqlite { .await .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("UNIQUE") { + log::error!("DB error: {:?}", err); + if err.message().contains("UNIQUE") && err.message().contains("title") { database::Error::TorrentTitleAlreadyExists } else { database::Error::Error diff --git a/src/errors.rs b/src/errors.rs index c3cd08ea..6706cc57 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -112,6 +112,9 @@ pub enum ServiceError { #[display(fmt = "This torrent already exists in our database.")] InfoHashAlreadyExists, + #[display(fmt = "A torrent with the same canonical 
infohash already exists in our database.")] + CanonicalInfoHashAlreadyExists, + #[display(fmt = "This torrent title has already been used.")] TorrentTitleAlreadyExists, @@ -147,6 +150,7 @@ impl From for ServiceError { if let Some(err) = e.as_database_error() { return if err.code() == Some(Cow::from("2067")) { if err.message().contains("torrust_torrents.info_hash") { + println!("info_hash already exists {}", err.message()); ServiceError::InfoHashAlreadyExists } else { ServiceError::InternalServerError @@ -228,6 +232,7 @@ pub fn http_status_code_for_service_error(error: &ServiceError) -> StatusCode { ServiceError::InvalidTag => StatusCode::BAD_REQUEST, ServiceError::Unauthorized => StatusCode::FORBIDDEN, ServiceError::InfoHashAlreadyExists => StatusCode::BAD_REQUEST, + ServiceError::CanonicalInfoHashAlreadyExists => StatusCode::BAD_REQUEST, ServiceError::TorrentTitleAlreadyExists => StatusCode::BAD_REQUEST, ServiceError::TrackerOffline => StatusCode::INTERNAL_SERVER_ERROR, ServiceError::CategoryAlreadyExists => StatusCode::BAD_REQUEST, @@ -259,5 +264,6 @@ pub fn map_database_error_to_service_error(error: &database::Error) -> ServiceEr database::Error::TorrentAlreadyExists => ServiceError::InfoHashAlreadyExists, database::Error::TorrentTitleAlreadyExists => ServiceError::TorrentTitleAlreadyExists, database::Error::UnrecognizedDatabaseDriver => ServiceError::InternalServerError, + database::Error::TorrentInfoHashNotFound => ServiceError::TorrentNotFound, } } diff --git a/src/models/torrent_file.rs b/src/models/torrent_file.rs index 125a457e..97294252 100644 --- a/src/models/torrent_file.rs +++ b/src/models/torrent_file.rs @@ -3,6 +3,7 @@ use serde_bencode::ser; use serde_bytes::ByteBuf; use sha1::{Digest, Sha1}; +use super::info_hash::InfoHash; use crate::config::Configuration; use crate::services::torrent_file::NewTorrentInfoRequest; use crate::utils::hex::{from_bytes, into_bytes}; @@ -228,11 +229,15 @@ impl Torrent { } #[must_use] - pub fn info_hash(&self) -> String 
{ - // todo: return an InfoHash struct + pub fn info_hash_hex(&self) -> String { from_bytes(&self.calculate_info_hash_as_bytes()).to_lowercase() } + #[must_use] + pub fn canonical_info_hash(&self) -> InfoHash { + self.calculate_info_hash_as_bytes().into() + } + #[must_use] pub fn file_size(&self) -> i64 { match self.info.length { @@ -372,7 +377,7 @@ mod tests { httpseeds: None, }; - assert_eq!(torrent.info_hash(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca"); + assert_eq!(torrent.info_hash_hex(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca"); } mod infohash_should_be_calculated_for { @@ -413,7 +418,7 @@ mod tests { httpseeds: None, }; - assert_eq!(torrent.info_hash(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca"); + assert_eq!(torrent.info_hash_hex(), "79fa9e4a2927804fe4feab488a76c8c2d3d1cdca"); } #[test] @@ -452,7 +457,7 @@ mod tests { httpseeds: None, }; - assert_eq!(torrent.info_hash(), "aa2aca91ab650c4d249c475ca3fa604f2ccb0d2a"); + assert_eq!(torrent.info_hash_hex(), "aa2aca91ab650c4d249c475ca3fa604f2ccb0d2a"); } #[test] @@ -487,7 +492,7 @@ mod tests { httpseeds: None, }; - assert_eq!(torrent.info_hash(), "ccc1cf4feb59f3fa85c96c9be1ebbafcfe8a9cc8"); + assert_eq!(torrent.info_hash_hex(), "ccc1cf4feb59f3fa85c96c9be1ebbafcfe8a9cc8"); } #[test] @@ -522,7 +527,7 @@ mod tests { httpseeds: None, }; - assert_eq!(torrent.info_hash(), "d3a558d0a19aaa23ba6f9f430f40924d10fefa86"); + assert_eq!(torrent.info_hash_hex(), "d3a558d0a19aaa23ba6f9f430f40924d10fefa86"); } } } diff --git a/src/services/torrent.rs b/src/services/torrent.rs index aa6b8b0b..19cf082b 100644 --- a/src/services/torrent.rs +++ b/src/services/torrent.rs @@ -1,7 +1,8 @@ //! Torrent service. 
use std::sync::Arc; -use serde_derive::Deserialize; +use log::debug; +use serde_derive::{Deserialize, Serialize}; use super::category::DbCategoryRepository; use super::user::DbUserRepository; @@ -28,6 +29,7 @@ pub struct Index { user_repository: Arc, category_repository: Arc, torrent_repository: Arc, + torrent_info_hash_repository: Arc, torrent_info_repository: Arc, torrent_file_repository: Arc, torrent_announce_url_repository: Arc, @@ -83,6 +85,7 @@ impl Index { user_repository: Arc, category_repository: Arc, torrent_repository: Arc, + torrent_info_hash_repository: Arc, torrent_info_repository: Arc, torrent_file_repository: Arc, torrent_announce_url_repository: Arc, @@ -96,6 +99,7 @@ impl Index { user_repository, category_repository, torrent_repository, + torrent_info_hash_repository, torrent_info_repository, torrent_file_repository, torrent_announce_url_repository, @@ -162,25 +166,51 @@ impl Index { .await .map_err(|_| ServiceError::InvalidCategory)?; + let canonical_info_hash = torrent.canonical_info_hash(); + + let original_info_hashes = self + .torrent_info_hash_repository + .get_torrent_original_info_hashes(&canonical_info_hash) + .await?; + + if !original_info_hashes.is_empty() { + // Torrent with the same canonical infohash was already uploaded + debug!("Canonical infohash found: {:?}", canonical_info_hash.to_hex_string()); + + if let Some(original_info_hash) = original_info_hashes.find(&original_info_hash) { + // The exact original infohash was already uploaded + debug!("Original infohash found: {:?}", original_info_hash.to_hex_string()); + + return Err(ServiceError::InfoHashAlreadyExists); + } + + // A new original infohash is being uploaded with a canonical infohash that already exists. + debug!("Original infohash not found: {:?}", original_info_hash.to_hex_string()); + + // Add the new associated original infohash to the canonical one. 
+ self.torrent_info_hash_repository + .add(&original_info_hash, &canonical_info_hash) + .await?; + return Err(ServiceError::CanonicalInfoHashAlreadyExists); + } + + // First time a torrent with this original infohash is uploaded. + let torrent_id = self .torrent_repository .add(&original_info_hash, &torrent, &metadata, user_id, category) .await?; - - let info_hash: InfoHash = torrent - .info_hash() - .parse() - .expect("the parsed torrent should have a valid info hash"); + let info_hash = torrent.canonical_info_hash(); drop( self.tracker_statistics_importer - .import_torrent_statistics(torrent_id, &torrent.info_hash()) + .import_torrent_statistics(torrent_id, &torrent.info_hash_hex()) .await, ); // We always whitelist the torrent on the tracker because even if the tracker mode is `public` // it could be changed to `private` later on. - if let Err(e) = self.tracker_service.whitelist_info_hash(torrent.info_hash()).await { + if let Err(e) = self.tracker_service.whitelist_info_hash(torrent.info_hash_hex()).await { // If the torrent can't be whitelisted somehow, remove the torrent from database drop(self.torrent_repository.delete(&torrent_id).await); return Err(e); @@ -518,6 +548,73 @@ impl DbTorrentRepository { } } +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, sqlx::FromRow)] +pub struct DbTorrentInfoHash { + pub info_hash: String, + pub canonical_info_hash: String, + pub original_is_known: bool, +} + +pub struct DbTorrentInfoHashRepository { + database: Arc>, +} + +pub struct OriginalInfoHashes { + pub canonical_info_hash: InfoHash, + pub original_info_hashes: Vec, +} + +impl OriginalInfoHashes { + #[must_use] + pub fn is_empty(&self) -> bool { + self.original_info_hashes.is_empty() + } + + #[must_use] + pub fn find(&self, original_info_hash: &InfoHash) -> Option<&InfoHash> { + self.original_info_hashes.iter().find(|&hash| *hash == *original_info_hash) + } +} + +impl DbTorrentInfoHashRepository { + #[must_use] + pub fn new(database: Arc>) -> Self { + 
Self { database } + } + + /// It returns all the original infohashes associated to the canonical one. + /// + /// # Errors + /// + /// This function will return an error there is a database error. + pub async fn get_torrent_original_info_hashes(&self, info_hash: &InfoHash) -> Result { + self.database.get_torrent_original_info_hashes(info_hash).await + } + + /// Inserts a new infohash for the torrent. Torrents can be associated to + /// different infohashes because the Index might change the original infohash. + /// The index track the final infohash used (canonical) and all the original + /// ones. + /// + /// # Errors + /// + /// This function will return an error there is a database error. + pub async fn add(&self, original_info_hash: &InfoHash, canonical_info_hash: &InfoHash) -> Result<(), Error> { + self.database + .insert_torrent_info_hash(original_info_hash, canonical_info_hash) + .await + } + + /// Deletes the entire torrent in the database. + /// + /// # Errors + /// + /// This function will return an error there is a database error. + pub async fn delete(&self, torrent_id: &TorrentId) -> Result<(), Error> { + self.database.delete_torrent(*torrent_id).await + } +} + pub struct DbTorrentInfoRepository { database: Arc>, } diff --git a/src/tracker/service.rs b/src/tracker/service.rs index c49c7ac1..e39cf0a6 100644 --- a/src/tracker/service.rs +++ b/src/tracker/service.rs @@ -147,12 +147,17 @@ impl Service { let body = response.text().await; if let Ok(body) = body { + if body == *"torrent not known" { + // todo: temporary fix. the service should return a 404 (StatusCode::NOT_FOUND). + return Err(ServiceError::TorrentNotFound); + } + let torrent_info = serde_json::from_str(&body); if let Ok(torrent_info) = torrent_info { Ok(torrent_info) } else { - error!("Failed to parse torrent info from tracker response"); + error!("Failed to parse torrent info from tracker response. 
Body: {}", body); Err(ServiceError::InternalServerError) } } else { diff --git a/src/upgrades/from_v1_0_0_to_v2_0_0/databases/sqlite_v2_0_0.rs b/src/upgrades/from_v1_0_0_to_v2_0_0/databases/sqlite_v2_0_0.rs index 8fbf3aa2..eb298687 100644 --- a/src/upgrades/from_v1_0_0_to_v2_0_0/databases/sqlite_v2_0_0.rs +++ b/src/upgrades/from_v1_0_0_to_v2_0_0/databases/sqlite_v2_0_0.rs @@ -118,7 +118,7 @@ impl SqliteDatabaseV2_0_0 { .map(|v| v.last_insert_rowid()) .map_err(|e| match e { sqlx::Error::Database(err) => { - if err.message().contains("UNIQUE") { + if err.message().contains("UNIQUE") && err.message().contains("name") { database::Error::CategoryAlreadyExists } else { database::Error::Error diff --git a/src/utils/parse_torrent.rs b/src/utils/parse_torrent.rs index 0a0999ac..21a219d5 100644 --- a/src/utils/parse_torrent.rs +++ b/src/utils/parse_torrent.rs @@ -98,6 +98,9 @@ mod tests { // The infohash is not the original infohash of the torrent file, // but the infohash of the info dictionary without the custom keys. - assert_eq!(torrent.info_hash(), "8aa01a4c816332045ffec83247ccbc654547fedf".to_string()); + assert_eq!( + torrent.info_hash_hex(), + "8aa01a4c816332045ffec83247ccbc654547fedf".to_string() + ); } } diff --git a/src/web/api/v1/contexts/torrent/handlers.rs b/src/web/api/v1/contexts/torrent/handlers.rs index 2165256c..6f9c158a 100644 --- a/src/web/api/v1/contexts/torrent/handlers.rs +++ b/src/web/api/v1/contexts/torrent/handlers.rs @@ -92,7 +92,7 @@ pub async fn download_torrent_handler( return ServiceError::InternalServerError.into_response(); }; - torrent_file_response(bytes, &format!("{}.torrent", torrent.info.name), &torrent.info_hash()) + torrent_file_response(bytes, &format!("{}.torrent", torrent.info.name), &torrent.info_hash_hex()) } /// It returns a list of torrents matching the search criteria. 
@@ -242,7 +242,7 @@ pub async fn create_random_torrent_handler(State(_app_data): State> return ServiceError::InternalServerError.into_response(); }; - torrent_file_response(bytes, &format!("{}.torrent", torrent.info.name), &torrent.info_hash()) + torrent_file_response(bytes, &format!("{}.torrent", torrent.info.name), &torrent.info_hash_hex()) } /// Extracts the [`TorrentRequest`] from the multipart form payload. From 110e1596d0af58bd216735ba939b63930d21562a Mon Sep 17 00:00:00 2001 From: Jose Celano Date: Wed, 6 Sep 2023 18:29:55 +0100 Subject: [PATCH 2/2] test: [#261]: do not allow uploading two torrents with the same canonical infohash If you upload a torrent, the infohash might change if the `info` dictionary contains custom fields. The Index removes non-standard custom fields, and that generates a new infohash for the torrent. If you upload a second torrent which is different from a previous one only in the custom fields, the same canonical infohash will be generated, so the torrent will be rejected as duplicated. The new original infohash will be stored in the database.
--- .github/workflows/coverage.yaml | 41 ++++++- .github/workflows/testing.yaml | 8 +- src/databases/database.rs | 7 +- src/databases/mysql.rs | 2 +- src/databases/sqlite.rs | 2 +- src/lib.rs | 2 +- src/services/torrent.rs | 8 +- src/web/api/mod.rs | 2 +- src/web/api/v1/contexts/torrent/mod.rs | 26 +++++ src/web/api/v1/mod.rs | 2 +- tests/common/contexts/torrent/file.rs | 10 +- tests/common/contexts/torrent/fixtures.rs | 105 ++++++++++++++++++ .../web/api/v1/contexts/torrent/contract.rs | 35 +++++- 13 files changed, 226 insertions(+), 24 deletions(-) diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 828f2fd6..2bc0b3e4 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -2,14 +2,37 @@ name: Coverage on: push: - pull_request: + branches: + - develop + pull_request_target: + branches: + - develop env: CARGO_TERM_COLOR: always jobs: + secrets: + name: Secrets + environment: coverage + runs-on: ubuntu-latest + + outputs: + continue: ${{ steps.check.outputs.continue }} + + steps: + - id: check + name: Check + env: + CODECOV_TOKEN: "${{ secrets.CODECOV_TOKEN }}" + if: "${{ env.CODECOV_TOKEN != '' }}" + run: echo "continue=true" >> $GITHUB_OUTPUT + report: name: Report + environment: coverage + needs: secrets + if: needs.secrets.outputs.continue == 'true' runs-on: ubuntu-latest env: CARGO_INCREMENTAL: "0" @@ -17,9 +40,17 @@ jobs: RUSTDOCFLAGS: "-Z profile -C codegen-units=1 -C inline-threshold=0 -C link-dead-code -C overflow-checks=off -C panic=abort -Z panic_abort_tests" steps: - - id: checkout - name: Checkout Repository - uses: actions/checkout@v3 + - id: checkout_push + if: github.event_name == 'push' + name: Checkout Repository (Push) + uses: actions/checkout@v4 + + - id: checkout_pull_request_target + if: github.event_name == 'pull_request_target' + name: Checkout Repository (Pull Request Target) + uses: actions/checkout@v4 + with: + ref: "refs/pull/${{ github.event.pull_request.number }}/head" - 
id: setup name: Setup Toolchain @@ -61,4 +92,4 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} files: ${{ steps.coverage.outputs.report }} verbose: true - fail_ci_if_error: true + fail_ci_if_error: true \ No newline at end of file diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index beaf0754..6c1fc1e3 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -65,10 +65,14 @@ jobs: name: Run Lint Checks run: cargo clippy --tests --benches --examples --workspace --all-targets --all-features -- -D clippy::correctness -D clippy::suspicious -D clippy::complexity -D clippy::perf -D clippy::style -D clippy::pedantic - - id: doc - name: Run Documentation Checks + - id: testdoc + name: Run Documentation Tests run: cargo test --doc + - id: builddoc + name: Build Documentation + run: cargo doc --no-deps --bins --examples --workspace --all-features + unit: name: Units runs-on: ubuntu-latest diff --git a/src/databases/database.rs b/src/databases/database.rs index 6b5e8983..84b506a5 100644 --- a/src/databases/database.rs +++ b/src/databases/database.rs @@ -231,14 +231,17 @@ pub trait Database: Sync + Send { )) } - /// Returns the list of original infohashes ofr a canonical infohash. + /// Returns the list of all infohashes producing the same canonical infohash. /// /// When you upload a torrent the infohash migth change because the Index /// remove the non-standard fields in the `info` dictionary. That makes the /// infohash change. The canonical infohash is the resulting infohash. /// This function returns the original infohashes of a canonical infohash. + /// + /// If the original infohash was unknown, it returns the canonical infohash. + /// /// The relationship is 1 canonical infohash -> N original infohashes. 
- async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result; + async fn get_torrent_canonical_info_hash_group(&self, canonical: &InfoHash) -> Result; async fn insert_torrent_info_hash(&self, original: &InfoHash, canonical: &InfoHash) -> Result<(), Error>; diff --git a/src/databases/mysql.rs b/src/databases/mysql.rs index 8f342f21..38edcdde 100644 --- a/src/databases/mysql.rs +++ b/src/databases/mysql.rs @@ -592,7 +592,7 @@ impl Database for Mysql { } } - async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result { + async fn get_torrent_canonical_info_hash_group(&self, canonical: &InfoHash) -> Result { let db_info_hashes = query_as::<_, DbTorrentInfoHash>( "SELECT info_hash, canonical_info_hash, original_is_known FROM torrust_torrent_info_hashes WHERE canonical_info_hash = ?", ) diff --git a/src/databases/sqlite.rs b/src/databases/sqlite.rs index d183cd80..6cae2d4a 100644 --- a/src/databases/sqlite.rs +++ b/src/databases/sqlite.rs @@ -582,7 +582,7 @@ impl Database for Sqlite { } } - async fn get_torrent_original_info_hashes(&self, canonical: &InfoHash) -> Result { + async fn get_torrent_canonical_info_hash_group(&self, canonical: &InfoHash) -> Result { let db_info_hashes = query_as::<_, DbTorrentInfoHash>( "SELECT info_hash, canonical_info_hash, original_is_known FROM torrust_torrent_info_hashes WHERE canonical_info_hash = ?", ) diff --git a/src/lib.rs b/src/lib.rs index faffb360..8712093f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -202,7 +202,7 @@ //! torrent_info_update_interval = 3600 //! ``` //! -//! For more information about configuration you can visit the documentation for the [`config`](crate::config) module. +//! For more information about configuration you can visit the documentation for the [`config`] module. //! //! Alternatively to the `config.toml` file you can use one environment variable `TORRUST_IDX_BACK_CONFIG` to pass the configuration to the tracker: //!
diff --git a/src/services/torrent.rs b/src/services/torrent.rs index 19cf082b..635e6016 100644 --- a/src/services/torrent.rs +++ b/src/services/torrent.rs @@ -170,7 +170,7 @@ impl Index { let original_info_hashes = self .torrent_info_hash_repository - .get_torrent_original_info_hashes(&canonical_info_hash) + .get_canonical_info_hash_group(&canonical_info_hash) .await?; if !original_info_hashes.is_empty() { @@ -582,13 +582,13 @@ impl DbTorrentInfoHashRepository { Self { database } } - /// It returns all the original infohashes associated to the canonical one. + /// It returns all the infohashes associated to the canonical one. /// /// # Errors /// /// This function will return an error there is a database error. - pub async fn get_torrent_original_info_hashes(&self, info_hash: &InfoHash) -> Result { - self.database.get_torrent_original_info_hashes(info_hash).await + pub async fn get_canonical_info_hash_group(&self, info_hash: &InfoHash) -> Result { + self.database.get_torrent_canonical_info_hash_group(info_hash).await } /// Inserts a new infohash for the torrent. Torrents can be associated to diff --git a/src/web/api/mod.rs b/src/web/api/mod.rs index 46ffe2b7..a7b5bf86 100644 --- a/src/web/api/mod.rs +++ b/src/web/api/mod.rs @@ -2,7 +2,7 @@ //! //! Currently, the API has only one version: `v1`. //! -//! Refer to the [`v1`](crate::web::api::v1) module for more information. +//! Refer to the [`v1`] module for more information. pub mod server; pub mod v1; diff --git a/src/web/api/v1/contexts/torrent/mod.rs b/src/web/api/v1/contexts/torrent/mod.rs index 82536cb8..a4a95de1 100644 --- a/src/web/api/v1/contexts/torrent/mod.rs +++ b/src/web/api/v1/contexts/torrent/mod.rs @@ -2,6 +2,32 @@ //! //! This API context is responsible for handling all torrent related requests. //! +//! # Original and canonical infohashes +//! +//! Uploaded torrents can contain non-standard fields in the `info` dictionary. +//! +//!
For example, this is a torrent file in JSON format with a "custom" field. +//! +//! ```json +//! { +//! "info": { +//! "length": 602515, +//! "name": "mandelbrot_set_01", +//! "piece length": 32768, +//! "pieces": "8A 88 32 BE ED 05 5F AA C4 AF 4A 90 4B 9A BF 0D EC 83 42 1C 73 39 05 B8 D6 20 2C 1B D1 8A 53 28 1F B5 D4 23 0A 23 C8 DB AC C4 E6 6B 16 12 08 C7 A4 AD 64 45 70 ED 91 0D F1 38 E7 DF 0C 1A D0 C9 23 27 7C D1 F9 D4 E5 A1 5F F5 E5 A0 E4 9E FB B1 43 F5 4B AD 0E D4 9D CB 49 F7 E6 7B BA 30 5F AF F9 88 56 FB 45 9A B4 95 92 3E 2C 7F DA A6 D3 82 E7 63 A3 BB 4B 28 F3 57 C7 CB 7D 8C 06 E3 46 AB D7 E8 8E 8A 8C 9F C7 E6 C5 C5 64 82 ED 47 BB 2A F1 B7 3F A5 3C 5B 9C AF 43 EC 2A E1 08 68 9A 49 C8 BF 1B 07 AD BE E9 2D 7E BE 9C 18 7F 4C A1 97 0E 54 3A 18 94 0E 60 8D 5C 69 0E 41 46 0D 3C 9A 37 F6 81 62 4F 95 C0 73 92 CA 9A D5 A9 89 AC 8B 85 12 53 0B FB E2 96 26 3E 26 A6 5B 70 53 48 65 F3 6C 27 0F 6B BD 1C EE EB 1A 9D 5F 77 A8 D8 AF D8 14 82 4A E0 B4 62 BC F1 A5 F5 F2 C7 60 F8 38 C8 5B 0B A9 07 DD 86 FA C0 7B F0 26 D7 D1 9A 42 C3 1F 9F B9 59 83 10 62 41 E9 06 3C 6D A1 19 75 01 57 25 9E B7 FE DF 91 04 D4 51 4B 6D 44 02 8D 31 8E 84 26 95 0F 30 31 F0 2C 16 39 BD 53 1D CF D3 5E 3E 41 A9 1E 14 3F 73 24 AC 5E 9E FC 4D C5 70 45 0F 45 8B 9B 52 E6 D0 26 47 8F 43 08 9E 2A 7C C5 92 D5 86 36 FE 48 E9 B8 86 84 92 23 49 5B EE C4 31 B2 1D 10 75 8E 4C 07 84 8F", +//! "custom": "custom03" +//! } +//! } +//! ``` +//! +//! When you upload a torrent file with non-standards fields in the `info` +//! dictionary, the Index removes those non-standard fields. That generates a +//! new info-hash because all fields in the `info` key are used to calculate it. +//! +//! The Index stores the original info-hash. The resulting info-hash after +//! removing the non-standard fields is called "canonical" infohash. The Index +//! stores the relationship between the original info-hash and the canonical one. +//! //! # Endpoints //! //! 
- [Upload new torrent](#upload-new-torrent) diff --git a/src/web/api/v1/mod.rs b/src/web/api/v1/mod.rs index e9b3e9d6..d7ae5bab 100644 --- a/src/web/api/v1/mod.rs +++ b/src/web/api/v1/mod.rs @@ -2,7 +2,7 @@ //! //! The API is organized in contexts. //! -//! Refer to the [`contexts`](crate::web::api::v1::contexts) module for more +//! Refer to the [`contexts`] module for more //! information. pub mod auth; pub mod contexts; diff --git a/tests/common/contexts/torrent/file.rs b/tests/common/contexts/torrent/file.rs index ce3fbf95..b5f58339 100644 --- a/tests/common/contexts/torrent/file.rs +++ b/tests/common/contexts/torrent/file.rs @@ -8,21 +8,21 @@ use serde::Deserialize; use which::which; /// Attributes parsed from a torrent file. -#[derive(Deserialize, Clone)] +#[derive(Deserialize, Clone, Debug)] pub struct TorrentFileInfo { pub name: String, pub comment: Option, - pub creation_date: u64, - pub created_by: String, + pub creation_date: Option, + pub created_by: Option, pub source: Option, pub info_hash: String, pub torrent_size: u64, pub content_size: u64, pub private: bool, pub tracker: Option, - pub announce_list: Vec>, + pub announce_list: Option>>, pub update_url: Option, - pub dht_nodes: Vec, + pub dht_nodes: Option>, pub piece_size: u64, pub piece_count: u64, pub file_count: u64, diff --git a/tests/common/contexts/torrent/fixtures.rs b/tests/common/contexts/torrent/fixtures.rs index a464651f..5e89ce6e 100644 --- a/tests/common/contexts/torrent/fixtures.rs +++ b/tests/common/contexts/torrent/fixtures.rs @@ -2,7 +2,11 @@ use std::fs::File; use std::io::Write; use std::path::{Path, PathBuf}; +use serde::{Deserialize, Serialize}; +use serde_bytes::ByteBuf; use tempfile::{tempdir, TempDir}; +use torrust_index_backend::services::hasher::sha1; +use torrust_index_backend::utils::hex::into_bytes; use uuid::Uuid; use super::file::{create_torrent, parse_torrent, TorrentFileInfo}; @@ -94,6 +98,45 @@ impl TestTorrent { } } + pub fn with_custom_info_dict_field(id: Uuid, 
file_contents: &str, custom: &str) -> Self { + let temp_dir = temp_dir(); + + let torrents_dir_path = temp_dir.path().to_owned(); + + // Create the torrent in memory + let torrent = TestTorrentWithCustomInfoField::with_contents(id, file_contents, custom); + + // Bencode the torrent + let torrent_data = TestTorrentWithCustomInfoField::encode(&torrent).unwrap(); + + // Torrent temporary file path + let filename = format!("file-{id}.txt.torrent"); + let torrent_path = torrents_dir_path.join(filename.clone()); + + // Write the torrent file to the temporary file + let mut file = File::create(torrent_path.clone()).unwrap(); + file.write_all(&torrent_data).unwrap(); + + // Load torrent binary file + let torrent_file = BinaryFile::from_file_at_path(&torrent_path); + + // Load torrent file metadata + let torrent_info = parse_torrent(&torrent_path); + + let torrent_to_index = TorrentIndexInfo { + title: format!("title-{id}"), + description: format!("description-{id}"), + category: software_predefined_category_name(), + torrent_file, + name: filename, + }; + + TestTorrent { + file_info: torrent_info, + index_info: torrent_to_index, + } + } + pub fn info_hash(&self) -> InfoHash { self.file_info.info_hash.clone() } @@ -128,3 +171,65 @@ pub fn random_txt_file(dir: &Path, id: &Uuid) -> String { pub fn temp_dir() -> TempDir { tempdir().unwrap() } + +/// A minimal torrent file with a custom field in the info dict. 
+/// +/// ```json +/// { +/// "info": { +/// "length": 602515, +/// "name": "mandelbrot_set_01", +/// "piece length": 32768, +/// "pieces": "8A 88 32 BE ED 05 5F AA C4 AF 4A 90 4B 9A BF 0D EC 83 42 1C 73 39 05 B8 D6 20 2C 1B D1 8A 53 28 1F B5 D4 23 0A 23 C8 DB AC C4 E6 6B 16 12 08 C7 A4 AD 64 45 70 ED 91 0D F1 38 E7 DF 0C 1A D0 C9 23 27 7C D1 F9 D4 E5 A1 5F F5 E5 A0 E4 9E FB B1 43 F5 4B AD 0E D4 9D CB 49 F7 E6 7B BA 30 5F AF F9 88 56 FB 45 9A B4 95 92 3E 2C 7F DA A6 D3 82 E7 63 A3 BB 4B 28 F3 57 C7 CB 7D 8C 06 E3 46 AB D7 E8 8E 8A 8C 9F C7 E6 C5 C5 64 82 ED 47 BB 2A F1 B7 3F A5 3C 5B 9C AF 43 EC 2A E1 08 68 9A 49 C8 BF 1B 07 AD BE E9 2D 7E BE 9C 18 7F 4C A1 97 0E 54 3A 18 94 0E 60 8D 5C 69 0E 41 46 0D 3C 9A 37 F6 81 62 4F 95 C0 73 92 CA 9A D5 A9 89 AC 8B 85 12 53 0B FB E2 96 26 3E 26 A6 5B 70 53 48 65 F3 6C 27 0F 6B BD 1C EE EB 1A 9D 5F 77 A8 D8 AF D8 14 82 4A E0 B4 62 BC F1 A5 F5 F2 C7 60 F8 38 C8 5B 0B A9 07 DD 86 FA C0 7B F0 26 D7 D1 9A 42 C3 1F 9F B9 59 83 10 62 41 E9 06 3C 6D A1 19 75 01 57 25 9E B7 FE DF 91 04 D4 51 4B 6D 44 02 8D 31 8E 84 26 95 0F 30 31 F0 2C 16 39 BD 53 1D CF D3 5E 3E 41 A9 1E 14 3F 73 24 AC 5E 9E FC 4D C5 70 45 0F 45 8B 9B 52 E6 D0 26 47 8F 43 08 9E 2A 7C C5 92 D5 86 36 FE 48 E9 B8 86 84 92 23 49 5B EE C4 31 B2 1D 10 75 8E 4C 07 84 8F", +/// "custom": "custom03" +/// } +/// } +/// ``` +/// +/// Changing the value of the `custom` field will change the info-hash of the torrent. +#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)] +pub struct TestTorrentWithCustomInfoField { + pub info: InfoDictWithCustomField, +} + +/// A minimal torrent info dict with a custom field. 
+#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize)] +pub struct InfoDictWithCustomField { + #[serde(default)] + pub length: i64, + #[serde(default)] + pub name: String, + #[serde(rename = "piece length")] + pub piece_length: i64, + #[serde(default)] + pub pieces: ByteBuf, + #[serde(default)] + pub custom: String, +} + +impl TestTorrentWithCustomInfoField { + pub fn with_contents(id: Uuid, file_contents: &str, custom: &str) -> Self { + let sha1_of_file_contents = sha1(file_contents); + let pieces = into_bytes(&sha1_of_file_contents).expect("sha1 of test torrent contents cannot be converted to bytes"); + + Self { + info: InfoDictWithCustomField { + length: i64::try_from(file_contents.len()).expect("file contents size in bytes cannot exceed i64::MAX"), + name: format!("file-{id}.txt"), + piece_length: 16384, + pieces: ByteBuf::from(pieces), + custom: custom.to_owned(), + }, + } + } + + pub fn encode(torrent: &Self) -> Result, serde_bencode::Error> { + match serde_bencode::to_bytes(torrent) { + Ok(bencode_bytes) => Ok(bencode_bytes), + Err(e) => { + eprintln!("{e:?}"); + Err(e) + } + } + } +} diff --git a/tests/e2e/web/api/v1/contexts/torrent/contract.rs b/tests/e2e/web/api/v1/contexts/torrent/contract.rs index 9ddd5c33..32236100 100644 --- a/tests/e2e/web/api/v1/contexts/torrent/contract.rs +++ b/tests/e2e/web/api/v1/contexts/torrent/contract.rs @@ -293,10 +293,11 @@ mod for_authenticated_users { use torrust_index_backend::utils::parse_torrent::decode_torrent; use torrust_index_backend::web::api; + use uuid::Uuid; use crate::common::asserts::assert_json_error_response; use crate::common::client::Client; - use crate::common::contexts::torrent::fixtures::random_torrent; + use crate::common::contexts::torrent::fixtures::{random_torrent, TestTorrent}; use crate::common::contexts::torrent::forms::UploadTorrentMultipartForm; use crate::common::contexts::torrent::responses::UploadedTorrentResponse; use crate::e2e::environment::TestEnv; @@ -410,6 +411,38 @@ mod 
for_authenticated_users { assert_eq!(response.status, 400); } + #[tokio::test] + async fn it_should_not_allow_uploading_a_torrent_whose_canonical_info_hash_already_exists() { + let mut env = TestEnv::new(); + env.start(api::Version::V1).await; + + if !env.provides_a_tracker() { + println!("test skipped. It requires a tracker to be running."); + return; + } + + let uploader = new_logged_in_user(&env).await; + let client = Client::authenticated(&env.server_socket_addr().unwrap(), &uploader.token); + + let id1 = Uuid::new_v4(); + + // Upload the first torrent + let first_torrent = TestTorrent::with_custom_info_dict_field(id1, "data", "custom 01"); + let first_torrent_title = first_torrent.index_info.title.clone(); + let form: UploadTorrentMultipartForm = first_torrent.index_info.into(); + let _response = client.upload_torrent(form.into()).await; + + // Upload the second torrent with the same canonical info-hash as the first one. + // We need to change the title otherwise the torrent will be rejected + // because of the duplicate title. + let mut torrent_with_the_same_canonical_info_hash = TestTorrent::with_custom_info_dict_field(id1, "data", "custom 02"); + torrent_with_the_same_canonical_info_hash.index_info.title = format!("{first_torrent_title}-clone"); + let form: UploadTorrentMultipartForm = torrent_with_the_same_canonical_info_hash.index_info.into(); + let response = client.upload_torrent(form.into()).await; + + assert_eq!(response.status, 400); + } + #[tokio::test] async fn it_should_allow_authenticated_users_to_download_a_torrent_with_a_personal_announce_url() { let mut env = TestEnv::new();