Skip to content

Commit

Permalink
Add MD5 verification support for file uploads
Browse files Browse the repository at this point in the history
  • Loading branch information
dj-nitehawk committed Oct 25, 2021
1 parent 7d40de8 commit 23cec90
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 21 deletions.
49 changes: 30 additions & 19 deletions MongoDB.Entities/Core/FileEntity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;

Expand Down Expand Up @@ -36,16 +37,16 @@ public abstract class FileEntity : Entity
[BsonElement]
public bool UploadSuccessful { get; internal set; }

/// <summary>
/// If this value is set, the uploaded data will be hashed and matched against this value. If the hash is not equal, an exception will be thrown by the UploadAsync() method.
/// </summary>
[IgnoreDefault]
public string MD5 { get; set; }

/// <summary>
/// Access the DataStreamer class for uploading and downloading data
/// </summary>
public DataStreamer Data
{
get
{
return streamer ?? (streamer = new DataStreamer(this));
}
}
public DataStreamer Data => streamer ??= new DataStreamer(this);
}

[Collection("[BINARY_CHUNKS]")]
Expand All @@ -68,7 +69,7 @@ public string GenerateNewID()
/// </summary>
public class DataStreamer
{
private static readonly HashSet<string> indexedDBs = new HashSet<string>();
private static readonly HashSet<string> indexedDBs = new();

private readonly FileEntity parent;
private readonly Type parentType;
Expand All @@ -78,6 +79,7 @@ public class DataStreamer
private int chunkSize, readCount;
private byte[] buffer;
private List<byte> dataChunk;
private MD5 md5;

internal DataStreamer(FileEntity parent)
{
Expand Down Expand Up @@ -137,21 +139,19 @@ public async Task DownloadAsync(Stream stream, int batchSize = 1, CancellationTo
? chunkCollection.FindAsync(filter, options, cancellation)
: chunkCollection.FindAsync(session, filter, options, cancellation);

using (var cursor = await findTask.ConfigureAwait(false))
{
var hasChunks = false;
using var cursor = await findTask.ConfigureAwait(false);
var hasChunks = false;

while (await cursor.MoveNextAsync(cancellation).ConfigureAwait(false))
while (await cursor.MoveNextAsync(cancellation).ConfigureAwait(false))
{
foreach (var chunk in cursor.Current)
{
foreach (var chunk in cursor.Current)
{
await stream.WriteAsync(chunk, 0, chunk.Length, cancellation).ConfigureAwait(false);
hasChunks = true;
}
await stream.WriteAsync(chunk, 0, chunk.Length, cancellation).ConfigureAwait(false);
hasChunks = true;
}

if (!hasChunks) throw new InvalidOperationException($"No data was found for file entity with ID: {parent.ID}");
}

if (!hasChunks) throw new InvalidOperationException($"No data was found for file entity with ID: {parent.ID}");
}

/// <summary>
Expand Down Expand Up @@ -187,17 +187,26 @@ public async Task UploadAsync(Stream stream, int chunkSizeKB = 256, Cancellation
buffer = new byte[64 * 1024]; // 64kb read buffer
readCount = 0;

if (!string.IsNullOrEmpty(parent.MD5))
md5 = MD5.Create();

try
{
if (stream.CanSeek && stream.Position > 0) stream.Position = 0;

while ((readCount = await stream.ReadAsync(buffer, 0, buffer.Length, cancellation).ConfigureAwait(false)) > 0)
{
md5?.TransformBlock(buffer, 0, readCount, null, 0);
await FlushToDBAsync(session, isLastChunk: false, cancellation).ConfigureAwait(false);
}

if (parent.FileSize > 0)
{
md5?.TransformFinalBlock(buffer, 0, readCount);
if (md5 != null && !BitConverter.ToString(md5.Hash).Replace("-", "").Equals(parent.MD5, StringComparison.OrdinalIgnoreCase))
{
throw new InvalidDataException("MD5 of uploaded data doesn't match with file entity MD5.");
}
await FlushToDBAsync(session, isLastChunk: true, cancellation).ConfigureAwait(false);
parent.UploadSuccessful = true;
}
Expand All @@ -217,6 +226,8 @@ public async Task UploadAsync(Stream stream, int chunkSizeKB = 256, Cancellation
doc = null;
buffer = null;
dataChunk = null;
md5?.Dispose();
md5 = null;
}
}

Expand Down
5 changes: 3 additions & 2 deletions MongoDB.Entities/MongoDB.Entities.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

<PropertyGroup>

<Version>20.26.0-beta1</Version>
<Version>20.26.0</Version>

<PackageReleaseNotes>
- add MD5 verification support for file uploads
- add diacritic support for fuzzy text search
- change project lang version to 9.0
</PackageReleaseNotes>

<TargetFramework>netstandard2.0</TargetFramework>
<RootNamespace>MongoDB.Entities</RootNamespace>
<AssemblyName>MongoDB.Entities</AssemblyName>
Expand Down
35 changes: 35 additions & 0 deletions Tests/TestFileEntity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,41 @@ public async Task uploading_data_from_file_stream()
Assert.AreEqual(img.ChunkCount, count);
}

[TestMethod]
public async Task uploading_with_wrong_hash()
{
    // Arrange: an Image entity whose MD5 property deliberately does not match the uploaded data.
    await DB.InitAsync(dbName);
    DB.DatabaseFor<Image>(dbName);

    var image = new Image { Height = 800, Width = 600, Name = "Test-bad-hash.png", MD5 = "wrong-hash" };
    await image.SaveAsync().ConfigureAwait(false);

    // Act + Assert: the upload must fail hash verification and surface an InvalidDataException.
    using var fileStream = File.OpenRead("Models/test.jpg");

    await Assert.ThrowsExceptionAsync<InvalidDataException>(
        async () => await image.Data.UploadAsync(fileStream).ConfigureAwait(false));
}

[TestMethod]
public async Task uploading_with_correct_hash()
{
    await DB.InitAsync(dbName);
    DB.DatabaseFor<Image>(dbName);

    // The entity carries the known-good MD5 of Models/test.jpg, so verification should pass.
    var image = new Image { Height = 800, Width = 600, Name = "Test-correct-hash.png", MD5 = "cccfa116f0acf41a217cbefbe34cd599" };
    await image.SaveAsync().ConfigureAwait(false);

    using var fileStream = File.OpenRead("Models/test.jpg");
    await image.Data.UploadAsync(fileStream).ConfigureAwait(false);

    // Confirm the expected number of chunks were persisted for this file entity.
    var chunkCount = await DB.Database(dbName)
                             .GetCollection<FileChunk>(DB.CollectionName<FileChunk>())
                             .AsQueryable()
                             .Where(c => c.FileID == image.ID)
                             .CountAsync();

    Assert.AreEqual(2047524, image.FileSize);
    Assert.AreEqual(image.ChunkCount, chunkCount);
}

[TestMethod]
public async Task file_smaller_than_chunk_size()
{
Expand Down

0 comments on commit 23cec90

Please sign in to comment.