mirror of
https://github.com/duplicati/duplicati.git
synced 2025-11-28 03:20:25 +08:00
209 lines
11 KiB
C#
209 lines
11 KiB
C#
// Copyright (C) 2025, The Duplicati Team
|
|
// https://duplicati.com, hello@duplicati.com
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a
|
|
// copy of this software and associated documentation files (the "Software"),
|
|
// to deal in the Software without restriction, including without limitation
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS IN THE SOFTWARE.
|
|
|
|
using System;
|
|
using CoCoL;
|
|
using Duplicati.Library.Main.Operation.Common;
|
|
using System.Threading.Tasks;
|
|
using System.Collections.Generic;
|
|
using Duplicati.Library.Utility;
|
|
using Duplicati.Library.Interface;
|
|
using System.IO;
|
|
|
|
namespace Duplicati.Library.Main.Operation.Backup
|
|
{
|
|
/// <summary>
/// Backup pipeline process that reads streams from the <c>StreamBlock</c> channel,
/// cuts each stream into blocks of <c>options.Blocksize</c> bytes, hashes every block
/// and writes the blocks (plus the blocklists describing multi-block streams)
/// to the <c>OutputBlocks</c> channel.
/// </summary>
internal static class StreamBlockSplitter
{
    /// <summary>
    /// The tag used for log messages
    /// </summary>
    private static readonly string LOGTAG = Logging.Log.LogTagFromType(typeof(StreamBlockSplitter));

    /// <summary>
    /// The tag used for log messages that relate to a single file entry
    /// </summary>
    private static readonly string FILELOGTAG = LOGTAG + ".FileEntry";

    /// <summary>
    /// Starts the splitter process.
    /// For each entry read from the input channel, the stream is read block by block;
    /// each block is hashed and sent to the block output, the full-stream hash is computed,
    /// the blockset is registered in the database, and the result (or failure) is reported
    /// through the entry's <c>Result</c> completion source.
    /// </summary>
    /// <param name="channels">The channels to read streams from and write blocks and progress events to.</param>
    /// <param name="options">The options supplying the block size, hash algorithms and block hash size.</param>
    /// <param name="database">The database where the resulting blockset is recorded.</param>
    /// <param name="taskreader">The task reader used for the pause/terminate rendezvous and the progress token.</param>
    /// <returns>The task returned by <c>AutomationExtensions.RunTask</c> for this process.</returns>
    public static Task Run(Channels channels, Options options, BackupDatabase database, ITaskReader taskreader)
    {
        return AutomationExtensions.RunTask(
            new
            {
                Input = channels.StreamBlock.AsRead(),
                ProgressChannel = channels.ProgressEvents.AsWrite(),
                BlockOutput = channels.OutputBlocks.AsWrite()
            },

            async self =>
            {
                var blocksize = options.Blocksize;
                var emptymetadata = Utility.WrapMetadata(new Dictionary<string, string>(), options);

                // The number of hashes that fit in one blocklist block, times the block size;
                // metadata larger than this is replaced with empty metadata below
                var maxmetadatasize = (options.Blocksize / (long)options.BlockhashSize) * options.Blocksize;

                using (var filehasher = HashFactory.CreateHasher(options.FileHashAlgorithm))
                using (var blockhasher = HashFactory.CreateHasher(options.BlockHashAlgorithm))
                using (var empty_metadata_stream = new MemoryStream(emptymetadata.Blob))
                {
                    while (true)
                    {
                        // We ignore the stop signal, but not the pause and terminate
                        await taskreader.ProgressRendevouz().ConfigureAwait(false);
                        var send_close = false;
                        var filesize = 0L;

                        var e = await self.Input.ReadAsync();

                        // Set to null once a result has been delivered, so the
                        // catch/finally handlers below know nothing is pending
                        var cur = e.Result;

                        try
                        {
                            var stream = e.Stream;

                            using (var blocklisthashes = new Library.Utility.FileBackedStringList())
                            using (var hashcollector = new Library.Utility.FileBackedStringList())
                            {
                                var blocklistbuffer = new byte[blocksize];
                                var blocklistoffset = 0L;

                                // -1 is reported as the length if the stream cannot tell its size
                                long fslen = -1;
                                try
                                {
                                    fslen = stream.Length;
                                }
                                catch (Exception ex)
                                {
                                    LogExceptionHelper.LogCommonWarning(ex, FILELOGTAG, "FileLengthFailure", e.Path, "Failed to read file length for \"{0}\"");
                                }

                                if (e.IsMetadata && fslen > maxmetadatasize)
                                {
                                    //TODO: To fix this, the "WriteFileset" method in BackupHandler needs to
                                    // be updated such that it can select sets even when there are multiple
                                    // blocklist hashes for the metadata.
                                    // This could be done such that an extra query is made if the metadata
                                    // spans multiple blocklist hashes, as it is not expected to be common

                                    Logging.Log.WriteWarningMessage(LOGTAG, "TooLargeMetadata", null, "Metadata size is {0}, but the largest accepted size is {1}, recording empty metadata for {2}", fslen, maxmetadatasize, e.Path);

                                    // The empty metadata stream is shared between entries, so rewind it before use
                                    empty_metadata_stream.Position = 0;
                                    stream = empty_metadata_stream;
                                    fslen = stream.Length;
                                }

                                // Don't send progress reports for metadata
                                if (!e.IsMetadata)
                                {
                                    await self.ProgressChannel.WriteAsync(new ProgressEvent() { Filepath = e.Path, Length = fslen, Type = EventType.FileStarted });
                                    send_close = true;
                                }

                                filehasher.Initialize();
                                var lastread = 0;
                                var buf = new byte[blocksize];

                                // UTC is used as the value only serves to measure elapsed time,
                                // and must not be affected by local time zone or DST shifts
                                var lastupdate = DateTime.UtcNow;

                                // Core processing loop, read blocks of data and hash individually
                                while (((lastread = await stream.ForceStreamReadAsync(buf, blocksize)) != 0))
                                {
                                    // Run file hashing concurrently to squeeze a little extra concurrency out of it
                                    var pftask = Task.Run(() => filehasher.TransformBlock(buf, 0, lastread, buf, 0));
                                    string hashkey;

                                    try
                                    {
                                        var hashdata = blockhasher.ComputeHash(buf, 0, lastread);
                                        hashkey = Convert.ToBase64String(hashdata);

                                        // If we have too many hashes, flush the blocklist
                                        if (blocklistbuffer.Length - blocklistoffset < hashdata.Length)
                                        {
                                            var blkey = Convert.ToBase64String(blockhasher.ComputeHash(blocklistbuffer, 0, (int)blocklistoffset));
                                            blocklisthashes.Add(blkey);
                                            await DataBlock.AddBlockToOutputAsync(self.BlockOutput, blkey, blocklistbuffer, 0, blocklistoffset, CompressionHint.Noncompressible, true);
                                            blocklistoffset = 0;

                                            // The flushed buffer has been handed to the output, so continue with a fresh one
                                            blocklistbuffer = new byte[blocksize];
                                        }

                                        // Store the current hash in the blocklist
                                        Array.Copy(hashdata, 0, blocklistbuffer, blocklistoffset, hashdata.Length);
                                        blocklistoffset += hashdata.Length;
                                        hashcollector.Add(hashkey);
                                        filesize += lastread;

                                        // Don't spam updates
                                        if (send_close && (DateTime.UtcNow - lastupdate).TotalSeconds > 5)
                                        {
                                            await self.ProgressChannel.WriteAsync(new ProgressEvent() { Filepath = e.Path, Length = filesize, Type = EventType.FileProgressUpdate });
                                            lastupdate = DateTime.UtcNow;
                                        }
                                    }
                                    catch
                                    {
                                        // The concurrent hashing must not be left running, as the shared
                                        // filehasher is re-initialized for the next stream or disposed
                                        try
                                        {
                                            await pftask.ConfigureAwait(false);
                                        }
                                        catch
                                        {
                                            // The failure that brought us here is the one to report
                                        }

                                        throw;
                                    }

                                    // Make sure the filehasher is done with the buf instance before we pass it on
                                    await pftask.ConfigureAwait(false);
                                    await DataBlock.AddBlockToOutputAsync(self.BlockOutput, hashkey, buf, 0, lastread, e.Hint, false);

                                    // The buffer has been handed to the output, so read the next block into a fresh one
                                    buf = new byte[blocksize];
                                }

                                // If we have more than a single block of data, output the (trailing) blocklist
                                if (hashcollector.Count > 1)
                                {
                                    var blkey = Convert.ToBase64String(blockhasher.ComputeHash(blocklistbuffer, 0, (int)blocklistoffset));
                                    blocklisthashes.Add(blkey);
                                    await DataBlock.AddBlockToOutputAsync(self.BlockOutput, blkey, blocklistbuffer, 0, blocklistoffset, CompressionHint.Noncompressible, true);
                                }

                                // Finish the full-stream hash; all data was fed through TransformBlock above
                                filehasher.TransformFinalBlock(new byte[0], 0, 0);
                                var filehash = Convert.ToBase64String(filehasher.Hash);
                                var blocksetid = await database.AddBlocksetAsync(filehash, filesize, blocksize, hashcollector, blocklisthashes, taskreader.ProgressToken);
                                cur.SetResult(new StreamProcessResult() { Streamlength = filesize, Streamhash = filehash, Blocksetid = blocksetid });
                                cur = null;
                            }
                        }
                        catch (Exception ex)
                        {
                            // Report the failure to whoever awaits the result of this entry
                            try
                            {
                                if (cur != null)
                                    cur.TrySetException(ex);
                            }
                            catch
                            {
                            }

                            // Rethrow
                            if (ex.IsRetiredException())
                                throw;
                        }
                        finally
                        {
                            // If no result or exception was delivered, make sure the waiter is released
                            if (cur != null)
                            {
                                try
                                {
                                    cur.TrySetCanceled();
                                }
                                catch
                                {
                                }

                                cur = null;
                            }

                            // Match the FileStarted event sent above with a FileClosed event
                            if (send_close)
                                await self.ProgressChannel.WriteAsync(new ProgressEvent() { Filepath = e.Path, Length = filesize, Type = EventType.FileClosed });
                            send_close = false;
                        }
                    }
                }
            });
    }
}
|
|
}
|