diff --git a/src/.vscode/launch.json b/src/.vscode/launch.json index 48b739e0..8805ba74 100644 --- a/src/.vscode/launch.json +++ b/src/.vscode/launch.json @@ -21,7 +21,9 @@ "request": "launch", "preLaunchTask": "build", "program": "${workspaceFolder}/Tutorials/Paillave.Etl.Samples/bin/Debug/net6.0/Paillave.Etl.Samples.dll", - "args": [ ], + "args": [ + "/home/stephane/Desktop/" + ], "cwd": "${workspaceFolder}/Tutorials/Paillave.Etl.Samples", "stopAtEntry": false, "console": "externalTerminal" diff --git a/src/Paillave.EntityFrameworkCoreExtension/Paillave.EntityFrameworkCoreExtension.csproj b/src/Paillave.EntityFrameworkCoreExtension/Paillave.EntityFrameworkCoreExtension.csproj index 5c44be82..7ee88276 100644 --- a/src/Paillave.EntityFrameworkCoreExtension/Paillave.EntityFrameworkCoreExtension.csproj +++ b/src/Paillave.EntityFrameworkCoreExtension/Paillave.EntityFrameworkCoreExtension.csproj @@ -1,7 +1,7 @@ Paillave.EntityFrameworkCoreExtension - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Autofac/Paillave.Etl.Autofac.csproj b/src/Paillave.Etl.Autofac/Paillave.Etl.Autofac.csproj index 96edac8f..54862bc9 100644 --- a/src/Paillave.Etl.Autofac/Paillave.Etl.Autofac.csproj +++ b/src/Paillave.Etl.Autofac/Paillave.Etl.Autofac.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Autofac - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Bloomberg/Paillave.Etl.Bloomberg.csproj b/src/Paillave.Etl.Bloomberg/Paillave.Etl.Bloomberg.csproj index b756193f..d3bc14c3 100644 --- a/src/Paillave.Etl.Bloomberg/Paillave.Etl.Bloomberg.csproj +++ b/src/Paillave.Etl.Bloomberg/Paillave.Etl.Bloomberg.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Bloomberg - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Dropbox/Paillave.Etl.Dropbox.csproj b/src/Paillave.Etl.Dropbox/Paillave.Etl.Dropbox.csproj index cbb839d7..50046f6a 100644 --- a/src/Paillave.Etl.Dropbox/Paillave.Etl.Dropbox.csproj +++ b/src/Paillave.Etl.Dropbox/Paillave.Etl.Dropbox.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Dropbox - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.EntityFrameworkCore/Paillave.Etl.EntityFrameworkCore.csproj b/src/Paillave.Etl.EntityFrameworkCore/Paillave.Etl.EntityFrameworkCore.csproj index 1e3114dc..9fcdb10b 100644 --- a/src/Paillave.Etl.EntityFrameworkCore/Paillave.Etl.EntityFrameworkCore.csproj +++ b/src/Paillave.Etl.EntityFrameworkCore/Paillave.Etl.EntityFrameworkCore.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.EntityFrameworkCore - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.ExcelFile/Core/ExcelFileDefinition.cs b/src/Paillave.Etl.ExcelFile/Core/ExcelFileDefinition.cs index c3165883..8457fed5 100644 --- a/src/Paillave.Etl.ExcelFile/Core/ExcelFileDefinition.cs +++ b/src/Paillave.Etl.ExcelFile/Core/ExcelFileDefinition.cs @@ -82,7 +82,7 @@ public ExcelFileDefinition SetDefaultMapping(bool withColumnHeader = true, Cu } return this; } - public ExcelFileReader GetExcelReader(ExcelWorksheet excelWorksheet = null) + internal ExcelFileReader GetExcelReader(ExcelWorksheet excelWorksheet = null) { if ((_fieldDefinitions?.Count ?? 0) == 0) SetDefaultMapping(); if (_dataRange == null) @@ -161,7 +161,7 @@ public ExcelFileDefinition WithCultureInfo(string name) this._cultureInfo = CultureInfo.GetCultureInfo(name); return this; } - public ExcelFileDefinition MapColumnToProperty(int index, Expression> memberLambda, CultureInfo cultureInfo = null) + internal ExcelFileDefinition MapColumnToProperty(int index, Expression> memberLambda, CultureInfo cultureInfo = null) { SetFieldDefinition(new ExcelFileFieldDefinition { @@ -171,7 +171,7 @@ public ExcelFileDefinition MapColumnToProperty(int index, Expression< }); return this; } - public ExcelFileDefinition MapColumnToProperty(int index, Expression> memberLambda, string cultureInfo) + internal ExcelFileDefinition MapColumnToProperty(int index, Expression> memberLambda, string cultureInfo) { SetFieldDefinition(new ExcelFileFieldDefinition { diff --git a/src/Paillave.Etl.ExcelFile/ExcelDatasetsValuesProvider.cs b/src/Paillave.Etl.ExcelFile/ExcelDatasetsValuesProvider.cs index b125c15c..4c49e05c 100644 --- a/src/Paillave.Etl.ExcelFile/ExcelDatasetsValuesProvider.cs +++ b/src/Paillave.Etl.ExcelFile/ExcelDatasetsValuesProvider.cs @@ -4,12 +4,13 @@ using System.Threading; using ExcelDataReader; using System.Data; +using System.Collections.Generic; namespace Paillave.Etl.ExcelFile { public class ExcelDatasetsValuesProviderArgs { - public Func GetOutput { get; set; } + public Func> GetOutput { get; set; } } public class ExcelDatasetsValuesProvider : ValuesProviderBase { @@ -24,7 +25,22 @@ public override void PushValues(IFileValue input, Action push, Cancellatio var dataset = reader.AsDataSet(); dataset.DataSetName = input.Name; foreach (var item in dataset.Tables.Cast()) - push(_args.GetOutput(item, input)); + _args.GetOutput(item, input).ToList().ForEach(push); + } + } + } + + public class ExcelDataTablesValuesProvider : ValuesProviderBase + { + public override ProcessImpact PerformanceImpact => ProcessImpact.Average; + public override ProcessImpact MemoryFootPrint => ProcessImpact.Average; + public override void PushValues(IFileValue input, Action push, CancellationToken cancellationToken, IDependencyResolver resolver, IInvoker invoker) + { + using (var reader = ExcelReaderFactory.CreateReader(input.GetContent())) + { + var dataset = reader.AsDataSet(); + dataset.DataSetName = input.Name; + dataset.Tables.Cast().ToList().ForEach(push); } } } diff --git a/src/Paillave.Etl.ExcelFile/ExcelFile.Stream.ex.cs b/src/Paillave.Etl.ExcelFile/ExcelFile.Stream.ex.cs index 80738bed..7cffa11b 100644 --- a/src/Paillave.Etl.ExcelFile/ExcelFile.Stream.ex.cs +++ b/src/Paillave.Etl.ExcelFile/ExcelFile.Stream.ex.cs @@ -1,44 +1,64 @@ using Paillave.Etl.Core; +using Paillave.Etl.Core.Mapping; using Paillave.Etl.ExcelFile.Core; using System; +using System.Collections.Generic; using System.Data; using System.IO; +using System.Linq.Expressions; namespace Paillave.Etl.ExcelFile { + public class ExcelFileArgBuilder + { + public ExcelFileDefinition UseMap(Expression> expression) => ExcelFileDefinition.Create(expression); + public ExcelFileDefinition UseType() => new ExcelFileDefinition(); + public ExcelFileDefinition UseType(T prototype) => new ExcelFileDefinition(); + } + public static class ExcelFileEx { #region CrossApplyExcelSheets - public static IStream CrossApplyExcelSheets(this IStream stream, string name, bool noParallelisation = false) + public static IStream CrossApplyExcelSheets( + this IStream stream, + string name, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelSheetsValuesProvider(new ExcelSheetsValuesProviderArgs { GetOutput = (i, j) => i }), noParallelisation); - public static IStream CrossApplyExcelSheets(this IStream stream, string name, Func selector, bool noParallelisation = false) + public static IStream CrossApplyExcelSheets( + this IStream stream, + string name, + Func selector, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelSheetsValuesProvider(new ExcelSheetsValuesProviderArgs { GetOutput = (i, j) => selector(i) }), noParallelisation); - public static IStream CrossApplyExcelDatasets(this IStream stream, string name, Func selector, bool noParallelisation = false) + public static IStream CrossApplyExcelDatasets( + this IStream stream, + string name, + Func> selector, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelDatasetsValuesProvider(new ExcelDatasetsValuesProviderArgs { GetOutput = selector }), noParallelisation); #endregion + #region CrossApplyExcelDatasets - [Obsolete("use CrossApplyExcelDatatables instead")] - public static IStream CrossApplyExcelDatasets(this IStream stream, string name, bool noParallelisation = false) - => stream.CrossApplyExcelDatatables(name, noParallelisation); - [Obsolete("use CrossApplyExcelDatatables instead")] - public static IStream CrossApplyExcelDatasets(this IStream stream, string name, Func selector, bool noParallelisation = false) - => stream.CrossApplyExcelDatatables(name, selector, noParallelisation); - public static IStream CrossApplyExcelDatatables(this IStream stream, string name, bool noParallelisation = false) - => stream.CrossApply(name, new ExcelDatasetsValuesProvider(new ExcelDatasetsValuesProviderArgs - { - GetOutput = (i, j) => i - }), noParallelisation); - public static IStream CrossApplyExcelDatatables(this IStream stream, string name, Func selector, bool noParallelisation = false) + public static IStream CrossApplyExcelDataTables( + this IStream stream, + string name, + bool noParallelisation = false) + => stream.CrossApply(name, new ExcelDataTablesValuesProvider(), noParallelisation); + public static IStream CrossApplyExcelDataTables( + this IStream stream, + string name, + Func> selector, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelDatasetsValuesProvider(new ExcelDatasetsValuesProviderArgs { GetOutput = (i, j) => selector(i) @@ -46,28 +66,96 @@ public static IStream CrossApplyExcelDatatables(this IStream CrossApplyExcelRows(this IStream stream, string name, ExcelFileDefinition mapping, Func selector, bool noParallelisation = false) + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + Func> mapBuilder, + Func selector, + bool noParallelisation = false) + => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs + { + Mapping = mapBuilder(new()), + GetSheetSelection = i => i, + GetOutput = selector + }), noParallelisation); + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + ExcelFileDefinition mapping, + Func selector, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs { Mapping = mapping, GetSheetSelection = i => i, GetOutput = selector }), noParallelisation); - public static IStream CrossApplyExcelRows(this IStream stream, string name, ExcelFileDefinition mapping, Func sheetSelection, Func selector, bool noParallelisation = false) + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + Func> mapBuilder, + Func sheetSelection, + Func selector, + bool noParallelisation = false) + => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs + { + Mapping = mapBuilder(new()), + GetSheetSelection = sheetSelection, + GetOutput = selector + }), noParallelisation); + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + ExcelFileDefinition mapping, + Func sheetSelection, + Func selector, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs { Mapping = mapping, GetSheetSelection = sheetSelection, GetOutput = selector }), noParallelisation); - public static IStream CrossApplyExcelRows(this IStream stream, string name, ExcelFileDefinition mapping, Func sheetSelection, bool noParallelisation = false) + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + Func> mapBuilder, + Func sheetSelection, + bool noParallelisation = false) + => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs + { + Mapping = mapBuilder(new()), + GetSheetSelection = sheetSelection, + GetOutput = (i, j) => i + }), noParallelisation); + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + ExcelFileDefinition mapping, + Func sheetSelection, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs { Mapping = mapping, GetSheetSelection = sheetSelection, GetOutput = (i, j) => i }), noParallelisation); - public static IStream CrossApplyExcelRows(this IStream stream, string name, ExcelFileDefinition mapping, bool noParallelisation = false) + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + Func> mapBuilder, + bool noParallelisation = false) + => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs + { + Mapping = mapBuilder(new()), + GetSheetSelection = i => i, + GetOutput = (i, j) => i + }), noParallelisation); + public static IStream CrossApplyExcelRows( + this IStream stream, + string name, + ExcelFileDefinition mapping, + bool noParallelisation = false) => stream.CrossApply(name, new ExcelRowsValuesProvider(new ExcelRowsValuesProviderArgs { Mapping = mapping, @@ -77,45 +165,54 @@ public static IStream CrossApplyExcelRows(this IStream ToExcelFile(this IStream stream, string name, ISingleStream resourceStream, ExcelFileDefinition mapping = null) - { - return new ToExcelFileStreamNode>(name, new ToExcelFileArgs> + public static IStream ToExcelFile( + this IStream stream, + string name, + ISingleStream resourceStream, + ExcelFileDefinition mapping = null) + => new ToExcelFileStreamNode>(name, new ToExcelFileArgs> { MainStream = stream, TargetStream = resourceStream, Mapping = mapping }).Output; - } - public static ISortedStream ToExcelFile(this ISortedStream stream, string name, ISingleStream resourceStream, ExcelFileDefinition mapping = null) - { - return new ToExcelFileStreamNode>(name, new ToExcelFileArgs> + public static ISortedStream ToExcelFile( + this ISortedStream stream, + string name, + ISingleStream resourceStream, + ExcelFileDefinition mapping = null) + => new ToExcelFileStreamNode>(name, new ToExcelFileArgs> { MainStream = stream, TargetStream = resourceStream, Mapping = mapping }).Output; - } - public static IKeyedStream ToExcelFile(this IKeyedStream stream, string name, ISingleStream resourceStream, ExcelFileDefinition mapping = null) - { - return new ToExcelFileStreamNode>(name, new ToExcelFileArgs> + public static IKeyedStream ToExcelFile( + this IKeyedStream stream, + string name, + ISingleStream resourceStream, + ExcelFileDefinition mapping = null) + => new ToExcelFileStreamNode>(name, new ToExcelFileArgs> { MainStream = stream, TargetStream = resourceStream, Mapping = mapping }).Output; - } + #endregion #region ToExcelFile - public static IStream ToExcelFile(this IStream stream, string name, string fileName, ExcelFileDefinition mapping = null) - { - return new ToExcelFileStreamNode(name, new ToExcelFileArgs + public static IStream ToExcelFile( + this IStream stream, + string name, + string fileName, + ExcelFileDefinition mapping = null) + => new ToExcelFileStreamNode(name, new ToExcelFileArgs { MainStream = stream, Mapping = mapping, FileName = fileName }).Output; - } #endregion } } diff --git a/src/Paillave.Etl.ExcelFile/Paillave.Etl.ExcelFile.csproj b/src/Paillave.Etl.ExcelFile/Paillave.Etl.ExcelFile.csproj index 74b8d28f..f4d41aa0 100644 --- a/src/Paillave.Etl.ExcelFile/Paillave.Etl.ExcelFile.csproj +++ b/src/Paillave.Etl.ExcelFile/Paillave.Etl.ExcelFile.csproj @@ -1,7 +1,7 @@  Paillave.EtlNet.ExcelFile - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.ExecutionToolkit/Paillave.Etl.ExecutionToolkit.csproj b/src/Paillave.Etl.ExecutionToolkit/Paillave.Etl.ExecutionToolkit.csproj index 3c8bc815..1153493e 100644 --- a/src/Paillave.Etl.ExecutionToolkit/Paillave.Etl.ExecutionToolkit.csproj +++ b/src/Paillave.Etl.ExecutionToolkit/Paillave.Etl.ExecutionToolkit.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.ExecutionToolkit - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.FileSystem/Paillave.Etl.FileSystem.csproj b/src/Paillave.Etl.FileSystem/Paillave.Etl.FileSystem.csproj index 5aaeceb8..24e79df1 100644 --- a/src/Paillave.Etl.FileSystem/Paillave.Etl.FileSystem.csproj +++ b/src/Paillave.Etl.FileSystem/Paillave.Etl.FileSystem.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.FileSystem - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.FromConfigurationConnectors/Paillave.Etl.FromConfigurationConnectors.csproj b/src/Paillave.Etl.FromConfigurationConnectors/Paillave.Etl.FromConfigurationConnectors.csproj index 5fabac32..523bdfa9 100644 --- a/src/Paillave.Etl.FromConfigurationConnectors/Paillave.Etl.FromConfigurationConnectors.csproj +++ b/src/Paillave.Etl.FromConfigurationConnectors/Paillave.Etl.FromConfigurationConnectors.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.FromConfigurationConnectors - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Ftp/Paillave.Etl.Ftp.csproj b/src/Paillave.Etl.Ftp/Paillave.Etl.Ftp.csproj index 1943544f..3eafba50 100644 --- a/src/Paillave.Etl.Ftp/Paillave.Etl.Ftp.csproj +++ b/src/Paillave.Etl.Ftp/Paillave.Etl.Ftp.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Ftp - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Mail/Paillave.Etl.Mail.csproj b/src/Paillave.Etl.Mail/Paillave.Etl.Mail.csproj index 928e2be4..4f69f7f6 100644 --- a/src/Paillave.Etl.Mail/Paillave.Etl.Mail.csproj +++ b/src/Paillave.Etl.Mail/Paillave.Etl.Mail.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Mail - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Pdf/Paillave.Etl.Pdf.csproj b/src/Paillave.Etl.Pdf/Paillave.Etl.Pdf.csproj index dd792af1..7200e02b 100644 --- a/src/Paillave.Etl.Pdf/Paillave.Etl.Pdf.csproj +++ b/src/Paillave.Etl.Pdf/Paillave.Etl.Pdf.csproj @@ -1,7 +1,7 @@  Paillave.EtlNet.Pdf - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Scheduler/Paillave.Etl.Scheduler.csproj b/src/Paillave.Etl.Scheduler/Paillave.Etl.Scheduler.csproj index d72313f9..fa2a8690 100644 --- a/src/Paillave.Etl.Scheduler/Paillave.Etl.Scheduler.csproj +++ b/src/Paillave.Etl.Scheduler/Paillave.Etl.Scheduler.csproj @@ -2,7 +2,7 @@ Paillave.EtlNet.Scheduler - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Sftp/Paillave.Etl.Sftp.csproj b/src/Paillave.Etl.Sftp/Paillave.Etl.Sftp.csproj index 5b2d154e..902964c6 100644 --- a/src/Paillave.Etl.Sftp/Paillave.Etl.Sftp.csproj +++ b/src/Paillave.Etl.Sftp/Paillave.Etl.Sftp.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Sftp - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.SqlServer/Paillave.Etl.SqlServer.csproj b/src/Paillave.Etl.SqlServer/Paillave.Etl.SqlServer.csproj index 3844d657..3d0da32c 100644 --- a/src/Paillave.Etl.SqlServer/Paillave.Etl.SqlServer.csproj +++ b/src/Paillave.Etl.SqlServer/Paillave.Etl.SqlServer.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.SqlServer - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.TextFile/FlatFileDefinition.cs b/src/Paillave.Etl.TextFile/FlatFileDefinition.cs index 18ad7d01..4272d7bc 100644 --- a/src/Paillave.Etl.TextFile/FlatFileDefinition.cs +++ b/src/Paillave.Etl.TextFile/FlatFileDefinition.cs @@ -24,6 +24,28 @@ public class FlatFileDefinition private bool _respectHeaderCase = false; public int FirstLinesToIgnore { get; private set; } + private IEnumerable GetDefaultColumnNames() + { + return _fieldDefinitions.Select((i, idx) => new { Name = i.ColumnName ?? i.PropertyInfo.Name, DefinedPosition = i.Position, FallbackPosition = idx }) + .OrderBy(i => i.DefinedPosition) + .ThenBy(i => i.FallbackPosition) + .Select(i => i.Name); + } + private void SetFieldDefinition(FlatFileFieldDefinition fieldDefinition) + { + var existingFieldDefinition = _fieldDefinitions.FirstOrDefault(i => i.PropertyInfo.Name == fieldDefinition.PropertyInfo.Name); + if (existingFieldDefinition == null) + { + if (fieldDefinition.Position == null) + fieldDefinition.Position = (_fieldDefinitions.Max(i => i.Position) ?? 0) + 1; + _fieldDefinitions.Add(fieldDefinition); + } + else + { + if (fieldDefinition.ColumnName != null) existingFieldDefinition.ColumnName = fieldDefinition.ColumnName; + if (fieldDefinition.Position != null) existingFieldDefinition.Position = fieldDefinition.Position; + } + } public FlatFileDefinition IgnoreFirstLines(int firstLinesToIgnore) { FirstLinesToIgnore = firstLinesToIgnore; @@ -55,7 +77,7 @@ public FlatFileDefinition WithMap(Expression> expressio } return this; } - public FlatFileDefinition SetDefaultMapping(bool withColumnHeader = true, CultureInfo cultureInfo = null) + private FlatFileDefinition SetDefaultMapping(bool withColumnHeader = true, CultureInfo cultureInfo = null) { foreach (var item in typeof(T).GetProperties().Select((propertyInfo, index) => new { propertyInfo = propertyInfo, Position = index })) { @@ -108,13 +130,6 @@ public LineSerializer GetSerializer(IEnumerable columnNames = null) return new LineSerializer(_lineSplitter, indexToPropertySerializerDictionary, fileNamePropertyNames, rowNumberPropertyNames, rowGuidPropertyNames); } } - private IEnumerable GetDefaultColumnNames() - { - return _fieldDefinitions.Select((i, idx) => new { Name = i.ColumnName ?? i.PropertyInfo.Name, DefinedPosition = i.Position, FallbackPosition = idx }) - .OrderBy(i => i.DefinedPosition) - .ThenBy(i => i.FallbackPosition) - .Select(i => i.Name); - } public string GenerateDefaultHeaderLine() { return _lineSplitter.Join(GetDefaultColumnNames()); @@ -149,20 +164,5 @@ public FlatFileDefinition WithCultureInfo(string name) this._cultureInfo = CultureInfo.GetCultureInfo(name); return this; } - private void SetFieldDefinition(FlatFileFieldDefinition fieldDefinition) - { - var existingFieldDefinition = _fieldDefinitions.FirstOrDefault(i => i.PropertyInfo.Name == fieldDefinition.PropertyInfo.Name); - if (existingFieldDefinition == null) - { - if (fieldDefinition.Position == null) - fieldDefinition.Position = (_fieldDefinitions.Max(i => i.Position) ?? 0) + 1; - _fieldDefinitions.Add(fieldDefinition); - } - else - { - if (fieldDefinition.ColumnName != null) existingFieldDefinition.ColumnName = fieldDefinition.ColumnName; - if (fieldDefinition.Position != null) existingFieldDefinition.Position = fieldDefinition.Position; - } - } } } diff --git a/src/Paillave.Etl.TextFile/Paillave.Etl.TextFile.csproj b/src/Paillave.Etl.TextFile/Paillave.Etl.TextFile.csproj index 21523cfe..ee0e7418 100644 --- a/src/Paillave.Etl.TextFile/Paillave.Etl.TextFile.csproj +++ b/src/Paillave.Etl.TextFile/Paillave.Etl.TextFile.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.TextFile - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.TextFile/TextFile.Stream.ex.cs b/src/Paillave.Etl.TextFile/TextFile.Stream.ex.cs index 15bb0afa..184860b7 100644 --- a/src/Paillave.Etl.TextFile/TextFile.Stream.ex.cs +++ b/src/Paillave.Etl.TextFile/TextFile.Stream.ex.cs @@ -3,12 +3,30 @@ using Paillave.Etl.Core; using System.Text; using System.Collections.Generic; +using System.Linq.Expressions; +using Paillave.Etl.Core.Mapping; namespace Paillave.Etl.TextFile { + public class FlatFileArgBuilder + { + public FlatFileDefinition UseMap(Expression> expression) => FlatFileDefinition.Create(expression); + public FlatFileDefinition UseType() => new FlatFileDefinition(); + public FlatFileDefinition UseType(T prototype) => new FlatFileDefinition(); + } + public static class TextFileEx { #region CrossApplyTextFile + public static IStream CrossApplyTextFile(this IStream stream, string name, Func> mapBuilder, bool noParallelisation = false) + { + var valuesProvider = new FlatFileValuesProvider(new FlatFileValuesProviderArgs() + { + Mapping = mapBuilder(new()), + ResultSelector = (i, o) => o + }); + return stream.CrossApply(name, valuesProvider, noParallelisation); + } public static IStream CrossApplyTextFile(this IStream stream, string name, FlatFileDefinition args, bool noParallelisation = false) { var valuesProvider = new FlatFileValuesProvider(new FlatFileValuesProviderArgs() @@ -86,6 +104,32 @@ public static ISingleStream ToTextFileValue(this IStream ToTextFileValue(this IStream stream, string name, string fileName, Func, FlatFileDefinition> mapBuilder, Dictionary> destinations = null, object extraMetadata = null, Encoding encoding = null) + { + return new ToFileValueStreamNode(name, new ToTextDataStreamArgs + { + MainStream = stream, + Mapping = mapBuilder(new()), + GetRow = i => i, + FileName = fileName, + Encoding = encoding, + Metadata = extraMetadata, + Destinations = destinations + }).Output; + } + public static ISingleStream ToTextFileValue(this IStream> stream, string name, string fileName, Func, FlatFileDefinition> mapBuilder, Dictionary> destinations = null, object extraMetadata = null, Encoding encoding = null) + { + return new ToFileValueStreamNode, TIn>(name, new ToTextDataStreamArgs, TIn> + { + MainStream = stream, + Mapping = mapBuilder(new()), + GetRow = i => i.Row, + FileName = fileName, + Encoding = encoding, + Metadata = extraMetadata, + Destinations = destinations + }).Output; + } #endregion } } diff --git a/src/Paillave.Etl.XmlFile/Paillave.Etl.XmlFile.csproj b/src/Paillave.Etl.XmlFile/Paillave.Etl.XmlFile.csproj index 97c3a315..3fe5d3e4 100644 --- a/src/Paillave.Etl.XmlFile/Paillave.Etl.XmlFile.csproj +++ b/src/Paillave.Etl.XmlFile/Paillave.Etl.XmlFile.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.XmlFile - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl.Zip/Paillave.Etl.Zip.csproj b/src/Paillave.Etl.Zip/Paillave.Etl.Zip.csproj index 04361470..57ce30d6 100644 --- a/src/Paillave.Etl.Zip/Paillave.Etl.Zip.csproj +++ b/src/Paillave.Etl.Zip/Paillave.Etl.Zip.csproj @@ -1,7 +1,7 @@ Paillave.EtlNet.Zip - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Etl/Extensions/Do.Stream.ex.cs b/src/Paillave.Etl/Extensions/Do.Stream.ex.cs index 608fd8b5..778a6fcd 100644 --- a/src/Paillave.Etl/Extensions/Do.Stream.ex.cs +++ b/src/Paillave.Etl/Extensions/Do.Stream.ex.cs @@ -74,7 +74,7 @@ public static ISingleStream> DoCorrelated(this ISingleStrea #region Process row with injection - public static IStream DoAndResolve(this IStream stream, string name, Func, IDoProcessor> o) + public static IStream ResolveAndDo(this IStream stream, string name, Func, IDoProcessor> o) { return new DoStreamNode>(name, new DoArgs> { @@ -82,7 +82,7 @@ public static IStream DoAndResolve(this IStream stream, string na Stream = stream }).Output; } - public static ISortedStream DoAndResolve(this ISortedStream stream, string name, Func, IDoProcessor> o) + public static ISortedStream ResolveAndDo(this ISortedStream stream, string name, Func, IDoProcessor> o) { return new DoStreamNode>(name, new DoArgs> { @@ -90,7 +90,7 @@ public static ISortedStream DoAndResolve(this ISortedStrea Stream = stream }).Output; } - public static IKeyedStream DoAndResolve(this IKeyedStream stream, string name, Func, IDoProcessor> o) + public static IKeyedStream ResolveAndDo(this IKeyedStream stream, string name, Func, IDoProcessor> o) { return new DoStreamNode>(name, new DoArgs> { @@ -98,7 +98,7 @@ public static IKeyedStream DoAndResolve(this IKeyedStream< Stream = stream }).Output; } - public static ISingleStream DoAndResolve(this ISingleStream stream, string name, Func, IDoProcessor> o) + public static ISingleStream ResolveAndDo(this ISingleStream stream, string name, Func, IDoProcessor> o) { return new DoStreamNode>(name, new DoArgs> { @@ -106,7 +106,7 @@ public static ISingleStream DoAndResolve(this ISingleStream strea Stream = stream }).Output; } - public static IStream> DoCorrelatedAndResolve(this IStream> stream, string name, Func, TIn>, IDoProcessor>> o) + public static IStream> ResolveAndDoCorrelated(this IStream> stream, string name, Func, TIn>, IDoProcessor>> o) { return new DoStreamNode, IStream>>(name, new DoArgs, IStream>> { @@ -114,7 +114,7 @@ public static IStream> DoCorrelatedAndResolve(this IStream< Stream = stream }).Output; } - public static ISortedStream, TKey> DoCorrelatedAndResolve(this ISortedStream, TKey> stream, string name, Func, TIn>, IDoProcessor>> o) + public static ISortedStream, TKey> ResolveAndDoCorrelated(this ISortedStream, TKey> stream, string name, Func, TIn>, IDoProcessor>> o) { return new DoStreamNode, ISortedStream, TKey>>(name, new DoArgs, ISortedStream, TKey>> { @@ -122,7 +122,7 @@ public static ISortedStream, TKey> DoCorrelatedAndResolve, TKey> DoCorrelatedAndResolve(this IKeyedStream, TKey> stream, string name, Func, TIn>, IDoProcessor>> o) + public static IKeyedStream, TKey> ResolveAndDoCorrelated(this IKeyedStream, TKey> stream, string name, Func, TIn>, IDoProcessor>> o) { return new DoStreamNode, IKeyedStream, TKey>>(name, new DoArgs, IKeyedStream, TKey>> { @@ -130,7 +130,7 @@ public static IKeyedStream, TKey> DoCorrelatedAndResolve> DoCorrelatedAndResolve(this ISingleStream> stream, string name, Func, TIn>, IDoProcessor>> o) + public static ISingleStream> ResolveAndDoCorrelated(this ISingleStream> stream, string name, Func, TIn>, IDoProcessor>> o) { return new DoStreamNode, ISingleStream>>(name, new DoArgs, ISingleStream>> { diff --git a/src/Paillave.Etl/Extensions/StreamNodes/ResolveAndSelectStreamNode.cs b/src/Paillave.Etl/Extensions/StreamNodes/ResolveAndSelectStreamNode.cs index 56d25639..fb0018bb 100644 --- a/src/Paillave.Etl/Extensions/StreamNodes/ResolveAndSelectStreamNode.cs +++ b/src/Paillave.Etl/Extensions/StreamNodes/ResolveAndSelectStreamNode.cs @@ -57,7 +57,7 @@ public class ResolverSelector where TService : class private Resolver _resolver; private string _serviceKey; internal ResolverSelector(Resolver resolver, string serviceKey) => (_resolver, _serviceKey) = (resolver, serviceKey); - public Selector Select(Func select) => new Selector(this, select); + public Selector ThenSelect(Func select) => new Selector(this, select); internal TService ResolveService() => _serviceKey == null ? _resolver.ResolveService() : _resolver.ResolveService(_serviceKey); } public class Selector where TService : class diff --git a/src/Paillave.Etl/Paillave.Etl.csproj b/src/Paillave.Etl/Paillave.Etl.csproj index 242b4887..02d2f24a 100644 --- a/src/Paillave.Etl/Paillave.Etl.csproj +++ b/src/Paillave.Etl/Paillave.Etl.csproj @@ -1,7 +1,7 @@  Paillave.EtlNet.Core - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Paillave.Pdf/Paillave.Pdf.csproj b/src/Paillave.Pdf/Paillave.Pdf.csproj index e012a69a..817f9ba8 100644 --- a/src/Paillave.Pdf/Paillave.Pdf.csproj +++ b/src/Paillave.Pdf/Paillave.Pdf.csproj @@ -1,7 +1,7 @@ Paillave.Pdf - 2.0.46 + 2.0.47 Stéphane Royer true diff --git a/src/Tutorials/BlogTutorial/Program copy 2.cs b/src/Tutorials/BlogTutorial/Program copy 2.cs index ecf84383..7f863cf4 100644 --- a/src/Tutorials/BlogTutorial/Program copy 2.cs +++ b/src/Tutorials/BlogTutorial/Program copy 2.cs @@ -29,7 +29,7 @@ private static void DefineProcess(ISingleStream contextStream) contextStream .ResolveAndSelect("get some values", o => o .Resolve() - .Select((context, injected) => $"{context}-{injected.AStringValue}:{injected.AnIntValue}")); + .ThenSelect((context, injected) => $"{context}-{injected.AStringValue}:{injected.AnIntValue}")); } } } diff --git a/src/Tutorials/Paillave.Etl.Samples/Paillave.Etl.Samples.csproj b/src/Tutorials/Paillave.Etl.Samples/Paillave.Etl.Samples.csproj index ca08cc42..34e566bb 100644 --- a/src/Tutorials/Paillave.Etl.Samples/Paillave.Etl.Samples.csproj +++ b/src/Tutorials/Paillave.Etl.Samples/Paillave.Etl.Samples.csproj @@ -3,6 +3,7 @@ + diff --git a/src/Tutorials/Paillave.Etl.Samples/Program3.cs b/src/Tutorials/Paillave.Etl.Samples/Program3.cs index 2e88ab0b..f6db58b2 100644 --- a/src/Tutorials/Paillave.Etl.Samples/Program3.cs +++ b/src/Tutorials/Paillave.Etl.Samples/Program3.cs @@ -8,6 +8,8 @@ using Microsoft.EntityFrameworkCore; using Paillave.Etl.Autofac; using Paillave.Etl.Core; +using Paillave.Etl.ExcelFile; +using Paillave.Etl.FileSystem; using Paillave.Etl.EntityFrameworkCore; using Paillave.Etl.ExecutionToolkit; using Paillave.Pdf; @@ -15,41 +17,82 @@ using UglyToad.PdfPig.Content; using UglyToad.PdfPig.Filters; using UglyToad.PdfPig.Graphics.Colors; +using System.Data; namespace Paillave.Etl.Samples { - class PdfVisitor : IPdfVisitor - { - public Dictionary> Lines { get; } = new Dictionary> - { - [""] = new List() - }; - private List GetLines(string area) - { - if (Lines.TryGetValue(area, out var lines)) return lines; - lines = new List(); - Lines[area] = lines; - return lines; - } - public void ProcessHeader(List section, int pageNumber) - { - } - - public void ProcessLine(string text, int pageNumber, int lineNumber, int lineNumberInParagraph, int lineNumberInPage, List section, HashSet areas) - { - GetLines("").Add(text); - foreach (var area in areas) - GetLines(area).Add(text); - } - - public void ProcessTable(List>> table, int pageNumber, List section) - { - } - } + // class PdfVisitor : IPdfVisitor + // { + // public Dictionary> Lines { get; } = new Dictionary> + // { + // [""] = new List() + // }; + // private List GetLines(string area) + // { + // if (Lines.TryGetValue(area, out var lines)) return lines; + // lines = new List(); + // Lines[area] = lines; + // return lines; + // } + // public void ProcessHeader(List section, int pageNumber) + // { + // } + + // public void ProcessLine(string text, int pageNumber, int lineNumber, int lineNumberInParagraph, int lineNumberInPage, List section, HashSet areas) + // { + // GetLines("").Add(text); + // foreach (var area in areas) + // GetLines(area).Add(text); + // } + + // public void ProcessTable(List>> table, int pageNumber, List section) + // { + // } + // } class Program5 { static void Main(string[] args) { + // The most convenient way + StreamProcessRunner.Create(triggerStream + => triggerStream + .CrossApplyFolderFiles("Get all excel files", "*.xlsx") + .CrossApplyExcelSheets("Get all excel sheets") + .CrossApplyExcelRows("get rows", o => o + .UseMap(m => new + { + A = m.ToNumberColumn("a", "."), + B = m.ToColumn("b"), + C = m.ToNumberColumn("c", ".") + }) + .HasColumnHeader("A1:C1") + .WithDataset("A2:C2")) + .Do("show on screen", i => Console.WriteLine($"First way: {i.A}\t{i.B}\t{i.C}"))) + .ExecuteAsync(args[0]) + .Wait(); + + + + // The fastest way to read a large excel file + // This RegisterProvider is required for fast excel reader + System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance); + StreamProcessRunner.Create(triggerStream + => triggerStream + .CrossApplyFolderFiles("Get all excel files", "*.xlsx") + .CrossApplyExcelDataTables("Get all excel data", dt => dt + .AsEnumerable() + .Skip(1) + .Select(i => new + { + A = i.Field(0), + B = i.Field(1), + C = i.Field(2), + })) + .Do("show on screen", i => Console.WriteLine($"Second way: {i.A}\t{i.B}\t{i.C}"))) + .ExecuteAsync(args[0]) + .Wait(); + + // var builder = new DbContextOptionsBuilder(); // builder.UseSqlServer("Server=tcp:fundprocessprod.database.windows.net,1433;Initial Catalog=IREPreProd;Persist Security Info=False;User ID=ApplicationUser;Password=Pms.Application.User;MultipleActiveResultSets=True;Encrypt=True;TrustServerCertificate=False;Connection Timeout=300;"); // var context = new PmsDbContext(builder.Options, new TenantContext(1)); @@ -112,27 +155,27 @@ static void Main(string[] args) // // } // }); - using (var stream = File.OpenRead("InputFiles/testFile.pdf")) - { - var pdfReader = new PdfReader(stream, null, null, ExtractMethod.SimpleLines(), new Areas - { - ["ADDRESS"] = new PdfZone { Left = 9.2, Width = 11, Top = 29.7, Height = 9, PageNumber = 1 }, - }); - var pdfVisitor = new PdfVisitor(); - pdfReader.Read(pdfVisitor); - } + // using (var stream = File.OpenRead("InputFiles/testFile.pdf")) + // { + // var pdfReader = new PdfReader(stream, null, null, ExtractMethod.SimpleLines(), new Areas + // { + // ["ADDRESS"] = new PdfZone { Left = 9.2, Width = 11, Top = 29.7, Height = 9, PageNumber = 1 }, + // }); + // var pdfVisitor = new PdfVisitor(); + // pdfReader.Read(pdfVisitor); + // } // var dpis = new DirectoryInfo("/home/stephane/Downloads/IN").EnumerateFiles("*.pdf").Select(fileInfo => new { FileName = fileInfo.Name, Dpi = GetDpi(fileInfo.OpenRead()) }).ToList(); // foreach (var dpi in dpis) // Console.WriteLine($"{dpi.FileName}\t{dpi.Dpi}"); } - private static int GetDpi(Stream stream) - { - using (var pdfDocument = PdfDocument.Open(stream)) - return (int)pdfDocument.GetPages().SelectMany(page => page.GetImages()).Select(img => GetPpi(img)).Average(i => (i.xPpi + i.yPpi) / 2); - } + // private static int GetDpi(Stream stream) + // { + // using (var pdfDocument = PdfDocument.Open(stream)) + // return (int)pdfDocument.GetPages().SelectMany(page => page.GetImages()).Select(img => GetPpi(img)).Average(i => (i.xPpi + i.yPpi) / 2); + // } // private readonly HashSet _blackAndWhiteColorSpaces=new HashSet{ ColorSpace.CalGray, ColorSpace.DeviceGray }; - private static (int xPpi, int yPpi, bool color, int psize) GetPpi(IPdfImage img) => ((int)(72 * img.WidthInSamples / img.Bounds.Width), (int)(72 * img.HeightInSamples / img.Bounds.Height), img.ColorSpace == ColorSpace.DeviceGray, img.BitsPerComponent); + // private static (int xPpi, int yPpi, bool color, int psize) GetPpi(IPdfImage img) => ((int)(72 * img.WidthInSamples / img.Bounds.Width), (int)(72 * img.HeightInSamples / img.Bounds.Height), img.ColorSpace == ColorSpace.DeviceGray, img.BitsPerComponent); // { // contextStream // .CrossApply("ca", i => Enumerable.Range(0, 5).Select(j => $"{i}-{j}")) @@ -152,8 +195,4 @@ private static int GetDpi(Stream stream) // // .Do("print console", i => Console.WriteLine(i.Label)); // } } -} -public class Tmp -{ - } \ No newline at end of file