Skip to content

Commit

Permalink
New API for downloading LDML files from projects that allow sharing W…
Browse files Browse the repository at this point in the history
…S data (#1309)

* Add "ldmlzip" command to hg command runner

Will return 403 Forbidden if project does not allow sharing ws data with
SLDR. Will also return same 403 Forbidden error code if project does not
exist, to avoid possibly leaking project codes.

If the project exists and allows data sharing, the command will return a zipfile
containing CachedSettings/WritingSystemStore/*.ldml from the tip revision.

* Add /api/projects/sldr-export endpoint

Returns 404 if there are no SLDR files available, which is unlikely to
happen in production but can happen in dev environments since none of
our test projects have `addToSldr="true"`. Otherwise it returns a zip
file with projects identified only by project ID.
  • Loading branch information
rmunn authored Jan 17, 2025
1 parent 8f541f8 commit c0dc0bf
Show file tree
Hide file tree
Showing 10 changed files with 161 additions and 6 deletions.
12 changes: 12 additions & 0 deletions backend/LexBoxApi/Controllers/ProjectController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,18 @@ private async Task StreamHttpResponse(HttpContent hgResult)
await hgResult.CopyToAsync(writer.AsStream());
}

/// <summary>
/// Admin-only endpoint that builds the SLDR export zip and returns it as a file download.
/// Responds with 404 when <c>PrepareLdmlZip</c> yields no zip file.
/// </summary>
[HttpGet("sldr-export")]
[AdminRequired]
[ProducesResponseType(StatusCodes.Status404NotFound)]
public async Task<ActionResult> GetLdmlZip(CancellationToken token)
{
    var zipPath = await projectService.PrepareLdmlZip(scheduler, token);
    if (zipPath is null)
    {
        return NotFound("No SLDR files available");
    }

    // The download name is simply the zip's own file name
    var downloadName = Path.GetFileName(zipPath);

    // The stream is handed to the file result and is intentionally not disposed here
    var zipStream = System.IO.File.OpenRead(zipPath);
    return File(zipStream, "application/zip", downloadName);
}

[HttpPost("updateMissingLanguageList")]
public async Task<ActionResult<string[]>> UpdateMissingLanguageList(int limit = 10)
{
Expand Down
51 changes: 51 additions & 0 deletions backend/LexBoxApi/Jobs/DeleteTempDirectoryJob.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
using Quartz;

namespace LexBoxApi.Jobs;

/// <summary>
/// Job that recursively deletes a directory, but only when that directory lies strictly inside
/// the system temp directory and is a real directory (not a symlink / reparse point).
/// </summary>
public class DeleteTempDirectoryJob() : LexJob
{
    /// <summary>
    /// Schedules deletion of <paramref name="path"/> once <paramref name="delay"/> has elapsed.
    /// Silently does nothing when the path is not strictly inside the system temp directory.
    /// </summary>
    /// <param name="schedulerFactory">Factory used to obtain the scheduler the job is queued on.</param>
    /// <param name="path">Directory to delete.</param>
    /// <param name="delay">How long to wait before the deletion runs.</param>
    /// <param name="cancellationToken">Cancels the queueing operation.</param>
    public static async Task Queue(ISchedulerFactory schedulerFactory,
        string path,
        TimeSpan delay,
        CancellationToken cancellationToken = default)
    {
        // Refuse to even schedule a deletion for anything outside the temp directory
        if (!PathIsInTempDir(path)) return;
        await QueueJob(schedulerFactory,
            Key,
            new JobDataMap { { nameof(Path), path } },
            delay,
            cancellationToken);
    }

    public static JobKey Key { get; } = new(nameof(DeleteTempDirectoryJob), "CleanupJobs");

    /// <summary>Directory to delete. <see cref="Queue"/> stores it in the job data under <c>nameof(Path)</c>.</summary>
    public string? Path { get; set; }

    /// <summary>
    /// Deletes <see cref="Path"/> recursively if it still passes every safety check at execution time.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when <see cref="Path"/> is null or empty.</exception>
    protected override Task ExecuteJob(IJobExecutionContext context)
    {
        ArgumentException.ThrowIfNullOrEmpty(Path);

        // Validate and delete the *normalized* path, so the path that was checked is exactly the path
        // that gets removed ("<temp>/x/../.." must never be handed to Directory.Delete as-is)
        var fullPath = NormalizePath(Path);
        if (fullPath is null || !IsStrictlyInsideTempDir(fullPath)) return Task.CompletedTask;
        if (Directory.Exists(fullPath) && PathIsSafeToDelete(fullPath)) Directory.Delete(fullPath, true);
        return Task.CompletedTask;
    }

    /// <summary>
    /// True only when <paramref name="path"/>, after normalization, names something strictly below the
    /// system temp directory. The temp directory itself and paths escaping it via ".." are rejected.
    /// </summary>
    private static bool PathIsInTempDir(string path)
    {
        var fullPath = NormalizePath(path);
        return fullPath is not null && IsStrictlyInsideTempDir(fullPath);
    }

    /// <summary>
    /// Resolves "." / ".." segments and strips any trailing separator.
    /// Returns null when the path is empty or cannot be normalized.
    /// </summary>
    private static string? NormalizePath(string path)
    {
        if (string.IsNullOrEmpty(path)) return null;
        try
        {
            return System.IO.Path.TrimEndingDirectorySeparator(System.IO.Path.GetFullPath(path));
        }
        catch (Exception ex) when (ex is ArgumentException or IOException or NotSupportedException)
        {
            return null; // An un-normalizable path is never safe to delete
        }
    }

    /// <summary>
    /// Ordinal prefix check against "&lt;temp dir&gt;&lt;separator&gt;". Because <paramref name="fullPath"/>
    /// carries no trailing separator, the temp directory itself can never match.
    /// </summary>
    private static bool IsStrictlyInsideTempDir(string fullPath)
    {
        // Only safe to delete files from the system temp directory
        var tempDir = System.IO.Path.GetTempPath();
        if (string.IsNullOrEmpty(tempDir)) return false;
        var prefix = System.IO.Path.TrimEndingDirectorySeparator(System.IO.Path.GetFullPath(tempDir))
                     + System.IO.Path.DirectorySeparatorChar;
        return fullPath.StartsWith(prefix, StringComparison.Ordinal);
    }

    /// <summary>
    /// True when <paramref name="path"/> is a directory and is not a symlink / reparse point.
    /// </summary>
    private static bool PathIsSafeToDelete(string path)
    {
        try
        {
            var attributes = File.GetAttributes(path);
            // Must be a directory *and* must not be a symlink
            return attributes.HasFlag(FileAttributes.Directory) && !attributes.HasFlag(FileAttributes.ReparsePoint);
        }
        catch
        {
            return false; // If anything at all goes wrong, we want to abort
        }
    }
}
15 changes: 14 additions & 1 deletion backend/LexBoxApi/Jobs/LexJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,25 @@ public abstract class LexJob : IJob
/// <summary>
/// Queues the job identified by <paramref name="key"/> on the scheduler obtained from
/// <paramref name="schedulerFactory"/>. Before queueing, the current activity's trace ID and span ID
/// (or empty strings when there is no current activity) are written into <paramref name="data"/>.
/// </summary>
/// <param name="schedulerFactory">Factory used to obtain the scheduler.</param>
/// <param name="key">Key of the job to run.</param>
/// <param name="data">Job data passed to the job; mutated to include the trace/span IDs.</param>
/// <param name="delay">
/// When null the job is triggered immediately. Otherwise a trigger is scheduled to start after the
/// absolute value of the delay (<c>Duration()</c>), so a negative delay is treated like a positive one.
/// </param>
/// <param name="cancellationToken">Passed through to the scheduler calls.</param>
protected static async Task QueueJob(ISchedulerFactory schedulerFactory,
JobKey key,
JobDataMap data,
TimeSpan? delay = null,
CancellationToken cancellationToken = default)
{
var scheduler = await schedulerFactory.GetScheduler(cancellationToken);
// Record where the job was queued from, as hex strings (empty when no activity is current)
data[nameof(JobTriggerTraceId)] = Activity.Current?.Context.TraceId.ToHexString() ?? string.Empty;
data[nameof(JobTriggerSpanParentId)] = Activity.Current?.Context.SpanId.ToHexString() ?? string.Empty;
// NOTE(review): this unconditional TriggerJob sits in front of the delay branch below. In this diff view it
// looks like the pre-change line; confirm it is absent from the real file, otherwise the job is triggered
// immediately here and then triggered/scheduled a second time by the if/else.
await scheduler.TriggerJob(key, data, cancellationToken);
if (delay is null)
{
// No delay requested: fire the job right away
await scheduler.TriggerJob(key, data, cancellationToken);
}
else
{
// Delay requested: build a trigger for the job that starts |delay| from now (UTC)
var trigger = TriggerBuilder.Create()
.StartAt(DateTime.UtcNow.Add(delay.Value.Duration()))
.ForJob(key)
.UsingJobData(data)
.Build();
await scheduler.ScheduleJob(trigger, cancellationToken);
}
}

async Task IJob.Execute(IJobExecutionContext context)
Expand Down
2 changes: 1 addition & 1 deletion backend/LexBoxApi/Jobs/RetryEmailJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ await QueueJob(schedulerFactory,
{ nameof(RetryCount), retryCount.ToString() },
{ nameof(RetryWaitSeconds), retryWaitSeconds.ToString() },
},
cancellationToken);
cancellationToken: cancellationToken);
}

public static JobKey Key { get; } = new("RetryEmailJob", "RetryingJobs");
Expand Down
2 changes: 1 addition & 1 deletion backend/LexBoxApi/Jobs/UpdateProjectMetadataJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public static async Task Queue(ISchedulerFactory schedulerFactory,
await QueueJob(schedulerFactory,
Key,
new JobDataMap { { nameof(ProjectCode), projectCode } },
cancellationToken);
cancellationToken: cancellationToken);
}

public static JobKey Key { get; } = new("UpdateProjectMetadataJob", "DataUpdate");
Expand Down
1 change: 1 addition & 0 deletions backend/LexBoxApi/ScheduledTasksKernel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public static void AddScheduledTasks(this IServiceCollection services, IConfigur

//Setup jobs
q.AddJob<CleanupResetBackupJob>(CleanupResetBackupJob.Key);
q.AddJob<DeleteTempDirectoryJob>(DeleteTempDirectoryJob.Key, j => j.StoreDurably());
q.AddJob<UpdateProjectMetadataJob>(UpdateProjectMetadataJob.Key, j => j.StoreDurably());
q.AddJob<RetryEmailJob>(RetryEmailJob.Key, j => j.StoreDurably());
q.AddTrigger(opts => opts.ForJob(CleanupResetBackupJob.Key)
Expand Down
18 changes: 18 additions & 0 deletions backend/LexBoxApi/Services/HgService.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Diagnostics;
using System.IO.Compression;
using System.Net;
using System.Net.Http.Headers;
using System.Runtime.InteropServices;
using System.Text;
Expand Down Expand Up @@ -491,6 +492,13 @@ public async Task<string> HgCommandHealth()
return version.Trim();
}

/// <summary>
/// Runs the "ldmlzip" hg command for the given project and wraps the response body in a read-only
/// <see cref="ZipArchive"/>.
/// </summary>
/// <param name="code">Project whose LDML files are requested.</param>
/// <param name="token">Cancels the HTTP request and the stream acquisition.</param>
/// <returns>The zip archive, or null when the command server answers 403 Forbidden.</returns>
public async Task<ZipArchive?> GetLdmlZip(ProjectCode code, CancellationToken token = default)
{
    // 403 is an expected answer here and is mapped to "no archive" instead of an exception
    HttpStatusCode[] expectedFailures = [HttpStatusCode.Forbidden];
    var zipContent = await MaybeExecuteHgCommandServerCommand(code, "ldmlzip", expectedFailures, token);
    if (zipContent is not null)
    {
        var zipStream = await zipContent.ReadAsStreamAsync(token);
        return new ZipArchive(zipStream, ZipArchiveMode.Read);
    }

    return null;
}

private async Task<HttpContent> ExecuteHgCommandServerCommand(ProjectCode code, string command, CancellationToken token)
{
var httpClient = _hgClient.Value;
Expand All @@ -500,6 +508,16 @@ private async Task<HttpContent> ExecuteHgCommandServerCommand(ProjectCode code,
return response.Content;
}

/// <summary>
/// Sends a GET request for <paramref name="command"/> on project <paramref name="code"/> to the hg command
/// server, treating the status codes listed in <paramref name="okErrors"/> as an expected "no result" outcome.
/// </summary>
/// <param name="code">Project the command is run against.</param>
/// <param name="command">Name of the command-server command.</param>
/// <param name="okErrors">Status codes that should produce a null result instead of an exception.</param>
/// <param name="token">Cancels the HTTP request.</param>
/// <returns>
/// The response content (only headers have been read; the body is still streaming), or null when the
/// response status is one of <paramref name="okErrors"/>.
/// </returns>
/// <exception cref="HttpRequestException">Thrown for any other non-success status code.</exception>
private async Task<HttpContent?> MaybeExecuteHgCommandServerCommand(ProjectCode code, string command, IEnumerable<HttpStatusCode> okErrors, CancellationToken token)
{
    var httpClient = _hgClient.Value;
    var baseUri = _options.Value.HgCommandServer;
    var response = await httpClient.GetAsync($"{baseUri}{code}/{command}", HttpCompletionOption.ResponseHeadersRead, token);
    if (okErrors.Contains(response.StatusCode))
    {
        // With ResponseHeadersRead the body has not been consumed, so the response must be disposed
        // explicitly; otherwise the underlying connection stays tied up
        response.Dispose();
        return null;
    }

    try
    {
        response.EnsureSuccessStatusCode();
    }
    catch
    {
        // Nobody will ever read this response; release it before propagating the failure
        response.Dispose();
        throw;
    }

    // Success: the caller now owns the content (and through it, the response stream)
    return response.Content;
}

public async Task<ProjectType> DetermineProjectType(ProjectCode projectCode)
{
var response = await GetResponseMessage(projectCode, "file/tip?style=json-lex");
Expand Down
38 changes: 38 additions & 0 deletions backend/LexBoxApi/Services/ProjectService.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using System.Data.Common;
using System.Globalization;
using System.IO.Compression;
using LexBoxApi.Jobs;
using LexBoxApi.Models.Project;
using LexBoxApi.Services.Email;
using LexCore.Auth;
Expand All @@ -10,6 +13,7 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Options;
using Path = System.IO.Path; // Resolves ambiguous reference with HotChocolate.Path

namespace LexBoxApi.Services;

Expand Down Expand Up @@ -269,6 +273,40 @@ public async Task ResetLexEntryCount(string projectCode)
}
}

/// <summary>
/// Fetches the LDML zip for a FLEx project and unpacks it into a subdirectory of
/// <paramref name="destRoot"/> named after the project's ID, replacing any existing directory of that name.
/// </summary>
/// <param name="project">Project to export; anything other than a FLEx project is ignored.</param>
/// <param name="destRoot">Directory under which the per-project directory is created.</param>
/// <param name="token">Passed to the hg service when requesting the zip.</param>
/// <returns>The directory the files were extracted to, or null when nothing was extracted.</returns>
public async Task<DirectoryInfo?> ExtractLdmlZip(Project project, string destRoot, CancellationToken token = default)
{
    if (project.Type != ProjectType.FLEx)
    {
        return null;
    }

    using var archive = await hgService.GetLdmlZip(project.Code, token);
    if (archive is null)
    {
        return null;
    }

    // Projects are identified in the export by ID only, never by code or name
    var targetPath = Path.Join(destRoot, project.Id.ToString());
    if (Directory.Exists(targetPath))
    {
        Directory.Delete(targetPath, true);
    }

    var targetDir = Directory.CreateDirectory(targetPath);
    archive.ExtractToDirectory(targetDir.FullName, true);
    return targetDir;
}

/// <summary>
/// Builds a zip of the LDML files of every FLEx project inside a fresh, timestamped working directory
/// under the system temp directory, and queues a job that deletes that working directory four hours later.
/// </summary>
/// <param name="schedulerFactory">Used to queue the delayed cleanup job.</param>
/// <param name="token">Passed to each per-project extraction.</param>
/// <returns>Full path of the created zip file, or null when no project produced any files.</returns>
public async Task<string?> PrepareLdmlZip(Quartz.ISchedulerFactory schedulerFactory, CancellationToken token = default)
{
    var timestamp = DateTime.UtcNow.ToString("yyyyMMdd-HHmmss", CultureInfo.InvariantCulture);

    // Start from an empty working directory and schedule its removal up front
    var workDir = Path.Join(Path.GetTempPath(), $"sldr-export-{timestamp}");
    if (Directory.Exists(workDir))
    {
        Directory.Delete(workDir, true);
    }
    Directory.CreateDirectory(workDir);
    await DeleteTempDirectoryJob.Queue(schedulerFactory, workDir, TimeSpan.FromHours(4));

    // Every project is extracted into its own subdirectory of zipRoot
    var zipRoot = Path.Join(workDir, "zipRoot");
    Directory.CreateDirectory(zipRoot);
    var flexProjects = dbContext.Projects.Where(p => p.Type == ProjectType.FLEx).AsAsyncEnumerable();
    await foreach (var project in flexProjects)
    {
        await ExtractLdmlZip(project, zipRoot, token);
    }

    var zipFilePath = Path.Join(workDir, $"sldr-{timestamp}.zip");
    if (File.Exists(zipFilePath))
    {
        File.Delete(zipFilePath);
    }

    // If we would create an empty .zip file, just return null instead (will become a 404)
    var anythingExtracted = Directory.EnumerateDirectories(zipRoot).Any();
    if (!anythingExtracted)
    {
        return null;
    }

    ZipFile.CreateFromDirectory(zipRoot, zipFilePath, CompressionLevel.Fastest, includeBaseDirectory: false);
    return zipFilePath;
}

public async Task<DateTimeOffset?> UpdateLastCommit(string projectCode)
{
var project = await dbContext.Projects.FirstOrDefaultAsync(p => p.Code == projectCode);
Expand Down
2 changes: 2 additions & 0 deletions backend/LexCore/ServiceInterfaces/IHgService.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using LexCore.Entities;
using System.IO.Compression;

namespace LexCore.ServiceInterfaces;

Expand All @@ -22,6 +23,7 @@ public interface IHgService
Task<int?> GetRepoSizeInKb(ProjectCode code, CancellationToken token = default);
Task<int?> GetLexEntryCount(ProjectCode code, ProjectType projectType);
Task<string?> GetRepositoryIdentifier(Project project);
Task<ZipArchive?> GetLdmlZip(ProjectCode code, CancellationToken token = default);
Task<HttpContent> ExecuteHgRecover(ProjectCode code, CancellationToken token);
Task<HttpContent> InvalidateDirCache(ProjectCode code, CancellationToken token = default);
bool HasAbandonedTransactions(ProjectCode projectCode);
Expand Down
26 changes: 23 additions & 3 deletions hgweb/command-runner.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#!/bin/bash

# Define the list of allowed commands
allowed_commands=("verify" "tip" "tipdate" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache")
allowed_commands=("verify" "tip" "tipdate" "ldmlzip" "reposizeinkb" "wesaylexentrycount" "lexentrycount" "flexprojectid" "flexwritingsystems" "flexmodelversion" "recover" "healthz" "invalidatedircache")

# Get the project code and command name from the URL
IFS='/' read -ra PATH_SEGMENTS <<< "$PATH_INFO"
project_code="${PATH_SEGMENTS[1]}"
command_name="${PATH_SEGMENTS[2]}"

# Ensure the project code and command name are safe to use in a shell command
if [[ ! $project_code =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! $command_name =~ ^[a-zA-Z0-9]+$ ]]; then
if [[ ! "$project_code" =~ ^[a-z0-9][a-z0-9-]*$ ]] || [[ ! "$command_name" =~ ^[a-zA-Z0-9]+$ ]]; then
echo "Content-type: text/plain"
echo "Status: 400 Bad Request"
echo ""
Expand Down Expand Up @@ -38,8 +38,23 @@ if [[ $command_name == "healthz" ]]; then
exit 0
fi

if [[ $command_name == "ldmlzip" ]]; then
    # Preflight check: ldml zip access is only allowed if LexiconSettings.plsx contains addToSldr="true"
    # on its <WritingSystems> element in the tip revision. Any failure of the check (including the
    # repository not existing) ends in the same 403 response.
    first_char="${project_code:0:1}"
    if ! chg --cwd "/var/hg/repos/$first_char/$project_code" cat -r tip CachedSettings/SharedSettings/LexiconSettings.plsx | grep '<WritingSystems' | grep 'addToSldr="true"' >/dev/null; then
        echo "Content-type: text/plain"
        echo "Status: 403 Forbidden"
        echo ""
        echo "Forbidden. Project does not allow sharing writing systems with SLDR or project does not exist"
        exit 1
    fi
    # Sharing is allowed: the response body will be a zip archive
    CONTENT_TYPE="application/zip"
fi

CONTENT_TYPE="${CONTENT_TYPE:-text/plain}"
# Start outputting the result right away so the HTTP connection won't be timed out
echo "Content-type: text/plain"
echo "Content-type: ${CONTENT_TYPE}"
echo ""

# Run the hg command, simply output to stdout
Expand Down Expand Up @@ -90,6 +105,11 @@ case $command_name in
du -ks .hg | cut -f1
;;

ldmlzip)
# -p '.' so that resulting zipfiles will *not* have the project name in the file paths
chg archive -p '.' -t zip -r tip -I 'CachedSettings/WritingSystemStore/*.ldml' -
;;

verify)
# Env var PYTHONUNBUFFERED required for commands like verify and recover, so that output can stream back to the project page
export PYTHONUNBUFFERED=1
Expand Down

0 comments on commit c0dc0bf

Please sign in to comment.