Skip to content

Commit

Permalink
Add logging when purging old telemetry files (#1957)
Browse files Browse the repository at this point in the history
* Add logging when purging old telemetry files

* more

* better message

* Better operation logging
  • Loading branch information
trask authored Nov 10, 2021
1 parent 5357377 commit 1cd0408
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public class LocalFileLoader {
// Records success/failure for the "Loading telemetry from disk" operation.
private static final OperationLogger operationLogger =
new OperationLogger(LocalFileLoader.class, "Loading telemetry from disk");

// Records success/failure for the "Updating local telemetry on disk" operation, i.e. the
// delete-or-rename step performed by updateProcessedFileStatus after a send attempt.
private static final OperationLogger updateOperationLogger =
new OperationLogger(LocalFileLoader.class, "Updating local telemetry on disk");

public LocalFileLoader(
LocalFileCache localFileCache,
File telemetryFolder,
Expand Down Expand Up @@ -161,43 +164,37 @@ private static void readFully(FileInputStream fileInputStream, byte[] byteArray,
// either delete it permanently on success or add it back to cache to be processed again later on
// failure
public void updateProcessedFileStatus(boolean success, File file) {
if (!file.exists()) {
// not sure why this would happen
updateOperationLogger.recordFailure("File no longer exists: " + file.getName());
return;
}
if (success) {
deleteFilePermanentlyOnSuccess(file);
// delete a file on the queue permanently when http response returns success.
if (!LocalStorageUtils.deleteFileWithRetries(file)) {
// TODO (heya) track file deletion failure via Statsbeat
updateOperationLogger.recordFailure("Fail to delete " + file.getName());
} else {
updateOperationLogger.recordSuccess();
}
} else {
// rename the temp file back to .trn source file extension
File sourceFile =
new File(telemetryFolder, FilenameUtils.getBaseName(file.getName()) + ".trn");
try {
FileUtils.moveFile(file, sourceFile);
} catch (IOException ex) {
operationLogger.recordFailure(
updateOperationLogger.recordFailure(
"Fail to rename " + file.getName() + " to have a .trn extension.", ex);
return;
}
updateOperationLogger.recordSuccess();

// add the source filename back to local file cache to be processed later.
localFileCache.addPersistedFilenameToMap(sourceFile.getName());
}
}

// delete a file on the queue permanently when http response returns success.
/** Deletes {@code file} via {@code deleteFile} when it is present on disk; otherwise does nothing. */
private static void deleteFilePermanentlyOnSuccess(File file) {
  if (file.exists()) {
    deleteFile(file);
  }
}

/** Attempts to delete {@code file} and records the outcome on {@code operationLogger}. */
private static void deleteFile(File file) {
  boolean deleted = LocalStorageUtils.deleteFileWithRetries(file);
  if (deleted) {
    operationLogger.recordSuccess();
    return;
  }
  // TODO (heya) track file deletion failure via Statsbeat
  operationLogger.recordFailure("Fail to delete " + file.getName());
}

private void incrementReadFailureCount() {
if (nonessentialStatsbeat != null) {
nonessentialStatsbeat.incrementReadFailureCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,23 @@ public void run() {

/**
 * Deletes the expired {@code .trn} files listed for {@code folder}.
 *
 * <p>Each failed deletion is logged individually; successful deletions are counted and reported
 * in a single summary warning when at least one file was removed.
 *
 * @param folder the folder whose {@code .trn} files are checked for expiry
 */
private void purgedExpiredFiles(File folder) {
  int deletedCount = 0;
  for (File candidate : FileUtils.listFiles(folder, new String[] {"trn"}, false)) {
    if (!expired(candidate.getName())) {
      continue;
    }
    if (LocalStorageUtils.deleteFileWithRetries(candidate)) {
      deletedCount++;
    } else {
      logger.warn(
          "Fail to delete the expired {} from folder '{}'.",
          candidate.getName(),
          folder.getName());
    }
  }
  // stay silent when nothing was purged
  if (deletedCount > 0) {
    logger.warn(
        "{} local telemetry file(s) in folder '{}' expired after 48 hours and were deleted",
        deletedCount,
        folder.getName());
  }
}

// files that are older than expiredIntervalSeconds (default 48 hours) are expired and need to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public class TelemetryChannel {
private static final OperationLogger operationLogger =
new OperationLogger(
TelemetryChannel.class,
"Sending telemetry to the ingestion service (telemetry will be stored to disk on failure):");
"Sending telemetry to the ingestion service (telemetry will be stored to disk on failure and retried later):");

// TODO (kryalama) do we still need this AtomicBoolean, or can we use throttling built in to the
// operationLogger?
Expand Down

0 comments on commit 1cd0408

Please sign in to comment.