Skip to content

Commit

Permalink
Issue 47144: Handle large files on Panorama Public via Symlinks (#344)
Browse files Browse the repository at this point in the history
* - PanoramaPublicFileImporter logs to the job log, and throws an exception if any of the datafileurls could not be fixed.
- PanoramaPublicSymlinkManager.moveAndSymLinkDirectory takes a Logger parameter so the log output can go to the job log
- Added a PanoramaPublicMetadataImporter. I moved some of the code out of CopyExperimentFinalTask into this class. This creates a row in the panoramapublic.experimentannotations table.  It runs before PanoramaPublicFileImporter so that if there is an error, e.g. datafileurls cannot be fixed, the container can be deleted to move files back to the source container.
- Updated test - import a document into a subfolder of the container file root.

* - Fire symlink update events only when file / container being moved / renamed / deleted is in the Panorama Public project. We don't expect folders in other projects to contain symlink targets.
- When handling folder rename (ContainerListener.propertyChange), pass the full paths of the old and renamed containers instead of just the folder names. Otherwise, it can lead to updating all symlinks that have the old folder name in the path.
- When deleting a folder, use ExperimentAnnotationsManager.getExperimentIncludesContainer(c) to lookup the experiment. This method will return the experiment that contains runs from the folder even if it is a subfolder of the folder where the experiment was created.
- When an experiment folder in Panorama Public is deleted, move the files back to the next highest experiment version if one exists. Otherwise, move the files back to the source folder.

* Rework datafile alignment

* Scope datafile url to correct container

* Removed PanoramaPublicFileWriter.

* Limit the number of containers to look at when updating symlinks. This should only include the source container in the submitter's project as well as any containers with older versions of the data on Panorama Public.

* Remove code to lookup runs in the source container when aligning datafileUrls. This should not be required anymore due to LabKey/targetedms#724.
Set filePathRoot on the copied expRun to be the target container's file root.
Log error if the data file path is unexpected, i.e. it does not contain "Run<runid>"

Co-authored-by: vagisha <vagisha@gmail.com>
Co-authored-by: Josh Eckels <jeckels@labkey.com>
Co-authored-by: labkey-sweta <swetaj@labkey.com>
  • Loading branch information
4 people authored Jul 1, 2023
1 parent 8fe5720 commit 5bcf018
Show file tree
Hide file tree
Showing 20 changed files with 1,722 additions and 314 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,20 @@ private boolean validateAction(CopyExperimentForm form, BindException errors)
return true;
}

/**
 * Resolves the export directory beneath the "files" content root of a container.
 *
 * @param c container whose file root is looked up
 * @return the container's file root resolved against {@code PipelineService.EXPORT_DIR}, or
 *         {@code null} when the FileContentService is unavailable or the container has no file root
 */
private Path getExportFilesDir(Container c)
{
    FileContentService service = FileContentService.get();
    if (service == null)
    {
        return null;
    }

    Path root = service.getFileRootPath(c, FileContentService.ContentType.files);
    return root == null ? null : root.resolve(PipelineService.EXPORT_DIR);
}

@Override
public boolean handlePost(CopyExperimentForm form, BindException errors)
{
Expand Down Expand Up @@ -1539,13 +1553,15 @@ public boolean handlePost(CopyExperimentForm form, BindException errors)
return false;
}

String previousVersionName = null;
Submission previousSubmission = _journalSubmission.getLatestCopiedSubmission();
if (previousSubmission != null)
{
// Target folder name is automatically populated in the copy experiment form. Unless the admin making the copy changed the
// folder name we expect the previous copy of the data to have the same folder name. Rename the old folder so that we can
// use the same folder name for the new copy.
if (!renamePreviousFolder(previousSubmission, destinationFolder, errors))
previousVersionName = renamePreviousFolder(previousSubmission, destinationFolder, errors);
if (previousVersionName == null)
{
return false;
}
Expand Down Expand Up @@ -1573,8 +1589,13 @@ public boolean handlePost(CopyExperimentForm form, BindException errors)
job.setUsePxTestDb(form.isUsePxTestDb());
job.setAssignDoi(form.isAssignDoi());
job.setUseDataCiteTestApi(form.isUseDataCiteTestApi());
job.setMoveAndSymlink(form.isMoveAndSymlink());
job.setReviewerEmailPrefix(form.getReviewerEmailPrefix());
job.setDeletePreviousCopy(form.isDeleteOldCopy());
job.setPreviousVersionName(previousVersionName);
job.setExportTargetPath(getExportFilesDir(target));
job.setExportSourceContainer(form.getContainer());

PipelineService.get().queueJob(job);

_successURL = PageFlowUtil.urlProvider(PipelineStatusUrls.class).urlBegin(target);
Expand All @@ -1586,12 +1607,14 @@ public boolean handlePost(CopyExperimentForm form, BindException errors)
}
}

private boolean renamePreviousFolder(Submission previousSubmission, String targetContainerName, BindException errors)
private String renamePreviousFolder(Submission previousSubmission, String targetContainerName, BindException errors)
{
String newPath = null;
ExperimentAnnotations previousCopy = ExperimentAnnotationsManager.get(previousSubmission.getCopiedExperimentId());
if (previousCopy != null)
{
Container previousContainer = previousCopy.getContainer();
newPath = previousContainer.getPath();
if (targetContainerName.equals(previousContainer.getName()))
{
try (DbScope.Transaction transaction = PanoramaPublicManager.getSchema().getScope().ensureTransaction())
Expand All @@ -1601,21 +1624,23 @@ private boolean renamePreviousFolder(Submission previousSubmission, String targe
{
errors.reject(ERROR_MSG, "Previous experiment copy (Id: " + previousCopy.getId() + ") does not have a version. " +
"Cannot rename previous folder.");
return false;
return null;
}
// Rename the container where the old copy lives so that the same folder name can be used for the new copy.
String newName = previousContainer.getName() + " V." + version;
if (ContainerManager.getChild(previousContainer.getParent(), newName) != null)
{
errors.reject(ERROR_MSG, "Cannot rename previous folder to '" + newName + "'. A folder with that name already exists.");
return false;
return null;
}
ContainerManager.rename(previousContainer, getUser(), newName);

newPath = FileContentService.get().getFileRoot(previousContainer.getParent()) + File.separator + newName;
transaction.commit();
}
}
}
return true;
return newPath;
}

private ValidEmail getValidEmail(String email, String errMsg, BindException errors)
Expand Down Expand Up @@ -1683,6 +1708,8 @@ public static class CopyExperimentForm extends ExperimentIdForm
private boolean _usePxTestDb; // Use the test database for getting a PX ID if true
private boolean _assignDoi;
private boolean _useDataCiteTestApi;

private boolean _moveAndSymlink;
private boolean _deleteOldCopy;

static void setDefaults(CopyExperimentForm form, ExperimentAnnotations sourceExperiment, Submission currentSubmission)
Expand All @@ -1696,6 +1723,7 @@ static void setDefaults(CopyExperimentForm form, ExperimentAnnotations sourceExp
form.setUsePxTestDb(false);

form.setAssignDoi(true);
form.setMoveAndSymlink(true);
form.setUseDataCiteTestApi(false);

Container sourceExptContainer = sourceExperiment.getContainer();
Expand Down Expand Up @@ -1819,6 +1847,16 @@ public void setUseDataCiteTestApi(boolean useDataCiteTestApi)
_useDataCiteTestApi = useDataCiteTestApi;
}

/**
 * @return the current value of the move-and-symlink option held by this form
 */
public boolean isMoveAndSymlink()
{
    return this._moveAndSymlink;
}

/**
 * Stores the move-and-symlink option on this form.
 *
 * @param moveAndSymlink the new value of the option
 */
public void setMoveAndSymlink(boolean moveAndSymlink)
{
    this._moveAndSymlink = moveAndSymlink;
}

public boolean isDeleteOldCopy()
{
return _deleteOldCopy;
Expand Down Expand Up @@ -5626,7 +5664,7 @@ public ExperimentAnnotationsDetails(User user, ExperimentAnnotations exptAnnotat
{
// Display the version only if there is more than one version of this dataset on Panorama Public
_version = _experimentAnnotations.getStringVersion(maxVersion);
if (_experimentAnnotations.getDataVersion().equals(maxVersion))
if (_experimentAnnotations.getDataVersion() != null && _experimentAnnotations.getDataVersion().equals(maxVersion))
{
// This is the current version; Display a link to see all published versions
_versionsUrl = new ActionURL(PanoramaPublicController.ShowPublishedVersions.class, _experimentAnnotations.getContainer());
Expand Down Expand Up @@ -9050,6 +9088,20 @@ public Pair<AttachmentParent, String> getAttachment(AttachmentForm form)
}
}

/**
 * Read-only API action that asks {@code PanoramaPublicSymlinkManager} to verify its symlinks.
 * Responds with {@code success()} when verification passes; otherwise an error is rejected on the
 * {@code BindException} and {@code null} is returned.
 */
@RequiresPermission(ReadPermission.class)
public static class VerifySymlinksAction extends ReadOnlyApiAction<CatalogForm>
{
    @Override
    public Object execute(CatalogForm catalogForm, BindException errors) throws Exception
    {
        // The form argument is not consulted; verification is delegated entirely to the manager.
        boolean symlinksOk = PanoramaPublicSymlinkManager.get().verifySymlinks();
        if (!symlinksOk)
        {
            errors.reject(ERROR_MSG, "Problems with symlink registration. See log for details.");
            return null;
        }

        return success();
    }
}

@RequiresPermission(ReadPermission.class)
public static class GetCatalogApiAction extends ReadOnlyApiAction<CatalogForm>
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package org.labkey.panoramapublic;

import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.admin.AbstractFolderImportFactory;
import org.labkey.api.admin.FolderImportContext;
import org.labkey.api.admin.FolderImporter;
import org.labkey.api.admin.ImportException;
import org.labkey.api.admin.SubfolderWriter;
import org.labkey.api.data.Container;
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExpRun;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.files.FileContentService;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineService;
import org.labkey.api.query.BatchValidationException;
import org.labkey.api.security.User;
import org.labkey.api.writer.VirtualFile;
import org.labkey.panoramapublic.pipeline.CopyExperimentPipelineJob;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;

/**
 * Folder importer for the Panorama Public files data type. Rather than copying files, it hands the source
 * and target "files" directories to {@code PanoramaPublicSymlinkManager.moveAndSymLinkDirectory}, and then
 * re-points the runs and data rows in the target container at the target container's file root.
 */
public class PanoramaPublicFileImporter implements FolderImporter
{
    /** Regex for the "Run&lt;runid&gt;" path element expected in the file path of copied data rows. */
    private static final String RUN_DIR_REGEX = "Run\\d+";

    @Override
    public String getDataType()
    {
        return PanoramaPublicManager.PANORAMA_PUBLIC_FILES;
    }

    @Override
    public String getDescription()
    {
        return "Panorama Public Files";
    }

    /**
     * Moves the exported files of a {@code CopyExperimentPipelineJob} into the target container and
     * aligns the data file urls of the copied runs. Does nothing for any other kind of job.
     *
     * @param job  the running pipeline job; only {@code CopyExperimentPipelineJob} instances are processed
     * @param ctx  import context supplying the target container and the job logger
     * @param root virtual file whose location identifies the export directory being imported
     * @throws Exception if the source directory cannot be determined, the file move fails, or one or more
     *                   data file urls could not be aligned
     */
    @Override
    public void process(@Nullable PipelineJob job, FolderImportContext ctx, VirtualFile root) throws Exception
    {
        Logger log = ctx.getLogger();

        FileContentService fcs = FileContentService.get();
        if (null == fcs)
            return;

        File targetRoot = fcs.getFileRoot(ctx.getContainer());

        if (null == targetRoot)
        {
            log.error("File copy target folder not found: " + ctx.getContainer().getPath());
            return;
        }

        if (null == job)
        {
            log.error("Pipeline job not found.");
            return;
        }

        if (job instanceof CopyExperimentPipelineJob expJob)
        {
            File targetFiles = new File(targetRoot.getPath(), FileContentService.FILES_LINK);
            File sourceFiles = getSourceFilesDir(fcs, expJob, root);

            if (!targetFiles.exists())
            {
                log.warn("Panorama public file copy target not found. Creating directory: " + targetFiles);
                Files.createDirectories(targetFiles.toPath());
            }

            log.info("Moving files and creating sym links in folder " + ctx.getContainer().getPath());
            PanoramaPublicSymlinkManager.get().moveAndSymLinkDirectory(expJob, sourceFiles, targetFiles, false, log);

            alignDataFileUrls(expJob.getUser(), ctx.getContainer(), log);
        }
    }

    /**
     * Determines the "files" directory in the export source container that corresponds to the folder
     * being imported, including any subfolder portion found in the import location.
     *
     * @throws ImportException if the import location does not contain the expected export directory
     *                         marker, or the source container has no file root. Failing here avoids
     *                         moving files from a path derived from a bad substring offset.
     */
    private File getSourceFilesDir(FileContentService fcs, CopyExperimentPipelineJob expJob, VirtualFile root) throws ImportException
    {
        String divider = FileContentService.FILES_LINK + File.separator + PipelineService.EXPORT_DIR;
        String location = root.getLocation();
        int dividerIdx = null == location ? -1 : location.lastIndexOf(divider);
        if (dividerIdx < 0)
        {
            throw new ImportException("Unexpected import location. Expected it to contain '" + divider + "': " + location);
        }

        // Everything after the export directory is the subfolder path; drop the subfolder-writer directory elements.
        String subProject = location.substring(dividerIdx + divider.length())
                .replace(File.separator + SubfolderWriter.DIRECTORY_NAME, "");

        File sourceRoot = fcs.getFileRoot(expJob.getExportSourceContainer());
        if (null == sourceRoot)
        {
            throw new ImportException("File root not found for the export source container.");
        }

        Path sourcePath = Paths.get(sourceRoot.getPath(), subProject);
        return Paths.get(sourcePath.toString(), FileContentService.FILES_LINK).toFile();
    }

    /**
     * Sets the file path root of every run in the target container to the container's file root, and
     * re-points each run data row whose url contains {@code FileContentService.FILES_LINK} at the
     * matching file beneath that root.
     *
     * @throws ImportException if the file root does not exist, or if any data file url could not be aligned
     */
    private void alignDataFileUrls(User user, Container targetContainer, Logger log) throws BatchValidationException, ImportException
    {
        log.info("Aligning data files urls in folder: " + targetContainer.getPath());

        FileContentService fcs = FileContentService.get();
        if (null == fcs)
            return;

        ExperimentService expService = ExperimentService.get();
        List<? extends ExpRun> runs = expService.getExpRuns(targetContainer, null, null);
        boolean errors = false;

        Path fileRootPath = fcs.getFileRootPath(targetContainer, FileContentService.ContentType.files);
        if (fileRootPath == null || !Files.exists(fileRootPath))
        {
            throw new ImportException("File root path for container " + targetContainer.getPath() + " does not exist: " + fileRootPath);
        }

        for (ExpRun run : runs)
        {
            run.setFilePathRootPath(fileRootPath);
            run.save(user);
            log.debug("Setting filePathRoot on copied run: " + run.getName() + " to: " + fileRootPath);

            for (ExpData data : run.getAllDataUsedByRun())
            {
                // Data rows without a file url have nothing to align; skip them instead of failing on null.
                String dataFileUrl = null != data.getRun() ? data.getDataFileUrl() : null;
                if (null == dataFileUrl || !dataFileUrl.contains(FileContentService.FILES_LINK))
                {
                    continue;
                }

                // Split only on the FIRST "Run<runid>" element so that a file or directory name that itself
                // contains "Run<digits>" (e.g. "TestRun1.sky.zip") is kept intact in the relative path.
                String[] parts = Objects.requireNonNull(data.getFilePath()).toString().split(RUN_DIR_REGEX, 2);

                if (parts.length > 1 && !parts[1].isEmpty())
                {
                    String fileName = parts[1];
                    Path newDataPath = Paths.get(fileRootPath.toString(), fileName);

                    if (newDataPath.toFile().exists())
                    {
                        data.setDataFileURI(newDataPath.toUri());
                        data.save(user);
                        log.debug("Setting dataFileUri on copied data: " + data.getName() + " to: " + newDataPath);
                    }
                    else
                    {
                        log.error("Data file not found: " + newDataPath.toUri());
                        errors = true;
                    }
                }
                else
                {
                    log.error("Unexpected data file path. Could not align dataFileUri. " + data.getFilePath().toString());
                    errors = true;
                }
            }
        }
        if (errors)
        {
            throw new ImportException("Data files urls could not be aligned.");
        }
    }

    /** Factory that creates {@link PanoramaPublicFileImporter} instances. */
    public static class Factory extends AbstractFolderImportFactory
    {
        @Override
        public FolderImporter create()
        {
            return new PanoramaPublicFileImporter();
        }

        @Override
        public int getPriority()
        {
            // We want this to run last to do exp.data.datafileurl cleanup
            return PanoramaPublicManager.PRIORITY_PANORAMA_PUBLIC_FILES;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package org.labkey.panoramapublic;

import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.data.Container;
import org.labkey.api.data.SQLFragment;
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.files.FileListener;
import org.labkey.api.security.User;

import java.io.File;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;

/**
 * {@link FileListener} for Panorama Public. On a file move it asks {@code PanoramaPublicSymlinkManager}
 * to update symlinks and re-points the matching {@code ExpData}; on a file delete it deletes the
 * matching {@code ExpData}. File creation is ignored and file listing is not supported.
 */
public class PanoramaPublicFileListener implements FileListener
{
    /** @return always {@code null}; this listener does not report a source name */
    @Override
    public String getSourceName()
    {
        return null;
    }

    @Override
    public void fileCreated(@NotNull File created, @Nullable User user, @Nullable Container container)
    {
        // Intentionally empty: nothing is done when a file is created.
    }

    /**
     * Fires a symlink update from {@code src} to {@code dest}, then sets the data file URI of the
     * {@code ExpData} registered for {@code src} (if any) to {@code dest}.
     *
     * @return always 0
     */
    @Override
    public int fileMoved(@NotNull File src, @NotNull File dest, @Nullable User user, @Nullable Container container)
    {
        // Update any symlinks targeting the file
        PanoramaPublicSymlinkManager.get().fireSymlinkUpdate(src.toPath(), dest.toPath(), container);

        ExpData movedData = ExperimentService.get().getExpDataByURL(src, null);
        if (movedData == null)
        {
            return 0;
        }

        // NOTE(review): no save(user) call follows the setter here - confirm whether the new URI is
        // persisted elsewhere or whether a save is missing.
        movedData.setDataFileURI(dest.toURI());
        return 0;
    }

    /** Deletes the {@code ExpData} registered for the deleted path in the given container, if one exists. */
    @Override
    public void fileDeleted(@NotNull Path deleted, @Nullable User user, @Nullable Container container)
    {
        ExpData deletedData = ExperimentService.get().getExpDataByURL(deleted, container);
        if (deletedData == null)
        {
            return;
        }

        deletedData.delete(user);
    }

    /** @return always an empty collection */
    @Override
    public Collection<File> listFiles(@Nullable Container container)
    {
        return Collections.emptyList();
    }

    /** Not supported by this listener. */
    @Override
    public SQLFragment listFilesQuery()
    {
        throw new UnsupportedOperationException("Not implemented");
    }
}
Loading

0 comments on commit 5bcf018

Please sign in to comment.