Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create vivo/home on application start-up (#192.1) #370

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/
public class ApplicationSetup implements ServletContextListener {
private static final String APPLICATION_SETUP_PATH = "config/applicationSetup.n3";
private static final String APPLICATION_SETUP_DEFAULT_PATH = "config/default.applicationSetup.n3";

private ServletContext ctx;
private StartupStatus ss;
Expand All @@ -45,6 +46,8 @@ public void contextInitialized(ServletContextEvent sce) {
this.vitroHomeDir = VitroHomeDirectory.find(ctx);
ss.info(this, vitroHomeDir.getDiscoveryMessage());

this.vitroHomeDir.populate();

locateApplicationConfigFile();
loadApplicationConfigFile();
createConfigurationBeanLoader();
Expand All @@ -63,11 +66,19 @@ public void contextInitialized(ServletContextEvent sce) {
/**
 * Locates the application setup file in the Vitro home directory and stores it in
 * {@code configFile}.
 *
 * The file at {@code APPLICATION_SETUP_PATH} is preferred; when it does not exist or
 * is not readable, the file at {@code APPLICATION_SETUP_DEFAULT_PATH} is used instead.
 *
 * @throws IllegalStateException if the selected file does not exist or cannot be read
 */
private void locateApplicationConfigFile() {
    Path homePath = this.vitroHomeDir.getPath();
    Path path = homePath.resolve(APPLICATION_SETUP_PATH);

    // Fall back to the default setup file when the primary one is missing or unreadable.
    if (!Files.exists(path) || !Files.isReadable(path)) {
        path = homePath.resolve(APPLICATION_SETUP_DEFAULT_PATH);
    }

    // Only one throw per guard: the superseded messages from the earlier revision
    // made the statements that followed them unreachable.
    if (!Files.exists(path)) {
        throw new IllegalStateException("Neither '" + APPLICATION_SETUP_PATH + "' nor '" +
                APPLICATION_SETUP_DEFAULT_PATH + "' were found in " +
                homePath);
    }
    if (!Files.isReadable(path)) {
        throw new IllegalStateException("No readable '" + APPLICATION_SETUP_PATH + "' nor '" +
                APPLICATION_SETUP_DEFAULT_PATH + "' files were found in " +
                homePath);
    }
    this.configFile = path;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,46 @@

import static edu.cornell.mannlib.vitro.webapp.application.BuildProperties.WEBAPP_PATH_BUILD_PROPERTIES;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.naming.InitialContext;
import javax.servlet.ServletContext;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import edu.cornell.mannlib.vitro.webapp.config.ContextProperties;

/**
* Encapsulates some of the info relating to the Vitro home directory.
* Encapsulates some of the info relating to and initializes the Vitro home directory.
*/
public class VitroHomeDirectory {
private static final Log log = LogFactory.getLog(VitroHomeDirectory.class);

private static final String DIGEST_FILE_NAME = "digest.md5";

private static final Pattern CHECKSUM_PATTERN = Pattern.compile("^[a-f0-9]{32} \\*.+$");

public static VitroHomeDirectory find(ServletContext ctx) {
HomeDirectoryFinder finder = new HomeDirectoryFinder(ctx);
return new VitroHomeDirectory(ctx, finder.getPath(),
Expand Down Expand Up @@ -52,6 +73,219 @@ public String getDiscoveryMessage() {
return discoveryMessage;
}

/**
 * Fills the VIVO home directory with the files the application needs to run,
 * then records the checksum digest returned by the untar step.
 *
 * NOTE: Will not overwrite any modified files on redeploy.
 *
 * @throws RuntimeException if the home path is not a directory or cannot be listed
 */
public void populate() {
    File homeDir = getPath().toFile();

    // list() is only consulted once the path is known to be a directory
    boolean usable = homeDir.isDirectory() && homeDir.list() != null;
    if (!usable) {
        throw new RuntimeException("Application home dir is not a directory! " + homeDir);
    }

    writeDigest(untar(homeDir));
}

/**
* A non-destructive untar process that returns checksum digest of tarred files.
*
* Checksum digest can be manually created with the following command.
*
* `find /vivo/home -type f | grep -E "^/vivo/home/bin/|^/vivo/home/config/|^/vivo/home/rdf/" | xargs md5sum > /vivo/home/digest.md5`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I use absolute paths in a manually created digest.md5, it does not work for me in the untar method (relative paths starting with bin/, rdf/, config/* are expected there), so I have replaced this command with the one below. @wwelling, can you please check whether this makes sense?

Suggested change
* `find /vivo/home -type f | grep -E "^/vivo/home/bin/|^/vivo/home/config/|^/vivo/home/rdf/" | xargs md5sum > /vivo/home/digest.md5`
* `find /vivo/home -type f | grep -E "^/vivo/home/bin/|^/vivo/home/config/|^/vivo/home/rdf/" | sed 's/^[^\.]\+home\///' | xargs md5sum > /vivo/home/digest.md5`

Copy link
Author

@ghost ghost Mar 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I add additional logging to the VIVO home directory class (VitroHomeDirectory.java)? Would you prefer DEBUG/WARN/INFO log levels populated in each class?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that might be useful. Depending on the status of the Vitro home creation process, I suppose the FATAL, ERROR, WARN and INFO levels should be used.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chenejac can we standardize the format of the digest file? Parallel File Systems – Enabling insights from our data 4/7/2023 11:04AM CT? If the application generates a POSIX standard file, we should be good to go. I suppose we should write a test for the digest file against the POSIX standard... Can we create an issue for this instead of any further changes to this PR?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chenejac can we standardize the format of the digest file? Parallel File Systems – Enabling insights from our data 4/7/2023 11:04AM CT? If the application generates a POSIX standard file, we should be good to go. I suppose we should write a test for the digest file against the POSIX standard... Can we create an issue for this instead of any further changes to this PR?

Sure, I think it is a good idea, and I think it can be resolved in a new PR. @wwelling, can you please open a ticket/GitHub issue? Thanks.

*
* @param destination VIVO home directory
* @return digest of each file's checksum
*/
private Map<String, String> untar(File destination) {
log.info("Syncing VIVO home at: " + destination.getPath());

Map<String, String> digest = new HashMap<>();
Map<String, String> storedDigest = loadDigest();

TarArchiveEntry tarEntry;
try (
InputStream homeDirTar = getHomeDirTar();
TarArchiveInputStream tarInput = new TarArchiveInputStream(homeDirTar);
) {
while ((tarEntry = tarInput.getNextTarEntry()) != null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
while ((tarEntry = tarInput.getNextTarEntry()) != null) {
digest.put("untarVitro", checksum("generated by Vitro in deployment process".getBytes()));
while ((tarEntry = tarInput.getNextTarEntry()) != null) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds an entry to the digest file that acts as a flag indicating that the digest file was not created manually with a terminal command. It makes it possible to distinguish between the following cases:

  1. the file has not been changed in home and is not the same as the new file (in the new version of VIVO/Vitro)
  2. the file has been changed in home, VIVO_HOME/digest.md5 has been aligned with it by manually regenerating the digest with the terminal command, and the file is not the same as the new file (in the new version of VIVO/Vitro)

In the first case, it should be overwritten, in the second it shouldn't

Copy link
Author

@ghost ghost Mar 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears that whoever customized the ontology is not interested in submitting a PR back upstream. We can try to force their hand through a digest file, providing enough justification while being as discreet and helpful as possible. I am no lawyer; however, it seems possible that an addition can be made to the existing license with enough layers. Anyhow, the digest file is just a hack to avoid overwriting something that was changed manually.

Copy link
Author

@ghost ghost Mar 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or does it overwrite. Yeah, that probably makes the biggest difference. Have to change the narrative around again.

The application defines the VIVO home directory not the implementer. If the end user is defined to be able to modify the VIVO home, they should be able to through the UI only.


// Use the example configurations
String outFilename = tarEntry.getName().replace("example.", "");
File outFile = new File(destination, outFilename);

// Is the entry a directory?
if (tarEntry.isDirectory()) {
if (!outFile.exists()) {
outFile.mkdirs();
}
} else {
// Entry is a File
boolean write = true;

// reading bytes into memory to avoid having to unreliably reset stream
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Be warned, this can be a memory problem for large tar files.
It might be a good idea to do some sort of reasonable size check.
If under a certain max size, then load into memory; otherwise, just suffer the unreliable reset stream.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or not. Give it more memory for the time needed to load the files. Stop trying to make every time I/O runtime. I am pretty sure this is still first-time startup and not every time startup. If we continue to add logic the certainty will be reduced. Closed loops for each time are needed in a centralized isolated scope. Application startup is determined by the orchestration not the composition.

I do agree that the exception should be handled appropriately. Maybe a warning log after exception bubbled to its encapsulation. Let's see.

byte[] bytes = IOUtils.toByteArray(tarInput);
String newFileChecksum = checksum(bytes);
digest.put(outFilename, newFileChecksum);

// if file already exists and stored digest contains the file,
// check to determine if it has changed
if (outFile.exists() && storedDigest.containsKey(outFilename)) {
String existingFileChecksum = checksum(outFile);
// if file has not changed in home and is not the same as new file, overwrite
write = storedDigest.get(outFilename).equals(existingFileChecksum)
chenejac marked this conversation as resolved.
Show resolved Hide resolved
&& !existingFileChecksum.equals(newFileChecksum);
Comment on lines +139 to +140
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is also returning true for scenario number 3 after manual creation of digest.md5 from the command line:
An institution is upgrading VIVO to some future VIVO which is not changing any file which should be copied to config or rdf subdirectories of VIVO_HOME, but institution customized its previous VIVO version by changing something in VIVO_HOME/config or VIVO_HOME/rdf.
So, this is not situation when some institution is creating a fork and making changes from there, this is a situation when a VIVO installer is making customization direct in VIVO_HOME/rdf files. In order to prevent overwriting customized files in this case, I have tried to make change in line 114 in this file (see suggestion there), plus change in this line:

Suggested change
write = storedDigest.get(outFilename).equals(existingFileChecksum)
&& !existingFileChecksum.equals(newFileChecksum);
write = storedDigest.get(outFilename).equals(existingFileChecksum)
&& !existingFileChecksum.equals(newFileChecksum) && storedDigest.containsKey("untarVitro");

If an institution is forking VIVO/Vitro and has customized its instance there, I suppose that during an upgrade they will merge the changes at the git level and run the build from there.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any time a file changes in the VIVO home directory the digest must be updated, or the file will be overwritten on restart.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can just add an environment variable that gets toggled after the first time. This of course has to be persisted somewhere, and if it somehow changes while a person is in the middle of making changes to the VIVO home directory, it has to be changed back. Well, I have no confidence in any expert in the world making meaningful changes to an ontology without persisting the work they have done. This is accomplished by source versioning. As described before, if you are building a fork of VIVO with changes in the home or installer artifact in how the home directory is defined, that will occur all the same as before, with a digest file created on the next start.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me try to find out some common practices from the VIVO community, I think that VIVO LG is planning some VIVO implementers survey in the next period, I will check whether we can add a couple of questions about direct and indirect changes in the VIVO_HOME directory and the process of upgrading VIVO.

}

if (write) {
outFile.getParentFile().mkdirs();
try (
InputStream is = new ByteArrayInputStream(bytes);
FileOutputStream fos = new FileOutputStream(outFile);
) {
IOUtils.copy(is, fos);
log.info(outFile.getAbsolutePath() + " source has changed and has not been "
+ "edited in home, updated file has been copied to home directory.");
}
} else {
log.debug(outFile.getAbsolutePath() + " has been preserved.");
}
}
}
} catch (IOException | NoSuchAlgorithmException e) {
throw new RuntimeException("Error creating home directory!", e);
}

return digest;
}

/**
 * Load checksum digest of VIVO home directory.
 *
 * Reads the digest file from the home directory as UTF-8 and keeps only the lines
 * that match {@code CHECKSUM_PATTERN}. If a file name is listed more than once, the
 * last entry wins instead of aborting with an {@link IllegalStateException}.
 *
 * @return checksum digest keyed by file name; an empty map when no digest file exists
 * @throws RuntimeException if the digest file exists but cannot be read
 */
private Map<String, String> loadDigest() {
    File storedDigest = new File(getPath().toFile(), DIGEST_FILE_NAME);
    if (!storedDigest.exists() || !storedDigest.isFile()) {
        log.info("VIVO home digest not found: " + storedDigest.getPath());
        return new HashMap<>();
    }

    log.info("Reading VIVO home digest: " + storedDigest.getPath());
    try {
        return FileUtils
                .readLines(storedDigest, StandardCharsets.UTF_8)
                .stream()
                .filter(CHECKSUM_PATTERN.asPredicate())
                .map(this::split)
                // merge function: tolerate duplicate file entries (e.g. hand-edited digests)
                .collect(Collectors.toMap(this::checksumFile, this::checksumValue,
                        (earlier, later) -> later));
    } catch (IOException e) {
        throw new RuntimeException("Error reading VIVO home checksum digest!", e);
    }
}

/**
 * Write VIVO home checksum digest following md5 format; `<checksum> *<file>`.
 *
 * The file is written as UTF-8 so that it round-trips with the UTF-8 read in
 * {@code loadDigest()} regardless of the platform default charset.
 *
 * @param digest checksum digest to write, keyed by file name
 * @throws RuntimeException if the digest file cannot be written
 */
private void writeDigest(Map<String, String> digest) {
    File storedDigest = new File(getPath().toFile(), DIGEST_FILE_NAME);
    try (
        FileOutputStream fos = new FileOutputStream(storedDigest);
        OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
    ) {
        for (Map.Entry<String, String> entry : digest.entrySet()) {
            String filename = entry.getKey();
            String checksum = entry.getValue();
            osw.write(String.format("%s *%s\n", checksum, filename));
        }
    } catch (IOException e) {
        throw new RuntimeException("Error writing home directory checksum digest!", e);
    }
    log.info("VIVO home digest created: " + storedDigest.getPath());
}

/**
 * Split a digest line into its checksum and its marked file name.
 *
 * The split is limited to two parts so that a file name containing whitespace
 * stays intact in the second element instead of being cut at its first space.
 *
 * @param checksum checksum delimited by space and asterisks `<checksum> *<file>`
 * @return two-element array: the checksum, then `*<file>`
 */
private String[] split(String checksum) {
    return checksum.split("\\s+", 2);
}

/**
 * Pick the checksum value out of a split digest line.
 *
 * @param checksum split checksum
 * @return first element of the split line, the checksum value
 */
private String checksumValue(String[] checksum) {
    final String value = checksum[0];
    return value;
}

/**
 * Pick the file name out of a split digest line.
 *
 * @param checksum split checksum
 * @return second element of the split line with its first character removed
 */
private String checksumFile(String[] checksum) {
    final String markedName = checksum[1];
    // drop the single leading marker character in front of the file name
    return markedName.substring(1);
}

/**
 * Compute the md5 checksum of a file's contents.
 *
 * @param file file to read
 * @return md5 checksum as string
 * @throws IOException if the file cannot be read
 * @throws NoSuchAlgorithmException if the MD5 digest algorithm is unavailable
 */
private String checksum(File file) throws IOException, NoSuchAlgorithmException {
    byte[] contents = FileUtils.readFileToByteArray(file);
    return checksum(contents);
}

/**
 * Compute the md5 checksum of a byte array.
 *
 * @param bytes bytes from file
 * @return md5 checksum as a lowercase hexadecimal string
 * @throws NoSuchAlgorithmException if the MD5 digest algorithm is unavailable
 */
private String checksum(byte[] bytes) throws NoSuchAlgorithmException {
    final char[] hexDigits = "0123456789abcdef".toCharArray();

    MessageDigest md5 = MessageDigest.getInstance("MD5");
    md5.update(bytes);
    byte[] hash = md5.digest();

    // two hex characters per byte: high nibble first, then low nibble
    StringBuilder hex = new StringBuilder(hash.length * 2);
    for (byte octet : hash) {
        hex.append(hexDigits[(octet >> 4) & 0x0f]);
        hex.append(hexDigits[octet & 0x0f]);
    }

    return hex.toString();
}

/**
* Get prepacked VIVO home tar file as input stream.
*
* @return input stream of VIVO home tar file
*/
private InputStream getHomeDirTar() {
String tarLocation = "/WEB-INF/resources/home-files/vivo-home.tar";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised it would need a leading slash.

InputStream tar = ctx.getResourceAsStream(tarLocation);
if (tar == null) {
log.error("Application home tar not found in: " + tarLocation);
throw new RuntimeException("Application home tar not found in: " + tarLocation);
}

return tar;
}

/**
* Find something that specifies the location of the Vitro home directory.
* Look in the JNDI environment, the system properties, and the
Expand Down Expand Up @@ -92,23 +326,12 @@ public Path getPath() {
}

/**
 * Looks up the Vitro home directory under {@code VHD_JNDI_PATH} via
 * {@code ContextProperties.findJndiProperty} and, when a value is present,
 * records it in {@code foundLocations} together with a descriptive message.
 *
 * The lookup and registration happen exactly once; a missing value is only
 * logged so that a null is never passed to {@code Paths.get}.
 */
public void getVhdFromJndi() {
    String vhdPath = ContextProperties.findJndiProperty(VHD_JNDI_PATH);
    if (vhdPath == null) {
        // NOTE(review): assumes findJndiProperty reports "not set" as null — confirm
        log.debug("Didn't find a JNDI value at '" + VHD_JNDI_PATH + "'.");
        return;
    }

    log.debug("'" + VHD_JNDI_PATH + "' as specified by JNDI: " + vhdPath);
    String message = String.format(
            "JNDI environment '%s' was set to '%s'",
            VHD_JNDI_PATH, vhdPath);
    foundLocations.add(new Found(Paths.get(vhdPath), message));
}

private void getVhdFromSystemProperties() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ public class ConfigurationPropertiesImpl extends ConfigurationProperties {

public ConfigurationPropertiesImpl(InputStream stream,
Map<String, String> preemptiveProperties,
Map<String, String> buildProperties) throws IOException {
Map<String, String> buildProperties,
Map<String, String> contextProperties) throws IOException {
Map<String, String> map = new HashMap<>(buildProperties);
map.putAll(contextProperties);

Properties props = loadFromPropertiesFile(stream);
for (String key: props.stringPropertyNames()) {
Expand Down
Loading