Skip to content

Commit

Permalink
Fix for #205 and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
josemduarte committed Apr 15, 2018
1 parent 94e5f5f commit 93b5640
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 16 deletions.
101 changes: 87 additions & 14 deletions eppic-cli/src/main/java/eppic/DataModelAdaptor.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public class DataModelAdaptor {
public static final int INVALID_ASSEMBLY_ID = 0;

private PdbInfoDB pdbInfo;

private EppicParams params;

private RunParametersDB runParameters;
Expand Down Expand Up @@ -170,7 +170,10 @@ public void setPdbMetadata(Structure pdb) {
pdbInfo.setCellBeta(cc.getBeta());
pdbInfo.setCellGamma(cc.getGamma());
}


}

public void setChainClustersData(Structure pdb, Map<String,String> chainOrigNames) {
List<ChainClusterDB> chainClusterDBs = new ArrayList<ChainClusterDB>();

for (EntityInfo compound:pdb.getEntityInfos()) {
Expand All @@ -180,12 +183,12 @@ public void setPdbMetadata(Structure pdb) {
// in mmCIF files some sugars are annotated as compounds with no chains linked to them, e.g. 3s26
if (compound.getChains().isEmpty()) continue;

chainClusterDBs.add(createChainCluster(compound));
chainClusterDBs.add(createChainCluster(compound, chainOrigNames));
}
}
pdbInfo.setNumChainClusters(chainClusterDBs.size());
pdbInfo.setChainClusters(chainClusterDBs);

initAsymIds2chainIdsMap(pdb);
}

Expand All @@ -196,7 +199,8 @@ public void setPdbMetadata(Structure pdb) {
* <p/>
* Note that the map should work in most cases, but it's not guaranteed because there is a one-to-many
* relationship between author chain ids and asym ids (internal ids). This is the best we can do
* with the data available from Biojava 4.2
* with the data available from Biojava 4.2
* TODO check if we still need this map with BioJava 5
* @param pdb the structure
*/
private void initAsymIds2chainIdsMap(Structure pdb) {
Expand All @@ -206,15 +210,15 @@ private void initAsymIds2chainIdsMap(Structure pdb) {
asymIds2chainIds.put(c.getId(), c.getName());
}
}
private ChainClusterDB createChainCluster(EntityInfo compound) {

private ChainClusterDB createChainCluster(EntityInfo compound, Map<String,String> chainOrigNames) {
ChainClusterDB chainClusterDB = new ChainClusterDB();

chainClusterDB.setPdbCode(pdbInfo.getPdbCode());

chainClusterDB.setRepChain(compound.getRepresentative().getName());
chainClusterDB.setMemberChains(getMemberChainsString(compound));
chainClusterDB.setNumMembers(compound.getChainIds().size());
chainClusterDB.setMemberChains(getMemberChainsString(compound, chainOrigNames));
chainClusterDB.setNumMembers(getUniqueChainNames(compound, chainOrigNames).size());
chainClusterDB.setProtein(compound.getRepresentative().isProtein());

chainClusterDB.setPdbInfo(pdbInfo);
Expand Down Expand Up @@ -299,9 +303,9 @@ private List<Group> getGroups(EntityInfo compound) {
}

public void setInterfaces(StructureInterfaceList interfaces) {


List<StructureInterfaceCluster> interfaceClusters = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF);
List<StructureInterfaceCluster> interfaceClusters = reduceToNcsUnique(interfaces);

List<InterfaceClusterDB> icDBs = new ArrayList<InterfaceClusterDB>();
for (StructureInterfaceCluster ic:interfaceClusters) {
InterfaceClusterDB icDB = new InterfaceClusterDB();
Expand Down Expand Up @@ -452,6 +456,56 @@ public int compare(ContactDB first, ContactDB second) {
pdbInfo.setMaxNumClashesAnyInterface(Collections.max(numClashesPerInterface));

}

/**
 * Reduces the contact-overlap interface clusters to their NCS-unique members.
 * <p>
 * If no NCS operators are present, the clusters obtained from the interface list are returned as is.
 * Otherwise a new cluster is created per original cluster (same id and average score) that
 * contains only the first member of every NCS interface cluster hit by the original members.
 * <p>
 * Side effect: every interface added to a reduced cluster gets its cluster back-reference
 * replaced by the new reduced cluster.
 * <p>
 * Interfaces that cannot be found in any NCS interface cluster are logged and skipped, instead of
 * failing with an out-of-bounds lookup on the "not found" index (-1).
 *
 * @param interfaces the interface list to take the clusters and NCS clusters from
 * @return the original clusters (no NCS case) or the newly created reduced clusters (NCS case)
 */
private List<StructureInterfaceCluster> reduceToNcsUnique(StructureInterfaceList interfaces) {
    List<StructureInterfaceCluster> clusters = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF);

    if (!pdbInfo.isNcsOpsPresent()) {
        // no NCS case (normal case), return clusters as is
        return clusters;
    }

    // NCS case. We need to reduce to the unique-to-NCS set
    List<StructureInterfaceCluster> interfaceClustersNcs = interfaces.getClustersNcs();

    List<StructureInterfaceCluster> reduced = new ArrayList<>();
    for (StructureInterfaceCluster cluster : clusters) {
        // sorted and duplicate-free: several members usually map to the same NCS cluster
        Set<Integer> indices = new TreeSet<>();
        for (StructureInterface interf : cluster.getMembers()) {
            int index = getCorrespondingClustersIndex(interf, interfaceClustersNcs);
            if (index < 0) {
                // -1 means "not found": it must not be used as a list index below
                LOGGER.warn("Interface {} of cluster {} was not found in any NCS interface cluster. It will be skipped",
                        interf.getId(), cluster.getId());
                continue;
            }
            indices.add(index);
        }

        StructureInterfaceCluster reducedCluster = new StructureInterfaceCluster();
        reducedCluster.setId(cluster.getId());
        reducedCluster.setAverageScore(cluster.getAverageScore());
        for (int i : indices) {
            // we add one interface per NCS interface cluster
            StructureInterface interf = interfaceClustersNcs.get(i).getMembers().get(0);
            if (interf.getCluster().getId() != reducedCluster.getId()) {
                LOGGER.warn("Interface {} belongs to cluster {}. It should not be added to cluster id {}",
                        interf.getId(), interf.getCluster().getId(), reducedCluster.getId());
            }
            reducedCluster.addMember(interf);
            // we add also the new back-reference to the parent
            interf.setCluster(reducedCluster);
        }

        reduced.add(reducedCluster);
    }

    return reduced;
}

/**
 * Finds the position of the NCS interface cluster holding an interface with the same id as the given one.
 *
 * @param interf the interface to look for (compared by id)
 * @param interfaceClustersNcs the NCS interface clusters to search, in order
 * @return the index in {@code interfaceClustersNcs} of the first cluster with a member of matching id,
 * or -1 if there is none
 */
private static int getCorrespondingClustersIndex(StructureInterface interf, List<StructureInterfaceCluster> interfaceClustersNcs) {
    int position = 0;
    for (StructureInterfaceCluster candidate : interfaceClustersNcs) {
        for (StructureInterface member : candidate.getMembers()) {
            if (member.getId() == interf.getId()) {
                return position;
            }
        }
        position++;
    }
    // no cluster contains an interface with that id
    return -1;
}

public void setAssemblies(CrystalAssemblies validAssemblies) {

Expand Down Expand Up @@ -792,6 +846,10 @@ public void setGeometryScores(List<GeometryPredictor> gps, List<GeometryClusterP
// geometry scores per interface
for (int i=0;i<gps.size();i++) {
InterfaceDB ii = pdbInfo.getInterface(i+1);
if (pdbInfo.isNcsOpsPresent() && ii==null) {
LOGGER.info("Not storing geometry scores for redundant NCS interface {}", i+1);
continue;
}
InterfaceScoreDB is = new InterfaceScoreDB();
ii.addInterfaceScore(is);
is.setInterfaceItem(ii);
Expand Down Expand Up @@ -1152,6 +1210,10 @@ public void setResidueBurialDetails(StructureInterfaceList interfaces) {

InterfaceDB ii = pdbInfo.getInterface(interf.getId());

if (pdbInfo.isNcsOpsPresent() && ii==null) {
LOGGER.info("Not storing residue burials info for redundant NCS interface {}", interf.getId());
continue;
}
// we add the residue details

List<ResidueBurialDB> iril = new ArrayList<ResidueBurialDB>();
Expand Down Expand Up @@ -1323,13 +1385,24 @@ public static String getChainClusterString(EntityInfo compound) {
return sb.toString();
}

public static String getMemberChainsString(EntityInfo compound) {

/**
 * Collects the distinct chain names of the given entity, in natural (sorted) order.
 * <p>
 * When {@code chainOrigNames} is given, every chain name is first translated through it, so that
 * chains sharing the same mapped name are counted only once. A chain without an entry in the map
 * keeps its own name (a null name can't be stored in the sorted set).
 *
 * @param compound the entity whose chains are inspected
 * @param chainOrigNames map looked up by chain name (presumably NCS-expanded chain name to original
 * chain name, to be confirmed against the caller), or null when there are no NCS operators
 * @return the sorted set of unique (possibly translated) chain names
 */
private Set<String> getUniqueChainNames(EntityInfo compound, Map<String, String> chainOrigNames) {
    Set<String> uniqChainNames = new TreeSet<>();
    for (Chain c : compound.getChains()) {
        String chainName = c.getName();
        if (chainOrigNames != null) { // will only be not null in cases with NCS ops
            String origName = chainOrigNames.get(chainName);
            if (origName != null) {
                chainName = origName;
            }
        }
        uniqChainNames.add(chainName);
    }
    return uniqChainNames;
}

private String getMemberChainsString(EntityInfo compound, Map<String, String> chainOrigNames) {

Set<String> uniqChainNames = getUniqueChainNames(compound, chainOrigNames);

StringBuilder sb = new StringBuilder();
int i = 0;
Expand Down
6 changes: 4 additions & 2 deletions eppic-cli/src/main/java/eppic/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -264,16 +264,18 @@ public void doFindInterfaces() throws EppicException {

LOGGER.info("Calculating possible interfaces");
CrystalBuilder interfFinder;
Map<String,String> chainOrigNames = null;
if (modelAdaptor.getPdbInfo().isNcsOpsPresent()) {
Map<String,String> chainOrigNames = new HashMap<>();
chainOrigNames = new HashMap<>();
Map<String, Matrix4d > chainNcsOps = new HashMap<>();
CrystalBuilder.expandNcsOps(pdb,chainOrigNames,chainNcsOps);
modelAdaptor.setPdbMetadata(pdb);
interfFinder = new CrystalBuilder(pdb,chainOrigNames,chainNcsOps);
} else {
interfFinder = new CrystalBuilder(pdb);
}

modelAdaptor.setChainClustersData(pdb, chainOrigNames);

interfaces = interfFinder.getUniqueInterfaces(EppicParams.INTERFACE_DIST_CUTOFF);
LOGGER.info("Calculating ASAs");
interfaces.calcAsas(params.getnSpherePointsASAcalc(), params.getNumThreads(), params.getMinSizeCofactorForAsa());
Expand Down
152 changes: 152 additions & 0 deletions eppic-cli/src/test/java/eppic/TestLargeStructures.java
Original file line number Diff line number Diff line change
@@ -1,17 +1,28 @@
package eppic;

//import org.junit.Ignore;
import eppic.assembly.TestLatticeGraph;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.contact.StructureInterface;
import org.biojava.nbio.structure.contact.StructureInterfaceCluster;
import org.biojava.nbio.structure.contact.StructureInterfaceList;
import org.biojava.nbio.structure.xtal.CrystalBuilder;
import org.junit.Test;

import eppic.model.ChainClusterDB;
import eppic.model.InterfaceClusterDB;
import eppic.model.InterfaceDB;
import eppic.model.PdbInfoDB;

import javax.vecmath.Matrix4d;

import static org.junit.Assert.*;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* An integration test that makes sure that large structures are correctly handled
Expand Down Expand Up @@ -51,6 +62,10 @@ public void test4v9e() throws IOException {
m.run(params);

PdbInfoDB pdbInfo = m.getDataModelAdaptor().getPdbInfo();

// the title should be set, this checks that DataModelAdaptor.setPdbMetadata worked
assertNotNull(pdbInfo.getTitle());
assertTrue(pdbInfo.getTitle().length()>2);

ChainClusterDB cc = pdbInfo.getChainCluster("AA");
assertEquals(36, cc.getNumMembers());
Expand Down Expand Up @@ -78,4 +93,141 @@ public void test4v9e() throws IOException {

}

/**
 * Runs the full pipeline on 1auy (an entry with NCS operators) and checks that
 * the stored output is reduced to the NCS-unique chains and interfaces.
 * Issue https://github.com/eppic-team/eppic/issues/205
 * @throws IOException if thrown while generating the parameters or running the pipeline
 */
@Test
public void test1auy() throws IOException {

    File workDir = new File(TMPDIR, "eppicTestLargeStructures");
    workDir.mkdir();
    assertTrue(workDir.isDirectory());

    EppicParams eppicParams = Utils.generateEppicParams("1auy", workDir);

    Main main = new Main();
    main.run(eppicParams);

    PdbInfoDB info = main.getDataModelAdaptor().getPdbInfo();

    assertTrue(info.isNcsOpsPresent());

    // a single entity, with its members reduced to the original (non NCS-expanded) chains
    assertEquals(1, info.getNumChainClusters());
    ChainClusterDB firstChainCluster = info.getChainClusters().get(0);
    assertEquals(3, firstChainCluster.getNumMembers());

    assertEquals(10, info.getInterfaceClusters().size());

    assertEquals(4, info.getAssemblies().size());

    // the cluster members should be reduced to NCS equivalents: it should be a low number
    int totalInterfaces = 0;
    for (InterfaceClusterDB clusterDb : info.getInterfaceClusters()) {
        assertTrue(clusterDb.size()<10);
        assertTrue(clusterDb.getAvgContactOverlapScore() > 0);
        for (InterfaceDB interfaceDb : clusterDb.getInterfaces()) {
            // can't assert this, the n chains are still in some interfaces
            //assertFalse(interfaceDb.getChain1().endsWith("n"));
            assertEquals(clusterDb.getClusterId(), interfaceDb.getClusterId());
            totalInterfaces++;
        }
    }

    assertTrue(totalInterfaces<20);

    workDir.delete();

}

/**
 * Extra sanity check for NCS: the grouping of interfaces by NCS and the clustering by contact
 * overlap score must agree with each other. All members of an NCS group must fall in the same
 * contact-overlap cluster, and all members of a contact-overlap cluster must fall in the same NCS group.
 * @throws Exception if loading the structure or computing the interfaces fails
 */
@Test
public void testInterfaceNcsGrouping() throws Exception {
    Structure structure = TestLatticeGraph.getStructure("1auy");

    Map<String,String> chainOrigNames = new HashMap<>();
    Map<String, Matrix4d> chainNcsOps = new HashMap<>();
    CrystalBuilder.expandNcsOps(structure,chainOrigNames,chainNcsOps);
    CrystalBuilder builder = new CrystalBuilder(structure,chainOrigNames,chainNcsOps);

    StructureInterfaceList interfaces = builder.getUniqueInterfaces();
    // a tenth of the default sphere points
    int spherePoints = StructureInterfaceList.DEFAULT_ASA_SPHERE_POINTS / 10;
    interfaces.calcAsas(spherePoints,
            Runtime.getRuntime().availableProcessors(),
            StructureInterfaceList.DEFAULT_MIN_COFACTOR_SIZE);
    interfaces.removeInterfacesBelowArea();

    List<StructureInterfaceCluster> full = interfaces.getClusters(EppicParams.CLUSTERING_CONTACT_OVERLAP_SCORE_CUTOFF);
    List<StructureInterfaceCluster> ncs = interfaces.getClustersNcs();

    // NCS groups -> contact-overlap clusters
    for (int idx = 0; idx < ncs.size(); idx++) {
        int refId = 0;
        boolean firstMember = true;
        for (StructureInterface member : ncs.get(idx).getMembers()) {
            // it seems that ncs list does not filter for area (bug in biojava 5.0.0), this is a workaround
            if (member.getTotalArea()<StructureInterfaceList.DEFAULT_MINIMUM_INTERFACE_AREA) {
                continue;
            }
            StructureInterfaceCluster correspondingFull = findCorrespondingInterfCluster(member, full);
            assertNotNull(correspondingFull);
            if (firstMember) {
                // the first (large enough) member defines the reference cluster id for the group
                refId = correspondingFull.getId();
                firstMember = false;
            }
            assertEquals("Interface "+member.getId()+" from NCS group with index "+idx+" should have same cluster id in full as first in group",
                    refId, correspondingFull.getId());
        }
    }

    // and the other way around: contact-overlap clusters -> NCS groups
    for (int idx = 0; idx < full.size(); idx++) {
        int refId = 0;
        boolean firstMember = true;
        for (StructureInterface member : full.get(idx).getMembers()) {
            StructureInterfaceCluster correspondingNcs = findCorrespondingInterfCluster(member, ncs);
            assertNotNull(correspondingNcs);
            if (firstMember) {
                refId = correspondingNcs.getId();
                firstMember = false;
            }
            assertEquals("Interface "+member.getId()+" from full group with index "+idx+" should have same cluster id in NCS as first in group",
                    refId, correspondingNcs.getId());
        }
    }


//    for (StructureInterfaceCluster c : full) {
//        System.out.println("### Cluster "+c.getId());
//        for (StructureInterface i : c.getMembers()) {
//            StructureInterface corresponding = findCorrespondingInterf(i, ncs);
//            System.out.println("id " + i.getId() + ": " + i + " --- "+corresponding);
//        }
//    }
}

/**
 * Returns the first cluster that has a member with the same id as the given interface.
 *
 * @param interf the interface to look for (compared by id)
 * @param clusters the clusters to search, in order
 * @return the first matching cluster, or null if no cluster contains an interface with that id
 */
private StructureInterfaceCluster findCorrespondingInterfCluster(StructureInterface interf, List<StructureInterfaceCluster> clusters) {
    for (StructureInterfaceCluster candidate : clusters) {
        List<StructureInterface> members = candidate.getMembers();
        for (int k = 0; k < members.size(); k++) {
            if (interf.getId() != members.get(k).getId()) {
                continue;
            }
            return candidate;
        }
    }
    return null;
}

// private StructureInterface findCorrespondingInterf(StructureInterface interf, List<StructureInterfaceCluster> clusters) {
// for (StructureInterfaceCluster c : clusters) {
// for (StructureInterface i : c.getMembers()) {
// if (interf.getId() == i.getId()) return i;
// }
// }
// return null;
// }
}

0 comments on commit 93b5640

Please sign in to comment.