diff --git a/.classpath b/.classpath
index 68057ea2..a92d7195 100644
--- a/.classpath
+++ b/.classpath
@@ -23,7 +23,7 @@
-
+
diff --git a/lib/verveine.extractor.java.jar b/lib/verveine.extractor.java.jar
index 2cafdde0..0ba884d0 100644
Binary files a/lib/verveine.extractor.java.jar and b/lib/verveine.extractor.java.jar differ
diff --git a/src/fr/inria/verveine/extractor/java/EntityDictionary.java b/src/fr/inria/verveine/extractor/java/EntityDictionary.java
index 1a6e2765..e7050f3f 100644
--- a/src/fr/inria/verveine/extractor/java/EntityDictionary.java
+++ b/src/fr/inria/verveine/extractor/java/EntityDictionary.java
@@ -77,7 +77,6 @@
import org.moosetechnology.model.famix.famixtraits.TWithTypes;
import ch.akuhn.fame.Repository;
-import fr.inria.verveine.extractor.java.utils.FileContentExtractor;
import fr.inria.verveine.extractor.java.utils.ImplicitVarBinding;
import fr.inria.verveine.extractor.java.utils.Util;
@@ -2666,30 +2665,38 @@ public ImplicitVariable ensureFamixImplicitVariable(String name, TType tType, TM
}
/**
- * Creates and returns a FAMIX Comment and associates it with an Entity (ex: for Javadocs)
+ * Creates and returns a Famix Comment and associates it with an Entity (ex: for Javadocs)
* @param jCmt -- the content (String) of the comment
* @param owner -- the entity that is commented
- * @param commentText -- whether to export the source anchor of the comment (position in file) or its content (string)
- * @return the FAMIX Comment
+ * @return the Famix Comment
*/
- public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner, boolean commentText) {
+ public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner) {
Comment cmt = null;
if ( (jCmt != null) && (owner != null) ) {
cmt = new Comment();
- if (commentText) {
- IndexedFileAnchor position = createIndexedFileAnchor(jCmt);
- if (position != null) {
- cmt.setContent(
- FileContentExtractor.getFileContent(position.getFileName(),
- (int)position.getStartPos(),
- (int)position.getEndPos()) );
- }
- }
- else {
- addSourceAnchor(cmt, jCmt);
- }
+ addSourceAnchor(cmt, jCmt);
+ famixRepoAdd(cmt);
+ cmt.setCommentedEntity(owner);
+ }
+
+ return cmt;
+ }
+
+ /**
+ * Creates and returns a Famix Comment and associates it with an Entity
+ * @param jCmt -- the JDT comment node being exported (its text is passed separately in <code>content</code>)
+ * @param owner -- the entity that is commented
+ * @param content -- the text of the comment
+ * @return the Famix Comment
+ */
+ public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner, String content) {
+ Comment cmt = null;
+
+ if ( (jCmt != null) && (owner != null) ) {
+ cmt = new Comment();
+ cmt.setContent(content );
famixRepoAdd(cmt);
cmt.setCommentedEntity(owner);
}
diff --git a/src/fr/inria/verveine/extractor/java/VerveineJOptions.java b/src/fr/inria/verveine/extractor/java/VerveineJOptions.java
index 2862711a..735a1a73 100644
--- a/src/fr/inria/verveine/extractor/java/VerveineJOptions.java
+++ b/src/fr/inria/verveine/extractor/java/VerveineJOptions.java
@@ -4,6 +4,7 @@
import org.eclipse.jdt.core.dom.ASTParser;
import java.io.*;
+import java.nio.charset.Charset;
import java.util.*;
import java.util.regex.Pattern;
@@ -38,6 +39,11 @@ public static AnchorOptions getValue(String option) {
*/
public final static String OUTPUT_FILE = "output";
+ /**
+ * Default encoding of the Java files to read
+ */
+ private static final String DEFAULT_FILE_ENCODING = "UTF-8";
+
/**
* Option for MSE output format
*/
@@ -85,6 +91,11 @@ public static AnchorOptions getValue(String option) {
*/
protected Collection excludeMatchers;
+ /**
+ * File encoding to use to read java files
+ */
+ protected String fileEncoding = DEFAULT_FILE_ENCODING;
+
/**
* Name of the file where to put the MSE model.
* Defaults to {@link VerveineParser#OUTPUT_FILE}
@@ -142,6 +153,7 @@ public void setOptions( String[] args) {
} catch (IllegalArgumentException e) {
System.err.println(e.getMessage());
usage();
+ throw e;
}
}
@@ -178,6 +190,7 @@ protected int setOption( String[] args, int i) throws IllegalArgumentException {
if (arg.equals("-h")) {
usage();
+ System.exit(0);
}
else if (arg.matches("-1\\.[1-7]") || arg.matches("-[1-7]")) {
setCodeVersion(arg);
@@ -188,6 +201,9 @@ else if (arg.matches("-1\\.[1-7]") || arg.matches("-[1-7]")) {
} else if ((arg.charAt(0) == '-') && (arg.endsWith("cp"))) {
classPathOptions = setOptionClassPath(classPathOptions, args, i);
argumentsTreated++;
+ } else if (arg.equals("-encoding")) {
+ setOptionEncoding(args, i);
+ argumentsTreated++;
} else if (arg.equals("-anchor")) {
setOptionAnchor(args, i);
argumentsTreated++;
@@ -227,21 +243,21 @@ else if (arg.equals("-debugging")) {
*/
protected String[] setOptionClassPath( String[] classPath, String[] args, int i) throws IllegalArgumentException {
if (args[i].equals("-autocp")) {
- if (i < args.length) {
+ if (i+1 < args.length) {
return addToClassPath(classPath, collectAllJars(args[i+1]) );
} else {
throw new IllegalArgumentException("-autocp requires a root folder");
}
}
else if (args[i].equals("-filecp")) {
- if (i < args.length) {
+ if (i+1 < args.length) {
return addToClassPath(classPath, readAllJars(args[i+1]));
} else {
throw new IllegalArgumentException("-filecp requires a filename");
}
}
else if (args[i].equals("-cp")) {
- if (i < args.length) {
+ if (i+1 < args.length) {
return addToClassPath(classPath, Arrays.asList(args[i+1].split(System.getProperty("path.separator"))));
}
else {
@@ -251,6 +267,40 @@ else if (args[i].equals("-cp")) {
return classPath;
}
+ /**
+  * Handles the <code>-encoding</code> option: records the charset used to read the Java source files.
+  * The name is validated with {@link Charset#isSupported(String)}, so charset aliases
+  * (e.g. "UTF8", "latin1") are accepted as well as canonical names.
+  * The <code>fileEncoding</code> field is only modified once the name has been validated.
+  * @param args -- the command line arguments
+  * @param i -- index of <code>-encoding</code> in <code>args</code>; the encoding name is expected at <code>i+1</code>
+  * @throws IllegalArgumentException if the encoding name is missing, syntactically illegal, or not supported by this JVM
+  */
+ protected void setOptionEncoding(String[] args, int i) {
+     if (i+1 >= args.length) {
+         throw new IllegalArgumentException("-encoding requires an encoding name (eg. " + DEFAULT_FILE_ENCODING + ")");
+     }
+
+     String encoding = args[i + 1].trim();
+     boolean supported;
+     try {
+         supported = Charset.isSupported(encoding);
+     } catch (java.nio.charset.IllegalCharsetNameException e) {
+         // syntactically illegal name (e.g. empty or containing spaces): report it like an unknown one
+         supported = false;
+     }
+     if (! supported) {
+         throw new IllegalArgumentException("Unknown file encoding: -encoding " + encoding);
+     }
+
+     this.fileEncoding = encoding;
+ }
+
+ /**
+  * Handles the <code>-anchor</code> option: stores in <code>anchors</code> the value that
+  * {@link VerveineJOptions.AnchorOptions#getValue(String)} returns for the (trimmed) argument.
+  * Note that <code>anchors</code> is assigned before the result is checked.
+  * @param args -- the command line arguments
+  * @param i -- index of <code>-anchor</code> in <code>args</code>; its value is expected at <code>i+1</code>
+  * @throws IllegalArgumentException if the value is missing or not recognized
+  */
+ protected void setOptionAnchor(String[] args, int i) {
+     if (i+1 >= args.length) {
+         throw new IllegalArgumentException("-anchor requires an option (none|default|assoc)");
+     }
+
+     String requested = args[i + 1].trim();
+     anchors = VerveineJOptions.AnchorOptions.getValue(requested);
+     if (anchors == null) {
+         throw new IllegalArgumentException("unknown option to -anchor: " + requested);
+     }
+ }
+
+ /**
+  * Handles the <code>-format</code> option: stores the (trimmed) argument in <code>outputFormat</code>
+  * and checks that it matches, ignoring case, either <code>MSE_OUTPUT_FORMAT</code> or
+  * <code>JSON_OUTPUT_FORMAT</code>.
+  * Note that <code>outputFormat</code> is assigned before the value is checked.
+  * @param args -- the command line arguments
+  * @param i -- index of <code>-format</code> in <code>args</code>; its value is expected at <code>i+1</code>
+  * @throws IllegalArgumentException if the value is missing or is not one of the two known formats
+  */
+ protected void setOptionFormat(String[] args, int i) {
+     if (i+1 >= args.length) {
+         throw new IllegalArgumentException("-format requires an option (mse|json)");
+     }
+
+     this.outputFormat = args[i + 1].trim();
+     boolean isMse = this.outputFormat.equalsIgnoreCase(MSE_OUTPUT_FORMAT);
+     boolean isJson = this.outputFormat.equalsIgnoreCase(JSON_OUTPUT_FORMAT);
+     if (! (isMse || isJson)) {
+         throw new IllegalArgumentException("unknown option to -format: " + outputFormat);
+     }
+ }
+
protected List collectAllJars(String sDir) {
File[] faFiles = new File(sDir).listFiles();
List tmpPath = new ArrayList();
@@ -299,29 +349,6 @@ protected List readAllJars(String filename) {
return tmpPath;
}
- protected void setOptionAnchor(String[] args, int i) {
- if (i < args.length) {
- String anchor = args[i + 1].trim();
- anchors = VerveineJOptions.AnchorOptions.getValue(anchor);
- if (anchors == null) {
- throw new IllegalArgumentException("unknown option to -anchor: " + anchor);
- }
- } else {
- throw new IllegalArgumentException("-anchor requires an option (none|default|assoc)");
- }
- }
-
- protected void setOptionFormat(String[] args, int i) {
- if (i < args.length) {
- outputFormat = args[i + 1].trim();
- if ((!outputFormat.equalsIgnoreCase(MSE_OUTPUT_FORMAT)) && (!outputFormat.equalsIgnoreCase(JSON_OUTPUT_FORMAT))) {
- throw new IllegalArgumentException("unknown option to -format: " + outputFormat);
- }
- } else {
- throw new IllegalArgumentException("-format requires an option (mse|json)");
- }
- }
-
protected void usage() {
System.err.println("Usage: VerveineJ [-h] [-i] [-o ] [-prettyPrint] [-summary] [-alllocals] [-anchor (none|default|assoc)] [-cp CLASSPATH | -autocp DIR] [-1.1 | -1 | -1.2 | -2 | ... | -1.7 | -7] | ");
System.err.println(" [-h] prints this message");
@@ -333,6 +360,7 @@ protected void usage() {
System.err.println(" Summarizing at the level of classes does not produce Methods, Attributes, Accesses, and Invocations");
System.err.println(" Everything is represented as references between classes: e.g. \"A.m1() invokes B.m2()\" is uplifted to \"A references B\"");
System.err.println(" [-alllocals] Forces outputing all local variables, even those with primitive type (incompatible with \"-summary\")");
+ System.err.println(" [-encoding ] File encoding to use for reading the source code default: " + DEFAULT_FILE_ENCODING);
System.err.println(" [-anchor (none|entity|default|assoc)] options for source anchor information:\n" +
" - no entity\n" +
" - only named entities [default]\n" +
@@ -344,14 +372,13 @@ protected void usage() {
System.err.println(" [-excludepath GLOBBINGEXPR] A globbing expression of file path to exclude from parsing");
System.err.println(" [-1.1 | -1 | -1.2 | -2 | ... | -1.7 | -7] specifies version of Java");
System.err.println(" | list of source files to parse or directories to search for source files");
- System.exit(0);
-
}
protected void setCodeVersion(String arg) {
if (codeVers != null) {
System.err.println("Trying to set twice code versions: " + codeVers + " and " + arg);
usage();
+ throw new IllegalArgumentException();
} else if (arg.equals("-1.1") || arg.equals("-1")) {
codeVers = JavaCore.VERSION_1_1;
} else if (arg.equals("-1.2") || arg.equals("-2")) {
@@ -551,4 +578,8 @@ public boolean commentsAsText() {
return commentText;
}
+ /**
+  * @return the name of the encoding used to read the Java source files
+  *         (the value of the <code>fileEncoding</code> field)
+  */
+ public String getFileEncoding() {
+     return this.fileEncoding;
+ }
+
}
\ No newline at end of file
diff --git a/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java b/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java
deleted file mode 100644
index 2fd5fa1e..00000000
--- a/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package fr.inria.verveine.extractor.java.utils;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.charset.StandardCharsets;
-
-/**
- * A utility to read the content of files.
- * To make it faster, we try to keep the file opened so that successive reads in the same file
- * do not have to re-open.
- * Also to make it easier to use, everything is static (not proud of it)
- * @author anquetil
- */
-public class FileContentExtractor {
-
- static protected String filename = null;
- static protected RandomAccessFile openedFile = null;
-
- public static String getFileContent( String inputfile, int start, int end) {
- if (filename != inputfile) {
- closeCurrentFile();
- openFile(inputfile);
- }
- return getFileContent(start, end);
- }
-
- protected static void openFile(String fname) {
- try {
- openedFile = new RandomAccessFile( fname, "r");
- filename = fname;
- } catch (FileNotFoundException e) {
- System.err.println("Error opening "+fname+" for reading");
- }
- }
-
- protected static void closeCurrentFile() {
- if (openedFile!= null) {
- try {
- openedFile.close();
- } catch (IOException e) {
- // nothing
- }
- }
- }
-
- protected static String getFileContent( int start, int end) {
- byte buffer[] = new byte[ end-start+1];
- try {
- openedFile.seek(start - 1); // moose position start at 1, Java at 0
- int ret = openedFile.read(buffer);
- if (ret < end-start+1) {
- System.err.println("missing bytes, read "+ret+" instead of "+(end-start+1));
- return "";
- }
- return new String(buffer, StandardCharsets.UTF_8);
-
- } catch (IOException e) {
- e.printStackTrace();
- }
- return "";
- }
-
-}
diff --git a/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java b/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java
index 3fde81ff..aa11e3f8 100644
--- a/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java
+++ b/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java
@@ -1,5 +1,12 @@
package fr.inria.verveine.extractor.java.visitors.defvisitors;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
import java.util.List;
import org.eclipse.jdt.core.dom.ASTNode;
@@ -74,6 +81,11 @@ public class VisitorComments extends GetVisitedEntityAbstractVisitor {
*/
private int methodStartPosition;
+ protected String currentFilename = null;
+ protected BufferedReader openedFile = null;
+ protected int lastPositionRead = 0;
+
+
public VisitorComments(EntityDictionary dico, VerveineJOptions options) {
super(dico, options);
classMemberDeclarations = false;
@@ -83,7 +95,7 @@ public VisitorComments(EntityDictionary dico, VerveineJOptions options) {
@Override
public boolean visit(CompilationUnit node) {
- allComments = node.getCommentList();
+ initializeCommentsReader(node);
if (allComments.size() == 0) {
// no comment, not visiting
return false;
@@ -97,6 +109,7 @@ public boolean visit(CompilationUnit node) {
@Override
public void endVisit(CompilationUnit node) {
endVisitCompilationUnit(node);
+ closeFile();
}
@Override
@@ -249,7 +262,7 @@ protected void assignCommentsInInterval(int start, int end, TWithComments fmx) {
while ( searchComment && pendingComments() ) {
Comment cmt = allComments.get(nextComment);
if (commentIsInside(cmt, start, end)) {
- dico.createFamixComment(cmt, fmx, options.commentsAsText());
+ commentCreation( cmt, fmx);
nextComment++;
}
else {
@@ -258,6 +271,16 @@ protected void assignCommentsInInterval(int start, int end, TWithComments fmx) {
}
}
+ /**
+  * Creates the Famix comment for <code>cmt</code> and attaches it to <code>fmx</code>.
+  * When {@link VerveineJOptions#commentsAsText()} is set, the text of the comment is read from the
+  * current source file and passed to the dictionary; otherwise the dictionary method without
+  * text is used.
+  * @param cmt -- the JDT comment node
+  * @param fmx -- the Famix entity that owns the comment
+  */
+ private void commentCreation(Comment cmt, TWithComments fmx) {
+     if (! options.commentsAsText()) {
+         dico.createFamixComment(cmt, fmx);
+         return;
+     }
+
+     String text = getFileContent(cmt.getStartPosition(), cmt.getLength());
+     dico.createFamixComment(cmt, fmx, text);
+ }
+
/**
* Whether there is still some "pending" comments
*/
@@ -272,4 +295,57 @@ protected boolean commentIsInside(Comment cmt, int start, int end) {
return (start <= cmt.getStartPosition()) && (end >= cmt.getStartPosition() + cmt.getLength());
}
+ /**
+  * Prepares the visitor for the comments of a compilation unit: records its comment list and
+  * opens its source file with the encoding configured in the options, so that the text of the
+  * comments can later be read by {@link #getFileContent(int, int)}.
+  * If the file cannot be opened, a message is printed on stderr and <code>openedFile</code> stays
+  * <code>null</code> (in which case <code>getFileContent</code> returns empty strings).
+  * @param node -- the compilation unit about to be visited
+  */
+ protected void initializeCommentsReader(CompilationUnit node) {
+     allComments = node.getCommentList();
+
+     // never keep a reader opened on a previous file
+     closeFile();
+     this.lastPositionRead = 0;
+
+     currentFilename = (String) node.getProperty(EntityDictionary.SOURCE_FILENAME_PROPERTY);
+     if (currentFilename == null) {
+         // new FileInputStream((String) null) would throw a NullPointerException
+         System.err.println("Not able to read comments from "+currentFilename);
+         return;
+     }
+
+     InputStream is = null;
+     try {
+         is = new FileInputStream(currentFilename);
+         this.openedFile = new BufferedReader(new InputStreamReader(is, options.getFileEncoding()));
+     } catch (FileNotFoundException|UnsupportedEncodingException e) {
+         System.err.println("Not able to read comments from "+currentFilename);
+         if (is != null) {
+             // the file was opened but the encoding was rejected: do not leak the stream
+             try {
+                 is.close();
+             } catch (IOException ignored) {
+                 // best effort, the stream is being discarded anyway
+             }
+         }
+     }
+ }
+
+ /**
+  * Reads <code>lenghtToRead</code> characters of the currently opened source file, starting at
+  * character position <code>start</code> (0-based, counted in characters, not bytes).
+  * The file is read through a forward-only reader: <code>lastPositionRead</code> records how far
+  * it has been consumed, so successive calls must ask for increasing positions.
+  * @param start -- position of the first character to read
+  * @param lenghtToRead -- number of characters to read
+  * @return the requested text, or an empty string if no file is opened, if the request cannot
+  *         be served (position already passed, negative length, end of file) or on I/O error
+  */
+ protected String getFileContent( int start, int lenghtToRead) {
+     if (openedFile == null) {
+         return "";
+     }
+
+     int toSkip = start - lastPositionRead;
+     if ( (toSkip < 0) || (lenghtToRead < 0) ) {
+         // the reader cannot go backward, and skip() rejects negative values
+         System.err.println("Cannot read " + lenghtToRead + " characters at position " + start + " in " + currentFilename);
+         return "";
+     }
+
+     char buffer[] = new char[lenghtToRead];
+     try {
+         // skip() may skip fewer characters than requested: loop until done or end of file
+         while (toSkip > 0) {
+             long skipped = openedFile.skip(toSkip);
+             if (skipped <= 0) {
+                 break;
+             }
+             toSkip -= skipped;
+             lastPositionRead += skipped;
+         }
+
+         // read() may also return fewer characters than requested before the end of the file
+         int ret = 0;
+         while ( (toSkip == 0) && (ret < lenghtToRead) ) {
+             int nb = openedFile.read( buffer, /*offset in buffer*/ret, lenghtToRead - ret);
+             if (nb <= 0) {
+                 break;
+             }
+             ret += nb;
+         }
+         // keep track of what was consumed, even on failure, so that later reads stay aligned
+         lastPositionRead += ret;
+
+         if (ret < lenghtToRead) {
+             System.err.println("missing bytes in "+ currentFilename + ", read " + ret + " instead of " + lenghtToRead);
+             return "";
+         }
+
+         return new String(buffer);
+
+     } catch (IOException e) {
+         System.err.println("Error reading comments from " + currentFilename + ": " + e.getMessage());
+     }
+     return "";
+ }
+
+ /**
+  * Closes the reader on the current source file, if there is one, and forgets it.
+  * An I/O error while closing is ignored.
+  */
+ protected void closeFile() {
+     if (openedFile == null) {
+         return;
+     }
+
+     try {
+         openedFile.close();
+     } catch (IOException e) {
+         // nothing
+     }
+     openedFile = null;
+ }
+
}
\ No newline at end of file
diff --git a/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java b/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java
index 54cc34ff..1dd22fab 100644
--- a/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java
+++ b/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java
@@ -1,6 +1,7 @@
package fr.inria.verveine.extractor.java;
import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
import org.moosetechnology.model.famix.famixjavaentities.*;
import org.moosetechnology.model.famix.famixjavaentities.Class;
@@ -141,6 +142,21 @@ public void testAlllocals() {
assertEquals(3, accessSetNom);
}
+ /**
+  * Configuring the parser with <code>-encoding UTF-8</code> must not raise any exception.
+  */
+ @Test
+ public void testEncodingUTF8() {
+     String[] args = new String[]{"-encoding", "UTF-8", "test_src/comments"};
+     parser.configure(args);
+ }
+
+ /**
+  * <code>-encoding</code> without an encoding name must be rejected with an IllegalArgumentException.
+  */
+ @Test(expected = IllegalArgumentException.class)
+ public void testEncodingMissing() {
+     String[] args = new String[]{"-encoding"};
+     parser.configure(args);
+ }
+
+ /**
+  * <code>-encoding</code> with an unknown encoding name must be rejected with an IllegalArgumentException.
+  */
+ @Test(expected = IllegalArgumentException.class)
+ public void testEncodingWrong() {
+     String[] args = new String[]{"-encoding", "BLAH", "test_src/comments"};
+     parser.configure(args);
+ }
+
@Test
public void testCommentsText() {
parse(new String[]{"-commenttext", "test_src/comments"});
@@ -190,6 +206,19 @@ else if (meth.getName().equals("methodWithoutBody")) {
assertEquals(6, numberTested); // check that all expected methods were actually found and tested
}
+ /**
+  * Without <code>-commenttext</code>, every comment must have a source anchor and no content.
+  */
+ @Test
+ public void testCommentsAnchor() {
+     parse(new String[]{"test_src/comments"});
+
+     assertEquals(14, entitiesOfType(Comment.class).size());
+     for (Comment cmt : entitiesOfType(Comment.class)) {
+         assertNotNull(cmt.getSourceAnchor());
+         assertNull( cmt.getContent());
+
+         IndexedFileAnchor anchor = (IndexedFileAnchor)cmt.getSourceAnchor();
+         int endPos = (int)anchor.getEndPos();
+         int startPos = (int)anchor.getStartPos();
+         int len = endPos - startPos;
+         assertTrue( len >= 20); // none of the comments have less than 20 characters
+     }
+ }
+
@Test
public void testClassDeclsInExpr() {
parse(new String[]{"-alllocals", "test_src/ad_hoc/SpecialLocalVarDecls.java"});