diff --git a/.classpath b/.classpath index 68057ea2..a92d7195 100644 --- a/.classpath +++ b/.classpath @@ -23,7 +23,7 @@ - + diff --git a/lib/verveine.extractor.java.jar b/lib/verveine.extractor.java.jar index 2cafdde0..0ba884d0 100644 Binary files a/lib/verveine.extractor.java.jar and b/lib/verveine.extractor.java.jar differ diff --git a/src/fr/inria/verveine/extractor/java/EntityDictionary.java b/src/fr/inria/verveine/extractor/java/EntityDictionary.java index 1a6e2765..e7050f3f 100644 --- a/src/fr/inria/verveine/extractor/java/EntityDictionary.java +++ b/src/fr/inria/verveine/extractor/java/EntityDictionary.java @@ -77,7 +77,6 @@ import org.moosetechnology.model.famix.famixtraits.TWithTypes; import ch.akuhn.fame.Repository; -import fr.inria.verveine.extractor.java.utils.FileContentExtractor; import fr.inria.verveine.extractor.java.utils.ImplicitVarBinding; import fr.inria.verveine.extractor.java.utils.Util; @@ -2666,30 +2665,38 @@ public ImplicitVariable ensureFamixImplicitVariable(String name, TType tType, TM } /** - * Creates and returns a FAMIX Comment and associates it with an Entity (ex: for Javadocs) + * Creates and returns a Famix Comment and associates it with an Entity (ex: for Javadocs) * @param jCmt -- the content (String) of the comment * @param owner -- the entity that is commented - * @param commentText -- whether to export the source anchor of the comment (position in file) or its content (string) - * @return the FAMIX Comment + * @return the Famix Comment */ - public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner, boolean commentText) { + public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner) { Comment cmt = null; if ( (jCmt != null) && (owner != null) ) { cmt = new Comment(); - if (commentText) { - IndexedFileAnchor position = createIndexedFileAnchor(jCmt); - if (position != null) { - cmt.setContent( - FileContentExtractor.getFileContent(position.getFileName(), - 
(int)position.getStartPos(), - (int)position.getEndPos()) ); - } - } - else { - addSourceAnchor(cmt, jCmt); - } + addSourceAnchor(cmt, jCmt); + famixRepoAdd(cmt); + cmt.setCommentedEntity(owner); + } + + return cmt; + } + + /** + * Creates and returns a Famix Comment and associates it with an Entity + * @param jCmt -- the JDT comment node (only checked for null here) + * @param owner -- the entity that is commented + * @param content -- the text of the comment + * @return the Famix Comment + */ + public Comment createFamixComment(org.eclipse.jdt.core.dom.Comment jCmt, TWithComments owner, String content) { + Comment cmt = null; + + if ( (jCmt != null) && (owner != null) ) { + cmt = new Comment(); + cmt.setContent(content ); famixRepoAdd(cmt); cmt.setCommentedEntity(owner); } diff --git a/src/fr/inria/verveine/extractor/java/VerveineJOptions.java b/src/fr/inria/verveine/extractor/java/VerveineJOptions.java index 2862711a..735a1a73 100644 --- a/src/fr/inria/verveine/extractor/java/VerveineJOptions.java +++ b/src/fr/inria/verveine/extractor/java/VerveineJOptions.java @@ -4,6 +4,7 @@ import org.eclipse.jdt.core.dom.ASTParser; import java.io.*; +import java.nio.charset.Charset; import java.util.*; import java.util.regex.Pattern; @@ -38,6 +39,11 @@ public static AnchorOptions getValue(String option) { */ public final static String OUTPUT_FILE = "output"; + /** + * Default encoding of the Java files to read + */ + private static final String DEFAULT_FILE_ENCODING = "UTF-8"; + /** * Option for MSE output format */ @@ -85,6 +91,11 @@ public static AnchorOptions getValue(String option) { */ protected Collection excludeMatchers; + /** + * File encoding to use to read java files + */ + protected String fileEncoding = DEFAULT_FILE_ENCODING; + /** * Name of the file where to put the MSE model. 
* Defaults to {@link VerveineParser#OUTPUT_FILE} @@ -142,6 +153,7 @@ public void setOptions( String[] args) { } catch (IllegalArgumentException e) { System.err.println(e.getMessage()); usage(); + throw e; } } @@ -178,6 +190,7 @@ protected int setOption( String[] args, int i) throws IllegalArgumentException { if (arg.equals("-h")) { usage(); + System.exit(0); } else if (arg.matches("-1\\.[1-7]") || arg.matches("-[1-7]")) { setCodeVersion(arg); @@ -188,6 +201,9 @@ else if (arg.matches("-1\\.[1-7]") || arg.matches("-[1-7]")) { } else if ((arg.charAt(0) == '-') && (arg.endsWith("cp"))) { classPathOptions = setOptionClassPath(classPathOptions, args, i); argumentsTreated++; + } else if (arg.equals("-encoding")) { + setOptionEncoding(args, i); + argumentsTreated++; } else if (arg.equals("-anchor")) { setOptionAnchor(args, i); argumentsTreated++; @@ -227,21 +243,21 @@ else if (arg.equals("-debugging")) { */ protected String[] setOptionClassPath( String[] classPath, String[] args, int i) throws IllegalArgumentException { if (args[i].equals("-autocp")) { - if (i < args.length) { + if (i+1 < args.length) { return addToClassPath(classPath, collectAllJars(args[i+1]) ); } else { throw new IllegalArgumentException("-autocp requires a root folder"); } } else if (args[i].equals("-filecp")) { - if (i < args.length) { + if (i+1 < args.length) { return addToClassPath(classPath, readAllJars(args[i+1])); } else { throw new IllegalArgumentException("-filecp requires a filename"); } } else if (args[i].equals("-cp")) { - if (i < args.length) { + if (i+1 < args.length) { return addToClassPath(classPath, Arrays.asList(args[i+1].split(System.getProperty("path.separator")))); } else { @@ -251,6 +267,40 @@ else if (args[i].equals("-cp")) { return classPath; } + protected void setOptionEncoding(String[] args, int i) { + if (i+1 < args.length) { + this.fileEncoding = args[i + 1].trim(); + if (! Charset.isSupported(this.fileEncoding)) { // isSupported() also resolves charset aliases (e.g. "UTF8"), unlike a lookup in availableCharsets() + throw new 
IllegalArgumentException("Unknown file encoding: -encoding " + this.fileEncoding); + } + } else { + throw new IllegalArgumentException("-encoding requires an encoding name (eg. " + DEFAULT_FILE_ENCODING + ")"); + } + } + + protected void setOptionAnchor(String[] args, int i) { + if (i+1 < args.length) { + String anchor = args[i + 1].trim(); + anchors = VerveineJOptions.AnchorOptions.getValue(anchor); + if (anchors == null) { + throw new IllegalArgumentException("unknown option to -anchor: " + anchor); + } + } else { + throw new IllegalArgumentException("-anchor requires an option (none|default|assoc)"); + } + } + + protected void setOptionFormat(String[] args, int i) { + if (i+1 < args.length) { + this.outputFormat = args[i + 1].trim(); + if ((! this.outputFormat.equalsIgnoreCase(MSE_OUTPUT_FORMAT)) && (! this.outputFormat.equalsIgnoreCase(JSON_OUTPUT_FORMAT))) { + throw new IllegalArgumentException("unknown option to -format: " + outputFormat); + } + } else { + throw new IllegalArgumentException("-format requires an option (mse|json)"); + } + } + protected List collectAllJars(String sDir) { File[] faFiles = new File(sDir).listFiles(); List tmpPath = new ArrayList(); @@ -299,29 +349,6 @@ protected List readAllJars(String filename) { return tmpPath; } - protected void setOptionAnchor(String[] args, int i) { - if (i < args.length) { - String anchor = args[i + 1].trim(); - anchors = VerveineJOptions.AnchorOptions.getValue(anchor); - if (anchors == null) { - throw new IllegalArgumentException("unknown option to -anchor: " + anchor); - } - } else { - throw new IllegalArgumentException("-anchor requires an option (none|default|assoc)"); - } - } - - protected void setOptionFormat(String[] args, int i) { - if (i < args.length) { - outputFormat = args[i + 1].trim(); - if ((!outputFormat.equalsIgnoreCase(MSE_OUTPUT_FORMAT)) && (!outputFormat.equalsIgnoreCase(JSON_OUTPUT_FORMAT))) { - throw new IllegalArgumentException("unknown option to -format: " + outputFormat); - } - } 
else { - throw new IllegalArgumentException("-format requires an option (mse|json)"); - } - } - protected void usage() { System.err.println("Usage: VerveineJ [-h] [-i] [-o ] [-prettyPrint] [-summary] [-alllocals] [-anchor (none|default|assoc)] [-cp CLASSPATH | -autocp DIR] [-1.1 | -1 | -1.2 | -2 | ... | -1.7 | -7] | "); System.err.println(" [-h] prints this message"); @@ -333,6 +360,7 @@ protected void usage() { System.err.println(" Summarizing at the level of classes does not produce Methods, Attributes, Accesses, and Invocations"); System.err.println(" Everything is represented as references between classes: e.g. \"A.m1() invokes B.m2()\" is uplifted to \"A references B\""); System.err.println(" [-alllocals] Forces outputing all local variables, even those with primitive type (incompatible with \"-summary\")"); + System.err.println(" [-encoding ] File encoding to use for reading the source code default: " + DEFAULT_FILE_ENCODING); System.err.println(" [-anchor (none|entity|default|assoc)] options for source anchor information:\n" + " - no entity\n" + " - only named entities [default]\n" + @@ -344,14 +372,13 @@ protected void usage() { System.err.println(" [-excludepath GLOBBINGEXPR] A globbing expression of file path to exclude from parsing"); System.err.println(" [-1.1 | -1 | -1.2 | -2 | ... 
| -1.7 | -7] specifies version of Java"); System.err.println(" | list of source files to parse or directories to search for source files"); - System.exit(0); - } protected void setCodeVersion(String arg) { if (codeVers != null) { System.err.println("Trying to set twice code versions: " + codeVers + " and " + arg); usage(); + throw new IllegalArgumentException(); } else if (arg.equals("-1.1") || arg.equals("-1")) { codeVers = JavaCore.VERSION_1_1; } else if (arg.equals("-1.2") || arg.equals("-2")) { @@ -551,4 +578,8 @@ public boolean commentsAsText() { return commentText; } + public String getFileEncoding() { + return fileEncoding; + } + } \ No newline at end of file diff --git a/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java b/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java deleted file mode 100644 index 2fd5fa1e..00000000 --- a/src/fr/inria/verveine/extractor/java/utils/FileContentExtractor.java +++ /dev/null @@ -1,64 +0,0 @@ -package fr.inria.verveine.extractor.java.utils; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.charset.StandardCharsets; - -/** - * A utility to read the content of files. - * To make it faster, we try to keep the file opened so that successive reads in the same file - * do not have to re-open. 
- * Also to make it easier to use, everything is static (not proud of it) - * @author anquetil - */ -public class FileContentExtractor { - - static protected String filename = null; - static protected RandomAccessFile openedFile = null; - - public static String getFileContent( String inputfile, int start, int end) { - if (filename != inputfile) { - closeCurrentFile(); - openFile(inputfile); - } - return getFileContent(start, end); - } - - protected static void openFile(String fname) { - try { - openedFile = new RandomAccessFile( fname, "r"); - filename = fname; - } catch (FileNotFoundException e) { - System.err.println("Error opening "+fname+" for reading"); - } - } - - protected static void closeCurrentFile() { - if (openedFile!= null) { - try { - openedFile.close(); - } catch (IOException e) { - // nothing - } - } - } - - protected static String getFileContent( int start, int end) { - byte buffer[] = new byte[ end-start+1]; - try { - openedFile.seek(start - 1); // moose position start at 1, Java at 0 - int ret = openedFile.read(buffer); - if (ret < end-start+1) { - System.err.println("missing bytes, read "+ret+" instead of "+(end-start+1)); - return ""; - } - return new String(buffer, StandardCharsets.UTF_8); - - } catch (IOException e) { - e.printStackTrace(); - } - return ""; - } - -} diff --git a/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java b/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java index 3fde81ff..aa11e3f8 100644 --- a/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java +++ b/src/fr/inria/verveine/extractor/java/visitors/defvisitors/VisitorComments.java @@ -1,5 +1,12 @@ package fr.inria.verveine.extractor.java.visitors.defvisitors; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import 
java.io.UnsupportedEncodingException; import java.util.List; import org.eclipse.jdt.core.dom.ASTNode; @@ -74,6 +81,11 @@ public class VisitorComments extends GetVisitedEntityAbstractVisitor { */ private int methodStartPosition; + protected String currentFilename = null; + protected BufferedReader openedFile = null; + protected int lastPositionRead = 0; + + public VisitorComments(EntityDictionary dico, VerveineJOptions options) { super(dico, options); classMemberDeclarations = false; @@ -83,7 +95,7 @@ public VisitorComments(EntityDictionary dico, VerveineJOptions options) { @Override public boolean visit(CompilationUnit node) { - allComments = node.getCommentList(); + initializeCommentsReader(node); if (allComments.size() == 0) { // no comment, not visiting return false; @@ -97,6 +109,7 @@ public boolean visit(CompilationUnit node) { @Override public void endVisit(CompilationUnit node) { endVisitCompilationUnit(node); + closeFile(); } @Override @@ -249,7 +262,7 @@ protected void assignCommentsInInterval(int start, int end, TWithComments fmx) { while ( searchComment && pendingComments() ) { Comment cmt = allComments.get(nextComment); if (commentIsInside(cmt, start, end)) { - dico.createFamixComment(cmt, fmx, options.commentsAsText()); + commentCreation( cmt, fmx); nextComment++; } else { @@ -258,6 +271,16 @@ protected void assignCommentsInInterval(int start, int end, TWithComments fmx) { } } + private void commentCreation(Comment cmt, TWithComments fmx) { + if (options.commentsAsText()) { + + dico.createFamixComment(cmt, fmx, getFileContent(cmt.getStartPosition(), cmt.getLength())); + } + else { + dico.createFamixComment(cmt, fmx); + } + } + /** * Whether there is still some "pending" comments */ @@ -272,4 +295,57 @@ protected boolean commentIsInside(Comment cmt, int start, int end) { return (start <= cmt.getStartPosition()) && (end >= cmt.getStartPosition() + cmt.getLength()); } + protected void initializeCommentsReader(CompilationUnit node) { + allComments = 
node.getCommentList(); + + currentFilename = (String) ((CompilationUnit)node).getProperty(EntityDictionary.SOURCE_FILENAME_PROPERTY); + try { + InputStream is = new FileInputStream(currentFilename); + this.openedFile = new BufferedReader(new InputStreamReader(is, options.getFileEncoding())); + + this.lastPositionRead = 0; + } catch (FileNotFoundException|UnsupportedEncodingException e) { + System.err.println("Not able to read comments from "+currentFilename); + } + + } + /** Reads lenghtToRead characters starting at offset start of the opened file; returns "" when they cannot be read. */ + protected String getFileContent( int start, int lenghtToRead) { + char buffer[]; + + if(openedFile == null) { + return ""; + } + + buffer = new char[lenghtToRead]; + try { + // keep lastPositionRead equal to the reader's real position, even after a short skip or read + lastPositionRead += openedFile.skip(start - lastPositionRead); + int ret = openedFile.read( buffer, /*offset in buffer*/0, lenghtToRead); + lastPositionRead += Math.max(ret, 0); // read() returns -1 at end of file + if (ret < lenghtToRead) { + System.err.println("missing bytes in "+ currentFilename + ", read " + ret + " instead of " + lenghtToRead); + return ""; + } + // the whole comment was read + + return new String(buffer); + + } catch (IOException e) { + e.printStackTrace(); + } + return ""; + } + + protected void closeFile() { + if (openedFile != null) { + try { + openedFile.close(); + } catch (IOException e) { + // nothing + } + openedFile= null; + } + } + } \ No newline at end of file diff --git a/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java b/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java index 54cc34ff..1dd22fab 100644 --- a/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java +++ b/tests/fr/inria/verveine/extractor/java/VerveineJTest_Configuration.java @@ -1,6 +1,7 @@ package fr.inria.verveine.extractor.java; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; import org.moosetechnology.model.famix.famixjavaentities.*; import org.moosetechnology.model.famix.famixjavaentities.Class; @@ -141,6 +142,21 @@ public void testAlllocals() { assertEquals(3, accessSetNom); } + @Test + public void testEncodingUTF8() { + 
parser.configure(new String[]{"-encoding", "UTF-8", "test_src/comments"}); + } + + @Test(expected = IllegalArgumentException.class) + public void testEncodingMissing() { + parser.configure(new String[]{"-encoding"}); + } + + @Test(expected = IllegalArgumentException.class) + public void testEncodingWrong() { + parser.configure(new String[]{"-encoding", "BLAH", "test_src/comments"}); + } + @Test public void testCommentsText() { parse(new String[]{"-commenttext", "test_src/comments"}); @@ -190,6 +206,19 @@ else if (meth.getName().equals("methodWithoutBody")) { assertEquals(6, numberTested); // check that all expected methods were actually found and tested } + @Test + public void testCommentsAnchor() { + parse(new String[]{"test_src/comments"}); + + assertEquals(14, entitiesOfType(Comment.class).size()); + for (Comment cmt : entitiesOfType(Comment.class)) { + assertNotNull(cmt.getSourceAnchor()); + assertNull( cmt.getContent()); + int len = (int)((IndexedFileAnchor)cmt.getSourceAnchor()).getEndPos() - (int)((IndexedFileAnchor)cmt.getSourceAnchor()).getStartPos(); + assertTrue( len >= 20); // none of the comments have less than 20 characters + } + } + @Test public void testClassDeclsInExpr() { parse(new String[]{"-alllocals", "test_src/ad_hoc/SpecialLocalVarDecls.java"});