-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
nix repl: Provide documentation from comment when evaluating to lambda #1652
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,231 @@ | ||||||
#include <iostream> | ||||||
#include <fstream> | ||||||
#include <sstream> | ||||||
#include <regex> | ||||||
#include <climits> | ||||||
#include <algorithm> | ||||||
|
||||||
#include "comment.hh" | ||||||
#include "util.hh" | ||||||
|
||||||
/* This module looks for documentation comments in the source code. | ||||||
|
||||||
Documentation is not retained during parsing, and it should not be, | ||||||
for performance reasons. Because of this the code has to jump | ||||||
through some hoops, to perform its task. | ||||||
|
||||||
Adapting the parser was not considered an option, so this code | ||||||
parses the comments from scratch, using regular expressions. These | ||||||
do not support all syntactic constructs, so in rare cases, they | ||||||
will fail and the code will report no documentation. | ||||||
|
||||||
One such situation is where documentation is requested for a | ||||||
partially applied function, where the outer lambda pattern | ||||||
matches an attribute set. This is not supported in the regexes | ||||||
because it potentially requires (almost?) the entire grammar. | ||||||
|
||||||
This module has been designed not to report the wrong | ||||||
documentation; considering that the wrong documentation is worse | ||||||
than no documentation. The regular expressions will only match | ||||||
simple, well understood syntactic structures, or not match at all. | ||||||
|
||||||
This approach to finding documentation does not cause extra runtime | ||||||
overhead, until used. | ||||||
|
||||||
This module does not support tab ('\t') characters. In some places | ||||||
they are treated as single spaces. They should be avoided. | ||||||
*/ | ||||||
namespace nix::Comment { | ||||||
|
||||||
struct Doc emptyDoc("", "", "", 0); | ||||||
|
||||||
/* parseDoc will try to recover a Doc by looking at the text that leads up to a term | ||||||
definition.*/ | ||||||
static struct Doc parseDoc(std::string sourcePrefix); | ||||||
|
||||||
/* stripComment unpacks a comment, by unindenting and stripping " * " prefixes as | ||||||
applicable. The argument should include any preceding whitespace. */ | ||||||
static std::string stripComment(std::string rawComment); | ||||||
|
||||||
/* Consistent unindenting. It will only remove entire columns. */ | ||||||
static std::string unindent(std::string s); | ||||||
|
||||||
static std::string trimUnindent(std::string s) { | ||||||
return trim(unindent(s)); | ||||||
} | ||||||
|
||||||
static std::string stripPrefix(std::string prefix, std::string s) { | ||||||
std::string::size_type index = s.find(prefix); | ||||||
return (index == 0) ? s.erase(0, prefix.length()) : s; | ||||||
} | ||||||
|
||||||
static std::string readFileUpToPos(const Pos & pos) { | ||||||
|
||||||
std::ifstream ifs(static_cast<const std::string>(pos.file)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we read from the buffer instead? Because for static analysis tooling, actually there is no such file on the filesystem, this is a problem we deal with #6530 |
||||||
std::stringstream ret; | ||||||
size_t lineNum = 1; | ||||||
std::string line; | ||||||
|
||||||
while (getline(ifs, line) && lineNum <= pos.line) { | ||||||
if (lineNum < pos.line) { | ||||||
ret << line << "\n"; | ||||||
} else if (lineNum == pos.line) { | ||||||
ret << line.substr(0, pos.column-1); | ||||||
} | ||||||
lineNum++; | ||||||
} | ||||||
|
||||||
return ret.str(); | ||||||
} | ||||||
|
||||||
struct Doc lookupDoc(const Pos & pos) { | ||||||
try { | ||||||
return parseDoc(readFileUpToPos(pos)); | ||||||
} catch (std::exception e) { | ||||||
ignoreException(); | ||||||
return emptyDoc; | ||||||
} | ||||||
} | ||||||
|
||||||
/* See lambdas in parseDoc */ | ||||||
static int countLambdas(std::string piece) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
return std::count(piece.begin(), piece.end(), ':'); | ||||||
} | ||||||
|
||||||
/* Try to recover a Doc by looking at the text that leads up to a term | ||||||
definition */ | ||||||
static struct Doc parseDoc(std::string sourcePrefix) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather like to preserve comment information in our lexer, not using regexes here. That is, we store pointers to comments here, and construct & strip indentation after parsing state. Lines 305 to 306 in 9428d7d
This has no overhead but we must carefully deal with the lifetime of these references/pointers. I think we should do this in the lexer because it will be flexible for further changes & keep consistent with the lexer. |
||||||
|
||||||
std::string wss("[ \t\r\n]*"); | ||||||
std::string spaces("[ \t]*"); | ||||||
|
||||||
std::string singleLineComment(spaces + "#[^\r\n]*(?:\n|\r\n)"); | ||||||
std::string multiSingleLineComment("(?:" + singleLineComment + ")*"); | ||||||
std::string multiLineComment("\\/\\*(?:[^*]|\\*+[^*/])*\\*+\\/"); | ||||||
std::string commentUnit("(" + multiSingleLineComment + "|" + spaces + multiLineComment + ")" + wss); | ||||||
|
||||||
std::string ident("[a-zA-Z_][a-zA-Z0-9_'-]*" + wss); | ||||||
std::string identKeep("([a-zA-Z_][a-zA-Z0-9_'-]*)" + wss); | ||||||
|
||||||
/* lvalue for nested attrset construction, but not matching | ||||||
quoted identifiers or ${...} or comments inbetween etc */ | ||||||
std::string simplePath("(?:" + wss + ident + "\\.)*" + identKeep); | ||||||
|
||||||
std::string lambda(ident + wss + ":" + wss); | ||||||
|
||||||
/* see countLambdas */ | ||||||
std::string lambdas("((:?" + lambda + ")*)"); | ||||||
|
||||||
std::string assign("=" + wss); | ||||||
|
||||||
std::string re(commentUnit + simplePath + assign + lambdas + "$"); | ||||||
std::regex e(re); | ||||||
|
||||||
#define REGEX_GROUP_COMMENT 1 | ||||||
#define REGEX_GROUP_NAME 2 | ||||||
#define REGEX_GROUP_LAMBDAS 3 | ||||||
#define REGEX_GROUP_MAX 4 | ||||||
|
||||||
std::smatch matches; | ||||||
regex_search(sourcePrefix, matches, e); | ||||||
|
||||||
std::stringstream buffer; | ||||||
if (matches.length() < REGEX_GROUP_MAX) { | ||||||
return emptyDoc; | ||||||
} | ||||||
|
||||||
std::string rawComment = matches[REGEX_GROUP_COMMENT]; | ||||||
std::string name = matches[REGEX_GROUP_NAME]; | ||||||
int timesApplied = countLambdas(matches[REGEX_GROUP_LAMBDAS]); | ||||||
return Doc(rawComment, stripComment(rawComment), name, timesApplied); | ||||||
} | ||||||
|
||||||
static std::string stripComment(std::string rawComment) { | ||||||
rawComment.erase(rawComment.find_last_not_of("\n")+1); | ||||||
|
||||||
std::string s(trimUnindent(rawComment)); | ||||||
|
||||||
if (s[0] == '/' && s[1] == '*') { | ||||||
// Remove the "/*" | ||||||
// Indentation will be removed consistently later on | ||||||
s[0] = ' '; | ||||||
s[1] = ' '; | ||||||
|
||||||
// Remove the "*/" | ||||||
if (!s.empty() && *(--s.end()) == '/') | ||||||
s.pop_back(); | ||||||
if (!s.empty() && *(--s.end()) == '*') | ||||||
s.pop_back(); | ||||||
|
||||||
s = trimUnindent(s); | ||||||
|
||||||
std::istringstream inStream(s); | ||||||
std::ostringstream stripped; | ||||||
|
||||||
std::string line; | ||||||
|
||||||
/* at first, we assume a comment | ||||||
* that is formatted like this | ||||||
* with '*' characters at the beginning | ||||||
* of the line. | ||||||
*/ | ||||||
bool hasStars = true; | ||||||
|
||||||
while(std::getline(inStream,line,'\n')){ | ||||||
if (hasStars && ( | ||||||
(!line.empty() && line[0] == '*') | ||||||
|| (line.length() >= 2 && line[0] == ' ' && line[1] == '*') | ||||||
)) { | ||||||
if (line[0] == ' ') { | ||||||
line = stripPrefix(" *", line); | ||||||
} else { | ||||||
line = stripPrefix("*", line); | ||||||
} | ||||||
} else { | ||||||
hasStars = false; | ||||||
} | ||||||
|
||||||
stripped << line << std::endl; | ||||||
} | ||||||
return trimUnindent(stripped.str()); | ||||||
} | ||||||
else { | ||||||
std::istringstream inStream(s); | ||||||
std::ostringstream stripped; | ||||||
|
||||||
std::string line; | ||||||
while(std::getline(inStream, line, '\n')) { | ||||||
line.erase(0, line.find("#") + 1); | ||||||
stripped << line << std::endl; | ||||||
} | ||||||
return trimUnindent(stripped.str()); | ||||||
} | ||||||
} | ||||||
|
||||||
static std::string unindent(std::string s) { | ||||||
size_t maxIndent = 1000; | ||||||
{ | ||||||
std::istringstream inStream(s); | ||||||
for (std::string line; std::getline(inStream, line); ) { | ||||||
size_t firstNonWS = line.find_first_not_of(" \t\r\n"); | ||||||
if (firstNonWS != std::string::npos) { | ||||||
maxIndent = std::min(firstNonWS, maxIndent); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
std::ostringstream unindentedStream; | ||||||
{ | ||||||
std::istringstream inStream(s); | ||||||
for (std::string line; std::getline(inStream, line); ) { | ||||||
if (line.length() >= maxIndent) { | ||||||
unindentedStream << line.substr(maxIndent) << std::endl; | ||||||
} else { | ||||||
unindentedStream << std::endl; | ||||||
} | ||||||
} | ||||||
} | ||||||
return unindentedStream.str(); | ||||||
} | ||||||
|
||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,41 @@ | ||||||
#pragma once | ||||||
|
||||||
#include "nixexpr.hh" | ||||||
|
||||||
namespace nix::Comment { | ||||||
|
||||||
struct Doc { | ||||||
|
||||||
// Name that the term is assigned to | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
std::string name; | ||||||
|
||||||
std::string rawComment; | ||||||
std::string comment; | ||||||
|
||||||
// Number of times the curried function must be applied to get the value | ||||||
// that this structure documents. | ||||||
// | ||||||
// This is useful when showing the documentation for a partially applied | ||||||
// curried function. The documentation is for the unapplied function, so | ||||||
// this is crucial information. | ||||||
int timesApplied; | ||||||
|
||||||
Doc(std::string rawComment, std::string comment, std::string name, int timesApplied) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would like to prefer the C++ way writing ctors
Suggested change
and remove We are using copy constructors for
|
||||||
this->name = name; | ||||||
this->rawComment = rawComment; | ||||||
this->comment = comment; | ||||||
this->timesApplied = timesApplied; | ||||||
} | ||||||
|
||||||
}; | ||||||
|
||||||
extern struct Doc emptyDoc; | ||||||
|
||||||
// lookupDoc will try to recover a Doc. This will perform perform I/O, | ||||||
// because documentation is not retained by the parser. | ||||||
// | ||||||
// Will return empty values if nothing can be found. | ||||||
// For its limitations, see the docs of the implementation. | ||||||
struct Doc lookupDoc(const Pos & pos); | ||||||
|
||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -14,6 +14,7 @@ | |||||
#include "globals.hh" | ||||||
#include "command.hh" | ||||||
#include "finally.hh" | ||||||
#include "comment.hh" | ||||||
|
||||||
#include "src/linenoise/linenoise.h" | ||||||
|
||||||
|
@@ -25,6 +26,7 @@ namespace nix { | |||||
#define ESC_BLU "\033[34;1m" | ||||||
#define ESC_MAG "\033[35m" | ||||||
#define ESC_CYA "\033[36m" | ||||||
#define ESC_BLK "\033[30;1m" | ||||||
#define ESC_END "\033[0m" | ||||||
|
||||||
struct NixRepl | ||||||
|
@@ -59,7 +61,11 @@ struct NixRepl | |||||
|
||||||
typedef set<Value *> ValuesSeen; | ||||||
std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth); | ||||||
std::ostream & printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth); | ||||||
std::ostream & printValue(std::ostream & str, Value & v, unsigned int maxDepth, ValuesSeen & seen); | ||||||
|
||||||
// Only prints if a comment is found preceding the position. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
void tryPrintDoc(std::ostream & str, Pos & pos); | ||||||
}; | ||||||
|
||||||
|
||||||
|
@@ -425,7 +431,7 @@ bool NixRepl::processLine(string line) | |||||
} else { | ||||||
Value v; | ||||||
evalString(line, v); | ||||||
printValue(std::cout, v, 1) << std::endl; | ||||||
printValueAndDoc(std::cout, v, 1) << std::endl; | ||||||
} | ||||||
} | ||||||
|
||||||
|
@@ -509,6 +515,59 @@ void NixRepl::evalString(string s, Value & v) | |||||
} | ||||||
|
||||||
|
||||||
std::ostream & NixRepl::printValueAndDoc(std::ostream & str, Value & v, unsigned int maxDepth) | ||||||
{ | ||||||
printValue(str, v, maxDepth); | ||||||
|
||||||
switch (v.type) { | ||||||
case tLambda: | ||||||
tryPrintDoc(str, v.lambda.fun->pos); | ||||||
break; | ||||||
default: | ||||||
break; | ||||||
} | ||||||
return str; | ||||||
} | ||||||
|
||||||
void NixRepl::tryPrintDoc(std::ostream & str, Pos & pos) { | ||||||
|
||||||
Comment::Doc doc = Comment::lookupDoc(pos); | ||||||
|
||||||
if (!doc.name.empty()) { | ||||||
str << std::endl; | ||||||
str << std::endl << "| " << ESC_BLK << doc.name << ESC_END << std::endl; | ||||||
str << "| "; | ||||||
for (size_t i = 0; i < doc.name.length(); i++) { | ||||||
str << '-'; | ||||||
} | ||||||
str << std::endl; | ||||||
str << "| " << std::endl; | ||||||
} | ||||||
|
||||||
// If we're showing any form of doc at all, we need to inform | ||||||
// user about partially applied functions. | ||||||
if (!doc.name.empty() || !doc.comment.empty()) { | ||||||
|
||||||
if (doc.timesApplied > 0) { | ||||||
str << "| " << ESC_YEL << "NOTE: " << ESC_BLK << "This function has already been applied!" << ESC_END << std::endl | ||||||
<< "| You should ignore the first " | ||||||
<< ESC_BLK << std::to_string(doc.timesApplied) << ESC_END | ||||||
<< " parameter(s) in this documentation," << std::endl | ||||||
<< "| because they have already been applied." << std::endl | ||||||
<< "|" << std::endl; | ||||||
} | ||||||
} | ||||||
|
||||||
if (!doc.comment.empty()) { | ||||||
std::stringstream commentStream(doc.comment); | ||||||
std::string line; | ||||||
while(std::getline(commentStream,line,'\n')){ | ||||||
str << "| " << line << std::endl; | ||||||
} | ||||||
} | ||||||
|
||||||
} | ||||||
|
||||||
std::ostream & NixRepl::printValue(std::ostream & str, Value & v, unsigned int maxDepth) | ||||||
{ | ||||||
ValuesSeen seen; | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit pick (NFC)