-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
xpath: support for indexed-repeat
function
#195
Changes from 1 commit
6e2cf9d
aad995b
fc30485
59ccb8a
541cd12
f01973e
dddbae3
fcb402f
b3b803c
00ed5f3
8ed21a7
8f24309
7172db0
746aa8a
7742cb9
bbbd2a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,15 @@ | ||
import { UpsertableWeakMap } from '@getodk/common/lib/collections/UpsertableWeakMap.ts'; | ||
import { ScopedElementLookup } from '@getodk/common/lib/dom/compatibility.ts'; | ||
import type { LocalNamedElement } from '@getodk/common/types/dom.ts'; | ||
import type { Evaluation } from '../../evaluations/Evaluation.ts'; | ||
import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts'; | ||
import type { EvaluableArgument } from '../../evaluator/functions/FunctionImplementation.ts'; | ||
import { NodeSetFunction } from '../../evaluator/functions/NodeSetFunction.ts'; | ||
import { NumberFunction } from '../../evaluator/functions/NumberFunction.ts'; | ||
import { StringFunction } from '../../evaluator/functions/StringFunction.ts'; | ||
import { XFormsXPathEvaluator } from '../../index.ts'; | ||
import { seededRandomize } from '../../lib/collections/sort.ts'; | ||
import type { MaybeElementNode } from '../../lib/dom/types.ts'; | ||
import type { ContextNode, MaybeElementNode } from '../../lib/dom/types.ts'; | ||
import type { ModelElement } from '../../xforms/XFormsXPathEvaluator.ts'; | ||
|
||
export const countNonEmpty = new NumberFunction( | ||
|
@@ -32,6 +34,221 @@ export const countNonEmpty = new NumberFunction( | |
} | ||
); | ||
|
||
type AssertArgument = (index: number, arg?: EvaluableArgument) => asserts arg is EvaluableArgument; | ||
|
||
const assertArgument: AssertArgument = (index, arg) => { | ||
if (arg == null) { | ||
throw new Error(`Argument ${index + 1} expected`); | ||
} | ||
}; | ||
|
||
type AssertIsLocationPathEvaluation = ( | ||
evaluation?: Evaluation | ||
) => asserts evaluation is LocationPathEvaluation; | ||
|
||
/** | ||
* @todo This is a concern in several `FunctionImplementation`s. It would be | ||
* much nicer if it were handled as part of the signature, then inferred in the | ||
* types and validated automatically at runtime. It would also make sense, as a | ||
* minor stopgap improvement, to generalize checks like this in a single place | ||
* (e.g. as a static method on {@link LocationPathEvaluation} itself). Deferred | ||
* here because there is exploratory work on both, but both are out of scope for | ||
* work in progress to support {@link indexedRepeat}. | ||
*/ | ||
const assertIsLocationPathEvaluation: AssertIsLocationPathEvaluation = (evaluation) => { | ||
if (!(evaluation instanceof LocationPathEvaluation)) { | ||
throw new Error('Expected a node-set result'); | ||
} | ||
}; | ||
|
||
/** | ||
* Note: this function is not intended to be general outside of usage by | ||
* {@link indexedRepeat}. | ||
* | ||
* Evaluation of the provided argument is eager—i.e. materializing the complete | ||
* array of results, rather than the typical `Iterable<ContextNode>` produced in | ||
* most cases—because it is expected that in most cases the eagerness will not | ||
* be terribly expensive, and all results will usually be consumed, either to be | ||
* indexed or filtered in other ways applicable at call sites. | ||
* | ||
* Function is named to reflect that expectation. | ||
*/ | ||
const evaluateArgumentToFilterableNodes = ( | ||
context: LocationPathEvaluation, | ||
arg: EvaluableArgument | ||
): readonly ContextNode[] => { | ||
const evaluation = arg.evaluate(context); | ||
|
||
assertIsLocationPathEvaluation(evaluation); | ||
|
||
return Array.from(evaluation.contextNodes); | ||
}; | ||
|
||
interface EvaluatedIndexedRepeatArgumentPair { | ||
readonly repeats: readonly ContextNode[]; | ||
readonly position: number; | ||
} | ||
|
||
type DepthSortResult = -1 | 0 | 1; | ||
|
||
/** | ||
* @todo This is **obviously cacheable**, but it would make most sense to cache | ||
* it at the expression level (or at the expression + bound context node level). | ||
* All of the expression analysis machinery is further up the stack (as it | ||
* generally ought to be with current designs), but it would be nice to consider | ||
* how we'd address caching with these kinds of dynamics at play. | ||
*/ | ||
const compareContainmentDepth = ( | ||
{ repeats: a }: EvaluatedIndexedRepeatArgumentPair, | ||
{ repeats: b }: EvaluatedIndexedRepeatArgumentPair | ||
): DepthSortResult => { | ||
for (const repeatA of a) { | ||
for (const repeatB of b) { | ||
if (repeatA.contains(repeatB)) { | ||
return -1; | ||
} | ||
|
||
if (repeatB.contains(repeatA)) { | ||
return 1; | ||
} | ||
} | ||
} | ||
|
||
if (a.length === 0 || b.length === 0) { | ||
return 0; | ||
} | ||
|
||
// TODO: if we reach this point, there is no hierarchical relationship between | ||
// the repeats in `repeatN` and `repeatN + M`. This seems to violate **at | ||
// least the intent** of the spec. We should probably produce an error here? | ||
return 0; | ||
}; | ||
|
||
export const indexedRepeat = new NodeSetFunction( | ||
'indexed-repeat', | ||
[ | ||
// spec: arg | ||
{ arityType: 'required', typeHint: 'node' }, | ||
// spec: repeat1 | ||
{ arityType: 'required', typeHint: 'node' }, | ||
// spec: index1 | ||
{ arityType: 'required', typeHint: 'number' }, | ||
// spec: repeatN=0 -> repeat2 | ||
{ arityType: 'optional', typeHint: 'node' }, | ||
// spec: indexN=0 -> index2 | ||
{ arityType: 'optional', typeHint: 'number' }, | ||
// spec: repeatN=1 -> repeat3 | ||
{ arityType: 'optional', typeHint: 'node' }, | ||
// spec: indexN=1 -> index3 | ||
{ arityType: 'optional', typeHint: 'number' }, | ||
|
||
// Go beyond spec? Why the heck not! It's clearly a variadic design. | ||
{ arityType: 'variadic', typeHint: 'any' }, | ||
], | ||
(context, args) => { | ||
// First argument is `target` (per spec) of the deepest resolved repeat | ||
const target = args[0]!; | ||
|
||
let pairs: EvaluatedIndexedRepeatArgumentPair[] = []; | ||
|
||
// Iterate through rest of arguments, collecting pairs of: | ||
// | ||
// - `repeats`: **all** nodes matching the supplied node-set for the | ||
// `repeatN` argument in this pair | ||
// - `position`: the resolved number value for the `indexN` (per spec) | ||
// argument at in this pair | ||
// | ||
// For **all `repeatN`/`indexN` pairs**, arguments are evaluated in the | ||
// calling context (in typical XForms usage, this will be the context of the | ||
// bound node). This is the core difference between this approach and the | ||
// original in https://github.com/getodk/web-forms/pull/150. That | ||
// understanding was clarified in review of that orignal effort, and is | ||
// borne out by new tests exercising depth > 1, which demonstrate the same | ||
// behavior in JavaRosa. | ||
// | ||
// Note: we start iterating here at index 1 so assertions related to | ||
// positional argument index are clear. | ||
for (let i = 1; i < args.length; i += 2) { | ||
const repeatsArg = args[i]; | ||
const positionArg = args[i + 1]; | ||
|
||
assertArgument(i, repeatsArg); | ||
assertArgument(i + 1, positionArg); | ||
|
||
// Evaluate position first, because... | ||
const position = positionArg.evaluate(context).toNumber(); | ||
|
||
// ... if any "index" (position) is `NaN`, we short-circuit. This is | ||
// expected behavior because the equivalent `/data/repN[posN]/target` | ||
// expression would do the same. | ||
if (Number.isNaN(position)) { | ||
return []; | ||
} | ||
|
||
// Reiterating the point made describing this loop for future clarity: | ||
// this collects **all** of the nodes matching the `repeatN` expression. | ||
// We filter them in a later step. | ||
const repeats = evaluateArgumentToFilterableNodes(context, repeatsArg); | ||
|
||
// No repeats = nothing to "index" = short circuit | ||
if (repeats.length === 0) { | ||
return []; | ||
} | ||
|
||
pairs.push({ | ||
repeats, | ||
position, | ||
}); | ||
} | ||
|
||
// Sort the results of each `repeatN`/`indexN` pair, by containment order. | ||
// | ||
// Note: the `repeatN`/`indexN` pairs can be supplied in any order (this is | ||
// consistent with behavior in JavaRosa, likely as a side effect of the | ||
// function being implemented there by transforming the expression to its | ||
// LocationPath equivalent). | ||
pairs = pairs.sort(compareContainmentDepth); | ||
|
||
// Resolve repeats at the specified/evaluated position, in document depth | ||
// order by: | ||
// | ||
// 1. Filtering each set of repeats to include **only** the nodes contained | ||
// by the previously resolved repeat. | ||
// | ||
// 2. Selecting the repeat at the specified/evaluated position (of those | ||
// filtered in 1). | ||
let repeatContextNode: ContextNode; | ||
|
||
for (const pair of pairs) { | ||
const { position } = pair; | ||
const repeats = pair.repeats.filter((repeat) => { | ||
return repeatContextNode?.contains(repeat) !== false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This means invalid parameter pairs will be silently ignored, right? It feels like it would be helpful for the issue to be surfaced in case it's a typo but that could come later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, but I can see why it would seem that way! The reasoning for this was also less clear to me reading it today than when I wrote it. It's a special case for the first iteration of the loop. I hope 7742cb9 is clarifying. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Helpful for the first iteration, yes! What happens if I do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm about to wrap up for the day so I can test tomorrow to say for sure, but I believe it will produce an empty node-set/blank string. Ah, and now I understand the question better! The You can think of it as conceptually similar to how some custom CSS selector engines work: they'll often work right to left (well, hierarchically bottom up), matching the deepest part of the selector first and then working hierarchically up the selector to refine the result set from there. (As described here, it's usually a performance optimization technique; and it's one we might also want to think about someday) But yes, it will also filter out invalid hierarchies as in your example, that's just not all it's doing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense, thanks! How about tracking the unrelated reference case as part of future error work? It would be nice to give explicit feedback as some sort of error if possible. |
||
}); | ||
|
||
// Select next repeat context at the current `repeatN`/`indexN` position. | ||
// | ||
// Note: despite terminology used in the spec, `indexN` is treated as | ||
lognaturel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// equivalent to an XPath position predicate: it is 1-based. | ||
const positionedRepeat = repeats[position - 1]; | ||
|
||
// No repeat context is found = nothing to target = short-circuit | ||
if (positionedRepeat == null) { | ||
return []; | ||
} | ||
|
||
repeatContextNode = positionedRepeat; | ||
} | ||
|
||
// Resolve **all** target nodes. | ||
const targetNodes = evaluateArgumentToFilterableNodes(context, target); | ||
|
||
// Filter only the target nodes contained by the deepest repeat context node. | ||
return targetNodes.filter((targetNode) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll refer to the casting details in my comment the other thread, with emphasis on how node-sets are cast to strings. This is also how XPath semantics addresses node-set values. Anyway, as I said in that thread: I'm open to making this less capable if that's what we decide we really want. But to be clear, a |
||
return repeatContextNode.contains(targetNode); | ||
}); | ||
} | ||
); | ||
|
||
interface InstanceElement extends LocalNamedElement<'instance'> {} | ||
|
||
const identifiedInstanceLookup = new ScopedElementLookup(':scope > instance[id]', 'instance[id]'); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this needs to be
StringFunction
, in the spec return type ofindexedRepeat
is stringThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh, thanks for flagging this, I wondered about it briefly when reviewing but didn't come back to it. I've never seen attempted usage that results in a nodeset but it certainly could be useful. I'm not sure what Collect will do in that case. I think it would be fine as long as you use it in a context where a nodeset is allowed like
count(indexed-repeat(/data/repeat1/repeat2/foo, /data/repeat1, 3))
so it might be the spec that needs updating. We should try something like that in Collect (I can later if no one beats me to it)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe I made a case for this in #150. I am open to changing it, but I do want to make the case for it again here.
My interpretation of the spec is that the function...
repeatN
/indexN
pair [in containment depth order]arg
target (this is the result produced by the spec example's non-shortcut equivalent)Importantly: XPath semantics only has a concept of node-set value/result type, not a single node. Any single node result—whether returned by a function, or produced by a LocationPath Step—is a node-set with a context size of 1.
In this case, the production of a "single node value" occurs in step 4, where XPath casting semantics produces the string value of the first node in a node-set (regardless of its context size).
The tests clearly demonstrate that behavior: anywhere a string value is consumed, the expected string value is produced from the single (first) node value in the resulting node-set.
In
@getodk/xpath
this casting from node-set to string occurs further up the call stack, applied by intentionally isolated casting logic, and applied consistently:evaluate
callFunctionImplementation
, any part of a LocationPath, or any other result from any other XPath syntaxOur isolation of casting logic is important. XPath casting semantics have a fair bit of nuance which can easily be lost or become inconsistent otherwise (and did earlier in implementation!). That isolation is also crucial to our ability to support some ODK XForms functionality, i.e. date / time / datetime data types.
In terms of spec conformance: the only way to deviate from the spec behavior is to intentionally consume the return value as a node-set, as in @lognaturel's
count
example, or by callingevaluate
with any of theXPathResult
types producing one or more nodes **and then explicitly accessing the evaluated result's node APIs, or by calling any of our explicitly node-returningevaluate*
convenience methods; any of which are explicit and intentional. Doing so does go beyond the spec, but it does not conflict with the spec. This is the same reasoning I applied to implementing the function with a variadic signature (rather than restricting calls to a maximum of threerepeatN
/indexN
pairs as in the spec).Since the spec signature returns a string, there's no precedent for different behavior when an
indexed-repeat
return value is deliberately used as a node-set: you can't cast a string to a node-set.The only conflict this interpretation could introduce with the spec is if we think it's more valuable to users to actively prevent the beyond-spec usage than to (quietly, apart from this discussion) permit it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation, it feels fine to leave as-is to me. 👍 @sadiqkhoja?
Strong agree.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just saw the test below and the edited comment. Ok, seems like there's more to consider!