Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xpath: [incorrect] support for indexed-repeat function #150

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/xpath/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ Both evaluator classes provide the following convenience methods:

We intend to support the full ODK XForms function library, but support is currently incomplete. The following functions are not yet supported (the `jr:` prefix is used by convention to refer to the JavaRosa namespace):

- `indexed-repeat`
- `pulldata`
- `jr:choice-name`

Expand Down
30 changes: 30 additions & 0 deletions packages/xpath/src/evaluations/LocationPathEvaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import type {
EvaluationContextTreeWalkers,
} from '../context/EvaluationContext.ts';
import type { Evaluator } from '../evaluator/Evaluator.ts';
import type { ExpressionEvaluator } from '../evaluator/expression/ExpressionEvaluator.ts';
import type { FilterPathExpressionEvaluator } from '../evaluator/expression/FilterPathExpressionEvaluator.ts';
import type { LocationPathEvaluator } from '../evaluator/expression/LocationPathEvaluator.ts';
import type { LocationPathExpressionEvaluator } from '../evaluator/expression/LocationPathExpressionEvaluator.ts';
import type { EvaluableArgument } from '../evaluator/functions/FunctionImplementation.ts';
import type { FunctionLibraryCollection } from '../evaluator/functions/FunctionLibraryCollection.ts';
import type { NodeSetFunction } from '../evaluator/functions/NodeSetFunction.ts';
import type { AnyStep } from '../evaluator/step/Step.ts';
Expand Down Expand Up @@ -103,6 +105,8 @@ type ArbitraryNodesTemporaryCallee =
// eslint-disable-next-line @typescript-eslint/no-explicit-any
| NodeSetFunction<any>;

type PositionPredicateEvaluator = EvaluableArgument | ExpressionEvaluator;

// TODO: naming, general design/approach. This class has multiple, overlapping
// purposes:
//
Expand Down Expand Up @@ -338,6 +342,32 @@ export class LocationPathEvaluation
return result;
}

evaluatePositionPredicate(predicate: PositionPredicateEvaluator): LocationPathEvaluation {
let currentPosition = 0;
let positioned: LocationPathEvaluation | null = null;

const contextPosition = predicate.evaluate(this).toNumber();

if (contextPosition >= 1) {
for (const item of this) {
currentPosition += 1;

if (currentPosition === contextPosition) {
positioned = item;
break;
}
}
}

return (
positioned ??
new LocationPathEvaluation(this, [], {
contextPosition,
contextSize: () => this.contextSize(),
})
);
}

protected _isEmpty: boolean | null = null;

protected isEmpty(): boolean {
Expand Down
23 changes: 10 additions & 13 deletions packages/xpath/src/evaluator/expression/LocationPathEvaluator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,29 +76,26 @@ export class LocationPathEvaluator
for (const step of rest) {
currentContext = currentContext.step(step);

// TODO: predicate *logic* feels like it nicely belongs here (so long as it continues to pertain directly to syntax nodes), but application of predicates is definitely a concern that feels it better belongs in `LocationPathEvaluation`
// TODO: a previous observation here had noted that it may be appropriate
// to move predicate logic to `LocationPathEvaluation`. With some
// hindsight, and now with a subset of position predicate logic moved
// there to suppport equivalent logic from a function implementation, it
// seems quite reasonable that predicate logic would move there entirely.
// In fact it almost certainly should.
for (const predicateNode of step.predicates) {
const [predicateExpressionNode] = predicateNode.children;
const predicateExpression = createExpression(predicateExpressionNode);

let positionPredicate: number | null = null;

// Static/explicit position predicate
if (predicateExpression instanceof NumberExpressionEvaluator) {
positionPredicate = predicateExpression.evaluate(currentContext).toNumber();
currentContext = currentContext.evaluatePositionPredicate(predicateExpression);

continue;
}

const filteredNodes: Node[] = [];

for (const self of currentContext) {
if (positionPredicate != null) {
if (self.contextPosition() === positionPredicate) {
filteredNodes.push(...self.contextNodes);
break;
} else {
continue;
}
}

const predicateResult = predicateExpression.evaluate(self);

// TODO: it's surprising there aren't tests exercising this, but it
Expand Down
201 changes: 201 additions & 0 deletions packages/xpath/src/functions/xforms/node-set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { UpsertableWeakMap } from '@getodk/common/lib/collections/UpsertableWeak
import { ScopedElementLookup } from '@getodk/common/lib/dom/compatibility.ts';
import type { LocalNamedElement } from '@getodk/common/types/dom.ts';
import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts';
import { FunctionImplementation } from '../../evaluator/functions/FunctionImplementation.ts';
import { NodeSetFunction } from '../../evaluator/functions/NodeSetFunction.ts';
import { NumberFunction } from '../../evaluator/functions/NumberFunction.ts';
import { StringFunction } from '../../evaluator/functions/StringFunction.ts';
Expand Down Expand Up @@ -32,6 +33,206 @@ export const countNonEmpty = new NumberFunction(
}
);

/**
* Filters a node-set {@link evaluation} to derive a node-set (and evaluation
* context) which only includes nodes descending from one or more nodes in the
* {@link ancestorContext} node-set/context.
*
* @todo This is general enough we could theoretically use it elsewhere. But
* it's unclear if we **should**. There is a clear expectation of this behavior
* from ported JavaRosa tests of `indexed-repeat`, which is specialized
* shorthand with clear intent. Broader application could break with some of our
* assumptions about absolute/relative expressions otherwise.
*/
const filterContextDescendants = (
ancestorContext: LocationPathEvaluation,
evaluation: LocationPathEvaluation
): LocationPathEvaluation => {
const resultNodes = Array.from(evaluation.nodes).filter((node) => {
return ancestorContext.some((context) => {
return context.value.contains(node);
});
});

return LocationPathEvaluation.fromArbitraryNodes(ancestorContext, resultNodes, indexedRepeat);
};

/**
* Per {@link https://getodk.github.io/xforms-spec/#fn:indexed-repeat}:
*
* > `indexed-repeat(//node, /path/to/repeat, //index1,
* > /path/to/repeat/nested-repeat, //index2)` is meant to be a shortcut for
* > `//repeat[position()=//index1]/nested-repeat[position()=index2]/node`
*
* **FOR DISCUSSION**
*
* This implementation currently makes the following judgement calls:
*
* 1. None of the logic is repeat specific, and the function will behave the
* same way when specifying node-sets that do not correspond to repeats (or
* even to an XForm document structure). While the naming and spec language
* are clearly repeat-specific, the spec's explanation shows how the function
* maps to a more general LocationPath expression. The intent (at least for
* now) is to support that generalization, and only add repeat-specific
* restrictions if there's a compelling reason. (Besides being simpler this
* way, it will almost certainly perform better as well.)
*
* 2. Node-set results produced by the first `target` argument are
* contextualized to their containing "repeats" node-set (i.e. whichever node
* is resolved by the last `repeatN`/`indexN` pair). This is directly tested
* by `scenario` tests ported from JavaRosa. Notably, **this deviates** from
* any other handling of absolute paths by this package. A case can be made
* that it's consistent with the underlying XPath specification, if a bit
* surprising.
*
* 3. The node-set produced by `repeat1` is **not contextualized** to its
* containing node-set (i.e. the evaluation context), and this is also tested
* directly by `scenario` tests ported from JavaRosa. This should be
* consistent with expectations; it's noted here mostly because it differs
* from the next point...
*
* 4. The respective node-sets produced by `repeat2`, `repeat3` (and beyond\*)
* **are contextualized**, respectively, to the previous node-set context
* (i.e. `repeat2` is filtered to include only descendants of
* `repeat1[index1]`, `repeat3` is in turn filtered to include only
* descendants of `repeat2[index2]`). This behavior is not tested directly by
* JavaRosa, but the intent is inferred by behavior of the first `target`
* argument, and assumed from the spec example.
*
* 5. **!!!** None of the `indexN` arguments are contextualized in any special
* way, they're evaluated just as if they were written as a position
* predicate. At present, it's not clear whether there's a reliable way to
* infer intent otherwise:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not feeling quite right. A common case is parallel repeats in which position(..) is used as the indexN. The intent is to use the position of the current repeat. Example XLSForm (not the first weird one with the relevance directly on the repeat but the second one with relevance on an inner group)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haven’t had a chance to write the tests I wanted, I’ll come back to it Friday! I wanted to clarify that my understanding is that the intended context for the index expressions is the single node binding, NOT the repeat reference. Unfortunately all the JR tests have index paths that end up with the same contextualized ref regardless of which is used.

Copy link
Member Author

@eyelidlessness eyelidlessness Jul 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I played a bit with the linked form. I agree it doesn't seem to be doing what I would intuitively expect from the definition. That said...

I wanted to clarify that my understanding is that the intended context for the index expressions is the single node binding, NOT the repeat reference.

This strongly suggests to me that the spec example could use clarification. Here's what the spec says:

Returns a single node value from a node-set by selecting the 1-based index of a repeat node-set that this node is a child of. It does this up to 3 repeat levels deep. E.g. indexed-repeat(//node, /path/to/repeat, //index1, /path/to/repeat/nested-repeat, //index2) is meant to be a shortcut for //repeat[position()=//index1]/nested-repeat[position()=index2]/node in native XPath syntax.

Note that as I mentioned when we discussed this Tuesday, the way that the example maps //1 to sub-expression parts is already pretty loosey goosey. But to be really clear...

//repeat[position()=//index1]/nested-repeat[position()=index2]/node
         ^ (1)                              ^ (2)
  1. XPath specifies this predicate as contextualized to //repeat.

    • The position() call is evaluated in that context (which we can hopefully take as read).

    • The //index1 expression is an abbreviated absolute path, so it isn't really contextualized to anything at all. I really don't know how to interpret the intent of this part of the example. But taken literally, the XPath spec answer would be "the first index1 in the document, in document order".

  2. XPath specifies this predicate as contextualized to //repeat[position()=//index1]/nested-repeat.

    • Here again, the position() call is evaluated in that context.

    • The index2 expression is a relative path expression, with a leading abbreviated child axis step. Again taking the example literally, the XPath spec would say that this should resolve to the first index2 child of the current nested-repeat context node.

Footnotes

  1. Shorthand for /descendant-or-self::node()/, which the spec doesn't support, and as I recall the spec was updated in recent memory to remove other similar usage.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The spec was backfilled from the JR implementation so I think we should use the implementation as the intended behavior and update the spec as needed.

I agree it’s loosey goosey! I’ve interpreted it as “evaluate the indexN expression using the single node binding context and use that as the positional predicate.” It’s a bit of a stretch given what’s written but I think that’s the implementation.

Copy link
Member

@lognaturel lognaturel Jul 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started backfilling a couple tests in JR to show the intended evaluation contexts and describe the general intent behind the function: getodk/javarosa#776 @eyelidlessness and I have looked at it together and he'll take over from there!

*
* - The spec's example uses only relative references (and uses them in a
* fairly confusing and inconsistent way)
*
* - The ported JavaRosa tests do use an absolute `index1` expression, but it
* references a single static node, outside of the repeat hierarchy.
*
* - It seems likely (although I haven't tested the hunch) that JavaRosa
* **would contextualize** the `indexN` arguments. But this wouldn't be a
* strong signal of intent, because JavaRosa contextualizes absolute
* expressions in a much broader set of scenarios that we now know are out
* of scope.
*
* 6. The `indexed-repeat` function as specified should return a string. We
* currently return a node-set, because the function's logic is clearly
* designed to deal with node-sets. We do, however, filter the final (first
* `target` argument) result to the first node in that node-set (if any). In
* any case, wherever usage expects a string, it will go through the normal
* type casting logic consistent with spec, so returning a single node
* node-set seems safe. In theory, by returning a node-set, we could also use
* this implementation in a `FilterExpr` (e.g.
* `indexed-repeat(...)/even-deeper/steps[also="predicates"]).
lognaturel marked this conversation as resolved.
Show resolved Hide resolved
*
* \* Bonus judgement call: the function is specified to a maximum depth of 3,
* but this implementation is variadic to any depth. We discussed this
* briefly, and it seemed like the consensus at the time was that it should
* be fine.
*
* - - -
*
* @todo Other parts of the ODK XForms spec suggest that `//` syntax is not
* actually expected to be supported. Specifically
* {@link https://getodk.github.io/xforms-spec/#xpath-axes | XPath Axes } says
* that "only the _parent_, _child_ and _self_ axes are supported" (whereas `//`
* is shorthand for `/descendant-or-self::node()/`). However, that is presumably
* a JavaRosa limitation. This package supports all of the XPath 1.0 axes, as
* well as that shorthand syntax. At time of writing, as far as I can see, the
* quote above describing `indexed-repeat` is the only remaining part of the
* spec referencing that axis. There are `scenario` tests ported from JavaRosa
* exercising some of this function's behavior already, but they do not exercise
* the syntax as referenced in the description. We should add unit tests in this
* package to test that case, but this was deferred for now as usage in the spec
* example is confusing.
*/
export const indexedRepeat = new FunctionImplementation(
'indexed-repeat',
[
{ arityType: 'required', typeHint: 'node' }, // arg
{ arityType: 'required', typeHint: 'node' }, // repeat1
{ arityType: 'required', typeHint: 'number' }, // index1
{ arityType: 'optional', typeHint: 'node' }, // repeatN=0 -> repeat2
{ arityType: 'optional', typeHint: 'number' }, // indexN=0 -> index2
{ arityType: 'optional', typeHint: 'node' }, // repeatN=1 -> repeat3
{ arityType: 'optional', typeHint: 'number' }, // indexN=1 -> index3

// Go beyond spec? Why the heck not! It's clearly a variadic design.
{ arityType: 'variadic', typeHint: 'any' },
],
(context, [target /* arg */, ...rest /* repeat1, index1[, repeatN, indexN]* */]) => {
let currentContext = context;

for (let i = 0; i < rest.length; i += 2) {
const repeatN = rest[i];
const indexN = rest[i + 1];

if (repeatN == null) {
break;
}

if (indexN == null) {
throw 'todo: repeat/index pairs are spec for signature';
}

const evaluation = repeatN.evaluate(currentContext);

if (!(evaluation instanceof LocationPathEvaluation)) {
throw 'todo: not a node-set';
}

let repeats = evaluation;

// repeat1 is inherently filtered by the initial context, while repeatN >
// where N > 1 must (implicitly) be filtered to include only descendants
// of the first iteration:
//
// - if the repeat1 expression is relative, evaluating it with the
// expression context will be filtered automatically
//
// - if it is absolute, it is expected to resolve absolute (to the
// context root); this way computations can call `indexed-repeat` from
// other arbitrary context nodes (as is the case in ported JR tests)
//
// - if repeat2 (and so on) is absolute, it **must** be implicitly scoped
// to the context established by the previous iteration (otherwise the
// function signature makes no sense! Only the last indexN would apply)
if (i > 0) {
repeats = filterContextDescendants(currentContext, repeats);
}

currentContext = repeats.evaluatePositionPredicate(indexN);
}

// Non-null assertion here should be safe because the required parameter
// is checked by `FunctionImplementation`.
const targetsResult = target!.evaluate(currentContext);

if (!(targetsResult instanceof LocationPathEvaluation)) {
// Not fond of adding more throw string statements, but this will make it
// easier to find along with all of the other cases of this exact same
// assertion. We have a broader story around error propagation which will
// implicate all of these. We should also consider internal APIs which
// will do checks like this where appropriate without them being scattered
// ad-hoc throughout the function implementations concerned with them.
throw 'todo: not a node-set';
}

const results = filterContextDescendants(currentContext, targetsResult);

// Awkward bit of internal API. This returns either:
//
// - The first node in the resulting node-set, or
// - An empty node-set in the result's context
//
// It would be nice to reuse `evaluatePositionPredicate` here, but we'd
// need to fake up a compatible "evaluator" interface and then fake an
// `EvaluationResult` for it to produce. This is considerably simpler.
return results.first() ?? results;
}
);

interface InstanceElement extends LocalNamedElement<'instance'> {}

const identifiedInstanceLookup = new ScopedElementLookup(':scope > instance[id]', 'instance[id]');
Expand Down
Loading