getodk · eyelidlessness · Aug 28, 2024 · Aug 20, 2024 · Aug 20, 2024 · Aug 21, 2024
diff --git a/packages/xpath/src/functions/xforms/node-set.ts b/packages/xpath/src/functions/xforms/node-set.ts
@@ -1,13 +1,15 @@
 import { UpsertableWeakMap } from '@getodk/common/lib/collections/UpsertableWeakMap.ts';
 import { ScopedElementLookup } from '@getodk/common/lib/dom/compatibility.ts';
 import type { LocalNamedElement } from '@getodk/common/types/dom.ts';
+import type { Evaluation } from '../../evaluations/Evaluation.ts';
 import { LocationPathEvaluation } from '../../evaluations/LocationPathEvaluation.ts';
+import type { EvaluableArgument } from '../../evaluator/functions/FunctionImplementation.ts';
 import { NodeSetFunction } from '../../evaluator/functions/NodeSetFunction.ts';
 import { NumberFunction } from '../../evaluator/functions/NumberFunction.ts';
 import { StringFunction } from '../../evaluator/functions/StringFunction.ts';
 import { XFormsXPathEvaluator } from '../../index.ts';
 import { seededRandomize } from '../../lib/collections/sort.ts';
-import type { MaybeElementNode } from '../../lib/dom/types.ts';
+import type { ContextNode, MaybeElementNode } from '../../lib/dom/types.ts';
 import type { ModelElement } from '../../xforms/XFormsXPathEvaluator.ts';
 
 export const countNonEmpty = new NumberFunction(
@@ -32,6 +34,221 @@ export const countNonEmpty = new NumberFunction(
 	}
 );
 
+type AssertArgument = (index: number, arg?: EvaluableArgument) => asserts arg is EvaluableArgument;
+/** Throws unless `arg` was supplied; zero-based `index` is reported one-based in the message. */
+const assertArgument: AssertArgument = (index, arg) => {
+	if (arg === undefined || arg === null) {
+		throw new Error(`Argument ${index + 1} expected`);
+	}
+};
+
+type AssertIsLocationPathEvaluation = (
+	evaluation?: Evaluation
+) => asserts evaluation is LocationPathEvaluation;
+
+/**
+ * Throws unless `evaluation` is a {@link LocationPathEvaluation} instance.
+ *
+ * @todo Several `FunctionImplementation`s need this check. Ideally it would be
+ * part of the signature (inferred in types, validated at runtime), or at least
+ * generalized in one place, e.g. a static {@link LocationPathEvaluation} method.
+ * Both are being explored, but are out of scope for {@link indexedRepeat} work.
+ */
+const assertIsLocationPathEvaluation: AssertIsLocationPathEvaluation = (evaluation) => {
+	if (evaluation instanceof LocationPathEvaluation) {
+		return;
+	}
+	throw new Error('Expected a node-set result');
+};
+
+/**
+ * Evaluates `arg` against `context` and eagerly collects the resulting context
+ * nodes into an array, so call sites can index or filter them.
+ *
+ * Eager materialization (instead of the usual lazy `Iterable<ContextNode>`) is
+ * deliberate: it is expected to be cheap in most cases, and callers usually
+ * consume every result anyway. The function name reflects that expectation.
+ *
+ * Not intended for general use outside of {@link indexedRepeat}.
+ *
+ * @throws if `arg` does not evaluate to a {@link LocationPathEvaluation}.
+ */
+const evaluateArgumentToFilterableNodes = (
+	context: LocationPathEvaluation,
+	arg: EvaluableArgument
+): readonly ContextNode[] => {
+	const result = arg.evaluate(context);
+
+	assertIsLocationPathEvaluation(result);
+
+	return [...result.contextNodes];
+};
+
+/** One evaluated `repeatN`/`indexN` argument pair of {@link indexedRepeat}. */
+interface EvaluatedIndexedRepeatArgumentPair {
+	readonly repeats: readonly ContextNode[]; // every node matched by `repeatN`
+	readonly position: number; // number the `indexN` argument evaluated to
+}
+type DepthSortResult = -1 | 0 | 1; // return type of `compareContainmentDepth`
+
+/**
+ * Sort comparator ordering `repeatN`/`indexN` pairs by containment: a pair
+ * whose repeats contain the other pair's repeats sorts first (`-1`), the
+ * contained pair sorts last (`1`). The first containment found, scanning every
+ * combination of nodes from both pairs, decides the result.
+ *
+ * @todo This is **obviously cacheable**, most sensibly at the expression level
+ * (or expression + bound context node level). The expression analysis
+ * machinery lives further up the stack, so caching here needs more thought.
+ */
+const compareContainmentDepth = (
+	left: EvaluatedIndexedRepeatArgumentPair,
+	right: EvaluatedIndexedRepeatArgumentPair
+): DepthSortResult => {
+	for (const leftRepeat of left.repeats) {
+		for (const rightRepeat of right.repeats) {
+			if (leftRepeat.contains(rightRepeat)) {
+				return -1;
+			}
+
+			if (rightRepeat.contains(leftRepeat)) {
+				return 1;
+			}
+		}
+	}
+
+	// Either side is empty, or no node on one side contains a node on the other.
+	// TODO: in the latter case there is no hierarchical relationship between
+	// `repeatN` and `repeatN + M`, which seems to violate **at least the intent**
+	// of the spec. We should probably produce an error here?
+	return 0;
+};
+
+/**
+ * XForms `indexed-repeat(arg, repeat1, index1[, repeatN, indexN]...)`: selects
+ * the `arg` nodes contained by the repeat instance found by applying each
+ * `indexN` (a 1-based position) to its `repeatN` node-set, outermost first.
+ *
+ * All arguments are evaluated in the calling context.
+ *
+ * Pairs may be supplied in any order: they are sorted by containment before
+ * being resolved.
+ *
+ * Yields an empty node-set when any index evaluates to `NaN`, when any
+ * `repeatN` matches nothing, or when no repeat exists at a given position.
+ */
+export const indexedRepeat = new NodeSetFunction(
+	'indexed-repeat',
+	[
+		// arg (spec name): the target node-set
+		{ arityType: 'required', typeHint: 'node' },
+		// repeat1
+		{ arityType: 'required', typeHint: 'node' },
+		// index1
+		{ arityType: 'required', typeHint: 'number' },
+		// repeat2 (spec: repeatN, N=0)
+		{ arityType: 'optional', typeHint: 'node' },
+		// index2 (spec: indexN, N=0)
+		{ arityType: 'optional', typeHint: 'number' },
+		// repeat3 (spec: repeatN, N=1)
+		{ arityType: 'optional', typeHint: 'node' },
+		// index3 (spec: indexN, N=1)
+		{ arityType: 'optional', typeHint: 'number' },
+
+		// Beyond the spec: any further pairs are accepted, since the design is
+		// clearly variadic.
+		{ arityType: 'variadic', typeHint: 'any' },
+	],
+	(context, args) => {
+		// `arg` in the spec: the target node-set, narrowed at the very end to the
+		// nodes inside the deepest resolved repeat.
+		const target = args[0]!;
+
+		const pairs: EvaluatedIndexedRepeatArgumentPair[] = [];
+
+		// Collect every `repeatN`/`indexN` pair following the target:
+		//
+		// - `repeats`: **all** nodes matched by the `repeatN` node-set argument
+		// - `position`: the number the `indexN` argument evaluates to
+		//
+		// Every argument of every pair is evaluated in the calling context (in
+		// typical XForms usage, the context of the bound node). This is the core
+		// difference from the original approach in
+		// https://github.com/getodk/web-forms/pull/150; it was clarified in review
+		// of that original effort, and is borne out by tests exercising depth > 1,
+		// which demonstrate the same behavior in JavaRosa.
+		//
+		// The loop variable is the positional argument index (starting at 1, just
+		// past the target), which keeps the assertions below easy to follow.
+		for (let repeatsIndex = 1; repeatsIndex < args.length; repeatsIndex += 2) {
+			const positionIndex = repeatsIndex + 1;
+			const repeatsArg = args[repeatsIndex];
+			const positionArg = args[positionIndex];
+
+			assertArgument(repeatsIndex, repeatsArg);
+			assertArgument(positionIndex, positionArg);
+
+			// The position is evaluated before the repeats: a `NaN` "index"
+			// short-circuits, as the equivalent `/data/repN[posN]/target`
+			// expression would.
+			const position = positionArg.evaluate(context).toNumber();
+
+			if (Number.isNaN(position)) {
+				return [];
+			}
+
+			// Deliberately unfiltered: **all** nodes matching `repeatN` are kept
+			// here, and narrowed by containment in a later step.
+			const repeats = evaluateArgumentToFilterableNodes(context, repeatsArg);
+
+			// Without any repeats there is nothing to "index".
+			if (repeats.length === 0) {
+				return [];
+			}
+
+			pairs.push({ repeats, position });
+		}
+
+		// Order the pairs by containment, outermost repeats first. Callers may
+		// supply pairs in any order; that matches JavaRosa, likely as a side effect
+		// of it implementing the function by transforming the expression to its
+		// LocationPath equivalent.
+		pairs.sort(compareContainmentDepth);
+
+		// Walk the pairs from outermost to innermost, at each level:
+		//
+		// 1. keeping **only** the repeats contained by the repeat resolved at the
+		//    previous level (at the first level, all of them), then
+		//
+		// 2. picking the repeat at the evaluated position among those kept.
+		let deepestRepeat: ContextNode;
+
+		for (const { repeats, position } of pairs) {
+			const candidates = repeats.filter((repeat) => {
+				// Nothing is resolved yet at the outermost level: keep every repeat.
+				return deepestRepeat == null || deepestRepeat.contains(repeat);
+			});
+
+			// Despite the spec's "index" terminology, `indexN` behaves like an XPath
+			// position predicate: it is 1-based.
+			const selected = candidates[position - 1];
+
+			// No repeat at that position means there is nothing to target.
+			if (selected == null) {
+				return [];
+			}
+
+			deepestRepeat = selected;
+		}
+
+		// Resolve **all** target nodes, then keep only those contained by the
+		// deepest resolved repeat.
+		return evaluateArgumentToFilterableNodes(context, target).filter((targetNode) => {
+			return deepestRepeat.contains(targetNode);
+		});
+	}
+);
+
 interface InstanceElement extends LocalNamedElement<'instance'> {}
 
 const identifiedInstanceLookup = new ScopedElementLookup(':scope > instance[id]', 'instance[id]');