From b210b1e6e0c3775e0628a72703a7a34da3a3b0c7 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 10 Oct 2023 13:48:50 -0400 Subject: [PATCH 01/14] Allowing for glob patterns for fields in ExportWriter --- .../solr/handler/export/ExportWriter.java | 77 ++++++++++++++++--- .../solr/handler/export/TestExportWriter.java | 37 +++++++++ 2 files changed, 104 insertions(+), 10 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 51ba5551b69..14ad566f444 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -27,9 +27,14 @@ import java.io.PrintWriter; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; +import org.apache.commons.io.FilenameUtils; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; @@ -487,19 +492,14 @@ void writeDoc( public FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) throws IOException { - IndexSchema schema = searcher.getSchema(); - FieldWriter[] writers = new FieldWriter[fields.length]; DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(searcher, false); - for (int i = 0; i < fields.length; i++) { - String field = fields[i]; - SchemaField schemaField = null; - try { - schemaField = schema.getField(field); - } catch (Exception e) { - throw new IOException(e); - } + List expandedFields = expandFieldList(fields, searcher); + FieldWriter[] writers = new FieldWriter[expandedFields.size()]; + for (int i = 0; i < expandedFields.size(); i++) { + SchemaField schemaField = expandedFields.get(i); 
+ String field = schemaField.getName(); if (!schemaField.hasDocValues()) { throw new IOException(schemaField + " must have DocValues to use this feature."); } @@ -844,4 +844,61 @@ public String getMessage() { return "Early Client Disconnect"; } } + + /** + * Creates a complete field list using the provided field list by expanding any glob patterns into + * field names + * + * @param fields the original set of fields provided + * @param searcher an index searcher to access schema info + * @return a complete list of fields included any fields matching glob patterns + * @throws IOException if a provided field does not exist or cannot be retrieved from the schema + * info + */ + private List expandFieldList(String[] fields, SolrIndexSearcher searcher) + throws IOException { + List expandedFields = new ArrayList<>(fields.length); + Set fieldsProcessed = new HashSet<>(); + for (String field : fields) { + try { + if (field.contains("*")) { + getGlobFields(field, searcher, fieldsProcessed, expandedFields); + } else { + if (fieldsProcessed.add(field)) { + expandedFields.add(searcher.getSchema().getField(field)); + } + } + } catch (Exception e) { + throw new IOException(e); + } + } + + return expandedFields; + } + + /** + * Create a list of schema fields that match a given glob pattern + * + * @param fieldPattern the glob pattern to match + * @param searcher an index search to access schema info + * @param fieldsProcessed the set of field names already processed to avoid duplicating + * @param expandedFields the list of fields to add expanded field names into + */ + private void getGlobFields( + String fieldPattern, + SolrIndexSearcher searcher, + Set fieldsProcessed, + List expandedFields) { + for (FieldInfo fi : searcher.getFieldInfos()) { + if (FilenameUtils.wildcardMatch(fi.getName(), fieldPattern)) { + SchemaField schemaField = searcher.getSchema().getField(fi.getName()); + if (fieldsProcessed.add(fi.getName()) + && schemaField.hasDocValues() + && 
(!(schemaField.getType() instanceof SortableTextField) + || schemaField.useDocValuesAsStored())) { + expandedFields.add(schemaField); + } + } + } + } } diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index 8337609faf9..e37f26efc94 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -1298,6 +1298,43 @@ public void testExpr() throws Exception { .contains("Must have useDocValuesAsStored='true'")); } + @Test + public void testGlobFields() throws Exception { + assertU(delQ("*:*")); + assertU(commit()); + createLargeIndex(); + SolrQueryRequest req = + req("q", "*:*", "qt", "/export", "fl", "id,*_udvas,*_i_p", "sort", "id asc"); + assertJQ( + req, + "response/numFound==100000", + "response/docs/[0]/id=='0'", + "response/docs/[1]/id=='1'", + "response/docs/[0]/sortabledv_udvas=='0'", + "response/docs/[1]/sortabledv_udvas=='1'", + "response/docs/[0]/small_i_p==0", + "response/docs/[1]/small_i_p==1"); + + assertU(delQ("*:*")); + assertU(commit()); + createLargeIndex(); + req = req("q", "*:*", "qt", "/export", "fl", "*", "sort", "id asc"); + assertJQ( + req, + "response/numFound==100000", + "response/docs/[0]/id=='0'", + "response/docs/[1]/id=='1'", + "response/docs/[0]/sortabledv_udvas=='0'", + "response/docs/[1]/sortabledv_udvas=='1'", + "response/docs/[0]/small_i_p==0", + "response/docs/[1]/small_i_p==1"); + + String jq = JQ(req); + assertFalse( + "Fields without docvalues and useDocValuesAsStored should not be returned", + jq.contains("\"sortabledv\"")); + } + @SuppressWarnings("rawtypes") private void validateSort(int numDocs) throws Exception { // 10 fields From 8a04a11fec9c8a781edd20cadca4e9a0866bcb88 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 10 Oct 2023 13:55:01 -0400 Subject: [PATCH 02/14] Tidying code --- 
.../src/java/org/apache/solr/handler/export/ExportWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 14ad566f444..5eec4d1cf4f 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -894,7 +894,7 @@ private void getGlobFields( SchemaField schemaField = searcher.getSchema().getField(fi.getName()); if (fieldsProcessed.add(fi.getName()) && schemaField.hasDocValues() - && (!(schemaField.getType() instanceof SortableTextField) + && (!(schemaField.getType() instanceof SortableTextField) || schemaField.useDocValuesAsStored())) { expandedFields.add(schemaField); } From fe66c1fb38b83a337594d05db55a0e55ee76e7c0 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 10 Oct 2023 13:55:18 -0400 Subject: [PATCH 03/14] Adding support for glob expression for fields in SelectStream --- solr/solrj-streaming/build.gradle | 1 + .../client/solrj/io/stream/SelectStream.java | 29 +++++++++++++++++-- .../StreamExpressionToExpessionTest.java | 3 +- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/solr/solrj-streaming/build.gradle b/solr/solrj-streaming/build.gradle index 9e4b6b4f3cc..b5c2de8cbe6 100644 --- a/solr/solrj-streaming/build.gradle +++ b/solr/solrj-streaming/build.gradle @@ -27,6 +27,7 @@ dependencies { implementation 'org.apache.httpcomponents:httpclient' implementation 'org.apache.httpcomponents:httpcore' implementation 'org.apache.commons:commons-math3' + implementation 'commons-io:commons-io' testImplementation project(':solr:test-framework') testImplementation project(':solr:core') diff --git a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java index 
80219e797bb..57cc75985eb 100644 --- a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java +++ b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java @@ -24,6 +24,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import org.apache.commons.io.FilenameUtils; import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.comp.StreamComparator; import org.apache.solr.client.solrj.io.eval.EvaluatorException; @@ -52,14 +53,21 @@ public class SelectStream extends TupleStream implements Expressible { private TupleStream stream; private StreamContext streamContext; private Map selectedFields; + private List selectedFieldGlobPatterns; private Map selectedEvaluators; private List operations; public SelectStream(TupleStream stream, List selectedFields) throws IOException { this.stream = stream; this.selectedFields = new HashMap<>(); + this.selectedFieldGlobPatterns = new ArrayList<>(); for (String selectedField : selectedFields) { - this.selectedFields.put(selectedField, selectedField); + if (selectedField.contains("*")) { + // selected field is a glob pattern + this.selectedFieldGlobPatterns.add(selectedField); + } else { + this.selectedFields.put(selectedField, selectedField); + } } operations = new ArrayList<>(); selectedEvaluators = new LinkedHashMap<>(); @@ -68,6 +76,7 @@ public SelectStream(TupleStream stream, List selectedFields) throws IOEx public SelectStream(TupleStream stream, Map selectedFields) throws IOException { this.stream = stream; this.selectedFields = selectedFields; + selectedFieldGlobPatterns = new ArrayList<>(); operations = new ArrayList<>(); selectedEvaluators = new LinkedHashMap<>(); } @@ -123,6 +132,7 @@ public SelectStream(StreamExpression expression, StreamFactory factory) throws I stream = factory.constructStream(streamExpressions.get(0)); selectedFields = new HashMap<>(); + selectedFieldGlobPatterns = new ArrayList<>(); 
selectedEvaluators = new LinkedHashMap<>(); for (StreamExpressionParameter parameter : selectAsFieldsExpressions) { StreamExpressionValue selectField = (StreamExpressionValue) parameter; @@ -175,7 +185,11 @@ public SelectStream(StreamExpression expression, StreamFactory factory) throws I selectedFields.put(asValue, asName); } } else { - selectedFields.put(value, value); + if (value.contains("*")) { + selectedFieldGlobPatterns.add(value); + } else { + selectedFields.put(value, value); + } } } @@ -217,6 +231,11 @@ private StreamExpression toExpression(StreamFactory factory, boolean includeStre } } + // selected glob patterns + for (String selectFieldGlobPattern : selectedFieldGlobPatterns) { + expression.addParameter(selectFieldGlobPattern); + } + // selected evaluators for (Map.Entry selectedEvaluator : selectedEvaluators.entrySet()) { expression.addParameter( @@ -308,6 +327,12 @@ public Tuple read() throws IOException { workingForEvaluators.put(fieldName, original.get(fieldName)); if (selectedFields.containsKey(fieldName)) { workingToReturn.put(selectedFields.get(fieldName), original.get(fieldName)); + } else { + for (String globPattern : selectedFieldGlobPatterns) { + if (FilenameUtils.wildcardMatch(fieldName, globPattern)) { + workingToReturn.put(fieldName, original.get(fieldName)); + } + } } } diff --git a/solr/solrj-streaming/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java b/solr/solrj-streaming/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java index 4069b671b32..2c941f142d7 100644 --- a/solr/solrj-streaming/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java +++ b/solr/solrj-streaming/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java @@ -105,7 +105,7 @@ public void testSelectStream() throws Exception { try (SelectStream stream = new SelectStream( StreamExpressionParser.parse( - "select(\"a_s as fieldA\", 
search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"))"), + "select(\"a_s as fieldA\", a_*, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\"))"), factory)) { expressionString = stream.toExpression(factory).toString(); assertTrue(expressionString.contains("select(search(collection1,")); @@ -113,6 +113,7 @@ public void testSelectStream() throws Exception { assertTrue(expressionString.contains("fl=\"id,a_s,a_i,a_f\"")); assertTrue(expressionString.contains("sort=\"a_f asc, a_i asc\"")); assertTrue(expressionString.contains("a_s as fieldA")); + assertTrue(expressionString.contains("a_*")); } } From 23626fb7a5f49c96c6be492c9d0e8a219bb67283 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 10 Oct 2023 14:01:59 -0400 Subject: [PATCH 04/14] Breaking after a field matches any glob pattern to avoid continuing to iterate patterns for the same field --- .../org/apache/solr/client/solrj/io/stream/SelectStream.java | 1 + 1 file changed, 1 insertion(+) diff --git a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java index 57cc75985eb..f4d923fcc09 100644 --- a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java +++ b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java @@ -331,6 +331,7 @@ public Tuple read() throws IOException { for (String globPattern : selectedFieldGlobPatterns) { if (FilenameUtils.wildcardMatch(fieldName, globPattern)) { workingToReturn.put(fieldName, original.get(fieldName)); + break; } } } From ac25ebbc0833f8382091d4e61b9a97517bfa7523 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 10:47:31 -0400 Subject: [PATCH 05/14] Adding GlobPatternUtil class to provide a common place for glob pattern matching, current implementation uses Java NIO path matching. 
Replaced uses of FilenameUtils.wildcardMatches to reduce commons-io usage. --- .../solr/handler/export/ExportWriter.java | 4 ++-- .../apache/solr/search/SolrReturnFields.java | 4 ++-- solr/solrj-streaming/build.gradle | 1 - .../client/solrj/io/stream/SelectStream.java | 4 ++-- .../solr/common/util/GlobPatternUtil.java | 11 +++++++++++ .../solr/common/util/TestGlobPatternUtil.java | 17 +++++++++++++++++ 6 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java create mode 100644 solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 5eec4d1cf4f..79d0b852c17 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -33,7 +33,6 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; -import org.apache.commons.io.FilenameUtils; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -58,6 +57,7 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StreamParams; +import org.apache.solr.common.util.GlobPatternUtil; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.core.SolrCore; import org.apache.solr.metrics.SolrMetricsContext; @@ -890,7 +890,7 @@ private void getGlobFields( Set fieldsProcessed, List expandedFields) { for (FieldInfo fi : searcher.getFieldInfos()) { - if (FilenameUtils.wildcardMatch(fi.getName(), fieldPattern)) { + if (GlobPatternUtil.matches(fieldPattern, fi.getName())) { SchemaField schemaField = searcher.getSchema().getField(fi.getName()); if (fieldsProcessed.add(fi.getName()) && 
schemaField.hasDocValues() diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index 7d0583ce63a..1966b7358a3 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -28,7 +28,6 @@ import java.util.Map; import java.util.Set; import java.util.function.Supplier; -import org.apache.commons.io.FilenameUtils; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.QueryValueSource; @@ -37,6 +36,7 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.GlobPatternUtil; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.DocTransformer; import org.apache.solr.response.transform.DocTransformers; @@ -578,7 +578,7 @@ public boolean wantsField(String name) { } for (String s : globs) { // TODO something better? 
- if (FilenameUtils.wildcardMatch(name, s)) { + if (GlobPatternUtil.matches(name, s)) { okFieldNames.add(name); // Don't calculate it again return true; } diff --git a/solr/solrj-streaming/build.gradle b/solr/solrj-streaming/build.gradle index b5c2de8cbe6..9e4b6b4f3cc 100644 --- a/solr/solrj-streaming/build.gradle +++ b/solr/solrj-streaming/build.gradle @@ -27,7 +27,6 @@ dependencies { implementation 'org.apache.httpcomponents:httpclient' implementation 'org.apache.httpcomponents:httpcore' implementation 'org.apache.commons:commons-math3' - implementation 'commons-io:commons-io' testImplementation project(':solr:test-framework') testImplementation project(':solr:core') diff --git a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java index f4d923fcc09..647a1c59d4c 100644 --- a/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java +++ b/solr/solrj-streaming/src/java/org/apache/solr/client/solrj/io/stream/SelectStream.java @@ -24,7 +24,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; -import org.apache.commons.io.FilenameUtils; import org.apache.solr.client.solrj.io.Tuple; import org.apache.solr.client.solrj.io.comp.StreamComparator; import org.apache.solr.client.solrj.io.eval.EvaluatorException; @@ -39,6 +38,7 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; +import org.apache.solr.common.util.GlobPatternUtil; /** * Selects fields from the incoming stream and applies optional field renaming. 
Does not reorder the @@ -329,7 +329,7 @@ public Tuple read() throws IOException { workingToReturn.put(selectedFields.get(fieldName), original.get(fieldName)); } else { for (String globPattern : selectedFieldGlobPatterns) { - if (FilenameUtils.wildcardMatch(fieldName, globPattern)) { + if (GlobPatternUtil.matches(globPattern, fieldName)) { workingToReturn.put(fieldName, original.get(fieldName)); break; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java new file mode 100644 index 00000000000..1da1e2287c0 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java @@ -0,0 +1,11 @@ +package org.apache.solr.common.util; + +import java.nio.file.FileSystems; +import java.nio.file.Paths; + +public class GlobPatternUtil { + + public static boolean matches(String pattern, String input) { + return FileSystems.getDefault().getPathMatcher("glob:" + pattern).matches(Paths.get(input)); + } +} diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java b/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java new file mode 100644 index 00000000000..9c07a218c28 --- /dev/null +++ b/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java @@ -0,0 +1,17 @@ +package org.apache.solr.common.util; + +import org.apache.solr.SolrTestCase; + +public class TestGlobPatternUtil extends SolrTestCase { + + public void testMatches() { + assertTrue(GlobPatternUtil.matches("*_str", "user_str")); + assertFalse(GlobPatternUtil.matches("*_str", "str_user")); + assertTrue(GlobPatternUtil.matches("str_*", "str_user")); + assertFalse(GlobPatternUtil.matches("str_*", "user_str")); + assertTrue(GlobPatternUtil.matches("str?", "str1")); + assertFalse(GlobPatternUtil.matches("str?", "str_user")); + assertTrue(GlobPatternUtil.matches("user_*_str", "user_type_str")); + assertFalse(GlobPatternUtil.matches("user_*_str", 
"user_str")); + } +} From bfe31fa6fb770992bfb6a86b3e32d3c0bd29db88 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 10:48:31 -0400 Subject: [PATCH 06/14] Tidying code --- .../java/org/apache/solr/common/util/GlobPatternUtil.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java index 1da1e2287c0..bd900ea49f5 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java @@ -5,7 +5,7 @@ public class GlobPatternUtil { - public static boolean matches(String pattern, String input) { - return FileSystems.getDefault().getPathMatcher("glob:" + pattern).matches(Paths.get(input)); - } + public static boolean matches(String pattern, String input) { + return FileSystems.getDefault().getPathMatcher("glob:" + pattern).matches(Paths.get(input)); + } } From fad2f18f037d505dec662e58682d8adfe3f83688 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 11:00:48 -0400 Subject: [PATCH 07/14] Adding missing headers --- .../apache/solr/common/util/GlobPatternUtil.java | 16 ++++++++++++++++ .../solr/common/util/TestGlobPatternUtil.java | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java index bd900ea49f5..354b1561499 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.solr.common.util; import java.nio.file.FileSystems; diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java b/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java index 9c07a218c28..a5bdcad92fa 100644 --- a/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/TestGlobPatternUtil.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.solr.common.util; import org.apache.solr.SolrTestCase; From 0d862cf6a000fcd4cc77b4203bffa14a952968f9 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 11:18:47 -0400 Subject: [PATCH 08/14] Updating ref docs --- .../modules/query-guide/pages/exporting-result-sets.adoc | 5 ++++- .../query-guide/pages/stream-decorator-reference.adoc | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index 6719d378013..a84b1f0fb5a 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -70,7 +70,10 @@ It can get worse otherwise. The `fl` property defines the fields that will be exported with the result set. Any of the field types that can be sorted (i.e., int, long, float, double, string, date, boolean) can be used in the field list. The fields can be single or multi-valued. -However, returning scores and wildcards are not supported at this time. + +Wildcard patterns can be used for the field list (e.g., `fl=*_i`) and will be expanded to the list of fields that match the pattern and are able to be exported, see <>. + +Returning scores is not supported at this time. 
=== Specifying the Local Streaming Expression diff --git a/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc b/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc index 447bb1b8d56..7717618cb25 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc @@ -1375,7 +1375,7 @@ One can provide a list of operations and evaluators to perform on any fields, su === select Parameters * `StreamExpression` -* `fieldName`: name of field to include in the output tuple (can include multiple of these), such as `outputTuple[fieldName] = inputTuple[fieldName]` +* `fieldName`: name of field to include in the output tuple (can include multiple of these), such as `outputTuple[fieldName] = inputTuple[fieldName]`. The `fieldName` can be a wildcard pattern, i.e. `a_*` to select all fields that start with `a_`. * `fieldName as aliasFieldName`: aliased field name to include in the output tuple (can include multiple of these), such as `outputTuple[aliasFieldName] = incomingTuple[fieldName]` * `replace(fieldName, value, withValue=replacementValue)`: if `incomingTuple[fieldName] == value` then `outgoingTuple[fieldName]` will be set to `replacementValue`. `value` can be the string "null" to replace a null value with some other value. 
From f1c62f9c3cb2adaa92d579d9e4c388ae057fbe88 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 11:23:05 -0400 Subject: [PATCH 09/14] Fixing refdoc changes --- .../modules/query-guide/pages/exporting-result-sets.adoc | 2 +- .../modules/query-guide/pages/stream-decorator-reference.adoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc index a84b1f0fb5a..1c16d9a46a7 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/exporting-result-sets.adoc @@ -71,7 +71,7 @@ The `fl` property defines the fields that will be exported with the result set. Any of the field types that can be sorted (i.e., int, long, float, double, string, date, boolean) can be used in the field list. The fields can be single or multi-valued. -Wildcard patterns can be used for the field list (e.g., `fl=*_i`) and will be expanded to the list of fields that match the pattern and are able to be exported, see <>. +Wildcard patterns can be used for the field list (e.g. `fl=*_i`) and will be expanded to the list of fields that match the pattern and are able to be exported, see <>. Returning scores is not supported at this time. 
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc b/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc index 7717618cb25..0a811412498 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/stream-decorator-reference.adoc @@ -1375,7 +1375,7 @@ One can provide a list of operations and evaluators to perform on any fields, su === select Parameters * `StreamExpression` -* `fieldName`: name of field to include in the output tuple (can include multiple of these), such as `outputTuple[fieldName] = inputTuple[fieldName]`. The `fieldName` can be a wildcard pattern, i.e. `a_*` to select all fields that start with `a_`. +* `fieldName`: name of field to include in the output tuple (can include multiple of these), such as `outputTuple[fieldName] = inputTuple[fieldName]`. The `fieldName` can be a wildcard pattern, e.g. `a_*` to select all fields that start with `a_`. * `fieldName as aliasFieldName`: aliased field name to include in the output tuple (can include multiple of these), such as `outputTuple[aliasFieldName] = incomingTuple[fieldName]` * `replace(fieldName, value, withValue=replacementValue)`: if `incomingTuple[fieldName] == value` then `outgoingTuple[fieldName]` will be set to `replacementValue`. `value` can be the string "null" to replace a null value with some other value. 
From 0382445e45bb8927be7a50703cdbe9029a25e23d Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Wed, 11 Oct 2023 11:55:08 -0400 Subject: [PATCH 10/14] Fixing arg order for SolrReturnFields glob pattern matching --- solr/core/src/java/org/apache/solr/search/SolrReturnFields.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index 1966b7358a3..8c8f7143895 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -578,7 +578,7 @@ public boolean wantsField(String name) { } for (String s : globs) { // TODO something better? - if (GlobPatternUtil.matches(name, s)) { + if (GlobPatternUtil.matches(s, name)) { okFieldNames.add(name); // Don't calculate it again return true; } From 19e97ae5dd0661d14fbb2f6a8f0d59a5f4a52d6f Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Fri, 13 Oct 2023 11:53:08 -0400 Subject: [PATCH 11/14] Fixing up comments --- .../java/org/apache/solr/search/SolrReturnFields.java | 1 - .../org/apache/solr/common/util/GlobPatternUtil.java | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java index 8c8f7143895..af35245af15 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java +++ b/solr/core/src/java/org/apache/solr/search/SolrReturnFields.java @@ -577,7 +577,6 @@ public boolean wantsField(String name) { return true; } for (String s : globs) { - // TODO something better? 
if (GlobPatternUtil.matches(s, name)) { okFieldNames.add(name); // Don't calculate it again return true; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java index 354b1561499..8b26ab5a355 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/GlobPatternUtil.java @@ -19,8 +19,18 @@ import java.nio.file.FileSystems; import java.nio.file.Paths; +/** Provides methods for matching glob patterns against input strings. */ public class GlobPatternUtil { + /** + * Matches an input string against a provided glob pattern. This uses Java NIO FileSystems + * PathMatcher to match glob patterns in the same way that glob patterns are matched for file + * paths, rather than implementing our own glob pattern matching. + * + * @param pattern the glob pattern to match against + * @param input the input string to match against a glob pattern + * @return true if the input string matches the glob pattern, false otherwise + */ public static boolean matches(String pattern, String input) { return FileSystems.getDefault().getPathMatcher("glob:" + pattern).matches(Paths.get(input)); } From 7f7b49df3641be68a3b53080e227cd7c76c09504 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Fri, 13 Oct 2023 11:56:36 -0400 Subject: [PATCH 12/14] Modifying ExportWriter to have same behavior for glob pattern fields as select --- .../src/java/org/apache/solr/handler/export/ExportWriter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 79d0b852c17..7d10a8b3bf3 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -894,8 +894,7 @@ private void getGlobFields( 
SchemaField schemaField = searcher.getSchema().getField(fi.getName()); if (fieldsProcessed.add(fi.getName()) && schemaField.hasDocValues() - && (!(schemaField.getType() instanceof SortableTextField) - || schemaField.useDocValuesAsStored())) { + && schemaField.useDocValuesAsStored()) { expandedFields.add(schemaField); } } From 91133ae8a2dd0d31a2ee834d3e640d6f9bece4d6 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 19 Dec 2023 12:11:24 -0500 Subject: [PATCH 13/14] Updating ExportWriter to use SolrReturnFields --- .../solr/handler/export/ExportWriter.java | 127 ++++++------------ 1 file changed, 39 insertions(+), 88 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 7d10a8b3bf3..a39f28b1828 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -28,12 +28,9 @@ import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.TreeSet; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; @@ -57,7 +54,6 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StreamParams; -import org.apache.solr.common.util.GlobPatternUtil; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.core.SolrCore; import org.apache.solr.metrics.SolrMetricsContext; @@ -81,6 +77,7 @@ import org.apache.solr.schema.StrField; import org.apache.solr.search.DocValuesIteratorCache; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; import 
org.apache.solr.search.SortSpec; import org.apache.solr.search.SyntaxError; import org.slf4j.Logger; @@ -126,7 +123,7 @@ public boolean write( private int priorityQueueSize; StreamExpression streamExpression; StreamContext streamContext; - FieldWriter[] fieldWriters; + List fieldWriters; int totalHits = 0; FixedBitSet[] sets = null; PushWriter writer; @@ -298,7 +295,7 @@ private void _write(OutputStream os) throws IOException { } try { - fieldWriters = getFieldWriters(fields, req.getSearcher()); + fieldWriters = getFieldWriters(fields, req); } catch (Exception e) { writeException(e, writer, true); return; @@ -478,7 +475,7 @@ void fillOutDocs(MergeIterator mergeIterator, ExportBuffers.Buffer buffer) throw } void writeDoc( - SortDoc sortDoc, List leaves, EntryWriter ew, FieldWriter[] writers) + SortDoc sortDoc, List leaves, EntryWriter ew, List writers) throws IOException { int ord = sortDoc.ord; LeafReaderContext context = leaves.get(ord); @@ -490,77 +487,87 @@ void writeDoc( } } - public FieldWriter[] getFieldWriters(String[] fields, SolrIndexSearcher searcher) + public List getFieldWriters(String[] fields, SolrQueryRequest req) throws IOException { - DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(searcher, false); + DocValuesIteratorCache dvIterCache = new DocValuesIteratorCache(req.getSearcher(), false); - List expandedFields = expandFieldList(fields, searcher); + SolrReturnFields solrReturnFields = new SolrReturnFields(fields, req); - FieldWriter[] writers = new FieldWriter[expandedFields.size()]; - for (int i = 0; i < expandedFields.size(); i++) { - SchemaField schemaField = expandedFields.get(i); - String field = schemaField.getName(); + List writers = new ArrayList<>(); + for (String field : req.getSearcher().getFieldNames()) { + if (!solrReturnFields.wantsField(field)) { + continue; + } + SchemaField schemaField = req.getSchema().getField(field); if (!schemaField.hasDocValues()) { throw new IOException(schemaField + " must have DocValues to 
use this feature."); } boolean multiValued = schemaField.multiValued(); FieldType fieldType = schemaField.getType(); + FieldWriter writer; - if (fieldType instanceof SortableTextField && schemaField.useDocValuesAsStored() == false) { - throw new IOException( - schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); + if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { + if (solrReturnFields.getRequestedFieldNames() != null && solrReturnFields.getRequestedFieldNames().contains(field)) { + // Explicitly requested field cannot be used due to not having useDocValuesAsStored=true, throw exception + throw new IOException( + schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); + } else { + // Glob pattern matched field cannot be used due to not having useDocValuesAsStored=true + continue; + } } DocValuesIteratorCache.FieldDocValuesSupplier docValuesCache = dvIterCache.getSupplier(field); if (docValuesCache == null) { - writers[i] = EMPTY_FIELD_WRITER; + writer = EMPTY_FIELD_WRITER; } else if (fieldType instanceof IntValueFieldType) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); } else { - writers[i] = new IntFieldWriter(field, docValuesCache); + writer = new IntFieldWriter(field, docValuesCache); } } else if (fieldType instanceof LongValueFieldType) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); } else { - writers[i] = new LongFieldWriter(field, docValuesCache); + writer = new LongFieldWriter(field, docValuesCache); } } else if (fieldType instanceof FloatValueFieldType) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); + 
writer = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); } else { - writers[i] = new FloatFieldWriter(field, docValuesCache); + writer = new FloatFieldWriter(field, docValuesCache); } } else if (fieldType instanceof DoubleValueFieldType) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); } else { - writers[i] = new DoubleFieldWriter(field, docValuesCache); + writer = new DoubleFieldWriter(field, docValuesCache); } } else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, false, docValuesCache); } else { - writers[i] = new StringFieldWriter(field, fieldType, docValuesCache); + writer = new StringFieldWriter(field, fieldType, docValuesCache); } } else if (fieldType instanceof DateValueFieldType) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, false, docValuesCache); } else { - writers[i] = new DateFieldWriter(field, docValuesCache); + writer = new DateFieldWriter(field, docValuesCache); } } else if (fieldType instanceof BoolField) { if (multiValued) { - writers[i] = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); + writer = new MultiFieldWriter(field, fieldType, schemaField, true, docValuesCache); } else { - writers[i] = new BoolFieldWriter(field, fieldType, docValuesCache); + writer = new BoolFieldWriter(field, fieldType, docValuesCache); } } else { throw new IOException( "Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText"); } + writers.add(writer); } return writers; } @@ -844,60 +851,4 @@ 
public String getMessage() { return "Early Client Disconnect"; } } - - /** - * Creates a complete field list using the provided field list by expanding any glob patterns into - * field names - * - * @param fields the original set of fields provided - * @param searcher an index searcher to access schema info - * @return a complete list of fields included any fields matching glob patterns - * @throws IOException if a provided field does not exist or cannot be retrieved from the schema - * info - */ - private List expandFieldList(String[] fields, SolrIndexSearcher searcher) - throws IOException { - List expandedFields = new ArrayList<>(fields.length); - Set fieldsProcessed = new HashSet<>(); - for (String field : fields) { - try { - if (field.contains("*")) { - getGlobFields(field, searcher, fieldsProcessed, expandedFields); - } else { - if (fieldsProcessed.add(field)) { - expandedFields.add(searcher.getSchema().getField(field)); - } - } - } catch (Exception e) { - throw new IOException(e); - } - } - - return expandedFields; - } - - /** - * Create a list of schema fields that match a given glob pattern - * - * @param fieldPattern the glob pattern to match - * @param searcher an index search to access schema info - * @param fieldsProcessed the set of field names already processed to avoid duplicating - * @param expandedFields the list of fields to add expanded field names into - */ - private void getGlobFields( - String fieldPattern, - SolrIndexSearcher searcher, - Set fieldsProcessed, - List expandedFields) { - for (FieldInfo fi : searcher.getFieldInfos()) { - if (GlobPatternUtil.matches(fieldPattern, fi.getName())) { - SchemaField schemaField = searcher.getSchema().getField(fi.getName()); - if (fieldsProcessed.add(fi.getName()) - && schemaField.hasDocValues() - && schemaField.useDocValuesAsStored()) { - expandedFields.add(schemaField); - } - } - } - } } From c1ebac18eebb9d1d4e993fca7b57735ddda0c815 Mon Sep 17 00:00:00 2001 From: Justin Sweeney Date: Tue, 19 Dec 2023 
13:16:55 -0500 Subject: [PATCH 14/14] Tidying code --- .../java/org/apache/solr/handler/export/ExportWriter.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index a39f28b1828..e5e40e6d07e 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -507,8 +507,10 @@ public List getFieldWriters(String[] fields, SolrQueryRequest req) FieldWriter writer; if (fieldType instanceof SortableTextField && !schemaField.useDocValuesAsStored()) { - if (solrReturnFields.getRequestedFieldNames() != null && solrReturnFields.getRequestedFieldNames().contains(field)) { - // Explicitly requested field cannot be used due to not having useDocValuesAsStored=true, throw exception + if (solrReturnFields.getRequestedFieldNames() != null + && solrReturnFields.getRequestedFieldNames().contains(field)) { + // Explicitly requested field cannot be used due to not having useDocValuesAsStored=true, + // throw exception throw new IOException( schemaField + " Must have useDocValuesAsStored='true' to be used with export writer"); } else {