Skip to content

Commit

Permalink
CDPD-45931: ORC-523: Update ReaderImpl to work with column encryption.
Browse files Browse the repository at this point in the history
Fixes apache#408

Signed-off-by: Owen O'Malley <omalley@apache.org>
Change-Id: I91f61633dd2645b14e30259500344d35459e563e
  • Loading branch information
omalley authored and Dmitriy Fingerman committed Mar 7, 2023
1 parent f44a374 commit 6f89fb7
Show file tree
Hide file tree
Showing 10 changed files with 649 additions and 67 deletions.
14 changes: 11 additions & 3 deletions java/core/src/java/org/apache/orc/OrcUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,17 @@ TypeDescription convertTypeFromProtobuf(List<OrcProto.Type> types,

public static List<StripeInformation> convertProtoStripesToStripes(
List<OrcProto.StripeInformation> stripes) {
List<StripeInformation> result = new ArrayList<StripeInformation>(stripes.size());
for (OrcProto.StripeInformation info : stripes) {
result.add(new ReaderImpl.StripeInformationImpl(info));
List<StripeInformation> result = new ArrayList<>(stripes.size());
long previousStripeId = -1;
byte[][] previousKeys = null;
long stripeId = 0;
for (OrcProto.StripeInformation stripeProto: stripes) {
ReaderImpl.StripeInformationImpl stripe =
new ReaderImpl.StripeInformationImpl(stripeProto, stripeId++,
previousStripeId, previousKeys);
result.add(stripe);
previousStripeId = stripe.getEncryptionStripeId();
previousKeys = stripe.getEncryptedLocalKeys();
}
return result;
}
Expand Down
21 changes: 21 additions & 0 deletions java/core/src/java/org/apache/orc/StripeInformation.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,25 @@ public interface StripeInformation {
* @return a count of the number of rows
*/
long getNumberOfRows();

/**
* Get the index of this stripe in the current file.
* @return 0 to number_of_stripes - 1
*/
long getStripeId();

/**
* Get the original stripe id that was used when the stripe was originally
* written. This differs from getStripeId only in merged files.
* @return the original stripe id
*/
long getEncryptionStripeId();

/**
* Get the encrypted keys starting from this stripe until overridden by
* a new set in a following stripe. The top-level array has one entry for
* each encryption variant; each entry is an encrypted key.
* @return the array of encrypted keys
*/
byte[][] getEncryptedLocalKeys();
}
33 changes: 12 additions & 21 deletions java/core/src/java/org/apache/orc/impl/OrcTail.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -27,6 +27,7 @@
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcUtils;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;

Expand Down Expand Up @@ -77,11 +78,7 @@ public OrcFile.WriterVersion getWriterVersion() {
}

public List<StripeInformation> getStripes() {
List<StripeInformation> result = new ArrayList<>(fileTail.getFooter().getStripesCount());
for (OrcProto.StripeInformation stripeProto : fileTail.getFooter().getStripesList()) {
result.add(new ReaderImpl.StripeInformationImpl(stripeProto));
}
return result;
return OrcUtils.convertProtoStripesToStripes(getFooter().getStripesList());
}

public CompressionKind getCompressionKind() {
Expand All @@ -99,21 +96,21 @@ public int getCompressionBufferSize() {
* @deprecated Use {@link #getStripeStatistics(InStream.StreamOptions, boolean, boolean)} instead
*/
@Deprecated
public List<StripeStatistics> getStripeStatistics()
public List<StripeStatistics> getStripeStatistics(InStream.StreamOptions options)
throws IOException {
OrcProto.Footer footer = fileTail.getFooter();
boolean writerUsedProlepticGregorian =
footer.hasCalendar() ?
footer.getCalendar() == OrcProto.CalendarKind.PROLEPTIC_GREGORIAN :
false;
return getStripeStatistics(writerUsedProlepticGregorian, false);
return getStripeStatistics(options, writerUsedProlepticGregorian, false);
}

public List<StripeStatistics> getStripeStatistics(
public List<StripeStatistics> getStripeStatistics(InStream.StreamOptions options,
boolean writerUsedProlepticGregorian, boolean convertToProlepticGregorian)
throws IOException {
List<StripeStatistics> result = new ArrayList<>();
List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto();
List<OrcProto.StripeStatistics> ssProto = getStripeStatisticsProto(options);
if (ssProto != null) {
for (OrcProto.StripeStatistics ss : ssProto) {
result.add(new StripeStatistics(ss.getColStatsList(), writerUsedProlepticGregorian, convertToProlepticGregorian));
Expand All @@ -122,17 +119,12 @@ public List<StripeStatistics> getStripeStatistics(
return result;
}

public List<OrcProto.StripeStatistics> getStripeStatisticsProto() throws IOException {
public List<OrcProto.StripeStatistics> getStripeStatisticsProto(InStream.StreamOptions options) throws IOException {
if (serializedTail == null) return null;
if (metadata == null) {
CompressionCodec codec = OrcCodecPool.getCodec(getCompressionKind());
try {
metadata = extractMetadata(serializedTail, 0,
(int) fileTail.getPostscript().getMetadataLength(),
InStream.options().withCodec(codec).withBufferSize(getCompressionBufferSize()));
} finally {
OrcCodecPool.returnCodec(getCompressionKind(), codec);
}
metadata = extractMetadata(serializedTail, 0,
(int) fileTail.getPostscript().getMetadataLength(),
options);
// clear does not clear the contents but sets position to 0 and limit = capacity
serializedTail.clear();
}
Expand All @@ -156,7 +148,6 @@ public OrcProto.FileTail getMinimalFileTail() {
OrcProto.Footer.Builder footerBuilder = OrcProto.Footer.newBuilder(fileTail.getFooter());
footerBuilder.clearStatistics();
fileTailBuilder.setFooter(footerBuilder.build());
OrcProto.FileTail result = fileTailBuilder.build();
return result;
return fileTailBuilder.build();
}
}
Loading

0 comments on commit 6f89fb7

Please sign in to comment.