From b067c9ee234892582c222ecfda97cc164fcf3c14 Mon Sep 17 00:00:00 2001
From: Ryu Kobayashi
Date: Fri, 15 May 2026 15:47:23 +0900
Subject: [PATCH] HIVE-29598: Fix vectorized outer join wrong results due to
 stale scratch column values

---
 ...torMapJoinOuterGenerateResultOperator.java | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index e83b178e4dc7..0a2447eb52a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -27,7 +27,11 @@
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -590,6 +594,7 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
         ColumnVector colVector = batch.cols[column];
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
+        clearVectorValue(colVector, batchIndex);
       }
 
       // Small table values are set to null.
@@ -597,6 +602,7 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs,
         ColumnVector colVector = batch.cols[column];
         colVector.noNulls = false;
         colVector.isNull[batchIndex] = true;
+        clearVectorValue(colVector, batchIndex);
       }
     }
   }
@@ -749,6 +755,7 @@ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi
       colVector.noNulls = false;
       colVector.isNull[0] = true;
       colVector.isRepeating = true;
+      clearVectorValue(colVector, 0);
     }
 
     for (int column : smallTableValueColumnMap) {
@@ -756,5 +763,35 @@ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi
       colVector.noNulls = false;
       colVector.isNull[0] = true;
       colVector.isRepeating = true;
+      clearVectorValue(colVector, 0);
+    }
+  }
+
+  /**
+   * Resets the value slot at {@code index} so that a stale value left in a reused scratch
+   * column does not remain behind an entry that has just been marked null.
+   *
+   * Timestamp, interval and decimal vectors delegate to their own null-value setters.
+   * Vector types with no branch below are left untouched.
+   *
+   * @param colVector the column vector whose entry is being nulled
+   * @param index the entry to reset ({@code 0} for a repeating vector)
+   */
+  private static void clearVectorValue(ColumnVector colVector, int index) {
+    if (colVector instanceof LongColumnVector) {
+      ((LongColumnVector) colVector).vector[index] = 0L;
+    } else if (colVector instanceof DoubleColumnVector) {
+      ((DoubleColumnVector) colVector).vector[index] = 0.0;
+    } else if (colVector instanceof BytesColumnVector) {
+      BytesColumnVector bcv = (BytesColumnVector) colVector;
+      bcv.vector[index] = null;
+      bcv.start[index] = 0;
+      bcv.length[index] = 0;
+    } else if (colVector instanceof DecimalColumnVector) {
+      ((DecimalColumnVector) colVector).setNullDataValue(index);
+    } else if (colVector instanceof TimestampColumnVector) {
+      ((TimestampColumnVector) colVector).setNullValue(index);
+    } else if (colVector instanceof IntervalDayTimeColumnVector) {
+      ((IntervalDayTimeColumnVector) colVector).setNullValue(index);
     }
   }