From 61c0798cb039c90dc6919d28baddc5dedb847ea0 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Thu, 5 Mar 2026 16:00:25 +0900
Subject: [PATCH 01/10] Implement basic row cache
---
.../org/apache/hadoop/hbase/HConstants.java | 6 +
.../io/encoding/BufferedDataBlockEncoder.java | 8 +-
.../MetricsRegionServerSource.java | 7 +
.../MetricsRegionServerSourceImpl.java | 6 +
.../MetricsRegionServerWrapper.java | 10 +
.../hadoop/hbase/regionserver/HRegion.java | 85 ++-
.../MetricsRegionServerWrapperImpl.java | 28 +
.../hbase/regionserver/RSRpcServices.java | 21 +-
.../hadoop/hbase/regionserver/RowCache.java | 236 ++++++--
.../hadoop/hbase/regionserver/RowCells.java | 3 +-
.../regionserver/TinyLfuRowCacheStrategy.java | 113 ++++
.../MetricsRegionServerWrapperStub.java | 25 +
.../regionserver/TestMetricsRegionServer.java | 5 +
.../hbase/regionserver/TestRowCache.java | 547 ++++++++++++++++++
.../regionserver/TestRowCacheCanCacheRow.java | 266 +++++++++
.../TestRowCacheConfiguration.java | 81 +++
.../TestRowCacheEvictOnClose.java | 129 +++++
.../regionserver/TestRowCacheHRegion.java | 97 ++++
...heWithBucketCacheAndDataBlockEncoding.java | 154 +++++
.../regionserver/TestRowCacheWithMock.java | 397 +++++++++++++
.../tool/TestRowCacheBulkLoadHFiles.java | 199 +++++++
21 files changed, 2369 insertions(+), 54 deletions(-)
create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 6a51172e9a73..f140783067af 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1029,6 +1029,12 @@ public enum OperationStatusCode {
public static final String ROW_CACHE_ENABLED_KEY = "row.cache.enabled";
public static final boolean ROW_CACHE_ENABLED_DEFAULT = false;
+ /**
+ * Configuration key for evicting the row cache when a region closes
+ */
+ public static final String ROW_CACHE_EVICT_ON_CLOSE_KEY = "row.cache.evictOnClose";
+ public static final boolean ROW_CACHE_EVICT_ON_CLOSE_DEFAULT = false;
+
/**
* Configuration key for the memory size of the block cache
*/
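For context, setting these keys follows the usual Configuration pattern (a minimal
sketch; only the constant names come from this patch, and the table-level
TableDescriptor setting takes priority over the global one, as checkRowCacheConfig()
below shows):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;

    Configuration conf = HBaseConfiguration.create();
    // Enable the row cache globally ("row.cache.enabled")
    conf.setBoolean(HConstants.ROW_CACHE_ENABLED_KEY, true);
    // Evict a region's cached rows when the region closes ("row.cache.evictOnClose")
    conf.setBoolean(HConstants.ROW_CACHE_EVICT_ON_CLOSE_KEY, true);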
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
index 5ec39fa5803d..54505dfce955 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
@@ -547,8 +547,8 @@ public void setTimestamp(byte[] ts) throws IOException {
@Override
public ExtendedCell deepClone() {
- // This is not used in actual flow. Throwing UnsupportedOperationException
- throw new UnsupportedOperationException();
+ // Deep clone so a cached copy does not retain references to the objects backing this cell,
+ // allowing them to be garbage collected
+ return ExtendedCell.super.deepClone();
}
}
@@ -796,8 +796,8 @@ public void write(ByteBuffer buf, int offset) {
@Override
public ExtendedCell deepClone() {
- // This is not used in actual flow. Throwing UnsupportedOperationException
- throw new UnsupportedOperationException();
+ // To cache the row, the cell must be deep cloned
+ return super.deepClone();
}
}
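Note: both encoded-cell classes need a working deepClone() because cached cells may be
backed by shared block buffers; RowCells (further down) deep clones every cell before
caching. A rough illustration of the contract, with cachedCells as a hypothetical holder:

    // 'cell' may reference a shared, possibly off-heap block buffer from the read path.
    ExtendedCell copy = cell.deepClone(); // copies key/value bytes into fresh on-heap arrays
    cachedCells.add(copy);                // safe to retain; the original buffer can be released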
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index c88a77b51407..166484fe8991 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -430,6 +430,13 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
String L2_CACHE_HIT_RATIO_DESC = "L2 cache hit ratio.";
String L2_CACHE_MISS_RATIO = "l2CacheMissRatio";
String L2_CACHE_MISS_RATIO_DESC = "L2 cache miss ratio.";
+
+ String ROW_CACHE_HIT_COUNT = "rowCacheHitCount";
+ String ROW_CACHE_MISS_COUNT = "rowCacheMissCount";
+ String ROW_CACHE_EVICTED_ROW_COUNT = "rowCacheEvictedRowCount";
+ String ROW_CACHE_SIZE = "rowCacheSize";
+ String ROW_CACHE_COUNT = "rowCacheCount";
+
String RS_START_TIME_NAME = "regionServerStartTime";
String ZOOKEEPER_QUORUM_NAME = "zookeeperQuorum";
String SERVER_NAME_NAME = "serverName";
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
index b214c8f8f4e7..90ea2a1165c8 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
@@ -452,6 +452,12 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) {
.addCounter(Interns.info(BLOCK_CACHE_DELETE_FAMILY_BLOOM_HIT_COUNT, ""),
rsWrap.getDeleteFamilyBloomHitCount())
.addCounter(Interns.info(BLOCK_CACHE_TRAILER_HIT_COUNT, ""), rsWrap.getTrailerHitCount())
+ .addCounter(Interns.info(ROW_CACHE_HIT_COUNT, ""), rsWrap.getRowCacheHitCount())
+ .addCounter(Interns.info(ROW_CACHE_MISS_COUNT, ""), rsWrap.getRowCacheMissCount())
+ .addCounter(Interns.info(ROW_CACHE_EVICTED_ROW_COUNT, ""),
+ rsWrap.getRowCacheEvictedRowCount())
+ .addGauge(Interns.info(ROW_CACHE_SIZE, ""), rsWrap.getRowCacheSize())
+ .addGauge(Interns.info(ROW_CACHE_COUNT, ""), rsWrap.getRowCacheCount())
.addCounter(Interns.info(UPDATES_BLOCKED_TIME, UPDATES_BLOCKED_DESC),
rsWrap.getUpdatesBlockedTime())
.addCounter(Interns.info(FLUSHED_CELLS, FLUSHED_CELLS_DESC), rsWrap.getFlushedCellsCount())
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
index 5b957d9bf08f..68e43b276ee2 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
@@ -635,6 +635,16 @@ public interface MetricsRegionServerWrapper {
long getTrailerHitCount();
+ long getRowCacheHitCount();
+
+ long getRowCacheMissCount();
+
+ long getRowCacheSize();
+
+ long getRowCacheCount();
+
+ long getRowCacheEvictedRowCount();
+
long getTotalRowActionRequestCount();
long getByteBuffAllocatorHeapAllocationBytes();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 60bd4cee6b73..3a5c3f34313a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.HConstants.REPLICATION_SCOPE_LOCAL;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_DEFAULT;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_KEY;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.REGION_NAMES_KEY;
import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.ROW_LOCK_READ_LOCK_KEY;
@@ -145,6 +147,7 @@
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcCall;
+import org.apache.hadoop.hbase.ipc.RpcCallContext;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerCall;
import org.apache.hadoop.hbase.mob.MobFileCache;
@@ -946,7 +949,7 @@ public HRegion(final HRegionFileSystem fs, final WAL wal, final Configuration co
this.isRowCacheEnabled = checkRowCacheConfig();
}
- private boolean checkRowCacheConfig() {
+ boolean checkRowCacheConfig() {
Boolean fromDescriptor = htableDescriptor.getRowCacheEnabled();
// The setting from TableDescriptor has higher priority than the global configuration
return fromDescriptor != null
@@ -954,6 +957,11 @@ private boolean checkRowCacheConfig() {
: conf.getBoolean(HConstants.ROW_CACHE_ENABLED_KEY, HConstants.ROW_CACHE_ENABLED_DEFAULT);
}
+ // For testing only
+ void setRowCache(RowCache rowCache) {
+ this.rowCache = rowCache;
+ }
+
private void setHTableSpecificConf() {
if (this.htableDescriptor == null) {
return;
@@ -1963,6 +1971,8 @@ public Pair<byte[], Collection<HStoreFile>> call() throws IOException {
}
}
+ evictRowCache();
+
status.setStatus("Writing region close event to WAL");
// Always write close marker to wal even for read only table. This is not a big problem as we
// do not write any data into the region; it is just a meta edit in the WAL file.
@@ -2003,6 +2013,22 @@ public Pair<byte[], Collection<HStoreFile>> call() throws IOException {
}
}
+ private void evictRowCache() {
+ boolean evictOnClose = getReadOnlyConfiguration().getBoolean(ROW_CACHE_EVICT_ON_CLOSE_KEY,
+ ROW_CACHE_EVICT_ON_CLOSE_DEFAULT);
+
+ if (!evictOnClose) {
+ return;
+ }
+
+ if (!(rsServices instanceof HRegionServer regionServer)) {
+ return;
+ }
+
+ RowCache rowCache = regionServer.getRSRpcServices().getServer().getRowCache();
+ rowCache.evictRowsByRegion(this);
+ }
+
/** Wait for all current flushes and compactions of the region to complete */
// TODO HBASE-18906. Check the usage (if any) in Phoenix and expose this or give alternate way for
// Phoenix needs.
@@ -3259,8 +3285,8 @@ public RegionScannerImpl getScanner(Scan scan) throws IOException {
return getScanner(scan, null);
}
- RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results)
- throws IOException {
+ RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results,
+ RpcCallContext context) throws IOException {
if (!rowCache.canCacheRow(get, this)) {
return getScannerWithResults(scan, results);
}
@@ -3268,12 +3294,23 @@ RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results)
// Try get from row cache
RowCacheKey key = new RowCacheKey(this, get.getRow());
if (rowCache.tryGetFromCache(key, get, results)) {
+ addReadRequestsCount(1);
+ if (getMetrics() != null) {
+ getMetrics().updateReadRequestCount();
+ }
+
// Cache is hit, and then no scanner is created
return null;
}
RegionScannerImpl scanner = getScannerWithResults(scan, results);
- rowCache.populateCache(results, key);
+
+ // When the results come from the memstore only (no data blocks were scanned), do not
+ // populate the row cache. A null context is treated as memstore-only to stay conservative.
+ boolean readFromMemStoreOnly = context == null || context.getBlockBytesScanned() < 1;
+ if (!readFromMemStoreOnly) {
+ rowCache.populateCache(this, results, key);
+ }
+
return scanner;
}
@@ -3435,6 +3472,15 @@ private void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, b
@Override
public void put(Put put) throws IOException {
TraceUtil.trace(() -> {
+ // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot
+ // track TTL expiration
+ if (isRowCacheEnabled) {
+ if (put.getTTL() != Long.MAX_VALUE) {
+ throw new DoNotRetryIOException(
+ "Tables with row cache enabled do not allow setting TTL on Puts");
+ }
+ }
+
checkReadOnly();
// Do a rough check that we have resources to accept a write. The check is
@@ -4811,7 +4857,12 @@ public OperationStatus[] batchMutate(Mutation[] mutations, boolean atomic, long
// checkAndMutate.
// * coprocessor calls (see ex. BulkDeleteEndpoint).
// So nonces are not really ever used by HBase. They could be by coprocs, and checkAnd...
- return batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce));
+ if (rowCache == null) {
+ return batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce));
+ }
+
+ return rowCache.mutateWithRowCacheBarrier(this, Arrays.asList(mutations),
+ () -> batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce)));
}
@Override
@@ -4823,10 +4874,9 @@ public OperationStatus[] batchMutate(Mutation[] mutations) throws IOException {
}
OperationStatus[] batchMutate(Mutation[] mutations, boolean atomic) throws IOException {
- OperationStatus[] operationStatuses =
- rowCache.mutateWithRowCacheBarrier(this, Arrays.asList(mutations),
- () -> this.batchMutate(mutations, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE));
- return TraceUtil.trace(() -> operationStatuses, () -> createRegionSpan("Region.batchMutate"));
+ return TraceUtil.trace(
+ () -> batchMutate(mutations, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE),
+ () -> createRegionSpan("Region.batchMutate"));
}
/**
@@ -5111,8 +5161,17 @@ public CheckAndMutateResult checkAndMutate(CheckAndMutate checkAndMutate) throws
public CheckAndMutateResult checkAndMutate(CheckAndMutate checkAndMutate, long nonceGroup,
long nonce) throws IOException {
- CheckAndMutateResult checkAndMutateResult = rowCache.mutateWithRowCacheBarrier(this,
- checkAndMutate.getRow(), () -> this.checkAndMutate(checkAndMutate, nonceGroup, nonce));
+ CheckAndMutateResult checkAndMutateResult =
+ rowCache.mutateWithRowCacheBarrier(this, checkAndMutate.getRow(),
+ () -> this.checkAndMutateInternal(checkAndMutate, nonceGroup, nonce));
+ return TraceUtil.trace(() -> checkAndMutateResult,
+ () -> createRegionSpan("Region.checkAndMutate"));
+ }
+
+ public CheckAndMutateResult checkAndMutate(List<Mutation> mutations,
+ CheckAndMutate checkAndMutate, long nonceGroup, long nonce) throws IOException {
+ CheckAndMutateResult checkAndMutateResult = rowCache.mutateWithRowCacheBarrier(this, mutations,
+ () -> this.checkAndMutateInternal(checkAndMutate, nonceGroup, nonce));
return TraceUtil.trace(() -> checkAndMutateResult,
() -> createRegionSpan("Region.checkAndMutate"));
}
@@ -5312,6 +5371,10 @@ private OperationStatus mutate(Mutation mutation, boolean atomic) throws IOExcep
private OperationStatus mutate(Mutation mutation, boolean atomic, long nonceGroup, long nonce)
throws IOException {
+ if (rowCache == null) {
+ return this.mutateInternal(mutation, atomic, nonceGroup, nonce);
+ }
+
return rowCache.mutateWithRowCacheBarrier(this, mutation.getRow(),
() -> this.mutateInternal(mutation, atomic, nonceGroup, nonce));
}
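Putting the HRegion hunks together, the cached Get flow reads roughly as follows (a
condensed sketch of the code above, using the names from the surrounding method; this is
not additional API):

    RowCacheKey key = new RowCacheKey(this, get.getRow());
    if (rowCache.canCacheRow(get, this) && rowCache.tryGetFromCache(key, get, results)) {
      return null;                                    // cache hit: no RegionScanner is created
    }
    RegionScannerImpl scanner = getScannerWithResults(scan, results); // normal read path
    if (context != null && context.getBlockBytesScanned() >= 1) {
      rowCache.populateCache(this, results, key);     // only rows that touched HFile blocks
    }
    return scanner;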
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index c8f7f96a033b..b4dabf7fb3bf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -69,6 +69,7 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
private BlockCache l2Cache = null;
private MobFileCache mobFileCache;
private CacheStats cacheStats;
+ private final RowCache rowCache;
private CacheStats l1Stats = null;
private CacheStats l2Stats = null;
private volatile long numWALFiles = 0;
@@ -99,6 +100,8 @@ public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
this.regionServer = regionServer;
initBlockCache();
initMobFileCache();
+ RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices();
+ this.rowCache = rsRpcServices == null ? null : rsRpcServices.getServer().getRowCache();
this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager();
this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
@@ -1194,6 +1197,31 @@ public long getTrailerHitCount() {
return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L;
}
+ @Override
+ public long getRowCacheHitCount() {
+ return this.rowCache != null ? this.rowCache.getHitCount() : 0L;
+ }
+
+ @Override
+ public long getRowCacheMissCount() {
+ return this.rowCache != null ? this.rowCache.getMissCount() : 0L;
+ }
+
+ @Override
+ public long getRowCacheSize() {
+ return this.rowCache != null ? this.rowCache.getSize() : 0L;
+ }
+
+ @Override
+ public long getRowCacheCount() {
+ return this.rowCache != null ? this.rowCache.getCount() : 0L;
+ }
+
+ @Override
+ public long getRowCacheEvictedRowCount() {
+ return this.rowCache != null ? this.rowCache.getEvictedRowCount() : 0L;
+ }
+
@Override
public long getByteBuffAllocatorHeapAllocationBytes() {
return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP);
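The new getters are null-safe because, per the constructor above, the RowCache reference
can be null when RSRpcServices is not yet wired up. If a hit ratio is wanted on top of
these counters, it can be derived externally (sketch only; wrapper is a hypothetical
MetricsRegionServerWrapper instance):

    long hits = wrapper.getRowCacheHitCount();
    long misses = wrapper.getRowCacheMissCount();
    double hitRatio = (hits + misses) == 0 ? 0.0 : (double) hits / (hits + misses);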
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 35371cb74ae7..7a21ab8a5504 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -668,7 +668,7 @@ private CheckAndMutateResult checkAndMutate(HRegion region, List<ClientProtos.A
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
- * {@code RowCache} coordinates cache access for Get operations and
- * enforces cache consistency during mutations. It delegates actual
- * storage and eviction policy decisions (e.g., LRU, LFU) to a
- * {@link RowCacheStrategy} implementation.
- *
- * This class is responsible for:
+ * <p>
+ * {@code RowCache} coordinates cache access for Get operations and enforces cache consistency
+ * during mutations. It delegates actual storage and eviction policy decisions (e.g., LRU, LFU) to a
+ * {@link RowCacheStrategy} implementation.
+ *
+ * <p>
+ * This class is responsible for:
 * <ul>
- * Determining whether row cache is enabled for a region
- * Attempting cache lookups before falling back to the normal read path
- * Populating the cache after successful reads
- * Evicting affected rows on mutations to maintain correctness
+ * <li>Determining whether row cache is enabled for a region</li>
+ * <li>Attempting cache lookups before falling back to the normal read path</li>
+ * <li>Populating the cache after successful reads</li>
+ * <li>Evicting affected rows on mutations to maintain correctness</li>
 * </ul>
- *
- * {@code RowCache} does not implement caching policy or storage directly;
- * those concerns are encapsulated by {@code RowCacheStrategy}.
+ *
+ * <p>
+ * {@code RowCache} does not implement caching policy or storage directly; those concerns are
+ * encapsulated by {@code RowCacheStrategy}.
 */
@org.apache.yetus.audience.InterfaceAudience.Private
public class RowCache {
+ /**
+ * A barrier that prevents the row cache from being populated during region operations, such as
+ * bulk loads. It is implemented as a counter to address issues that arise when the same region is
+ * updated concurrently.
+ */
+ private final Map<HRegion, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>();
+ /**
+ * A barrier that prevents the row cache from being populated during row mutations. It is
+ * implemented as a counter to address issues that arise when the same row is mutated
+ * concurrently.
+ */
+ private final Map<RowCacheKey, AtomicInteger> rowLevelBarrierMap = new ConcurrentHashMap<>();
+
private final boolean enabledByConf;
private final RowCacheStrategy rowCacheStrategy;
@@ -63,8 +85,8 @@ <R> R execute(RowOperation<R> operation) throws IOException {
RowCache(Configuration conf) {
enabledByConf =
conf.getFloat(HConstants.ROW_CACHE_SIZE_KEY, HConstants.ROW_CACHE_SIZE_DEFAULT) > 0;
- // TODO: implement row cache
- rowCacheStrategy = null;
+ // Currently, only the TinyLFU implementation is supported
+ rowCacheStrategy = new TinyLfuRowCacheStrategy(MemorySizeUtil.getRowCacheSize(conf));
}
 <R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> operation)
@@ -74,9 +96,39 @@ <R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> oper
}
RowCacheKey key = new RowCacheKey(region, row);
- // TODO: implement mutate with row cache barrier logic
- evictRow(key);
- return execute(operation);
+ try {
+ // Creates a barrier that prevents the row cache from being populated for this row
+ // during mutation. Reads for the row can instead be served from HFiles or the block cache.
+ createRowLevelBarrier(key);
+
+ // After creating the barrier, evict the existing row cache for this row,
+ // as it becomes invalid after the mutation
+ evictRow(key);
+
+ return execute(operation);
+ } finally {
+ // Remove the barrier after mutation to allow the row cache to be populated again
+ removeRowLevelBarrier(key);
+ }
+ }
+
+ /**
+ * Remove the barrier after mutation to allow the row cache to be populated again
+ * @param key the cache key of the row
+ */
+ void removeRowLevelBarrier(RowCacheKey key) {
+ rowLevelBarrierMap.computeIfPresent(key, (k, counter) -> {
+ int remaining = counter.decrementAndGet();
+ return (remaining <= 0) ? null : counter;
+ });
+ }
+
+ /**
+ * Creates a barrier to prevent the row cache from being populated for this row during mutation
+ * @param key the cache key of the row
+ */
+ void createRowLevelBarrier(RowCacheKey key) {
+ rowLevelBarrierMap.computeIfAbsent(key, k -> new AtomicInteger(0)).incrementAndGet();
}
 <R> R mutateWithRowCacheBarrier(HRegion region, List<Mutation> mutations,
@@ -85,21 +137,88 @@ <R> R mutateWithRowCacheBarrier(HRegion region, List<Mutation> mutations,
return operation.execute();
}
- // TODO: implement mutate with row cache barrier logic
 Set<RowCacheKey> rowCacheKeys = new HashSet<>(mutations.size());
- mutations.forEach(mutation -> rowCacheKeys.add(new RowCacheKey(region, mutation.getRow())));
- rowCacheKeys.forEach(this::evictRow);
+ try {
+ // Collect the cache keys for every row touched by these mutations
+ mutations.forEach(mutation -> rowCacheKeys.add(new RowCacheKey(region, mutation.getRow())));
+ rowCacheKeys.forEach(key -> {
+ // Creates a barrier that prevents the row cache from being populated for this row
+ // during mutation. Reads for the row can instead be served from HFiles or the block cache.
+ createRowLevelBarrier(key);
- return execute(operation);
+ // After creating the barrier, evict the existing row cache for this row,
+ // as it becomes invalid after the mutation
+ evictRow(key);
+ });
+
+ return execute(operation);
+ } finally {
+ // Remove the barrier after mutation to allow the row cache to be populated again
+ rowCacheKeys.forEach(this::removeRowLevelBarrier);
+ }
}
void evictRow(RowCacheKey key) {
rowCacheStrategy.evictRow(key);
}
+ void evictRowsByRegion(HRegion region) {
+ rowCacheStrategy.evictRowsByRegion(region);
+ }
+
+ // @formatter:off
+ /**
+ * A row can be cached only when all of the following conditions are met:
+ * - Row cache is enabled at the table level.
+ * - Block caching is enabled on the Get request.
+ * - Cached rows are keyed by row key alone, so the Get must use the default value
+ * for each of the following:
+ * - filter
+ * - retrieved cells (all families, no qualifiers)
+ * - TTL
+ * - attributes
+ * - CheckExistenceOnly
+ * - ColumnFamilyTimeRange
+ * - Consistency
+ * - MaxResultsPerColumnFamily
+ * - ReplicaId
+ * - RowOffsetPerColumnFamily
+ * - TimeRange
+ * @param get the Get request
+ * @param region the Region
+ * @return true if the row can be cached, false otherwise
+ */
+ // @formatter:on
boolean canCacheRow(Get get, Region region) {
- // TODO: implement logic to determine if the row can be cached
- return false;
+ return enabledByConf && region.isRowCacheEnabled() && get.getCacheBlocks()
+ && get.getFilter() == null && isRetrieveAllCells(get, region) && isDefaultTtl(region)
+ && get.getAttributesMap().isEmpty() && !get.isCheckExistenceOnly()
+ && get.getColumnFamilyTimeRange().isEmpty() && get.getConsistency() == Consistency.STRONG
+ && get.getMaxResultsPerColumnFamily() == -1 && get.getReplicaId() == -1
+ && get.getRowOffsetPerColumnFamily() == 0 && get.getTimeRange().isAllTime();
+ }
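For illustration only (not part of the patch): against a table with families cf1 and cf2
and the row cache enabled, a full-row Get qualifies while a qualifier-scoped Get bypasses
the cache. The byte[] variables are placeholders:

    byte[] cf1 = Bytes.toBytes("cf1"), cf2 = Bytes.toBytes("cf2"), q1 = Bytes.toBytes("q1");

    Get cacheable = new Get(Bytes.toBytes("row"));
    cacheable.addFamily(cf1);            // every family requested, no qualifiers
    cacheable.addFamily(cf2);

    Get notCacheable = new Get(Bytes.toBytes("row"));
    notCacheable.addColumn(cf1, q1);     // a single qualifier -> isRetrieveAllCells() fails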
+
+ private static boolean isRetrieveAllCells(Get get, Region region) {
+ if (region.getTableDescriptor().getColumnFamilyCount() != get.numFamilies()) {
+ return false;
+ }
+
+ boolean hasQualifier = get.getFamilyMap().values().stream().anyMatch(Objects::nonNull);
+ return !hasQualifier;
+ }
+
+ private static boolean isDefaultTtl(Region region) {
+ return Arrays.stream(region.getTableDescriptor().getColumnFamilies())
+ .allMatch(cfd -> cfd.getTimeToLive() == ColumnFamilyDescriptorBuilder.DEFAULT_TTL);
+ }
+
+ // For testing only
+ public RowCells getRow(RowCacheKey key) {
+ return getRow(key, true);
+ }
+
+ // For testing only
+ RowCells getRow(RowCacheKey key, boolean caching) {
+ return rowCacheStrategy.getRow(key, caching);
}
 boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) {
@@ -110,16 +229,67 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) {
}
results.addAll(row.getCells());
- // TODO: implement update of metrics
return true;
}
- void populateCache(List<Cell> results, RowCacheKey key) {
- // TODO: implement with barrier to avoid cache read during mutation
- try {
- rowCacheStrategy.cacheRow(key, new RowCells(results));
- } catch (CloneNotSupportedException ignored) {
- // Not able to cache row cells, ignore
- }
+ void populateCache(HRegion region, List<Cell> results, RowCacheKey key) {
+ // The row cache is populated only when no region level barriers remain
+ regionLevelBarrierMap.computeIfAbsent(region, t -> {
+ // The row cache is populated only when no row level barriers remain
+ rowLevelBarrierMap.computeIfAbsent(key, k -> {
+ try {
+ rowCacheStrategy.cacheRow(key, new RowCells(results));
+ } catch (CloneNotSupportedException ignored) {
+ // Not able to cache row cells, ignore
+ }
+ return null;
+ });
+ return null;
+ });
+ }
+
+ void createRegionLevelBarrier(HRegion region) {
+ regionLevelBarrierMap.computeIfAbsent(region, k -> new AtomicInteger(0)).incrementAndGet();
+ }
+
+ void increaseRowCacheSeqNum(HRegion region) {
+ region.increaseRowCacheSeqNum();
+ }
+
+ void removeRegionLevelBarrier(HRegion region) {
+ regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> {
+ int remaining = counter.decrementAndGet();
+ return (remaining <= 0) ? null : counter;
+ });
+ }
+
+ long getHitCount() {
+ return rowCacheStrategy.getHitCount();
+ }
+
+ long getMissCount() {
+ return rowCacheStrategy.getMissCount();
+ }
+
+ long getSize() {
+ return rowCacheStrategy.getSize();
+ }
+
+ long getCount() {
+ return rowCacheStrategy.getCount();
+ }
+
+ long getEvictedRowCount() {
+ return rowCacheStrategy.getEvictedRowCount();
+ }
+
+ // For testing only
+ AtomicInteger getRowLevelBarrier(RowCacheKey key) {
+ return rowLevelBarrierMap.get(key);
+ }
+
+ // For testing only
+ AtomicInteger getRegionLevelBarrier(HRegion region) {
+ return regionLevelBarrierMap.get(region);
}
}
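To summarize the intended barrier protocol (condensed from the methods above; writer and
reader run on concurrent threads, and all names are from this patch):

    // Writer (mutation path):
    createRowLevelBarrier(key);          // block population for this row
    evictRow(key);                       // drop the stale entry before mutating
    try {
      operation.execute();               // apply the mutation
    } finally {
      removeRowLevelBarrier(key);        // allow population again
    }

    // Reader (Get path), possibly concurrent:
    populateCache(region, results, key); // silently skipped while a row-level or
                                         // region-level barrier for this key is held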
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
index 2f44058e0a24..af0a0ea4c537 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
@@ -39,8 +39,7 @@ public RowCells(List<Cell> cells) throws CloneNotSupportedException {
// To garbage collect the objects referenced by the cells
this.cells.add(extCell.deepClone());
} catch (RuntimeException e) {
- // throw new CloneNotSupportedException("Deep clone failed");
- this.cells.add(extCell);
+ throw new CloneNotSupportedException("Deep clone failed");
}
}
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
new file mode 100644
index 000000000000..e141bd3cbb2b
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.Policy;
+import com.github.benmanes.caffeine.cache.RemovalCause;
+import com.github.benmanes.caffeine.cache.RemovalListener;
+import java.util.Optional;
+import java.util.OptionalLong;
+import java.util.concurrent.atomic.LongAdder;
+import org.checkerframework.checker.nullness.qual.NonNull;
+
+@org.apache.yetus.audience.InterfaceAudience.Private
+public class TinyLfuRowCacheStrategy implements RowCacheStrategy {
+ private final class EvictionListener
+ implements RemovalListener<@NonNull RowCacheKey, @NonNull RowCells> {
+ @Override
+ public void onRemoval(RowCacheKey key, RowCells value, @NonNull RemovalCause cause) {
+ evictedRowCount.increment();
+ }
+ }
+
+ private final Cache<@NonNull RowCacheKey, RowCells> cache;
+
+ // Cache.stats() does not provide eviction count for entries, so we maintain our own counter.
+ private final LongAdder evictedRowCount = new LongAdder();
+
+ TinyLfuRowCacheStrategy(long maxSizeBytes) {
+ if (maxSizeBytes <= 0) {
+ cache = Caffeine.newBuilder().maximumSize(0).build();
+ return;
+ }
+
+ cache =
+ Caffeine.newBuilder().maximumWeight(maxSizeBytes).removalListener(new EvictionListener())
+ .weigher((RowCacheKey key,
+ RowCells value) -> (int) Math.min(key.heapSize() + value.heapSize(), Integer.MAX_VALUE))
+ .recordStats().build();
+ }
+
+ @Override
+ public void cacheRow(RowCacheKey key, RowCells value) {
+ cache.put(key, value);
+ }
+
+ @Override
+ public void evictRow(RowCacheKey key) {
+ cache.asMap().remove(key);
+ }
+
+ @Override
+ public void evictRowsByRegion(HRegion region) {
+ cache.asMap().keySet().removeIf(key -> key.isSameRegion(region));
+ }
+
+ @Override
+ public long getCount() {
+ return cache.estimatedSize();
+ }
+
+ @Override
+ public long getEvictedRowCount() {
+ return evictedRowCount.sum();
+ }
+
+ @Override
+ public long getHitCount() {
+ return cache.stats().hitCount();
+ }
+
+ @Override
+ public long getMaxSize() {
+ Optional<Long> result = cache.policy().eviction().map(Policy.Eviction::getMaximum);
+ return result.orElse(-1L);
+ }
+
+ @Override
+ public long getMissCount() {
+ return cache.stats().missCount();
+ }
+
+ @Override
+ public RowCells getRow(RowCacheKey key, boolean caching) {
+ if (!caching) {
+ return null;
+ }
+
+ return cache.getIfPresent(key);
+ }
+
+ @Override
+ public long getSize() {
+ Optional<OptionalLong> result = cache.policy().eviction().map(Policy.Eviction::weightedSize);
+ return result.orElse(OptionalLong.of(-1L)).orElse(-1L);
+ }
+}
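A note on sizing: Caffeine enforces maximumWeight against the weigher's per-entry cost,
so this cache is bounded by estimated heap bytes rather than entry count. The same
pattern in isolation, with hypothetical key/value types:

    import com.github.benmanes.caffeine.cache.Cache;
    import com.github.benmanes.caffeine.cache.Caffeine;

    Cache<String, byte[]> sketch = Caffeine.newBuilder()
        .maximumWeight(64L * 1024 * 1024)            // budget in approximate bytes
        .weigher((String k, byte[] v) -> v.length)   // per-entry cost in bytes
        .recordStats()
        .build();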
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
index f1b6efe50a99..6b677f2d1223 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
@@ -662,6 +662,31 @@ public long getTrailerHitCount() {
return 0;
}
+ @Override
+ public long getRowCacheHitCount() {
+ return 2;
+ }
+
+ @Override
+ public long getRowCacheMissCount() {
+ return 1;
+ }
+
+ @Override
+ public long getRowCacheEvictedRowCount() {
+ return 0;
+ }
+
+ @Override
+ public long getRowCacheSize() {
+ return 1;
+ }
+
+ @Override
+ public long getRowCacheCount() {
+ return 2;
+ }
+
@Override
public int getSplitQueueSize() {
return 0;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
index aac2a5922b9b..76c2a8ad6e42 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java
@@ -148,6 +148,11 @@ public void testWrapperSource() {
HELPER.assertGauge("l2CacheHitRatio", 90, serverSource);
HELPER.assertGauge("l2CacheMissRatio", 10, serverSource);
HELPER.assertCounter("updatesBlockedTime", 419, serverSource);
+ HELPER.assertCounter("rowCacheHitCount", 2, serverSource);
+ HELPER.assertCounter("rowCacheMissCount", 1, serverSource);
+ HELPER.assertCounter("rowCacheEvictedRowCount", 0, serverSource);
+ HELPER.assertGauge("rowCacheSize", 1, serverSource);
+ HELPER.assertGauge("rowCacheCount", 2, serverSource);
}
@Test
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
new file mode 100644
index 000000000000..c4ca0d70faff
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
@@ -0,0 +1,547 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_EVICTED_ROW_COUNT;
+import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_HIT_COUNT;
+import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_MISS_COUNT;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CompatibilityFactory;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Append;
+import org.apache.hadoop.hbase.client.CheckAndMutate;
+import org.apache.hadoop.hbase.client.CheckAndMutateResult;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Row;
+import org.apache.hadoop.hbase.client.RowMutations;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.test.MetricsAssertHelper;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({ RegionServerTests.class, MediumTests.class })
+public class TestRowCache {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCache.class);
+
+ private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+ private static final byte[] CF1 = Bytes.toBytes("cf1");
+ private static final byte[] CF2 = Bytes.toBytes("cf2");
+ private static final byte[] Q1 = Bytes.toBytes("q1");
+ private static final byte[] Q2 = Bytes.toBytes("q2");
+
+ private static MetricsAssertHelper metricsHelper;
+ private static MetricsRegionServer regionServerMetrics;
+ private static MetricsRegionServerSource serverSource;
+
+ private static Admin admin;
+ private static RowCache rowCache;
+
+ private TableName tableName;
+ private Table table;
+ HRegion region;
 private final Map<String, Long> counterBase = new HashMap<>();
+
+ @Rule
+ public TestName testName = new TestName();
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ // Enable the row cache and shrink the block cache so the combined cache sizes stay
+ // within the 80% heap limit
+ conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f);
+ conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f);
+
+ SingleProcessHBaseCluster cluster = TEST_UTIL.startMiniCluster();
+ cluster.waitForActiveAndReadyMaster();
+ admin = TEST_UTIL.getAdmin();
+
+ metricsHelper = CompatibilityFactory.getInstance(MetricsAssertHelper.class);
+ HRegionServer regionServer = cluster.getRegionServer(0);
+ regionServerMetrics = regionServer.getMetrics();
+ serverSource = regionServerMetrics.getMetricsSource();
+
+ rowCache = regionServer.getRSRpcServices().getServer().getRowCache();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false;
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void beforeTestMethod() throws Exception {
+ ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ // To test data block encoding
+ ColumnFamilyDescriptor cf2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2)
+ .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF).build();
+
+ tableName = TableName.valueOf(testName.getMethodName());
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true)
+ .setColumnFamily(cf1).setColumnFamily(cf2).build();
+ admin.createTable(td);
+ table = admin.getConnection().getTable(tableName);
+ region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getRegions().stream()
+ .filter(r -> r.getRegionInfo().getTable().equals(tableName)).findFirst().orElseThrow();
+ }
+
+ @After
+ public void afterTestMethod() throws Exception {
+ counterBase.clear();
+
+ admin.disableTable(tableName);
+ admin.deleteTable(tableName);
+ }
+
+ private void setCounterBase(String metric, long value) {
+ counterBase.put(metric, value);
+ }
+
+ private void assertCounterDiff(String metric, long diff) {
+ Long base = counterBase.get(metric);
+ if (base == null) {
+ throw new IllegalStateException(
+ "base counter of " + metric + " metric should have been set before by setCounterBase()");
+ }
+ long newValue = base + diff;
+ metricsHelper.assertCounter(metric, newValue, serverSource);
+ counterBase.put(metric, newValue);
+ }
+
+ private static void recomputeMetrics() {
+ regionServerMetrics.getRegionServerWrapper().forceRecompute();
+ }
+
+ @Test
+ public void testGetWithRowCache() throws IOException {
+ byte[] rowKey = "row".getBytes();
+ Get get = new Get(rowKey);
+ Result result;
+
+ RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey);
+
+ // Initialize metrics
+ recomputeMetrics();
+ setCounterBase("Get_num_ops", metricsHelper.getCounter("Get_num_ops", serverSource));
+ setCounterBase(ROW_CACHE_HIT_COUNT,
+ metricsHelper.getCounter(ROW_CACHE_HIT_COUNT, serverSource));
+ setCounterBase(ROW_CACHE_MISS_COUNT,
+ metricsHelper.getCounter(ROW_CACHE_MISS_COUNT, serverSource));
+ setCounterBase(ROW_CACHE_EVICTED_ROW_COUNT,
+ metricsHelper.getCounter(ROW_CACHE_EVICTED_ROW_COUNT, serverSource));
+
+ // Put a row
+ Put put = new Put(rowKey);
+ put.addColumn(CF1, Q1, Bytes.toBytes(0L));
+ put.addColumn(CF1, Q2, "12".getBytes());
+ put.addColumn(CF2, Q1, "21".getBytes());
+ put.addColumn(CF2, Q2, "22".getBytes());
+ table.put(put);
+ admin.flush(tableName);
+ recomputeMetrics();
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0);
+
+ // First get to populate the row cache
+ result = table.get(get);
+ recomputeMetrics();
+ assertArrayEquals(rowKey, result.getRow());
+ assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1));
+ assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2));
+ assertArrayEquals("21".getBytes(), result.getValue(CF2, Q1));
+ assertArrayEquals("22".getBytes(), result.getValue(CF2, Q2));
+ assertCounterDiff("Get_num_ops", 1);
+ // Ensure the get was served from the HFile, not from the row cache
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 1);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0);
+
+ // Get from the row cache
+ result = table.get(get);
+ recomputeMetrics();
+ assertArrayEquals(rowKey, result.getRow());
+ assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1));
+ assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2));
+ assertArrayEquals("21".getBytes(), result.getValue(CF2, Q1));
+ assertArrayEquals("22".getBytes(), result.getValue(CF2, Q2));
+ assertCounterDiff("Get_num_ops", 1);
+ // Ensure the get was served from the row cache
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 1);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0);
+
+ // Row cache is invalidated by the put operation
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ table.put(put);
+ recomputeMetrics();
+ // The hit counter was incremented by the direct rowCache.getRow() check above
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 1);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 1);
+
+ // Get is executed without the row cache; however, the cache is re-populated as a result
+ result = table.get(get);
+ recomputeMetrics();
+ assertArrayEquals(rowKey, result.getRow());
+ assertCounterDiff("Get_num_ops", 1);
+ // Ensure the get was not served from the row cache
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 1);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0);
+
+ // Get again with the row cache
+ result = table.get(get);
+ recomputeMetrics();
+ assertArrayEquals(rowKey, result.getRow());
+ assertCounterDiff("Get_num_ops", 1);
+ // Ensure the get was served from the row cache
+ assertCounterDiff(ROW_CACHE_HIT_COUNT, 1);
+ assertCounterDiff(ROW_CACHE_MISS_COUNT, 0);
+ assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0);
+
+ // Row cache is invalidated by the increment operation
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ table.incrementColumnValue(rowKey, CF1, Q1, 1);
+ assertNull(rowCache.getRow(rowCacheKey));
+
+ // Get is executed without the row cache; however, the cache is re-populated as a result
+ table.get(get);
+ assertNotNull(rowCache.getRow(rowCacheKey));
+
+ // Row cache is invalidated by the append operation
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ Append append = new Append(rowKey);
+ append.addColumn(CF1, Q1, Bytes.toBytes(0L));
+ table.append(append);
+ assertNull(rowCache.getRow(rowCacheKey));
+
+ // Get is executed without the row cache; however, the cache is re-populated as a result
+ table.get(get);
+ assertNotNull(rowCache.getRow(rowCacheKey));
+
+ // Row cache is invalidated by the delete operation
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ Delete delete = new Delete(rowKey);
+ delete.addColumn(CF1, Q1);
+ table.delete(delete);
+ assertNull(rowCache.getRow(rowCacheKey));
+ }
+
+ @Test(expected = DoNotRetryIOException.class)
+ public void testPutWithTTL() throws IOException {
+ // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot
+ // track TTL expiration
+ Put put = new Put("row".getBytes());
+ put.addColumn(CF1, Q1, "11".getBytes());
+ put.setTTL(1);
+ table.put(put);
+ }
+
+ @Test
+ public void testCheckAndMutate() throws IOException {
+ byte[] rowKey = "row".getBytes();
+ Get get = new Get(rowKey);
+ Result result;
+ CheckAndMutate cam;
+ CheckAndMutateResult camResult;
+
+ RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey);
+
+ // Put a row
+ Put put1 = new Put(rowKey);
+ put1.addColumn(CF1, Q1, "11".getBytes());
+ put1.addColumn(CF1, Q2, "12".getBytes());
+ table.put(put1);
+ admin.flush(tableName);
+
+ // Validate that the row cache is populated
+ result = table.get(get);
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ assertArrayEquals("11".getBytes(), result.getValue(CF1, Q1));
+ assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2));
+
+ // The row cache is evicted even when a checkAndMutate operation fails, because the
+ // eviction happens before the condition is evaluated
+ Put put2 = new Put(rowKey);
+ put2.addColumn(CF1, Q2, "1212".getBytes());
+ cam = CheckAndMutate.newBuilder(rowKey).ifEquals(CF1, Q2, "00".getBytes()).build(put2);
+ camResult = table.checkAndMutate(cam);
+ assertFalse(camResult.isSuccess());
+ assertNull(rowCache.getRow(rowCacheKey));
+
+ // Validate that the row cache is populated
+ result = table.get(get);
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ assertArrayEquals("11".getBytes(), result.getValue(CF1, Q1));
+ assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2));
+
+ // The row cache is invalidated by a checkAndMutate operation
+ cam = CheckAndMutate.newBuilder(rowKey).ifEquals(CF1, Q2, "12".getBytes()).build(put2);
+ camResult = table.checkAndMutate(cam);
+ assertTrue(camResult.isSuccess());
+ assertNull(rowCache.getRow(rowCacheKey));
+ }
+
+ @Test
+ public void testCheckAndMutates() throws IOException {
+ byte[] rowKey1 = "row1".getBytes();
+ byte[] rowKey2 = "row2".getBytes();
+ Get get1 = new Get(rowKey1);
+ Get get2 = new Get(rowKey2);
+ Result result1, result2;
+ List<CheckAndMutate> cams;
+ List<CheckAndMutateResult> camResults;
+
+ RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1);
+ RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2);
+
+ // Put rows
+ Put put1 = new Put(rowKey1);
+ put1.addColumn(CF1, Q1, "111".getBytes());
+ put1.addColumn(CF1, Q2, "112".getBytes());
+ table.put(put1);
+ Put put2 = new Put(rowKey2);
+ put2.addColumn(CF1, Q1, "211".getBytes());
+ put2.addColumn(CF1, Q2, "212".getBytes());
+ table.put(put2);
+ admin.flush(tableName);
+
+ // Validate that the row caches are populated
+ result1 = table.get(get1);
+ assertNotNull(rowCache.getRow(rowCacheKey1));
+ assertArrayEquals("111".getBytes(), result1.getValue(CF1, Q1));
+ assertArrayEquals("112".getBytes(), result1.getValue(CF1, Q2));
+ result2 = table.get(get2);
+ assertNotNull(rowCache.getRow(rowCacheKey2));
+ assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1));
+ assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2));
+
+ // The row caches are invalidated by checkAndMutate operations
+ cams = new ArrayList<>();
+ cams.add(CheckAndMutate.newBuilder(rowKey1).ifEquals(CF1, Q2, "112".getBytes()).build(put1));
+ cams.add(CheckAndMutate.newBuilder(rowKey2).ifEquals(CF1, Q2, "212".getBytes()).build(put2));
+ camResults = table.checkAndMutate(cams);
+ assertTrue(camResults.get(0).isSuccess());
+ assertTrue(camResults.get(1).isSuccess());
+ assertNull(rowCache.getRow(rowCacheKey1));
+ assertNull(rowCache.getRow(rowCacheKey2));
+ }
+
+ @Test
+ public void testRowMutations() throws IOException {
+ byte[] rowKey1 = "row1".getBytes();
+ byte[] rowKey2 = "row2".getBytes();
+ Get get1 = new Get(rowKey1);
+ Get get2 = new Get(rowKey2);
+ Result result1, result2;
+
+ RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1);
+ RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2);
+
+ // Put rows
+ Put put1 = new Put(rowKey1);
+ put1.addColumn(CF1, Q1, "111".getBytes());
+ put1.addColumn(CF1, Q2, "112".getBytes());
+ table.put(put1);
+ Put put2 = new Put(rowKey2);
+ put2.addColumn(CF1, Q1, "211".getBytes());
+ put2.addColumn(CF1, Q2, "212".getBytes());
+ table.put(put2);
+ admin.flush(tableName);
+
+ // Validate that the row caches are populated
+ result1 = table.get(get1);
+ assertNotNull(rowCache.getRow(rowCacheKey1));
+ assertArrayEquals("111".getBytes(), result1.getValue(CF1, Q1));
+ assertArrayEquals("112".getBytes(), result1.getValue(CF1, Q2));
+ result2 = table.get(get2);
+ assertNotNull(rowCache.getRow(rowCacheKey2));
+ assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1));
+ assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2));
+
+ // The row caches are invalidated by batch operation
+ Put put12 = new Put(rowKey1);
+ put12.addColumn(CF1, Q1, "111111".getBytes());
+ Put put13 = new Put(rowKey1);
+ put13.addColumn(CF1, Q2, "112112".getBytes());
+ RowMutations rms = new RowMutations(rowKey1);
+ rms.add(put12);
+ rms.add(put13);
+ CheckAndMutate cam =
+ CheckAndMutate.newBuilder(rowKey1).ifEquals(CF1, Q1, "111".getBytes()).build(rms);
+ table.checkAndMutate(cam);
+ assertNull(rowCache.getRow(rowCacheKey1));
+ assertNotNull(rowCache.getRow(rowCacheKey2));
+
+ // Validate that the row caches are populated
+ result1 = table.get(get1);
+ assertNotNull(rowCache.getRow(rowCacheKey1));
+ assertArrayEquals("111111".getBytes(), result1.getValue(CF1, Q1));
+ assertArrayEquals("112112".getBytes(), result1.getValue(CF1, Q2));
+ result2 = table.get(get2);
+ assertNotNull(rowCache.getRow(rowCacheKey2));
+ assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1));
+ assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2));
+ }
+
+ @Test
+ public void testBatch() throws IOException, InterruptedException {
+ byte[] rowKey1 = "row1".getBytes();
+ byte[] rowKey2 = "row2".getBytes();
+ byte[] rowKey3 = "row3".getBytes();
+ Get get1 = new Get(rowKey1);
+ Get get2 = new Get(rowKey2);
+ Get get3 = new Get(rowKey3);
+ List<Row> batchOperations;
+ Object[] results;
+
+ RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1);
+ RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2);
+ RowCacheKey rowCacheKey3 = new RowCacheKey(region, rowKey3);
+
+ // Put rows
+ batchOperations = new ArrayList<>();
+ Put put1 = new Put(rowKey1);
+ put1.addColumn(CF1, Q1, "111".getBytes());
+ put1.addColumn(CF1, Q2, "112".getBytes());
+ batchOperations.add(put1);
+ Put put2 = new Put(rowKey2);
+ put2.addColumn(CF1, Q1, "211".getBytes());
+ put2.addColumn(CF1, Q2, "212".getBytes());
+ batchOperations.add(put2);
+ Put put3 = new Put(rowKey3);
+ put3.addColumn(CF1, Q1, "311".getBytes());
+ put3.addColumn(CF1, Q2, "312".getBytes());
+ batchOperations.add(put3);
+ results = new Result[batchOperations.size()];
+ table.batch(batchOperations, results);
+ admin.flush(tableName);
+
+ // Validate that the row caches are populated
+ batchOperations = new ArrayList<>();
+ batchOperations.add(get1);
+ batchOperations.add(get2);
+ batchOperations.add(get3);
+ results = new Object[batchOperations.size()];
+ table.batch(batchOperations, results);
+ assertEquals(3, results.length);
+ assertNotNull(rowCache.getRow(rowCacheKey1));
+ assertArrayEquals("111".getBytes(), ((Result) results[0]).getValue(CF1, Q1));
+ assertArrayEquals("112".getBytes(), ((Result) results[0]).getValue(CF1, Q2));
+ assertNotNull(rowCache.getRow(rowCacheKey2));
+ assertArrayEquals("211".getBytes(), ((Result) results[1]).getValue(CF1, Q1));
+ assertArrayEquals("212".getBytes(), ((Result) results[1]).getValue(CF1, Q2));
+ assertNotNull(rowCache.getRow(rowCacheKey3));
+ assertArrayEquals("311".getBytes(), ((Result) results[2]).getValue(CF1, Q1));
+ assertArrayEquals("312".getBytes(), ((Result) results[2]).getValue(CF1, Q2));
+
+ // The row caches are invalidated by batch operation
+ batchOperations = new ArrayList<>();
+ batchOperations.add(put1);
+ Put put2New = new Put(rowKey2);
+ put2New.addColumn(CF1, Q1, "211211".getBytes());
+ put2New.addColumn(CF1, Q2, "212".getBytes());
+ CheckAndMutate cam =
+ CheckAndMutate.newBuilder(rowKey2).ifEquals(CF1, Q1, "211".getBytes()).build(put2New);
+ batchOperations.add(cam);
+ results = new Object[batchOperations.size()];
+ table.batch(batchOperations, results);
+ assertEquals(2, results.length);
+ assertNull(rowCache.getRow(rowCacheKey1));
+ assertNull(rowCache.getRow(rowCacheKey2));
+ assertNotNull(rowCache.getRow(rowCacheKey3));
+ }
+
+ @Test
+ public void testGetFromMemstoreOnly() throws IOException, InterruptedException {
+ byte[] rowKey = "row".getBytes();
+ RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey);
+
+ // Put a row into memstore only, not flushed to HFile yet
+ Put put = new Put(rowKey);
+ put.addColumn(CF1, Q1, Bytes.toBytes(0L));
+ table.put(put);
+
+ // Get from memstore only
+ Get get = new Get(rowKey);
+ table.get(get);
+
+ // Validate that the row cache is not populated
+ assertNull(rowCache.getRow(rowCacheKey));
+
+ // Flush memstore to HFile, then get again
+ admin.flush(tableName);
+ get = new Get(rowKey);
+ table.get(get);
+
+ // Validate that the row cache is populated now
+ assertNotNull(rowCache.getRow(rowCacheKey));
+
+ // Put another qualifier. And now the cells are in both memstore and HFile.
+ put = new Put(rowKey);
+ put.addColumn(CF1, Q2, Bytes.toBytes(0L));
+ table.put(put);
+
+ // Validate that the row cache is invalidated
+ assertNull(rowCache.getRow(rowCacheKey));
+
+ // Get from memstore and HFile
+ get = new Get(rowKey);
+ table.get(get);
+ assertNotNull(rowCache.getRow(rowCacheKey));
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java
new file mode 100644
index 000000000000..ea3ed188b758
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.function.Function;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CompareOperator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Consistency;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.IsolationLevel;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.filter.BinaryComparator;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.security.visibility.Authorizations;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+@Category({ RegionServerTests.class, SmallTests.class })
+public class TestRowCacheCanCacheRow {
+ private static final byte[] CF1 = "cf1".getBytes();
+ private static final byte[] CF2 = "cf2".getBytes();
+ private static final byte[] ROW_KEY = "row".getBytes();
+ private static final TableName TABLE_NAME = TableName.valueOf("test");
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheCanCacheRow.class);
+
+ @Test
+ public void testRowCacheEnabledByTable() {
+ Region region = Mockito.mock(Region.class);
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ TableDescriptor td;
+
+ Get get = new Get(ROW_KEY);
+ get.addFamily(CF1);
+
+ td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true).setColumnFamily(cfd)
+ .build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Configuration conf = HBaseConfiguration.create();
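+ // A non-zero row cache size is required here; with the default size of 0 the cache is disabled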
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+
+ RowCache rowCache = new RowCache(conf);
+ Assert.assertTrue(rowCache.canCacheRow(get, region));
+
+ // Disable row cache, expect false
+ td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setColumnFamily(cfd)
+ .setRowCacheEnabled(false).build();
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+ }
+
+ @Test
+ public void testRowCacheDisabledByConfig() {
+ Region region = Mockito.mock(Region.class);
+ Configuration conf = HBaseConfiguration.create();
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ TableDescriptor td;
+
+ Get get = new Get(ROW_KEY);
+ get.addFamily(CF1);
+
+ // Row cache enabled at the table level but effectively disabled by a row cache size of 0; expect false
+ td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true).setColumnFamily(cfd)
+ .build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+
+ RowCache rowCache = new RowCache(conf);
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+ }
+
+ @Test
+ public void testRetrieveAllCells() {
+ Region region = Mockito.mock(Region.class);
+ ColumnFamilyDescriptor cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ ColumnFamilyDescriptor cfd2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2).build();
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true)
+ .setColumnFamily(cfd1).setColumnFamily(cfd2).build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+ Configuration conf = HBaseConfiguration.create();
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ RowCache rowCache = new RowCache(conf);
+
+ // Not all CFs, expect false
+ Get get = new Get(ROW_KEY);
+ get.addFamily(CF1);
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+
+ // All CFs, expect true
+ get.addFamily(CF2);
+ Assert.assertTrue(rowCache.canCacheRow(get, region));
+
+ // Not all qualifiers, expect false
+ get.addColumn(CF1, "q1".getBytes());
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+ }
+
+ @Test
+ public void testTtl() {
+ ColumnFamilyDescriptor cfd1;
+ ColumnFamilyDescriptor cfd2;
+ TableDescriptor td;
+ Region region = Mockito.mock(Region.class);
+ Configuration conf = HBaseConfiguration.create();
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ RowCache rowCache = new RowCache(conf);
+
+ Get get = new Get(ROW_KEY);
+ get.addFamily(CF1);
+ get.addFamily(CF2);
+
+ // TTL is set, expect false
+ cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).setTimeToLive(1).build();
+ cfd2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2).build();
+ td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true)
+ .setColumnFamily(cfd1).setColumnFamily(cfd2).build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+
+ // TTL is not set, expect true
+ cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true)
+ .setColumnFamily(cfd1).setColumnFamily(cfd2).build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+ Assert.assertTrue(rowCache.canCacheRow(get, region));
+ }
+
+ @Test
+ public void testFilter() {
+ testWith(
+ get -> get.setFilter(new RowFilter(CompareOperator.EQUAL, new BinaryComparator(ROW_KEY))));
+ }
+
+ @Test
+ public void testCacheBlock() {
+ testWith(get -> get.setCacheBlocks(false));
+ }
+
+ @Test
+ public void testAttribute() {
+ testWith(get -> get.setAttribute("test", "value".getBytes()));
+ }
+
+ @Test
+ public void testCheckExistenceOnly() {
+ testWith(get -> get.setCheckExistenceOnly(true));
+ }
+
+ @Test
+ public void testColumnFamilyTimeRange() {
+ testWith(get -> get.setColumnFamilyTimeRange(CF1, 1000, 2000));
+ }
+
+ @Test
+ public void testConsistency() {
+ testWith(get -> get.setConsistency(Consistency.TIMELINE));
+ }
+
+ @Test
+ public void testAuthorizations() {
+ testWith(get -> get.setAuthorizations(new Authorizations("foo")));
+ }
+
+ @Test
+ public void testId() {
+ testWith(get -> get.setId("test"));
+ }
+
+ @Test
+ public void testIsolationLevel() {
+ testWith(get -> get.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED));
+ }
+
+ @Test
+ public void testMaxResultsPerColumnFamily() {
+ testWith(get -> get.setMaxResultsPerColumnFamily(2));
+ }
+
+ @Test
+ public void testReplicaId() {
+ testWith(get -> get.setReplicaId(1));
+ }
+
+ @Test
+ public void testRowOffsetPerColumnFamily() {
+ testWith(get -> get.setRowOffsetPerColumnFamily(1));
+ }
+
+ @Test
+ public void testTimeRange() {
+ testWith(get -> {
+ try {
+ return get.setTimeRange(1, 2);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ @Test
+ public void testTimestamp() {
+ testWith(get -> get.setTimestamp(1));
+ }
+
+ private static void testWith(Function<Get, Object> func) {
+ Region region = Mockito.mock(Region.class);
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true)
+ .setColumnFamily(cfd).build();
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled());
+
+ Configuration conf = HBaseConfiguration.create();
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ RowCache rowCache = new RowCache(conf);
+
+ Get get = new Get(ROW_KEY);
+ get.addFamily(CF1);
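+ // Baseline: a plain full-family Get against a row-cache-enabled table is cacheable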
+ Assert.assertTrue(rowCache.canCacheRow(get, region));
+
+ // Apply the Get option under test; its return value is irrelevant here
+ func.apply(get);
+
+ // The option under test makes the row uncacheable; expect false
+ Assert.assertFalse(rowCache.canCacheRow(get, region));
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java
new file mode 100644
index 000000000000..02bba6fddf88
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ RegionServerTests.class, SmallTests.class })
+public class TestRowCacheConfiguration {
+ private static final byte[] CF1 = "cf1".getBytes();
+ private static final TableName TABLE_NAME = TableName.valueOf("table");
+ private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+
+ @Test
+ public void testDetermineRowCacheEnabled() throws IOException {
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ HRegion region;
+
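+ // The table-level rowCacheEnabled setting, when present, takes precedence over the global config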
+ // Set global config to false
+ conf.setBoolean(HConstants.ROW_CACHE_ENABLED_KEY, false);
+
+ region = createRegion(null);
+ assertFalse(region.checkRowCacheConfig());
+
+ region = createRegion(false);
+ assertFalse(region.checkRowCacheConfig());
+
+ region = createRegion(true);
+ assertTrue(region.checkRowCacheConfig());
+
+ // Set global config to true
+ conf.setBoolean(HConstants.ROW_CACHE_ENABLED_KEY, true);
+
+ region = createRegion(null);
+ assertTrue(region.checkRowCacheConfig());
+
+ region = createRegion(false);
+ assertFalse(region.checkRowCacheConfig());
+
+ region = createRegion(true);
+ assertTrue(region.checkRowCacheConfig());
+ }
+
+ private HRegion createRegion(Boolean rowCacheEnabled) throws IOException {
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
+ TableDescriptorBuilder tdb = TableDescriptorBuilder.newBuilder(TABLE_NAME).setColumnFamily(cfd);
+ if (rowCacheEnabled != null) {
+ tdb.setRowCacheEnabled(rowCacheEnabled);
+ }
+ return TEST_UTIL.createLocalHRegion(tdb.build(), "".getBytes(), "1".getBytes());
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java
new file mode 100644
index 000000000000..4b3a1419f93a
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_KEY;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@Category({ RegionServerTests.class, MediumTests.class })
+@RunWith(Parameterized.class)
+public class TestRowCacheEvictOnClose {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheEvictOnClose.class);
+
+ private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+ private static final byte[] CF1 = Bytes.toBytes("cf1");
+ private static final byte[] Q1 = Bytes.toBytes("q1");
+ private static final byte[] Q2 = Bytes.toBytes("q2");
+
+ @Rule
+ public TestName testName = new TestName();
+
+ @Parameterized.Parameter
+ public boolean evictOnClose;
+
+ @Parameterized.Parameters
+ public static List<Object[]> params() {
+ return Arrays.asList(new Object[][] { { true }, { false } });
+ }
+
+ @Test
+ public void testEvictOnClose() throws Exception {
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ // Enable the row cache and shrink the block cache so the combined cache sizes stay within the heap limit
+ conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f);
+ conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f);
+
+ // Set ROW_CACHE_EVICT_ON_CLOSE
+ conf.setBoolean(ROW_CACHE_EVICT_ON_CLOSE_KEY, evictOnClose);
+
+ // Start cluster
+ SingleProcessHBaseCluster cluster = TEST_UTIL.startMiniCluster();
+ cluster.waitForActiveAndReadyMaster();
+ Admin admin = TEST_UTIL.getAdmin();
+
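+ // Grab the region server's shared row cache so the test can observe evictions directly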
+ RowCache rowCache = cluster.getRegionServer(0).getRSRpcServices().getServer().getRowCache();
+
+ // Create table with row cache enabled
+ ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
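+ // Parameterized runs put brackets into the method name, which are not legal in a table name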
+ TableName tableName = TableName.valueOf(testName.getMethodName().replaceAll("[\\[\\]]", "_"));
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true)
+ .setColumnFamily(cf1).build();
+ admin.createTable(td);
+ Table table = admin.getConnection().getTable(tableName);
+
+ int numRows = 10;
+
+ // Put rows
+ for (int i = 0; i < numRows; i++) {
+ byte[] rowKey = ("row" + i).getBytes();
+ Put put = new Put(rowKey);
+ put.addColumn(CF1, Q1, Bytes.toBytes(0L));
+ put.addColumn(CF1, Q2, "12".getBytes());
+ table.put(put);
+ }
+ // Need to flush because the row cache is not populated when reading only from the memstore.
+ admin.flush(tableName);
+
+ // Populate row caches
+ for (int i = 0; i < numRows; i++) {
+ byte[] rowKey = ("row" + i).getBytes();
+ Get get = new Get(rowKey);
+ Result result = table.get(get);
+ assertArrayEquals(rowKey, result.getRow());
+ assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1));
+ assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2));
+ }
+
+ // Verify the row cache holds an entry for every row
+ assertEquals(numRows, rowCache.getCount());
+
+ // Disable table
+ admin.disableTable(tableName);
+
+ // Verify the row cache is cleared on table close only when evictOnClose is set
+ assertEquals(evictOnClose ? 0 : numRows, rowCache.getCount());
+
+ admin.deleteTable(tableName);
+ TEST_UTIL.shutdownMiniCluster();
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java
new file mode 100644
index 000000000000..a8c59dc6ccbc
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WALFactory;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({ RegionServerTests.class, MediumTests.class })
+public class TestRowCacheHRegion {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheHRegion.class);
+
+ private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+ public static final byte[] CF = Bytes.toBytes("cf1");
+
+ @Rule
+ public TestName currentTest = new TestName();
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ TEST_UTIL.startMiniCluster(1);
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testOpenHRegion() throws Exception {
+ Configuration conf = TEST_UTIL.getConfiguration();
+ WALFactory walFactory = new WALFactory(conf,
+ ServerName.valueOf(currentTest.getMethodName(), 16010, EnvironmentEdgeManager.currentTime())
+ .toString());
+ WAL wal = walFactory.getWAL(null);
+ Path hbaseRootDir = CommonFSUtils.getRootDir(conf);
+ TableName tableName = TableName.valueOf(currentTest.getMethodName());
+ RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build();
+ TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build();
+ HRegionServer regionServer = TEST_UTIL.getHBaseCluster().getRegionServer(0);
+ HRegion region = HRegion.openHRegion(conf, FileSystem.get(conf), hbaseRootDir, hri, htd, wal,
+ regionServer, null);
+
+ // Verify that rowCacheSeqNum is initialized correctly
+ assertNotEquals(HConstants.NO_SEQNUM, region.getRowCacheSeqNum());
+ assertEquals(region.getOpenSeqNum(), region.getRowCacheSeqNum());
+
+ region.close();
+ walFactory.close();
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java
new file mode 100644
index 000000000000..dafbfbdf6f8a
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@Category(MediumTests.class)
+@RunWith(Parameterized.class)
+public class TestRowCacheWithBucketCacheAndDataBlockEncoding {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheWithBucketCacheAndDataBlockEncoding.class);
+
+ @Parameterized.Parameter
+ public boolean useBucketCache;
+
+ @Parameterized.Parameters
+ public static List<Object[]> params() {
+ return Arrays.asList(new Object[][] { { true }, { false } });
+ }
+
+ @Rule
+ public TestName name = new TestName();
+
+ private static final byte[] ROW_KEY = Bytes.toBytes("checkRow");
+ private static final byte[] CF = Bytes.toBytes("CF");
+ private static final byte[] QUALIFIER = Bytes.toBytes("cq");
+ private static final byte[] VALUE = Bytes.toBytes("checkValue");
+ private static HBaseTestingUtil testingUtil;
+ private static Admin admin = null;
+ private static RowCache rowCache;
+
+ @Before
+ public void setUp() throws Exception {
+ testingUtil = new HBaseTestingUtil();
+ Configuration conf = testingUtil.getConfiguration();
+
+ // Use bucket cache
+ if (useBucketCache) {
+ conf.setInt(ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY, 1);
+ conf.set(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
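+ // A bucket cache size of 1 or more is interpreted as megabytes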
+ conf.setInt(HConstants.BUCKET_CACHE_SIZE_KEY, 64);
+ }
+
+ // Enable the row cache and shrink the block cache to keep total cache sizes within the heap limit
+ conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f);
+ conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f);
+ testingUtil.startMiniCluster();
+ admin = testingUtil.getAdmin();
+
+ rowCache = testingUtil.getHBaseCluster().getRegionServer(0).getRowCache();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ testingUtil.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testRowCacheNoEncode() throws Exception {
+ testRowCache(name.getMethodName(), DataBlockEncoding.NONE);
+ }
+
+ @Test
+ public void testRowCacheEncode() throws Exception {
+ testRowCache(name.getMethodName(), DataBlockEncoding.FAST_DIFF);
+ }
+
+ private void testRowCache(String methodName, DataBlockEncoding dbe) throws Exception {
+ TableName tableName = TableName.valueOf(methodName.replaceAll("[\\[\\]]", "_"));
+ try (Table testTable = createTable(tableName, dbe)) {
+ Put put = new Put(ROW_KEY);
+ put.addColumn(CF, QUALIFIER, VALUE);
+ testTable.put(put);
+ admin.flush(testTable.getName());
+
+ long countBase = rowCache.getCount();
+ long hitCountBase = rowCache.getHitCount();
+
+ Result result;
+
+ // The first get should miss the row cache and populate it
+ Get get = new Get(ROW_KEY);
+ result = testTable.get(get);
+ assertArrayEquals(ROW_KEY, result.getRow());
+ assertArrayEquals(VALUE, result.getValue(CF, QUALIFIER));
+ assertEquals(1, rowCache.getCount() - countBase);
+ assertEquals(0, rowCache.getHitCount() - hitCountBase);
+
+ // Second get should hit the row cache
+ result = testTable.get(get);
+ assertArrayEquals(ROW_KEY, result.getRow());
+ assertArrayEquals(VALUE, result.getValue(CF, QUALIFIER));
+ assertEquals(1, rowCache.getCount() - countBase);
+ assertEquals(1, rowCache.getHitCount() - hitCountBase);
+ }
+ }
+
+ private Table createTable(TableName tableName, DataBlockEncoding dbe) throws IOException {
+ TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName)
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(CF).setBlocksize(100)
+ .setDataBlockEncoding(dbe).build())
+ .setRowCacheEnabled(true).build();
+ return testingUtil.createTable(td, null);
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
new file mode 100644
index 000000000000..bfb8530d9f64
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValueTestUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Append;
+import org.apache.hadoop.hbase.client.CheckAndMutate;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Increment;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.ipc.RpcCallContext;
+import org.apache.hadoop.hbase.testclassification.RegionServerTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.InOrder;
+import org.mockito.Mockito;
+
+import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
+
+@Category({ RegionServerTests.class, SmallTests.class })
+public class TestRowCacheWithMock {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheWithMock.class);
+
+ @Test
+ public void testBarrier() throws IOException {
+ // Mocking dependencies to create rowCache instance
+ RegionInfo regionInfo = Mockito.mock(RegionInfo.class);
+ Mockito.when(regionInfo.getEncodedName()).thenReturn("region1");
+ TableName tableName = TableName.valueOf("table1");
+ Mockito.when(regionInfo.getTable()).thenReturn(tableName);
+
+ List<HStore> stores = new ArrayList<>();
+ HStore hStore = Mockito.mock(HStore.class);
+ Mockito.when(hStore.getStorefilesCount()).thenReturn(2);
+ stores.add(hStore);
+
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder("CF1".getBytes()).build();
+ TableDescriptor td = Mockito.mock(TableDescriptor.class);
+ Mockito.when(td.getColumnFamilies()).thenReturn(new ColumnFamilyDescriptor[] { cfd });
+
+ byte[] rowKey = "row".getBytes();
+ Get get = new Get(rowKey);
+ Scan scan = new Scan(get);
+ List<Cell> results = new ArrayList<>();
+
+ RegionScannerImpl regionScanner = Mockito.mock(RegionScannerImpl.class);
+
+ RpcCallContext context = Mockito.mock(RpcCallContext.class);
+ Mockito.when(context.getBlockBytesScanned()).thenReturn(1L);
+
+ Configuration conf = HBaseConfiguration.create();
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+
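+ // Build a real RowCache on top of the mocked region so barrier behavior is exercised end to end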
+ RowCache rowCache = new RowCache(conf);
+
+ HRegion region = Mockito.mock(HRegion.class);
+ Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any());
+ region.setRowCache(rowCache);
+ Mockito.when(region.getRegionInfo()).thenReturn(regionInfo);
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.getStores()).thenReturn(stores);
+ Mockito.when(region.getScanner(scan)).thenReturn(regionScanner);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(true);
+ Mockito.when(region.getScannerWithResults(get, scan, results, context)).thenCallRealMethod();
+
+ RowCacheKey key = new RowCacheKey(region, rowKey);
+ results.add(KeyValueTestUtil.create("row", "CF", "q1", 1, "v1"));
+
+ // Verify that the row cache is populated when no row-level barrier exists
+ region.getScannerWithResults(get, scan, results, context);
+ assertNotNull(rowCache.getRow(key));
+ assertNull(rowCache.getRowLevelBarrier(key));
+
+ // Evict the row cache
+ rowCache.evictRow(key);
+ assertNull(rowCache.getRow(key));
+
+ // Create a row level barrier for the row key
+ rowCache.createRowLevelBarrier(key);
+ assertEquals(1, rowCache.getRowLevelBarrier(key).get());
+
+ // Verify that the row cache is not populated while the row-level barrier exists
+ region.getScannerWithResults(get, scan, results, context);
+ assertNull(rowCache.getRow(key));
+
+ // Remove the row level barrier
+ rowCache.removeRowLevelBarrier(key);
+ assertNull(rowCache.getRowLevelBarrier(key));
+
+ // Verify that the row cache is populated when no region-level barrier exists
+ region.getScannerWithResults(get, scan, results, context);
+ assertNotNull(rowCache.getRow(key));
+ assertNull(rowCache.getRegionLevelBarrier(region));
+
+ // Evict the row cache
+ rowCache.evictRow(key);
+ assertNull(rowCache.getRow(key));
+
+ // Create a region-level barrier for the region
+ rowCache.createRegionLevelBarrier(region);
+ assertEquals(1, rowCache.getRegionLevelBarrier(region).get());
+
+ // Verify that the row cache is not populated while the region-level barrier exists
+ region.getScannerWithResults(get, scan, results, context);
+ assertNull(rowCache.getRow(key));
+
+ // Remove the region-level barrier
+ rowCache.removeTableLevelBarrier(region);
+ assertNull(rowCache.getRegionLevelBarrier(region));
+ }
+
+ @Test
+ public void testMutate() throws IOException, ServiceException {
+ // Mocking RowCache and its dependencies
+ TableDescriptor tableDescriptor = Mockito.mock(TableDescriptor.class);
+
+ RegionInfo regionInfo = Mockito.mock(RegionInfo.class);
+ Mockito.when(regionInfo.getEncodedName()).thenReturn("region1");
+
+ RowCache rowCache = Mockito.mock(RowCache.class);
+
+ RegionServerServices rss = Mockito.mock(RegionServerServices.class);
+ Mockito.when(rss.getRowCache()).thenReturn(rowCache);
+
+ HRegion region = Mockito.mock(HRegion.class);
+ Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any());
+ region.setRowCache(rowCache);
+ Mockito.when(region.getTableDescriptor()).thenReturn(tableDescriptor);
+ Mockito.when(region.getRegionInfo()).thenReturn(regionInfo);
+ Mockito.when(region.getBlockCache()).thenReturn(Mockito.mock(BlockCache.class));
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(true);
+ Mockito.when(region.getRegionServerServices()).thenReturn(rss);
+
+ RSRpcServices rsRpcServices = Mockito.mock(RSRpcServices.class);
+ Mockito.when(rsRpcServices.getRegion(Mockito.any())).thenReturn(region);
+
+ RpcController rpcController = Mockito.mock(RpcController.class);
+
+ CheckAndMutate checkAndMutate = CheckAndMutate.newBuilder("row".getBytes())
+ .ifEquals("CF".getBytes(), "q1".getBytes(), "v1".getBytes()).build(new Put("row".getBytes()));
+
+ Put put1 = new Put("row1".getBytes());
+ put1.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes());
+ Put put2 = new Put("row1".getBytes());
+ put2.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes());
+ List<Mutation> mutations = new ArrayList<>();
+ mutations.add(put1);
+ mutations.add(put2);
+
+ Delete del = new Delete("row1".getBytes());
+ Append append = new Append("row1".getBytes());
+ append.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes());
+ Increment increment = new Increment("row1".getBytes());
+ increment.addColumn("CF".getBytes(), "q1".getBytes(), 1L);
+
+ Mutation[] mutationArray = new Mutation[mutations.size()];
+ mutations.toArray(mutationArray);
+
+ // mutateWithRowCacheBarrier must run the real implementation so its internal calls are recorded
+ Mockito.doCallRealMethod().when(rowCache).mutateWithRowCacheBarrier(Mockito.any(HRegion.class),
+ Mockito.any(byte[].class), Mockito.any());
+ Mockito.doCallRealMethod().when(rowCache).mutateWithRowCacheBarrier(Mockito.any(HRegion.class),
+ Mockito.anyList(), Mockito.any());
+
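+ // Every mutation path must follow the same sequence: create a row-level barrier, evict the cached
+ // row, execute the mutation, then remove the barrier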
+ InOrder inOrder;
+
+ // Put
+ Mockito.doAnswer(invocation -> {
+ Put arg = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null);
+ return null;
+ }).when(region).put(put1);
+ Mockito.clearInvocations(rowCache);
+ inOrder = Mockito.inOrder(rowCache);
+ region.put(put1);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // Delete
+ Mockito.doAnswer(invocation -> {
+ Delete arg = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null);
+ return null;
+ }).when(region).delete(del);
+ inOrder = Mockito.inOrder(rowCache);
+ Mockito.clearInvocations(rowCache);
+ region.delete(del);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // Append
+ Mockito.doAnswer(invocation -> {
+ Append arg = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null);
+ return null;
+ }).when(region).append(append);
+ inOrder = Mockito.inOrder(rowCache);
+ Mockito.clearInvocations(rowCache);
+ region.append(append);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // Increment
+ Mockito.doAnswer(invocation -> {
+ Increment arg = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null);
+ return null;
+ }).when(region).increment(increment);
+ inOrder = Mockito.inOrder(rowCache);
+ Mockito.clearInvocations(rowCache);
+ region.increment(increment);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // CheckAndMutate
+ Mockito.doAnswer(invocation -> {
+ CheckAndMutate c = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, c.getRow(), () -> null);
+ return null;
+ }).when(region).checkAndMutate(Mockito.any(CheckAndMutate.class), Mockito.anyLong(),
+ Mockito.anyLong());
+ Mockito.clearInvocations(rowCache);
+ inOrder = Mockito.inOrder(rowCache);
+ region.checkAndMutate(checkAndMutate, 0, 0);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // RowMutations
+ Mockito.doAnswer(invocation -> {
+ List<Mutation> muts = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, muts, () -> null);
+ return null;
+ }).when(region).checkAndMutate(Mockito.anyList(), Mockito.any(CheckAndMutate.class),
+ Mockito.anyLong(), Mockito.anyLong());
+ Mockito.clearInvocations(rowCache);
+ inOrder = Mockito.inOrder(rowCache);
+ region.checkAndMutate(mutations, checkAndMutate, 0, 0);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // Batch
+ Mockito.doAnswer(invocation -> {
+ Mutation[] muts = invocation.getArgument(0);
+ rowCache.mutateWithRowCacheBarrier(region, Arrays.asList(muts), () -> null);
+ return null;
+ }).when(region).batchMutate(Mockito.any(Mutation[].class), Mockito.anyBoolean(),
+ Mockito.anyLong(), Mockito.anyLong());
+ Mockito.clearInvocations(rowCache);
+ inOrder = Mockito.inOrder(rowCache);
+ region.batchMutate(mutationArray, true, 0, 0);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any());
+
+ // Bulkload
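+ // Unlike single-row mutations, bulk load barriers the whole region and bumps the row cache
+ // sequence number instead of evicting rows one by one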
+ HBaseProtos.RegionSpecifier regionSpecifier = HBaseProtos.RegionSpecifier.newBuilder()
+ .setType(HBaseProtos.RegionSpecifier.RegionSpecifierType.REGION_NAME)
+ .setValue(ByteString.copyFrom("region".getBytes())).build();
+ ClientProtos.BulkLoadHFileRequest bulkLoadRequest =
+ ClientProtos.BulkLoadHFileRequest.newBuilder().setRegion(regionSpecifier).build();
+ Mockito.doCallRealMethod().when(rsRpcServices).bulkLoadHFile(rpcController, bulkLoadRequest);
+ Mockito.clearInvocations(rowCache);
+ inOrder = Mockito.inOrder(rowCache);
+ rsRpcServices.bulkLoadHFile(rpcController, bulkLoadRequest);
+ // Verify the sequence of method calls
+ inOrder.verify(rowCache, Mockito.times(1)).createRegionLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).increaseRowCacheSeqNum(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeTableLevelBarrier(Mockito.any());
+ }
+
+ @Test
+ public void testCaching() throws IOException {
+ // Mocking dependencies to create RowCache instance
+ RegionInfo regionInfo = Mockito.mock(RegionInfo.class);
+ Mockito.when(regionInfo.getEncodedName()).thenReturn("region1");
+ TableName tableName = TableName.valueOf("table1");
+ Mockito.when(regionInfo.getTable()).thenReturn(tableName);
+
+ List<HStore> stores = new ArrayList<>();
+ HStore hStore = Mockito.mock(HStore.class);
+ Mockito.when(hStore.getStorefilesCount()).thenReturn(2);
+ stores.add(hStore);
+
+ ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder("CF1".getBytes()).build();
+ TableDescriptor td = Mockito.mock(TableDescriptor.class);
+ Mockito.when(td.getColumnFamilies()).thenReturn(new ColumnFamilyDescriptor[] { cfd });
+
+ RpcCallContext context = Mockito.mock(RpcCallContext.class);
+ Mockito.when(context.getBlockBytesScanned()).thenReturn(1L);
+
+ byte[] rowKey = "row".getBytes();
+ RegionScannerImpl regionScanner = Mockito.mock(RegionScannerImpl.class);
+
+ Get get = new Get(rowKey);
+ Scan scan = new Scan(get);
+
+ Configuration conf = HBaseConfiguration.create();
+ conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f);
+ RowCache rowCache = new RowCache(conf);
+
+ HRegion region = Mockito.mock(HRegion.class);
+ Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any());
+ region.setRowCache(rowCache);
+ Mockito.when(region.getRegionInfo()).thenReturn(regionInfo);
+ Mockito.when(region.getTableDescriptor()).thenReturn(td);
+ Mockito.when(region.getStores()).thenReturn(stores);
+ Mockito.when(region.getScanner(scan)).thenReturn(regionScanner);
+ Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf);
+ Mockito.when(region.isRowCacheEnabled()).thenReturn(true);
+ Mockito.when(region.getScannerWithResults(Mockito.any(Get.class), Mockito.any(Scan.class),
+ Mockito.anyList(), Mockito.any())).thenCallRealMethod();
+
+ RowCacheKey key = new RowCacheKey(region, rowKey);
+ List<Cell> results = new ArrayList<>();
+ results.add(KeyValueTestUtil.create("row", "CF", "q1", 1, "v1"));
+
+ // Verify that the row cache is not populated when caching=false.
+ // This must run first, while the cache is still empty.
+ get.setCacheBlocks(false);
+ region.getScannerWithResults(get, scan, results, context);
+ assertNull(rowCache.getRow(key));
+
+ // Verify that the row cache is populated when caching=true
+ get.setCacheBlocks(true);
+ region.getScannerWithResults(get, scan, results, context);
+ assertNotNull(rowCache.getRow(key, true));
+ assertNull(rowCache.getRow(key, false));
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java
new file mode 100644
index 000000000000..c5a62935e5e6
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.tool;
+
+import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.stream.IntStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.RowCache;
+import org.apache.hadoop.hbase.regionserver.RowCacheKey;
+import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+@Category({ MiscTests.class, MediumTests.class })
+public class TestRowCacheBulkLoadHFiles {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRowCacheBulkLoadHFiles.class);
+
+ private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+ private static Admin admin;
+
+ final static int NUM_CFS = 2;
+ final static byte[] QUAL = Bytes.toBytes("qual");
+ final static int ROWCOUNT = 10;
+
+ private TableName tableName;
+ private Table table;
+ private HRegion[] regions;
+
+ @Rule
+ public TestName testName = new TestName();
+
+ static String family(int i) {
+ return String.format("family_%04d", i);
+ }
+
+ public static void buildHFiles(FileSystem fs, Path dir) throws IOException {
+ byte[] val = "value".getBytes();
+ for (int i = 0; i < NUM_CFS; i++) {
+ Path testIn = new Path(dir, family(i));
+
+ TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
+ Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
+ }
+ }
+
+ private TableDescriptor createTableDesc(TableName name) {
+ TableDescriptorBuilder builder =
+ TableDescriptorBuilder.newBuilder(name).setRowCacheEnabled(true);
+ IntStream.range(0, NUM_CFS).mapToObj(i -> ColumnFamilyDescriptorBuilder.of(family(i)))
+ .forEachOrdered(builder::setColumnFamily);
+ return builder.build();
+ }
+
+ private Path buildBulkFiles(TableName table) throws Exception {
+ Path dir = TEST_UTIL.getDataTestDirOnTestFS(table.getNameAsString());
+ Path bulk1 = new Path(dir, table.getNameAsString());
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ buildHFiles(fs, bulk1);
+ return bulk1;
+ }
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ Configuration conf = TEST_UTIL.getConfiguration();
+
+ // Enable row cache but reduce the block cache size to fit in 80% of the heap
+ conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f);
+ conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f);
+
+ TEST_UTIL.startMiniCluster(1);
+ admin = TEST_UTIL.getAdmin();
+ }
+
+ @AfterClass
+ public static void teardownCluster() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ @Before
+ public void before() throws Exception {
+ tableName = TableName.valueOf(testName.getMethodName());
+ // Split the table into 2 regions
+ byte[][] splitKeys = new byte[][] { TestHRegionServerBulkLoad.rowkey(ROWCOUNT) };
+ admin.createTable(createTableDesc(tableName), splitKeys);
+ table = TEST_UTIL.getConnection().getTable(tableName);
+ // Sorted by region name
+ regions = TEST_UTIL.getRSForFirstRegionInTable(tableName).getRegions().stream()
+ .filter(r -> r.getRegionInfo().getTable().equals(tableName))
+ .sorted(Comparator.comparing(r -> r.getRegionInfo().getRegionNameAsString()))
+ .toArray(HRegion[]::new);
+ }
+
+ @After
+ public void after() throws Exception {
+ if (admin.tableExists(tableName)) {
+ admin.disableTable(tableName);
+ admin.deleteTable(tableName);
+ }
+ }
+
+ @Test
+ public void testRowCache() throws Exception {
+ RowCache rowCache =
+ TEST_UTIL.getHBaseCluster().getRegionServer(0).getRSRpcServices().getServer().getRowCache();
+
+ // The region to be bulk-loaded
+ byte[] rowKeyRegion0 = TestHRegionServerBulkLoad.rowkey(0);
+ // The region not to be bulk-loaded
+ byte[] rowKeyRegion1 = TestHRegionServerBulkLoad.rowkey(ROWCOUNT);
+
+ // Put a row into each region to populate the row cache
+ Put put0 = new Put(rowKeyRegion0);
+ put0.addColumn(family(0).getBytes(), "q1".getBytes(), "value".getBytes());
+ table.put(put0);
+ Put put1 = new Put(rowKeyRegion1);
+ put1.addColumn(family(0).getBytes(), "q1".getBytes(), "value".getBytes());
+ table.put(put1);
+ admin.flush(tableName);
+
+ // Ensure each region has a populated row cache entry
+ Get get0 = new Get(rowKeyRegion0);
+ Result result0 = table.get(get0);
+ assertNotNull(result0);
+ RowCacheKey keyPrev0 = new RowCacheKey(regions[0], get0.getRow());
+ assertNotNull(rowCache.getRow(keyPrev0));
+ Get get1 = new Get(rowKeyRegion1);
+ Result result1 = table.get(get1);
+ assertNotNull(result1);
+ RowCacheKey keyPrev1 = new RowCacheKey(regions[1], get1.getRow());
+ assertNotNull(rowCache.getRow(keyPrev1));
+
+ // Do bulkload to region0 only
+ Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+ BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
+ Path dir = buildBulkFiles(tableName);
+ loader.bulkLoad(tableName, dir);
+
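+ // Bulk load bumps the region's row cache sequence number, so keys derived afterwards differ
+ // from the pre-load keys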
+ // Ensure the row cache is removed after bulkload for region0
+ RowCacheKey keyCur0 = new RowCacheKey(regions[0], get0.getRow());
+ assertNotEquals(keyPrev0, keyCur0);
+ assertNull(rowCache.getRow(keyCur0));
+ // Ensure the row cache for keyPrev0 still exists, but it is not used anymore.
+ assertNotNull(rowCache.getRow(keyPrev0));
+
+ // Ensure the row cache for region1 is not affected
+ RowCacheKey keyCur1 = new RowCacheKey(regions[1], get1.getRow());
+ assertEquals(keyPrev1, keyCur1);
+ assertNotNull(rowCache.getRow(keyCur1));
+ }
+}
From cd93c2fb0f4e8d0002cf3bbe77d4b28bf55136ca Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Mon, 6 Apr 2026 09:11:22 +0900
Subject: [PATCH 02/10] Add GitHub Actions CI workflow files from master
---
.github/workflows/yetus-general-check.yml | 138 ++++++++++++++++++
.../yetus-jdk17-hadoop3-compile-check.yml | 107 ++++++++++++++
.../yetus-jdk17-hadoop3-unit-check.yml | 129 ++++++++++++++++
3 files changed, 374 insertions(+)
create mode 100644 .github/workflows/yetus-general-check.yml
create mode 100644 .github/workflows/yetus-jdk17-hadoop3-compile-check.yml
create mode 100644 .github/workflows/yetus-jdk17-hadoop3-unit-check.yml
diff --git a/.github/workflows/yetus-general-check.yml b/.github/workflows/yetus-general-check.yml
new file mode 100644
index 000000000000..79397d6a905f
--- /dev/null
+++ b/.github/workflows/yetus-general-check.yml
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# yamllint disable rule:line-length
+---
+name: Yetus General Check
+
+"on":
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+permissions: {}
+
+jobs:
+ general-check:
+ runs-on: ubuntu-latest
+ timeout-minutes: 600
+ permissions:
+ contents: read
+ statuses: write
+
+ env:
+ YETUS_VERSION: '0.15.0'
+
+ steps:
+ - name: Checkout HBase
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ path: src
+ fetch-depth: 0
+ persist-credentials: false
+
+ - name: Set up JDK 17
+ uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+
+ - name: Maven cache
+ uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
+ with:
+ path: ~/.m2
+ key: hbase-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ hbase-m2-
+
+ - name: Download Yetus
+ run: |
+ mkdir -p yetus
+ cd yetus
+ bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+ --keys 'https://downloads.apache.org/yetus/KEYS' \
+ --verify-tar-gz \
+ ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \
+ yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+
+ - name: Run Yetus General Check
+ env:
+ ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump"
+ DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile"
+ GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_USER: ${{ github.actor }}
+ PATCHDIR: "${{ github.workspace }}/yetus-general-check/output"
+ PLUGINS: "all,-javadoc,-jira,-shadedjars,-unit"
+ SET_JAVA_HOME: "/usr/lib/jvm/java-17"
+ SOURCEDIR: "${{ github.workspace }}/src"
+ TESTS_FILTER: "checkstyle,javac,pylint,shellcheck,shelldocs,blanks,perlcritic,ruby-lint,rubocop"
+ YETUSDIR: "${{ github.workspace }}/yetus"
+ AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc"
+ BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt"
+ BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt"
+ EXCLUDE_TESTS_URL: "https://ci-hbase.apache.org/job/HBase-Find-Flaky-Tests/job/${{ github.base_ref }}/lastSuccessfulBuild/artifact/output/excludes"
+ BUILD_THREAD: "4"
+ SUREFIRE_FIRST_PART_FORK_COUNT: "1.0C"
+ SUREFIRE_SECOND_PART_FORK_COUNT: "0.5C"
+ BRANCH_NAME: "${{ github.base_ref }}"
+ DEBUG: 'true'
+ run: |
+ cd "${{ github.workspace }}"
+ bash src/dev-support/jenkins_precommit_github_yetus.sh
+
+ - name: Publish Job Summary
+ if: always()
+ run: |
+ cd "${{ github.workspace }}"
+ python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output >> $GITHUB_STEP_SUMMARY
+
+ - name: Publish Test Results
+ if: always()
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: yetus-general-check-output
+ path: ${{ github.workspace }}/yetus-general-check/output
+ retention-days: 7
+
+ zizmor:
+ runs-on: ubuntu-latest
+ timeout-minutes: 5
+ permissions:
+ contents: read
+
+ steps:
+ - name: Check for workflow changes
+ id: changes
+ env:
+ GH_TOKEN: ${{ github.token }}
+ run: |
+ if gh pr diff "${{ github.event.pull_request.number }}" --repo "${{ github.repository }}" --name-only | grep -q '^\.github/workflows/'; then
+ echo "changed=true" >> "$GITHUB_OUTPUT"
+ else
+ echo "changed=false" >> "$GITHUB_OUTPUT"
+ fi
+
+ - name: Checkout HBase
+ if: steps.changes.outputs.changed == 'true'
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
+
+ - name: Run zizmor
+ if: steps.changes.outputs.changed == 'true'
+ run: pipx run zizmor --min-severity=medium .github/workflows/
diff --git a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml
new file mode 100644
index 000000000000..46d3a973d017
--- /dev/null
+++ b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# yamllint disable rule:line-length
+---
+name: Yetus JDK17 Hadoop3 Compile Check
+
+"on":
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+permissions:
+ contents: read
+ statuses: write
+
+jobs:
+ jdk17-hadoop3-compile-check:
+ runs-on: ubuntu-latest
+ timeout-minutes: 60
+
+ env:
+ YETUS_VERSION: '0.15.0'
+
+ steps:
+ - name: Checkout HBase
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ path: src
+ fetch-depth: 0
+ persist-credentials: false
+
+ - name: Set up JDK 17
+ uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+
+ - name: Maven cache
+ uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
+ with:
+ path: ~/.m2
+ key: hbase-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ hbase-m2-
+
+ - name: Download Yetus
+ run: |
+ mkdir -p yetus
+ cd yetus
+ bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+ --keys 'https://downloads.apache.org/yetus/KEYS' \
+ --verify-tar-gz \
+ ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \
+ yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+
+ - name: Run Yetus JDK17 Hadoop3 Compile Check
+ env:
+ ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump"
+ DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile"
+ GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_USER: ${{ github.actor }}
+ PATCHDIR: "${{ github.workspace }}/yetus-jdk17-hadoop3-compile-check/output"
+ PLUGINS: "compile,github,htmlout,javac,javadoc,maven,mvninstall,shadedjars"
+ SET_JAVA_HOME: "/usr/lib/jvm/java-17"
+ SOURCEDIR: "${{ github.workspace }}/src"
+ TESTS_FILTER: "javac,javadoc"
+ YETUSDIR: "${{ github.workspace }}/yetus"
+ AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc"
+ BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt"
+ BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt"
+ BUILD_THREAD: "4"
+ BRANCH_NAME: "${{ github.base_ref }}"
+ SKIP_ERRORPRONE: 'true'
+ DEBUG: 'true'
+ run: |
+ cd "${{ github.workspace }}"
+ bash src/dev-support/jenkins_precommit_github_yetus.sh
+
+ - name: Publish Job Summary
+ if: always()
+ run: |
+ cd "${{ github.workspace }}"
+ python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output >> $GITHUB_STEP_SUMMARY
+
+ - name: Publish Results
+ if: always()
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: yetus-jdk17-hadoop3-compile-check-output
+ path: ${{ github.workspace }}/yetus-jdk17-hadoop3-compile-check/output
+ retention-days: 7
diff --git a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml
new file mode 100644
index 000000000000..93ae44c247a3
--- /dev/null
+++ b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# yamllint disable rule:line-length
+---
+name: Yetus JDK17 Hadoop3 Unit Check
+
+"on":
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+permissions:
+ contents: read
+ statuses: write
+
+jobs:
+ jdk17-hadoop3-unit-check:
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - name: "small"
+ test_profile: "runSmallTests"
+ - name: "medium"
+ test_profile: "runMediumTests"
+ # Large tests split alphabetically by class name (after "Test" prefix)
+ # Wave 1: Test[A-H]*, Wave 2: Test[I-R]*, Wave 3: Test[S-Z]*
+ - name: "large-wave-1"
+ test_profile: "runLargeTests-wave1"
+ - name: "large-wave-2"
+ test_profile: "runLargeTests-wave2"
+ - name: "large-wave-3"
+ test_profile: "runLargeTests-wave3"
+
+ name: ${{ matrix.name }}
+
+ env:
+ YETUS_VERSION: '0.15.0'
+
+ steps:
+ - name: Checkout HBase
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ path: src
+ fetch-depth: 0
+ persist-credentials: false
+
+ - name: Set up JDK 17
+ uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+
+ - name: Maven cache
+ uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3
+ with:
+ path: ~/.m2
+ key: hbase-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ hbase-m2-
+
+ - name: Download Yetus
+ run: |
+ mkdir -p yetus
+ cd yetus
+ bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+ --keys 'https://downloads.apache.org/yetus/KEYS' \
+ --verify-tar-gz \
+ ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \
+ yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+ rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz
+
+ - name: Run Yetus JDK17 Hadoop3 Unit Check
+ env:
+ ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump"
+ DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile"
+ GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_USER: ${{ github.actor }}
+ PATCHDIR: "${{ github.workspace }}/yetus-jdk17-hadoop3-unit-check/output"
+ PLUGINS: "github,htmlout,maven,unit"
+ SET_JAVA_HOME: "/usr/lib/jvm/java-17"
+ SOURCEDIR: "${{ github.workspace }}/src"
+ YETUSDIR: "${{ github.workspace }}/yetus"
+ AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc"
+ BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt"
+ BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt"
+ EXCLUDE_TESTS_URL: "https://ci-hbase.apache.org/job/HBase-Find-Flaky-Tests/job/${{ github.base_ref }}/lastSuccessfulBuild/artifact/output/excludes"
+ BUILD_THREAD: "4"
+ SUREFIRE_FIRST_PART_FORK_COUNT: "1.0C"
+ SUREFIRE_SECOND_PART_FORK_COUNT: "0.5C"
+ BRANCH_NAME: "${{ github.base_ref }}"
+ SKIP_ERRORPRONE: 'true'
+ DEBUG: 'true'
+ TEST_PROFILE: ${{ matrix.test_profile }}
+ run: |
+ cd "${{ github.workspace }}"
+ bash src/dev-support/jenkins_precommit_github_yetus.sh
+
+ - name: Publish Job Summary
+ if: always()
+ run: |
+ cd "${{ github.workspace }}"
+ python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output >> $GITHUB_STEP_SUMMARY
+
+ - name: Publish Test Results
+ if: always()
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: yetus-jdk17-hadoop3-unit-check-${{ matrix.name }}
+ path: ${{ github.workspace }}/yetus-jdk17-hadoop3-unit-check/output
+ retention-days: 7
From 95de81b7d1bb3c58e3d1783cbe6c4551cfbda2b3 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Mon, 6 Apr 2026 09:53:44 +0900
Subject: [PATCH 03/10] Rename removeTableLevelBarrier to
removeRegionLevelBarrier
---
.../org/apache/hadoop/hbase/regionserver/RSRpcServices.java | 2 +-
.../java/org/apache/hadoop/hbase/regionserver/RowCache.java | 2 +-
.../hadoop/hbase/regionserver/TestRowCacheWithMock.java | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 7a21ab8a5504..d391cc17f5aa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -2360,7 +2360,7 @@ public BulkLoadHFileResponse bulkLoadHFile(final RpcController controller,
return bulkLoadHFileInternal(request);
} finally {
-      // The row cache for the region has been enabled again
-      rowCache.removeTableLevelBarrier(region);
+      // Removing the barrier re-enables row cache population for the region
+      rowCache.removeRegionLevelBarrier(region);
}
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
index 6a02618d7097..53c29269b4e4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
@@ -256,7 +256,7 @@ void increaseRowCacheSeqNum(HRegion region) {
region.increaseRowCacheSeqNum();
}
- void removeTableLevelBarrier(HRegion region) {
+ void removeRegionLevelBarrier(HRegion region) {
regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> {
int remaining = counter.decrementAndGet();
return (remaining <= 0) ? null : counter;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
index bfb8530d9f64..f3cde3792331 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java
@@ -152,7 +152,7 @@ public void testBarrier() throws IOException {
assertNull(rowCache.getRow(key));
-    // Remove the table level barrier
-    rowCache.removeTableLevelBarrier(region);
+    // Remove the region level barrier
+    rowCache.removeRegionLevelBarrier(region);
assertNull(rowCache.getRegionLevelBarrier(region));
}
@@ -332,7 +332,7 @@ public void testMutate() throws IOException, ServiceException {
// Verify the sequence of method calls
inOrder.verify(rowCache, Mockito.times(1)).createRegionLevelBarrier(Mockito.any());
inOrder.verify(rowCache, Mockito.times(1)).increaseRowCacheSeqNum(Mockito.any());
- inOrder.verify(rowCache, Mockito.times(1)).removeTableLevelBarrier(Mockito.any());
+ inOrder.verify(rowCache, Mockito.times(1)).removeRegionLevelBarrier(Mockito.any());
}
@Test
From 782dd75bbf3b846820fe2571819db73733d96e7b Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Mon, 6 Apr 2026 09:55:17 +0900
Subject: [PATCH 04/10] Move rowCache field next to MobFileCache and extract
initRowCache() method
---
.../regionserver/MetricsRegionServerWrapperImpl.java | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index b4dabf7fb3bf..ef80e2ee5803 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -68,8 +68,8 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper {
private BlockCache l1Cache = null;
private BlockCache l2Cache = null;
private MobFileCache mobFileCache;
+ private RowCache rowCache;
private CacheStats cacheStats;
- private final RowCache rowCache;
private CacheStats l1Stats = null;
private CacheStats l2Stats = null;
private volatile long numWALFiles = 0;
@@ -100,8 +100,7 @@ public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
this.regionServer = regionServer;
initBlockCache();
initMobFileCache();
- RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices();
- this.rowCache = rsRpcServices == null ? null : rsRpcServices.getServer().getRowCache();
+ initRowCache();
this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager();
this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
@@ -152,6 +151,11 @@ private void initMobFileCache() {
this.mobFileCache = this.regionServer.getMobFileCache().orElse(null);
}
+ private void initRowCache() {
+ RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices();
+ this.rowCache = rsRpcServices == null ? null : rsRpcServices.getServer().getRowCache();
+ }
+
@Override
public String getClusterId() {
return regionServer.getClusterId();
From 267f4efba0c6e819365951bb3a41014d6c3f6c9e Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Mon, 6 Apr 2026 09:56:53 +0900
Subject: [PATCH 05/10] Add @RestrictedApi annotation to setRowCache() in
HRegion
---
.../java/org/apache/hadoop/hbase/regionserver/HRegion.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 3a5c3f34313a..863c14956e7a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -957,7 +957,8 @@ boolean checkRowCacheConfig() {
: conf.getBoolean(HConstants.ROW_CACHE_ENABLED_KEY, HConstants.ROW_CACHE_ENABLED_DEFAULT);
}
- // For testing only
+ @RestrictedApi(explanation = "Should only be called in tests", link = "",
+ allowedOnPath = ".*/src/test/.*")
void setRowCache(RowCache rowCache) {
this.rowCache = rowCache;
}
From 1fc9d5c19c58c44c6a95d3fb7999863d75614773 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Mon, 6 Apr 2026 10:05:11 +0900
Subject: [PATCH 06/10] Migrate TestRowCache from JUnit4 to JUnit5
---
.../hbase/regionserver/TestRowCache.java | 66 +++++++++----------
1 file changed, 30 insertions(+), 36 deletions(-)
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
index c4ca0d70faff..1cac5aa2b957 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
@@ -22,12 +22,13 @@
import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_EVICTED_ROW_COUNT;
import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_HIT_COUNT;
import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_MISS_COUNT;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
@@ -37,7 +38,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompatibilityFactory;
import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
@@ -61,22 +61,17 @@
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.ClassRule;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
-
-@Category({ RegionServerTests.class, MediumTests.class })
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+
+@Tag(RegionServerTests.TAG)
+@Tag(MediumTests.TAG)
public class TestRowCache {
- @ClassRule
- public static final HBaseClassTestRule CLASS_RULE =
- HBaseClassTestRule.forClass(TestRowCache.class);
-
private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
private static final byte[] CF1 = Bytes.toBytes("cf1");
private static final byte[] CF2 = Bytes.toBytes("cf2");
@@ -95,10 +90,7 @@ public class TestRowCache {
HRegion region;
private final Map<String, Long> counterBase = new HashMap<>();
- @Rule
- public TestName testName = new TestName();
-
- @BeforeClass
+ @BeforeAll
public static void beforeClass() throws Exception {
Configuration conf = TEST_UTIL.getConfiguration();
@@ -118,20 +110,20 @@ public static void beforeClass() throws Exception {
rowCache = regionServer.getRSRpcServices().getServer().getRowCache();
}
- @AfterClass
+ @AfterAll
public static void afterClass() throws Exception {
HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false;
TEST_UTIL.shutdownMiniCluster();
}
- @Before
- public void beforeTestMethod() throws Exception {
+ @BeforeEach
+ public void beforeTestMethod(TestInfo testInfo) throws Exception {
ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build();
// To test data block encoding
ColumnFamilyDescriptor cf2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2)
.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF).build();
- tableName = TableName.valueOf(testName.getMethodName());
+ tableName = TableName.valueOf(testInfo.getTestMethod().get().getName());
TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true)
.setColumnFamily(cf1).setColumnFamily(cf2).build();
admin.createTable(td);
@@ -140,7 +132,7 @@ public void beforeTestMethod() throws Exception {
.filter(r -> r.getRegionInfo().getTable().equals(tableName)).findFirst().orElseThrow();
}
- @After
+ @AfterEach
public void afterTestMethod() throws Exception {
counterBase.clear();
@@ -282,14 +274,16 @@ public void testGetWithRowCache() throws IOException {
assertNull(rowCache.getRow(rowCacheKey));
}
- @Test(expected = DoNotRetryIOException.class)
+ @Test
public void testPutWithTTL() throws IOException {
// Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot
// track TTL expiration
- Put put = new Put("row".getBytes());
- put.addColumn(CF1, Q1, "11".getBytes());
- put.setTTL(1);
- table.put(put);
+ assertThrows(DoNotRetryIOException.class, () -> {
+ Put put = new Put("row".getBytes());
+ put.addColumn(CF1, Q1, "11".getBytes());
+ put.setTTL(1);
+ table.put(put);
+ });
}
@Test
From 6739f062accbde26671deaee01b59cfc1fb3d693 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Thu, 30 Apr 2026 20:59:33 +0900
Subject: [PATCH 07/10] Make RowCacheStrategy pluggable via configuration
Introduce the row.cache.strategy.class configuration key so that
operators can plug in custom RowCacheStrategy implementations. The
default remains TinyLfuRowCacheStrategy.
RowCacheStrategy implementations must now provide a public
constructor that takes a Configuration argument, following the
same convention used by MemStore and RegionSplitPolicy.
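As an illustration (not part of this change), a custom strategy and
its wiring would look roughly like the sketch below. LruRowCacheStrategy
is a hypothetical name and its cache logic is elided; the only contract
this patch defines is the public Configuration constructor used by
ReflectionUtils.newInstance:

    public class LruRowCacheStrategy implements RowCacheStrategy {
      public LruRowCacheStrategy(Configuration conf) {
        // e.g. size the cache from MemorySizeUtil.getRowCacheSize(conf)
      }
      // ... RowCacheStrategy methods ...
    }

    // Select it in the RegionServer configuration:
    conf.setClass(HConstants.ROW_CACHE_STRATEGY_CLASS_KEY,
      LruRowCacheStrategy.class, RowCacheStrategy.class);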
---
.../src/main/java/org/apache/hadoop/hbase/HConstants.java | 5 +++++
.../org/apache/hadoop/hbase/regionserver/RowCache.java | 8 +++++---
.../hbase/regionserver/TinyLfuRowCacheStrategy.java | 8 +++++++-
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index f140783067af..329b8b4908bd 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1035,6 +1035,11 @@ public enum OperationStatusCode {
public static final String ROW_CACHE_EVICT_ON_CLOSE_KEY = "row.cache.evictOnClose";
public static final boolean ROW_CACHE_EVICT_ON_CLOSE_DEFAULT = false;
+ /**
+ * Configuration key for the row cache strategy class
+ */
+ public static final String ROW_CACHE_STRATEGY_CLASS_KEY = "row.cache.strategy.class";
+
/**
* Configuration key for the memory size of the block cache
*/
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
index 53c29269b4e4..2c859eab0f3e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
@@ -33,7 +33,7 @@
import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
-import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
/**
* Facade for row-level caching in the RegionServer.
@@ -85,8 +85,10 @@ <R> R execute(RowOperation<R> operation) throws IOException {
RowCache(Configuration conf) {
enabledByConf =
conf.getFloat(HConstants.ROW_CACHE_SIZE_KEY, HConstants.ROW_CACHE_SIZE_DEFAULT) > 0;
- // Currently we only support TinyLfu implementation
- rowCacheStrategy = new TinyLfuRowCacheStrategy(MemorySizeUtil.getRowCacheSize(conf));
+ Class<? extends RowCacheStrategy> strategyClass = conf.getClass(
+ HConstants.ROW_CACHE_STRATEGY_CLASS_KEY, TinyLfuRowCacheStrategy.class,
+ RowCacheStrategy.class);
+ rowCacheStrategy = ReflectionUtils.newInstance(strategyClass, conf);
}
<R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> operation)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
index e141bd3cbb2b..a9f99b6d3edc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java
@@ -25,6 +25,8 @@
import java.util.Optional;
import java.util.OptionalLong;
import java.util.concurrent.atomic.LongAdder;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.checkerframework.checker.nullness.qual.NonNull;
@org.apache.yetus.audience.InterfaceAudience.Private
@@ -42,7 +44,11 @@ public void onRemoval(RowCacheKey key, RowCells value, @NonNull RemovalCause cau
// Cache.stats() does not provide eviction count for entries, so we maintain our own counter.
private final LongAdder evictedRowCount = new LongAdder();
- TinyLfuRowCacheStrategy(long maxSizeBytes) {
+ public TinyLfuRowCacheStrategy(Configuration conf) {
+ this(MemorySizeUtil.getRowCacheSize(conf));
+ }
+
+ private TinyLfuRowCacheStrategy(long maxSizeBytes) {
if (maxSizeBytes <= 0) {
cache = Caffeine.newBuilder().maximumSize(0).build();
return;
From e985135e33694b93a8e210a48cdb4836fa95c305 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Thu, 30 Apr 2026 21:16:40 +0900
Subject: [PATCH 08/10] Rename populateCache to cache and key region barrier by
encoded name
Address review feedback:
- Rename RowCache.populateCache to cache.
- Change regionLevelBarrierMap key type from HRegion to the encoded
region name (String). The encoded name is the canonical region
identifier already used elsewhere (e.g., RowCacheKey.isSameRegion).
- The cache method no longer needs an HRegion parameter; it derives
the encoded region name from the RowCacheKey.
The external signatures of create/remove/getRegionLevelBarrier still
take HRegion to make the caller's intent explicit; only the internal
map key type changes.
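For reference, the caller-side bracket is unchanged; a sketch of the
pattern used around bulk loads (the load itself elided), matching the
call order verified in TestRowCacheWithMock:

    rowCache.createRegionLevelBarrier(region);    // block population
    try {
      // ... bulk load the region ...
      rowCache.increaseRowCacheSeqNum(region);    // invalidate cached rows
    } finally {
      rowCache.removeRegionLevelBarrier(region);  // allow population again
    }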
---
.../hadoop/hbase/regionserver/HRegion.java | 2 +-
.../hadoop/hbase/regionserver/RowCache.java | 23 +++++++++++--------
.../hbase/regionserver/RowCacheKey.java | 4 ++++
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 863c14956e7a..0785e23f76ef 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -3309,7 +3309,7 @@ RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results,
// When results came from memstore only, do not populate the row cache
boolean readFromMemStoreOnly = context.getBlockBytesScanned() < 1;
if (!readFromMemStoreOnly) {
- rowCache.populateCache(this, results, key);
+ rowCache.cache(results, key);
}
return scanner;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
index 2c859eab0f3e..420592024e27 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
@@ -60,9 +60,9 @@ public class RowCache {
/**
* A barrier that prevents the row cache from being populated during region operations, such as
* bulk loads. It is implemented as a counter to address issues that arise when the same region is
- * updated concurrently.
+ * updated concurrently. Keyed by the encoded region name.
*/
- private final Map<HRegion, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>();
+ private final Map<String, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>();
/**
* A barrier that prevents the row cache from being populated during row mutations. It is
* implemented as a counter to address issues that arise when the same row is mutated
@@ -234,9 +234,9 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) {
return true;
}
- void populateCache(HRegion region, List<Cell> results, RowCacheKey key) {
+ void cache(List<Cell> results, RowCacheKey key) {
// The row cache is populated only when no region level barriers remain
- regionLevelBarrierMap.computeIfAbsent(region, t -> {
+ regionLevelBarrierMap.computeIfAbsent(key.getEncodedRegionName(), t -> {
// The row cache is populated only when no row level barriers remain
rowLevelBarrierMap.computeIfAbsent(key, k -> {
try {
@@ -251,7 +251,9 @@ void populateCache(HRegion region, List<Cell> results, RowCacheKey key) {
}
void createRegionLevelBarrier(HRegion region) {
- regionLevelBarrierMap.computeIfAbsent(region, k -> new AtomicInteger(0)).incrementAndGet();
+ regionLevelBarrierMap
+ .computeIfAbsent(region.getRegionInfo().getEncodedName(), k -> new AtomicInteger(0))
+ .incrementAndGet();
}
void increaseRowCacheSeqNum(HRegion region) {
@@ -259,10 +261,11 @@ void increaseRowCacheSeqNum(HRegion region) {
}
void removeRegionLevelBarrier(HRegion region) {
- regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> {
- int remaining = counter.decrementAndGet();
- return (remaining <= 0) ? null : counter;
- });
+ regionLevelBarrierMap.computeIfPresent(region.getRegionInfo().getEncodedName(),
+ (k, counter) -> {
+ int remaining = counter.decrementAndGet();
+ return (remaining <= 0) ? null : counter;
+ });
}
long getHitCount() {
@@ -292,6 +295,6 @@ AtomicInteger getRowLevelBarrier(RowCacheKey key) {
// For testing only
AtomicInteger getRegionLevelBarrier(HRegion region) {
- return regionLevelBarrierMap.get(region);
+ return regionLevelBarrierMap.get(region.getRegionInfo().getEncodedName());
}
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java
index 09ec68194ea9..c6bcde41a7a4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java
@@ -41,6 +41,10 @@ public RowCacheKey(HRegion region, byte[] rowKey) {
this.rowCacheSeqNum = region.getRowCacheSeqNum();
}
+ String getEncodedRegionName() {
+ return encodedRegionName;
+ }
+
@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
From d8fef38bfc02906f0314397a588371be43fa6bf2 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Thu, 30 Apr 2026 21:49:23 +0900
Subject: [PATCH 09/10] Support cell-level TTL on Put with row cache by
checking expiration on hit
Address review feedback: instead of rejecting Puts with TTL on row
cache-enabled tables, check TTL expiration when serving cache hits.
- RowCells: precompute the earliest TTL expiration time across the
contained cells during construction, exposing isExpired(now) for an
O(1) check on each cache hit. Cells without a TTL tag yield
Long.MAX_VALUE so the check short-circuits.
- RowCache.tryGetFromCache: if the cached row is expired, evict it
and fall back to the storage read path.
- RowCache.cache: skip caching when results are empty.
- HRegion.put: remove the guard that rejected Puts with TTL on row
cache-enabled tables.
Server-side cells preserve their TTL tag (carried forward by
TagUtil.carryForwardTTLTag during mutation), so the same expiration
check used by ScanQueryMatcher can be applied at the cache layer.
CF-level TTL still disables the row cache via canCacheRow's
isDefaultTtl check; that policy is unchanged.
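A worked example of the expiration math (illustrative epoch millis;
rowCells stands for a cached RowCells instance):

    long writeTs   = 1_000_000L;       // cell timestamp assigned at write
    long ttlMs     = 60_000L;          // Put.setTTL(60_000), kept as a TTL tag
    long expiresAt = writeTs + ttlMs;  // earliestExpirationMs = 1_060_000
    // isExpired(now) is 'earliestExpirationMs < now' (strict):
    rowCells.isExpired(1_060_000L);    // false: boundary instant still fresh
    rowCells.isExpired(1_060_001L);    // true: evicted on the next hit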
---
.../hadoop/hbase/regionserver/HRegion.java | 9 ----
.../hadoop/hbase/regionserver/RowCache.java | 13 ++++++
.../hadoop/hbase/regionserver/RowCells.java | 34 ++++++++++++++-
.../hbase/regionserver/TestRowCache.java | 42 ++++++++++++++-----
4 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 0785e23f76ef..d07aa9e9755a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -3473,15 +3473,6 @@ private void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, b
@Override
public void put(Put put) throws IOException {
TraceUtil.trace(() -> {
- // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot
- // track TTL expiration
- if (isRowCacheEnabled) {
- if (put.getTTL() != Long.MAX_VALUE) {
- throw new DoNotRetryIOException(
- "Tables with row cache enabled do not allow setting TTL on Puts");
- }
- }
-
checkReadOnly();
// Do a rough check that we have resources to accept a write. The check is
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
index 420592024e27..23c893b3a3b1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java
@@ -33,6 +33,7 @@
import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ReflectionUtils;
/**
@@ -230,11 +230,23 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) {
return false;
}
+ if (row.isExpired(EnvironmentEdgeManager.currentTime())) {
+ // A cell in the cached row has expired by its cell-level TTL. Drop the row from the cache
+ // and treat this as a miss so the caller falls back to the normal read path.
+ evictRow(key);
+ return false;
+ }
+
results.addAll(row.getCells());
return true;
}
void cache(List<Cell> results, RowCacheKey key) {
+ if (results.isEmpty()) {
+ // Nothing to cache; avoid creating an empty entry that would just be a cache hit returning
+ // an empty row.
+ return;
+ }
// The row cache is populated only when no region level barriers remain
regionLevelBarrierMap.computeIfAbsent(key.getEncodedRegionName(), t -> {
// The row cache is populated only when no row level barriers remain
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
index af0a0ea4c537..7b29de61c9c2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java
@@ -18,9 +18,13 @@
package org.apache.hadoop.hbase.regionserver;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ExtendedCell;
+import org.apache.hadoop.hbase.PrivateCellUtil;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -29,19 +33,47 @@ public class RowCells implements HeapSize {
public static final long FIXED_OVERHEAD = ClassSize.estimateBase(RowCells.class, false);
private final List<ExtendedCell> cells = new ArrayList<>();
+ /**
+ * Earliest expiration time among contained cells, derived from cell-level TTL tags. Set to
+ * {@link Long#MAX_VALUE} when no cell carries a TTL tag, which lets the row cache short-circuit
+ * the expiration check on every hit.
+ */
+ private final long earliestExpirationMs;
public RowCells(List<Cell> cells) throws CloneNotSupportedException {
+ long earliest = Long.MAX_VALUE;
for (Cell cell : cells) {
if (!(cell instanceof ExtendedCell extCell)) {
throw new CloneNotSupportedException("Cell is not an ExtendedCell");
}
try {
// To garbage collect the objects referenced by the cells
- this.cells.add(extCell.deepClone());
+ ExtendedCell cloned = extCell.deepClone();
+ this.cells.add(cloned);
+ long exp = expirationTimeOf(cloned);
+ if (exp < earliest) {
+ earliest = exp;
+ }
} catch (RuntimeException e) {
throw new CloneNotSupportedException("Deep clone failed");
}
}
+ this.earliestExpirationMs = earliest;
+ }
+
+ private static long expirationTimeOf(ExtendedCell cell) {
+ Iterator<Tag> i = PrivateCellUtil.tagsIterator(cell);
+ while (i.hasNext()) {
+ Tag t = i.next();
+ if (TagType.TTL_TAG_TYPE == t.getType()) {
+ return cell.getTimestamp() + Tag.getValueAsLong(t);
+ }
+ }
+ return Long.MAX_VALUE;
+ }
+
+ public boolean isExpired(long now) {
+ return earliestExpirationMs < now;
}
@Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
index 1cac5aa2b957..239c40f01687 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java
@@ -37,7 +37,6 @@
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompatibilityFactory;
-import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
@@ -61,6 +60,8 @@
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
@@ -275,15 +276,36 @@ public void testGetWithRowCache() throws IOException {
}
@Test
- public void testPutWithTTL() throws IOException {
- // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot
- // track TTL expiration
- assertThrows(DoNotRetryIOException.class, () -> {
- Put put = new Put("row".getBytes());
- put.addColumn(CF1, Q1, "11".getBytes());
- put.setTTL(1);
- table.put(put);
- });
+ public void testPutWithTTL() throws Exception {
+ // Cell-level TTL set via Put.setTTL is supported: the cached row is invalidated on hit when
+ // the cell has expired.
+ byte[] rowKey = "row".getBytes();
+ RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey);
+
+ Put put = new Put(rowKey).addColumn(CF1, Q1, "v".getBytes());
+ put.setTTL(60_000);
+ table.put(put);
+ // Flush so that the next Get reads from HFile (memstore-only reads do not populate the cache)
+ admin.flush(tableName);
+
+ // First Get populates the cache
+ Result first = table.get(new Get(rowKey));
+ assertFalse(first.isEmpty());
+ assertNotNull(rowCache.getRow(rowCacheKey));
+
+ // Advance time beyond the cell TTL
+ ManualEnvironmentEdge edge = new ManualEnvironmentEdge();
+ edge.setValue(EnvironmentEdgeManager.currentTime() + 120_000);
+ EnvironmentEdgeManager.injectEdge(edge);
+ try {
+ // Cache hit detects expiration, evicts the row, and falls back to the read path. The
+ // storage path also filters the expired cell, so the result is empty.
+ Result second = table.get(new Get(rowKey));
+ assertTrue(second.isEmpty());
+ assertNull(rowCache.getRow(rowCacheKey));
+ } finally {
+ EnvironmentEdgeManager.reset();
+ }
}
@Test
From 8b89b7cb58f689f3e0d03d0d84f6c3201aa88268 Mon Sep 17 00:00:00 2001
From: "terence.yoo"
Date: Thu, 30 Apr 2026 21:56:09 +0900
Subject: [PATCH 10/10] Sync dev-support scripts from master
The CI workflows invoke scripts under dev-support/ that were missing
or outdated on this branch, causing GHA runs to fail at startup.
---
dev-support/Jenkinsfile | 446 +--------------
dev-support/create-release/README.txt | 3 +-
dev-support/create-release/release-build.sh | 2 +
dev-support/create-release/release-util.sh | 2 +-
...on_instead_of_inheritance-HBASE-17732.adoc | 4 +-
dev-support/docker/Dockerfile | 11 +
.../flaky-tests/python-requirements.txt | 4 +-
dev-support/flaky-tests/report-flakies.py | 53 +-
.../generate-website/build-hbase-website.sh | 250 ++++++++
.../generate-hbase-website.Jenkinsfile | 137 +++++
dev-support/gh_hide_old_comments.sh | 96 +++-
.../git-jira-release-audit/requirements.txt | 10 +-
...-backwards-compatibility-check.Jenkinsfile | 326 +++++++++++
dev-support/hbase-personality.sh | 92 +--
dev-support/hbase-vote.sh | 4 +-
.../integration-test.Jenkinsfile | 404 +++++++++++++
dev-support/integration-test/patch-hadoop3.sh | 24 +
.../pseudo-distributed-test.sh | 540 ++++++++++++++++++
.../integration-test/source-artifact.sh | 242 ++++++++
dev-support/jenkins_precommit_github_yetus.sh | 30 +-
dev-support/make_rc.sh | 2 +-
dev-support/spotbugs-exclude.xml | 12 +
dev-support/yetus_console_to_md.py | 522 +++++++++++++++++
23 files changed, 2680 insertions(+), 536 deletions(-)
create mode 100755 dev-support/generate-website/build-hbase-website.sh
create mode 100644 dev-support/generate-website/generate-hbase-website.Jenkinsfile
create mode 100644 dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile
create mode 100644 dev-support/integration-test/integration-test.Jenkinsfile
create mode 100755 dev-support/integration-test/patch-hadoop3.sh
create mode 100755 dev-support/integration-test/pseudo-distributed-test.sh
create mode 100755 dev-support/integration-test/source-artifact.sh
create mode 100644 dev-support/yetus_console_to_md.py
diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index c550272cc3f8..f22e67d04e83 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -38,7 +38,6 @@ pipeline {
OUTPUT_DIR_RELATIVE_JDK8_HADOOP3 = 'output-jdk8-hadoop3'
OUTPUT_DIR_RELATIVE_JDK11_HADOOP3 = 'output-jdk11-hadoop3'
OUTPUT_DIR_RELATIVE_JDK17_HADOOP3 = 'output-jdk17-hadoop3'
- OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS = 'output-jdk17-hadoop3-backwards'
PROJECT = 'hbase'
PROJECT_PERSONALITY = 'https://raw.githubusercontent.com/apache/hbase/master/dev-support/hbase-personality.sh'
@@ -58,9 +57,6 @@ pipeline {
ASF_NIGHTLIES = 'https://nightlies.apache.org'
ASF_NIGHTLIES_BASE_ORI = "${ASF_NIGHTLIES}/hbase/${JOB_NAME}/${BUILD_NUMBER}"
ASF_NIGHTLIES_BASE = "${ASF_NIGHTLIES_BASE_ORI.replaceAll(' ', '%20')}"
- // These are dependent on the branch
- HADOOP3_VERSIONS = "3.3.5,3.3.6,3.4.0,3.4.1,3.4.2"
- HADOOP3_DEFAULT_VERSION = "3.4.2"
}
parameters {
booleanParam(name: 'USE_YETUS_PRERELEASE', defaultValue: false, description: '''Check to use the current HEAD of apache/yetus rather than our configured release.
@@ -85,7 +81,7 @@ pipeline {
// can't just do a simple echo or the directory won't be created. :(
sh '''#!/usr/bin/env bash
echo "Make sure we have a directory for downloading dependencies: $(pwd)"
-'''
+ '''
}
sh '''#!/usr/bin/env bash
set -e
@@ -127,77 +123,8 @@ pipeline {
}
stash name: 'yetus', includes: "yetus-*/*,yetus-*/**/*,tools/personality.sh"
}
- }
- stage ('hadoop 2 cache') {
- environment {
- HADOOP2_VERSION="2.10.2"
- }
- steps {
- // directory must be unique for each parallel stage, because jenkins runs them in the same workspace :(
- dir('downloads-hadoop-2') {
- sh '''#!/usr/bin/env bash
- echo "Make sure we have a directory for downloading dependencies: $(pwd)"
-'''
- }
- sh '''#!/usr/bin/env bash
- set -e
- echo "Ensure we have a copy of Hadoop ${HADOOP2_VERSION}"
- "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
- --working-dir "${WORKSPACE}/downloads-hadoop-2" \
- --keys 'https://downloads.apache.org/hadoop/common/KEYS' \
- --verify-tar-gz \
- "${WORKSPACE}/hadoop-${HADOOP2_VERSION}-bin.tar.gz" \
- "hadoop/common/hadoop-${HADOOP2_VERSION}/hadoop-${HADOOP2_VERSION}.tar.gz"
- for stale in $(ls -1 "${WORKSPACE}"/hadoop-2*.tar.gz | grep -v ${HADOOP2_VERSION}); do
- echo "Delete stale hadoop 2 cache ${stale}"
- rm -rf $stale
- done
- '''
- stash name: 'hadoop-2', includes: "hadoop-${HADOOP2_VERSION}-bin.tar.gz"
- }
- }
- stage ('hadoop 3 cache') {
- steps {
- script {
- hadoop3_versions = env.HADOOP3_VERSIONS.split(",");
- env.HADOOP3_VERSIONS_REGEX = "[" + hadoop3_versions.join("|") + "]";
- for (hadoop3_version in hadoop3_versions) {
- env.HADOOP3_VERSION = hadoop3_version;
- echo "env.HADOOP3_VERSION" + env.hadoop3_version;
- stage ('Hadoop 3 cache inner stage') {
- // directory must be unique for each parallel stage, because jenkins runs them in the same workspace :(
- dir("downloads-hadoop-${HADOOP3_VERSION}") {
- sh '''#!/usr/bin/env bash
- echo "Make sure we have a directory for downloading dependencies: $(pwd)"
-'''
- } //dir
- sh '''#!/usr/bin/env bash
- set -e
- echo "Ensure we have a copy of Hadoop ${HADOOP3_VERSION}"
- "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
- --working-dir "${WORKSPACE}/downloads-hadoop-${HADOOP3_VERSION}" \
- --keys 'https://downloads.apache.org/hadoop/common/KEYS' \
- --verify-tar-gz \
- "${WORKSPACE}/hadoop-${HADOOP3_VERSION}-bin.tar.gz" \
- "hadoop/common/hadoop-${HADOOP3_VERSION}/hadoop-${HADOOP3_VERSION}.tar.gz"
- for stale in $(ls -1 "${WORKSPACE}"/hadoop-3*.tar.gz | grep -v ${HADOOP3_VERSION}); do
- echo "Delete stale hadoop 3 cache ${stale}"
- rm -rf $stale
- done
- '''
- stash name: "hadoop-${HADOOP3_VERSION}", includes: "hadoop-${HADOOP3_VERSION}-bin.tar.gz"
- script {
- if (env.HADOOP3_VERSION == env.HADOOP3_DEFAULT_VERSION) {
- // FIXME: we never unstash this, because we run the packaging tests with the version-specific stashes
- stash(name: "hadoop-3", includes: "hadoop-${HADOOP3_VERSION}-bin.tar.gz")
- } //if
- } //script
- } //stage ('Hadoop 3 cache inner stage')
- } //for
- } //script
- } //steps
- } //stage ('hadoop 3 cache') {
- } //parallel
+ } // stage ('yetus install')
+ } // parallel
} //stage ('thirdparty installs')
stage ('init health results') {
steps {
@@ -208,13 +135,6 @@ pipeline {
stash name: 'jdk8-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}/doesn't-match"
stash name: 'jdk11-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}/doesn't-match"
stash name: 'jdk17-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}/doesn't-match"
- script {
- for (hadoop3_version in hadoop3_versions) {
- // confusing environment vs Groovy variables
- stash(name: "jdk17-hadoop3-backwards-result-${hadoop3_version}", allowEmpty: true, includes: "${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${hadoop3_version}/doesn't-match")
- }
- }
- stash name: 'srctarball-result', allowEmpty: true, includes: "output-srctarball/doesn't-match"
}
}
stage ('health checks') {
@@ -771,352 +691,6 @@ pipeline {
}
}
}
- // If/when we transition to transient runners, we could run every Hadoop check as a matrix job
- stage ('yetus jdk17 hadoop3 backwards compatibility checks') {
- agent {
- node {
- label 'hbase'
- }
- }
- environment {
- BASEDIR = "${env.WORKSPACE}/component"
- TESTS = "${env.DEEP_CHECKS}"
- SET_JAVA_HOME = "/usr/lib/jvm/java-17"
- // Activates hadoop 3.0 profile in maven runs.
- HADOOP_PROFILE = '3.0'
- // HADOOP_THREE_VERSION is set in script for loop
- TEST_PROFILE = 'runDevTests'
- SKIP_ERRORPRONE = true
- }
- steps {
- script {
- for (hadoop3_version in hadoop3_versions) {
- if (hadoop3_version == env.HADOOP3_DEFAULT_VERSION) {
- // We are running the full test suite, no need to run the dev tests too
- continue
- }
- //HADOOP_THREE_VERSION is the environment variable name expected by the nightly shell script
- env.HADOOP_THREE_VERSION = hadoop3_version;
- env.OUTPUT_DIR_RELATIVE = "${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${env.HADOOP_THREE_VERSION}"
- env.OUTPUT_DIR = "${env.WORKSPACE}/${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${env.HADOOP_THREE_VERSION}"
- try {
- stage ('yetus jdk17 hadoop3 backwards compatibility checks inner stage') {
- // Must do prior to anything else, since if one of them timesout we'll stash the commentfile
- sh '''#!/usr/bin/env bash
- set -e
- rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}"
- rm -f "${OUTPUT_DIR}/commentfile"
- '''
- unstash 'yetus'
- dir('component') {
- checkout scm
- }
- sh '''#!/usr/bin/env bash
- set -e
- rm -rf "${OUTPUT_DIR}/machine" && mkdir "${OUTPUT_DIR}/machine"
- "${BASEDIR}/dev-support/gather_machine_environment.sh" "${OUTPUT_DIR_RELATIVE}/machine"
- echo "got the following saved stats in '${OUTPUT_DIR_RELATIVE}/machine'"
- ls -lh "${OUTPUT_DIR_RELATIVE}/machine"
- '''
- script {
- def ret = sh(
- returnStatus: true,
- script: '''#!/usr/bin/env bash
- set -e
- declare -i status=0
- if "${BASEDIR}/dev-support/hbase_nightly_yetus.sh" ; then
- echo "(/) {color:green}+1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
- else
- echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
- status=1
- fi
- echo "-- For more information [see jdk17 report|${BUILD_URL}JDK17_20Nightly_20Build_20Report_20_28Hadoop3_29/]" >> "${OUTPUT_DIR}/commentfile"
- exit "${status}"
- '''
- )
- if (ret != 0) {
- // mark the build as UNSTABLE instead of FAILURE, to avoid skipping the later publish of
- // test output. See HBASE-26339 for more details.
- currentBuild.result = 'UNSTABLE'
- }
- } //script
- } //stage ('yetus jdk17 hadoop3 backwards compatibility checks inner stage') {
- } //try
- finally {
- stash name: "jdk17-hadoop3-backwards-result-${HADOOP_THREE_VERSION}", includes: "${OUTPUT_DIR_RELATIVE}/commentfile"
- junit testResults: "${env.OUTPUT_DIR_RELATIVE}/**/target/**/TEST-*.xml", allowEmptyResults: true
- // zip surefire reports.
- sh '''#!/bin/bash -e
- if [ ! -f "${OUTPUT_DIR}/commentfile" ]; then
- echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" >"${OUTPUT_DIR}/commentfile"
- echo "-- Something went wrong running this stage, please [check relevant console output|${BUILD_URL}/console]." >> "${OUTPUT_DIR}/commentfile"
- fi
- if [ -d "${OUTPUT_DIR}/archiver" ]; then
- count=$(find "${OUTPUT_DIR}/archiver" -type f | wc -l)
- if [[ 0 -ne ${count} ]]; then
- echo "zipping ${count} archived files"
- zip -q -m -r "${OUTPUT_DIR}/test_logs.zip" "${OUTPUT_DIR}/archiver"
- else
- echo "No archived files, skipping compressing."
- fi
- else
- echo "No archiver directory, skipping compressing."
- fi
- '''
- sshPublisher(publishers: [
- sshPublisherDesc(configName: 'Nightlies',
- transfers: [
- sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
- sourceFiles: "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip"
- )
- ]
- )
- ])
- // remove the big test logs zip file, store the nightlies url in test_logs.html
- sh '''#!/bin/bash -e
- if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
- echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
- rm -rf "${OUTPUT_DIR}/test_logs.zip"
- python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
- else
- echo "No test_logs.zip, skipping"
- fi
- '''
- // Has to be relative to WORKSPACE.
- archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/*"
- archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/**/*"
- publishHTML target: [
- allowMissing : true,
- keepAll : true,
- alwaysLinkToLastBuild: true,
- // Has to be relative to WORKSPACE.
- reportDir : "${env.OUTPUT_DIR_RELATIVE}",
- reportFiles : 'console-report.html',
- reportName : "JDK17 Nightly Build Report (Hadoop ${HADOOP_THREE_VERSION} backwards compatibility)"
- ]
- } //finally
- } // for
- } //script
- } //steps
- } //stage ('yetus jdk17 hadoop3 backwards compatibility checks')
-
- // This is meant to mimic what a release manager will do to create RCs.
- // See http://hbase.apache.org/book.html#maven.release
- // TODO (HBASE-23870): replace this with invocation of the release tool
- stage ('packaging and integration') {
- agent {
- node {
- label 'hbase'
- }
- }
- environment {
- BASEDIR = "${env.WORKSPACE}/component"
- BRANCH = "${env.BRANCH_NAME}"
- }
- steps {
- dir('component') {
- checkout scm
- }
- sh '''#!/bin/bash -e
- echo "Setting up directories"
- rm -rf "output-srctarball" && mkdir "output-srctarball"
- rm -rf "output-integration" && mkdir "output-integration" "output-integration/hadoop-2" "output-integration/hadoop-3" "output-integration/hadoop-3-shaded"
- rm -rf "unpacked_src_tarball" && mkdir "unpacked_src_tarball"
- rm -rf "hbase-install" && mkdir "hbase-install"
- rm -rf "hbase-client" && mkdir "hbase-client"
- rm -rf "hbase-hadoop3-install"
- rm -rf "hbase-hadoop3-client"
- rm -rf "hadoop-2" && mkdir "hadoop-2"
- rm -rf "hadoop-3" && mkdir "hadoop-3"
- rm -rf ".m2-for-repo" && mkdir ".m2-for-repo"
- rm -rf ".m2-for-src" && mkdir ".m2-for-src"
- # remove old hadoop tarballs in workspace
- rm -rf hadoop-2*.tar.gz
- rm -rf hadoop-3*.tar.gz
- rm -f "output-integration/commentfile"
- '''
- sh '''#!/usr/bin/env bash
- set -e
- rm -rf "output-srctarball/machine" && mkdir "output-srctarball/machine"
- "${BASEDIR}/dev-support/gather_machine_environment.sh" "output-srctarball/machine"
- echo "got the following saved stats in 'output-srctarball/machine'"
- ls -lh "output-srctarball/machine"
- '''
- sh '''#!/bin/bash -e
- echo "Checking the steps for an RM to make a source artifact, then a binary artifact."
- docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
- docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
- -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" --workdir=/hbase hbase-integration-test \
- "component/dev-support/hbase_nightly_source-artifact.sh" \
- --intermediate-file-dir output-srctarball \
- --unpack-temp-dir unpacked_src_tarball \
- --maven-m2-initial .m2-for-repo \
- --maven-m2-src-build .m2-for-src \
- --clean-source-checkout \
- component
- if [ $? -eq 0 ]; then
- echo '(/) {color:green}+1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
- else
- echo '(x) {color:red}-1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
- exit 1
- fi
- '''
- echo "unpacking the hbase bin tarball into 'hbase-install' and the client tarball into 'hbase-client'"
- sh '''#!/bin/bash -e
- if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v hadoop3 | wc -l) ]; then
- echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected binaries.' >>output-srctarball/commentfile
- exit 1
- fi
- install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v client-bin | grep -v hadoop3)
- tar --strip-component=1 -xzf "${install_artifact}" -C "hbase-install"
- client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-client-bin.tar.gz | grep -v hadoop3)
- tar --strip-component=1 -xzf "${client_artifact}" -C "hbase-client"
- if [ 2 -eq $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | wc -l) ]; then
- echo "hadoop3 artifacts available, unpacking the hbase hadoop3 bin tarball into 'hbase-hadoop3-install' and the client hadoop3 tarball into 'hbase-hadoop3-client'"
- mkdir hbase-hadoop3-install
- mkdir hbase-hadoop3-client
- hadoop3_install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | grep -v client-bin)
- tar --strip-component=1 -xzf "${hadoop3_install_artifact}" -C "hbase-hadoop3-install"
- hadoop3_client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-client-bin.tar.gz)
- tar --strip-component=1 -xzf "${hadoop3_client_artifact}" -C "hbase-hadoop3-client"
- fi
- '''
- unstash 'hadoop-2'
- sh '''#!/bin/bash -xe
- if [[ "${BRANCH}" == *"branch-2"* ]]; then
- echo "Attempting to use run an instance on top of Hadoop 2."
- artifact=$(ls -1 "${WORKSPACE}"/hadoop-2*.tar.gz | head -n 1)
- tar --strip-components=1 -xzf "${artifact}" -C "hadoop-2"
- docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
- docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
- -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-8" --workdir=/hbase hbase-integration-test \
- component/dev-support/hbase_nightly_pseudo-distributed-test.sh \
- --single-process \
- --working-dir output-integration/hadoop-2 \
- --hbase-client-install "hbase-client" \
- hbase-install \
- hadoop-2/bin/hadoop \
- hadoop-2/share/hadoop/yarn/timelineservice \
- hadoop-2/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
- hadoop-2/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
- hadoop-2/bin/mapred \
- >output-integration/hadoop-2.log 2>&1
- if [ $? -ne 0 ]; then
- echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop 2. [see log for details|${BUILD_URL}/artifact/output-integration/hadoop-2.log]. (note that this means we didn't run on Hadoop 3)" >output-integration/commentfile
- exit 2
- fi
- echo "(/) {color:green}+1 client integration test for HBase 2 {color}" >output-integration/commentfile
- else
- echo "Skipping to run against Hadoop 2 for branch ${BRANCH}"
- fi
- '''
- script {
- for (hadoop3_version in hadoop3_versions) {
- env.HADOOP3_VERSION = hadoop3_version;
- echo "env.HADOOP3_VERSION" + env.hadoop3_version;
- stage ("packaging and integration Hadoop 3 inner stage ") {
- unstash "hadoop-" + env.HADOOP3_VERSION
- sh '''#!/bin/bash -e
- echo "Attempting to use run an instance on top of Hadoop ${HADOOP3_VERSION}."
- # Clean up any previous tested Hadoop3 files before unpacking the current one
- rm -rf hadoop-3/*
- # Create working dir
- rm -rf "output-integration/hadoop-${HADOOP3_VERSION}" && mkdir "output-integration/hadoop-${HADOOP3_VERSION}"
- rm -rf "output-integration/hadoop-${HADOOP3_VERSION}-shaded" && mkdir "output-integration/hadoop-${HADOOP3_VERSION}-shaded"
- artifact=$(ls -1 "${WORKSPACE}"/hadoop-${HADOOP3_VERSION}-bin.tar.gz | head -n 1)
- tar --strip-components=1 -xzf "${artifact}" -C "hadoop-3"
- # we need to patch some files otherwise minicluster will fail to start, see MAPREDUCE-7471
- ${BASEDIR}/dev-support/patch-hadoop3.sh hadoop-3
- hbase_install_dir="hbase-install"
- hbase_client_dir="hbase-client"
- if [ -d "hbase-hadoop3-install" ]; then
- echo "run hadoop3 client integration test against hbase hadoop3 binaries"
- hbase_install_dir="hbase-hadoop3-install"
- hbase_client_dir="hbase-hadoop3-client"
- fi
- docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
- docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
- -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" \
- -e HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \
- --workdir=/hbase hbase-integration-test \
- component/dev-support/hbase_nightly_pseudo-distributed-test.sh \
- --single-process \
- --working-dir output-integration/hadoop-${HADOOP3_VERSION} \
- --hbase-client-install ${hbase_client_dir} \
- ${hbase_install_dir} \
- hadoop-3/bin/hadoop \
- hadoop-3/share/hadoop/yarn/timelineservice \
- hadoop-3/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
- hadoop-3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
- hadoop-3/bin/mapred \
- >output-integration/hadoop-${HADOOP3_VERSION}.log 2>&1
- if [ $? -ne 0 ]; then
- echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop ${HADOOP3_VERSION}. [see log for details|${BUILD_URL}/artifact/output-integration/hadoop-${HADOOP3_VERSION}.log]. (note that this means we didn't check the Hadoop ${HADOOP3_VERSION} shaded client)" >> output-integration/commentfile
- exit 2
- fi
- echo "Attempting to use run an instance on top of Hadoop ${HADOOP3_VERSION}, relying on the Hadoop client artifacts for the example client program."
- docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
- -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" \
- -e HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \
- --workdir=/hbase hbase-integration-test \
- component/dev-support/hbase_nightly_pseudo-distributed-test.sh \
- --single-process \
- --hadoop-client-classpath hadoop-3/share/hadoop/client/hadoop-client-api-*.jar:hadoop-3/share/hadoop/client/hadoop-client-runtime-*.jar \
- --working-dir output-integration/hadoop-${HADOOP3_VERSION}-shaded \
- --hbase-client-install ${hbase_client_dir} \
- ${hbase_install_dir} \
- hadoop-3/bin/hadoop \
- hadoop-3/share/hadoop/yarn/timelineservice \
- hadoop-3/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
- hadoop-3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
- hadoop-3/bin/mapred \
- >output-integration/hadoop-${HADOOP3_VERSION}-shaded.log 2>&1
- if [ $? -ne 0 ]; then
- echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop ${HADOOP3_VERSION} using Hadoop's shaded client. [see log for details|${BUILD_URL}/artifact/output-integration/hadoop-${HADOOP3_VERSION}-shaded.log]." >> output-integration/commentfile
- exit 2
- fi
- echo "(/) {color:green}+1 client integration test for ${HADOOP3_VERSION} {color}" >> output-integration/commentfile
- '''
- } //stage ("packaging and integration Hadoop 3 inner stage ")
- } //for
- } // script
- } //steps
- post {
- always {
- sh '''#!/bin/bash -e
- if [ ! -f "output-integration/commentfile" ]; then
- echo "(x) {color:red}-1 source release artifact{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >output-srctarball/commentfile
- echo "(x) {color:red}-1 client integration test{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >output-integration/commentfile
- fi
- '''
- stash name: 'srctarball-result', includes: "output-srctarball/commentfile,output-integration/commentfile"
- sshPublisher(publishers: [
- sshPublisherDesc(configName: 'Nightlies',
- transfers: [
- sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
- sourceFiles: "output-srctarball/hbase-src.tar.gz"
- )
- ]
- )
- ])
- // remove the big src tarball, store the nightlies url in hbase-src.html
- sh '''#!/bin/bash -e
- SRC_TAR="${WORKSPACE}/output-srctarball/hbase-src.tar.gz"
- if [ -f "${SRC_TAR}" ]; then
- echo "Remove ${SRC_TAR} for saving space"
- rm -rf "${SRC_TAR}"
- python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
- else
- echo "No hbase-src.tar.gz, skipping"
- fi
- '''
- archiveArtifacts artifacts: 'output-srctarball/*'
- archiveArtifacts artifacts: 'output-srctarball/**/*'
- archiveArtifacts artifacts: 'output-integration/*'
- archiveArtifacts artifacts: 'output-integration/**/*'
- } //always
- } //post
- } //stage packaging
} // parallel
} //stage:_health checks
} //stages
@@ -1133,32 +707,18 @@ pipeline {
rm -rf ${OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}
rm -rf ${OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}
rm -rf ${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}
- rm -rf ${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-*
- rm -rf output-srctarball
- rm -rf output-integration
'''
unstash 'general-result'
unstash 'jdk8-hadoop2-result'
unstash 'jdk8-hadoop3-result'
unstash 'jdk11-hadoop3-result'
unstash 'jdk17-hadoop3-result'
- unstash 'srctarball-result'
def results = ["${env.OUTPUT_DIR_RELATIVE_GENERAL}/commentfile",
"${env.OUTPUT_DIR_RELATIVE_JDK8_HADOOP2}/commentfile",
"${env.OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}/commentfile",
"${env.OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}/commentfile",
"${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}/commentfile"]
- for (hadoop3_version in hadoop3_versions) {
- if (hadoop3_version == env.HADOOP3_DEFAULT_VERSION) {
- // We haven't run these tests
- continue
- }
- unstash("jdk17-hadoop3-backwards-result-${hadoop3_version}")
- results.add("${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${hadoop3_version}/commentfile")
- }
- results.add('output-srctarball/commentfile')
- results.add('output-integration/commentfile')
echo env.BRANCH_NAME
echo env.BUILD_URL
echo currentBuild.result
diff --git a/dev-support/create-release/README.txt b/dev-support/create-release/README.txt
index f679a47cfb33..14c2b7d458db 100644
--- a/dev-support/create-release/README.txt
+++ b/dev-support/create-release/README.txt
@@ -32,7 +32,8 @@ to remove dry-run mode.
Before starting the RC build, run a reconciliation of what is in JIRA with
what is in the commit log. Make sure they align and that anomalies are
-explained up in JIRA. See http://hbase.apache.org/book.html#maven.release
+explained in JIRA.
+See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate
for how.
Regardless of where your release build will run (locally, locally in docker,
diff --git a/dev-support/create-release/release-build.sh b/dev-support/create-release/release-build.sh
index cc6a5818c19c..8fdf07eaba53 100755
--- a/dev-support/create-release/release-build.sh
+++ b/dev-support/create-release/release-build.sh
@@ -101,6 +101,8 @@ fi
init_locale
init_java
+# Set Java 17 for spotless
+set_java17_home
init_mvn
init_python
# Print out subset of perl version (used in git hooks and japi-compliance-checker)
diff --git a/dev-support/create-release/release-util.sh b/dev-support/create-release/release-util.sh
index fd11ae853efb..02020d93de01 100755
--- a/dev-support/create-release/release-util.sh
+++ b/dev-support/create-release/release-util.sh
@@ -969,7 +969,7 @@ function get_hadoop3_version() {
# case spotless:check failure, so we should run spotless:apply before committing
function maven_spotless_apply() {
# our spotless plugin version requires at least java 11 to run, so we use java 17 here
- JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" "${MVN[@]}" spotless:apply
+ JAVA_HOME="${JAVA17_HOME}" "${MVN[@]}" spotless:apply
}
function git_add_poms() {
diff --git a/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc b/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
index 2476f8a47825..8d588794efef 100644
--- a/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
+++ b/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
@@ -49,7 +49,7 @@ logic to internal code paths.
[[background]]
== Background
-Coprocessors are well link:http://hbase.apache.org/book.html#cp[documented in the refguide].
+Coprocessors are well link:https://hbase.apache.org/docs/cp[documented in the refguide].
Here we give a little background information on involved classes, their responsibilities, and
relationship to each other.
@@ -59,7 +59,7 @@ relationship to each other.
*** All *Observer* interfaces derive from Coprocessor interface.
**** Coprocessor Interface is a _Marker _Interface. It just has start/stop methods and enums for
stages in the Coprocessor Lifecycle.
-** http://hbase.apache.org/book.html#_observer_coprocessors[Observers] (interface)
+** https://hbase.apache.org/docs/cp#observer-coprocessors[Observers] (interface)
*** Contain hooks which third-party programs can override to inject functionality in various
internal code paths. For e.g preCreateTable(...) will be called just before any table is created.
*** Current set of observers: _MasterObserver, RegionObserver, RegionServerObserver, WALObserver,
diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index 26b2c35b3462..294fc272f1c9 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -24,6 +24,8 @@
FROM ubuntu:22.04 AS base_image
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+ARG NODE_VERSION=v20.15.0
+
RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
DEBIAN_FRONTEND=noninteractive apt-get -qq install --no-install-recommends -y \
ca-certificates=20211016 \
@@ -55,6 +57,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
shellcheck='0.8.0-*' \
libxml2-dev='2.9.13+dfsg-*' \
libxml2-utils='2.9.13+dfsg-*' \
+ zip='3.0-*' \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
@@ -70,6 +73,14 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8
+# Install Node.js. Needed for Playwright.
+RUN curl -fsSL "https://nodejs.org/dist/$NODE_VERSION/node-$NODE_VERSION-linux-x64.tar.gz" -o node.tar.gz \
+ && tar -xzf node.tar.gz -C /usr/local --strip-components=1 \
+ && rm node.tar.gz
+
+# Install Playwright dependencies. Needed for docs PDF export and docs UI e2e tests.
+RUN npx -y playwright install --with-deps
+
##
# download sundry dependencies
#
diff --git a/dev-support/flaky-tests/python-requirements.txt b/dev-support/flaky-tests/python-requirements.txt
index 75952d259116..b5e8c120440f 100644
--- a/dev-support/flaky-tests/python-requirements.txt
+++ b/dev-support/flaky-tests/python-requirements.txt
@@ -15,8 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-requests==2.32.4
+requests==2.33.0
future==0.18.3
-gitpython==3.1.41
+gitpython==3.1.47
rbtools==4.0
jinja2==3.1.6
diff --git a/dev-support/flaky-tests/report-flakies.py b/dev-support/flaky-tests/report-flakies.py
index 16096e3344a5..33e9a12f0d5e 100755
--- a/dev-support/flaky-tests/report-flakies.py
+++ b/dev-support/flaky-tests/report-flakies.py
@@ -54,6 +54,11 @@
parser.add_argument('--is-yetus', metavar='True/False', action='append', choices=['True', 'False'],
help='True, if build is yetus style i.e. look for maven output in artifacts; '
'False, if maven output is in /consoleText itself.')
+parser.add_argument('--excludes-threshold-flakiness', metavar='n', type=float, default=20.0,
+ required=False, help='Flakiness threshold for adding a test to the excludes file')
+parser.add_argument('--excludes-threshold-runs', metavar='n', type=int, default=10,
+ required=False,
+ help='The number of times a test must have run before it can be added to the excludes file')
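+# For example, a hypothetical invocation passing --excludes-threshold-flakiness 30
+# --excludes-threshold-runs 5 would exclude only tests that were bad in more than
+# 30% of at least 5 recorded runs.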
parser.add_argument(
"--mvn", action="store_true",
help="Writes two strings for including/excluding these flaky tests using maven flags. These "
@@ -149,7 +154,6 @@ def expand_multi_config_projects(cli_args):
'excludes': excluded_builds, 'is_yetus': is_yetus})
return final_expanded_urls
-
# Set of timeout/failed tests across all given urls.
all_timeout_tests = set()
all_failed_tests = set()
@@ -160,6 +164,8 @@ def expand_multi_config_projects(cli_args):
# Contains { : [run_ids] }
# Used for common min/max build ids when generating sparklines.
url_to_build_ids = OrderedDict()
+all_flaky_results = {}
+
# Iterates over each url, gets test results and prints flaky tests.
expanded_urls = expand_multi_config_projects(args)
@@ -205,36 +211,46 @@ def expand_multi_config_projects(cli_args):
bad_tests.update(failed_tests.union(hanging_tests))
# For each bad test, get build ids where it ran, timed out, failed or hanged.
- test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(),
- 'hanging' : set(), 'bad_count' : 0}
+ test_to_build_ids = {key: {'all': set(), 'timeout': set(), 'failed': set(),
+ 'hanging': set(), 'bad_count': 0}
for key in bad_tests}
+
for build in build_id_to_results:
[all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
- for bad_test in test_to_build_ids:
+ for bad_test, test_result in test_to_build_ids.items():
is_bad = False
if all_tests.issuperset([bad_test]):
- test_to_build_ids[bad_test]["all"].add(build)
+ test_result["all"].add(build)
if timeout_tests.issuperset([bad_test]):
- test_to_build_ids[bad_test]['timeout'].add(build)
+ test_result['timeout'].add(build)
is_bad = True
if failed_tests.issuperset([bad_test]):
- test_to_build_ids[bad_test]['failed'].add(build)
+ test_result['failed'].add(build)
is_bad = True
if hanging_tests.issuperset([bad_test]):
- test_to_build_ids[bad_test]['hanging'].add(build)
+ test_result['hanging'].add(build)
is_bad = True
if is_bad:
- test_to_build_ids[bad_test]['bad_count'] += 1
+ test_result['bad_count'] += 1
# Calculate flakyness % and successful builds for each test. Also sort build ids.
- for bad_test in test_to_build_ids:
- test_result = test_to_build_ids[bad_test]
+ for bad_test, test_result in test_to_build_ids.items():
test_result['flakyness'] = test_result['bad_count'] * 100.0 / len(test_result['all'])
test_result['success'] = (test_result['all'].difference(
test_result['failed'].union(test_result['hanging'])))
for key in ['all', 'timeout', 'failed', 'hanging', 'success']:
test_result[key] = sorted(test_result[key])
-
+ # record flaky test result
+ # record the one with more runs, or greater flakiness if runs are equal
+ if bad_test not in all_flaky_results:
+ all_flaky_results[bad_test] = {'runs': len(test_result['all']),
+ 'flakyness': test_result['flakyness']}
+ elif all_flaky_results[bad_test]['runs'] < len(test_result['all']):
+ all_flaky_results[bad_test] = {'runs': len(test_result['all']),
+ 'flakyness': test_result['flakyness']}
+ elif all_flaky_results[bad_test]['runs'] == len(test_result['all']) and \
+ all_flaky_results[bad_test]['flakyness'] < test_result['flakyness']:
+ all_flaky_results[bad_test]['flakyness'] = test_result['flakyness']
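+ # e.g. a test seen in 12 runs at 25% flakyness on one job and in 8 runs at 50%
+ # on another keeps the 12-run/25% record, since more runs wins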
# Sort tests in descending order by flakyness.
sorted_test_to_build_ids = OrderedDict(
@@ -260,14 +276,21 @@ def expand_multi_config_projects(cli_args):
print("Builds without any test runs: {}".format(build_ids_without_tests_run))
print("")
-
all_bad_tests = all_hanging_tests.union(all_failed_tests)
if args.mvn:
includes = ",".join(all_bad_tests)
with open(output_dir + "/includes", "w") as inc_file:
inc_file.write(includes)
- excludes = ["**/{0}.java".format(bad_test) for bad_test in all_bad_tests]
+ excludes = []
+ for bad_test in all_bad_tests:
+ if bad_test not in all_flaky_results:
+ print(f"No flaky record found for {bad_test}")
+ continue
+ test_result = all_flaky_results[bad_test]
+ if test_result['flakyness'] > args.excludes_threshold_flakiness and \
+ test_result['runs'] >= args.excludes_threshold_runs:
+ excludes.append(f"**/{bad_test}.java")
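+ # e.g. with the defaults (20.0 and 10): a test bad in 3 of 12 runs
+ # (25% flakyness) is excluded, while one bad in 2 of 5 runs is not,
+ # since it has fewer than 10 recorded runs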
with open(output_dir + "/excludes", "w") as exc_file:
exc_file.write(",".join(excludes))
@@ -283,5 +306,5 @@ def expand_multi_config_projects(cli_args):
with open(output_dir + "/dashboard.html", "w") as f:
datetime = time.strftime("%m/%d/%Y %H:%M:%S")
- f.write(template.render(datetime=datetime, bad_tests_count=len(all_bad_tests),
+ f.write(template.render(datetime=datetime, bad_tests_count=len(bad_tests),
results=url_to_bad_test_results, build_ids=url_to_build_ids))
diff --git a/dev-support/generate-website/build-hbase-website.sh b/dev-support/generate-website/build-hbase-website.sh
new file mode 100755
index 000000000000..4e28ff757de4
--- /dev/null
+++ b/dev-support/generate-website/build-hbase-website.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+#
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements. See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership. The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+# This script is meant to run as part of a Jenkins job such as
+# https://builds.apache.org/job/hbase_generate_website/
+
+set -e
+function usage {
+ echo "Usage: ${0} [options] /path/to/hbase/checkout"
+ echo ""
+ echo " --working-dir /path/to/use Path for writing logs and a local checkout of hbase-site repo."
+ echo " if given must exist."
+ echo " defaults to making a directory via mktemp."
+ echo " --local-repo /path/for/maven/.m2 Path for putting local maven repo."
+ echo " if given must exist."
+ echo " defaults to making a clean directory in --working-dir."
+ echo " --help show this usage message."
+ exit 1
+}
+# if no args specified, show usage
+if [ $# -lt 1 ]; then
+ usage
+fi
+
+# Get arguments
+declare component_dir
+declare working_dir
+declare local_repo
+while [ $# -gt 0 ]
+do
+ case "$1" in
+ --working-dir) shift; working_dir=$1; shift;;
+ --local-repo) shift; local_repo=$1; shift;;
+ --) shift; break;;
+ -*) usage ;;
+ *) break;; # terminate while loop
+ esac
+done
+
+# should still have where component checkout is.
+if [ $# -lt 1 ]; then
+ usage
+fi
+
+MVN="mvn"
+if ! command -v mvn &>/dev/null; then
+ MVN=$MAVEN_HOME/bin/mvn
+fi
+
+component_dir="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
+
+if [ -z "${working_dir}" ]; then
+ echo "[DEBUG] defaulting to creating a directory via mktemp"
+ if ! working_dir="$(mktemp -d -t hbase-generate-website)" ; then
+ echo "Failed to create temporary working directory. Please specify via --working-dir"
+ exit 1
+ fi
+else
+ # absolutes please
+ working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
+ if [ ! -d "${working_dir}" ]; then
+ echo "passed working directory '${working_dir}' must already exist."
+ exit 1
+ fi
+fi
+
+echo "You'll find logs and temp files in ${working_dir}"
+
+if [ -z "${local_repo}" ]; then
+ echo "[DEBUG] defaulting to creating a local repo within '${working_dir}'"
+ local_repo="${working_dir}/.m2/repo"
+ # Nuke the local maven repo each time, to start with a known environment
+ rm -Rf "${local_repo}"
+ mkdir -p "${local_repo}"
+else
+ # absolutes please
+ local_repo="$(cd "$(dirname "${local_repo}")"; pwd)/$(basename "${local_repo}")"
+ if [ ! -d "${local_repo}" ]; then
+ echo "passed directory for storing the maven repo '${local_repo}' must already exist."
+ exit 1
+ fi
+fi
+
+export MAVEN_OPTS="${MAVEN_OPTS} -Dmaven.repo.local=${local_repo}"
+
+# Verify the Maven version
+${MVN} -version
+# Verify the git version
+git --version
+
+cd "${working_dir}"
+
+# Clean any leftover files in case we are reusing the workspace
+rm -Rf -- *.patch *.patch.zip target *.txt hbase-site
+
+# Save and print the SHA we are building
+CURRENT_HBASE_COMMIT="$(cd "${component_dir}" && git rev-parse HEAD)"
+# Fail if it's empty
+if [ -z "${CURRENT_HBASE_COMMIT}" ]; then
+ echo "Got back a blank answer for the current HEAD. failing."
+ exit 1
+fi
+echo "Current HBase commit: $CURRENT_HBASE_COMMIT"
+
+# Clone the hbase-site repo manually so it doesn't trigger spurious
+# commits in Jenkins.
+git clone --depth 1 --branch asf-site https://gitbox.apache.org/repos/asf/hbase-site.git
+
+# Figure out if the commit of the hbase repo has already been built and bail if so.
+declare -i PUSHED
+PUSHED=$(cd hbase-site && git rev-list --grep "${CURRENT_HBASE_COMMIT}" --fixed-strings --count HEAD)
+echo "[DEBUG] hash was found in $PUSHED commits for hbase-site repository."
+
+if [ "${PUSHED}" -ne 0 ]; then
+ echo "$CURRENT_HBASE_COMMIT is already mentioned in the hbase-site commit log. Not building."
+ exit 0
+else
+ echo "$CURRENT_HBASE_COMMIT is not yet mentioned in the hbase-site commit log. Assuming we don't have it yet."
+fi
+
+# Go to the hbase directory so we can build the site
+cd "${component_dir}"
+
+# This will only be set for builds that are triggered by SCM change, not manual builds
+if [ -n "$CHANGE_ID" ]; then
+ echo -n " ($CHANGE_ID - $CHANGE_TITLE)"
+fi
+
+# Build and install HBase, then build the site
+echo "Building HBase"
+# TODO we have to do a local install first because for whatever reason, the maven-javadoc-plugin's
+# forked compile phase requires that test-scoped dependencies be available, which
+# doesn't work since we will not have done a test-compile phase (MJAVADOC-490). the first place this
+# breaks for me is hbase-server trying to find hbase-http:test and hbase-zookeeper:test.
+# But! some sunshine: because we're doing a full install before running site, we can skip all the
+# compiling in the forked executions. We have to do it awkwardly because MJAVADOC-444.
+if ${MVN} \
+ --batch-mode \
+ -Psite-install-step \
+ --errors \
+ --log-file="${working_dir}/hbase-install-log-${CURRENT_HBASE_COMMIT}.txt" \
+ clean install \
+ && ${MVN} site \
+ --batch-mode \
+ -Dscala.skip=true \
+ -Psite-build-step \
+ --errors \
+ --log-file="${working_dir}/hbase-site-log-${CURRENT_HBASE_COMMIT}.txt"; then
+ echo "Successfully built site."
+else
+ status=$?
+ echo "Maven commands to build the site failed. check logs for details ${working_dir}/hbase-*-log-*.txt"
+ exit $status
+fi
+
+# Stage the site
+echo "Staging HBase site"
+${MVN} \
+ --batch-mode \
+ --errors \
+ --log-file="${working_dir}/hbase-stage-log-${CURRENT_HBASE_COMMIT}.txt" \
+ site:stage
+status=$?
+if [ $status -ne 0 ] || [ ! -d target/staging ]; then
+ echo "Failure: mvn site:stage"
+ exit $status
+fi
+
+# Get ready to update the hbase-site repo with the new artifacts
+cd "${working_dir}/hbase-site"
+
+# Remove previously-generated files
+FILES_TO_REMOVE=("hbase-*"
+ "apidocs"
+ "devapidocs"
+ "testapidocs"
+ "testdevapidocs"
+ "xref"
+ "xref-test"
+ "*book*"
+ "*.html"
+ "*.pdf*"
+ "css"
+ "js"
+ "images")
+
+for FILE in "${FILES_TO_REMOVE[@]}"; do
+ if [ -e "${FILE}" ]; then
+ echo "Removing hbase-site/$FILE"
+ rm -Rf "${FILE}"
+ fi
+done
+
+# Copy in the newly-built artifacts
+# First copy documentation from Maven site build
+echo "Copying documentation from target/staging"
+# TODO what do we do when the site build wants to remove something? Can't rsync because e.g. release-specific docs.
+cp -pPR "${component_dir}"/target/staging/* .
+
+# Then copy the new website (landing page) from hbase-website/build/client
+echo "Copying new website from hbase-website/build/client"
+cp -pPR "${component_dir}"/hbase-website/build/client/* .
+
+# If the index.html is missing, bail because this is serious
+if [ ! -f index.html ]; then
+ echo "The index.html is missing. Aborting."
+ exit 1
+fi
+
+echo "Adding all the files we know about"
+git add .
+if [[ -z "$(git status --porcelain)" ]]; then
+ echo "No files to commit, skipping..."
+ exit 0
+fi
+# Create the commit message and commit the changes
+WEBSITE_COMMIT_MSG="Published site at $CURRENT_HBASE_COMMIT."
+echo "WEBSITE_COMMIT_MSG: $WEBSITE_COMMIT_MSG"
+git commit -m "${WEBSITE_COMMIT_MSG}" -a
+# Dump a little report
+echo "This commit changed these files (excluding Modified files):"
+git diff --name-status --diff-filter=ADCRTXUB origin/asf-site | tee "${working_dir}/hbase-file-diff-summary-${CURRENT_HBASE_COMMIT}.txt"
+# Create a patch, which Jenkins can save as an artifact and can be examined for debugging
+git format-patch --stdout origin/asf-site > "${working_dir}/${CURRENT_HBASE_COMMIT}.patch"
+if [ ! -s "${working_dir}/${CURRENT_HBASE_COMMIT}.patch" ]; then
+ echo "Something went wrong when creating the patch of our updated site."
+ exit 1
+fi
+echo "Change set saved to patch ${working_dir}/${CURRENT_HBASE_COMMIT}.patch"
+
+# Zip up the patch so Jenkins can save it
+cd "${working_dir}"
+zip website.patch.zip "${CURRENT_HBASE_COMMIT}.patch"
diff --git a/dev-support/generate-website/generate-hbase-website.Jenkinsfile b/dev-support/generate-website/generate-hbase-website.Jenkinsfile
new file mode 100644
index 000000000000..2fdccf0c4f2e
--- /dev/null
+++ b/dev-support/generate-website/generate-hbase-website.Jenkinsfile
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+pipeline {
+ agent {
+ node {
+ label 'hbase'
+ }
+ }
+ triggers {
+ pollSCM('@daily')
+ }
+ options {
+ buildDiscarder(logRotator(numToKeepStr: '30'))
+ timeout (time: 1, unit: 'HOURS')
+ timestamps()
+ skipDefaultCheckout()
+ disableConcurrentBuilds()
+ }
+ parameters {
+ booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+ booleanParam(name: 'FORCE_FAIL', defaultValue: false, description: 'force a failure to test notifications.')
+ }
+ stages {
+ stage ('build hbase website') {
+ steps {
+ dir('component') {
+ checkout scm
+ }
+ sh '''#!/bin/bash -e
+ if [ "${DEBUG}" = "true" ]; then
+ set -x
+ fi
+ if [ "${FORCE_FAIL}" = "true" ]; then
+ false
+ fi
+ user=$(whoami)
+ docker build -t hbase-build-website -f "${WORKSPACE}/component/dev-support/docker/Dockerfile" .
+ docker run --rm -v "${WORKSPACE}":/home/${user} -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+ -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" -e GIT_AUTHOR_NAME="HBase" \
+ -e GIT_AUTHOR_EMAIL="dev@hbase.apache.org" -e GIT_COMMITTER_NAME="HBase" \
+ -e GIT_COMMITTER_EMAIL="dev@hbase.apache.org" --workdir=/home/${user} hbase-build-website \
+ "component/dev-support/generate-website/build-hbase-website.sh" \
+ --working-dir /home/${user} component
+ '''
+ script {
+ if (fileExists('website.patch.zip')) {
+ sh'''#!/bin/bash -e
+ patch=$(ls -1 *.patch | head -n 1)
+ echo "Has patch ${patch}, stash and then publish"
+ '''
+ stash name: 'patch', includes: "*.patch"
+ env.PUBLISH_WEBSITE = "true"
+ } else {
+ echo "No patch file, skip stashing and publishing"
+ env.PUBLISH_WEBSITE = "false"
+ }
+ }
+ }
+ }
+ stage('publish hbase website') {
+ agent {
+ node {
+ label 'git-websites'
+ }
+ }
+ when {
+ expression {
+ return env.PUBLISH_WEBSITE == 'true'
+ }
+ }
+ steps {
+ sh '''#!/bin/bash -e
+ # wipe out stale repo and files
+ rm -rf *.patch
+ rm -rf hbase-site
+ '''
+ unstash 'patch'
+ sh '''#!/bin/bash -e
+ git clone --depth 1 --branch asf-site https://gitbox.apache.org/repos/asf/hbase-site.git
+ patch=$(ls -1 *.patch | head -n 1)
+ cd hbase-site;
+ echo "applying ${patch}"
+ git am ../${patch}
+ echo "Publishing changes to remote repo..."
+ if git push origin asf-site; then
+ echo "changes pushed."
+ else
+ echo "Failed to push to asf-site. Website not updated."
+ exit 1
+ fi
+ echo "Sending empty commit to work around INFRA-10751."
+ git commit --allow-empty -m "INFRA-10751 Empty commit"
+ # Push the empty commit
+ if git push origin asf-site; then
+ echo "empty commit pushed."
+ else
+ echo "Failed to push the empty commit to asf-site. Website may not update. Manually push an empty commit to fix this. (See INFRA-10751)"
+ exit 1
+ fi
+ echo "Pushed the changes to branch asf-site. Refresh http://hbase.apache.org/ to see the changes within a few minutes."
+ '''
+ }
+ }
+ }
+ post {
+ always {
+ // Has to be relative to WORKSPACE.
+ archiveArtifacts artifacts: '*.patch.zip,hbase-*.txt'
+ }
+ failure {
+ mail to: 'dev@hbase.apache.org', replyTo: 'dev@hbase.apache.org', subject: "Failure: HBase Generate Website", body: """
+Build status: ${currentBuild.currentResult}
+
+The HBase website has not been updated to incorporate recent HBase changes.
+
+See ${env.BUILD_URL}console
+"""
+ }
+ cleanup {
+ deleteDir()
+ }
+ }
+}
diff --git a/dev-support/gh_hide_old_comments.sh b/dev-support/gh_hide_old_comments.sh
index 61217cfa241b..abba55a39ad2 100755
--- a/dev-support/gh_hide_old_comments.sh
+++ b/dev-support/gh_hide_old_comments.sh
@@ -40,25 +40,79 @@ declare CURL="${CURL:-curl}"
function fetch_comments {
local pr="$1"
local comments_file
+ local page_file
+ local headers_file
local -a curl_args
- curl_args=(
- --fail
- "${GITHUB_AUTH[@]}"
- --header 'Accept: application/vnd.github+json'
- --header 'X-GitHub-Api-Version: 2022-11-28'
- --request GET
- --url "${GITHUB_API_URL}/repos/${REPO}/issues/${pr}/comments?per_page=500"
- )
- if [ "${DEBUG}" = true ] ; then
- curl_args+=(--verbose)
- else
- curl_args+=(--silent)
- fi
+ local page=1
+ local next_url
comments_file="$(mktemp "comments_${pr}" 2>/dev/null || mktemp -t "comments_${pr}.XXXXXXXXXX")" || \
{ >&2 echo 'cannot create temp file'; exit 1 ;}
- "${CURL}" "${curl_args[@]}" > "${comments_file}"
+ page_file="$(mktemp "page_${pr}" 2>/dev/null || mktemp -t "page_${pr}.XXXXXXXXXX")" || \
+ { >&2 echo 'cannot create temp file'; exit 1 ;}
+ headers_file="$(mktemp "headers_${pr}" 2>/dev/null || mktemp -t "headers_${pr}.XXXXXXXXXX")" || \
+ { >&2 echo 'cannot create temp file'; exit 1 ;}
+
+ # cleanup temp files on error
+ trap 'rm -f "${page_file}" "${headers_file}"; exit 1' ERR
+
+ next_url="${GITHUB_API_URL}/repos/${REPO}/issues/${pr}/comments?per_page=100"
+
+ # start with empty JSON array
+ echo '[]' > "${comments_file}"
+
+ while [ -n "${next_url}" ] ; do
+ curl_args=(
+ --fail
+ --max-time 30
+ "${GITHUB_AUTH[@]}"
+ --header 'Accept: application/vnd.github+json'
+ --header 'X-GitHub-Api-Version: 2022-11-28'
+ --dump-header "${headers_file}"
+ --request GET
+ --url "${next_url}"
+ )
+ if [ "${DEBUG}" = true ] ; then
+ curl_args+=(--verbose)
+ >&2 echo "Fetching page ${page}: ${next_url}"
+ else
+ curl_args+=(--silent)
+ fi
+
+ if ! "${CURL}" "${curl_args[@]}" > "${page_file}"; then
+ >&2 echo "Failed to fetch page ${page}: ${next_url}"
+ rm -f "${page_file}" "${headers_file}"
+ exit 1
+ fi
+
+ if [ "${DEBUG}" = 'true' ] ; then
+ >&2 echo "Page ${page} returned $(jq length "${page_file}") comments"
+ fi
+
+ # merge this page into the accumulated results
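+ # (jq -s slurps both inputs into a single array, so '.[0] + .[1]' concatenates
+ # the two comment arrays, e.g. [1,2] + [3] -> [1,2,3])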
+ if ! jq -s '.[0] + .[1]' "${comments_file}" "${page_file}" > "${comments_file}.tmp"; then
+ >&2 echo "Failed to merge comments from page ${page}"
+ rm -f "${page_file}" "${headers_file}" "${comments_file}.tmp"
+ exit 1
+ fi
+ mv "${comments_file}.tmp" "${comments_file}"
+
+ # check for next page in Link header
+ # Link header format: <url>; rel="next", <url>; rel="last"
+ # Extract URL associated with rel="next" regardless of position
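+ # e.g. (hypothetical URLs):
+ #   link: <https://api.github.com/repos/o/r/issues/1/comments?page=2>; rel="next",
+ #         <https://api.github.com/repos/o/r/issues/1/comments?page=5>; rel="last"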
+ next_url=""
+ if grep -qi '^link:' "${headers_file}" ; then
+ next_url=$(grep -i '^link:' "${headers_file}" | tr ',' '\n' | grep 'rel="next"' | sed -n 's/.*<\([^>]*\)>.*/\1/p' || true)
+ fi
+
+ page=$((page + 1))
+ done
+
+ rm -f "${page_file}" "${headers_file}"
+ trap - ERR
+
if [ "${DEBUG}" = 'true' ] ; then
+ >&2 echo "Total comments fetched: $(jq length "${comments_file}")"
>&2 cat "${comments_file}"
fi
echo "${comments_file}"
@@ -104,11 +158,16 @@ function identify_most_recent_build_number {
local pr="$1"
local comments_file="$2"
local jq_filter
+ local url_pattern="${JOB_NAME}/job/PR-${pr}/(?[0-9]+)/"
+ # GitHub Actions URLs don't have /job/ in them
+ if [[ "${JOB_NAME}" == *"GH-Actions"* ]]; then
+ url_pattern="${JOB_NAME}/PR-${pr}/(?[0-9]+)/"
+ fi
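+ # e.g. with a hypothetical JOB_NAME of "HBase-PreCommit", a bot comment linking to
+ # .../HBase-PreCommit/job/PR-123/7/ yields buildnum 7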
read -r -d '' jq_filter << EOF || :
.[] \
| select(.user.id == ${BUILD_BOT_USER_ID}) \
| .body \
-| capture("${JOB_NAME}/job/PR-${pr}/(?[0-9]+)/") \
+| capture("${url_pattern}") \
| .buildnum
EOF
@@ -122,10 +181,15 @@ function identify_old_comment_ids {
local comments_file="$2"
local most_recent_build_number="$3"
local jq_filter
+ local url_pattern="${JOB_NAME}/job/PR-${pr}/(?[0-9]+)/"
+ # GitHub Actions URLs don't have /job/ in them
+ if [[ "${JOB_NAME}" == *"GH-Actions"* ]]; then
+ url_pattern="${JOB_NAME}/PR-${pr}/(?[0-9]+)/"
+ fi
read -r -d '' jq_filter << EOF || :
.[] \
| select(.user.id == ${BUILD_BOT_USER_ID}) \
-| { node_id, buildnum: (.body | capture("${JOB_NAME}/job/PR-${pr}/(?<buildnum>[0-9]+)/") | .buildnum | tonumber) } \
+| { node_id, buildnum: (.body | capture("${url_pattern}") | .buildnum | tonumber) } \
| select(.buildnum < (${most_recent_build_number} | tonumber)) \
| .node_id
EOF
diff --git a/dev-support/git-jira-release-audit/requirements.txt b/dev-support/git-jira-release-audit/requirements.txt
index 8eb0eb04305f..7aaaacf23aa5 100644
--- a/dev-support/git-jira-release-audit/requirements.txt
+++ b/dev-support/git-jira-release-audit/requirements.txt
@@ -19,21 +19,21 @@ blessed==1.17.0
certifi==2024.7.4
cffi==1.13.2
chardet==3.0.4
-cryptography==44.0.1
+cryptography==46.0.7
defusedxml==0.6.0
enlighten==1.4.0
gitdb2==2.0.6
-GitPython==3.1.41
+GitPython==3.1.47
idna==3.7
jira==2.0.0
oauthlib==3.1.0
pbr==5.4.4
pycparser==2.19
-PyJWT==2.4.0
-requests==2.32.4
+PyJWT==2.12.0
+requests==2.33.0
requests-oauthlib==1.3.0
requests-toolbelt==0.9.1
six==1.14.0
smmap2==2.0.5
-urllib3==2.5.0
+urllib3==2.6.3
wcwidth==0.1.8
diff --git a/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile b/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile
new file mode 100644
index 000000000000..8e8eff8ded30
--- /dev/null
+++ b/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile
@@ -0,0 +1,326 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Jenkinsfile for Hadoop3 Backwards Compatibility Checks
+// Uses matrix job to parallelize checks across different Hadoop3 versions
+
+pipeline {
+ agent {
+ node {
+ label 'hbase'
+ }
+ }
+ triggers {
+ pollSCM('H H */2 * *')
+ }
+ options {
+ buildDiscarder(logRotator(numToKeepStr: '20'))
+ timeout (time: 8, unit: 'HOURS')
+ timestamps()
+ skipDefaultCheckout()
+ disableConcurrentBuilds()
+ }
+ environment {
+ YETUS_RELEASE = '0.15.0'
+ HADOOP_VERSIONS = "3.2.4,3.3.5,3.3.6,3.4.0,3.4.1,3.4.2"
+ }
+ parameters {
+ booleanParam(name: 'USE_YETUS_PRERELEASE', defaultValue: false, description: '''Check to use the current HEAD of apache/yetus rather than our configured release.
+
+ Should only be used manually when, e.g., there is an issue in yetus that cannot be worked around and we are checking a fix for it.''')
+ booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+ }
+ stages {
+ stage ('scm-checkout') {
+ steps {
+ dir('component') {
+ checkout scm
+ }
+ }
+ }
+ stage ('thirdparty installs') {
+ parallel {
+ stage ('yetus install') {
+ steps {
+ dir('downloads-yetus') {
+ sh '''#!/usr/bin/env bash
+ echo "Make sure we have a directory for downloading dependencies: $(pwd)"
+ '''
+ }
+ sh '''#!/usr/bin/env bash
+ set -e
+ echo "Ensure we have a copy of Apache Yetus."
+ if [[ true != "${USE_YETUS_PRERELEASE}" ]]; then
+ YETUS_DIR="${WORKSPACE}/yetus-${YETUS_RELEASE}"
+ echo "Checking for Yetus ${YETUS_RELEASE} in '${YETUS_DIR}'"
+ if ! "${YETUS_DIR}/bin/test-patch" --version >/dev/null 2>&1 ; then
+ rm -rf "${YETUS_DIR}"
+ "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+ --working-dir "${WORKSPACE}/downloads-yetus" \
+ --keys 'https://downloads.apache.org/yetus/KEYS' \
+ --verify-tar-gz \
+ "${WORKSPACE}/yetus-${YETUS_RELEASE}-bin.tar.gz" \
+ "yetus/${YETUS_RELEASE}/apache-yetus-${YETUS_RELEASE}-bin.tar.gz"
+ mv "yetus-${YETUS_RELEASE}-bin.tar.gz" yetus.tar.gz
+ else
+ echo "Reusing cached install of Apache Yetus version ${YETUS_RELEASE}."
+ fi
+ else
+ YETUS_DIR="${WORKSPACE}/yetus-git"
+ rm -rf "${YETUS_DIR}"
+ echo "downloading from github"
+ curl -L --fail https://api.github.com/repos/apache/yetus/tarball/HEAD -o yetus.tar.gz
+ fi
+ if [ ! -d "${YETUS_DIR}" ]; then
+ echo "unpacking yetus into '${YETUS_DIR}'"
+ mkdir -p "${YETUS_DIR}"
+ gunzip -c yetus.tar.gz | tar xpf - -C "${YETUS_DIR}" --strip-components 1
+ fi
+ '''
+ stash name: 'yetus', includes: "yetus-*/*,yetus-*/**/*"
+ }
+ }
+ }
+ }
+ stage ('backwards compatibility checks') {
+ matrix {
+ axes {
+ axis {
+ name 'HADOOP3_VERSION'
+ values '3.2.4', '3.3.5', '3.3.6', '3.4.0', '3.4.1', '3.4.2'
+ }
+ }
+ agent {
+ node {
+ label 'hbase'
+ }
+ }
+ when {
+ expression {
+ if (HADOOP3_VERSION == '3.2.4') {
+ // only branch-2.5 needs to run against hadoop 3.2.4; this also includes
+ // HBASE-XXXXX-branch-2.5 feature branches
+ return env.BRANCH_NAME.contains('branch-2.5')
+ }
+ return true
+ }
+ }
+ environment {
+ PROJECT = 'hbase'
+ BASEDIR = "${WORKSPACE}/component"
+ PERSONALITY_FILE = "${BASEDIR}/dev-support/hbase-personality.sh"
+ TESTS_FILTER = 'checkstyle,javac,javadoc,pylint,shellcheck,shelldocs,blanks,perlcritic,ruby-lint,rubocop'
+ EXCLUDE_TESTS_URL = "${JENKINS_URL}/job/HBase-Find-Flaky-Tests/job/${BRANCH_NAME}/lastSuccessfulBuild/artifact/output/excludes"
+ ASF_NIGHTLIES = 'https://nightlies.apache.org'
+ ASF_NIGHTLIES_BASE_ORI = "${ASF_NIGHTLIES}/hbase/${JOB_NAME}/${BUILD_NUMBER}"
+ ASF_NIGHTLIES_BASE = "${ASF_NIGHTLIES_BASE_ORI.replaceAll(' ', '%20')}"
+ TESTS = 'compile,htmlout,javac,maven,mvninstall,shadedjars,unit'
+ SET_JAVA_HOME = "/usr/lib/jvm/java-17"
+ HADOOP_PROFILE = '3.0'
+ TEST_PROFILE = 'runDevTests'
+ SKIP_ERRORPRONE = true
+ OUTPUT_DIR_RELATIVE = "output-jdk17-hadoop3-backwards-${HADOOP3_VERSION}"
+ OUTPUT_DIR = "${WORKSPACE}/${OUTPUT_DIR_RELATIVE}"
+ AUTHOR_IGNORE_LIST = 'src/main/asciidoc/_chapters/developer.adoc'
+ BLANKS_EOL_IGNORE_FILE = 'dev-support/blanks-eol-ignore.txt'
+ BLANKS_TABS_IGNORE_FILE = 'dev-support/blanks-tabs-ignore.txt'
+ // output from surefire; sadly the archive function in yetus only works on file names.
+ ARCHIVE_PATTERN_LIST = 'TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump'
+ }
+ stages {
+ stage ('run checks') {
+ steps {
+ sh '''#!/usr/bin/env bash
+ set -e
+ rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}"
+ rm -f "${OUTPUT_DIR}/commentfile"
+ '''
+ unstash 'yetus'
+ dir('component') {
+ checkout scm
+ }
+ sh '''#!/usr/bin/env bash
+ set -e
+ rm -rf "${OUTPUT_DIR}/machine" && mkdir "${OUTPUT_DIR}/machine"
+ "${BASEDIR}/dev-support/gather_machine_environment.sh" "${OUTPUT_DIR_RELATIVE}/machine"
+ echo "got the following saved stats in '${OUTPUT_DIR_RELATIVE}/machine'"
+ ls -lh "${OUTPUT_DIR_RELATIVE}/machine"
+ '''
+ script {
+ def ret = sh(
+ returnStatus: true,
+ script: '''#!/usr/bin/env bash
+ set -e
+ declare -i status=0
+ if "${BASEDIR}/dev-support/hbase_nightly_yetus.sh" ; then
+ echo "(/) {color:green}+1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
+ else
+ echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
+ status=1
+ fi
+ echo "-- For more information [see jdk17 report|${BUILD_URL}console]" >> "${OUTPUT_DIR}/commentfile"
+ exit "${status}"
+ '''
+ )
+ if (ret != 0) {
+ currentBuild.result = 'UNSTABLE'
+ }
+ }
+ }
+ }
+ }
+ post {
+ always {
+ script {
+ stash name: "jdk17-hadoop3-backwards-result-${HADOOP3_VERSION}", includes: "${OUTPUT_DIR_RELATIVE}/commentfile"
+ junit testResults: "${env.OUTPUT_DIR_RELATIVE}/**/target/**/TEST-*.xml", allowEmptyResults: true
+ // zip surefire reports.
+ sh '''#!/bin/bash -e
+ if [ ! -f "${OUTPUT_DIR}/commentfile" ]; then
+ echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" >"${OUTPUT_DIR}/commentfile"
+ echo "-- Something went wrong running this stage, please [check relevant console output|${BUILD_URL}/console]." >> "${OUTPUT_DIR}/commentfile"
+ fi
+ if [ -d "${OUTPUT_DIR}/archiver" ]; then
+ count=$(find "${OUTPUT_DIR}/archiver" -type f | wc -l)
+ if [[ 0 -ne ${count} ]]; then
+ echo "zipping ${count} archived files"
+ zip -q -m -r "${OUTPUT_DIR}/test_logs.zip" "${OUTPUT_DIR}/archiver"
+ else
+ echo "No archived files, skipping compressing."
+ fi
+ else
+ echo "No archiver directory, skipping compressing."
+ fi
+ '''
+ def logFile = "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip"
+ if (fileExists(logFile)) {
+ sshPublisher(publishers: [
+ sshPublisherDesc(configName: 'Nightlies',
+ transfers: [
+ sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
+ sourceFiles: "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip"
+ )
+ ]
+ )
+ ])
+ sh '''#!/bin/bash -e
+ echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
+ rm -rf "${OUTPUT_DIR}/test_logs.zip"
+ python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
+ '''
+ }
+ archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/*"
+ archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/**/*"
+ publishHTML target: [
+ allowMissing: true,
+ keepAll: true,
+ alwaysLinkToLastBuild: true,
+ reportDir: "${env.OUTPUT_DIR_RELATIVE}",
+ reportFiles: 'console-report.html',
+ reportName: "JDK17 Nightly Build Report (Hadoop ${HADOOP3_VERSION} backwards compatibility)"
+ ]
+ } // script
+ } // always
+ } // post
+ } // matrix
+ } // stage ('backwards compatibility checks')
+ } // stages
+ post {
+ always {
+ script {
+ sh "printenv"
+ // wipe out all the output directories before unstashing
+ sh'''
+ echo "Clean up result directories"
+ rm -rf output-jdk17-hadoop3-backwards-*
+ '''
+ def results = []
+ for (hadoopVersion in getHadoopVersions(env.HADOOP_VERSIONS)) {
+ try {
+ unstash "jdk17-hadoop3-backwards-result-${hadoopVersion}"
+ results.add("output-jdk17-hadoop3-backwards-${hadoopVersion}/commentfile")
+ } catch (e) {
+ echo "unstash ${hadoopVersion} failed, ignore"
+ }
+ }
+ try {
+ def comment = "Results for branch ${env.BRANCH_NAME}\n"
+ comment += "\t[build ${currentBuild.displayName} on builds.a.o|${env.BUILD_URL}]: "
+ if (currentBuild.result == null || currentBuild.result == "SUCCESS") {
+ comment += "(/) *{color:green}+1 overall{color}*\n"
+ } else {
+ comment += "(x) *{color:red}-1 overall{color}*\n"
+ }
+ comment += "----\n"
+ comment += "Backwards compatibility checks:\n"
+ comment += results.collect { fileExists(file: it) ? readFile(file: it) : "" }.join("\n\n")
+
+ echo "[INFO] Comment:"
+ echo comment
+
+ def jiras = getJirasToComment(env.BRANCH_NAME, [])
+ if (jiras.isEmpty()) {
+ echo "[DEBUG] non-feature branch, checking change messages for jira keys."
+ jiras = getJirasToCommentFromChangesets(currentBuild)
+ }
+ jiras.each { currentIssue ->
+ jiraComment issueKey: currentIssue, body: comment
+ }
+ } catch (Exception exception) {
+ echo "Got exception: ${exception}"
+ echo " ${exception.getStackTrace()}"
+ }
+ }
+ }
+ }
+}
+
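+// e.g. getHadoopVersions("3.3.6, 3.4.2,") returns ['3.3.6', '3.4.2']; entries are
+// trimmed and blanks dropped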
+@NonCPS
+List getHadoopVersions(String versions) {
+ return versions.split(',').collect { it.trim() }.findAll { it } as String[]
+}
+
+import org.jenkinsci.plugins.workflow.support.steps.build.RunWrapper
+@NonCPS
+List getJirasToCommentFromChangesets(RunWrapper thisBuild) {
+ def seenJiras = []
+ thisBuild.changeSets.each { cs ->
+ cs.getItems().each { change ->
+ CharSequence msg = change.msg
+ echo "change: ${change}"
+ echo " ${msg}"
+ echo " ${change.commitId}"
+ echo " ${change.author}"
+ seenJiras = getJirasToComment(msg, seenJiras)
+ }
+ }
+ return seenJiras
+}
+
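+// e.g. getJirasToComment("HBASE-12345 fix; see also HBASE-12345, HBASE-9999", [])
+// returns ['HBASE-12345', 'HBASE-9999']; a key already in 'seen' is not added twice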
+@NonCPS
+List getJirasToComment(CharSequence source, List seen) {
+ source.eachMatch("HBASE-[0-9]+") { currentIssue ->
+ echo "[DEBUG] found jira key: ${currentIssue}"
+ if (currentIssue in seen) {
+ echo "[DEBUG] already commented on ${currentIssue}."
+ } else {
+ echo "[INFO] commenting on ${currentIssue}."
+ seen << currentIssue
+ }
+ }
+ return seen
+}
diff --git a/dev-support/hbase-personality.sh b/dev-support/hbase-personality.sh
index 9a5d34cc2138..577f7c77deb9 100755
--- a/dev-support/hbase-personality.sh
+++ b/dev-support/hbase-personality.sh
@@ -298,30 +298,16 @@ function personality_file_tests
{
local filename=$1
yetus_debug "HBase specific personality_file_tests"
- # If the change is to the refguide, then we don't need any builtin yetus tests
- # the refguide test (below) will suffice for coverage.
- if [[ ${filename} =~ src/main/asciidoc ]] ||
- [[ ${filename} =~ src/main/xslt ]]; then
- yetus_debug "Skipping builtin yetus checks for ${filename}. refguide test should pick it up."
- else
- # If we change our asciidoc, rebuild mvnsite
- if [[ ${BUILDTOOL} = maven ]]; then
- if [[ ${filename} =~ src/site || ${filename} =~ src/main/asciidoc ]]; then
- yetus_debug "tests/mvnsite: ${filename}"
- add_test mvnsite
- fi
- fi
- # If we change checkstyle configs, run checkstyle
- if [[ ${filename} =~ checkstyle.*\.xml ]]; then
- yetus_debug "tests/checkstyle: ${filename}"
- add_test checkstyle
- fi
- # fallback to checking which tests based on what yetus would do by default
- if declare -f "${BUILDTOOL}_builtin_personality_file_tests" >/dev/null; then
- "${BUILDTOOL}_builtin_personality_file_tests" "${filename}"
- elif declare -f builtin_personality_file_tests >/dev/null; then
- builtin_personality_file_tests "${filename}"
- fi
+ # If we change checkstyle configs, run checkstyle
+ if [[ ${filename} =~ checkstyle.*\.xml ]]; then
+ yetus_debug "tests/checkstyle: ${filename}"
+ add_test checkstyle
+ fi
+ # fallback to checking which tests based on what yetus would do by default
+ if declare -f "${BUILDTOOL}_builtin_personality_file_tests" >/dev/null; then
+ "${BUILDTOOL}_builtin_personality_file_tests" "${filename}"
+ elif declare -f builtin_personality_file_tests >/dev/null; then
+ builtin_personality_file_tests "${filename}"
fi
}
@@ -330,6 +316,9 @@ function personality_file_tests
## @audience private
## @stability evolving
## @param name of variable to set with maven arguments
+# NOTE: INCLUDE_TESTS_URL uses -Dtest= which conflicts with pom.xml patterns.
+# Do not use INCLUDE_TESTS_URL with profiles that define their own patterns
+# (e.g., runLargeTests-wave1, runLargeTests-wave2, runLargeTests-wave3).
function get_include_exclude_tests_arg
{
local __resultvar=$1
@@ -397,8 +386,7 @@ function refguide_filefilter
# we only generate ref guide on master branch now
if [[ "${PATCH_BRANCH}" = master ]]; then
- if [[ ${filename} =~ src/main/asciidoc ]] ||
- [[ ${filename} =~ src/main/xslt ]] ||
+ if [[ ${filename} =~ hbase-website ]] ||
[[ ${filename} =~ hbase-common/src/main/resources/hbase-default\.xml ]]; then
add_test refguide
fi
@@ -424,8 +412,8 @@ function refguide_rebuild
# shellcheck disable=2046
echo_and_redirect "${logfile}" \
$(maven_executor) clean site --batch-mode \
- -pl . \
- -Dtest=NoUnitTests -DHBasePatchProcess -Prelease \
+ -pl hbase-website \
+ -DskipTests -DHBasePatchProcess -Prelease \
-Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
count=$(${GREP} -c '\[ERROR\]' "${logfile}")
@@ -435,31 +423,37 @@ function refguide_rebuild
return 1
fi
- if ! mv target/site "${PATCH_DIR}/${repostatus}-site"; then
+ if ! mv hbase-website/build/client "${PATCH_DIR}/${repostatus}-site"; then
add_vote_table -1 refguide "${repostatus} failed to produce a site directory."
add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
return 1
fi
- if [[ ! -f "${PATCH_DIR}/${repostatus}-site/book.html" ]]; then
+ if [[ ! -f "${PATCH_DIR}/${repostatus}-site/index.html" ]]; then
add_vote_table -1 refguide "${repostatus} failed to produce the html version of the reference guide."
add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
return 1
fi
- pdf_output="apache_hbase_reference_guide.pdf"
+pdf_output="apache-hbase-reference-guide.pdf"
- if [[ ! -f "${PATCH_DIR}/${repostatus}-site/${pdf_output}" ]]; then
+ if ! mv "hbase-website/public/books/${pdf_output}" "${PATCH_DIR}/${repostatus}-site"; then
add_vote_table -1 refguide "${repostatus} failed to produce the pdf version of the reference guide."
add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
return 1
fi
+ if [[ ! -f "${PATCH_DIR}/${repostatus}-site/${pdf_output}" ]]; then
+ add_vote_table -1 refguide "${repostatus} failed to verify the pdf version of the reference guide."
+ add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
+ return 1
+ fi
+
add_vote_table 0 refguide "${repostatus} has no errors when building the reference guide. See footer for rendered docs, which you should manually inspect."
if [[ -n "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}" ]]; then
- add_footer_table refguide "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/${repostatus}-site/book.html"
+ add_footer_table refguide "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/${repostatus}-site/index.html"
else
- add_footer_table refguide "@@BASE@@/${repostatus}-site/book.html"
+ add_footer_table refguide "@@BASE@@/${repostatus}-site/index.html"
fi
return 0
}
@@ -612,17 +606,17 @@ function hadoopcheck_rebuild
# TODO remove this on non 2.5 branches ?
yetus_info "Setting Hadoop 3 versions to test based on branch-2.5 rules"
if [[ "${QUICK_HADOOPCHECK}" == "true" ]]; then
- hbase_hadoop3_versions="3.2.4 3.3.6 3.4.1"
+ hbase_hadoop3_versions="3.2.4 3.3.6 3.4.2"
else
- hbase_hadoop3_versions="3.2.3 3.2.4 3.3.2 3.3.3 3.3.4 3.3.5 3.3.6 3.4.0 3.4.1"
+ hbase_hadoop3_versions="3.2.3 3.2.4 3.3.2 3.3.3 3.3.4 3.3.5 3.3.6 3.4.0 3.4.1 3.4.2"
fi
else
yetus_info "Setting Hadoop 3 versions to test based on branch-2.6+/master/feature branch rules"
# Isn't running these tests with the default Hadoop version redundant ?
if [[ "${QUICK_HADOOPCHECK}" == "true" ]]; then
- hbase_hadoop3_versions="3.3.6 3.4.1"
+ hbase_hadoop3_versions="3.3.6 3.4.2"
else
- hbase_hadoop3_versions="3.3.5 3.3.6 3.4.0 3.4.1"
+ hbase_hadoop3_versions="3.3.5 3.3.6 3.4.0 3.4.1 3.4.2"
fi
fi
@@ -852,6 +846,7 @@ function spotless_rebuild
{
local repostatus=$1
local logfile="${PATCH_DIR}/${repostatus}-spotless.txt"
+ local linecommentsfile="${PATCH_DIR}/${repostatus}-spotless-linecomments.txt"
if ! verify_needed_test spotless; then
return 0
@@ -869,12 +864,27 @@ function spotless_rebuild
count=$(${GREP} -c '\[ERROR\]' "${logfile}")
if [[ ${count} -gt 0 ]]; then
- add_vote_table -1 spotless "${repostatus} has ${count} errors when running spotless:check, run spotless:apply to fix."
- add_footer_table spotless "@@BASE@@/${repostatus}-spotless.txt"
+ # Generate file-level annotations for GitHub Actions
+ if [[ -n "${BUGLINECOMMENTS}" ]]; then
+ # Extract files with violations: lines like "[ERROR] src/path/to/file.java"
+ # with leading whitespace after [ERROR]
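+ # e.g. a (hypothetical) log line "[ERROR]  hbase-server/src/main/java/Foo.java"
+ # becomes "hbase-server/src/main/java/Foo.java:1:Spotless formatting required, ..."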
+ ${GREP} '^\[ERROR\][[:space:]]\+[^[:space:]]' "${logfile}" \
+ | ${SED} 's/^\[ERROR\][[:space:]]*//g' \
+ | while read -r file; do
+ echo "${file}:1:Spotless formatting required, run mvn spotless:apply"
+ done > "${linecommentsfile}"
+ if [[ -s "${linecommentsfile}" ]]; then
+ bugsystem_linecomments_queue spotless "${linecommentsfile}"
+ fi
+ fi
+
+ add_vote_table_v2 -1 spotless \
+ "@@BASE@@/${repostatus}-spotless.txt" \
+ "${repostatus} has ${count} errors when running spotless:check, run spotless:apply to fix."
return 1
fi
- add_vote_table +1 spotless "${repostatus} has no errors when running spotless:check."
+ add_vote_table_v2 +1 spotless "" "${repostatus} has no errors when running spotless:check."
return 0
}
diff --git a/dev-support/hbase-vote.sh b/dev-support/hbase-vote.sh
index abaa437fd750..3bab4f9b813f 100755
--- a/dev-support/hbase-vote.sh
+++ b/dev-support/hbase-vote.sh
@@ -149,8 +149,8 @@ function verify_checksums() {
}
function unzip_from_source() {
- tar -zxvf hbase-"${HBASE_VERSION}"-src.tar.gz
- cd hbase-"${HBASE_VERSION}"
+ tar -zxvf *-src.tar.gz
+ cd "$(tar -tzf *-src.tar.gz | head -1 | cut -d/ -f1)"
}
function rat_test() {
diff --git a/dev-support/integration-test/integration-test.Jenkinsfile b/dev-support/integration-test/integration-test.Jenkinsfile
new file mode 100644
index 000000000000..b17025ab6425
--- /dev/null
+++ b/dev-support/integration-test/integration-test.Jenkinsfile
@@ -0,0 +1,404 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+pipeline {
+ agent {
+ node {
+ label 'hbase'
+ }
+ }
+ triggers {
+ pollSCM('@daily')
+ }
+ options {
+ buildDiscarder(logRotator(numToKeepStr: '20'))
+ timeout (time: 16, unit: 'HOURS')
+ timestamps()
+ skipDefaultCheckout()
+ disableConcurrentBuilds()
+ }
+ environment {
+ HADOOP_VERSIONS = "2.10.2,3.2.4,3.3.5,3.3.6,3.4.0,3.4.1,3.4.2,3.4.3"
+ BASEDIR = "${env.WORKSPACE}/component"
+ }
+ parameters {
+ booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+ }
+ stages {
+ stage('scm-checkout') {
+ steps {
+ dir('component') {
+ checkout scm
+ }
+ }
+ }
+ // This is meant to mimic what a release manager will do to create RCs.
+ // See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate
+ // TODO (HBASE-23870): replace this with invocation of the release tool
+ stage ('packaging test') {
+ steps {
+ sh '''#!/bin/bash -e
+ echo "Setting up directories"
+ rm -rf "output-srctarball" && mkdir "output-srctarball"
+ rm -rf "unpacked_src_tarball" && mkdir "unpacked_src_tarball"
+ rm -rf ".m2-for-repo" && mkdir ".m2-for-repo"
+ rm -rf ".m2-for-src" && mkdir ".m2-for-src"
+ '''
+ sh '''#!/bin/bash -e
+ rm -rf "output-srctarball/machine" && mkdir "output-srctarball/machine"
+ "${BASEDIR}/dev-support/gather_machine_environment.sh" "output-srctarball/machine"
+ echo "got the following saved stats in 'output-srctarball/machine'"
+ ls -lh "output-srctarball/machine"
+ '''
+ sh '''#!/bin/bash -e
+ echo "Checking the steps for an RM to make a source artifact, then a binary artifact."
+ docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
+ docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+ -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" --workdir=/hbase hbase-integration-test \
+ "component/dev-support/integration-test/source-artifact.sh" \
+ --intermediate-file-dir output-srctarball \
+ --unpack-temp-dir unpacked_src_tarball \
+ --maven-m2-initial .m2-for-repo \
+ --maven-m2-src-build .m2-for-src \
+ --clean-source-checkout \
+ component
+ if [ $? -eq 0 ]; then
+ echo '(/) {color:green}+1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
+ else
+ echo '(x) {color:red}-1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
+ exit 1
+ fi
+ '''
+ echo "make sure we have proper hbase tarballs under hbase-assembly"
+ sh '''#!/bin/bash -e
+ if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v hadoop3 | wc -l) ]; then
+ echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected binaries.' >>output-srctarball/commentfile
+ exit 1
+ fi
+ if [[ "${BRANCH_NAME}" == *"branch-2"* ]]; then
+ if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | wc -l) ]; then
+ echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected hadoop3 binaries.' >>output-srctarball/commentfile
+ exit 1
+ fi
+ fi
+ '''
+ stash name: 'hbase-install', includes: "unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz"
+ } // steps
+ post {
+ always {
+ script {
+ def srcFile = "${env.WORKSPACE}/output-srctarball/hbase-src.tar.gz"
+ if (fileExists(srcFile)) {
+ echo "upload hbase-src.tar.gz to nightlies"
+ sshPublisher(publishers: [
+ sshPublisherDesc(configName: 'Nightlies',
+ transfers: [
+ sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
+ sourceFiles: srcFile
+ )
+ ]
+ )
+ ])
+ // remove the big src tarball, store the nightlies url in hbase-src.html
+ sh '''#!/bin/bash -e
+ SRC_TAR="${WORKSPACE}/output-srctarball/hbase-src.tar.gz"
+ echo "Remove ${SRC_TAR} for saving space"
+ rm -rf "${SRC_TAR}"
+ python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
+ '''
+ }
+ }
+ archiveArtifacts artifacts: 'output-srctarball/*'
+ archiveArtifacts artifacts: 'output-srctarball/**/*'
+ }
+ }
+ } // packaging test
+ stage ('integration test matrix') {
+ matrix {
+ agent {
+ node {
+ label 'hbase'
+ }
+ }
+ axes {
+ axis {
+ name 'HADOOP_VERSION'
+ // matrix does not support dynamic axis values, so keep this list in sync with
+ // the HADOOP_VERSIONS environment variable above
+ values "2.10.2","3.2.4","3.3.5","3.3.6","3.4.0","3.4.1","3.4.2","3.4.3"
+ }
+ }
+ environment {
+ BASEDIR = "${env.WORKSPACE}/component"
+ OUTPUT_DIR = "output-integration-hadoop-${env.HADOOP_VERSION}"
+ }
+ when {
+ expression {
+ if (HADOOP_VERSION == '2.10.2') {
+ // only branch-2/branch-2.x need to run against hadoop2; this also includes
+ // HBASE-XXXXX-branch-2 feature branches
+ return env.BRANCH_NAME.contains('branch-2')
+ }
+ if (HADOOP_VERSION == '3.2.4') {
+ // only branch-2.5 needs to run against hadoop 3.2.4; this also includes
+ // HBASE-XXXXX-branch-2.5 feature branches
+ return env.BRANCH_NAME.contains('branch-2.5')
+ }
+ return true
+ }
+ }
+ stages {
+ stage('scm-checkout') {
+ steps {
+ sh '''#!/bin/bash -e
+ echo "Setting up directories"
+ rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}"
+ echo "(x) {color:red}-1 client integration test for ${HADOOP_VERSION}{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >${OUTPUT_DIR}/commentfile
+ rm -rf "unpacked_src_tarball"
+ rm -rf "hbase-install" && mkdir "hbase-install"
+ rm -rf "hbase-client" && mkdir "hbase-client"
+ rm -rf "hadoop-install" && mkdir "hadoop-install"
+ rm -rf "hbase-hadoop3-install"
+ rm -rf "hbase-hadoop3-client"
+ # remove old hadoop tarballs in workspace
+ rm -rf hadoop-*.tar.gz
+ '''
+ dir('component') {
+ checkout scm
+ }
+ } // steps
+ } // scm-checkout
+ stage('install hadoop') {
+ steps {
+ dir("downloads-hadoop") {
+ sh '''#!/bin/bash -e
+ echo "Make sure we have a directory for downloading dependencies: $(pwd)"
+ '''
+ sh '''#!/bin/bash -e
+ echo "Ensure we have a copy of Hadoop ${HADOOP_VERSION}"
+ "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+ --working-dir "${WORKSPACE}/downloads-hadoop" \
+ --keys 'https://downloads.apache.org/hadoop/common/KEYS' \
+ --verify-tar-gz \
+ "${WORKSPACE}/hadoop-${HADOOP_VERSION}-bin.tar.gz" \
+ "hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
+ for stale in $(ls -1 "${WORKSPACE}"/hadoop-*.tar.gz | grep -v ${HADOOP_VERSION}); do
+ echo "Delete stale hadoop cache ${stale}"
+ rm -rf $stale
+ done
+ artifact=$(ls -1 "${WORKSPACE}"/hadoop-${HADOOP_VERSION}-bin.tar.gz | head -n 1)
+ tar --strip-components=1 -xzf "${artifact}" -C "${WORKSPACE}/hadoop-install"
+ if [[ ${HADOOP_VERSION} == 3.* ]]; then
+ # we need to patch some files otherwise minicluster will fail to start, see MAPREDUCE-7471
+ ${BASEDIR}/dev-support/integration-test/patch-hadoop3.sh "${WORKSPACE}/hadoop-install"
+ fi
+ '''
+ } // dir
+ } // steps
+ } // install hadoop
+ stage('install hbase') {
+ steps {
+ unstash 'hbase-install'
+ sh'''#!/bin/bash -e
+ install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v client-bin | grep -v hadoop3)
+ tar --strip-component=1 -xzf "${install_artifact}" -C "hbase-install"
+ client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-client-bin.tar.gz | grep -v hadoop3)
+ tar --strip-component=1 -xzf "${client_artifact}" -C "hbase-client"
+ if ls "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz &>/dev/null; then
+ echo "hadoop3 artifacts available, unpacking the hbase hadoop3 bin tarball into 'hbase-hadoop3-install' and the client hadoop3 tarball into 'hbase-hadoop3-client'"
+ mkdir hbase-hadoop3-install
+ mkdir hbase-hadoop3-client
+ hadoop3_install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | grep -v client-bin)
+ tar --strip-component=1 -xzf "${hadoop3_install_artifact}" -C "hbase-hadoop3-install"
+ hadoop3_client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-client-bin.tar.gz)
+ tar --strip-component=1 -xzf "${hadoop3_client_artifact}" -C "hbase-hadoop3-client"
+ fi
+ '''
+ } // steps
+ }
+ stage('integration test ') {
+ steps {
+ sh '''#!/bin/bash -e
+ hbase_install_dir="hbase-install"
+ hbase_client_dir="hbase-client"
+ if [[ ${HADOOP_VERSION} == 3.* ]] && [[ -d "hbase-hadoop3-install" ]]; then
+ echo "run hadoop3 client integration test against hbase hadoop3 binaries"
+ hbase_install_dir="hbase-hadoop3-install"
+ hbase_client_dir="hbase-hadoop3-client"
+ fi
+ java_home="/usr/lib/jvm/java-17"
+ hadoop_opts="--add-opens java.base/java.lang=ALL-UNNAMED"
+ if [[ ${HADOOP_VERSION} == 2.* ]]; then
+ java_home="/usr/lib/jvm/java-8"
+ hadoop_opts=""
+ fi
+ echo "Attempting to run an instance on top of Hadoop ${HADOOP_VERSION}."
+ # Create working dir
+ rm -rf "${OUTPUT_DIR}/non-shaded" && mkdir "${OUTPUT_DIR}/non-shaded"
+ docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
+ docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+ -u `id -u`:`id -g` -e JAVA_HOME="${java_home}" \
+ -e HADOOP_OPTS="${hadoop_opts}" \
+ --workdir=/hbase hbase-integration-test \
+ component/dev-support/integration-test/pseudo-distributed-test.sh \
+ --single-process \
+ --working-dir ${OUTPUT_DIR}/non-shaded \
+ --hbase-client-install ${hbase_client_dir} \
+ ${hbase_install_dir} \
+ hadoop-install/bin/hadoop \
+ hadoop-install/share/hadoop/yarn/timelineservice \
+ hadoop-install/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
+ hadoop-install/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
+ hadoop-install/bin/mapred \
+ >${OUTPUT_DIR}/hadoop.log 2>&1
+ if [ $? -ne 0 ]; then
+ echo "(x) {color:red}-1 client integration test for ${HADOOP_VERSION}{color}\n--Failed when running client tests on top of Hadoop ${HADOOP_VERSION}. [see log for details|${BUILD_URL}/artifact/${OUTPUT_DIR}/hadoop.log]. (note that this means we didn't check the Hadoop ${HADOOP_VERSION} shaded client)" >${OUTPUT_DIR}/commentfile
+ exit 2
+ fi
+ echo "(/) {color:green}+1 client integration test for ${HADOOP_VERSION} {color}" >${OUTPUT_DIR}/commentfile
+ if [[ ${HADOOP_VERSION} == 2.* ]] || [[ ${HADOOP_VERSION} == 3.2.* ]]; then
+ echo "skip running shaded hadoop client test for ${HADOOP_VERSION}"
+ exit 0
+ fi
+ # Create working dir
+ rm -rf "${OUTPUT_DIR}/shaded" && mkdir "${OUTPUT_DIR}/shaded"
+ echo "Attempting to run an instance on top of Hadoop ${HADOOP_VERSION}, relying on the Hadoop client artifacts for the example client program."
+ docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+ -u `id -u`:`id -g` -e JAVA_HOME="${java_home}" \
+ -e HADOOP_OPTS="${hadoop_opts}" \
+ --workdir=/hbase hbase-integration-test \
+ component/dev-support/integration-test/pseudo-distributed-test.sh \
+ --single-process \
+ --hadoop-client-classpath hadoop-install/share/hadoop/client/hadoop-client-api-*.jar:hadoop-install/share/hadoop/client/hadoop-client-runtime-*.jar \
+ --working-dir ${OUTPUT_DIR}/shaded \
+ --hbase-client-install ${hbase_client_dir} \
+ ${hbase_install_dir} \
+ hadoop-install/bin/hadoop \
+ hadoop-install/share/hadoop/yarn/timelineservice \
+ hadoop-install/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
+ hadoop-install/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
+ hadoop-install/bin/mapred \
+ >${OUTPUT_DIR}/hadoop-shaded.log 2>&1
+ if [ $? -ne 0 ]; then
+ echo "(x) {color:red}-1 client integration testfor ${HADOOP_VERSION}{color}\n--Failed when running client tests on top of Hadoop ${HADOOP_VERSION} using Hadoop's shaded client. [see log for details|${BUILD_URL}/artifact/${OUTPUT_DIR}/hadoop-shaded.log]." >> ${OUTPUT_DIR}/commentfile
+ exit 2
+ fi
+ echo "(/) {color:green}+1 client integration test for ${HADOOP_VERSION} with shaded hadoop client{color}" >> ${OUTPUT_DIR}/commentfile
+ '''
+ } // steps
+ post {
+ always {
+ stash name: "test-result-${env.HADOOP_VERSION}", includes: "${env.OUTPUT_DIR}/commentfile"
+ archiveArtifacts artifacts: "${env.OUTPUT_DIR}/*"
+ archiveArtifacts artifacts: "${env.OUTPUT_DIR}/**/*"
+ } // always
+ } // post
+ } // integration test
+ } // stages
+ } // matrix
+ } // integration test matrix
+ } // stages
+ post {
+ always {
+ script {
+ sh "printenv"
+ // wipe out all the output directories before unstashing
+ sh'''
+ echo "Clean up result directories"
+ rm -rf output-srctarball
+ rm -rf output-integration-hadoop-*
+ '''
+ def results = []
+ results.add('output-srctarball/commentfile')
+ for (hadoopVersion in getHadoopVersions(env.HADOOP_VERSIONS)) {
+ try {
+ unstash "test-result-${hadoopVersion}"
+ results.add("output-integration-hadoop-${hadoopVersion}/commentfile")
+ } catch (e) {
+ echo "unstash ${hadoopVersion} failed, ignore"
+ }
+ }
+ echo env.BRANCH_NAME
+ echo env.BUILD_URL
+ echo currentBuild.result
+ echo currentBuild.durationString
+ def comment = "Results for branch ${env.BRANCH_NAME}\n"
+ comment += "\t[build ${currentBuild.displayName} on builds.a.o|${env.BUILD_URL}]: "
+ if (currentBuild.result == null || currentBuild.result == "SUCCESS") {
+ comment += "(/) *{color:green}+1 overall{color}*\n"
+ } else {
+ comment += "(x) *{color:red}-1 overall{color}*\n"
+ // Ideally get the committer out of the change and @ mention them in the per-jira comment
+ }
+ comment += "----\ndetails (if available):\n\n"
+ echo ""
+ echo "[DEBUG] trying to aggregate step-wise results"
+ comment += results.collect { fileExists(file: it) ? readFile(file: it) : "" }.join("\n\n")
+ echo "[INFO] Comment:"
+ echo comment
+ echo ""
+ echo "[DEBUG] checking to see if feature branch"
+ def jiras = getJirasToComment(env.BRANCH_NAME, [])
+ if (jiras.isEmpty()) {
+ echo "[DEBUG] non-feature branch, checking change messages for jira keys."
+ echo "[INFO] There are ${currentBuild.changeSets.size()} change sets."
+ jiras = getJirasToCommentFromChangesets(currentBuild)
+ }
+ jiras.each { currentIssue ->
+ jiraComment issueKey: currentIssue, body: comment
+ }
+ } // script
+ } // always
+ } // post
+}
+
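+// Split the comma-separated HADOOP_VERSIONS value into a trimmed list, dropping
+// empties, e.g. (hypothetical input) "2.10.2, 3.3.6," -> ['2.10.2', '3.3.6'].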
+@NonCPS
+List getHadoopVersions(String versions) {
+ return versions.split(',').collect { it.trim() }.findAll { it } as String[]
+}
+
+import org.jenkinsci.plugins.workflow.support.steps.build.RunWrapper
+@NonCPS
+List getJirasToCommentFromChangesets(RunWrapper thisBuild) {
+ def seenJiras = []
+ thisBuild.changeSets.each { cs ->
+ cs.getItems().each { change ->
+ CharSequence msg = change.msg
+ echo "change: ${change}"
+ echo " ${msg}"
+ echo " ${change.commitId}"
+ echo " ${change.author}"
+ echo ""
+ seenJiras = getJirasToComment(msg, seenJiras)
+ }
+ }
+ return seenJiras
+}
+
+@NonCPS
+List getJirasToComment(CharSequence source, List seen) {
+ source.eachMatch("HBASE-[0-9]+") { currentIssue ->
+ echo "[DEBUG] found jira key: ${currentIssue}"
+ if (currentIssue in seen) {
+ echo "[DEBUG] already commented on ${currentIssue}."
+ } else {
+ echo "[INFO] commenting on ${currentIssue}."
+ seen << currentIssue
+ }
+ }
+ return seen
+}
+
diff --git a/dev-support/integration-test/patch-hadoop3.sh b/dev-support/integration-test/patch-hadoop3.sh
new file mode 100755
index 000000000000..b4c51ca9487d
--- /dev/null
+++ b/dev-support/integration-test/patch-hadoop3.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+##
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##
+
+hadoop_dir=$1
+
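+# Hadoop 3 minicluster fixes (see MAPREDUCE-7471): resolve HADOOP_TOOLS_DIR relative to
+# HADOOP_TOOLS_HOME, make 'mapred minicluster' add a mockito-core jar to its classpath,
+# and fetch that jar into the tools lib directory.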
+sed -i "s/HADOOP_TOOLS_DIR=\${HADOOP_TOOLS_DIR:-\"share\/hadoop\/tools\"}/HADOOP_TOOLS_DIR=\${HADOOP_TOOLS_DIR:-\"\$HADOOP_TOOLS_HOME\/share\/hadoop\/tools\"}/g" "$hadoop_dir/libexec/hadoop-functions.sh"
+sed -i "/HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.MiniHadoopClusterManager/a mockitojar=\$(echo \"\${HADOOP_TOOLS_LIB_JARS_DIR}\"\/mockito-core-[0-9]*.jar)\nhadoop_add_classpath \"\${mockitojar}\"" "$hadoop_dir/bin/mapred"
+curl https://repo1.maven.org/maven2/org/mockito/mockito-core/2.28.2/mockito-core-2.28.2.jar -o "$hadoop_dir/share/hadoop/tools/lib/mockito-core-2.28.2.jar"
diff --git a/dev-support/integration-test/pseudo-distributed-test.sh b/dev-support/integration-test/pseudo-distributed-test.sh
new file mode 100755
index 000000000000..3089b6db3079
--- /dev/null
+++ b/dev-support/integration-test/pseudo-distributed-test.sh
@@ -0,0 +1,540 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+function usage {
+ echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/share/hadoop/yarn/timelineservice /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable"
+ echo ""
+ echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data."
+ echo " defaults to 'zk-data' in the working-dir."
+ echo " --working-dir /path/to/use Path for writing configs and logs. must exist."
+ echo " defaults to making a directory via mktemp."
+ echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars."
+ echo " defaults to 'hadoop classpath'"
+ echo " --hbase-client-install /path/to/unpacked/client/tarball if given we'll look here for hbase client jars instead of the bin-install"
+ echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase"
+ echo " --single-process Run as single process instead of pseudo-distributed"
+ echo ""
+ exit 1
+}
+# if no args specified, show usage
+if [ $# -lt 6 ]; then
+ usage
+fi
+
+# Get arguments
+declare component_install
+declare hadoop_exec
+declare working_dir
+declare zk_data_dir
+declare clean
+declare distributed="true"
+declare hadoop_jars
+declare hbase_client
+while [ $# -gt 0 ]
+do
+ case "$1" in
+ --working-dir) shift; working_dir=$1; shift;;
+ --force-data-clean) shift; clean="true";;
+ --zookeeper-data) shift; zk_data_dir=$1; shift;;
+ --single-process) shift; distributed="false";;
+ --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;;
+ --hbase-client-install) shift; hbase_client="$1"; shift;;
+ --) shift; break;;
+ -*) usage ;;
+ *) break;; # terminate while loop
+ esac
+done
+
+# should still have all six required path arguments.
+if [ $# -lt 6 ]; then
+ usage
+fi
+component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
+hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")"
+timeline_service_dir="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")"
+yarn_server_tests_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")"
+mapred_jobclient_test_jar="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")"
+mapred_exec="$(cd "$(dirname "$6")"; pwd)/$(basename "$6")"
+
+if [ ! -x "${hadoop_exec}" ]; then
+ echo "hadoop cli does not appear to be executable." >&2
+ exit 1
+fi
+
+if [ ! -x "${mapred_exec}" ]; then
+ echo "mapred cli does not appear to be executable." >&2
+ exit 1
+fi
+
+if [ ! -d "${component_install}" ]; then
+ echo "Path to HBase binary install should be a directory." >&2
+ exit 1
+fi
+
+if [ ! -f "${yarn_server_tests_test_jar}" ]; then
+ echo "Specified YARN server tests test jar is not a file." >&2
+ exit 1
+fi
+
+if [ ! -f "${mapred_jobclient_test_jar}" ]; then
+ echo "Specified MapReduce jobclient test jar is not a file." >&2
+ exit 1
+fi
+
+if [ -z "${working_dir}" ]; then
+ if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then
+ echo "Failed to create temporary working directory. Please specify via --working-dir" >&2
+ exit 1
+ fi
+else
+ # absolutes please
+ working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
+ if [ ! -d "${working_dir}" ]; then
+ echo "passed working directory '${working_dir}' must already exist." >&2
+ exit 1
+ fi
+fi
+
+if [ -z "${zk_data_dir}" ]; then
+ zk_data_dir="${working_dir}/zk-data"
+ mkdir "${zk_data_dir}"
+else
+ # absolutes please
+ zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")"
+ if [ ! -d "${zk_data_dir}" ]; then
+ echo "passed directory for unpacking the source tarball '${zk_data_dir}' must already exist."
+ exit 1
+ fi
+fi
+
+if [ -z "${hbase_client}" ]; then
+ hbase_client="${component_install}"
+else
+ echo "Using HBase client-side artifact"
+ # absolutes please
+ hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")"
+ if [ ! -d "${hbase_client}" ]; then
+ echo "If given hbase client install should be a directory with contents of the client tarball." >&2
+ exit 1
+ fi
+fi
+
+if [ -n "${hadoop_jars}" ]; then
+ declare -a tmp_jars
+ for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
+ tmp_jars=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
+ done
+ hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")"
+fi
+
+
+echo "You'll find logs and temp files in ${working_dir}"
+
+function redirect_and_run {
+ log_base=$1
+ shift
+ echo "$*" >"${log_base}.err"
+ "$@" >"${log_base}.out" 2>>"${log_base}.err"
+}
+
+(cd "${working_dir}"
+
+echo "Hadoop version information:"
+"${hadoop_exec}" version
+hadoop_version=$("${hadoop_exec}" version | head -n 1)
+hadoop_version="${hadoop_version#Hadoop }"
+if [ "${hadoop_version%.*.*}" -gt 2 ]; then
+ "${hadoop_exec}" envvars
+else
+ echo "JAVA_HOME: ${JAVA_HOME}"
+fi
+
+# Ensure that if some other Hadoop install happens to be present in the environment we ignore it.
+HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
+export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP
+
+if [ -n "${clean}" ]; then
+ echo "Cleaning out ZooKeeper..."
+ rm -rf "${zk_data_dir:?}/*"
+fi
+
+echo "HBase version information:"
+"${component_install}/bin/hbase" version 2>/dev/null
+hbase_version=$("${component_install}/bin/hbase" version 2>&1 | grep ^HBase | head -n 1)
+hbase_version="${hbase_version#HBase }"
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
+ echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2
+ exit 1
+fi
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then
+ echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
+ exit 1
+fi
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
+ echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
+ exit 1
+fi
+
+echo "Writing out configuration for HBase."
+rm -rf "${working_dir}/hbase-conf"
+mkdir "${working_dir}/hbase-conf"
+
+if [ -f "${component_install}/conf/log4j2.properties" ]; then
+ cp "${component_install}/conf/log4j2.properties" "${working_dir}/hbase-conf/log4j2.properties"
+else
+ cat >"${working_dir}/hbase-conf/log4j2.properties" <"${working_dir}/hbase-conf/hbase-site.xml" <
+
+
+
+
+ hbase.rootdir
+
+ /hbase
+
+
+ hbase.zookeeper.property.dataDir
+ ${zk_data_dir}
+
+
+ hbase.cluster.distributed
+ ${distributed}
+
+
+EOF
+
+if [ "true" = "${distributed}" ]; then
+ cat >"${working_dir}/hbase-conf/regionservers" <"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
+elif [ "${hadoop_version%.*.*}" -gt 2 ]; then
+ "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
+else
+ HADOOP_CLASSPATH="${timeline_service_dir}/*:${timeline_service_dir}/lib/*:${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
+fi
+
+echo "$!" > "${working_dir}/hadoop.pid"
+
+# total wait is 2 + 4 + 8 + ... + 256 = 510s, roughly 8.5 minutes.
+max_sleep_time=512
+sleep_time=2
+until [[ -s "${working_dir}/hbase-conf/core-site.xml" || "${sleep_time}" -ge "${max_sleep_time}" ]]; do
+ printf '\twaiting for Hadoop to finish starting up.\n'
+ sleep "${sleep_time}"
+ sleep_time="$((sleep_time*2))"
+done
+
+if [ "${sleep_time}" -ge "${max_sleep_time}" ] ; then
+ echo "time out waiting for Hadoop to startup" >&2
+ exit 1
+fi
+
+if [ "${hadoop_version%.*.*}" -gt 2 ]; then
+ echo "Verifying configs"
+ hadoop_conf_files=""
+ for f in "${working_dir}"/hbase-conf/*-site.xml; do
+ hadoop_conf_files="$hadoop_conf_files -conffile $f"
+ done
+ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest $hadoop_conf_files
+fi
+
+if [ -n "${clean}" ]; then
+ echo "Cleaning out HDFS..."
+ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase
+ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/
+ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data
+fi
+
+echo "Listing HDFS contents"
+redirect_and_run "${working_dir}/hadoop_cluster_smoke" \
+ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /
+
+echo "Starting up HBase"
+HBASE_CONF_DIR="${working_dir}/hbase-conf/" HBASE_LOG_DIR="${working_dir}" "${component_install}/bin/start-hbase.sh"
+
+sleep_time=2
+until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <"${working_dir}/table_create.log" 2>&1 < 1000, SPLITALGO => 'UniformSplit'}
+EOF
+
+echo "writing out example TSV to example.tsv"
+cat >"${working_dir}/example.tsv" <"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" </dev/null | grep "row(s)" | awk '{print $1}')
+if [ ! "${import_rowcount}" -eq 48 ]; then
+ echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}."
+ exit 2
+fi
+
+if [ -z "${hadoop_jars}" ]; then
+ echo "Hadoop client jars not given; getting them from 'hadoop classpath' for the example."
+ hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath)
+fi
+
+echo "Building shaded client example."
+cat >"${working_dir}/HBaseClientReadWriteExample.java" < regions = new LinkedList<>();
+ try (Admin admin = connection.getAdmin()) {
+ final ClusterMetrics cluster = admin.getClusterMetrics();
+ System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount()));
+ for (ServerMetrics server : cluster.getLiveServerMetrics().values()) {
+ for (RegionMetrics region : server.getRegionMetrics().values()) {
+ regions.add(region.getNameAsString());
+ }
+ }
+ }
+ final Path listing = new Path("example-region-listing.data");
+ System.out.println("Writing list to HDFS");
+ try (FileSystem fs = FileSystem.newInstance(hadoop)) {
+ final Path path = fs.makeQualified(listing);
+ try (FSDataOutputStream out = fs.create(path)) {
+ out.writeInt(regions.size());
+ for (String region : regions) {
+ out.writeUTF(region);
+ }
+ out.hsync();
+ }
+ }
+ final List<Put> puts = new LinkedList<>();
+ final Put marker = new Put(new byte[] { (byte)0 });
+ System.out.println("Reading list from HDFS");
+ try (FileSystem fs = FileSystem.newInstance(hadoop)) {
+ final Path path = fs.makeQualified(listing);
+ final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
+ try (FSDataInputStream in = fs.open(path)) {
+ final int count = in.readInt();
+ marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count));
+ for(int i = 0; i < count; i++) {
+ builder.clear();
+ final byte[] row = Bytes.toBytes(in.readUTF());
+ final Put put = new Put(row);
+ builder.setRow(row);
+ builder.setFamily(FAMILY_BYTES);
+ builder.setType(Cell.Type.Put);
+ put.add(builder.build());
+ puts.add(put);
+ }
+ }
+ }
+ System.out.println("Writing list into HBase table");
+ try (Table table = connection.getTable(TableName.valueOf("test:example"))) {
+ table.put(marker);
+ table.put(puts);
+ }
+ }
+ }
+}
+EOF
+redirect_and_run "${working_dir}/hbase-shaded-client-compile" \
+ $JAVA_HOME/bin/javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java"
+echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table."
+# The order of classpath entries here is important. if we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190.
+redirect_and_run "${working_dir}/hbase-shaded-client-example" \
+ $JAVA_HOME/bin/java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample
+
+echo "Checking on results of example program."
+"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data"
+
+"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" </dev/null | grep "row(s)" | awk '{print $1}')
+if [ "${example_rowcount}" -gt "1049" ]; then
+ echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 1 for example's use of meta region, and 1 for example's count record"
+else
+ echo "ERROR: Only found ${example_rowcount} rows."
+fi
+
+)
diff --git a/dev-support/integration-test/source-artifact.sh b/dev-support/integration-test/source-artifact.sh
new file mode 100755
index 000000000000..7292d2da8c37
--- /dev/null
+++ b/dev-support/integration-test/source-artifact.sh
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function usage {
+ echo "Usage: ${0} [options] /path/to/component/checkout"
+ echo ""
+ echo " --intermediate-file-dir /path/to/use Path for writing listings and diffs. must exist."
+ echo " defaults to making a directory via mktemp."
+ echo " --unpack-temp-dir /path/to/use Path for unpacking tarball. default to"
+ echo " 'unpacked_src_tarball' in intermediate directory."
+ echo " --maven-m2-initial /path/to/use Path for maven artifacts while building in"
+ echo " component-dir."
+ echo " --maven-m2-src-build /path/to/use Path for maven artifacts while building from the"
+ echo " unpacked source tarball."
+ echo " --clean-source-checkout Destructively clean component checkout before"
+ echo " comparing to source tarball. N.B. will delete"
+ echo " anything in the checkout dir that isn't from"
+ echo " a git checkout, including ignored files."
+ exit 1
+}
+
+set -e
+
+MVN="mvn"
+if ! command -v mvn &>/dev/null; then
+ MVN=$MAVEN_HOME/bin/mvn
+fi
+# if no args specified, show usage
+if [ $# -lt 1 ]; then
+ usage
+fi
+
+# Get arguments
+declare component_dir
+declare unpack_dir
+declare m2_initial
+declare m2_tarbuild
+declare working_dir
+declare source_clean
+while [ $# -gt 0 ]
+do
+ case "$1" in
+ --unpack-temp-dir) shift; unpack_dir=$1; shift;;
+ --maven-m2-initial) shift; m2_initial=$1; shift;;
+ --maven-m2-src-build) shift; m2_tarbuild=$1; shift;;
+ --intermediate-file-dir) shift; working_dir=$1; shift;;
+ --clean-source-checkout) shift; source_clean="true";;
+ --) shift; break;;
+ -*) usage ;;
+ *) break;; # terminate while loop
+ esac
+done
+
+# should still have where component checkout is.
+if [ $# -lt 1 ]; then
+ usage
+fi
+component_dir="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
+
+if [ -z "${working_dir}" ]; then
+ if ! working_dir="$(mktemp -d -t hbase-srctarball-test)" ; then
+ echo "Failed to create temporary working directory. Please specify via --unpack-temp-dir"
+ exit 1
+ fi
+else
+ # absolutes please
+ working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
+ if [ ! -d "${working_dir}" ]; then
+ echo "passed working directory '${working_dir}' must already exist."
+ exit 1
+ fi
+fi
+
+echo "You'll find logs and temp files in ${working_dir}"
+
+if [ -z "${unpack_dir}" ]; then
+ unpack_dir="${working_dir}/unpacked_src_tarball"
+ mkdir "${unpack_dir}"
+else
+ # absolutes please
+ unpack_dir="$(cd "$(dirname "${unpack_dir}")"; pwd)/$(basename "${unpack_dir}")"
+ if [ ! -d "${unpack_dir}" ]; then
+ echo "passed directory for unpacking the source tarball '${unpack_dir}' must already exist."
+ exit 1
+ fi
+ rm -rf "${unpack_dir:?}/*"
+fi
+
+if [ -z "${m2_initial}" ]; then
+ m2_initial="${working_dir}/.m2-initial"
+ mkdir "${m2_initial}"
+else
+ # absolutes please
+ m2_initial="$(cd "$(dirname "${m2_initial}")"; pwd)/$(basename "${m2_initial}")"
+ if [ ! -d "${m2_initial}" ]; then
+ echo "passed directory for storing the initial build's maven repo '${m2_initial}' " \
+ "must already exist."
+ exit 1
+ fi
+fi
+
+if [ -z "${m2_tarbuild}" ]; then
+ m2_tarbuild="${working_dir}/.m2-tarbuild"
+ mkdir "${m2_tarbuild}"
+else
+ # absolutes please
+ m2_tarbuild="$(cd "$(dirname "${m2_tarbuild}")"; pwd)/$(basename "${m2_tarbuild}")"
+ if [ ! -d "${m2_tarbuild}" ]; then
+ echo "passed directory for storing the build from src tarball's maven repo '${m2_tarbuild}' " \
+ "must already exist."
+ exit 1
+ fi
+fi
+
+# This is meant to mimic what a release manager will do to create RCs.
+# See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate
+
+echo "Maven details, in case our JDK doesn't match expectations:"
+${MVN} --version --offline | tee "${working_dir}/maven_version"
+
+echo "Do a clean building of the source artifact using code in ${component_dir}"
+cd "${component_dir}"
+if [ -n "${source_clean}" ]; then
+ echo "Clean..."
+ git clean -xdfff >"${working_dir}/component_git_clean.log" 2>&1
+fi
+echo "Follow the ref guide section on making a RC: Step 6 Build the source tarball"
+git archive --format=tar.gz --output="${working_dir}/hbase-src.tar.gz" \
+ --prefix="hbase-SOMEVERSION/" HEAD \
+ >"${working_dir}/component_build_src_tarball.log" 2>&1
+
+cd "${unpack_dir}"
+echo "Unpack the source tarball"
+tar --strip-components=1 -xzf "${working_dir}/hbase-src.tar.gz" \
+ >"${working_dir}/srctarball_unpack.log" 2>&1
+
+cd "${component_dir}"
+echo "Diff against source tree"
+diff --binary --recursive . "${unpack_dir}" >"${working_dir}/diff_output" || true
+
+cd "${working_dir}"
+# expectation check largely based on HBASE-14952
+echo "Checking against things we don't expect to include in the source tarball (git related, etc.)"
+# Add in lines to show differences between the source tarball and this branch, in the same format diff would give.
+# e.g. prior to HBASE-19152 we'd have the following lines (ignoring the bash comment marker):
+#Only in .: .gitattributes
+#Only in .: .gitignore
+# (elided: the known_excluded heredoc entries)
+cat >known_excluded <<EOF
+EOF
+
+if ! diff known_excluded diff_output >"${working_dir}/unexpected.diff" ; then
+ echo "Any output here are unexpected differences between the source artifact we'd make for an RC and the current branch."
+ echo "One potential source of differences is if you have an unclean working directory; you should expect to see"
+ echo "such extraneous files below."
+ echo ""
+ echo "The expected differences are on the < side and the current differences are on the > side."
+ echo "In a given set of differences, '.' refers to the branch in the repo and 'unpacked_src_tarball' refers to what we pulled out of the tarball."
+ diff known_excluded diff_output
+else
+ echo "Everything looks as expected."
+fi
+
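+# Maps a build version onto its hadoop3 flavor, keeping any -SNAPSHOT suffix last,
+# e.g. "4.0.0-SNAPSHOT" -> "4.0.0-hadoop3-SNAPSHOT" and "2.6.0" -> "2.6.0-hadoop3".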
+function get_hadoop3_version {
+ local version="$1"
+ if [[ "${version}" =~ -SNAPSHOT$ ]]; then
+ echo "${version/-SNAPSHOT/-hadoop3-SNAPSHOT}"
+ else
+ echo "${version}-hadoop3"
+ fi
+}
+
+function build_tarball {
+ local build_hadoop3=$1
+ local mvn_extra_args=""
+ local build_log="srctarball_install.log"
+ local tarball_glob="hbase-*-bin.tar.gz"
+ if [ $build_hadoop3 -ne 0 ]; then
+ local version=$(${MVN} -Dmaven.repo.local="${m2_tarbuild}" help:evaluate -Dexpression=project.version -q -DforceStdout)
+ local hadoop3_version=$(get_hadoop3_version $version)
+ mvn_extra_args="-Drevision=${hadoop3_version} -Dhadoop.profile=3.0"
+ build_log="hadoop3_srctarball_install.log"
+ tarball_glob="hbase-*-hadoop3-*-bin.tar.gz"
+ echo "Follow the ref guide section on making a RC: Step 8 Build the hadoop3 binary tarball."
+ else
+ echo "Follow the ref guide section on making a RC: Step 7 Build the binary tarball."
+ fi
+ if ${MVN} --threads=2 -DskipTests -Prelease --batch-mode -Dmaven.repo.local="${m2_tarbuild}" ${mvn_extra_args} clean install \
+ assembly:single >"${working_dir}/${build_log}" 2>&1; then
+ for artifact in "${unpack_dir}"/hbase-assembly/target/${tarball_glob}; do
+ if [ -f "${artifact}" ]; then
+ # TODO check the layout of the binary artifact we just made.
+ echo "Building a binary tarball from the source tarball succeeded."
+ return 0
+ fi
+ done
+ fi
+
+ echo "Building a binary tarball from the source tarball failed. see ${working_dir}/${build_log} for details."
+ # Copy up the rat.txt to the working dir so available in build archive in case rat complaints.
+ # rat.txt can be under any module target dir... copy them all up renaming them to include parent dir as we go.
+ find ${unpack_dir} -name rat.txt -type f | while IFS= read -r NAME; do cp -v "$NAME" "${working_dir}/${NAME//\//_}"; done
+ return 1
+}
+
+cd "${unpack_dir}"
+
+if ${MVN} -Dmaven.repo.local="${m2_tarbuild}" help:active-profiles | grep -q hadoop-3.0; then
+ echo "The hadoop-3.0 profile is activated by default, build a default tarball."
+ build_tarball 0
+else
+ echo "The hadoop-3.0 profile is not activated by default, build a default tarball first."
+ # use java 8 to build with hadoop2
+ JAVA_HOME="/usr/lib/jvm/java-8" build_tarball 0
+ if [ $? -ne 0 ]; then
+ exit 1
+ fi
+
+ # move the previous tarballs out, so they will not be cleaned while building against hadoop3
+ mv "${unpack_dir}"/hbase-assembly/target/hbase-*-bin.tar.gz "${unpack_dir}"/
+ echo "build a hadoop3 tarball."
+ build_tarball 1
+ if [ $? -ne 0 ]; then
+ exit 1
+ fi
+ # move tarballs back
+ mv "${unpack_dir}"/hbase-*-bin.tar.gz "${unpack_dir}"/hbase-assembly/target/
+fi
diff --git a/dev-support/jenkins_precommit_github_yetus.sh b/dev-support/jenkins_precommit_github_yetus.sh
index 8604d96760dc..4ec0c1d3829e 100755
--- a/dev-support/jenkins_precommit_github_yetus.sh
+++ b/dev-support/jenkins_precommit_github_yetus.sh
@@ -31,7 +31,6 @@ declare -i missing_env=0
declare -a required_envs=(
# these ENV variables define the required API with Jenkinsfile_GitHub
"ARCHIVE_PATTERN_LIST"
- "BUILD_URL_ARTIFACTS"
"DOCKERFILE"
"GITHUB_PASSWORD"
"GITHUB_USER"
@@ -39,7 +38,6 @@ declare -a required_envs=(
"PLUGINS"
"SET_JAVA_HOME"
"SOURCEDIR"
- "TESTS_FILTER"
"YETUSDIR"
"AUTHOR_IGNORE_LIST"
"BLANKS_EOL_IGNORE_FILE"
@@ -53,6 +51,12 @@ for required_env in "${required_envs[@]}"; do
fi
done
+# BUILD_URL_ARTIFACTS is required for Jenkins but set in personality for GitHub Actions
+if [[ "${GITHUB_ACTIONS}" != "true" ]] && [[ -z "${BUILD_URL_ARTIFACTS}" ]]; then
+ echo "[ERROR] Required environment variable 'BUILD_URL_ARTIFACTS' is not set."
+ missing_env=${missing_env}+1
+fi
+
if [ ${missing_env} -gt 0 ]; then
echo "[ERROR] Please set the required environment variables before invoking. If this error is " \
"on Jenkins, then please file a JIRA about the error."
@@ -91,7 +95,11 @@ YETUS_ARGS+=("--console-report-file=${PATCHDIR}/console.txt")
YETUS_ARGS+=("--html-report-file=${PATCHDIR}/report.html")
# enable writing back to Github
YETUS_ARGS+=("--github-token=${GITHUB_PASSWORD}")
-YETUS_ARGS+=("--github-write-comment")
+# GitHub Actions fork PRs cannot write comments (GITHUB_TOKEN has no PR write permission)
+# Jenkins can write comments via its own credentials
+if [[ "${GITHUB_ACTIONS}" != "true" ]]; then
+ YETUS_ARGS+=("--github-write-comment")
+fi
# auto-kill any surefire stragglers during unit test runs
YETUS_ARGS+=("--reapermode=kill")
# set relatively high limits for ASF machines
@@ -103,7 +111,9 @@ YETUS_ARGS+=("--spotbugs-strict-precheck")
# rsync these files back into the archive dir
YETUS_ARGS+=("--archive-list=${ARCHIVE_PATTERN_LIST}")
# URL for user-side presentation in reports and such to our artifacts
-YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}")
+if [[ -n "${BUILD_URL_ARTIFACTS}" ]]; then
+ YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}")
+fi
# plugins to enable
YETUS_ARGS+=("--plugins=${PLUGINS},-findbugs")
# run in docker mode and specifically point to our
@@ -115,15 +125,17 @@ YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}")
YETUS_ARGS+=("--author-ignore-list=${AUTHOR_IGNORE_LIST}")
YETUS_ARGS+=("--blanks-eol-ignore-file=${BLANKS_EOL_IGNORE_FILE}")
YETUS_ARGS+=("--blanks-tabs-ignore-file=${BLANKS_TABS_IGNORE_FILE}*")
-YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+if [[ -n "${TESTS_FILTER}" ]]; then
+ YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+fi
YETUS_ARGS+=("--personality=${SOURCEDIR}/dev-support/hbase-personality.sh")
YETUS_ARGS+=("--quick-hadoopcheck")
if [[ "${SKIP_ERRORPRONE}" = "true" ]]; then
# skip error prone
YETUS_ARGS+=("--skip-errorprone")
fi
-# effectively treat dev-support as a custom maven module
-YETUS_ARGS+=("--skip-dirs=dev-support")
+# Exclude non-code directories from module detection to avoid triggering full builds
+YETUS_ARGS+=("--skip-dirs=dev-support,.github,bin,conf")
# For testing with specific hadoop version. Activates corresponding profile in maven runs.
if [[ -n "${HADOOP_PROFILE}" ]]; then
# Master has only Hadoop3 support. We don't need to activate any profile.
@@ -156,6 +168,10 @@ fi
if [[ -n "${JAVA8_HOME}" ]]; then
YETUS_ARGS+=("--java8-home=${JAVA8_HOME}")
fi
+# Test profile for running specific test categories (e.g., runDevTests, runLargeTests-wave1)
+if [[ -n "${TEST_PROFILE}" ]]; then
+ YETUS_ARGS+=("--test-profile=${TEST_PROFILE}")
+fi
echo "Launching yetus with command line:"
echo "${TESTPATCHBIN} ${YETUS_ARGS[*]}"
diff --git a/dev-support/make_rc.sh b/dev-support/make_rc.sh
index 1d65f1807c7a..bd65ceb87296 100755
--- a/dev-support/make_rc.sh
+++ b/dev-support/make_rc.sh
@@ -17,4 +17,4 @@
# limitations under the License.
echo "Replaced by ./dev-support/create-release/do-release-docker.sh script."
-echo "See http://hbase.apache.org/book.html#do-release-docker.sh"
+echo "See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate"
diff --git a/dev-support/spotbugs-exclude.xml b/dev-support/spotbugs-exclude.xml
index 2f0684eff4d7..17b8d2cbdedd 100644
--- a/dev-support/spotbugs-exclude.xml
+++ b/dev-support/spotbugs-exclude.xml
@@ -271,4 +271,16 @@
+  <!-- (Match exclusion entries elided) -->
diff --git a/dev-support/yetus_console_to_md.py b/dev-support/yetus_console_to_md.py
new file mode 100644
index 000000000000..bee5512eec71
--- /dev/null
+++ b/dev-support/yetus_console_to_md.py
@@ -0,0 +1,522 @@
+#!/usr/bin/env python3
+##
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Convert Apache Yetus console output to Markdown format.
+"""
+import os
+import re
+import sys
+from io import TextIOWrapper
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+# Vote to emoji mapping
+VOTE_EMOJI = {
+ '+1': '✅',
+ '-1': '❌',
+ '0': '🆗',
+ '+0': '🆗',
+ '-0': '⚠️'
+}
+
+
+def convert_vote(vote: str) -> str:
+ """Convert vote string to emoji."""
+ return VOTE_EMOJI.get(vote, vote)
+
+
+def is_runtime(text: str) -> bool:
+ """Check if text is a runtime like '41m 24s'."""
+ return bool(re.match(r'^\d+m\s+\d+s$', text))
+
+
+def parse_table_row(line: str) -> Tuple[str, str, str, str]:
+ """
+ Parse a table row and return tuple of cell values.
+ Returns exactly 4 columns: (vote, subsystem, runtime, comment)
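+
+ Example (hypothetical Yetus row):
+ '| +1 | mvninstall | 4m 13s | the patch passed |'
+ -> ('+1', 'mvninstall', '4m 13s', 'the patch passed')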
+ """
+ parts = line.split('|')
+ # Remove first empty element (from leading |)
+ parts = parts[1:] if len(parts) > 1 else []
+
+ # Take first 4 columns and strip whitespace
+ result: List[str] = [p.strip() for p in parts[:4]]
+
+ # Pad to 4 columns if needed
+ while len(result) < 4:
+ result.append('')
+
+ return result[0], result[1], result[2], result[3]
+
+
+def is_results_section_start(line: str) -> bool:
+ """Check if line indicates the start of Results section."""
+ return bool(re.search(r'^\[\w+] Results:', line.strip()))
+
+
+def is_tests_run_summary(line: str) -> bool:
+ """Check if line is the Tests run summary line."""
+ return bool(re.search(r'^\[\w+] Tests run:', line.strip()))
+
+
+def parse_results_section(
+ f: TextIOWrapper,
+ failures: List[str],
+ flakes: List[str],
+ errors: List[str]
+) -> None:
+ """
+ Parse the Results section within a patch-unit file.
+ """
+ current_error_type = None
+ while line := f.readline():
+ stripped = line.strip()
+
+ # Section end markers
+ if is_tests_run_summary(line):
+ return
+
+ # Detect error type sections
+ if re.search(r'^\[\w+] Failures:', stripped):
+ current_error_type = failures
+ elif re.search(r'^\[\w+] Flakes:', stripped):
+ current_error_type = flakes
+ elif re.search(r'^\[\w+] Errors:', stripped):
+ current_error_type = errors
+ else:
+ # Parse test entries
+ if current_error_type is not None:
+ test_match = re.search(
+ r'^\[\w+]\s+((?:org\.)?\S+\.(?:\w+\.)*\w+\.\w+)',
+ stripped
+ )
+ if test_match:
+ test_name = test_match.group(1)
+ if 'test' in test_name.lower():
+ current_error_type.append(test_name)
+
+
+def skip_to_results_section(f: TextIOWrapper) -> bool:
+ """
+ Advance the stream to the Results section.
+ After this call, the TextIOWrapper is positioned at the line after "Results:".
+
+ Returns:
+ True if we find a results section, False if we have reached the EOF
+ """
+ while line := f.readline():
+ if is_results_section_start(line):
+ return True
+ return False
+
+
+def scan_all_tests(archiver_dir: Path) -> Dict[str, str]:
+ """
+ Scan the archiver dir to find all the tests and their modules.
+
+ Returns:
+ Dict mapping test name to module name
+ """
+ module = None
+ test_name_to_module = {}
+ for dirpath, _, filenames in os.walk(archiver_dir):
+ if len(filenames) > 0:
+ # surefire reports live at .../archiver/<module>/target/surefire-reports
+ module = dirpath.split(os.sep)[-3]
+ for filename in filenames:
+ match = re.match(r'(org\.apache\.[^-]+)\.txt', filename)
+ if match:
+ test_name_to_module[match.group(1)] = module
+ return test_name_to_module
+
+
+def parse_patch_unit_file(
+ file_path: Path,
+ failures: List[str],
+ flakes: List[str],
+ errors: List[str]
+) -> None:
+ """
+ Parse a patch-unit-*.txt file and extract failed tests by module.
+ """
+ with open(file_path, 'r') as f:
+ while skip_to_results_section(f):
+ parse_results_section(f, failures, flakes, errors)
+
+
+def get_module(test_name: str, test_name_to_module: Dict[str, str]) -> str:
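+ """
+ Best-effort mapping from a failed test name to its maven module.
+
+ A parameterized suffix is stripped first, then a trailing method name,
+ e.g. (hypothetical) 'org.apache.X.TestFoo.testBar[2]' is looked up as
+ 'org.apache.X.TestFoo.testBar' and then as 'org.apache.X.TestFoo';
+ unmatched names fall back to 'default'.
+ """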
+ rindex_of_bracket = test_name.rfind('[')
+ if rindex_of_bracket > 0:
+ # parameterized test, remove the tailing parameters
+ test_name = test_name[:rindex_of_bracket]
+
+ module = test_name_to_module.get(test_name)
+ if module:
+ return module
+
+ # usually the failed test name carries a trailing method name, but test_name_to_module
+ # only contains class names, so strip the last part and try again
+ rindex_of_dot = test_name.rfind('.')
+ if rindex_of_dot > 0:
+ test_name = test_name[:rindex_of_dot]
+
+ module = test_name_to_module.get(test_name)
+ if module:
+ return module
+ return 'default'
+
+
+def increase(module_to_count: Dict[str, int], module: str) -> None:
+ if module in module_to_count:
+ module_to_count[module] += 1
+ else:
+ module_to_count[module] = 1
+
+
+def add_to_details(test_name: str, module: str, error_type: str,
+ details: Dict[str, Dict[str, List[str]]]) -> None:
+ if module not in details:
+ error_type_to_tests = {}
+ details[module] = error_type_to_tests
+ else:
+ error_type_to_tests = details[module]
+
+ if error_type in error_type_to_tests:
+ error_type_to_tests[error_type].append(test_name)
+ else:
+ error_type_to_tests[error_type] = [test_name]
+
+
+def process_failed_tests(
+ error_type: str,
+ failed_tests: List[str],
+ test_name_to_module: Dict[str, str],
+ counts: Dict[str, Dict[str, int]],
+ details: Dict[str, Dict[str, List[str]]]
+) -> None:
+ for test_name in failed_tests:
+ module = get_module(test_name, test_name_to_module)
+ increase(counts[error_type], module)
+ add_to_details(test_name, module, error_type, details)
+
+
+def aggregate_failed_tests(yetus_dir: Path) -> Tuple[
+ Dict[str, Dict[str, int]], Dict[str, Dict[str, List[str]]]]:
+ """
+ Aggregate failed tests from all patch-unit-*.txt files.
+
+ Returns:
+ Tuple of:
+ - counts: {error_type: {module: count}}
+ - details: {module: {error_type: [test_names]}}
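+
+ e.g. (hypothetical): ({'Failures': {'hbase-server': 1}},
+ {'hbase-server': {'Failures': ['org.apache.X.TestFoo.testBar']}})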
+ """
+ patch_files = list(yetus_dir.glob('patch-unit-*.txt'))
+
+ if not patch_files:
+ return {}, {}
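+// Collect the unique HBASE-NNNN issue keys found in 'source' into 'seen',
+// e.g. (hypothetical) "HBASE-28486 addendum for HBASE-28486" records HBASE-28486 once.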
+
+ # Aggregate results from all files
+ failures = []
+ flakes = []
+ errors = []
+
+ for patch_file in patch_files:
+ parse_patch_unit_file(patch_file, failures, flakes, errors)
+
+ if not failures and not flakes and not errors:
+ return {}, {}
+
+ counts = {'Failures': {}, 'Flakes': {}, 'Errors': {}}
+ details = {}
+ module_to_test_name = scan_all_tests(yetus_dir / 'archiver')
+ process_failed_tests('Failures', failures, module_to_test_name, counts, details)
+ process_failed_tests('Flakes', flakes, module_to_test_name, counts, details)
+ process_failed_tests('Errors', errors, module_to_test_name, counts, details)
+
+ return dict(counts), dict(details)
+
+
+def generate_failed_tests_table(
+ counts: Dict[str, Dict[str, int]],
+ details: Dict[str, Dict[str, List[str]]]
+) -> List[str]:
+ """Generate the Failed Tests HTML table."""
+ total_failures = sum(sum(m.values()) for m in counts.values())
+ if total_failures == 0:
+ return []
+
+ content = [
+ '\n## Failed Tests\n\n',
+ '<table>\n',
+ '<tr><th>Error Type</th><th>Count</th><th>Module</th><th>Tests</th></tr>\n',
+ '\n'
+ ]
+
+ error_types = ['Failures', 'Flakes', 'Errors']
+
+ for error_type in error_types:
+ if error_type not in counts:
+ continue
+
+ modules = counts[error_type]
+ total_count = sum(modules.values())
+ num_modules = len(modules)
+
+ first_row = True
+ for module in sorted(modules.keys()):
+ tests = details.get(module, {}).get(error_type, [])
+ tests_str = ' '.join(sorted(set(tests))) if tests else ''
+
+ if first_row:
+ # the error-type and count cells span every module row for this error type
+ content.append(
+ f'<tr><td rowspan="{num_modules}">{error_type}</td>'
+ f'<td rowspan="{num_modules}">{total_count}</td>'
+ f'<td>{module}</td><td>{tests_str}</td></tr>\n'
+ )
+ first_row = False
+ else:
+ content.append(f'<tr><td>{module}</td><td>{tests_str}</td></tr>\n')
+
+ content.extend(['</table>\n', '\n'])
+
+ return content
+
+
+def collect_continuation_lines(
+ lines: List[str],
+ start_idx: int
+) -> Tuple[List[str], int]:
+ """
+ Collect continuation lines for a table row.
+
+ Args:
+ lines: All lines from the file
+ start_idx: Index to start checking from
+
+ Returns:
+ Tuple of (list of comment parts, next index to process)
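+
+ Example: a (hypothetical) continuation row
+ '| | | | passed. |'
+ contributes 'passed.' to the preceding data row's comment.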
+ """
+ comment_parts = []
+ i = start_idx
+
+ while i < len(lines):
+ line = lines[i]
+ stripped = line.strip()
+
+ if not stripped.startswith('|'):
+ break
+
+ if '|| Subsystem || Report/Notes ||' in line:
+ break
+
+ vote, _, runtime, comment = parse_table_row(line)
+
+ # Stop at new data row
+ if vote in VOTE_EMOJI:
+ break
+
+ # Empty vote/subsystem means continuation or separator
+ if not vote:
+ if comment:
+ comment_parts.append(comment)
+ i += 1
+ elif runtime and is_runtime(runtime):
+ break
+ else:
+ i += 1
+ else:
+ break
+
+ return comment_parts, i
+
+
+def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]:
+ """
+ Process the first table (Vote, Subsystem, Runtime, Comment).
+
+ Returns:
+ Tuple of (Markdown lines, next index to process)
+ """
+ content = [
+ '\n',
+ '| Vote | Subsystem | Runtime | Comment |\n',
+ '|------|-----------|---------|---------|\n'
+ ]
+
+ i = start_idx
+
+ # Skip the original separator line
+ if i < len(lines) and '===' in lines[i]:
+ i += 1
+
+ while i < len(lines):
+ line = lines[i]
+ stripped = line.strip()
+
+ if '|| Subsystem || Report/Notes ||' in line:
+ break
+
+ if stripped.startswith('+--'):
+ i += 1
+ continue
+
+ if not stripped.startswith('|'):
+ i += 1
+ continue
+
+ vote, subsystem, runtime, comment = parse_table_row(line)
+
+ # Section header (vote and subsystem are empty)
+ if not vote and not subsystem:
+ if comment:
+ content.append(f'| | | | {comment} |\n')
+ elif runtime and is_runtime(runtime):
+ content.append(f'| | | {runtime} | |\n')
+ i += 1
+ continue
+
+ # Data row with vote
+ if vote in VOTE_EMOJI:
+ vote_emoji = convert_vote(vote)
+ comment_parts = [comment] if comment else []
+
+ continuation_parts, i = collect_continuation_lines(lines, i + 1)
+ comment_parts.extend(continuation_parts)
+
+ comment_text = ' '.join(comment_parts)
+ content.append(f'| {vote_emoji} | {subsystem} | {runtime} | {comment_text} |\n')
+ continue
+
+ # Other cases, skip
+ i += 1
+
+ return content, i
+
+
+def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]:
+ """
+ Process the second table (Subsystem, Report/Notes).
+
+ Returns:
+ Tuple of (Markdown lines, next index to process)
+ """
+ content = [
+ '\n## Subsystem Reports\n\n',
+ '| Subsystem | Report/Notes |\n',
+ '|-----------|------------|\n'
+ ]
+
+ i = start_idx
+
+ # Skip the original separator line
+ if i < len(lines) and '===' in lines[i]:
+ i += 1
+
+ while i < len(lines):
+ line = lines[i]
+ stripped = line.strip()
+
+ if not stripped.startswith('|'):
+ break
+
+ # Split by | and get non-empty parts
+ parts = [p.strip() for p in stripped.split('|') if p.strip()]
+ if len(parts) >= 2:
+ content.append(f'| {parts[0]} | {parts[1]} |\n')
+
+ i += 1
+
+ return content, i
+
+
+def convert_console_to_markdown(input_dir: str, output_file: Optional[str] = None) -> str:
+ """Convert Yetus console output to Markdown format."""
+ input_path = Path(input_dir)
+
+ if not input_path.is_dir():
+ print(f'Error: Input path "{input_dir}" is not a directory', file=sys.stderr)
+ sys.exit(1)
+
+ console_file = input_path / 'console.txt'
+ if not console_file.exists():
+ print(f'Error: console.txt not found in "{input_dir}"', file=sys.stderr)
+ sys.exit(1)
+
+ with open(console_file, 'r') as f:
+ lines = f.readlines()
+
+ content = []
+ i = 0
+
+ while i < len(lines):
+ line = lines[i]
+ stripped = line.strip()
+
+ if stripped == '-1 overall':
+ content.append(f'❌ {stripped} \n')
+ i += 1
+ elif stripped == '+1 overall':
+ content.append(f'✅ {stripped} \n')
+ i += 1
+ elif '| Vote |' in line and 'Subsystem' in line:
+ table_content, i = process_first_table(lines, i + 1)
+ content.extend(table_content)
+
+ counts, details = aggregate_failed_tests(input_path)
+ if counts:
+ content.extend(generate_failed_tests_table(counts, details))
+ elif '|| Subsystem || Report/Notes ||' in line:
+ table_content, i = process_second_table(lines, i + 1)
+ content.extend(table_content)
+ else:
+ i += 1
+
+ result = ''.join(content)
+
+ if output_file:
+ with open(output_file, 'w') as f:
+ f.write(result)
+ print(f'Converted {input_dir} to {output_file}', file=sys.stderr)
+ else:
+ print(result, end='')
+
+ return result
+
+
+def main():
+ if len(sys.argv) < 2:
+ print(f'Usage: {sys.argv[0]} <input_directory> [output_file]', file=sys.stderr)
+ print(
+ '  input_directory: Directory containing console.txt and optional patch-unit-*.txt files',
+ file=sys.stderr)
+ print('  If output_file is not provided, output goes to stdout', file=sys.stderr)
+ sys.exit(1)
+
+ input_dir = sys.argv[1]
+ output_file = sys.argv[2] if len(sys.argv) > 2 else None
+
+ if not Path(input_dir).exists():
+ print(f'Error: Input directory "{input_dir}" does not exist', file=sys.stderr)
+ sys.exit(1)
+
+ convert_console_to_markdown(input_dir, output_file)
+
+
+if __name__ == '__main__':
+ main()