From 55269b9d79fa71ba054526aca9bbf5ac4923c7d9 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Thu, 7 May 2026 15:53:22 +0200 Subject: [PATCH] HIVE-27382: Migrate postgres-tpcds-metastore image to the Git repo 1. Move necessary files from https://github.com/zabetak/hive-postgres-metastore repo to ASF. 2. Remove redundant information and update README file. 3. Create GitHub action for publishing new image to ASF Dockerhub on-demand. --- .../hive-postgres-tpcds-metastore.yml | 57 +++++++++++++++ .../hive-postgres-tpcds-metastore/Dockerfile | 22 ++++++ .../hive-postgres-tpcds-metastore/README.md | 72 +++++++++++++++++++ .../init_user_db.sql | 17 +++++ .../restore_metastore.sh | 22 ++++++ .../dbinstall/rules/PostgresTPCDS.java | 2 +- 6 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/hive-postgres-tpcds-metastore.yml create mode 100644 standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile create mode 100644 standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md create mode 100644 standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql create mode 100644 standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh diff --git a/.github/workflows/hive-postgres-tpcds-metastore.yml b/.github/workflows/hive-postgres-tpcds-metastore.yml new file mode 100644 index 000000000000..88068ba41edf --- /dev/null +++ b/.github/workflows/hive-postgres-tpcds-metastore.yml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Build and Publish Postgres TPC-DS Metastore Image + +on: + workflow_dispatch: + inputs: + imageTag: + description: 'Docker image tag. Use "test" for experimentation purposes and proper semantic versioning (e.g., 1.4) when pushing to production. CAUTION: Choose the tag carefully to avoid overwriting existing images.' + required: false + default: 'test' + pushImage: + description: 'Push image to Docker Hub? (true/false)' + required: false + default: false + type: boolean + +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd + + - name: Login to Docker Hub + if: ${{ github.event.inputs.pushImage == 'true' }} + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 + with: + username: ${{ secrets.DOCKERHUB_USER }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Build and optionally push Docker image + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 + with: + context: ./standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/ + file: ./standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile + push: ${{ github.event.inputs.pushImage == 'true' }} + tags: ${{ github.repository_owner }}/hive-postgres-tpcds-metastore:${{ github.event.inputs.imageTag }} diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile 
b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile new file mode 100644 index 000000000000..25aa55c61504 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/Dockerfile @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +FROM postgres:12.3 + +ADD https://github.com/zabetak/hive-test-datasets/releases/download/1.0/metastore_tpcds30tb_3_1_3000.dump.gz /tmp/metastore.dump.gz +RUN chown postgres:postgres /tmp/metastore.dump.gz +COPY init_user_db.sql /docker-entrypoint-initdb.d/init_user_db.sql +COPY restore_metastore.sh /docker-entrypoint-initdb.d/restore_metastore.sh diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md new file mode 100644 index 000000000000..e21d791f4248 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/README.md @@ -0,0 +1,72 @@ + +# Postgres TPC-DS metastore + +A dockerized Postgres database with a Hive metastore dump from a +[TPC-DS 30TB dataset](https://github.com/zabetak/hive-test-datasets/releases/download/1.0/metastore_tpcds30tb_3_1_3000.dump.gz). + +## Build and deploy + +### Docker Hub + +Use the GitHub CI workflow `hive-postgres-tpcds-metastore.yml` for building and deploying the image to +the official ASF Docker Hub registry. + +### Manual + +Build and tag the docker image: `docker build --tag apache/hive-postgres-tpcds-metastore:1.4 .` + +## Usage + +- Create and start Postgres container: + `docker run --name postgres_metastore -p 5432:5432 -e POSTGRES_PASSWORD=postgres -d apache/hive-postgres-tpcds-metastore:1.4` +- Verify that the container is running: `docker ps` +- Stop Postgres container: `docker stop postgres_metastore` +- Remove Postgres container: `docker rm postgres_metastore` + +If you want to check the contents of the metastore the easiest way would be to +open a shell in the container and connect to the database via psql. + + docker exec -it postgres_metastore bash + su postgres + psql -U hive -d metastore + +The default configuration binds the host port 5432 to the database running in +the container.
You can access the database via JDBC using the following +information: + +- URL: `jdbc:postgresql://localhost:5432/metastore` +- DRIVER: `org.postgresql.Driver` +- USER: `hive` +- PASSWORD: `hive` + +If you want to start Hive and instruct it to use this database as the metastore +you have to set the following properties in `hive-site.xml`: + +- `javax.jdo.option.ConnectionURL` +- `javax.jdo.option.ConnectionDriverName` +- `javax.jdo.option.ConnectionUserName` +- `javax.jdo.option.ConnectionPassword` + +If you need to use the current dumps with a more recent version of Hive then +after creating and starting the Postgres container you can use the +[schematool](https://hive.apache.org/docs/latest/admin/hive-schema-tool/) +to upgrade the metastore: + + schematool -dbType postgres -upgradeSchemaFrom 3.1.3000 -driver org.postgresql.Driver -url jdbc:postgresql://localhost:5432/metastore -userName hive -passWord hive diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql new file mode 100644 index 000000000000..a91fe8f446e2 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/init_user_db.sql @@ -0,0 +1,17 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +CREATE ROLE hive WITH LOGIN PASSWORD 'hive'; +CREATE DATABASE metastore WITH OWNER = hive TEMPLATE template0; diff --git a/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh new file mode 100644 index 000000000000..4590ddf28a31 --- /dev/null +++ b/standalone-metastore/metastore-server/docker/hive-postgres-tpcds-metastore/restore_metastore.sh @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash + +# Restore may exit with non-blocking errors so we shouldn't stop the script +# since in many cases the dump will be restored correctly +pg_restore -d metastore /tmp/metastore.dump.gz || true +# Remove the temporary file +rm /tmp/metastore.dump.gz diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java index b0a692d82ca4..68d539fbb2b7 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/dbinstall/rules/PostgresTPCDS.java @@ -31,7 +31,7 @@ */ public class PostgresTPCDS extends Postgres { public PostgresTPCDS() { - super(DockerImageName.parse("zabetak/postgres-tpcds-metastore:1.3").asCompatibleSubstituteFor("postgres")); + super(DockerImageName.parse("apache/hive-postgres-tpcds-metastore:1.4").asCompatibleSubstituteFor("postgres")); container.withUsername("postgres"); }