diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java index 8ba2c51e8506..f2ea4ac41289 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java @@ -183,13 +183,14 @@ public static SemanticNodeProcessor getColumnProcessor() { return new ColumnExprProcessor(); } - private static boolean findSourceColumn( + private static boolean findSourceColumn(Operator inpOp, LineageCtx lctx, Predicate cond, String tabAlias, String alias) { for (Map.Entry topOpMap: lctx.getParseCtx().getTopOps().entrySet()) { TableScanOperator tableScanOp = topOpMap.getValue(); Table tbl = tableScanOp.getConf().getTableMetadata(); - if (tbl.getTableName().equals(tabAlias) - || tabAlias.equals(tableScanOp.getConf().getAlias())) { + if (inpOp.getOperatorId().equals(tableScanOp.getOperatorId()) + && (tbl.getTableName().equals(tabAlias) + || tabAlias.equals(tableScanOp.getConf().getAlias()))) { for (FieldSchema column: tbl.getCols()) { if (column.getName().equals(alias)) { TableAliasInfo table = new TableAliasInfo(); @@ -241,7 +242,7 @@ public static String getExprString(RowSchema rs, ExprNodeDesc expr, } if (tabAlias != null && tabAlias.length() > 0 && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) { - if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && dep != null) { + if (cond != null && !findSourceColumn(inpOp, lctx, cond, tabAlias, alias) && dep != null) { cond.getBaseCols().addAll(dep.getBaseCols()); } return tabAlias + "." + alias; diff --git a/ql/src/test/queries/clientpositive/lineage8.q b/ql/src/test/queries/clientpositive/lineage8.q new file mode 100644 index 000000000000..959376eaf417 --- /dev/null +++ b/ql/src/test/queries/clientpositive/lineage8.q @@ -0,0 +1,19 @@ +set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger; + +create table table_1 (id1 int, id2 int); +create table table_2 (id1 int, id2 int); + +create table table_3 as +select id1 from table_1 t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2; + +create table table_4 as +select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2; + +create table table_5 as +select t.id1 from +(select id1 from table_1 t1 where t1.id2 = 1) t +join table_2 t1 on t.id1 = t1.id2; diff --git a/ql/src/test/results/clientpositive/llap/lineage8.q.out b/ql/src/test/results/clientpositive/llap/lineage8.q.out new file mode 100644 index 000000000000..6f8334018da7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/lineage8.q.out @@ -0,0 +1,41 @@ +PREHOOK: query: create table table_1 (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_1 +PREHOOK: query: create table table_2 (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_2 +PREHOOK: query: create table table_3 as +select id1 from table_1 t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_3 +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"24a0f860f60a1b7d5f350fd8eb164a37","queryText":"create table table_3 as\nselect id1 from table_1 t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1,2],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"(t1.id2 = 1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_3.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]} +PREHOOK: query: create table table_4 as +select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1 +union all +select id1 from table_2 t1 where t1.id2 = 2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_4 +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"761d0cf34076cec77766bf7af8f1cbe9","queryText":"create table table_4 as\nselect id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 2","edges":[{"sources":[1],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"expression":"((t1.id1 = 3) and (t1.id2 = 1))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_4.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_2.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]} +PREHOOK: query: create table table_5 as +select t.id1 from +(select id1 from table_1 t1 where t1.id2 = 1) t +join table_2 t1 on t.id1 = t1.id2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@table_1 +PREHOOK: Input: default@table_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@table_5 +Result schema has 1 fields, but we don't get as many dependencies +{"version":"1.0","engine":"tez","database":"default","hash":"615bb67f6ff2dd50695bffd14c296677","queryText":"create table table_5 as\nselect t.id1 from\n(select id1 from table_1 t1 where t1.id2 = 1) t\njoin table_2 t1 on t.id1 = t1.id2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,1],"targets":[0],"expression":"((t1.id2 = 1) and t1.id1 is not null)","edgeType":"PREDICATE"},{"sources":[1,3],"targets":[0],"expression":"(t1.id1 = t1.id2)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"t1.id2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.table_5.id1"},{"id":1,"vertexType":"COLUMN","vertexId":"default.table_1.id1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.table_1.id2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.table_2.id2"}]}