Example: Join pushdown v2
This example shows join pushdown between the foreign tables of the same remote HIVE/SPARK server to that remote HIVE/SPARK server:
Tables on HIVE/SPARK server:
0: jdbc:hive2://localhost:10000> describe emp; +-----------+------------+----------+--+ | col_name | data_type | comment | +-----------+------------+----------+--+ | empno | int | NULL | | ename | string | NULL | | job | string | NULL | | mgr | int | NULL | | hiredate | date | NULL | | sal | int | NULL | | comm | int | NULL | | deptno | int | NULL | +-----------+------------+----------+--+ 8 rows selected (0.747 seconds) 0: jdbc:hive2://localhost:10000> describe dept; +-----------+------------+----------+--+ | col_name | data_type | comment | +-----------+------------+----------+--+ | deptno | int | NULL | | dname | string | NULL | | loc | string | NULL | +-----------+------------+----------+--+ 3 rows selected (0.067 seconds)
Tables on Postgres server:
-- load extension first time after install CREATE EXTENSION hdfs_fdw; -- create server object CREATE SERVER hdfs_server FOREIGN DATA WRAPPER hdfs_fdw OPTIONS(host 'localhost', port '10000', client_type 'spark', auth_type 'LDAP'); -- create user mapping CREATE USER MAPPING FOR public SERVER hdfs_server OPTIONS (username 'user1', password 'pwd123'); -- create foreign table CREATE FOREIGN TABLE dept ( deptno INTEGER, dname VARCHAR(14), loc VARCHAR(13) ) SERVER hdfs_server OPTIONS (dbname 'fdw_db', table_name 'dept'); -- create foreign table CREATE FOREIGN TABLE emp ( empno INTEGER, ename VARCHAR(10), job VARCHAR(9), mgr INTEGER, hiredate DATE, sal INTEGER, comm INTEGER, deptno INTEGER ) SERVER hdfs_server OPTIONS (dbname 'fdw_db', table_name 'emp');
Queries with join pushdown:
--inner join edb=# EXPLAIN VERBOSE SELECT t1.ename, t2.dname FROM emp t1 INNER JOIN dept t2 ON ( t1.deptno = t2.deptno );
Output
QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan (cost=15.00..35.00 rows=5000 width=84) Output: t1.ename, t2.dname Relations: (fdw_db.emp t1) INNER JOIN (fdw_db.dept t2) Remote SQL: SELECT r1., r2. FROM (. r1 INNER JOIN . r2 ON (((r1. = r2.)))) (4 rows)
--left join edb=# EXPLAIN VERBOSE SELECT t1.ename, t2.dname FROM emp t1 LEFT JOIN dept t2 ON ( t1.deptno = t2.deptno );
Output
QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan (cost=15.00..35.00 rows=5000 width=84) Output: t1.ename, t2.dname Relations: (fdw_db.emp t1) LEFT JOIN (fdw_db.dept t2) Remote SQL: SELECT r1., r2. FROM (. r1 LEFT JOIN . r2 ON (((r1. = r2.)))) (4 rows)
--right join edb=# EXPLAIN VERBOSE SELECT t1.ename, t2.dname FROM emp t1 RIGHT JOIN dept t2 ON ( t1.deptno = t2.deptno );
Output
QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan (cost=15.00..35.00 rows=5000 width=84) Output: t1.ename, t2.dname Relations: (fdw_db.dept t2) LEFT JOIN (fdw_db.emp t1) Remote SQL: SELECT r1., r2. FROM (. r2 LEFT JOIN . r1 ON (((r1. = r2.)))) (4 rows)
--full join edb=# EXPLAIN VERBOSE SELECT t1.ename, t2.dname FROM emp t1 FULL JOIN dept t2 ON ( t1.deptno = t2.deptno );
Output
QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------- Foreign Scan (cost=15.00..35.00 rows=5000 width=84) Output: t1.ename, t2.dname Relations: (fdw_db.emp t1) FULL JOIN (fdw_db.dept t2) Remote SQL: SELECT r1., r2. FROM (. r1 FULL JOIN . r2 ON (((r1. = r2.)))) (4 rows)
--cross join edb=# EXPLAIN VERBOSE SELECT t1.ename, t2.dname FROM emp t1 CROSS JOIN dept t2;
Output
QUERY PLAN -------------------------------------------------------------------------------------------------------------- Foreign Scan (cost=15.00..35.00 rows=1000000 width=84) Output: t1.ename, t2.dname Relations: (fdw_db.emp t1) INNER JOIN (fdw_db.dept t2) Remote SQL: SELECT r1., r2. FROM (. r1 INNER JOIN . r2 ON (TRUE)) (4 rows)
Enable/disable GUC for join pushdown queries at the table level:
-- enable join pushdown at the table level ALTER FOREIGN TABLE emp OPTIONS (SET enable_join_pushdown 'true'); EXPLAIN (VERBOSE, COSTS OFF) SELECT e.empno, e.ename, d.dname FROM emp e JOIN dept d ON (e.deptno = d.deptno) ORDER BY e.empno;
Output
QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Output: e.empno, e.ename, d.dname Sort Key: e.empno -> Foreign Scan Output: e.empno, e.ename, d.dname Relations: (fdw_db.emp e) INNER JOIN (fdw_db.dept d) Remote SQL: SELECT r1., r1., r2. FROM (. r1 INNER JOIN . r2 ON (((r1. = r2.)))) (7 rows)
--Disable the GUC enable_join_pushdown. SET hdfs_fdw.enable_join_pushdown to false; -- Pushdown shouldn't happen as enable_join_pushdown is false. EXPLAIN (VERBOSE, COSTS OFF) SELECT e.empno, e.ename, d.dname FROM emp e JOIN dept d ON (e.deptno = d.deptno) ORDER BY e.empno;
Output
QUERY PLAN ------------------------------------------------------------------------------------------- Sort Output: e.empno, e.ename, d.dname Sort Key: e.empno -> Nested Loop Output: e.empno, e.ename, d.dname Join Filter: (e.deptno = d.deptno) -> Foreign Scan on public.emp e Output: e.empno, e.ename, e.job, e.mgr, e.hiredate, e.sal, e.comm, e.deptno Remote SQL: SELECT , , FROM . -> Materialize Output: d.dname, d.deptno -> Foreign Scan on public.dept d Output: d.dname, d.deptno Remote SQL: SELECT , FROM .
Enable/disable GUC for join pushdown queries at the session level:
SET hdfs_fdw.enable_join_pushdown to true; EXPLAIN (VERBOSE, COSTS OFF) SELECT e.empno, e.ename, d.dname FROM emp e JOIN dept d ON (e.deptno = d.deptno) ORDER BY e.empno;
Output
QUERY PLAN ------------------------------------------------------------------------------------------- Sort Output: e.empno, e.ename, d.dname Sort Key: e.empno -> Nested Loop Output: e.empno, e.ename, d.dname Join Filter: (e.deptno = d.deptno) -> Foreign Scan on public.emp e Output: e.empno, e.ename, e.job, e.mgr, e.hiredate, e.sal, e.comm, e.deptno Remote SQL: SELECT , , FROM . -> Materialize Output: d.dname, d.deptno -> Foreign Scan on public.dept d Output: d.dname, d.deptno Remote SQL: SELECT , FROM . (14 rows)
Note
You can also enable/disable join pushdown at the server level using a GUC. For more information, see CREATE SERVER.