-- Explain: simple and variants explain --explain analyze --explain analyze verbose --explain (analyze, buffers, format json) select operator, name from osm_austria_point where shop='computer' -- Use index: correlated index explain analyze select name, shop, operator, brand from osm_austria_point where brand = 'BMW' -- and shop in('car', 'car_parts', 'car_repair') -- Use index: existing index not selective enough explain analyze select name, shop, operator, brand from osm_austria_point where shop = 'computer' --where shop <> 'computer' -- <- Back to the slides -- Index not usable because of function call explain analyze select name, shop, operator, brand from osm_austria_point -- where name = 'Eurospar' where lower(name) = 'eurospar'; -- Functional index on lower(name) --create index osm_austria_point_lower_name_idx on osm_austria_point (lower(name)); -- Use of functional index explain analyze select name, shop, operator, brand from osm_austria_point where lower(name) = 'eurospar'; -- => Index is used -- Clean up --drop index osm_austria_point_lower_name_idx; -- <- back to the slides -- CTE introduction: Subgroup selection -- explain analyze with supermarket as ( select * from osm_austria_point where shop = 'supermarket' ) select operator, count(*) as opcount from supermarket group by operator order by opcount desc -- Select small result set from multiple tables, joining --explain analyze with streets as ( select name as street from osm_austria_roads where highway in ('primary', 'secondary', 'residential') and name is not null ), shops as ( select name as shop, "addr:street" as street, "addr:housenumber" as housenr from osm_austria_point where shop is not null and name is not null and "addr:street" is not null ), shopstreets as ( select shop, sh.street, housenr from shops sh inner join streets st on lower(sh.street) = lower(st.street) ) select street, count(shop) as shops, min(housenr) as min_housenr, max(housenr) as max_housenr from shopstreets group by street order by shops desc -- Query with prepared parameters with param as ( -- select cast(? as text) as category select cast('hairdresser' as text) as category ) select name, operator, brand from osm_austria_point cross join param where shop = param.category group by name, operator, brand -- Test the query with example values, then change to ? parameter for deployment -- <- back to the slides -- Recursive CTE: Table with recursive structure drop table if exists recursive_test; create table recursive_test ( id integer primary key, parent_id integer, name text ); insert into recursive_test values (1, null, 'Root 1'); insert into recursive_test values (2, 1, 'Child 1/1'); insert into recursive_test values (3, 2, 'Child 1/1/1'); insert into recursive_test values (4, 1, 'Child 1/2'); insert into recursive_test values (5, 1, 'Child 1/3'); insert into recursive_test values (11, null, 'Root 2'); insert into recursive_test values (12, 11, 'Child 2/1'); select * from recursive_test; -- Recursive query with recursive structure as ( -- Root elements without parent, depth = 1 select id, name, cast('' as text) as parent_name, name as full_path, 1 as depth from recursive_test where (parent_id is null or parent_id = id) union -- Children have a parent select rt.id, rt.name, p.name as parent_name, p.full_path || ': ' || rt.name, p.depth + 1 as depth from recursive_test rt -- Join using the parent ID inner join structure p on rt.parent_id = p.id -- Failsafe depth limit, optional where p.depth <= 100 ) select * from structure order by full_path -- Wrong entry showing to itself insert into recursive_test values (13, 13, 'Broken'); delete from recursive_test where name = 'Broken'; -- <- back to the slides -- Example: writable CTE begin; insert into recursive_test (id, parent_id, name) values ((SELECT max(id) FROM recursive_test) + 10, null, 'New root') returning *; -- Not sure yet, let's check the result first and roll back for now. rollback; -- <- back to the slides -- Window Functions: Sales per day, average, comparison with average with sales_per_day as ( select datum, sum(sales) as sales from advsql_sales where datum between '2018-06-01' and '2018-06-30' group by datum ) select *, -- Average daily sales round(avg(sales) over (), 1) as avgsales, -- Day value compared with average (absolute) round(sales - avg(sales) over (), 1) as avg_comparison, -- Relative comparison with average (percent) round((sales - avg(sales) over ()) / avg(sales) over () * 100, 1) as comparison_pct from sales_per_day order by datum -- Window Functions: Monthly Avg, Min, Max with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where datum between '2018-05-20' and '2018-06-10' group by datum ) select datum, sales, round(avg(sales) over (partition by month), 1) as avg_sales_month, min(sales) over (partition by month) as min_sales_month, max(sales) over (partition by month) as max_sales_month from sales_per_day order by datum -- Window Functions: Cumulative Sum with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where extract(year from datum) = 2018 group by datum ) select datum, sales, -- Cumulative sum in the current month. Starts anew every month. sum(sales) over (partition by month order by datum) as month_cum_sum, -- Cumulative sum over the whole year sum(sales) over (order by datum) as year_cum_sum from sales_per_day order by datum -- Window Functions: RANK and ROW_NUMBER with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where extract(year from datum) = 2018 group by datum ) select month, datum, sales, -- Rank in the current month (next rank number left out after ties) RANK() over (partition by month order by sales desc) as month_rank, -- Rank in the current month (no number left out after ties) DENSE_RANK() over (partition by month order by sales desc) as month_dense_rank, -- Unique number inside the month ROW_NUMBER() over (partition by month order by sales desc) as month_rownr, -- Rank in the whole year rank() over (order by sales desc) as year_rank from sales_per_day order by month, month_rank -- Searching for duplicates in the last century select operator, name, count(osm_id) as dups from osm_austria_point where shop='computer' and name is not null group by operator, name having count(osm_id) > 1 order by dups desc, operator, name -- And then iteration over the found values -- Searching for duplicates today with computershops as ( select *, count(osm_id) over (partition by name) as dups, min(osm_id) over (partition by name) as minid from osm_austria_point where shop = 'computer' and name is not null ) select osm_id, shop, name, dups, "addr:street" from computershops where dups > 1 order by dups desc, name -- Example deletion command with computershops as ( -- Like before ) delete from osm_austria_point where osm_id in ( select osm_id from computershops where dups > 1 and osm_id <> minid ) -- Window functions: ABC analysis -- A: the first 70 % by shipped products, B: until 90 %, C: below that with shops as ( select p.name as shop, p.osm_id, sum(s.sales) as shopsales from advsql_sales s inner join osm_austria_point p using (osm_id) group by p.name, p.osm_id ), shops_abc as ( select shop, shopsales, -- Rank of this shop among all rank() over (order by shopsales desc) as rang, -- Cumulative sum by sales (from highest to lowest) sum(shopsales) over (order by shopsales desc) as cum_sum, -- Overall sum --sum(shopsales) over () as overall_sum, -- A and B group limits sum(shopsales) over () * 0.7 as group_a, sum(shopsales) over () * 0.9 as group_b, -- ABC classification case when sum(shopsales) over (order by shopsales desc) / sum(shopsales) over () <= 0.7 then 'A' when sum(shopsales) over (order by shopsales desc) / sum(shopsales) over () <= 0.9 then 'B' else 'C' end as abc from shops ), shops_abc_extended as ( select shop, shopsales, abc || ' (' || rank() over (partition by abc order by shopsales desc) || '/' || count(*) over (partition by abc) || ') (' || rank() over (order by shopsales desc) || '/' || count(*) over () || ')' as abc_extended from shops_abc order by shopsales desc ) select * from shops_abc -- from shops_abc_extended -- A1 (1/100) (2/200) -- Window functions: Top 3 days per month -- CTE with window function and selection with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where extract(year from datum) = 2018 group by datum ), monthrank as ( select month, datum, sales, RANK() over (partition by month order by sales desc) as monthrank from sales_per_day ) select month, datum, sales, monthrank from monthrank where monthrank <= 3 order by month, monthrank -- Window functions: moving average with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where datum between '2018-03-15' and '2018-04-15' group by datum ) select datum, sales, round(avg(sales) OVER (ORDER BY datum ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING)) AS avg7, round(avg(sales) OVER (ORDER BY datum ROWS BETWEEN 7 PRECEDING AND 6 FOLLOWING)) AS avg14 from sales_per_day order by datum -- Alternate window definition syntax with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where datum between '2018-03-15' and '2018-04-15' group by datum ) select datum, sales, round(avg(sales) OVER _7_days) AS avg7, round(avg(sales) OVER _14_days) AS avg14, count(sales) OVER _14_days as count14 from sales_per_day window _7_days as (order by datum rows between 3 preceding and 3 following), _14_days as (order by datum rows between 7 preceding and 6 following) order by datum -- Window functions: daily change with sales_per_day as ( select datum, extract(month from datum) as month, sum(sales) as sales from advsql_sales where datum between '2018-03-15' and '2018-04-15' group by datum ) select datum, sales, sales - lag(sales, 1) over (order by datum) as vs_yesterday, sales - lead(sales, 1) over (order by datum) as vs_tomorrow from sales_per_day order by datum -- <---- Back to the slides -- XML Example SELECT * FROM xmltest; -- XML Functions SELECT beschreibung, daten is document as is_document, xmlexists('//vehicles/vehicle' passing by ref daten) as xml_exists, xml_is_well_formed(daten::text) as xml_well_formed, daten FROM xmltest; -- XPath query SELECT beschreibung, unnest(xpath('//vehicles/vehicle/@id', daten)) as id, unnest(xpath('//vehicles/vehicle/@license_plate', daten)) as license_plate FROM xmltest -- XPath filter SELECT beschreibung, id, model_name FROM ( SELECT beschreibung, unnest(xpath('//vehicles/vehicle/@id', daten)) as id, unnest(xpath('//vehicles/vehicle/model/@model_name', daten)) as model_name FROM xmltest ) vehicles125ccm WHERE model_name::text like '%125%ccm'; -- JSON example select * FROM opendata; -- JSON functions SELECT bezeichnung, jsonb_pretty(daten) FROM opendata; SELECT bezeichnung, jsonb_object_keys(daten), jsonb_each(daten) FROM opendata; -- Accessing JSON elements SELECT bezeichnung, jsonb_array_elements(daten->'features') FROM opendata WHERE bezeichnung like 'Universität%'; SELECT bezeichnung, jsonb_array_elements(daten->'features')->'geometry'->'coordinates' as latlong FROM opendata WHERE bezeichnung like 'Universität%'; -- Filter on document properties with features as ( SELECT bezeichnung, jsonb_array_elements(daten->'features') as features FROM opendata WHERE bezeichnung like 'Universität%' ) SELECT features->'properties'->>'NAME' as name, features->'properties'->>'BEZEICHNUNG' as bezeichnung FROM features WHERE features->'properties'->>'NAME' like 'Universit%' ;