Добро пожаловать в форум, Guest  >>   Войти | Регистрация | Поиск | Правила | В избранное | Подписаться
Все форумы / Oracle Новый топик    Ответить
 Группировка значений по временным интервалам  [new]
urdada
Member

Откуда: Запиндрищинск
Сообщений: 49
Просьба помочь решить следующую задачу.

Есть таблицы, в которых хранится история изменения атрибутов сущности (в каждой - значение атрибута / дата начала действия значения / дата окончания действия значения). Хотелось бы при помощи SQL вывести список всех существовавших комбинаций значений атрибутов и периоды действия этих комбинаций.
Например:

Таблица значений атрибута 1 (для сущностей с id 1 и 2):
main_idattrstart_dateend_date
1A02.01.201910.01.2019
1A11.01.201912.01.2019
1B13.01.201915.01.2019
1A15.01.201920.01.2019
2A02.01.201905.01.2019
2A05.01.201910.01.2019


Таблица значений атрибута 2 (для сущностей с id 1 и 2):
main_idattrstart_dateend_date
1a05.01.201907.01.2019
1c08.01.201920.01.2019
2b02.01.201910.01.2019


Нужно получить список периодов, в которых действовали различные комбинации атрибутов для каждой сущности, если в виде кода, то нужно переписать запрос
SELECT * FROM tbl1 JOIN tbl2 ON tbl1.main_id=tbl2.main_id
в этом коде:
WITH tbl1 AS (
select 1 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('11.01.2019', 'dd.mm.yyyy') as start_date, to_date('12.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'B' as attr, to_date('13.01.2019', 'dd.mm.yyyy') as start_date, to_date('15.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('15.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('05.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
),
tbl2 AS (
select 1 as main_id, 'a' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('07.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'c' as attr, to_date('08.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'b' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
)
SELECT * FROM tbl1 JOIN tbl2 ON tbl1.main_id=tbl2.main_id

чтобы в результате получить таблицу такого вида:

main_idtbl1.attrtbl2.attrstart_dateend_date
1Anull02.01.201905.01.2019
1Aa05.01.201907.01.2019
1Anull07.01.201908.01.2019
1Ac08.01.201910.01.2019
1nullc10.01.201911.01.2019
1Ac11.01.201912.01.2019
1nullc12.01.201913.01.2019
1Bc13.01.201915.01.2019
1Ac15.01.201920.01.2019
2Ab02.01.201910.01.2019
15 авг 19, 22:16    [21950550]     Ответить | Цитировать Сообщить модератору
 Re: Группировка значений по временным интервалам  [new]
xtender
Member

Откуда: Мск
Сообщений: 5212
urdada,

+
WITH tbl1 AS (
select 1 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('11.01.2019', 'dd.mm.yyyy') as start_date, to_date('12.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'B' as attr, to_date('13.01.2019', 'dd.mm.yyyy') as start_date, to_date('15.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('15.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('05.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
),
tbl2 AS (
select 1 as main_id, 'a' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('07.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'c' as attr, to_date('08.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'b' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
)
,dates as ( -- получаем все интервалы:
   select lag(dt)over(order by dt) as start_date, dt as end_date
   from (-- получаем все точки:
      select distinct dt
      from (select start_date,end_date from tbl1
            union all
            select start_date,end_date from tbl2
           )
      unpivot (
         dt for tp in (start_date,end_date)
      )
   )
)
,split_data as ( -- выводим данные по каждому интервалу:
   select dt.* 
        , nvl(tbl1.main_id, tbl2.main_id) main_id
        , tbl1.attr as attr1
        , tbl2.attr as attr2
   from dates dt
        left join tbl1 
             on  dt.start_date< tbl1.end_date
             and dt.end_date  > tbl1.start_date
        left join tbl2
             on  (tbl1.main_id is null or tbl1.main_id=tbl2.main_id)
             and dt.start_date< tbl2.end_date
             and dt.end_date  > tbl2.start_date
   where dt.start_date is not null
   order by 3,1
)
-- группируем последовательные интервалы, где аттрибуты не меняются:
select *
from split_data
match_recognize (
   order by main_id,start_date
   MEASURES  STRT.main_id    as main_id
            ,STRT.start_date as start_date
            ,LAST(end_date)  as end_date
            ,STRT.attr1      as attr1  
            ,STRT.attr2      as attr2  
   PATTERN (STRT P*)
     DEFINE
         P AS  P.main_id    = prev(P.main_id)
           and P.start_date = prev(P.end_date)
           and P.attr1      = prev(P.attr1)
           and P.attr2      = prev(P.attr2)
   )
/
16 авг 19, 02:13    [21950628]     Ответить | Цитировать Сообщить модератору
 Re: Группировка значений по временным интервалам  [new]
urdada
Member

Откуда: Запиндрищинск
Сообщений: 49
xtender, снимаю шляпу!

это решение даже поизящнее того, до которого я додумался, хотя в общих чертах похоже
+
WITH tbl1 AS (
select 1 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('11.01.2019', 'dd.mm.yyyy') as start_date, to_date('12.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'B' as attr, to_date('13.01.2019', 'dd.mm.yyyy') as start_date, to_date('15.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'A' as attr, to_date('15.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('05.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'A' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
),
tbl2 AS (
select 1 as main_id, 'a' as attr, to_date('05.01.2019', 'dd.mm.yyyy') as start_date, to_date('07.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 1 as main_id, 'c' as attr, to_date('08.01.2019', 'dd.mm.yyyy') as start_date, to_date('20.01.2019', 'dd.mm.yyyy') as end_date from dual union all 
select 2 as main_id, 'b' as attr, to_date('02.01.2019', 'dd.mm.yyyy') as start_date, to_date('10.01.2019', 'dd.mm.yyyy') as end_date from dual
),
-- выпускайте кракена
period_dates AS ( -- все моменты периода
SELECT  period_date, main_id FROM
(SELECT start_date period_date, tbl1.main_id AS main_id  FROM tbl1 UNION SELECT end_date, tbl1.main_id FROM tbl1 UNION
SELECT start_date, tbl2.main_id FROM tbl2 UNION SELECT end_date, tbl2.main_id FROM tbl2) ORDER BY 1
),
periods AS ( -- значения атрибутов на момтенты
SELECT  p.period_date, LEAD(p.period_date) OVER (ORDER BY p.main_id, p.period_date) AS next_date,
p.main_id,
 NVL(tbl1.main_id,tbl2.main_id) AS tbl_main_id,
 tbl1.attr tbl1_attr,  tbl2.attr tbl2_attr,
 tbl1.start_date tbl1_start_date, tbl1.end_date tbl1_end_date,
 tbl2.start_date tbl2_start_date, tbl2.end_date tbl2_end_date
 FROM period_dates p
LEFT JOIN tbl1 ON  p.period_date BETWEEN tbl1.start_date AND tbl1.end_date-1/24/60/60 AND tbl1.main_id=p.main_id
LEFT JOIN tbl2 ON  p.period_date BETWEEN tbl2.start_date AND tbl2.end_date-1/24/60/60 AND tbl2.main_id=p.main_id AND
(
  tbl1.main_id=tbl2.main_id OR 
  (tbl1.main_id IS NULL OR tbl2.main_id IS NULL)
)
),
sverni AS ( -- еще нужно свернуть идущие подряд периоды
SELECT main_id, tbl1_attr, tbl2_attr, period_date, next_date FROM periods
WHERE tbl_main_id IS NOT NULL 
)
-------------------
SELECT main_id, tbl1_attr, tbl2_attr, 
 MIN(connect_by_root(t.period_date)) start_date, t.next_date end_date
  FROM sverni t
 WHERE connect_by_isleaf = 1
CONNECT BY (t.period_date = PRIOR t.next_date AND t.tbl1_attr = PRIOR tbl1_attr AND 
           t.tbl2_attr = PRIOR tbl2_attr AND t.main_id = PRIOR main_id)
 GROUP BY t.period_date,  t.next_date,  main_id, tbl1_attr, tbl2_attr
 ORDER BY main_id,  t.period_date


Премного благодарен )
16 авг 19, 03:04    [21950635]     Ответить | Цитировать Сообщить модератору
Все форумы / Oracle Ответить