.net - Best way to replicate Oracles range windowing function in SQL Server -


i need perform oracle query in sql server:

select case_id, channel_index,      min(su_min) sustained_min,      max(su_max) sustained_max (     select case_id, channel_index, start_time,         min(dms_value) on (partition case_id, channel_index order start_time               range numtodsinterval(3, 'minute') preceeding) su_max,         max(dms_value) on (partition case_id, channel_index order start_time               range numtodsinterval(3, 'minute') preceeding) su_min,          min(start_time) on (partition case_id, channel_index order start_time)              first_time      data_table order start_time      ) su_data       first_time + numtodsinterval(3, 'minute') <= start_time group     case_id, channel_index 

here attempted in basic t-sql job when case has 1 million+ records takes more 37 mins (after cancelled query):

alter procedure [dbo].[getsustainedvalues](    @case_id int,   @time_limit int,    @bypass_only bit = null)  begin  declare @time datetime, @channelindex int, @lastchannelindex int declare @tmin float, @tmax float, @min float, @max float, @caseid int  declare @results table(case_id int, channel_index int, max float null, min float null) declare cursorname cursor fast_forward     select start_time, channel_index continuous_data case_id = @case_id order channel_index, start_time open cursorname fetch next cursorname @time, @channelindex set @lastchannelindex = @channelindex while @@fetch_status = 0 begin     --print 'hello' --'chennel:' + convert (varchar(50), @channelindex,128) + '  time:' + convert (varchar(50), @time,128)     if @lastchannelindex != @channelindex     begin         --print 'starting new channel:' + convert (varchar(50), @channelindex,128)         -- starting on new channel insert data results         -- table , reset min/max         insert @results(case_id, channel_index, max, min) values(@case_id, @lastchannelindex, @max, @min)         set @max = null         set @min = null         set @lastchannelindex = @channelindex     end      select @tmax = max(dms_value), @tmin = min(dms_value)     continuous_data     case_id = @case_id , channel_index = @channelindex , start_time between dateadd(s, -(@time_limit-1), @time) , @time      having sum(value_duration) >= @time_limit     if @@rowcount > 0     begin         if @max null or @tmin > @max         begin             --print 'setting max:' + convert (varchar(50), @tmin,128) + ' channel:' + convert (varchar(50), @channelindex,128)             set @max = @tmin         end          if @min null or @tmax < @min         begin             set @min = @tmax         end     end     --print 'max:' + convert (varchar(50), @max,128) + '  min:' + convert (varchar(50), @min,128)     fetch next cursorname @time, @channelindex end close cursorname deallocate cursorname --print 'max:' + convert (varchar(50), @max,128) + '  min:' + convert (varchar(50), @min,128) select * @results end 

is place use clr stored procedure? other ideas make more efficient query?

edit 3-9-2012: don't focus on "first_time" field. there make sure 3 minute window starts 3 minutes data set. in query don't care first_time. need min/max sustained value 3 minute periods per channel.

here sample data contains 2 channels. notice duration of each sample not same:

create table #continuous_data (         case_id         int     ,   channel_index   int     ,   start_time      datetime     ,   dms_value       float,     ,   value_duration  smallint )  insert #continuous_data values (2081,   51, '2011-05-18 09:36:34.000',  90,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:36:39.000',  94.8125,    1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:40.000',  95.4375,    1) insert #continuous_data values (2081,   51, '2011-05-18 09:36:40.000',  96,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:36:41.000',  96.75,      1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:42.000',  98.0625,    2) insert #continuous_data values (2081,   50, '2011-05-18 09:36:44.000',  99.3125,    1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:45.000',  100.625,    1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:46.000',  101.9375,   2) insert #continuous_data values (2081,   51, '2011-05-18 09:36:46.000',  98,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:36:48.000',  103.25,     1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:49.000',  104.5625,   1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:50.000',  105.8125,   2) insert #continuous_data values (2081,   50, '2011-05-18 09:36:52.000',  107.125,    1) insert #continuous_data values (2081,   51, '2011-05-18 09:36:52.000',  92,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:36:53.000',  108.4375,   1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:54.000',  109.75,     1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:55.000',  111.0625,   2) insert #continuous_data values (2081,   50, '2011-05-18 09:36:57.000',  112.3125,   1) insert #continuous_data values (2081,   50, '2011-05-18 09:36:58.000',  113.625,    1) insert #continuous_data values (2081,   51, '2011-05-18 09:36:58.000',  86,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:36:59.000',  114.9375,   2) insert #continuous_data values (2081,   50, '2011-05-18 09:37:01.000',  116.25,     1) insert #continuous_data values (2081,   50, '2011-05-18 09:37:02.000',  117.5,      1) insert #continuous_data values (2081,   50, '2011-05-18 09:37:03.000',  118.8125,   2) insert #continuous_data values (2081,   51, '2011-05-18 09:37:04.000',  80,     6) insert #continuous_data values (2081,   50, '2011-05-18 09:37:05.000',  120.125,    1) insert #continuous_data values (2081,   50, '2011-05-18 09:37:06.000',  121.4375,   1) insert #continuous_data values (2081,   50, '2011-05-18 09:37:07.000',  122.75,     1) insert #continuous_data values (2081,   50, '2011-05-18 09:37:08.000',  124.0625,   1) 

if understand correct, want following

for each case_id, channel_index combination:

  1. find lowest max value 3 minute windows (min sustained value)
  2. find highest min value 3 minutes windows (max sustained value).
  3. use data preceeding 3 minutes. if 3 minutes has not elapsed since first (min) start_time value, exclude data.

there still several unexplained differences between oracle query , solution (both stored procedure , clr stored procedure):

  1. the oracle query doesn't ensure time difference each window 3 minutes. takes min/max value preceeding 3 minutes. clause first_time + numtodsinterval(3, 'minute') <= start_time removes time windows before first 3 minutes has elapsed.
  2. the value_duration column in sample data, not used in solution
  3. the sample data not include 3 minutes of data, changed time range 10 seconds
  4. you did not list expected results sample data

solution -- may not fastest solution, should work --

step 0: window time range -- sample data not include 3 minutes of data, used variable hold desired number of seconds window time range. actual data, use 180 seconds.

declare @seconds int set @seconds = 10 

step 1: first time -- although first_time isn't important, still necessary make sure don't include incomplete time periods. used later exclude data before first complete time period has elapsed.

-- query return first_time, last_time, , range_time -- range_time first complete time period using time range select  case_id      ,   channel_index      ,   min(start_time) first_time     ,   dateadd(ss, @seconds, min(start_time)) range_time     ,   max(start_time) last_time    #continuous_data  group case_id, channel_index order case_id, channel_index  -- results sample data case_id     channel_index first_time              range_time              last_time ----------- ------------- ----------------------- ----------------------- ----------------------- 2081        50            2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 2011-05-18 09:37:08.000 2081        51            2011-05-18 09:36:34.000 2011-05-18 09:36:44.000 2011-05-18 09:37:04.000 

step 2: time windows -- oracle query uses partition case_id, channel_index order start_time range numtodsinterval(3, 'minute') preceeding find minimum , maximum dms_value first_time in subquery. since sql server not have range functionality, need use subquery define 3 minute windows. oracle query uses range ... preceeding, sql server range use dateadd negative value:

-- windowing each time range. window negative time -- range each start_time row select  case_id      ,   channel_index      ,   dateadd(ss, -@seconds, start_time) window_start     ,   start_time                         window_end    #continuous_data  order case_id, channel_index, start_time 

step 3: min/max time windows -- next need find minimum , maximum values each window. majority of calculation performed , needs debugging expected results.

-- find maximum , minimum values each window range -- included start_time min/max/diff debugging select  su.case_id      ,   su.channel_index      ,   win.window_end      ,   max(dms_value) dms_max     ,   min(dms_value) dms_min     ,   min(su.start_time) time_min     ,   max(su.start_time) time_max     ,   datediff(ss, min(su.start_time), max(su.start_time)) time_diff    #continuous_data su    join (         -- windowing each time range. window negative time         -- range each start_time row         select  case_id              ,   channel_index              ,   dateadd(ss, -@seconds, start_time) window_start             ,   start_time                         window_end            #continuous_data      ) win         on (    su.case_id       = win.case_id             , su.channel_index = win.channel_index)    join (         -- find first_time , add time range         select  case_id              ,   channel_index              ,   min(start_time)                        first_time             ,   dateadd(ss, @seconds, min(start_time)) range_time            #continuous_data          group case_id, channel_index     ) fir         on (    su.case_id       = fir.case_id             , su.channel_index = fir.channel_index)   su.start_time between win.window_start , win.window_end     , win.window_end >= fir.range_time group su.case_id, su.channel_index, win.window_end order su.case_id, su.channel_index, win.window_end  -- results sample data: case_id     channel_index window_end              dms_max                dms_min                time_min                time_max                time_diff ----------- ------------- ----------------------- ---------------------- ---------------------- ----------------------- ----------------------- ----------- 2081        50            2011-05-18 09:36:49.000 104.5625               94.8125                2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 10 2081        50            2011-05-18 09:36:50.000 105.8125               95.4375                2011-05-18 09:36:40.000 2011-05-18 09:36:50.000 10 2081        50            2011-05-18 09:36:52.000 107.125                98.0625                2011-05-18 09:36:42.000 2011-05-18 09:36:52.000 10 2081        50            2011-05-18 09:36:53.000 108.4375               99.3125                2011-05-18 09:36:44.000 2011-05-18 09:36:53.000 9 2081        50            2011-05-18 09:36:54.000 109.75                 99.3125                2011-05-18 09:36:44.000 2011-05-18 09:36:54.000 10 2081        50            2011-05-18 09:36:55.000 111.0625               100.625                2011-05-18 09:36:45.000 2011-05-18 09:36:55.000 10 2081        50            2011-05-18 09:36:57.000 112.3125               103.25                 2011-05-18 09:36:48.000 2011-05-18 09:36:57.000 9 2081        50            2011-05-18 09:36:58.000 113.625                103.25                 2011-05-18 09:36:48.000 2011-05-18 09:36:58.000 10 2081        50            2011-05-18 09:36:59.000 114.9375               104.5625               2011-05-18 09:36:49.000 2011-05-18 09:36:59.000 10 2081        50            2011-05-18 09:37:01.000 116.25                 107.125                2011-05-18 09:36:52.000 2011-05-18 09:37:01.000 9 2081        50            2011-05-18 09:37:02.000 117.5                  107.125                2011-05-18 09:36:52.000 2011-05-18 09:37:02.000 10 2081        50            2011-05-18 09:37:03.000 118.8125               108.4375               2011-05-18 09:36:53.000 2011-05-18 09:37:03.000 10 2081        50            2011-05-18 09:37:05.000 120.125                111.0625               2011-05-18 09:36:55.000 2011-05-18 09:37:05.000 10 2081        50            2011-05-18 09:37:06.000 121.4375               112.3125               2011-05-18 09:36:57.000 2011-05-18 09:37:06.000 9 2081        50            2011-05-18 09:37:07.000 122.75                 112.3125               2011-05-18 09:36:57.000 2011-05-18 09:37:07.000 10 2081        50            2011-05-18 09:37:08.000 124.0625               113.625                2011-05-18 09:36:58.000 2011-05-18 09:37:08.000 10 2081        51            2011-05-18 09:36:46.000 98                     96                     2011-05-18 09:36:40.000 2011-05-18 09:36:46.000 6 2081        51            2011-05-18 09:36:52.000 98                     92                     2011-05-18 09:36:46.000 2011-05-18 09:36:52.000 6 2081        51            2011-05-18 09:36:58.000 92                     86                     2011-05-18 09:36:52.000 2011-05-18 09:36:58.000 6 2081        51            2011-05-18 09:37:04.000 86                     80                     2011-05-18 09:36:58.000 2011-05-18 09:37:04.000 6 

step 4: finally, can put return lowest max value , highest min value each time window:

select  su.case_id      ,   su.channel_index      ,   min(dms_max) su_min     ,   max(dms_min) su_max    (         select  su.case_id              ,   su.channel_index              ,   win.window_end              ,   max(dms_value) dms_max             ,   min(dms_value) dms_min            #continuous_data su            join (                 -- windowing each time range. window negative time                 -- range each start_time row                 select  case_id                      ,   channel_index                      ,   dateadd(ss, -@seconds, start_time) window_start                     ,   start_time                         window_end                    #continuous_data              ) win                 on (    su.case_id       = win.case_id                     , su.channel_index = win.channel_index)            join (                 -- find first_time , add time range                 select  case_id                      ,   channel_index                      ,   min(start_time)                        first_time                     ,   dateadd(ss, @seconds, min(start_time)) range_time                    #continuous_data                  group case_id, channel_index             ) fir                 on (    su.case_id       = fir.case_id                     , su.channel_index = fir.channel_index)           su.start_time between win.window_start , win.window_end             , win.window_end >= fir.range_time         group su.case_id, su.channel_index, win.window_end ) su group su.case_id, su.channel_index order su.case_id, su.channel_index  -- results sample data: case_id     channel_index su_min                 su_max ----------- ------------- ---------------------- ---------------------- 2081        50            104.5625               113.625 2081        51            86                     96 

Comments

Popular posts from this blog

jasper reports - Fixed header in Excel using JasperReports -

media player - Android: mediaplayer went away with unhandled events -

python - ('The SQL contains 0 parameter markers, but 50 parameters were supplied', 'HY000') or TypeError: 'tuple' object is not callable -