.net - Best way to replicate Oracles range windowing function in SQL Server -
i need perform oracle query in sql server:
select case_id, channel_index, min(su_min) sustained_min, max(su_max) sustained_max ( select case_id, channel_index, start_time, min(dms_value) on (partition case_id, channel_index order start_time range numtodsinterval(3, 'minute') preceeding) su_max, max(dms_value) on (partition case_id, channel_index order start_time range numtodsinterval(3, 'minute') preceeding) su_min, min(start_time) on (partition case_id, channel_index order start_time) first_time data_table order start_time ) su_data first_time + numtodsinterval(3, 'minute') <= start_time group case_id, channel_index
here attempted in basic t-sql job when case has 1 million+ records takes more 37 mins (after cancelled query):
alter procedure [dbo].[getsustainedvalues]( @case_id int, @time_limit int, @bypass_only bit = null) begin declare @time datetime, @channelindex int, @lastchannelindex int declare @tmin float, @tmax float, @min float, @max float, @caseid int declare @results table(case_id int, channel_index int, max float null, min float null) declare cursorname cursor fast_forward select start_time, channel_index continuous_data case_id = @case_id order channel_index, start_time open cursorname fetch next cursorname @time, @channelindex set @lastchannelindex = @channelindex while @@fetch_status = 0 begin --print 'hello' --'chennel:' + convert (varchar(50), @channelindex,128) + ' time:' + convert (varchar(50), @time,128) if @lastchannelindex != @channelindex begin --print 'starting new channel:' + convert (varchar(50), @channelindex,128) -- starting on new channel insert data results -- table , reset min/max insert @results(case_id, channel_index, max, min) values(@case_id, @lastchannelindex, @max, @min) set @max = null set @min = null set @lastchannelindex = @channelindex end select @tmax = max(dms_value), @tmin = min(dms_value) continuous_data case_id = @case_id , channel_index = @channelindex , start_time between dateadd(s, -(@time_limit-1), @time) , @time having sum(value_duration) >= @time_limit if @@rowcount > 0 begin if @max null or @tmin > @max begin --print 'setting max:' + convert (varchar(50), @tmin,128) + ' channel:' + convert (varchar(50), @channelindex,128) set @max = @tmin end if @min null or @tmax < @min begin set @min = @tmax end end --print 'max:' + convert (varchar(50), @max,128) + ' min:' + convert (varchar(50), @min,128) fetch next cursorname @time, @channelindex end close cursorname deallocate cursorname --print 'max:' + convert (varchar(50), @max,128) + ' min:' + convert (varchar(50), @min,128) select * @results end
is place use clr stored procedure? other ideas make more efficient query?
edit 3-9-2012: don't focus on "first_time" field. there make sure 3 minute window starts 3 minutes data set. in query don't care first_time. need min/max sustained value 3 minute periods per channel.
here sample data contains 2 channels. notice duration of each sample not same:
create table #continuous_data ( case_id int , channel_index int , start_time datetime , dms_value float, , value_duration smallint ) insert #continuous_data values (2081, 51, '2011-05-18 09:36:34.000', 90, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:36:39.000', 94.8125, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:40.000', 95.4375, 1) insert #continuous_data values (2081, 51, '2011-05-18 09:36:40.000', 96, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:36:41.000', 96.75, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:42.000', 98.0625, 2) insert #continuous_data values (2081, 50, '2011-05-18 09:36:44.000', 99.3125, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:45.000', 100.625, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:46.000', 101.9375, 2) insert #continuous_data values (2081, 51, '2011-05-18 09:36:46.000', 98, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:36:48.000', 103.25, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:49.000', 104.5625, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:50.000', 105.8125, 2) insert #continuous_data values (2081, 50, '2011-05-18 09:36:52.000', 107.125, 1) insert #continuous_data values (2081, 51, '2011-05-18 09:36:52.000', 92, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:36:53.000', 108.4375, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:54.000', 109.75, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:55.000', 111.0625, 2) insert #continuous_data values (2081, 50, '2011-05-18 09:36:57.000', 112.3125, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:36:58.000', 113.625, 1) insert #continuous_data values (2081, 51, '2011-05-18 09:36:58.000', 86, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:36:59.000', 114.9375, 2) insert #continuous_data values (2081, 50, '2011-05-18 09:37:01.000', 116.25, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:37:02.000', 117.5, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:37:03.000', 118.8125, 2) insert #continuous_data values (2081, 51, '2011-05-18 09:37:04.000', 80, 6) insert #continuous_data values (2081, 50, '2011-05-18 09:37:05.000', 120.125, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:37:06.000', 121.4375, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:37:07.000', 122.75, 1) insert #continuous_data values (2081, 50, '2011-05-18 09:37:08.000', 124.0625, 1)
if understand correct, want following
for each case_id, channel_index combination:
- find lowest max value 3 minute windows (min sustained value)
- find highest min value 3 minutes windows (max sustained value).
- use data preceeding 3 minutes. if 3 minutes has not elapsed since first (min)
start_time
value, exclude data.
there still several unexplained differences between oracle query , solution (both stored procedure , clr stored procedure):
- the oracle query doesn't ensure time difference each window 3 minutes. takes min/max value preceeding 3 minutes. clause
first_time + numtodsinterval(3, 'minute') <= start_time
removes time windows before first 3 minutes has elapsed. - the
value_duration
column in sample data, not used in solution - the sample data not include 3 minutes of data, changed time range 10 seconds
- you did not list expected results sample data
solution -- may not fastest solution, should work --
step 0: window time range -- sample data not include 3 minutes of data, used variable hold desired number of seconds window time range. actual data, use 180 seconds.
declare @seconds int set @seconds = 10
step 1: first time -- although first_time
isn't important, still necessary make sure don't include incomplete time periods. used later exclude data before first complete time period has elapsed.
-- query return first_time, last_time, , range_time -- range_time first complete time period using time range select case_id , channel_index , min(start_time) first_time , dateadd(ss, @seconds, min(start_time)) range_time , max(start_time) last_time #continuous_data group case_id, channel_index order case_id, channel_index -- results sample data case_id channel_index first_time range_time last_time ----------- ------------- ----------------------- ----------------------- ----------------------- 2081 50 2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 2011-05-18 09:37:08.000 2081 51 2011-05-18 09:36:34.000 2011-05-18 09:36:44.000 2011-05-18 09:37:04.000
step 2: time windows -- oracle query uses partition case_id, channel_index order start_time range numtodsinterval(3, 'minute') preceeding
find minimum , maximum dms_value
first_time
in subquery. since sql server not have range
functionality, need use subquery define 3 minute windows. oracle query uses range ... preceeding
, sql server range use dateadd
negative value:
-- windowing each time range. window negative time -- range each start_time row select case_id , channel_index , dateadd(ss, -@seconds, start_time) window_start , start_time window_end #continuous_data order case_id, channel_index, start_time
step 3: min/max time windows -- next need find minimum , maximum values each window. majority of calculation performed , needs debugging expected results.
-- find maximum , minimum values each window range -- included start_time min/max/diff debugging select su.case_id , su.channel_index , win.window_end , max(dms_value) dms_max , min(dms_value) dms_min , min(su.start_time) time_min , max(su.start_time) time_max , datediff(ss, min(su.start_time), max(su.start_time)) time_diff #continuous_data su join ( -- windowing each time range. window negative time -- range each start_time row select case_id , channel_index , dateadd(ss, -@seconds, start_time) window_start , start_time window_end #continuous_data ) win on ( su.case_id = win.case_id , su.channel_index = win.channel_index) join ( -- find first_time , add time range select case_id , channel_index , min(start_time) first_time , dateadd(ss, @seconds, min(start_time)) range_time #continuous_data group case_id, channel_index ) fir on ( su.case_id = fir.case_id , su.channel_index = fir.channel_index) su.start_time between win.window_start , win.window_end , win.window_end >= fir.range_time group su.case_id, su.channel_index, win.window_end order su.case_id, su.channel_index, win.window_end -- results sample data: case_id channel_index window_end dms_max dms_min time_min time_max time_diff ----------- ------------- ----------------------- ---------------------- ---------------------- ----------------------- ----------------------- ----------- 2081 50 2011-05-18 09:36:49.000 104.5625 94.8125 2011-05-18 09:36:39.000 2011-05-18 09:36:49.000 10 2081 50 2011-05-18 09:36:50.000 105.8125 95.4375 2011-05-18 09:36:40.000 2011-05-18 09:36:50.000 10 2081 50 2011-05-18 09:36:52.000 107.125 98.0625 2011-05-18 09:36:42.000 2011-05-18 09:36:52.000 10 2081 50 2011-05-18 09:36:53.000 108.4375 99.3125 2011-05-18 09:36:44.000 2011-05-18 09:36:53.000 9 2081 50 2011-05-18 09:36:54.000 109.75 99.3125 2011-05-18 09:36:44.000 2011-05-18 09:36:54.000 10 2081 50 2011-05-18 09:36:55.000 111.0625 100.625 2011-05-18 09:36:45.000 2011-05-18 09:36:55.000 10 2081 50 2011-05-18 09:36:57.000 112.3125 103.25 2011-05-18 09:36:48.000 2011-05-18 09:36:57.000 9 2081 50 2011-05-18 09:36:58.000 113.625 103.25 2011-05-18 09:36:48.000 2011-05-18 09:36:58.000 10 2081 50 2011-05-18 09:36:59.000 114.9375 104.5625 2011-05-18 09:36:49.000 2011-05-18 09:36:59.000 10 2081 50 2011-05-18 09:37:01.000 116.25 107.125 2011-05-18 09:36:52.000 2011-05-18 09:37:01.000 9 2081 50 2011-05-18 09:37:02.000 117.5 107.125 2011-05-18 09:36:52.000 2011-05-18 09:37:02.000 10 2081 50 2011-05-18 09:37:03.000 118.8125 108.4375 2011-05-18 09:36:53.000 2011-05-18 09:37:03.000 10 2081 50 2011-05-18 09:37:05.000 120.125 111.0625 2011-05-18 09:36:55.000 2011-05-18 09:37:05.000 10 2081 50 2011-05-18 09:37:06.000 121.4375 112.3125 2011-05-18 09:36:57.000 2011-05-18 09:37:06.000 9 2081 50 2011-05-18 09:37:07.000 122.75 112.3125 2011-05-18 09:36:57.000 2011-05-18 09:37:07.000 10 2081 50 2011-05-18 09:37:08.000 124.0625 113.625 2011-05-18 09:36:58.000 2011-05-18 09:37:08.000 10 2081 51 2011-05-18 09:36:46.000 98 96 2011-05-18 09:36:40.000 2011-05-18 09:36:46.000 6 2081 51 2011-05-18 09:36:52.000 98 92 2011-05-18 09:36:46.000 2011-05-18 09:36:52.000 6 2081 51 2011-05-18 09:36:58.000 92 86 2011-05-18 09:36:52.000 2011-05-18 09:36:58.000 6 2081 51 2011-05-18 09:37:04.000 86 80 2011-05-18 09:36:58.000 2011-05-18 09:37:04.000 6
step 4: finally, can put return lowest max value , highest min value each time window:
select su.case_id , su.channel_index , min(dms_max) su_min , max(dms_min) su_max ( select su.case_id , su.channel_index , win.window_end , max(dms_value) dms_max , min(dms_value) dms_min #continuous_data su join ( -- windowing each time range. window negative time -- range each start_time row select case_id , channel_index , dateadd(ss, -@seconds, start_time) window_start , start_time window_end #continuous_data ) win on ( su.case_id = win.case_id , su.channel_index = win.channel_index) join ( -- find first_time , add time range select case_id , channel_index , min(start_time) first_time , dateadd(ss, @seconds, min(start_time)) range_time #continuous_data group case_id, channel_index ) fir on ( su.case_id = fir.case_id , su.channel_index = fir.channel_index) su.start_time between win.window_start , win.window_end , win.window_end >= fir.range_time group su.case_id, su.channel_index, win.window_end ) su group su.case_id, su.channel_index order su.case_id, su.channel_index -- results sample data: case_id channel_index su_min su_max ----------- ------------- ---------------------- ---------------------- 2081 50 104.5625 113.625 2081 51 86 96
Comments
Post a Comment