forked from docs/doc-exports
Reviewed-by: Kacur, Michal <michal.kacur@t-systems.com> Co-authored-by: Yang, Tong <yangtong2@huawei.com> Co-committed-by: Yang, Tong <yangtong2@huawei.com>
334 lines
36 KiB
HTML
334 lines
36 KiB
HTML
<a name="mrs_01_24513"></a><a name="mrs_01_24513"></a>
|
|
|
|
<h1 class="topictitle1">UDF Overview</h1>
|
|
<div id="body0000001582950985"><p id="mrs_01_24513__p1559104313464">IoTDB provides multiple built-in functions and user-defined functions (UDFs) to meet users' computing requirements.</p>
|
|
<div class="section" id="mrs_01_24513__section11415531202312"><h4 class="sectiontitle">UDF Types</h4><p id="mrs_01_24513__p18112144917230"><a href="#mrs_01_24513__table869011383477">Table 1</a> lists the UDF types supported by IoTDB.</p>
|
|
|
|
<div class="tablenoborder"><a name="mrs_01_24513__table869011383477"></a><a name="table869011383477"></a><table cellpadding="4" cellspacing="0" summary="" id="mrs_01_24513__table869011383477" frame="border" border="1" rules="all"><caption><b>Table 1 </b>UDF types</caption><thead align="left"><tr id="mrs_01_24513__row86912387474"><th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.2.3.2.3.1.1"><p id="mrs_01_24513__p19691538124715">Type</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.2.3.2.3.1.2"><p id="mrs_01_24513__p10691838174712">Description</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="mrs_01_24513__row669143874710"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.2.3.2.3.1.1 "><p id="mrs_01_24513__p17691538184719">User-defined timeseries generating function (UDTF)</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.2.3.2.3.1.2 "><p id="mrs_01_24513__p3691838114712">This type of function can take multiple time series as input and generate one time series, which can contain any number of data points.</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
<div class="section" id="mrs_01_24513__section73615552410"><h4 class="sectiontitle">UDTF</h4><p id="mrs_01_24513__p119281835165310">To write a UDTF, you need to implement the <strong id="mrs_01_24513__b182481046114413">org.apache.iotdb.db.query.udf.api.UDTF</strong> interface and provide at least the <strong id="mrs_01_24513__b67301056124417">beforeStart</strong> method and one <strong id="mrs_01_24513__b1970962104519">transform</strong> method.</p>
|
|
<p id="mrs_01_24513__p14215174585411"><a href="#mrs_01_24513__table13622155265515">Table 2</a> describes all interfaces that can be implemented by users.</p>
|
|
|
|
<div class="tablenoborder"><a name="mrs_01_24513__table13622155265515"></a><a name="table13622155265515"></a><table cellpadding="4" cellspacing="0" summary="" id="mrs_01_24513__table13622155265515" frame="border" border="1" rules="all"><caption><b>Table 2 </b>Interface description</caption><thead align="left"><tr id="mrs_01_24513__row86231752115516"><th align="left" class="cellrowborder" valign="top" width="31.58315831583159%" id="mcps1.3.3.4.2.4.1.1"><p id="mrs_01_24513__p17597912205710">Interface Definition</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="51.365136513651365%" id="mcps1.3.3.4.2.4.1.2"><p id="mrs_01_24513__p1562325210557">Description</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="17.051705170517053%" id="mcps1.3.3.4.2.4.1.3"><p id="mrs_01_24513__p9623125285516">Mandatory</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="mrs_01_24513__row176231452145511"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p1562311528555">void validate(UDFParameterValidator validator) throws Exception</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p1462345219554">This method is used to validate <strong id="mrs_01_24513__b4764273497">UDFParameters</strong> and is executed before <strong id="mrs_01_24513__b6710123175120">beforeStart</strong> is called.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p06232522552">No</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row1562335213556"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p2062325235510">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p262316524551">This is an initialization method used to call the user-defined initialization behavior before the UDTF processes the input data. Each time a user executes a UDTF query, the framework constructs a new UDF instance, and this method is called. It is called only once in the lifecycle of each UDF instance.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p1623155265515">Yes</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row146231752125517"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p56232521553">void transform(Row row, PointCollector collector) throws Exception</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p1162375213551">This method is called by the framework. When you choose to use the <strong id="mrs_01_24513__b2077013278216">RowByRowAccessStrategy</strong> strategy in <strong id="mrs_01_24513__b114019331423">beforeStart</strong> to consume raw data, this data processing method is called. The input data is passed in by <strong id="mrs_01_24513__b107022261954">Row</strong>, and the result is output by <strong id="mrs_01_24513__b774140063">PointCollector</strong>. You need to call the data collection method provided by <strong id="mrs_01_24513__b17925185691515">collector</strong> in this method to determine the output data.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p7623155295519">Use either this method or <strong id="mrs_01_24513__b138981959181912">transform(RowWindow rowWindow, PointCollector collector)</strong>.</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row1562385211558"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p762311529552">void transform(RowWindow rowWindow, PointCollector collector) throws Exception</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p362345235510">This method is called by the framework. When you choose to use the <strong id="mrs_01_24513__b753813157212">SlidingSizeWindowAccessStrategy</strong> or <strong id="mrs_01_24513__b288181912119">SlidingTimeWindowAccessStrategy</strong> strategy in <strong id="mrs_01_24513__b1857918367213">beforeStart</strong> to consume raw data, this data processing method will be called. The input data is passed in by <strong id="mrs_01_24513__b19672340202211">RowWindow</strong>, and the result is output by <strong id="mrs_01_24513__b267311409220">PointCollector</strong>. You need to call the data collection method provided by <strong id="mrs_01_24513__b5691854102214">collector</strong> in this method to determine the output data.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p762345285513">Use either this method or <strong id="mrs_01_24513__b1861165918221">transform(Row row, PointCollector collector)</strong>.</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row10623135214553"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p136236522554">void terminate(PointCollector collector) throws Exception</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p20623952125511">This method is called by the framework after all <strong id="mrs_01_24513__b688151352410">transform</strong> calls have been executed and before <strong id="mrs_01_24513__b423154518241">beforeDestroy</strong> is called. In a single UDF query, this method will be called only once. You need to call the data collection method provided by <strong id="mrs_01_24513__b1192512499265">collector</strong> in this method to determine the output data.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p4623195215556">No</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row4623452135514"><td class="cellrowborder" valign="top" width="31.58315831583159%" headers="mcps1.3.3.4.2.4.1.1 "><p id="mrs_01_24513__p1623145245514">void beforeDestroy()</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="51.365136513651365%" headers="mcps1.3.3.4.2.4.1.2 "><p id="mrs_01_24513__p36234524551">This method is called by the framework after the last input data is processed, and will be called only once in the lifecycle of each UDF instance.</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="17.051705170517053%" headers="mcps1.3.3.4.2.4.1.3 "><p id="mrs_01_24513__p262313528552">No</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<p id="mrs_01_24513__p71075560718"><strong id="mrs_01_24513__b163819471285">Calling sequence of each method:</strong></p>
|
|
<ol id="mrs_01_24513__ol71072564712"><li id="mrs_01_24513__li1510718564719"><strong id="mrs_01_24513__b421414245292">void validate(UDFParameterValidator validator) throws Exception</strong></li><li id="mrs_01_24513__li13107356876"><strong id="mrs_01_24513__b298712179297">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception</strong></li><li id="mrs_01_24513__li17107256478"><strong id="mrs_01_24513__b198845622910">void transform(Row row, PointCollector collector) throws Exception</strong> or <strong id="mrs_01_24513__b96911014298">void transform(RowWindow rowWindow, PointCollector collector) throws Exception</strong></li><li id="mrs_01_24513__li111071356979"><strong id="mrs_01_24513__b10826333192913">void terminate(PointCollector collector) throws Exception</strong></li><li id="mrs_01_24513__li131072567713"><strong id="mrs_01_24513__b2034343917291">void beforeDestroy()</strong></li></ol>
|
|
<div class="notice" id="mrs_01_24513__note101731551313"><span class="noticetitle"><img src="public_sys-resources/notice_3.0-en-us.png"> </span><div class="noticebody"><p id="mrs_01_24513__p517311512132">Each time the framework executes a UDTF query, a new UDF instance will be constructed. When the query ends, this UDF instance will be destroyed. Therefore, the internal data of the instances in different UDTF queries (even in the same SQL statement) is isolated. You can maintain some state data in the UDTF without considering the impact of concurrency and other factors.</p>
|
|
</div></div>
|
|
<p id="mrs_01_24513__p99277281882"><strong id="mrs_01_24513__b180573511171311">Interface usage: </strong></p>
|
|
<ul id="mrs_01_24513__ul1192702813816"><li id="mrs_01_24513__li69271528682">void validate(UDFParameterValidator validator) throws Exception<p id="mrs_01_24513__p1792719281289"><a name="mrs_01_24513__li69271528682"></a><a name="li69271528682"></a>The <strong id="mrs_01_24513__b64211256133519">validate</strong> method is used to validate the parameters entered by users.</p>
|
|
<p id="mrs_01_24513__p99272283815">In this method, you can limit the number and types of input time series, check the attributes of user input, or perform any custom logic verification.</p>
|
|
</li></ul>
|
|
<ul id="mrs_01_24513__ul11927152810815"><li id="mrs_01_24513__li892710281811">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception<p id="mrs_01_24513__p6927428687"><a name="mrs_01_24513__li892710281811"></a><a name="li892710281811"></a>Using this method, you can do the following things:</p>
|
|
<ul id="mrs_01_24513__ul9317132791814"><li id="mrs_01_24513__li13317627121811">Use <strong id="mrs_01_24513__b0308204751315">UDFParameters</strong> to get the time series paths and parse the entered key-value pair attributes.</li><li id="mrs_01_24513__li1631762781810">Set information required for running the UDF. That is, set the strategy to access the raw data and set the output data type in <strong id="mrs_01_24513__b983414472618">UDTFConfigurations</strong>.</li><li id="mrs_01_24513__li8317162721817">Create resources, such as creating external connections and opening files.</li></ul>
|
|
</li></ul>
|
|
</div>
|
|
<div class="section" id="mrs_01_24513__section39185019102"><h4 class="sectiontitle">UDFParameters</h4><p id="mrs_01_24513__p1582065371016"><strong id="mrs_01_24513__b1889182814296">UDFParameters</strong> is used to parse the UDF parameters in SQL statements (the part in the parentheses following the UDF name in the SQL statements). The parameters consist of two parts. The first part is the paths and data types of the time series to be processed by the UDF. The second part is the key-value pair attributes for customization.</p>
|
|
<p id="mrs_01_24513__p8726034141117">Example:</p>
|
|
<pre class="screen" id="mrs_01_24513__screen88629616126">SELECT UDF(s1, s2, 'key1'='iotdb', 'key2'='123.45') FROM root.sg.d;</pre>
|
|
<p id="mrs_01_24513__p1387271915112">Usage:</p>
|
|
<pre class="screen" id="mrs_01_24513__screen578641810142">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception {
|
|
// parameters
|
|
for (PartialPath path : parameters.getPaths()) {
|
|
TSDataType dataType = parameters.getDataType(path);
|
|
// do something
|
|
}
|
|
String stringValue = parameters.getString("key1"); // iotdb
|
|
Float floatValue = parameters.getFloat("key2"); // 123.45
|
|
Double doubleValue = parameters.getDouble("key3"); // null
|
|
int intValue = parameters.getIntOrDefault("key4", 678); // 678
|
|
// do something
|
|
|
|
// configurations
|
|
// ...
|
|
}</pre>
|
|
</div>
|
|
<div class="section" id="mrs_01_24513__section17361193871418"><h4 class="sectiontitle">UDTFConfigurations</h4><p id="mrs_01_24513__p11332641131410">You can use <strong id="mrs_01_24513__b82941915183718">UDTFConfigurations</strong> to specify the strategy used by the UDF to access raw data and the type of the output time series.</p>
|
|
<p id="mrs_01_24513__p63412202157">Usage:</p>
|
|
<pre class="screen" id="mrs_01_24513__screen011917444151">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception {
|
|
// parameters
|
|
// ...
|
|
|
|
// configurations
|
|
configurations
|
|
.setAccessStrategy(new RowByRowAccessStrategy())
|
|
.setOutputDataType(TSDataType.INT32);
|
|
}</pre>
|
|
<p id="mrs_01_24513__p443215136170">The <strong id="mrs_01_24513__b17369172914410">setAccessStrategy</strong> method is used to set the strategy used by the UDF to access raw data. The <strong id="mrs_01_24513__b1924187184219">setOutputDataType</strong> method is used to set the data type of the output time series.</p>
|
|
<ul id="mrs_01_24513__ul1543210132171"><li id="mrs_01_24513__li3432161315176">setAccessStrategy<p id="mrs_01_24513__p15432813111718"><a name="mrs_01_24513__li3432161315176"></a><a name="li3432161315176"></a>Note that the raw data access strategy you set here determines which <strong id="mrs_01_24513__b8405519174316">transform</strong> method the framework will call. Implement the <strong id="mrs_01_24513__b842219333434">transform</strong> method corresponding to the raw data access strategy. You can also dynamically decide which strategy to set based on the attribute parameters parsed by <strong id="mrs_01_24513__b1582135184412">UDFParameters</strong>. Therefore, the two <strong id="mrs_01_24513__b14711212450">transform</strong> methods are also allowed to be implemented in one UDF.</p>
|
|
<p id="mrs_01_24513__p0432213111718">The following are the strategies you can set.</p>
|
|
|
|
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="mrs_01_24513__table447102661818" frame="border" border="1" rules="all"><thead align="left"><tr id="mrs_01_24513__row64742661818"><th align="left" class="cellrowborder" valign="top" width="22.682268226822682%" id="mcps1.3.5.6.1.3.1.4.1.1"><p id="mrs_01_24513__p16475268180">Interface Definition</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="52.33523352335233%" id="mcps1.3.5.6.1.3.1.4.1.2"><p id="mrs_01_24513__p148142619188">Description</p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="24.98249824982498%" id="mcps1.3.5.6.1.3.1.4.1.3"><p id="mrs_01_24513__p204810268185"><strong id="mrs_01_24513__b87982555466">transform</strong> Method to Call</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="mrs_01_24513__row048122611186"><td class="cellrowborder" valign="top" width="22.682268226822682%" headers="mcps1.3.5.6.1.3.1.4.1.1 "><p id="mrs_01_24513__p1248152613186">RowByRowAccessStrategy</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="52.33523352335233%" headers="mcps1.3.5.6.1.3.1.4.1.2 "><p id="mrs_01_24513__p64814262186">Processes raw data row by row. The framework calls the <strong id="mrs_01_24513__b11111181616">transform</strong> method once for each row of raw data input. When a UDF has only one input time series, a row of input is a data point in the input time series. When a UDF has multiple input time series, a row of input is a result record of the raw query (aligned by time) on these input time series. (In a row, there may be a column with a value of <strong id="mrs_01_24513__b1855737565">null</strong>, but not all of them are <strong id="mrs_01_24513__b986919122616">null</strong>.)</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="24.98249824982498%" headers="mcps1.3.5.6.1.3.1.4.1.3 "><p id="mrs_01_24513__p348226161818">void transform(Row row, PointCollector collector) throws Exception</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row548626101819"><td class="cellrowborder" valign="top" width="22.682268226822682%" headers="mcps1.3.5.6.1.3.1.4.1.1 "><p id="mrs_01_24513__p1748182618184">SlidingTimeWindowAccessStrategy</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="52.33523352335233%" headers="mcps1.3.5.6.1.3.1.4.1.2 "><p id="mrs_01_24513__p174842671810">Processes a batch of data in a fixed time interval each time. A data batch is called a window. The framework calls the <strong id="mrs_01_24513__b104771820131211">transform</strong> method once for each raw data input window. A window may contain multiple rows of data. Each row of data is a result record of the raw query (aligned by time) on these input time series. (In a row, there may be a column with a value of <strong id="mrs_01_24513__b9539105614157">null</strong>, but not all of them are <strong id="mrs_01_24513__b5118458121520">null</strong>.)</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="24.98249824982498%" headers="mcps1.3.5.6.1.3.1.4.1.3 "><p id="mrs_01_24513__p848202613189">void transform(RowWindow rowWindow, PointCollector collector) throws Exception</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row144862611820"><td class="cellrowborder" valign="top" width="22.682268226822682%" headers="mcps1.3.5.6.1.3.1.4.1.1 "><p id="mrs_01_24513__p1488265182">SlidingSizeWindowAccessStrategy</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="52.33523352335233%" headers="mcps1.3.5.6.1.3.1.4.1.2 "><p id="mrs_01_24513__p5482268186">Processes raw data batch by batch, and each batch contains a fixed number of raw data rows (except the last batch). A data batch is called a window. The framework calls the <strong id="mrs_01_24513__b1139205531710">transform</strong> method once for each raw data input window. A window may contain multiple rows of data. Each row of data is a result record of the raw query (aligned by time) on these input time series. (In a row, there may be a column with a value of <strong id="mrs_01_24513__b881809101817">null</strong>, but not all of them are <strong id="mrs_01_24513__b158181094189">null</strong>.)</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="24.98249824982498%" headers="mcps1.3.5.6.1.3.1.4.1.3 "><p id="mrs_01_24513__p3481626181811">void transform(RowWindow rowWindow, PointCollector collector) throws Exception</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<p id="mrs_01_24513__p24641816223">The construction of <strong id="mrs_01_24513__b154491558171813">RowByRowAccessStrategy</strong> does not require any parameters.</p>
|
|
<div class="p" id="mrs_01_24513__p8461918172217"><strong id="mrs_01_24513__b12407119192">SlidingTimeWindowAccessStrategy</strong> has multiple constructors, and you can pass the following types of parameters to the constructors:<ul id="mrs_01_24513__ul6164182813222"><li id="mrs_01_24513__li91640283221">Start time and end time of the display window on the time axis</li><li id="mrs_01_24513__li8164192892215">Time interval for dividing the time axis (must be positive)</li><li id="mrs_01_24513__li71646287229">Time sliding step (not required to be greater than or equal to the time interval, but must be a positive number)</li></ul>
|
|
</div>
|
|
<p id="mrs_01_24513__p9766656162119">The display window on the time axis is optional. If these parameters are not provided, the start time of the display window will be set to the same as the minimum timestamp of the query result set, and the end time of the display window will be set to the same as the maximum timestamp of the query result set.</p>
|
|
<p id="mrs_01_24513__p11766175612115">The sliding step parameter is also optional. If the parameter is not provided, the sliding step will be set to the same as the time interval for dividing the time axis.</p>
|
|
<p id="mrs_01_24513__p0766256102118">The following figure shows the relationship between the three types of parameters.</p>
|
|
<p id="mrs_01_24513__p121131735192310"><span><img id="mrs_01_24513__image711353511231" src="en-us_image_0000001583272185.png"></span></p>
|
|
<p id="mrs_01_24513__p115966214248">Note that the actual time interval of some of the last time windows may be less than the specified time interval parameter. In addition, the number of data rows in some time windows may be 0. In this case, the framework will also call the <strong id="mrs_01_24513__b14426637184512">transform</strong> method for the empty windows.</p>
|
|
<div class="p" id="mrs_01_24513__p165965210249"><strong id="mrs_01_24513__b1544118567458">SlidingSizeWindowAccessStrategy</strong> has multiple constructors, and you can pass the following types of parameters to the constructors:<ul id="mrs_01_24513__ul1245473792412"><li id="mrs_01_24513__li9454133713242">Window size, that is, the number of data rows in a data processing window. Note that the number of data rows in some of the last windows may be less than the specified number of data rows.</li><li id="mrs_01_24513__li9454183718241">Sliding step, that is, the number of rows between the first point of the next window and the first point of the current window. (This parameter is not required to be greater than or equal to the window size, but must be a positive number.)</li></ul>
|
|
</div>
|
|
<p id="mrs_01_24513__p1059642192417">The sliding step parameter is optional. If this parameter is not provided, the sliding step will be set to the same as the window size.</p>
|
|
<p id="mrs_01_24513__p773685614244">Note that the output time series type you set by calling setOutputDataType determines the type of data that <strong id="mrs_01_24513__b170825315311">PointCollector</strong> in the <strong id="mrs_01_24513__b172418545416">transform</strong> method can actually receive. The relationship between the output data type set in <strong id="mrs_01_24513__b5967921185520">setOutputDataType</strong> and the actual data output type that <strong id="mrs_01_24513__b190015272559">PointCollector</strong> can receive is as follows.</p>
|
|
|
|
<div class="tablenoborder"><table cellpadding="4" cellspacing="0" summary="" id="mrs_01_24513__table15236101417261" frame="border" border="1" rules="all"><thead align="left"><tr id="mrs_01_24513__row7237121414262"><th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.5.6.1.14.1.3.1.1"><p id="mrs_01_24513__p19237121492612">Output Data Type Set in <strong id="mrs_01_24513__b10801130155616">setOutputDataType</strong></p>
|
|
</th>
|
|
<th align="left" class="cellrowborder" valign="top" width="50%" id="mcps1.3.5.6.1.14.1.3.1.2"><p id="mrs_01_24513__p19237201462610">Data Type That PointCollector Can Receive</p>
|
|
</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody><tr id="mrs_01_24513__row11237151416264"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p1523791416264">INT32</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p8237181420266">int</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row16237201411268"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p1223751419264">INT64</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p6237131417262">long</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row123701442611"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p1523711147264">FLOAT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p172377147266">float</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row112371214142611"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p1123714142267">DOUBLE</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p92371914192615">double</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row152376144267"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p2023711416263">BOOLEAN</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p523712144264">boolean</p>
|
|
</td>
|
|
</tr>
|
|
<tr id="mrs_01_24513__row12374148265"><td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.1 "><p id="mrs_01_24513__p123711415265">TEXT</p>
|
|
</td>
|
|
<td class="cellrowborder" valign="top" width="50%" headers="mcps1.3.5.6.1.14.1.3.1.2 "><p id="mrs_01_24513__p1723714148264">java.lang.String and org.apache.iotdb.tsfile.utils.Binary</p>
|
|
</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</li><li id="mrs_01_24513__li1445185619135">The type of the output time series of a UDTF is determined at runtime. The UDTF can dynamically determine the type of the output time series according to the type of the input time series.<div class="p" id="mrs_01_24513__p342851811295"><a name="mrs_01_24513__li1445185619135"></a><a name="li1445185619135"></a>Example:<pre class="screen" id="mrs_01_24513__screen232432533010">void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) throws Exception {
|
|
// do something
|
|
// ...
|
|
|
|
configurations
|
|
.setAccessStrategy(new RowByRowAccessStrategy())
|
|
.setOutputDataType(parameters.getDataType(0));
|
|
}</pre>
|
|
</div>
|
|
<ul id="mrs_01_24513__ul1865715229319"><li id="mrs_01_24513__li1565819222317">void transform(Row row, PointCollector collector) throws Exception<p id="mrs_01_24513__p1878143312316"><a name="mrs_01_24513__li1565819222317"></a><a name="li1565819222317"></a>You need to implement this method when you specify the strategy for the UDF to read raw data as <strong id="mrs_01_24513__b117701813244">RowByRowAccessStrategy</strong> in <strong id="mrs_01_24513__b199995614415">beforeStart</strong>.</p>
|
|
<p id="mrs_01_24513__p10781163323118">This method processes one row of raw data at a time. The raw data is input from <strong id="mrs_01_24513__b1257715541266">Row</strong> and output by <strong id="mrs_01_24513__b1241813561461">PointCollector</strong>. You can choose to output any number of data points in one <strong id="mrs_01_24513__b183741224819">transform</strong> call. Note that the type of the output data points must be the same as the type you set in the <strong id="mrs_01_24513__b7849449383">beforeStart</strong> method, and the timestamps of the output data points must be strictly monotonically increasing.</p>
|
|
<p id="mrs_01_24513__p1781733163115">The following is a complete UDF example that implements the <strong id="mrs_01_24513__b05367121121">void transform(Row row, PointCollector collector) throws Exception</strong> method. It is an adder that receives two columns of time series as input. When two data points in a row are not <strong id="mrs_01_24513__b129401083161">null</strong>, this UDF will output the algebraic sum of these two data points.</p>
|
|
<pre class="screen" id="mrs_01_24513__screen1858543973314">import org.apache.iotdb.db.query.udf.api.UDTF;
|
|
import org.apache.iotdb.db.query.udf.api.access.Row;
|
|
import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.strategy.RowByRowAccessStrategy;
|
|
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
|
|
|
|
public class Adder implements UDTF {
|
|
|
|
@Override
|
|
public void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) {
|
|
configurations
|
|
.setOutputDataType(TSDataType.INT64)
|
|
.setAccessStrategy(new RowByRowAccessStrategy());
|
|
}
|
|
|
|
@Override
|
|
public void transform(Row row, PointCollector collector) throws Exception {
|
|
if (row.isNull(0) || row.isNull(1)) {
|
|
return;
|
|
}
|
|
collector.putLong(row.getTime(), row.getLong(0) + row.getLong(1));
|
|
}
|
|
}</pre>
|
|
</li></ul>
|
|
<ul id="mrs_01_24513__ul5128195253320"><li id="mrs_01_24513__li1212835263311">void transform(RowWindow rowWindow, PointCollector collector) throws Exception<p id="mrs_01_24513__p1192893733414"><a name="mrs_01_24513__li1212835263311"></a><a name="li1212835263311"></a>You need to implement this method when you specify the strategy for the UDF to read raw data as <strong id="mrs_01_24513__b1446910458173">SlidingTimeWindowAccessStrategy</strong> or <strong id="mrs_01_24513__b1365135151713">SlidingSizeWindowAccessStrategy</strong>.</p>
|
|
<p id="mrs_01_24513__p722824173410">This method processes a batch of data in a fixed number of rows or a fixed time interval each time, and the container containing this batch of data is called a window. The raw data is input from <strong id="mrs_01_24513__b32041753122011">RowWindow</strong> and output by <strong id="mrs_01_24513__b720445310204">PointCollector</strong>. <strong id="mrs_01_24513__b11145121618232">RowWindow</strong> can help you access a batch of rows, and it provides a set of interfaces for random access and iterative access to this batch of rows. You can choose to output any number of data points in one <strong id="mrs_01_24513__b6622852820">transform</strong> call. Note that the type of output data points must be the same as the type you set in the <strong id="mrs_01_24513__b1295444542819">beforeStart</strong> method, and the timestamps of output data points must be strictly monotonically increasing.</p>
|
|
<p id="mrs_01_24513__p1822884173418">The following is a complete UDF example that implements the <strong id="mrs_01_24513__b6341014122910">void transform(RowWindow rowWindow, PointCollector collector) throws Exception</strong> method. It is a counter that receives any number of time series as input, and its function is to count and output the number of data rows in each time window within a specified time range.</p>
|
|
<pre class="screen" id="mrs_01_24513__screen192211159173611">import java.io.IOException;
|
|
import org.apache.iotdb.db.query.udf.api.UDTF;
|
|
import org.apache.iotdb.db.query.udf.api.access.RowWindow;
|
|
import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.strategy.SlidingTimeWindowAccessStrategy;
|
|
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
|
|
|
|
public class Counter implements UDTF {
|
|
|
|
@Override
|
|
public void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) {
|
|
configurations
|
|
.setOutputDataType(TSDataType.INT32)
|
|
.setAccessStrategy(new SlidingTimeWindowAccessStrategy(
|
|
parameters.getLong("time_interval"),
|
|
parameters.getLong("sliding_step"),
|
|
parameters.getLong("display_window_begin"),
|
|
parameters.getLong("display_window_end")));
|
|
}
|
|
|
|
@Override
|
|
public void transform(RowWindow rowWindow, PointCollector collector) throws Exception {
|
|
if (rowWindow.windowSize() != 0) {
|
|
collector.putInt(rowWindow.getRow(0).getTime(), rowWindow.windowSize());
|
|
}
|
|
}
|
|
}</pre>
|
|
</li></ul>
|
|
<ul id="mrs_01_24513__ul1019421511378"><li id="mrs_01_24513__li119413152377">void terminate(PointCollector collector) throws Exception<p id="mrs_01_24513__p1937742823710"><a name="mrs_01_24513__li119413152377"></a><a name="li119413152377"></a>In some scenarios, a UDF needs to traverse all the raw data to calculate the final output data points. The <strong id="mrs_01_24513__b31941418377">terminate</strong> interface provides support for those scenarios.</p>
|
|
<p id="mrs_01_24513__p14377182883718">This method is called after all <strong id="mrs_01_24513__b99251412133813">transform</strong> calls have been executed and before <strong id="mrs_01_24513__b29263127382">beforeDestroy</strong> is called. You can implement the <strong id="mrs_01_24513__b333094412386">transform</strong> method to perform pure data processing, and implement the <strong id="mrs_01_24513__b7509181014398">terminate</strong> method to output the processing results.</p>
|
|
<p id="mrs_01_24513__p8377122873717">The processing results need to be output by <strong id="mrs_01_24513__b2964151816405">PointCollector</strong>. You can choose to output any number of data points in one <strong id="mrs_01_24513__b2074201015410">terminate</strong> call. Note that the type of the output data points must be the same as the type you set in the <strong id="mrs_01_24513__b5290104954115">beforeStart</strong> method, and the timestamps of the output data points must be strictly monotonically increasing.</p>
|
|
<p id="mrs_01_24513__p19377102816373">The following is a complete UDF example that implements the <strong id="mrs_01_24513__b6725135824114">void terminate(PointCollector collector) throws Exception</strong> method. It takes one time series whose data type is <strong id="mrs_01_24513__b1897633564211">INT32</strong> as input, and outputs the maximum value point of the series.</p>
|
|
<pre class="screen" id="mrs_01_24513__screen1913814315393">import java.io.IOException;
|
|
import org.apache.iotdb.db.query.udf.api.UDTF;
|
|
import org.apache.iotdb.db.query.udf.api.access.Row;
|
|
import org.apache.iotdb.db.query.udf.api.collector.PointCollector;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.config.UDTFConfigurations;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.parameter.UDFParameters;
|
|
import org.apache.iotdb.db.query.udf.api.customizer.strategy.RowByRowAccessStrategy;
|
|
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
|
|
|
|
public class Max implements UDTF {
|
|
|
|
private Long time;
|
|
private int value;
|
|
|
|
@Override
|
|
public void beforeStart(UDFParameters parameters, UDTFConfigurations configurations) {
|
|
configurations
|
|
.setOutputDataType(TSDataType.INT32)
|
|
.setAccessStrategy(new RowByRowAccessStrategy());
|
|
}
|
|
|
|
@Override
|
|
public void transform(Row row, PointCollector collector) {
|
|
int candidateValue = row.getInt(0);
|
|
if (time == null || value < candidateValue) {
|
|
time = row.getTime();
|
|
value = candidateValue;
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void terminate(PointCollector collector) throws IOException {
|
|
if (time != null) {
|
|
collector.putInt(time, value);
|
|
}
|
|
}
|
|
}</pre>
|
|
</li></ul>
|
|
<ul id="mrs_01_24513__ul17610133504014"><li id="mrs_01_24513__li861063518403">void beforeDestroy()<p id="mrs_01_24513__p261043518409"><a name="mrs_01_24513__li861063518403"></a><a name="li861063518403"></a>This method is used to terminate a UDF.</p>
|
|
<p id="mrs_01_24513__p196101035124014">This method is called by the framework. For a UDF instance, <strong id="mrs_01_24513__b174965264613">beforeDestroy</strong> will be called after the last record is processed. In the entire lifecycle of the instance, <strong id="mrs_01_24513__b263432217465">beforeDestroy</strong> will be called only once.</p>
|
|
</li></ul>
|
|
</li></ul>
|
|
</div>
|
|
</div>
|
|
<div>
|
|
<div class="familylinks">
|
|
<div class="parentlink"><strong>Parent topic:</strong> <a href="mrs_01_24512.html">UDFs</a></div>
|
|
</div>
|
|
</div>
|
|
|