diff --git a/docs/dli/sqlreference/ALL_META.TXT.json b/docs/dli/sqlreference/ALL_META.TXT.json new file mode 100644 index 00000000..474594b3 --- /dev/null +++ b/docs/dli/sqlreference/ALL_META.TXT.json @@ -0,0 +1,3822 @@ +[ + { + "uri":"dli_08_0221.html", + "product_code":"dli", + "code":"1", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Spark SQL Syntax Reference", + "title":"Spark SQL Syntax Reference", + "githuburl":"" + }, + { + "uri":"dli_08_0266.html", + "product_code":"dli", + "code":"2", + "des":"This section describes the common configuration items of the SQL syntax for DLI batch jobs.", + "doc_type":"sqlreference", + "kw":"Common Configuration Items of Batch SQL Jobs,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Common Configuration Items of Batch SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_08_0219.html", + "product_code":"dli", + "code":"3", + "des":"This section describes the Spark SQL syntax list provided by DLI. For details about the parameters and examples, see the syntax description.", + "doc_type":"sqlreference", + "kw":"SQL Syntax Overview of Batch Jobs,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"SQL Syntax Overview of Batch Jobs", + "githuburl":"" + }, + { + "uri":"dli_08_0070.html", + "product_code":"dli", + "code":"4", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Databases", + "title":"Databases", + "githuburl":"" + }, + { + "uri":"dli_08_0071.html", + "product_code":"dli", + "code":"5", + "des":"This statement is used to create a database.IF NOT EXISTS: Prevents system errors if the database to be created exists.COMMENT: Describes a database.DBPROPERTIES: Specifi", + "doc_type":"sqlreference", + "kw":"Creating a Database,Databases,SQL Syntax Reference", + "title":"Creating a Database", + "githuburl":"" + }, + { + "uri":"dli_08_0072.html", + "product_code":"dli", + "code":"6", + "des":"This statement is used to delete a database.IF EXISTS: Prevents system errors if the database to be deleted does not exist.DATABASE and SCHEMA can be used interchangeably", + "doc_type":"sqlreference", + "kw":"Deleting a Database,Databases,SQL Syntax Reference", + "title":"Deleting a Database", + "githuburl":"" + }, + { + "uri":"dli_08_0073.html", + "product_code":"dli", + "code":"7", + "des":"This syntax is used to view the information about a specified database, including the database name and database description.EXTENDED: Displays the database properties.If", + "doc_type":"sqlreference", + "kw":"Viewing a Specified Database,Databases,SQL Syntax Reference", + "title":"Viewing a Specified Database", + "githuburl":"" + }, + { + "uri":"dli_08_0074.html", + "product_code":"dli", + "code":"8", + "des":"This syntax is used to query all current databases.NoneKeyword DATABASES is equivalent to SCHEMAS. 
You can use either of them in this statement.View all the current datab", + "doc_type":"sqlreference", + "kw":"Viewing All Databases,Databases,SQL Syntax Reference", + "title":"Viewing All Databases", + "githuburl":"" + }, + { + "uri":"dli_08_0223.html", + "product_code":"dli", + "code":"9", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating an OBS Table", + "title":"Creating an OBS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0076.html", + "product_code":"dli", + "code":"10", + "des":"Create an OBS table using the DataSource syntax.The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of suppor", + "doc_type":"sqlreference", + "kw":"Creating an OBS Table Using the DataSource Syntax,Creating an OBS Table,SQL Syntax Reference", + "title":"Creating an OBS Table Using the DataSource Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0077.html", + "product_code":"dli", + "code":"11", + "des":"This statement is used to create an OBS table using the Hive syntax. The main differences between the DataSource and the Hive syntax lie in the supported data formats and", + "doc_type":"sqlreference", + "kw":"Creating an OBS Table Using the Hive Syntax,Creating an OBS Table,SQL Syntax Reference", + "title":"Creating an OBS Table Using the Hive Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0224.html", + "product_code":"dli", + "code":"12", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table", + "title":"Creating a DLI Table", + "githuburl":"" + }, + { + "uri":"dli_08_0098.html", + "product_code":"dli", + "code":"13", + "des":"This DataSource syntax can be used to create a DLI table. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table Using the DataSource Syntax,Creating a DLI Table,SQL Syntax Reference", + "title":"Creating a DLI Table Using the DataSource Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0204.html", + "product_code":"dli", + "code":"14", + "des":"This Hive syntax is used to create a DLI table. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of suppor", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table Using the Hive Syntax,Creating a DLI Table,SQL Syntax Reference", + "title":"Creating a DLI Table Using the Hive Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0087.html", + "product_code":"dli", + "code":"15", + "des":"This statement is used to delete tables.If the table is stored in OBS, only the metadata is deleted. 
The data stored on OBS is not deleted.If the table is stored in DLI, ", + "doc_type":"sqlreference", + "kw":"Deleting a Table,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Deleting a Table", + "githuburl":"" + }, + { + "uri":"dli_08_0089.html", + "product_code":"dli", + "code":"16", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Viewing Tables", + "title":"Viewing Tables", + "githuburl":"" + }, + { + "uri":"dli_08_0090.html", + "product_code":"dli", + "code":"17", + "des":"This statement is used to view all tables and views in the current database.FROM/IN: followed by the name of a database whose tables and views will be displayed.NoneCreat", + "doc_type":"sqlreference", + "kw":"Viewing All Tables,Viewing Tables,SQL Syntax Reference", + "title":"Viewing All Tables", + "githuburl":"" + }, + { + "uri":"dli_08_0091.html", + "product_code":"dli", + "code":"18", + "des":"This statement is used to show the statements for creating a table.CREATE TABLE: statement for creating a tableThe table specified in this statement must exist. Otherwise", + "doc_type":"sqlreference", + "kw":"Viewing Table Creation Statements,Viewing Tables,SQL Syntax Reference", + "title":"Viewing Table Creation Statements", + "githuburl":"" + }, + { + "uri":"dli_08_0092.html", + "product_code":"dli", + "code":"19", + "des":"Check the properties of a table.TBLPROPERTIES: This statement allows you to add a key/value property to a table.property_name is case sensitive. You cannot specify multip", + "doc_type":"sqlreference", + "kw":"Viewing Table Properties,Viewing Tables,SQL Syntax Reference", + "title":"Viewing Table Properties", + "githuburl":"" + }, + { + "uri":"dli_08_0093.html", + "product_code":"dli", + "code":"20", + "des":"This statement is used to query all columns in a specified table.COLUMNS: columns in the current tableFROM/IN: followed by the name of a database whose tables and views w", + "doc_type":"sqlreference", + "kw":"Viewing All Columns in a Specified Table,Viewing Tables,SQL Syntax Reference", + "title":"Viewing All Columns in a Specified Table", + "githuburl":"" + }, + { + "uri":"dli_08_0094.html", + "product_code":"dli", + "code":"21", + "des":"This statement is used to view all partitions in a specified table.PARTITIONS: partitions in a specified tablePARTITION: a specified partitionThe table specified in this ", + "doc_type":"sqlreference", + "kw":"Viewing All Partitions in a Specified Table,Viewing Tables,SQL Syntax Reference", + "title":"Viewing All Partitions in a Specified Table", + "githuburl":"" + }, + { + "uri":"dli_08_0105.html", + "product_code":"dli", + "code":"22", + "des":"This statement is used to view the table statistics. The names and data types of all columns in a specified table will be returned.EXTENDED: displays all metadata of the ", + "doc_type":"sqlreference", + "kw":"Viewing Table Statistics,Viewing Tables,SQL Syntax Reference", + "title":"Viewing Table Statistics", + "githuburl":"" + }, + { + "uri":"dli_08_0262.html", + "product_code":"dli", + "code":"23", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Modifying a Table", + "title":"Modifying a Table", + "githuburl":"" + }, + { + "uri":"dli_08_0263.html", + "product_code":"dli", + "code":"24", + "des":"This statement is used to add one or more new columns to a table.ADD COLUMNS: columns to addCOMMENT: column descriptionDo not run this SQL statement concurrently. Otherwi", + "doc_type":"sqlreference", + "kw":"Adding a Column,Modifying a Table,SQL Syntax Reference", + "title":"Adding a Column", + "githuburl":"" + }, + { + "uri":"dli_08_0354.html", + "product_code":"dli", + "code":"25", + "des":"DLI controls multiple versions of backup data for restoration. After the multiversion function is enabled, the system automatically backs up table data when you delete or", + "doc_type":"sqlreference", + "kw":"Enabling or Disabling Multiversion Backup,Modifying a Table,SQL Syntax Reference", + "title":"Enabling or Disabling Multiversion Backup", + "githuburl":"" + }, + { + "uri":"dli_08_0080.html", + "product_code":"dli", + "code":"26", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Syntax for Partitioning a Table", + "title":"Syntax for Partitioning a Table", + "githuburl":"" + }, + { + "uri":"dli_08_0081.html", + "product_code":"dli", + "code":"27", + "des":"After an OBS partitioned table is created, no partition information is generated for the table. Partition information is generated only after you:Insert data to the OBS p", + "doc_type":"sqlreference", + "kw":"Adding Partition Data (Only OBS Tables Supported),Syntax for Partitioning a Table,SQL Syntax Referen", + "title":"Adding Partition Data (Only OBS Tables Supported)", + "githuburl":"" + }, + { + "uri":"dli_08_0082.html", + "product_code":"dli", + "code":"28", + "des":"This statement is used to rename partitions.PARTITION: a specified partitionRENAME: new name of the partitionThis statement is used for OBS table operations.The table and", + "doc_type":"sqlreference", + "kw":"Renaming a Partition (Only OBS Tables Supported),Syntax for Partitioning a Table,SQL Syntax Referenc", + "title":"Renaming a Partition (Only OBS Tables Supported)", + "githuburl":"" + }, + { + "uri":"dli_08_0083.html", + "product_code":"dli", + "code":"29", + "des":"Deletes one or more partitions from a partitioned table.The table in which partitions are to be deleted must exist. 
Otherwise, an error is reported.The to-be-deleted part", + "doc_type":"sqlreference", + "kw":"Deleting a Partition,Syntax for Partitioning a Table,SQL Syntax Reference", + "title":"Deleting a Partition", + "githuburl":"" + }, + { + "uri":"dli_08_0343.html", + "product_code":"dli", + "code":"30", + "des":"This statement is used to delete one or more partitions based on specified conditions.This statement is used for OBS table operations only.The table in which partitions a", + "doc_type":"sqlreference", + "kw":"Deleting Partitions by Specifying Filter Criteria (Only OBS Tables Supported),Syntax for Partitionin", + "title":"Deleting Partitions by Specifying Filter Criteria (Only OBS Tables Supported)", + "githuburl":"" + }, + { + "uri":"dli_08_0084.html", + "product_code":"dli", + "code":"31", + "des":"This statement is used to modify the positions of table partitions.PARTITION: a specified partitionLOCATION: path of the partitionFor a table partition whose position is ", + "doc_type":"sqlreference", + "kw":"Altering the Partition Location of a Table (Only OBS Tables Supported),Syntax for Partitioning a Tab", + "title":"Altering the Partition Location of a Table (Only OBS Tables Supported)", + "githuburl":"" + }, + { + "uri":"dli_08_0079.html", + "product_code":"dli", + "code":"32", + "des":"This statement is used to update the partition information about a table in the Metastore.OrPARTITIONS: partition informationSERDEPROPERTIES: Serde attributeThis statemen", + "doc_type":"sqlreference", + "kw":"Updating Partitioned Table Data (Only OBS Tables Supported),Syntax for Partitioning a Table,SQL Synt", + "title":"Updating Partitioned Table Data (Only OBS Tables Supported)", + "githuburl":"" + }, + { + "uri":"dli_08_0359.html", + "product_code":"dli", + "code":"33", + "des":"Spark caches Parquet metadata to improve performance. If you update a Parquet table, the cached metadata is not updated. Spark SQL cannot find the newly inserted data and", + "doc_type":"sqlreference", + "kw":"Updating Table Metadata with REFRESH TABLE,Syntax for Partitioning a Table,SQL Syntax Reference", + "title":"Updating Table Metadata with REFRESH TABLE", + "githuburl":"" + }, + { + "uri":"dli_08_0100.html", + "product_code":"dli", + "code":"34", + "des":"The LOAD DATA function can be used to import data in CSV, Parquet, ORC, JSON, and Avro formats. The data is converted into the Parquet data format for storage.INPATH: pat", + "doc_type":"sqlreference", + "kw":"Importing Data to the Table,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Importing Data to the Table", + "githuburl":"" + }, + { + "uri":"dli_08_0095.html", + "product_code":"dli", + "code":"35", + "des":"This statement is used to insert the SELECT query result or a certain data record into a table.Insert the SELECT query result into a table.INSERT INTO [TABLE] [db_name.]t", + "doc_type":"sqlreference", + "kw":"Inserting Data,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Inserting Data", + "githuburl":"" + }, + { + "uri":"dli_08_0217.html", + "product_code":"dli", + "code":"36", + "des":"This statement is used to delete data from the DLI or OBS table.Only data in the DLI or OBS table can be deleted.", + "doc_type":"sqlreference", + "kw":"Clearing Data,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Clearing Data", + "githuburl":"" + }, + { + "uri":"dli_08_0205.html", + "product_code":"dli", + "code":"37", + "des":"This statement is used to directly write query results to a specified directory. 
The query results can be stored in CSV, Parquet, ORC, JSON, or Avro format.USING: Specifi", + "doc_type":"sqlreference", + "kw":"Exporting Search Results,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Exporting Search Results", + "githuburl":"" + }, + { + "uri":"dli_08_0349.html", + "product_code":"dli", + "code":"38", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Backing Up and Restoring Data of Multiple Versions", + "title":"Backing Up and Restoring Data of Multiple Versions", + "githuburl":"" + }, + { + "uri":"dli_08_0350.html", + "product_code":"dli", + "code":"39", + "des":"After multiversion is enabled, backup data is retained for seven days by default. You can change the retention period by setting system parameterdli.multi.version.retenti", + "doc_type":"sqlreference", + "kw":"Setting the Retention Period for Multiversion Backup Data,Backing Up and Restoring Data of Multiple ", + "title":"Setting the Retention Period for Multiversion Backup Data", + "githuburl":"" + }, + { + "uri":"dli_08_0351.html", + "product_code":"dli", + "code":"40", + "des":"After the multiversion function is enabled, you can run the SHOW HISTORY command to view the backup data of a table. For details about the syntax for enabling or disablin", + "doc_type":"sqlreference", + "kw":"Viewing Multiversion Backup Data,Backing Up and Restoring Data of Multiple Versions,SQL Syntax Refer", + "title":"Viewing Multiversion Backup Data", + "githuburl":"" + }, + { + "uri":"dli_08_0352.html", + "product_code":"dli", + "code":"41", + "des":"After the multiversion function is enabled, you can run the RESTORE TABLE statement to restore a table or partition of a specified version. For details about the syntax f", + "doc_type":"sqlreference", + "kw":"Restoring Multiversion Backup Data,Backing Up and Restoring Data of Multiple Versions,SQL Syntax Ref", + "title":"Restoring Multiversion Backup Data", + "githuburl":"" + }, + { + "uri":"dli_08_0353.html", + "product_code":"dli", + "code":"42", + "des":"After the multiversion function is enabled, expired backup data will be directly deleted by the system when theinsert overwrite or truncate statement is executed. You can", + "doc_type":"sqlreference", + "kw":"Configuring the Trash Bin for Expired Multiversion Data,Backing Up and Restoring Data of Multiple Ve", + "title":"Configuring the Trash Bin for Expired Multiversion Data", + "githuburl":"" + }, + { + "uri":"dli_08_0355.html", + "product_code":"dli", + "code":"43", + "des":"The retention period of multiversion backup data takes effect each time the insert overwrite or truncate statement is executed. If neither statement is executed for the t", + "doc_type":"sqlreference", + "kw":"Deleting Multiversion Backup Data,Backing Up and Restoring Data of Multiple Versions,SQL Syntax Refe", + "title":"Deleting Multiversion Backup Data", + "githuburl":"" + }, + { + "uri":"dli_08_0118.html", + "product_code":"dli", + "code":"44", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with an HBase Table", + "title":"Creating a Datasource Connection with an HBase Table", + "githuburl":"" + }, + { + "uri":"dli_08_0119.html", + "product_code":"dli", + "code":"45", + "des":"This statement is used to create a DLI table and associate it with an existing HBase table.Before creating a DLI table and associating it with HBase, you need to create a", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with HBase,Creating a Datasource Connection with an HBase Ta", + "title":"Creating a DLI Table and Associating It with HBase", + "githuburl":"" + }, + { + "uri":"dli_08_0120.html", + "product_code":"dli", + "code":"46", + "des":"This statement is used to insert data in a DLI table to the associated HBase table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field", + "doc_type":"sqlreference", + "kw":"Inserting Data to an HBase Table,Creating a Datasource Connection with an HBase Table,SQL Syntax Ref", + "title":"Inserting Data to an HBase Table", + "githuburl":"" + }, + { + "uri":"dli_08_0121.html", + "product_code":"dli", + "code":"47", + "des":"This statement is used to query data in an HBase table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be querie", + "doc_type":"sqlreference", + "kw":"Querying an HBase Table,Creating a Datasource Connection with an HBase Table,SQL Syntax Reference", + "title":"Querying an HBase Table", + "githuburl":"" + }, + { + "uri":"dli_08_0220.html", + "product_code":"dli", + "code":"48", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with an OpenTSDB Table", + "title":"Creating a Datasource Connection with an OpenTSDB Table", + "githuburl":"" + }, + { + "uri":"dli_08_0122.html", + "product_code":"dli", + "code":"49", + "des":"Run the CREATE TABLE statement to create the DLI table and associate it with the existing metric in OpenTSDB. 
This syntax supports the OpenTSDB of CloudTable and MRS.Befo", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with OpenTSDB,Creating a Datasource Connection with an OpenT", + "title":"Creating a DLI Table and Associating It with OpenTSDB", + "githuburl":"" + }, + { + "uri":"dli_08_0123.html", + "product_code":"dli", + "code":"50", + "des":"Run the INSERT INTO statement to insert the data in the DLI table to the associated OpenTSDB metric.If no metric exists on the OpenTSDB, a new metric is automatically cre", + "doc_type":"sqlreference", + "kw":"Inserting Data to the OpenTSDB Table,Creating a Datasource Connection with an OpenTSDB Table,SQL Syn", + "title":"Inserting Data to the OpenTSDB Table", + "githuburl":"" + }, + { + "uri":"dli_08_0124.html", + "product_code":"dli", + "code":"51", + "des":"This SELECT command is used to query data in an OpenTSDB table.If no metric exists in OpenTSDB, an error will be reported when the corresponding DLI table is queried.If t", + "doc_type":"sqlreference", + "kw":"Querying an OpenTSDB Table,Creating a Datasource Connection with an OpenTSDB Table,SQL Syntax Refere", + "title":"Querying an OpenTSDB Table", + "githuburl":"" + }, + { + "uri":"dli_08_0192.html", + "product_code":"dli", + "code":"52", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with a DWS table", + "title":"Creating a Datasource Connection with a DWS table", + "githuburl":"" + }, + { + "uri":"dli_08_0193.html", + "product_code":"dli", + "code":"53", + "des":"This statement is used to create a DLI table and associate it with an existing DWS table.Before creating a DLI table and associating it with DWS, you need to create a dat", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with DWS,Creating a Datasource Connection with a DWS table,S", + "title":"Creating a DLI Table and Associating It with DWS", + "githuburl":"" + }, + { + "uri":"dli_08_0194.html", + "product_code":"dli", + "code":"54", + "des":"This statement is used to insert data in a DLI table to the associated DWS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "doc_type":"sqlreference", + "kw":"Inserting Data to the DWS Table,Creating a Datasource Connection with a DWS table,SQL Syntax Referen", + "title":"Inserting Data to the DWS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0195.html", + "product_code":"dli", + "code":"55", + "des":"This statement is used to query data in a DWS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be queried m", + "doc_type":"sqlreference", + "kw":"Querying the DWS Table,Creating a Datasource Connection with a DWS table,SQL Syntax Reference", + "title":"Querying the DWS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0196.html", + "product_code":"dli", + "code":"56", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with an RDS Table", + "title":"Creating a Datasource Connection with an RDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0197.html", + "product_code":"dli", + "code":"57", + "des":"This statement is used to create a DLI table and associate it with an existing RDS table. This function supports access to the MySQL and PostgreSQL clusters of RDS.Before", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with RDS,Creating a Datasource Connection with an RDS Table,", + "title":"Creating a DLI Table and Associating It with RDS", + "githuburl":"" + }, + { + "uri":"dli_08_0198.html", + "product_code":"dli", + "code":"58", + "des":"This statement is used to insert data in a DLI table to the associated RDS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "doc_type":"sqlreference", + "kw":"Inserting Data to the RDS Table,Creating a Datasource Connection with an RDS Table,SQL Syntax Refere", + "title":"Inserting Data to the RDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0199.html", + "product_code":"dli", + "code":"59", + "des":"This statement is used to query data in an RDS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be queried ", + "doc_type":"sqlreference", + "kw":"Querying the RDS Table,Creating a Datasource Connection with an RDS Table,SQL Syntax Reference", + "title":"Querying the RDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0200.html", + "product_code":"dli", + "code":"60", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with a CSS Table", + "title":"Creating a Datasource Connection with a CSS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0201.html", + "product_code":"dli", + "code":"61", + "des":"This statement is used to create a DLI table and associate it with an existing CSS table.Before creating a DLI table and associating it with CSS, you need to create a dat", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with CSS,Creating a Datasource Connection with a CSS Table,S", + "title":"Creating a DLI Table and Associating It with CSS", + "githuburl":"" + }, + { + "uri":"dli_08_0202.html", + "product_code":"dli", + "code":"62", + "des":"This statement is used to insert data in a DLI table to the associated CSS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "doc_type":"sqlreference", + "kw":"Inserting Data to the CSS Table,Creating a Datasource Connection with a CSS Table,SQL Syntax Referen", + "title":"Inserting Data to the CSS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0203.html", + "product_code":"dli", + "code":"63", + "des":"This statement is used to query data in a CSS table.LIMIT is used to limit the query results. 
Only INT type is supported by the number parameter.The table to be queried m", + "doc_type":"sqlreference", + "kw":"Querying the CSS Table,Creating a Datasource Connection with a CSS Table,SQL Syntax Reference", + "title":"Querying the CSS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0225.html", + "product_code":"dli", + "code":"64", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with a DCS Table", + "title":"Creating a Datasource Connection with a DCS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0226.html", + "product_code":"dli", + "code":"65", + "des":"This statement is used to create a DLI table and associate it with an existing DCS key.Before creating a DLI table and associating it with DCS, you need to create a datas", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with DCS,Creating a Datasource Connection with a DCS Table,S", + "title":"Creating a DLI Table and Associating It with DCS", + "githuburl":"" + }, + { + "uri":"dli_08_0227.html", + "product_code":"dli", + "code":"66", + "des":"This statement is used to insert data in a DLI table to the DCS key.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2...\n [FROM DL", + "doc_type":"sqlreference", + "kw":"Inserting Data to a DCS Table,Creating a Datasource Connection with a DCS Table,SQL Syntax Reference", + "title":"Inserting Data to a DCS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0228.html", + "product_code":"dli", + "code":"67", + "des":"This statement is used to query data in a DCS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.Query data in the test_re", + "doc_type":"sqlreference", + "kw":"Querying the DCS Table,Creating a Datasource Connection with a DCS Table,SQL Syntax Reference", + "title":"Querying the DCS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0229.html", + "product_code":"dli", + "code":"68", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Datasource Connection with a DDS Table", + "title":"Creating a Datasource Connection with a DDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0230.html", + "product_code":"dli", + "code":"69", + "des":"This statement is used to create a DLI table and associate it with an existing DDS collection.Before creating a DLI table and associating it with DDS, you need to create ", + "doc_type":"sqlreference", + "kw":"Creating a DLI Table and Associating It with DDS,Creating a Datasource Connection with a DDS Table,S", + "title":"Creating a DLI Table and Associating It with DDS", + "githuburl":"" + }, + { + "uri":"dli_08_0231.html", + "product_code":"dli", + "code":"70", + "des":"This statement is used to insert data in a DLI table to the associated DDS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "doc_type":"sqlreference", + "kw":"Inserting Data to the DDS Table,Creating a Datasource Connection with a DDS Table,SQL Syntax Referen", + "title":"Inserting Data to the DDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0232.html", + "product_code":"dli", + "code":"71", + "des":"This statement is used to query data in a DDS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.If schema information is ", + "doc_type":"sqlreference", + "kw":"Querying the DDS Table,Creating a Datasource Connection with a DDS Table,SQL Syntax Reference", + "title":"Querying the DDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0129.html", + "product_code":"dli", + "code":"72", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Views", + "title":"Views", + "githuburl":"" + }, + { + "uri":"dli_08_0130.html", + "product_code":"dli", + "code":"73", + "des":"This statement is used to create views.CREATE VIEW: creates views based on the given select statement. The result of the select statement will not be written into the dis", + "doc_type":"sqlreference", + "kw":"Creating a View,Views,SQL Syntax Reference", + "title":"Creating a View", + "githuburl":"" + }, + { + "uri":"dli_08_0131.html", + "product_code":"dli", + "code":"74", + "des":"This statement is used to delete views.DROP: Deletes the metadata of a specified view. Although views and tables have many common points, the DROP TABLE statement cannot ", + "doc_type":"sqlreference", + "kw":"Deleting a View,Views,SQL Syntax Reference", + "title":"Deleting a View", + "githuburl":"" + }, + { + "uri":"dli_08_0138.html", + "product_code":"dli", + "code":"75", + "des":"This statement returns the logical plan and physical execution plan for the SQL statement.EXTENDED: After this keyword is specified, the logical and physical plans are ou", + "doc_type":"sqlreference", + "kw":"Viewing the Execution Plan,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Viewing the Execution Plan", + "githuburl":"" + }, + { + "uri":"dli_08_0139.html", + "product_code":"dli", + "code":"76", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Permissions Management", + "title":"Data Permissions Management", + "githuburl":"" + }, + { + "uri":"dli_08_0140.html", + "product_code":"dli", + "code":"77", + "des":"Table 1 describes the SQL statement permission matrix in DLI in terms of permissions on databases, tables, and roles.For privilege granting or revocation on databases and", + "doc_type":"sqlreference", + "kw":"Data Permissions List,Data Permissions Management,SQL Syntax Reference", + "title":"Data Permissions List", + "githuburl":"" + }, + { + "uri":"dli_08_0141.html", + "product_code":"dli", + "code":"78", + "des":"This statement is used to create a role in the current database or a specified database.Only users with the CREATE_ROLE permission on the database can create roles. For e", + "doc_type":"sqlreference", + "kw":"Creating a Role,Data Permissions Management,SQL Syntax Reference", + "title":"Creating a Role", + "githuburl":"" + }, + { + "uri":"dli_08_0148.html", + "product_code":"dli", + "code":"79", + "des":"This statement is used to delete a role in the current database or a specified database.NoneThe role_name to be deleted must exist in the current database or the specifie", + "doc_type":"sqlreference", + "kw":"Deleting a Role,Data Permissions Management,SQL Syntax Reference", + "title":"Deleting a Role", + "githuburl":"" + }, + { + "uri":"dli_08_0142.html", + "product_code":"dli", + "code":"80", + "des":"This statement is used to bind a user with a role.NoneThe role_name and username must exist. Otherwise, an error will be reported.", + "doc_type":"sqlreference", + "kw":"Binding a Role,Data Permissions Management,SQL Syntax Reference", + "title":"Binding a Role", + "githuburl":"" + }, + { + "uri":"dli_08_0147.html", + "product_code":"dli", + "code":"81", + "des":"This statement is used to unbind the user with the role.Nonerole_name and user_name must exist and user_name has been bound to role_name.To unbind the user_name1 from rol", + "doc_type":"sqlreference", + "kw":"Unbinding a Role,Data Permissions Management,SQL Syntax Reference", + "title":"Unbinding a Role", + "githuburl":"" + }, + { + "uri":"dli_08_0143.html", + "product_code":"dli", + "code":"82", + "des":"This statement is used to display all roles or roles bound to the user_name in the current database.ALL: Displays all roles.Keywords ALL and user_name cannot coexist.To d", + "doc_type":"sqlreference", + "kw":"Displaying a Role,Data Permissions Management,SQL Syntax Reference", + "title":"Displaying a Role", + "githuburl":"" + }, + { + "uri":"dli_08_0144.html", + "product_code":"dli", + "code":"83", + "des":"This statement is used to grant permissions to a user or role.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must be a user.The privilege mu", + "doc_type":"sqlreference", + "kw":"Granting a Permission,Data Permissions Management,SQL Syntax Reference", + "title":"Granting a Permission", + "githuburl":"" + }, + { + "uri":"dli_08_0146.html", + "product_code":"dli", + "code":"84", + "des":"This statement is used to revoke permissions granted to a user or role.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must be a user.The pri", + "doc_type":"sqlreference", + "kw":"Revoking a Permission,Data Permissions Management,SQL Syntax Reference", + "title":"Revoking a Permission", + "githuburl":"" + }, + { + 
"uri":"dli_08_0145.html", + "product_code":"dli", + "code":"85", + "des":"This statement is used to show the permissions granted to a user or role in the resource.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must", + "doc_type":"sqlreference", + "kw":"Displaying the Granted Permissions,Data Permissions Management,SQL Syntax Reference", + "title":"Displaying the Granted Permissions", + "githuburl":"" + }, + { + "uri":"dli_08_0149.html", + "product_code":"dli", + "code":"86", + "des":"This statement is used to display the binding relationship between roles and a user in the current database.NoneThe ROLE variable must exist.", + "doc_type":"sqlreference", + "kw":"Displaying the Binding Relationship Between All Roles and Users,Data Permissions Management,SQL Synt", + "title":"Displaying the Binding Relationship Between All Roles and Users", + "githuburl":"" + }, + { + "uri":"dli_08_0056.html", + "product_code":"dli", + "code":"87", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Types", + "title":"Data Types", + "githuburl":"" + }, + { + "uri":"dli_08_0057.html", + "product_code":"dli", + "code":"88", + "des":"Data type is a basic attribute of data. It is used to distinguish different types of data. Different data types occupy different storage space and support different opera", + "doc_type":"sqlreference", + "kw":"Overview,Data Types,SQL Syntax Reference", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_08_0058.html", + "product_code":"dli", + "code":"89", + "des":"Table 1 lists the primitive data types supported by DLI.VARCHAR and CHAR data is stored in STRING type on DLI. Therefore, the string that exceeds the specified length wil", + "doc_type":"sqlreference", + "kw":"Primitive Data Types,Data Types,SQL Syntax Reference", + "title":"Primitive Data Types", + "githuburl":"" + }, + { + "uri":"dli_08_0059.html", + "product_code":"dli", + "code":"90", + "des":"Spark SQL supports complex data types, as shown in Table 1.When a table containing fields of the complex data type is created, the storage format of this table cannot be ", + "doc_type":"sqlreference", + "kw":"Complex Data Types,Data Types,SQL Syntax Reference", + "title":"Complex Data Types", + "githuburl":"" + }, + { + "uri":"dli_08_0282.html", + "product_code":"dli", + "code":"91", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"User-Defined Functions", + "title":"User-Defined Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0283.html", + "product_code":"dli", + "code":"92", + "des":"DLI allows you to create and use user-defined functions (UDF) and user-defined table functions (UDTF) in Spark jobs.If a function with the same name exists in the databas", + "doc_type":"sqlreference", + "kw":"Creating a Function,User-Defined Functions,SQL Syntax Reference", + "title":"Creating a Function", + "githuburl":"" + }, + { + "uri":"dli_08_0284.html", + "product_code":"dli", + "code":"93", + "des":"This statement is used to delete functions.TEMPORARY: Indicates whether the function to be deleted is a temporary function.IF EXISTS: Used when the function to be deleted", + "doc_type":"sqlreference", + "kw":"Deleting a Function,User-Defined Functions,SQL Syntax Reference", + "title":"Deleting a Function", + "githuburl":"" + }, + { + "uri":"dli_08_0281.html", + "product_code":"dli", + "code":"94", + "des":"Displays information about a specified function.EXTENDED: displays extended usage information.The metadata (implementation class and usage) of an existing function is ret", + "doc_type":"sqlreference", + "kw":"Displaying Function Details,User-Defined Functions,SQL Syntax Reference", + "title":"Displaying Function Details", + "githuburl":"" + }, + { + "uri":"dli_08_0285.html", + "product_code":"dli", + "code":"95", + "des":"View all functions in the current project.In the preceding statement, regex is a regular expression. For details about its parameters, see Table 1.For details about other", + "doc_type":"sqlreference", + "kw":"Displaying All Functions,User-Defined Functions,SQL Syntax Reference", + "title":"Displaying All Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0064.html", + "product_code":"dli", + "code":"96", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Built-in Functions", + "title":"Built-in Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0065.html", + "product_code":"dli", + "code":"97", + "des":"Table 1 lists the mathematical functions supported in DLI.", + "doc_type":"sqlreference", + "kw":"Mathematical Functions,Built-in Functions,SQL Syntax Reference", + "title":"Mathematical Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0066.html", + "product_code":"dli", + "code":"98", + "des":"Table 1 lists the date functions supported in DLI.", + "doc_type":"sqlreference", + "kw":"Date Functions,Built-in Functions,SQL Syntax Reference", + "title":"Date Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0067.html", + "product_code":"dli", + "code":"99", + "des":"Table 1 lists the string functions supported by DLI.", + "doc_type":"sqlreference", + "kw":"String Functions,Built-in Functions,SQL Syntax Reference", + "title":"String Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0068.html", + "product_code":"dli", + "code":"100", + "des":"An aggregate function performs a calculation operation on a set of input values and returns a value. 
For example, the COUNT function counts the number of rows retrieved b", + "doc_type":"sqlreference", + "kw":"Aggregate Functions,Built-in Functions,SQL Syntax Reference", + "title":"Aggregate Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0069.html", + "product_code":"dli", + "code":"101", + "des":"A window function performs a calculation operation on a set of values related to the current value. A window function can be an aggregate function used in the GROUP BY cl", + "doc_type":"sqlreference", + "kw":"Window Functions,Built-in Functions,SQL Syntax Reference", + "title":"Window Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0150.html", + "product_code":"dli", + "code":"102", + "des":"This statement is a basic query statement and is used to return the query results.The table to be queried must exist. Otherwise, an error is reported.To filter the record", + "doc_type":"sqlreference", + "kw":"Basic SELECT Statements,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"Basic SELECT Statements", + "githuburl":"" + }, + { + "uri":"dli_08_0151.html", + "product_code":"dli", + "code":"103", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Filtering", + "title":"Filtering", + "githuburl":"" + }, + { + "uri":"dli_08_0152.html", + "product_code":"dli", + "code":"104", + "des":"This statement is used to filter the query results using the WHERE clause.All is used to return repeated rows. By default, all repeated rows are returned. It is followed ", + "doc_type":"sqlreference", + "kw":"WHERE Filtering Clause,Filtering,SQL Syntax Reference", + "title":"WHERE Filtering Clause", + "githuburl":"" + }, + { + "uri":"dli_08_0153.html", + "product_code":"dli", + "code":"105", + "des":"This statement is used to filter the query results using the HAVING clause.All is used to return repeated rows. By default, all repeated rows are returned. It is followed", + "doc_type":"sqlreference", + "kw":"HAVING Filtering Clause,Filtering,SQL Syntax Reference", + "title":"HAVING Filtering Clause", + "githuburl":"" + }, + { + "uri":"dli_08_0154.html", + "product_code":"dli", + "code":"106", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Sorting", + "title":"Sorting", + "githuburl":"" + }, + { + "uri":"dli_08_0155.html", + "product_code":"dli", + "code":"107", + "des":"This statement is used to order the result set of a query by the specified field.ASC/DESC: ASC sorts from the lowest value to the highest value. DESC sorts from the highe", + "doc_type":"sqlreference", + "kw":"ORDER BY,Sorting,SQL Syntax Reference", + "title":"ORDER BY", + "githuburl":"" + }, + { + "uri":"dli_08_0156.html", + "product_code":"dli", + "code":"108", + "des":"This statement is used to achieve the partial sorting of tables according to fields.ASC/DESC: ASC sorts from the lowest value to the highest value. 
DESC sorts from the hi", + "doc_type":"sqlreference", + "kw":"SORT BY,Sorting,SQL Syntax Reference", + "title":"SORT BY", + "githuburl":"" + }, + { + "uri":"dli_08_0157.html", + "product_code":"dli", + "code":"109", + "des":"This statement is used to bucket a table and sort the table within buckets.CLUSTER BY: Buckets are created based on specified fields. Single fields and multiple fields ar", + "doc_type":"sqlreference", + "kw":"CLUSTER BY,Sorting,SQL Syntax Reference", + "title":"CLUSTER BY", + "githuburl":"" + }, + { + "uri":"dli_08_0158.html", + "product_code":"dli", + "code":"110", + "des":"This statement is used to bucket a table according to the field.DISTRIBUTE BY: Buckets are created based on specified fields. A single field or multiple fields are suppor", + "doc_type":"sqlreference", + "kw":"DISTRIBUTE BY,Sorting,SQL Syntax Reference", + "title":"DISTRIBUTE BY", + "githuburl":"" + }, + { + "uri":"dli_08_0159.html", + "product_code":"dli", + "code":"111", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Grouping", + "title":"Grouping", + "githuburl":"" + }, + { + "uri":"dli_08_0160.html", + "product_code":"dli", + "code":"112", + "des":"This statement is used to group a table based on columns.Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.Single-column GROU", + "doc_type":"sqlreference", + "kw":"Column-Based GROUP BY,Grouping,SQL Syntax Reference", + "title":"Column-Based GROUP BY", + "githuburl":"" + }, + { + "uri":"dli_08_0161.html", + "product_code":"dli", + "code":"113", + "des":"This statement is used to group a table according to expressions.The groupby_expression can contain a single field or multiple fields, and also can call aggregate functio", + "doc_type":"sqlreference", + "kw":"Expression-Based GROUP BY,Grouping,SQL Syntax Reference", + "title":"Expression-Based GROUP BY", + "githuburl":"" + }, + { + "uri":"dli_08_0162.html", + "product_code":"dli", + "code":"114", + "des":"This statement filters a table after grouping it using the HAVING clause.The groupby_expression can contain a single field or multiple fields, and can also call aggregate", + "doc_type":"sqlreference", + "kw":"GROUP BY Using HAVING,Grouping,SQL Syntax Reference", + "title":"GROUP BY Using HAVING", + "githuburl":"" + }, + { + "uri":"dli_08_0163.html", + "product_code":"dli", + "code":"115", + "des":"This statement is used to generate the aggregate row, super-aggregate row, and the total row. The statement can achieve multi-layer statistics from right to left and disp", + "doc_type":"sqlreference", + "kw":"ROLLUP,Grouping,SQL Syntax Reference", + "title":"ROLLUP", + "githuburl":"" + }, + { + "uri":"dli_08_0164.html", + "product_code":"dli", + "code":"116", + "des":"This statement is used to generate the cross-table row and achieve the cross-statistics of the GROUP BY field.GROUPING SETS is the expansion of GROUP BY. For example:SELE", + "doc_type":"sqlreference", + "kw":"GROUPING SETS,Grouping,SQL Syntax Reference", + "title":"GROUPING SETS", + "githuburl":"" + }, + { + "uri":"dli_08_0165.html", + "product_code":"dli", + "code":"117", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"JOIN", + "title":"JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0166.html", + "product_code":"dli", + "code":"118", + "des":"This statement is used to join and return the rows that meet the JOIN conditions from two tables as the result set.JOIN/INNER JOIN: Only the records that meet the JOIN co", + "doc_type":"sqlreference", + "kw":"INNER JOIN,JOIN,SQL Syntax Reference", + "title":"INNER JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0167.html", + "product_code":"dli", + "code":"119", + "des":"Join the left table with the right table and return all joined records of the left table. If no joined record is found, NULL will be returned.LEFT OUTER JOIN: Returns all", + "doc_type":"sqlreference", + "kw":"LEFT OUTER JOIN,JOIN,SQL Syntax Reference", + "title":"LEFT OUTER JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0168.html", + "product_code":"dli", + "code":"120", + "des":"Match the right table with the left table and return all matched records of the right table. If no matched record is found, NULL will be returned.RIGHT OUTER JOIN: Return", + "doc_type":"sqlreference", + "kw":"RIGHT OUTER JOIN,JOIN,SQL Syntax Reference", + "title":"RIGHT OUTER JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0169.html", + "product_code":"dli", + "code":"121", + "des":"Join all records from the right table and the left table and return all joined records. If no joined record is found, NULL will be returned.FULL OUTER JOIN: Matches all r", + "doc_type":"sqlreference", + "kw":"FULL OUTER JOIN,JOIN,SQL Syntax Reference", + "title":"FULL OUTER JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0170.html", + "product_code":"dli", + "code":"122", + "des":"This statement has the same function as INNER JOIN, that is, the result set that meet the WHERE condition is returned. However, IMPLICIT JOIN does not use the condition s", + "doc_type":"sqlreference", + "kw":"IMPLICIT JOIN,JOIN,SQL Syntax Reference", + "title":"IMPLICIT JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0171.html", + "product_code":"dli", + "code":"123", + "des":"Cartesian JOIN joins each record of table A with all records in table B. For example, if there are m records in table A and n records in table B, m x n records will be ge", + "doc_type":"sqlreference", + "kw":"Cartesian JOIN,JOIN,SQL Syntax Reference", + "title":"Cartesian JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0172.html", + "product_code":"dli", + "code":"124", + "des":"This statement is used to query the records that meet the JOIN condition from the left table.LEFT SEMI JOIN: Indicates to only return the records from the left table. LEF", + "doc_type":"sqlreference", + "kw":"LEFT SEMI JOIN,JOIN,SQL Syntax Reference", + "title":"LEFT SEMI JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0173.html", + "product_code":"dli", + "code":"125", + "des":"This statement is used to join multiple tables using unequal values and return the result set that meet the condition.The non_equi_join_condition is similar to join_condi", + "doc_type":"sqlreference", + "kw":"NON-EQUIJOIN,JOIN,SQL Syntax Reference", + "title":"NON-EQUIJOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0174.html", + "product_code":"dli", + "code":"126", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Subquery", + "title":"Subquery", + "githuburl":"" + }, + { + "uri":"dli_08_0175.html", + "product_code":"dli", + "code":"127", + "des":"Subqueries are nested in the WHERE clause, and the subquery result is used as the filtering condition.All is used to return repeated rows. By default, all repeated rows a", + "doc_type":"sqlreference", + "kw":"Subquery Nested by WHERE,Subquery,SQL Syntax Reference", + "title":"Subquery Nested by WHERE", + "githuburl":"" + }, + { + "uri":"dli_08_0176.html", + "product_code":"dli", + "code":"128", + "des":"This statement is used to nest subquery by FROM and use the subquery results as the data source of the external SELECT statement.All is used to return repeated rows. By d", + "doc_type":"sqlreference", + "kw":"Subquery Nested by FROM,Subquery,SQL Syntax Reference", + "title":"Subquery Nested by FROM", + "githuburl":"" + }, + { + "uri":"dli_08_0177.html", + "product_code":"dli", + "code":"129", + "des":"This statement is used to embed a subquery in the HAVING clause. The subquery result is used as a part of the HAVING clause.All is used to return repeated rows. By defaul", + "doc_type":"sqlreference", + "kw":"Subquery Nested by HAVING,Subquery,SQL Syntax Reference", + "title":"Subquery Nested by HAVING", + "githuburl":"" + }, + { + "uri":"dli_08_0178.html", + "product_code":"dli", + "code":"130", + "des":"This statement is used to nest queries in the subquery.All is used to return repeated rows. By default, all repeated rows are returned. It is followed by asterisks (*) on", + "doc_type":"sqlreference", + "kw":"Multi-Layer Nested Subquery,Subquery,SQL Syntax Reference", + "title":"Multi-Layer Nested Subquery", + "githuburl":"" + }, + { + "uri":"dli_08_0179.html", + "product_code":"dli", + "code":"131", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Alias", + "title":"Alias", + "githuburl":"" + }, + { + "uri":"dli_08_0180.html", + "product_code":"dli", + "code":"132", + "des":"This statement is used to specify an alias for a table or the subquery result.table_reference: Can be a table, view, or subquery.As: Is used to connect to table_reference", + "doc_type":"sqlreference", + "kw":"AS for Table,Alias,SQL Syntax Reference", + "title":"AS for Table", + "githuburl":"" + }, + { + "uri":"dli_08_0181.html", + "product_code":"dli", + "code":"133", + "des":"This statement is used to specify an alias for a column.alias: gives an alias for the attr_expr field.AS: Whether to add AS does not affect the result.The to-be-queried t", + "doc_type":"sqlreference", + "kw":"AS for Column,Alias,SQL Syntax Reference", + "title":"AS for Column", + "githuburl":"" + }, + { + "uri":"dli_08_0182.html", + "product_code":"dli", + "code":"134", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Set Operations", + "title":"Set Operations", + "githuburl":"" + }, + { + "uri":"dli_08_0183.html", + "product_code":"dli", + "code":"135", + "des":"This statement is used to return the union set of multiple query results.UNION: The set operation is used to join the head and tail of a table based on certain conditions", + "doc_type":"sqlreference", + "kw":"UNION,Set Operations,SQL Syntax Reference", + "title":"UNION", + "githuburl":"" + }, + { + "uri":"dli_08_0184.html", + "product_code":"dli", + "code":"136", + "des":"This statement is used to return the intersection set of multiple query results.INTERSECT returns the intersection of multiple query results. The number of columns return", + "doc_type":"sqlreference", + "kw":"INTERSECT,Set Operations,SQL Syntax Reference", + "title":"INTERSECT", + "githuburl":"" + }, + { + "uri":"dli_08_0185.html", + "product_code":"dli", + "code":"137", + "des":"This statement is used to return the difference set of two query results.EXCEPT minus the sets. A EXCEPT B indicates to remove the records that exist in both A and B from", + "doc_type":"sqlreference", + "kw":"EXCEPT,Set Operations,SQL Syntax Reference", + "title":"EXCEPT", + "githuburl":"" + }, + { + "uri":"dli_08_0186.html", + "product_code":"dli", + "code":"138", + "des":"This statement is used to define the common table expression (CTE) using WITH...AS to simplify the query and make the result easier to read and maintain.cte_name: Name of", + "doc_type":"sqlreference", + "kw":"WITH...AS,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"WITH...AS", + "githuburl":"" + }, + { + "uri":"dli_08_0187.html", + "product_code":"dli", + "code":"139", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"CASE...WHEN", + "title":"CASE...WHEN", + "githuburl":"" + }, + { + "uri":"dli_08_0188.html", + "product_code":"dli", + "code":"140", + "des":"This statement is used to display result_expression according to the joined results of input_expression and when_expression.CASE: Subquery is supported in basic CASE stat", + "doc_type":"sqlreference", + "kw":"Basic CASE Statement,CASE...WHEN,SQL Syntax Reference", + "title":"Basic CASE Statement", + "githuburl":"" + }, + { + "uri":"dli_08_0189.html", + "product_code":"dli", + "code":"141", + "des":"This statement is used to obtain the value of boolean_expression for each WHEN statement in a specified order. Then return the first result_expression with the value TRUE", + "doc_type":"sqlreference", + "kw":"CASE Query Statement,CASE...WHEN,SQL Syntax Reference", + "title":"CASE Query Statement", + "githuburl":"" + }, + { + "uri":"dli_08_0190.html", + "product_code":"dli", + "code":"142", + "des":"This statement is used together with the window function. The OVER statement is used to group data and sort the data within the group. 
The window function is used to gene", + "doc_type":"sqlreference", + "kw":"OVER Clause,Spark SQL Syntax Reference,SQL Syntax Reference", + "title":"OVER Clause", + "githuburl":"" + }, + { + "uri":"dli_08_0370.html", + "product_code":"dli", + "code":"143", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Flink Opensource SQL 1.12 Syntax Reference", + "title":"Flink Opensource SQL 1.12 Syntax Reference", + "githuburl":"" + }, + { + "uri":"dli_08_0371.html", + "product_code":"dli", + "code":"144", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Constraints and Definitions", + "title":"Constraints and Definitions", + "githuburl":"" + }, + { + "uri":"dli_08_0372.html", + "product_code":"dli", + "code":"145", + "des":"STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP,", + "doc_type":"sqlreference", + "kw":"Supported Data Types,Constraints and Definitions,SQL Syntax Reference", + "title":"Supported Data Types", + "githuburl":"" + }, + { + "uri":"dli_08_0373.html", + "product_code":"dli", + "code":"146", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Syntax", + "title":"Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0374.html", + "product_code":"dli", + "code":"147", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Definition Language (DDL)", + "title":"Data Definition Language (DDL)", + "githuburl":"" + }, + { + "uri":"dli_08_0375.html", + "product_code":"dli", + "code":"148", + "des":"Create a table with a specified name.COMPUTED COLUMNA computed column is a virtual column generated using column_name AS computed_column_expression. 
A computed column eva", + "doc_type":"sqlreference", + "kw":"CREATE TABLE,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE TABLE", + "githuburl":"" + }, + { + "uri":"dli_08_0376.html", + "product_code":"dli", + "code":"149", + "des":"Create a view with multiple layers nested in it to simplify the development process.IF NOT EXISTSIf the view already exists, nothing happens.Create a view named viewName.", + "doc_type":"sqlreference", + "kw":"CREATE VIEW,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE VIEW", + "githuburl":"" + }, + { + "uri":"dli_08_0377.html", + "product_code":"dli", + "code":"150", + "des":"Create a user-defined function.For details about how to create a user-defined function, see User-Defined Functions (UDFs).IF NOT EXISTSIf the function already exists, not", + "doc_type":"sqlreference", + "kw":"CREATE FUNCTION,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE FUNCTION", + "githuburl":"" + }, + { + "uri":"dli_08_0378.html", + "product_code":"dli", + "code":"151", + "des":"SyntaxPrecautionsFlink SQL uses a lexical policy for identifier (table, attribute, function names) similar to Java:The case of identifiers is preserved whether or not the", + "doc_type":"sqlreference", + "kw":"Data Manipulation Language (DML),Syntax,SQL Syntax Reference", + "title":"Data Manipulation Language (DML)", + "githuburl":"" + }, + { + "uri":"dli_08_0379.html", + "product_code":"dli", + "code":"152", + "des":"This section describes the Flink open source SQL 1.12 syntax supported by DLI. For details about the parameters and examples, see the syntax description.", + "doc_type":"sqlreference", + "kw":"Overview,Flink Opensource SQL 1.12 Syntax Reference,SQL Syntax Reference", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_08_0380.html", + "product_code":"dli", + "code":"153", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"DDL Syntax", + "title":"DDL Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0381.html", + "product_code":"dli", + "code":"154", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating Source Tables", + "title":"Creating Source Tables", + "githuburl":"" + }, + { + "uri":"dli_08_0382.html", + "product_code":"dli", + "code":"155", + "des":"DataGen is used to generate random data for debugging and testing.NoneWhen you create a DataGen table, the table field type cannot be Array, Map, or Row. You can use COMP", + "doc_type":"sqlreference", + "kw":"DataGen Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"DataGen Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0383.html", + "product_code":"dli", + "code":"156", + "des":"DLI reads data of Flink jobs from GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. 
The PostgreSQL database can store data of more complex types an", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"GaussDB(DWS) Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0384.html", + "product_code":"dli", + "code":"157", + "des":"Create a source stream to obtain data from HBase as input for jobs. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excell", + "doc_type":"sqlreference", + "kw":"HBase Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"HBase Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0385.html", + "product_code":"dli", + "code":"158", + "des":"The JDBC connector is a Flink's built-in connector to read data from a database.An enhanced datasource connection with the instances has been established, so that you can", + "doc_type":"sqlreference", + "kw":"JDBC Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"JDBC Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0386.html", + "product_code":"dli", + "code":"159", + "des":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "doc_type":"sqlreference", + "kw":"Kafka Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"Kafka Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0387.html", + "product_code":"dli", + "code":"160", + "des":"The MySQL CDC source table, that is, the MySQL streaming source table, reads all historical data in the database first and then smoothly switches data read to the Binlog ", + "doc_type":"sqlreference", + "kw":"MySQL CDC Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"MySQL CDC Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0388.html", + "product_code":"dli", + "code":"161", + "des":"The Postgres CDC source table, that is, Postgres streaming source table, is used to read the full snapshot data and changed data of the PostgreSQL database in sequence. T", + "doc_type":"sqlreference", + "kw":"Postgres CDC Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"Postgres CDC Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0389.html", + "product_code":"dli", + "code":"162", + "des":"Create a source stream to obtain data from Redis as input for jobs.An enhanced datasource connection has been created for DLI to connect to the Redis database, so that yo", + "doc_type":"sqlreference", + "kw":"Redis Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"Redis Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0390.html", + "product_code":"dli", + "code":"163", + "des":"Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provi", + "doc_type":"sqlreference", + "kw":"Upsert Kafka Source Table,Creating Source Tables,SQL Syntax Reference", + "title":"Upsert Kafka Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0391.html", + "product_code":"dli", + "code":"164", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating Result Tables", + "title":"Creating Result Tables", + "githuburl":"" + }, + { + "uri":"dli_08_0392.html", + "product_code":"dli", + "code":"165", + "des":"The BlackHole connector allows for swallowing all input records. It is designed for high-performance testing and UDF output. It is not a substantive sink. The BlackHole r", + "doc_type":"sqlreference", + "kw":"BlackHole Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"BlackHole Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0393.html", + "product_code":"dli", + "code":"166", + "des":"DLI can output Flink job data to the ClickHouse database. ClickHouse is a column-based database oriented to online analysis and processing. It supports SQL query and prov", + "doc_type":"sqlreference", + "kw":"ClickHouse Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"ClickHouse Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0394.html", + "product_code":"dli", + "code":"167", + "des":"DLI outputs the Flink job output data to GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex t", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"GaussDB(DWS) Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0395.html", + "product_code":"dli", + "code":"168", + "des":"DLI outputs Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "doc_type":"sqlreference", + "kw":"Elasticsearch Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"Elasticsearch Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0396.html", + "product_code":"dli", + "code":"169", + "des":"DLI outputs the job data to HBase. 
HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scal", + "doc_type":"sqlreference", + "kw":"HBase Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"HBase Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0397.html", + "product_code":"dli", + "code":"170", + "des":"DLI outputs the Flink job output data to RDS through the JDBC result table.An enhanced datasource connection with the instances has been established, so that you can conf", + "doc_type":"sqlreference", + "kw":"JDBC Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"JDBC Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0398.html", + "product_code":"dli", + "code":"171", + "des":"DLI outputs the Flink job output data to Kafka through the Kafka result table.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subs", + "doc_type":"sqlreference", + "kw":"Kafka Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"Kafka Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0399.html", + "product_code":"dli", + "code":"172", + "des":"The Print connector is used to print output data to the error file or TaskManager file, making it easier for you to view the result in code debugging.NoneThe Print result", + "doc_type":"sqlreference", + "kw":"Print Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"Print Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0400.html", + "product_code":"dli", + "code":"173", + "des":"DLI outputs the Flink job output data to Redis. Redis is a key-value storage system that supports multiple types of data structures. It can be used in scenarios such as c", + "doc_type":"sqlreference", + "kw":"Redis Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"Redis Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0401.html", + "product_code":"dli", + "code":"174", + "des":"Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provi", + "doc_type":"sqlreference", + "kw":"Upsert Kafka Result Table,Creating Result Tables,SQL Syntax Reference", + "title":"Upsert Kafka Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0402.html", + "product_code":"dli", + "code":"175", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating Dimension Tables", + "title":"Creating Dimension Tables", + "githuburl":"" + }, + { + "uri":"dli_08_0403.html", + "product_code":"dli", + "code":"176", + "des":"Create a GaussDB(DWS) table to connect to source streams for wide table generation.Ensure that you have created a GaussDB(DWS) cluster using your account.A DWS database t", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Dimension Table,Creating Dimension Tables,SQL Syntax Reference", + "title":"GaussDB(DWS) Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0404.html", + "product_code":"dli", + "code":"177", + "des":"Create a Hbase dimension table to connect to the source streams for wide table generation.An enhanced datasource connection has been created for DLI to connect to HBase, ", + "doc_type":"sqlreference", + "kw":"HBase Dimension Table,Creating Dimension Tables,SQL Syntax Reference", + "title":"HBase Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0405.html", + "product_code":"dli", + "code":"178", + "des":"Create a JDBC dimension table to connect to the source stream.You have created a JDBC instance for your account.When you create a Flink OpenSource SQL job, set Flink Vers", + "doc_type":"sqlreference", + "kw":"JDBC Dimension Table,Creating Dimension Tables,SQL Syntax Reference", + "title":"JDBC Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0406.html", + "product_code":"dli", + "code":"179", + "des":"Create a Redis table to connect to source streams for wide table generation.An enhanced datasource connection with Redis has been established, so that you can configure s", + "doc_type":"sqlreference", + "kw":"Redis Dimension Table,Creating Dimension Tables,SQL Syntax Reference", + "title":"Redis Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0407.html", + "product_code":"dli", + "code":"180", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Format", + "title":"Format", + "githuburl":"" + }, + { + "uri":"dli_08_0408.html", + "product_code":"dli", + "code":"181", + "des":"Apache Avro is supported for you to read and write Avro data based on an Avro schema with Flink. The Avro schema is derived from the table schema.KafkaUpsert KafkaCurrent", + "doc_type":"sqlreference", + "kw":"Avro,Format,SQL Syntax Reference", + "title":"Avro", + "githuburl":"" + }, + { + "uri":"dli_08_0409.html", + "product_code":"dli", + "code":"182", + "des":"Canal is a Changelog Data Capture (CDC) tool that can stream changes in real-time from MySQL into other systems. 
Canal provides a unified format schema for changelog and ", + "doc_type":"sqlreference", + "kw":"Canal,Format,SQL Syntax Reference", + "title":"Canal", + "githuburl":"" + }, + { + "uri":"dli_08_0410.html", + "product_code":"dli", + "code":"183", + "des":"The Avro Schema Registry (avro-confluent) format allows you to read records that were serialized by the io.confluent.kafka.serializers.KafkaAvroSerializer and to write re", + "doc_type":"sqlreference", + "kw":"Confluent Avro,Format,SQL Syntax Reference", + "title":"Confluent Avro", + "githuburl":"" + }, + { + "uri":"dli_08_0411.html", + "product_code":"dli", + "code":"184", + "des":"The CSV format allows you to read and write CSV data based on a CSV schema. Currently, the CSV schema is derived from table schema.KafkaUpsert KafkaUse Kafka to send data", + "doc_type":"sqlreference", + "kw":"CSV,Format,SQL Syntax Reference", + "title":"CSV", + "githuburl":"" + }, + { + "uri":"dli_08_0412.html", + "product_code":"dli", + "code":"185", + "des":"Debezium is a Changelog Data Capture (CDC) tool that can stream changes in real-time from other databases into Kafka. Debezium provides a unified format schema for change", + "doc_type":"sqlreference", + "kw":"Debezium,Format,SQL Syntax Reference", + "title":"Debezium", + "githuburl":"" + }, + { + "uri":"dli_08_0413.html", + "product_code":"dli", + "code":"186", + "des":"The JSON format allows you to read and write JSON data based on a JSON schema. Currently, the JSON schema is derived from table schema.KafkaUpsert KafkaElasticsearchIn th", + "doc_type":"sqlreference", + "kw":"JSON,Format,SQL Syntax Reference", + "title":"JSON", + "githuburl":"" + }, + { + "uri":"dli_08_0414.html", + "product_code":"dli", + "code":"187", + "des":"Flink supports to interpret Maxwell JSON messages as INSERT/UPDATE/DELETE messages into Flink SQL system. This is useful in many cases to leverage this feature,such as:Sy", + "doc_type":"sqlreference", + "kw":"Maxwell,Format,SQL Syntax Reference", + "title":"Maxwell", + "githuburl":"" + }, + { + "uri":"dli_08_0415.html", + "product_code":"dli", + "code":"188", + "des":"The raw format allows you to read and write raw (byte based) values as a single column.Note: This format encodes null values as null of the byte[] type. This may have lim", + "doc_type":"sqlreference", + "kw":"Raw,Format,SQL Syntax Reference", + "title":"Raw", + "githuburl":"" + }, + { + "uri":"dli_08_0416.html", + "product_code":"dli", + "code":"189", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"DML Syntax", + "title":"DML Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0417.html", + "product_code":"dli", + "code":"190", + "des":"SyntaxDescriptionSELECT is used to select data from a table.ALL indicates that all results are returned.DISTINCT indicates that the duplicated results are removed.Precaut", + "doc_type":"sqlreference", + "kw":"SELECT,DML Syntax,SQL Syntax Reference", + "title":"SELECT", + "githuburl":"" + }, + { + "uri":"dli_08_0418.html", + "product_code":"dli", + "code":"191", + "des":"SyntaxDescriptionUNION is used to return the union set of multiple query results.INTERSECT is used to return the intersection of multiple query results.EXCEPT is used to ", + "doc_type":"sqlreference", + "kw":"Set Operations,DML Syntax,SQL Syntax Reference", + "title":"Set Operations", + "githuburl":"" + }, + { + "uri":"dli_08_0419.html", + "product_code":"dli", + "code":"192", + "des":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:Array functionsArray functionsGroup", + "doc_type":"sqlreference", + "kw":"Window,DML Syntax,SQL Syntax Reference", + "title":"Window", + "githuburl":"" + }, + { + "uri":"dli_08_0420.html", + "product_code":"dli", + "code":"193", + "des":"SyntaxPrecautionsCurrently, only equi-joins are supported, for example, joins that have at least one conjunctive condition with an equality predicate. Arbitrary cross or ", + "doc_type":"sqlreference", + "kw":"JOIN,DML Syntax,SQL Syntax Reference", + "title":"JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0421.html", + "product_code":"dli", + "code":"194", + "des":"FunctionThis clause is used to sort data in ascending order on a time attribute.PrecautionsCurrently, only sorting by time attribute is supported.ExampleSort data in asce", + "doc_type":"sqlreference", + "kw":"OrderBy & Limit,DML Syntax,SQL Syntax Reference", + "title":"OrderBy & Limit", + "githuburl":"" + }, + { + "uri":"dli_08_0422.html", + "product_code":"dli", + "code":"195", + "des":"Top-N queries ask for the N smallest or largest values ordered by columns. Both smallest and largest values sets are considered Top-N queries. Top-N queries are useful in", + "doc_type":"sqlreference", + "kw":"Top-N,DML Syntax,SQL Syntax Reference", + "title":"Top-N", + "githuburl":"" + }, + { + "uri":"dli_08_0423.html", + "product_code":"dli", + "code":"196", + "des":"Deduplication removes rows that duplicate over a set of columns, keeping only the first one or the last one.ROW_NUMBER(): Assigns a unique, sequential number to each row,", + "doc_type":"sqlreference", + "kw":"Deduplication,DML Syntax,SQL Syntax Reference", + "title":"Deduplication", + "githuburl":"" + }, + { + "uri":"dli_08_0424.html", + "product_code":"dli", + "code":"197", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Functions", + "title":"Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0425.html", + "product_code":"dli", + "code":"198", + "des":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "doc_type":"sqlreference", + "kw":"User-Defined Functions (UDFs),Functions,SQL Syntax Reference", + "title":"User-Defined Functions (UDFs)", + "githuburl":"" + }, + { + "uri":"dli_08_0426.html", + "product_code":"dli", + "code":"199", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Built-In Functions", + "title":"Built-In Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0427.html", + "product_code":"dli", + "code":"200", + "des":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "doc_type":"sqlreference", + "kw":"Mathematical Operation Functions,Built-In Functions,SQL Syntax Reference", + "title":"Mathematical Operation Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0428.html", + "product_code":"dli", + "code":"201", + "des":"SyntaxExampleTest input data.Test the data source kafka. The message content is as follows:{name:James,age:24,sex:male,grade:{math:95,science:[80,85],english:100}}\n{name:", + "doc_type":"sqlreference", + "kw":"String Functions,Built-In Functions,SQL Syntax Reference", + "title":"String Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0429.html", + "product_code":"dli", + "code":"202", + "des":"Table 1 lists the time functions supported by Flink OpenSource SQL.FunctionReturns a SQL date parsed from string in form of yyyy-MM-dd.Returns a SQL date parsed from stri", + "doc_type":"sqlreference", + "kw":"Temporal Functions,Built-In Functions,SQL Syntax Reference", + "title":"Temporal Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0430.html", + "product_code":"dli", + "code":"203", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Conditional Functions,Built-In Functions,SQL Syntax Reference", + "title":"Conditional Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0431.html", + "product_code":"dli", + "code":"204", + "des":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.The following example converts the amount value to an integer.Flink jobs do not sup", + "doc_type":"sqlreference", + "kw":"Type Conversion Functions,Built-In Functions,SQL Syntax Reference", + "title":"Type Conversion Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0432.html", + "product_code":"dli", + "code":"205", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Collection Functions,Built-In Functions,SQL Syntax Reference", + "title":"Collection Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0433.html", + "product_code":"dli", + "code":"206", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Value Construction Functions,Built-In Functions,SQL Syntax Reference", + "title":"Value Construction Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0434.html", + "product_code":"dli", + "code":"207", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Value Access Functions,Built-In Functions,SQL Syntax Reference", + "title":"Value Access Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0435.html", + "product_code":"dli", + "code":"208", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Hash Functions,Built-In Functions,SQL Syntax Reference", + "title":"Hash Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0436.html", + "product_code":"dli", + "code":"209", + "des":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "doc_type":"sqlreference", + "kw":"Aggregate Functions,Built-In Functions,SQL Syntax Reference", + "title":"Aggregate Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0437.html", + "product_code":"dli", + "code":"210", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Table-Valued Functions", + "title":"Table-Valued Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0438.html", + "product_code":"dli", + "code":"211", + "des":"The string_split function splits a target string into substrings based on the specified separator and returns a substring list.Create a Flink OpenSource SQL job by referr", + "doc_type":"sqlreference", + "kw":"string_split,Table-Valued Functions,SQL Syntax Reference", + "title":"string_split", + "githuburl":"" + }, + { + "uri":"dli_08_0289.html", + "product_code":"dli", + "code":"212", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Flink Opensource SQL 1.10 Syntax Reference", + "title":"Flink Opensource SQL 1.10 Syntax Reference", + "githuburl":"" + }, + { + "uri":"dli_08_0290.html", + "product_code":"dli", + "code":"213", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Constraints and Definitions", + "title":"Constraints and Definitions", + "githuburl":"" + }, + { + "uri":"dli_08_0291.html", + "product_code":"dli", + "code":"214", + "des":"STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP,", + "doc_type":"sqlreference", + "kw":"Supported Data Types,Constraints and Definitions,SQL Syntax Reference", + "title":"Supported Data Types", + "githuburl":"" + }, + { + "uri":"dli_08_0292.html", + "product_code":"dli", + "code":"215", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Syntax Definition", + "title":"Syntax Definition", + "githuburl":"" + }, + { + "uri":"dli_08_0293.html", + "product_code":"dli", + "code":"216", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Definition Language (DDL)", + "title":"Data Definition Language (DDL)", + "githuburl":"" + }, + { + "uri":"dli_08_0294.html", + "product_code":"dli", + "code":"217", + "des":"This clause is used to create a table with a specified name.COMPUTED COLUMNA computed column is a virtual column generated using column_name AS computed_column_expression", + "doc_type":"sqlreference", + "kw":"CREATE TABLE,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE TABLE", + "githuburl":"" + }, + { + "uri":"dli_08_0295.html", + "product_code":"dli", + "code":"218", + "des":"Create a view with multiple layers nested in it to simplify the development process.IF NOT EXISTSIf the view already exists, nothing happens.Create a view named viewName.", + "doc_type":"sqlreference", + "kw":"CREATE VIEW,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE VIEW", + "githuburl":"" + }, + { + "uri":"dli_08_0296.html", + "product_code":"dli", + "code":"219", + "des":"Create a user-defined function.IF NOT EXISTSIf the function already exists, nothing happens.LANGUAGE JAVA|SCALALanguage tag is used to instruct Flink runtime how to execu", + "doc_type":"sqlreference", + "kw":"CREATE FUNCTION,Data Definition Language (DDL),SQL Syntax Reference", + "title":"CREATE FUNCTION", + "githuburl":"" + }, + { + "uri":"dli_08_0297.html", + "product_code":"dli", + "code":"220", + "des":"SyntaxPrecautionsFlink SQL uses a lexical policy for identifier (table, attribute, function names) similar to Java:The case of identifiers is preserved whether they are q", + "doc_type":"sqlreference", + "kw":"Data Manipulation Language (DML),Syntax Definition,SQL Syntax Reference", + "title":"Data Manipulation Language (DML)", + "githuburl":"" + }, + { + "uri":"dli_08_0298.html", + "product_code":"dli", + "code":"221", + "des":"This section describes the Flink OpenSource SQL syntax supported by DLI. For details about the parameters and examples, see the syntax description.", + "doc_type":"sqlreference", + "kw":"Flink OpenSource SQL 1.10 Syntax,Flink Opensource SQL 1.10 Syntax Reference,SQL Syntax Reference", + "title":"Flink OpenSource SQL 1.10 Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0299.html", + "product_code":"dli", + "code":"222", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Definition Language (DDL)", + "title":"Data Definition Language (DDL)", + "githuburl":"" + }, + { + "uri":"dli_08_0300.html", + "product_code":"dli", + "code":"223", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Source Table", + "title":"Creating a Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0301.html", + "product_code":"dli", + "code":"224", + "des":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "doc_type":"sqlreference", + "kw":"Kafka Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"Kafka Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0302.html", + "product_code":"dli", + "code":"225", + "des":"Create a source stream to read data from DIS. DIS accesses user data and Flink job reads data from the DIS stream as input data for jobs. Flink jobs can quickly remove da", + "doc_type":"sqlreference", + "kw":"DIS Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"DIS Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0303.html", + "product_code":"dli", + "code":"226", + "des":"The JDBC connector is a Flink's built-in connector to read data from a database.An enhanced datasource connection with the database has been established, so that you can ", + "doc_type":"sqlreference", + "kw":"JDBC Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"JDBC Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0304.html", + "product_code":"dli", + "code":"227", + "des":"DLI reads data of Flink jobs from GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types an", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"GaussDB(DWS) Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0305.html", + "product_code":"dli", + "code":"228", + "des":"Create a source stream to obtain data from Redis as input for jobs.An enhanced datasource connection with Redis has been established, so that you can configure security g", + "doc_type":"sqlreference", + "kw":"Redis Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"Redis Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0306.html", + "product_code":"dli", + "code":"229", + "des":"Create a source stream to obtain data from HBase as input for jobs. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excell", + "doc_type":"sqlreference", + "kw":"HBase Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"HBase Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0358.html", + "product_code":"dli", + "code":"230", + "des":"You can call APIs to obtain data from the cloud ecosystem or an open source ecosystem and use the obtained data as input of Flink jobs.The customized source class needs t", + "doc_type":"sqlreference", + "kw":"userDefined Source Table,Creating a Source Table,SQL Syntax Reference", + "title":"userDefined Source Table", + "githuburl":"" + }, + { + "uri":"dli_08_0307.html", + "product_code":"dli", + "code":"231", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Result Table", + "title":"Creating a Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0344.html", + "product_code":"dli", + "code":"232", + "des":"DLI exports Flink job data to ClickHouse result tables.ClickHouse is a column-based database oriented to online analysis and processing. It supports SQL query and provide", + "doc_type":"sqlreference", + "kw":"ClickHouse Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"ClickHouse Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0308.html", + "product_code":"dli", + "code":"233", + "des":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "doc_type":"sqlreference", + "kw":"Kafka Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"Kafka Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0309.html", + "product_code":"dli", + "code":"234", + "des":"DLI exports the output data of the Flink job to Kafka in upsert mode.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription ", + "doc_type":"sqlreference", + "kw":"Upsert Kafka Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"Upsert Kafka Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0310.html", + "product_code":"dli", + "code":"235", + "des":"DLI writes the Flink job output data into DIS. The data is filtered and imported to the DIS stream for future processing.DIS addresses the challenge of transmitting data ", + "doc_type":"sqlreference", + "kw":"DIS Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"DIS Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0311.html", + "product_code":"dli", + "code":"236", + "des":"DLI exports the output data of the Flink job to RDS.An enhanced datasource connection with the database has been established, so that you can configure security group rul", + "doc_type":"sqlreference", + "kw":"JDBC Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"JDBC Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0312.html", + "product_code":"dli", + "code":"237", + "des":"DLI outputs the Flink job output data to GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex t", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"GaussDB(DWS) Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0313.html", + "product_code":"dli", + "code":"238", + "des":"DLI exports the output data of the Flink job to Redis. Redis is a storage system that supports multiple types of data structures such as key-value. It can be used in scen", + "doc_type":"sqlreference", + "kw":"Redis Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"Redis Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0314.html", + "product_code":"dli", + "code":"239", + "des":"DLI exports Flink job output data to SMN.SMN provides reliable and flexible large-scale message notification services to DLI. 
It significantly simplifies system coupling ", + "doc_type":"sqlreference", + "kw":"SMN Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"SMN Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0315.html", + "product_code":"dli", + "code":"240", + "des":"DLI outputs the job data to HBase. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scal", + "doc_type":"sqlreference", + "kw":"HBase Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"HBase Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0316.html", + "product_code":"dli", + "code":"241", + "des":"DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "doc_type":"sqlreference", + "kw":"Elasticsearch Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"Elasticsearch Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0348.html", + "product_code":"dli", + "code":"242", + "des":"OpenTSDB is a distributed, scalable time series database based on HBase. OpenTSDB is designed to collect monitoring information of a large-scale cluster and query data in", + "doc_type":"sqlreference", + "kw":"OpenTSDB Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"OpenTSDB Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0347.html", + "product_code":"dli", + "code":"243", + "des":"Write your Java code to insert the processed data into a specified database supported by your cloud service.Implement the custom sink class :The custom sink class is inhe", + "doc_type":"sqlreference", + "kw":"User-defined Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"User-defined Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0345.html", + "product_code":"dli", + "code":"244", + "des":"The print connector exports your data output to the error file or the out file of TaskManager. It is mainly used for code debugging and output viewing.Read data from Kafk", + "doc_type":"sqlreference", + "kw":"Print Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"Print Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0346.html", + "product_code":"dli", + "code":"245", + "des":"You can create a file system result table to export data to a file system such as HDFS or OBS. After the data is generated, a non-DLI table can be created directly accord", + "doc_type":"sqlreference", + "kw":"File System Result Table,Creating a Result Table,SQL Syntax Reference", + "title":"File System Result Table", + "githuburl":"" + }, + { + "uri":"dli_08_0317.html", + "product_code":"dli", + "code":"246", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Dimension Table", + "title":"Creating a Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0318.html", + "product_code":"dli", + "code":"247", + "des":"Create a JDBC dimension table to connect to the source stream.You have created a JDBC instance for your account.The RDS table is used to connect to the source stream.CREA", + "doc_type":"sqlreference", + "kw":"JDBC Dimension Table,Creating a Dimension Table,SQL Syntax Reference", + "title":"JDBC Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0319.html", + "product_code":"dli", + "code":"248", + "des":"Create a GaussDB(DWS) dimension table to connect to the input stream.You have created a GaussDB(DWS) instance for your account.Use an RDS table to connect to the source s", + "doc_type":"sqlreference", + "kw":"GaussDB(DWS) Dimension Table,Creating a Dimension Table,SQL Syntax Reference", + "title":"GaussDB(DWS) Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0320.html", + "product_code":"dli", + "code":"249", + "des":"Create a Hbase dimension table to connect to the source stream.An enhanced datasource connection has been created for DLI to connect to HBase, so that jobs can run on the", + "doc_type":"sqlreference", + "kw":"HBase Dimension Table,Creating a Dimension Table,SQL Syntax Reference", + "title":"HBase Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0321.html", + "product_code":"dli", + "code":"250", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Data Manipulation Language (DML)", + "title":"Data Manipulation Language (DML)", + "githuburl":"" + }, + { + "uri":"dli_08_0322.html", + "product_code":"dli", + "code":"251", + "des":"SyntaxDescriptionThis clause is used to select data from a table.ALL indicates that all results are returned.DISTINCT indicates that the duplicated results are removed.Pr", + "doc_type":"sqlreference", + "kw":"SELECT,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"SELECT", + "githuburl":"" + }, + { + "uri":"dli_08_0323.html", + "product_code":"dli", + "code":"252", + "des":"SyntaxDescriptionUNION is used to return the union set of multiple query results.INTERSECT is used to return the intersection of multiple query results.EXCEPT is used to ", + "doc_type":"sqlreference", + "kw":"Set Operations,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"Set Operations", + "githuburl":"" + }, + { + "uri":"dli_08_0324.html", + "product_code":"dli", + "code":"253", + "des":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:Array functionsArray functionsGroup", + "doc_type":"sqlreference", + "kw":"Window,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"Window", + "githuburl":"" + }, + { + "uri":"dli_08_0325.html", + "product_code":"dli", + "code":"254", + "des":"SyntaxPrecautionsCurrently, only equi-joins are supported, for example, joins that have at least one conjunctive condition with an equality predicate. 
Arbitrary cross or ", + "doc_type":"sqlreference", + "kw":"JOIN,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"JOIN", + "githuburl":"" + }, + { + "uri":"dli_08_0326.html", + "product_code":"dli", + "code":"255", + "des":"FunctionThis clause is used to sort data in ascending order on a time attribute.PrecautionsCurrently, only sorting by time attribute is supported.ExampleSort data in asce", + "doc_type":"sqlreference", + "kw":"OrderBy & Limit,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"OrderBy & Limit", + "githuburl":"" + }, + { + "uri":"dli_08_0327.html", + "product_code":"dli", + "code":"256", + "des":"Top-N queries ask for the N smallest or largest values ordered by columns. Both smallest and largest values sets are considered Top-N queries. Top-N queries are useful in", + "doc_type":"sqlreference", + "kw":"Top-N,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"Top-N", + "githuburl":"" + }, + { + "uri":"dli_08_0328.html", + "product_code":"dli", + "code":"257", + "des":"Deduplication removes rows that duplicate over a set of columns, keeping only the first one or the last one.ROW_NUMBER(): Assigns a unique, sequential number to each row,", + "doc_type":"sqlreference", + "kw":"Deduplication,Data Manipulation Language (DML),SQL Syntax Reference", + "title":"Deduplication", + "githuburl":"" + }, + { + "uri":"dli_08_0329.html", + "product_code":"dli", + "code":"258", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Functions", + "title":"Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0330.html", + "product_code":"dli", + "code":"259", + "des":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "doc_type":"sqlreference", + "kw":"User-Defined Functions,Functions,SQL Syntax Reference", + "title":"User-Defined Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0331.html", + "product_code":"dli", + "code":"260", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Built-In Functions", + "title":"Built-In Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0332.html", + "product_code":"dli", + "code":"261", + "des":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "doc_type":"sqlreference", + "kw":"Mathematical Operation Functions,Built-In Functions,SQL Syntax Reference", + "title":"Mathematical Operation Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0333.html", + "product_code":"dli", + "code":"262", + "des":"SyntaxExampleTest input data.Test the data source kafka. 
The message content is as follows:\"{name:James,age:24,sex:male,grade:{math:95,science:[80,85],english:100}}\"\n\"{na", + "doc_type":"sqlreference", + "kw":"String Functions,Built-In Functions,SQL Syntax Reference", + "title":"String Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0334.html", + "product_code":"dli", + "code":"263", + "des":"Table 1 lists the temporal functions supported by Flink OpenSource SQL.FunctionReturns a date parsed from string in form of yyyy-MM-dd.Returns a date parsed from string i", + "doc_type":"sqlreference", + "kw":"Temporal Functions,Built-In Functions,SQL Syntax Reference", + "title":"Temporal Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0335.html", + "product_code":"dli", + "code":"264", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Conditional Functions,Built-In Functions,SQL Syntax Reference", + "title":"Conditional Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0336.html", + "product_code":"dli", + "code":"265", + "des":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.The following example converts the amount value to an integer.Flink jobs do not sup", + "doc_type":"sqlreference", + "kw":"Type Conversion Function,Built-In Functions,SQL Syntax Reference", + "title":"Type Conversion Function", + "githuburl":"" + }, + { + "uri":"dli_08_0337.html", + "product_code":"dli", + "code":"266", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Collection Functions,Built-In Functions,SQL Syntax Reference", + "title":"Collection Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0338.html", + "product_code":"dli", + "code":"267", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Value Construction Functions,Built-In Functions,SQL Syntax Reference", + "title":"Value Construction Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0339.html", + "product_code":"dli", + "code":"268", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Value Access Functions,Built-In Functions,SQL Syntax Reference", + "title":"Value Access Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0340.html", + "product_code":"dli", + "code":"269", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Hash Functions,Built-In Functions,SQL Syntax Reference", + "title":"Hash Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0341.html", + "product_code":"dli", + "code":"270", + "des":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "doc_type":"sqlreference", + "kw":"Aggregate Function,Built-In Functions,SQL Syntax Reference", + "title":"Aggregate Function", + "githuburl":"" + }, + { + "uri":"dli_08_0342.html", + "product_code":"dli", + "code":"271", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Table-Valued Functions", + "title":"Table-Valued Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0357.html", + "product_code":"dli", + "code":"272", + "des":"The split_cursor function can convert one row of records into multiple rows or convert one column of records into multiple columns. Table-valued functions can only be use", + "doc_type":"sqlreference", + "kw":"split_cursor,Table-Valued Functions,SQL Syntax Reference", + "title":"split_cursor", + "githuburl":"" + }, + { + "uri":"dli_08_0356.html", + "product_code":"dli", + "code":"273", + "des":"The string_split function splits a target string into substrings based on the specified separator and returns a substring list.Prepare test input data.Source table disSou", + "doc_type":"sqlreference", + "kw":"string_split,Table-Valued Functions,SQL Syntax Reference", + "title":"string_split", + "githuburl":"" + }, + { + "uri":"dli_08_0450.html", + "product_code":"dli", + "code":"274", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Historical Versions", + "title":"Historical Versions", + "githuburl":"" + }, + { + "uri":"dli_08_0233.html", + "product_code":"dli", + "code":"275", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Flink SQL Syntax", + "title":"Flink SQL Syntax", + "githuburl":"" + }, + { + "uri":"dli_08_0075.html", + "product_code":"dli", + "code":"276", + "des":"Currently, Flink SQL only supports the following operations: SELECT, FROM, WHERE, UNION, aggregation, window, JOIN between stream and table data, and JOIN between streams", + "doc_type":"sqlreference", + "kw":"SQL Syntax Constraints and Definitions,Flink SQL Syntax,SQL Syntax Reference", + "title":"SQL Syntax Constraints and Definitions", + "githuburl":"" + }, + { + "uri":"dli_08_0275.html", + "product_code":"dli", + "code":"277", + "des":"This section describes the Flink SQL syntax list provided by DLI. 
For details about the parameters and examples, see the syntax description.", + "doc_type":"sqlreference", + "kw":"SQL Syntax Overview of Stream Jobs,Flink SQL Syntax,SQL Syntax Reference", + "title":"SQL Syntax Overview of Stream Jobs", + "githuburl":"" + }, + { + "uri":"dli_08_0234.html", + "product_code":"dli", + "code":"278", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Source Stream", + "title":"Creating a Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0237.html", + "product_code":"dli", + "code":"279", + "des":"Create a source stream to obtain data from HBase of CloudTable as input data of the job. HBase is a column-oriented distributed cloud storage system that features enhance", + "doc_type":"sqlreference", + "kw":"CloudTable HBase Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"CloudTable HBase Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0235.html", + "product_code":"dli", + "code":"280", + "des":"Create a source stream to read data from DIS. DIS accesses user data and Flink job reads data from the DIS stream as input data for jobs. Flink jobs can quickly remove da", + "doc_type":"sqlreference", + "kw":"DIS Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"DIS Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0270.html", + "product_code":"dli", + "code":"281", + "des":"DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. It provides reliable, scalable, fully man", + "doc_type":"sqlreference", + "kw":"DMS Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"DMS Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0238.html", + "product_code":"dli", + "code":"282", + "des":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "doc_type":"sqlreference", + "kw":"MRS Kafka Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"MRS Kafka Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0239.html", + "product_code":"dli", + "code":"283", + "des":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "doc_type":"sqlreference", + "kw":"Open-Source Kafka Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"Open-Source Kafka Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0236.html", + "product_code":"dli", + "code":"284", + "des":"Create a source stream to obtain data from OBS. DLI reads data stored by users in OBS as input data for jobs. OBS applies to various scenarios, such as big data analysis,", + "doc_type":"sqlreference", + "kw":"OBS Source Stream,Creating a Source Stream,SQL Syntax Reference", + "title":"OBS Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0240.html", + "product_code":"dli", + "code":"285", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Sink Stream", + "title":"Creating a Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0243.html", + "product_code":"dli", + "code":"286", + "des":"DLI exports the job output data to HBase of CloudTable. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performa", + "doc_type":"sqlreference", + "kw":"CloudTable HBase Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"CloudTable HBase Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0244.html", + "product_code":"dli", + "code":"287", + "des":"DLI exports the job output data to OpenTSDB of CloudTable. OpenTSDB is a distributed, scalable time series database based on HBase. It stores time series data. Time serie", + "doc_type":"sqlreference", + "kw":"CloudTable OpenTSDB Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"CloudTable OpenTSDB Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0286.html", + "product_code":"dli", + "code":"288", + "des":"DLI exports the output data of the Flink job to OpenTSDB of MRS.OpenTSDB has been installed in the MRS cluster.In this scenario, jobs must run on the dedicated queue of D", + "doc_type":"sqlreference", + "kw":"MRS OpenTSDB Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"MRS OpenTSDB Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0252.html", + "product_code":"dli", + "code":"289", + "des":"DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "doc_type":"sqlreference", + "kw":"CSS Elasticsearch Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"CSS Elasticsearch Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0253.html", + "product_code":"dli", + "code":"290", + "des":"DLI exports the Flink job output data to Redis of DCS. Redis is a storage system that supports multiple types of data structures such as key-value. It can be used in scen", + "doc_type":"sqlreference", + "kw":"DCS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"DCS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0249.html", + "product_code":"dli", + "code":"291", + "des":"DLI outputs the job output data to Document Database Service (DDS).DDS is compatible with the MongoDB protocol and is secure, highly available, reliable, scalable, and ea", + "doc_type":"sqlreference", + "kw":"DDS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"DDS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0241.html", + "product_code":"dli", + "code":"292", + "des":"DLI writes the Flink job output data into DIS. This cloud ecosystem is applicable to scenarios where data is filtered and imported to the DIS stream for future processing", + "doc_type":"sqlreference", + "kw":"DIS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"DIS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0271.html", + "product_code":"dli", + "code":"293", + "des":"DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. 
It provides reliable, scalable, fully man", + "doc_type":"sqlreference", + "kw":"DMS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"DMS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0247.html", + "product_code":"dli", + "code":"294", + "des":"DLI outputs the Flink job output data to Data Warehouse Service (DWS). DWS database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more co", + "doc_type":"sqlreference", + "kw":"DWS Sink Stream (JDBC Mode),Creating a Sink Stream,SQL Syntax Reference", + "title":"DWS Sink Stream (JDBC Mode)", + "githuburl":"" + }, + { + "uri":"dli_08_0248.html", + "product_code":"dli", + "code":"295", + "des":"Create a sink stream to export Flink job data to DWS through OBS-based dumping, specifically, output Flink job data to OBS and then import data from OBS to DWS. For detai", + "doc_type":"sqlreference", + "kw":"DWS Sink Stream (OBS-based Dumping),Creating a Sink Stream,SQL Syntax Reference", + "title":"DWS Sink Stream (OBS-based Dumping)", + "githuburl":"" + }, + { + "uri":"dli_08_0255.html", + "product_code":"dli", + "code":"296", + "des":"DLI exports the output data of the Flink job to HBase of MRS.An MRS cluster has been created by using your account. DLI can interconnect with HBase clusters with Kerberos", + "doc_type":"sqlreference", + "kw":"MRS HBase Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"MRS HBase Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0254.html", + "product_code":"dli", + "code":"297", + "des":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "doc_type":"sqlreference", + "kw":"MRS Kafka Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"MRS Kafka Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0257.html", + "product_code":"dli", + "code":"298", + "des":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "doc_type":"sqlreference", + "kw":"Open-Source Kafka Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"Open-Source Kafka Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0267.html", + "product_code":"dli", + "code":"299", + "des":"You can create a sink stream to export data to a file system such as HDFS or OBS. After the data is generated, a non-DLI table can be created directly according to the ge", + "doc_type":"sqlreference", + "kw":"File System Sink Stream (Recommended),Creating a Sink Stream,SQL Syntax Reference", + "title":"File System Sink Stream (Recommended)", + "githuburl":"" + }, + { + "uri":"dli_08_0242.html", + "product_code":"dli", + "code":"300", + "des":"Create a sink stream to export DLI data to OBS. DLI can export the job analysis results to OBS. OBS applies to various scenarios, such as big data analysis, cloud-native ", + "doc_type":"sqlreference", + "kw":"OBS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"OBS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0245.html", + "product_code":"dli", + "code":"301", + "des":"DLI outputs the Flink job output data to RDS. Currently, PostgreSQL and MySQL databases are supported. 
The PostgreSQL database can store data of more complex types and de", + "doc_type":"sqlreference", + "kw":"RDS Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"RDS Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0251.html", + "product_code":"dli", + "code":"302", + "des":"DLI exports Flink job output data to SMN.SMN provides reliable and flexible large-scale message notification services to DLI. It significantly simplifies system coupling ", + "doc_type":"sqlreference", + "kw":"SMN Sink Stream,Creating a Sink Stream,SQL Syntax Reference", + "title":"SMN Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0258.html", + "product_code":"dli", + "code":"303", + "des":"The temporary stream is used to simplify SQL logic. If complex SQL logic is followed, write SQL statements concatenated with temporary streams. The temporary stream is ju", + "doc_type":"sqlreference", + "kw":"Creating a Temporary Stream,Flink SQL Syntax,SQL Syntax Reference", + "title":"Creating a Temporary Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0259.html", + "product_code":"dli", + "code":"304", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Creating a Dimension Table", + "title":"Creating a Dimension Table", + "githuburl":"" + }, + { + "uri":"dli_08_0260.html", + "product_code":"dli", + "code":"305", + "des":"Create a Redis table to connect to the source stream.For details about the JOIN syntax, see JOIN Between Stream Data and Table Data.Redis clusters are not supported.Ensur", + "doc_type":"sqlreference", + "kw":"Creating a Redis Table,Creating a Dimension Table,SQL Syntax Reference", + "title":"Creating a Redis Table", + "githuburl":"" + }, + { + "uri":"dli_08_0261.html", + "product_code":"dli", + "code":"306", + "des":"Create an RDS/DWS table to connect to the source stream.For details about the JOIN syntax, see JOIN.Ensure that you have created a PostgreSQL or MySQL RDS instance in RDS", + "doc_type":"sqlreference", + "kw":"Creating an RDS Table,Creating a Dimension Table,SQL Syntax Reference", + "title":"Creating an RDS Table", + "githuburl":"" + }, + { + "uri":"dli_08_0272.html", + "product_code":"dli", + "code":"307", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Custom Stream Ecosystem", + "title":"Custom Stream Ecosystem", + "githuburl":"" + }, + { + "uri":"dli_08_0273.html", + "product_code":"dli", + "code":"308", + "des":"Compile code to obtain data from the desired cloud ecosystem or open-source ecosystem as the input data of Flink jobs.The user-defined source class needs to inherit the R", + "doc_type":"sqlreference", + "kw":"Custom Source Stream,Custom Stream Ecosystem,SQL Syntax Reference", + "title":"Custom Source Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0274.html", + "product_code":"dli", + "code":"309", + "des":"Compile code to write the data processed by DLI to a specified cloud ecosystem or open-source ecosystem.The user-defined sink class needs to inherit the RichSinkFunction ", + "doc_type":"sqlreference", + "kw":"Custom Sink Stream,Custom Stream Ecosystem,SQL Syntax Reference", + "title":"Custom Sink Stream", + "githuburl":"" + }, + { + "uri":"dli_08_0207.html", + "product_code":"dli", + "code":"310", + "des":"Data type is a basic attribute of data and used to distinguish different types of data. Different data types occupy different storage space and support different operatio", + "doc_type":"sqlreference", + "kw":"Data Type,Flink SQL Syntax,SQL Syntax Reference", + "title":"Data Type", + "githuburl":"" + }, + { + "uri":"dli_08_0086.html", + "product_code":"dli", + "code":"311", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Built-In Functions", + "title":"Built-In Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0191.html", + "product_code":"dli", + "code":"312", + "des":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "doc_type":"sqlreference", + "kw":"Mathematical Operation Functions,Built-In Functions,SQL Syntax Reference", + "title":"Mathematical Operation Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0096.html", + "product_code":"dli", + "code":"313", + "des":"The common character string functions of DLI are as follows:FunctionConcatenates two character strings.Concatenates two character strings.SyntaxVARCHAR VARCHAR a || VARCH", + "doc_type":"sqlreference", + "kw":"String Functions,Built-In Functions,SQL Syntax Reference", + "title":"String Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0097.html", + "product_code":"dli", + "code":"314", + "des":"Table 1 lists the time functions supported by Flink SQL.None", + "doc_type":"sqlreference", + "kw":"Temporal Functions,Built-In Functions,SQL Syntax Reference", + "title":"Temporal Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0112.html", + "product_code":"dli", + "code":"315", + "des":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.Flink jobs do not support the conversion of bigint to timestamp using CAST. 
You can", + "doc_type":"sqlreference", + "kw":"Type Conversion Functions,Built-In Functions,SQL Syntax Reference", + "title":"Type Conversion Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0104.html", + "product_code":"dli", + "code":"316", + "des":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "doc_type":"sqlreference", + "kw":"Aggregate Functions,Built-In Functions,SQL Syntax Reference", + "title":"Aggregate Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0206.html", + "product_code":"dli", + "code":"317", + "des":"Table-valued functions can convert one row of records into multiple rows or convert one column of records into multiple columns. Table-valued functions can only be used i", + "doc_type":"sqlreference", + "kw":"Table-Valued Functions,Built-In Functions,SQL Syntax Reference", + "title":"Table-Valued Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0101.html", + "product_code":"dli", + "code":"318", + "des":"Example:The returned number of elements in the array is 3.HELLO WORLD is returned.", + "doc_type":"sqlreference", + "kw":"Other Functions,Built-In Functions,SQL Syntax Reference", + "title":"Other Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0099.html", + "product_code":"dli", + "code":"319", + "des":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "doc_type":"sqlreference", + "kw":"User-Defined Functions,Flink SQL Syntax,SQL Syntax Reference", + "title":"User-Defined Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0209.html", + "product_code":"dli", + "code":"320", + "des":"Table 1 describes the basic geospatial geometric elements.You can build complex geospatial geometries based on basic geospatial geometric elements. Table 2 describes the ", + "doc_type":"sqlreference", + "kw":"Geographical Functions,Flink SQL Syntax,SQL Syntax Reference", + "title":"Geographical Functions", + "githuburl":"" + }, + { + "uri":"dli_08_0102.html", + "product_code":"dli", + "code":"321", + "des":"SyntaxDescriptionThe SELECT statement is used to select data from a table or insert constant data into a table.PrecautionsThe table to be queried must exist. Otherwise, a", + "doc_type":"sqlreference", + "kw":"SELECT,Flink SQL Syntax,SQL Syntax Reference", + "title":"SELECT", + "githuburl":"" + }, + { + "uri":"dli_08_0103.html", + "product_code":"dli", + "code":"322", + "des":"SyntaxorDescriptionIf the value of value is value1, result1 is returned. If the value is not any of the values listed in the clause, resultZ is returned. If no else state", + "doc_type":"sqlreference", + "kw":"Condition Expression,Flink SQL Syntax,SQL Syntax Reference", + "title":"Condition Expression", + "githuburl":"" + }, + { + "uri":"dli_08_0218.html", + "product_code":"dli", + "code":"323", + "des":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:time_attr can be processing-time or", + "doc_type":"sqlreference", + "kw":"Window,Flink SQL Syntax,SQL Syntax Reference", + "title":"Window", + "githuburl":"" + }, + { + "uri":"dli_08_0106.html", + "product_code":"dli", + "code":"324", + "des":"The JOIN operation allows you to query data from a table and write the query result to the sink stream. Currently, only RDSs and DCS Redis tables are supported. 
The ON ke", + "doc_type":"sqlreference", + "kw":"JOIN Between Stream Data and Table Data,Flink SQL Syntax,SQL Syntax Reference", + "title":"JOIN Between Stream Data and Table Data", + "githuburl":"" + }, + { + "uri":"dli_08_0107.html", + "product_code":"dli", + "code":"325", + "des":"Flink provides two time models: processing time and event time.DLI allows you to specify the time model during creation of the source stream and temporary stream.Processi", + "doc_type":"sqlreference", + "kw":"Configuring Time Models,Flink SQL Syntax,SQL Syntax Reference", + "title":"Configuring Time Models", + "githuburl":"" + }, + { + "uri":"dli_08_0108.html", + "product_code":"dli", + "code":"326", + "des":"Complex event processing (CEP) is used to detect complex patterns in endless data streams so as to identify and search patterns in various data rows. Pattern matching is ", + "doc_type":"sqlreference", + "kw":"Pattern Matching,Flink SQL Syntax,SQL Syntax Reference", + "title":"Pattern Matching", + "githuburl":"" + }, + { + "uri":"dli_08_0109.html", + "product_code":"dli", + "code":"327", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"StreamingML", + "title":"StreamingML", + "githuburl":"" + }, + { + "uri":"dli_08_0110.html", + "product_code":"dli", + "code":"328", + "des":"Anomaly detection applies to various scenarios, including intrusion detection, financial fraud detection, sensor data monitoring, medical diagnosis, natural data detectio", + "doc_type":"sqlreference", + "kw":"Anomaly Detection,StreamingML,SQL Syntax Reference", + "title":"Anomaly Detection", + "githuburl":"" + }, + { + "uri":"dli_08_0111.html", + "product_code":"dli", + "code":"329", + "des":"Modeling and forecasting time series is a common task in many business verticals. Modeling is used to extract meaningful statistics and other characteristics of the data.", + "doc_type":"sqlreference", + "kw":"Time Series Forecasting,StreamingML,SQL Syntax Reference", + "title":"Time Series Forecasting", + "githuburl":"" + }, + { + "uri":"dli_08_0216.html", + "product_code":"dli", + "code":"330", + "des":"Clustering algorithms belong to unsupervised algorithms. K-Means, a clustering algorithm, partitions data points into related clusters by calculating the distance between", + "doc_type":"sqlreference", + "kw":"Real-Time Clustering,StreamingML,SQL Syntax Reference", + "title":"Real-Time Clustering", + "githuburl":"" + }, + { + "uri":"dli_08_0088.html", + "product_code":"dli", + "code":"331", + "des":"Deep learning has a wide range of applications in many industries, such as image classification, image recognition, and speech recognition. DLI provides several functions", + "doc_type":"sqlreference", + "kw":"Deep Learning Model Prediction,StreamingML,SQL Syntax Reference", + "title":"Deep Learning Model Prediction", + "githuburl":"" + }, + { + "uri":"dli_08_0125.html", + "product_code":"dli", + "code":"332", + "des":"Flink SQL reserves some strings as keywords. 
If you want to use the following character strings as field names, ensure that they are enclosed by back quotes, for example,", + "doc_type":"sqlreference", + "kw":"Reserved Keywords,Flink SQL Syntax,SQL Syntax Reference", + "title":"Reserved Keywords", + "githuburl":"" + }, + { + "uri":"dli_08_0001.html", + "product_code":"dli", + "code":"333", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Identifiers", + "title":"Identifiers", + "githuburl":"" + }, + { + "uri":"dli_08_0002.html", + "product_code":"dli", + "code":"334", + "des":"None.Aggregate function.", + "doc_type":"sqlreference", + "kw":"aggregate_func,Identifiers,SQL Syntax Reference", + "title":"aggregate_func", + "githuburl":"" + }, + { + "uri":"dli_08_0003.html", + "product_code":"dli", + "code":"335", + "des":"None.Alias, which must be STRING type. It can be assigned to a field, table, view, or subquery.", + "doc_type":"sqlreference", + "kw":"alias,Identifiers,SQL Syntax Reference", + "title":"alias", + "githuburl":"" + }, + { + "uri":"dli_08_0004.html", + "product_code":"dli", + "code":"336", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"attr_expr,Identifiers,SQL Syntax Reference", + "title":"attr_expr", + "githuburl":"" + }, + { + "uri":"dli_08_0005.html", + "product_code":"dli", + "code":"337", + "des":"None.List of attr_expr, which is separated by commas (,).", + "doc_type":"sqlreference", + "kw":"attr_expr_list,Identifiers,SQL Syntax Reference", + "title":"attr_expr_list", + "githuburl":"" + }, + { + "uri":"dli_08_0006.html", + "product_code":"dli", + "code":"338", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"attrs_value_set_expr,Identifiers,SQL Syntax Reference", + "title":"attrs_value_set_expr", + "githuburl":"" + }, + { + "uri":"dli_08_0007.html", + "product_code":"dli", + "code":"339", + "des":"None.Return a boolean expression.", + "doc_type":"sqlreference", + "kw":"boolean_expression,Identifiers,SQL Syntax Reference", + "title":"boolean_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0009.html", + "product_code":"dli", + "code":"340", + "des":"None.Formal parameter for function call. 
It is usually a field name, which is the same as col_name.", + "doc_type":"sqlreference", + "kw":"col,Identifiers,SQL Syntax Reference", + "title":"col", + "githuburl":"" + }, + { + "uri":"dli_08_0010.html", + "product_code":"dli", + "code":"341", + "des":"None.Column (field) description, which must be STRING type and cannot exceed 256 bytes.", + "doc_type":"sqlreference", + "kw":"col_comment,Identifiers,SQL Syntax Reference", + "title":"col_comment", + "githuburl":"" + }, + { + "uri":"dli_08_0011.html", + "product_code":"dli", + "code":"342", + "des":"None.Column name, which must be STRING type and cannot exceed 128 bytes.", + "doc_type":"sqlreference", + "kw":"col_name,Identifiers,SQL Syntax Reference", + "title":"col_name", + "githuburl":"" + }, + { + "uri":"dli_08_0012.html", + "product_code":"dli", + "code":"343", + "des":"None.Field list, which consists of one col_name or more. If there is more than one col_name, separate them by using a comma (,).", + "doc_type":"sqlreference", + "kw":"col_name_list,Identifiers,SQL Syntax Reference", + "title":"col_name_list", + "githuburl":"" + }, + { + "uri":"dli_08_0013.html", + "product_code":"dli", + "code":"344", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"condition,Identifiers,SQL Syntax Reference", + "title":"condition", + "githuburl":"" + }, + { + "uri":"dli_08_0014.html", + "product_code":"dli", + "code":"345", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"condition_list,Identifiers,SQL Syntax Reference", + "title":"condition_list", + "githuburl":"" + }, + { + "uri":"dli_08_0015.html", + "product_code":"dli", + "code":"346", + "des":"None.Common expression name.", + "doc_type":"sqlreference", + "kw":"cte_name,Identifiers,SQL Syntax Reference", + "title":"cte_name", + "githuburl":"" + }, + { + "uri":"dli_08_0016.html", + "product_code":"dli", + "code":"347", + "des":"None.Data type. 
Currently, only the primitive data types are supported.", + "doc_type":"sqlreference", + "kw":"data_type,Identifiers,SQL Syntax Reference", + "title":"data_type", + "githuburl":"" + }, + { + "uri":"dli_08_0017.html", + "product_code":"dli", + "code":"348", + "des":"None.Database description, which must be STRING type and cannot exceed 256 characters.", + "doc_type":"sqlreference", + "kw":"db_comment,Identifiers,SQL Syntax Reference", + "title":"db_comment", + "githuburl":"" + }, + { + "uri":"dli_08_0018.html", + "product_code":"dli", + "code":"349", + "des":"None.Database name, which must be STRING type and cannot exceed 128 bytes.", + "doc_type":"sqlreference", + "kw":"db_name,Identifiers,SQL Syntax Reference", + "title":"db_name", + "githuburl":"" + }, + { + "uri":"dli_08_0019.html", + "product_code":"dli", + "code":"350", + "des":"None.Returned result for the ELSE clause of the CASE WHEN statement.", + "doc_type":"sqlreference", + "kw":"else_result_expression,Identifiers,SQL Syntax Reference", + "title":"else_result_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0020.html", + "product_code":"dli", + "code":"351", + "des":"| AVRO| CSV| JSON| ORC| PARQUETCurrently, the preceding formats are supported.Both USING and STORED AS can be used for specifying the data format. You can specify the pre", + "doc_type":"sqlreference", + "kw":"file_format,Identifiers,SQL Syntax Reference", + "title":"file_format", + "githuburl":"" + }, + { + "uri":"dli_08_0021.html", + "product_code":"dli", + "code":"352", + "des":"None.File path, which is the OBS path", + "doc_type":"sqlreference", + "kw":"file_path,Identifiers,SQL Syntax Reference", + "title":"file_path", + "githuburl":"" + }, + { + "uri":"dli_08_0022.html", + "product_code":"dli", + "code":"353", + "des":"None.Function name, which must be STRING type.", + "doc_type":"sqlreference", + "kw":"function_name,Identifiers,SQL Syntax Reference", + "title":"function_name", + "githuburl":"" + }, + { + "uri":"dli_08_0023.html", + "product_code":"dli", + "code":"354", + "des":"None.Expression that includes GROUP BY.", + "doc_type":"sqlreference", + "kw":"groupby_expression,Identifiers,SQL Syntax Reference", + "title":"groupby_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0024.html", + "product_code":"dli", + "code":"355", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"having_condition,Identifiers,SQL Syntax Reference", + "title":"having_condition", + "githuburl":"" + }, + { + "uri":"dli_08_0026.html", + "product_code":"dli", + "code":"356", + "des":"None.Input expression of the CASE WHEN statement.", + "doc_type":"sqlreference", + "kw":"input_expression,Identifiers,SQL Syntax Reference", + "title":"input_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0029.html", + "product_code":"dli", + "code":"357", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"join_condition,Identifiers,SQL Syntax Reference", + "title":"join_condition", + "githuburl":"" + }, + { + "uri":"dli_08_0030.html", + "product_code":"dli", + "code":"358", + "des":"None.The condition of an inequality join.", + "doc_type":"sqlreference", + "kw":"non_equi_join_condition,Identifiers,SQL Syntax Reference", + "title":"non_equi_join_condition", + "githuburl":"" + }, + { + "uri":"dli_08_0031.html", + "product_code":"dli", + "code":"359", + "des":"None.Maximum number of output lines specified by LIMIT. Which must be INT type.", + "doc_type":"sqlreference", + "kw":"number,Identifiers,SQL Syntax Reference", + "title":"number", + "githuburl":"" + }, + { + "uri":"dli_08_0034.html", + "product_code":"dli", + "code":"360", + "des":"None.Partition column name, that is, partition field name, which must be STRING type.", + "doc_type":"sqlreference", + "kw":"partition_col_name,Identifiers,SQL Syntax Reference", + "title":"partition_col_name", + "githuburl":"" + }, + { + "uri":"dli_08_0035.html", + "product_code":"dli", + "code":"361", + "des":"None.Partition column value, that is, partition field value.", + "doc_type":"sqlreference", + "kw":"partition_col_value,Identifiers,SQL Syntax Reference", + "title":"partition_col_value", + "githuburl":"" + }, + { + "uri":"dli_08_0036.html", + "product_code":"dli", + "code":"362", + "des":"partition_specs : (partition_col_name = partition_col_value, partition_col_name = partition_col_value, ...);Table partition list, which is expressed by using key=value pa", + "doc_type":"sqlreference", + "kw":"partition_specs,Identifiers,SQL Syntax Reference", + "title":"partition_specs", + "githuburl":"" + }, + { + "uri":"dli_08_0037.html", + "product_code":"dli", + "code":"363", + "des":"None.Property name, which must be STRING type.", + "doc_type":"sqlreference", + "kw":"property_name,Identifiers,SQL Syntax Reference", + "title":"property_name", + "githuburl":"" + }, + { + "uri":"dli_08_0038.html", + "product_code":"dli", + "code":"364", + "des":"None.Property value, which must be STRING type.", + "doc_type":"sqlreference", + "kw":"property_value,Identifiers,SQL Syntax Reference", + "title":"property_value", + "githuburl":"" + }, + { + "uri":"dli_08_0039.html", + "product_code":"dli", + "code":"365", + "des":"None.Pattern matching string, which supports wildcard matching.", + "doc_type":"sqlreference", + "kw":"regex_expression,Identifiers,SQL Syntax Reference", + "title":"regex_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0040.html", + "product_code":"dli", + "code":"366", + "des":"None.Returned result for the THEN clause of the CASE WHEN statement.", + "doc_type":"sqlreference", + "kw":"result_expression,Identifiers,SQL Syntax Reference", + "title":"result_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0042.html", + "product_code":"dli", + "code":"367", + "des":"None.Query clause for the basic SELECT statement.", + "doc_type":"sqlreference", + "kw":"select_statement,Identifiers,SQL Syntax Reference", + "title":"select_statement", + "githuburl":"" + }, + { + "uri":"dli_08_0043.html", + "product_code":"dli", + "code":"368", + "des":"None.Separator, which can be customized by users, for example, comma (,), semicolon (;), and colon (:). 
Which must be CHAR type.", + "doc_type":"sqlreference", + "kw":"separator,Identifiers,SQL Syntax Reference", + "title":"separator", + "githuburl":"" + }, + { + "uri":"dli_08_0045.html", + "product_code":"dli", + "code":"369", + "des":"None.SQL statement containing the common expression defined by cte_name.", + "doc_type":"sqlreference", + "kw":"sql_containing_cte_name,Identifiers,SQL Syntax Reference", + "title":"sql_containing_cte_name", + "githuburl":"" + }, + { + "uri":"dli_08_0046.html", + "product_code":"dli", + "code":"370", + "des":"None.Subquery.", + "doc_type":"sqlreference", + "kw":"sub_query,Identifiers,SQL Syntax Reference", + "title":"sub_query", + "githuburl":"" + }, + { + "uri":"dli_08_0047.html", + "product_code":"dli", + "code":"371", + "des":"None.Table description, which must be STRING type and cannot exceed 256 bytes.", + "doc_type":"sqlreference", + "kw":"table_comment,Identifiers,SQL Syntax Reference", + "title":"table_comment", + "githuburl":"" + }, + { + "uri":"dli_08_0048.html", + "product_code":"dli", + "code":"372", + "des":"NoneTable name, which cannot exceed 128 bytes. The string type and \"$\" symbol are supported.", + "doc_type":"sqlreference", + "kw":"table_name,Identifiers,SQL Syntax Reference", + "title":"table_name", + "githuburl":"" + }, + { + "uri":"dli_08_0049.html", + "product_code":"dli", + "code":"373", + "des":"None.Table property list, which is expressed by using key=value pairs. key represents property_name, and value represents property_value. If there is more than one key=va", + "doc_type":"sqlreference", + "kw":"table_properties,Identifiers,SQL Syntax Reference", + "title":"table_properties", + "githuburl":"" + }, + { + "uri":"dli_08_0050.html", + "product_code":"dli", + "code":"374", + "des":"None.Table or view name, which must be STRING type. It can also be a subquery. If it is subquery, an alias must also be provided.", + "doc_type":"sqlreference", + "kw":"table_reference,Identifiers,SQL Syntax Reference", + "title":"table_reference", + "githuburl":"" + }, + { + "uri":"dli_08_0053.html", + "product_code":"dli", + "code":"375", + "des":"None.When expression of the CASE WHEN statement. It is used for matching with the input expression.", + "doc_type":"sqlreference", + "kw":"when_expression,Identifiers,SQL Syntax Reference", + "title":"when_expression", + "githuburl":"" + }, + { + "uri":"dli_08_0054.html", + "product_code":"dli", + "code":"376", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"where_condition,Identifiers,SQL Syntax Reference", + "title":"where_condition", + "githuburl":"" + }, + { + "uri":"dli_08_0055.html", + "product_code":"dli", + "code":"377", + "des":"None.Analysis window function. For details, see Window Functions.", + "doc_type":"sqlreference", + "kw":"window_function,Identifiers,SQL Syntax Reference", + "title":"window_function", + "githuburl":"" + }, + { + "uri":"dli_08_0060.html", + "product_code":"dli", + "code":"378", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Operators", + "title":"Operators", + "githuburl":"" + }, + { + "uri":"dli_08_0061.html", + "product_code":"dli", + "code":"379", + "des":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "doc_type":"sqlreference", + "kw":"Relational Operators,Operators,SQL Syntax Reference", + "title":"Relational Operators", + "githuburl":"" + }, + { + "uri":"dli_08_0062.html", + "product_code":"dli", + "code":"380", + "des":"Arithmetic operators include binary operators and unary operators. For both types of operators, the returned results are numbers. Table 1 lists the arithmetic operators s", + "doc_type":"sqlreference", + "kw":"Arithmetic Operators,Operators,SQL Syntax Reference", + "title":"Arithmetic Operators", + "githuburl":"" + }, + { + "uri":"dli_08_0063.html", + "product_code":"dli", + "code":"381", + "des":"Common logical operators include AND, OR, and NOT. The operation result can be TRUE, FALSE, or NULL (which means unknown). The priorities of the operators are as follows:", + "doc_type":"sqlreference", + "kw":"Logical Operators,Operators,SQL Syntax Reference", + "title":"Logical Operators", + "githuburl":"" + }, + { + "uri":"dli_08_00005.html", + "product_code":"dli", + "code":"382", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"sqlreference", + "kw":"Change History,SQL Syntax Reference", + "title":"Change History", + "githuburl":"" + } +] \ No newline at end of file diff --git a/docs/dli/sqlreference/CLASS.TXT.json b/docs/dli/sqlreference/CLASS.TXT.json new file mode 100644 index 00000000..2925ad5d --- /dev/null +++ b/docs/dli/sqlreference/CLASS.TXT.json @@ -0,0 +1,3440 @@ +[ + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Spark SQL Syntax Reference", + "uri":"dli_08_0221.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"1" + }, + { + "desc":"This section describes the common configuration items of the SQL syntax for DLI batch jobs.", + "product_code":"dli", + "title":"Common Configuration Items of Batch SQL Jobs", + "uri":"dli_08_0266.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"2" + }, + { + "desc":"This section describes the Spark SQL syntax list provided by DLI. For details about the parameters and examples, see the syntax description.", + "product_code":"dli", + "title":"SQL Syntax Overview of Batch Jobs", + "uri":"dli_08_0219.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"3" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Databases", + "uri":"dli_08_0070.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"4" + }, + { + "desc":"This statement is used to create a database.IF NOT EXISTS: Prevents system errors if the database to be created exists.COMMENT: Describes a database.DBPROPERTIES: Specifi", + "product_code":"dli", + "title":"Creating a Database", + "uri":"dli_08_0071.html", + "doc_type":"sqlreference", + "p_code":"4", + "code":"5" + }, + { + "desc":"This statement is used to delete a database.IF EXISTS: Prevents system errors if the database to be deleted does not exist.DATABASE and SCHEMA can be used interchangeably", + "product_code":"dli", + "title":"Deleting a Database", + "uri":"dli_08_0072.html", + "doc_type":"sqlreference", + "p_code":"4", + "code":"6" + }, + { + "desc":"This syntax is used to view the information about a specified database, including the database name and database description.EXTENDED: Displays the database properties.If", + "product_code":"dli", + "title":"Viewing a Specified Database", + "uri":"dli_08_0073.html", + "doc_type":"sqlreference", + "p_code":"4", + "code":"7" + }, + { + "desc":"This syntax is used to query all current databases.NoneKeyword DATABASES is equivalent to SCHEMAS. You can use either of them in this statement.View all the current datab", + "product_code":"dli", + "title":"Viewing All Databases", + "uri":"dli_08_0074.html", + "doc_type":"sqlreference", + "p_code":"4", + "code":"8" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating an OBS Table", + "uri":"dli_08_0223.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"9" + }, + { + "desc":"Create an OBS table using the DataSource syntax.The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of suppor", + "product_code":"dli", + "title":"Creating an OBS Table Using the DataSource Syntax", + "uri":"dli_08_0076.html", + "doc_type":"sqlreference", + "p_code":"9", + "code":"10" + }, + { + "desc":"This statement is used to create an OBS table using the Hive syntax. The main differences between the DataSource and the Hive syntax lie in the supported data formats and", + "product_code":"dli", + "title":"Creating an OBS Table Using the Hive Syntax", + "uri":"dli_08_0077.html", + "doc_type":"sqlreference", + "p_code":"9", + "code":"11" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a DLI Table", + "uri":"dli_08_0224.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"12" + }, + { + "desc":"This DataSource syntax can be used to create a DLI table. 
The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number", + "product_code":"dli", + "title":"Creating a DLI Table Using the DataSource Syntax", + "uri":"dli_08_0098.html", + "doc_type":"sqlreference", + "p_code":"12", + "code":"13" + }, + { + "desc":"This Hive syntax is used to create a DLI table. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of suppor", + "product_code":"dli", + "title":"Creating a DLI Table Using the Hive Syntax", + "uri":"dli_08_0204.html", + "doc_type":"sqlreference", + "p_code":"12", + "code":"14" + }, + { + "desc":"This statement is used to delete tables.If the table is stored in OBS, only the metadata is deleted. The data stored on OBS is not deleted.If the table is stored in DLI, ", + "product_code":"dli", + "title":"Deleting a Table", + "uri":"dli_08_0087.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"15" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Viewing Tables", + "uri":"dli_08_0089.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"16" + }, + { + "desc":"This statement is used to view all tables and views in the current database.FROM/IN: followed by the name of a database whose tables and views will be displayed.NoneCreat", + "product_code":"dli", + "title":"Viewing All Tables", + "uri":"dli_08_0090.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"17" + }, + { + "desc":"This statement is used to show the statements for creating a table.CREATE TABLE: statement for creating a tableThe table specified in this statement must exist. Otherwise", + "product_code":"dli", + "title":"Viewing Table Creation Statements", + "uri":"dli_08_0091.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"18" + }, + { + "desc":"Check the properties of a table.TBLPROPERTIES: This statement allows you to add a key/value property to a table.property_name is case sensitive. You cannot specify multip", + "product_code":"dli", + "title":"Viewing Table Properties", + "uri":"dli_08_0092.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"19" + }, + { + "desc":"This statement is used to query all columns in a specified table.COLUMNS: columns in the current tableFROM/IN: followed by the name of a database whose tables and views w", + "product_code":"dli", + "title":"Viewing All Columns in a Specified Table", + "uri":"dli_08_0093.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"20" + }, + { + "desc":"This statement is used to view all partitions in a specified table.PARTITIONS: partitions in a specified tablePARTITION: a specified partitionThe table specified in this ", + "product_code":"dli", + "title":"Viewing All Partitions in a Specified Table", + "uri":"dli_08_0094.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"21" + }, + { + "desc":"This statement is used to view the table statistics. 
The names and data types of all columns in a specified table will be returned.EXTENDED: displays all metadata of the ", + "product_code":"dli", + "title":"Viewing Table Statistics", + "uri":"dli_08_0105.html", + "doc_type":"sqlreference", + "p_code":"16", + "code":"22" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Modifying a Table", + "uri":"dli_08_0262.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"23" + }, + { + "desc":"This statement is used to add one or more new columns to a table.ADD COLUMNS: columns to addCOMMENT: column descriptionDo not run this SQL statement concurrently. Otherwi", + "product_code":"dli", + "title":"Adding a Column", + "uri":"dli_08_0263.html", + "doc_type":"sqlreference", + "p_code":"23", + "code":"24" + }, + { + "desc":"DLI controls multiple versions of backup data for restoration. After the multiversion function is enabled, the system automatically backs up table data when you delete or", + "product_code":"dli", + "title":"Enabling or Disabling Multiversion Backup", + "uri":"dli_08_0354.html", + "doc_type":"sqlreference", + "p_code":"23", + "code":"25" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Syntax for Partitioning a Table", + "uri":"dli_08_0080.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"26" + }, + { + "desc":"After an OBS partitioned table is created, no partition information is generated for the table. Partition information is generated only after you:Insert data to the OBS p", + "product_code":"dli", + "title":"Adding Partition Data (Only OBS Tables Supported)", + "uri":"dli_08_0081.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"27" + }, + { + "desc":"This statement is used to rename partitions.PARTITION: a specified partitionRENAME: new name of the partitionThis statement is used for OBS table operations.The table and", + "product_code":"dli", + "title":"Renaming a Partition (Only OBS Tables Supported)", + "uri":"dli_08_0082.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"28" + }, + { + "desc":"Deletes one or more partitions from a partitioned table.The table in which partitions are to be deleted must exist. 
Otherwise, an error is reported.The to-be-deleted part", + "product_code":"dli", + "title":"Deleting a Partition", + "uri":"dli_08_0083.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"29" + }, + { + "desc":"This statement is used to delete one or more partitions based on specified conditions.This statement is used for OBS table operations only.The table in which partitions a", + "product_code":"dli", + "title":"Deleting Partitions by Specifying Filter Criteria (Only OBS Tables Supported)", + "uri":"dli_08_0343.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"30" + }, + { + "desc":"This statement is used to modify the positions of table partitions.PARTITION: a specified partitionLOCATION: path of the partitionFor a table partition whose position is ", + "product_code":"dli", + "title":"Altering the Partition Location of a Table (Only OBS Tables Supported)", + "uri":"dli_08_0084.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"31" + }, + { + "desc":"This statement is used to update the partition information about a table in the Metastore.OrPARTITIONS: partition informationSERDEPROPERTIES: Serde attributeThis statemen", + "product_code":"dli", + "title":"Updating Partitioned Table Data (Only OBS Tables Supported)", + "uri":"dli_08_0079.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"32" + }, + { + "desc":"Spark caches Parquet metadata to improve performance. If you update a Parquet table, the cached metadata is not updated. Spark SQL cannot find the newly inserted data and", + "product_code":"dli", + "title":"Updating Table Metadata with REFRESH TABLE", + "uri":"dli_08_0359.html", + "doc_type":"sqlreference", + "p_code":"26", + "code":"33" + }, + { + "desc":"The LOAD DATA function can be used to import data in CSV, Parquet, ORC, JSON, and Avro formats. The data is converted into the Parquet data format for storage.INPATH: pat", + "product_code":"dli", + "title":"Importing Data to the Table", + "uri":"dli_08_0100.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"34" + }, + { + "desc":"This statement is used to insert the SELECT query result or a certain data record into a table.Insert the SELECT query result into a table.INSERT INTO [TABLE] [db_name.]t", + "product_code":"dli", + "title":"Inserting Data", + "uri":"dli_08_0095.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"35" + }, + { + "desc":"This statement is used to delete data from the DLI or OBS table.Only data in the DLI or OBS table can be deleted.", + "product_code":"dli", + "title":"Clearing Data", + "uri":"dli_08_0217.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"36" + }, + { + "desc":"This statement is used to directly write query results to a specified directory. The query results can be stored in CSV, Parquet, ORC, JSON, or Avro format.USING: Specifi", + "product_code":"dli", + "title":"Exporting Search Results", + "uri":"dli_08_0205.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"37" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Backing Up and Restoring Data of Multiple Versions", + "uri":"dli_08_0349.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"38" + }, + { + "desc":"After multiversion is enabled, backup data is retained for seven days by default. You can change the retention period by setting system parameterdli.multi.version.retenti", + "product_code":"dli", + "title":"Setting the Retention Period for Multiversion Backup Data", + "uri":"dli_08_0350.html", + "doc_type":"sqlreference", + "p_code":"38", + "code":"39" + }, + { + "desc":"After the multiversion function is enabled, you can run the SHOW HISTORY command to view the backup data of a table. For details about the syntax for enabling or disablin", + "product_code":"dli", + "title":"Viewing Multiversion Backup Data", + "uri":"dli_08_0351.html", + "doc_type":"sqlreference", + "p_code":"38", + "code":"40" + }, + { + "desc":"After the multiversion function is enabled, you can run the RESTORE TABLE statement to restore a table or partition of a specified version. For details about the syntax f", + "product_code":"dli", + "title":"Restoring Multiversion Backup Data", + "uri":"dli_08_0352.html", + "doc_type":"sqlreference", + "p_code":"38", + "code":"41" + }, + { + "desc":"After the multiversion function is enabled, expired backup data will be directly deleted by the system when theinsert overwrite or truncate statement is executed. You can", + "product_code":"dli", + "title":"Configuring the Trash Bin for Expired Multiversion Data", + "uri":"dli_08_0353.html", + "doc_type":"sqlreference", + "p_code":"38", + "code":"42" + }, + { + "desc":"The retention period of multiversion backup data takes effect each time the insert overwrite or truncate statement is executed. If neither statement is executed for the t", + "product_code":"dli", + "title":"Deleting Multiversion Backup Data", + "uri":"dli_08_0355.html", + "doc_type":"sqlreference", + "p_code":"38", + "code":"43" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with an HBase Table", + "uri":"dli_08_0118.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"44" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing HBase table.Before creating a DLI table and associating it with HBase, you need to create a", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with HBase", + "uri":"dli_08_0119.html", + "doc_type":"sqlreference", + "p_code":"44", + "code":"45" + }, + { + "desc":"This statement is used to insert data in a DLI table to the associated HBase table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field", + "product_code":"dli", + "title":"Inserting Data to an HBase Table", + "uri":"dli_08_0120.html", + "doc_type":"sqlreference", + "p_code":"44", + "code":"46" + }, + { + "desc":"This statement is used to query data in an HBase table.LIMIT is used to limit the query results. 
Only INT type is supported by the number parameter.The table to be querie", + "product_code":"dli", + "title":"Querying an HBase Table", + "uri":"dli_08_0121.html", + "doc_type":"sqlreference", + "p_code":"44", + "code":"47" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with an OpenTSDB Table", + "uri":"dli_08_0220.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"48" + }, + { + "desc":"Run the CREATE TABLE statement to create the DLI table and associate it with the existing metric in OpenTSDB. This syntax supports the OpenTSDB of CloudTable and MRS.Befo", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with OpenTSDB", + "uri":"dli_08_0122.html", + "doc_type":"sqlreference", + "p_code":"48", + "code":"49" + }, + { + "desc":"Run the INSERT INTO statement to insert the data in the DLI table to the associated OpenTSDB metric.If no metric exists on the OpenTSDB, a new metric is automatically cre", + "product_code":"dli", + "title":"Inserting Data to the OpenTSDB Table", + "uri":"dli_08_0123.html", + "doc_type":"sqlreference", + "p_code":"48", + "code":"50" + }, + { + "desc":"This SELECT command is used to query data in an OpenTSDB table.If no metric exists in OpenTSDB, an error will be reported when the corresponding DLI table is queried.If t", + "product_code":"dli", + "title":"Querying an OpenTSDB Table", + "uri":"dli_08_0124.html", + "doc_type":"sqlreference", + "p_code":"48", + "code":"51" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with a DWS table", + "uri":"dli_08_0192.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"52" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing DWS table.Before creating a DLI table and associating it with DWS, you need to create a dat", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with DWS", + "uri":"dli_08_0193.html", + "doc_type":"sqlreference", + "p_code":"52", + "code":"53" + }, + { + "desc":"This statement is used to insert data in a DLI table to the associated DWS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "product_code":"dli", + "title":"Inserting Data to the DWS Table", + "uri":"dli_08_0194.html", + "doc_type":"sqlreference", + "p_code":"52", + "code":"54" + }, + { + "desc":"This statement is used to query data in a DWS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be queried m", + "product_code":"dli", + "title":"Querying the DWS Table", + "uri":"dli_08_0195.html", + "doc_type":"sqlreference", + "p_code":"52", + "code":"55" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with an RDS Table", + "uri":"dli_08_0196.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"56" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing RDS table. This function supports access to the MySQL and PostgreSQL clusters of RDS.Before", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with RDS", + "uri":"dli_08_0197.html", + "doc_type":"sqlreference", + "p_code":"56", + "code":"57" + }, + { + "desc":"This statement is used to insert data in a DLI table to the associated RDS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "product_code":"dli", + "title":"Inserting Data to the RDS Table", + "uri":"dli_08_0198.html", + "doc_type":"sqlreference", + "p_code":"56", + "code":"58" + }, + { + "desc":"This statement is used to query data in an RDS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be queried ", + "product_code":"dli", + "title":"Querying the RDS Table", + "uri":"dli_08_0199.html", + "doc_type":"sqlreference", + "p_code":"56", + "code":"59" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with a CSS Table", + "uri":"dli_08_0200.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"60" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing CSS table.Before creating a DLI table and associating it with CSS, you need to create a dat", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with CSS", + "uri":"dli_08_0201.html", + "doc_type":"sqlreference", + "p_code":"60", + "code":"61" + }, + { + "desc":"This statement is used to insert data in a DLI table to the associated CSS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "product_code":"dli", + "title":"Inserting Data to the CSS Table", + "uri":"dli_08_0202.html", + "doc_type":"sqlreference", + "p_code":"60", + "code":"62" + }, + { + "desc":"This statement is used to query data in a CSS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.The table to be queried m", + "product_code":"dli", + "title":"Querying the CSS Table", + "uri":"dli_08_0203.html", + "doc_type":"sqlreference", + "p_code":"60", + "code":"63" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with a DCS Table", + "uri":"dli_08_0225.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"64" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing DCS key.Before creating a DLI table and associating it with DCS, you need to create a datas", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with DCS", + "uri":"dli_08_0226.html", + "doc_type":"sqlreference", + "p_code":"64", + "code":"65" + }, + { + "desc":"This statement is used to insert data in a DLI table to the DCS key.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2...\n [FROM DL", + "product_code":"dli", + "title":"Inserting Data to a DCS Table", + "uri":"dli_08_0227.html", + "doc_type":"sqlreference", + "p_code":"64", + "code":"66" + }, + { + "desc":"This statement is used to query data in a DCS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.Query data in the test_re", + "product_code":"dli", + "title":"Querying the DCS Table", + "uri":"dli_08_0228.html", + "doc_type":"sqlreference", + "p_code":"64", + "code":"67" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Datasource Connection with a DDS Table", + "uri":"dli_08_0229.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"68" + }, + { + "desc":"This statement is used to create a DLI table and associate it with an existing DDS collection.Before creating a DLI table and associating it with DDS, you need to create ", + "product_code":"dli", + "title":"Creating a DLI Table and Associating It with DDS", + "uri":"dli_08_0230.html", + "doc_type":"sqlreference", + "p_code":"68", + "code":"69" + }, + { + "desc":"This statement is used to insert data in a DLI table to the associated DDS table.Insert the SELECT query result into a table.INSERT INTO DLI_TABLE\n SELECT field1,field2.", + "product_code":"dli", + "title":"Inserting Data to the DDS Table", + "uri":"dli_08_0231.html", + "doc_type":"sqlreference", + "p_code":"68", + "code":"70" + }, + { + "desc":"This statement is used to query data in a DDS table.LIMIT is used to limit the query results. Only INT type is supported by the number parameter.If schema information is ", + "product_code":"dli", + "title":"Querying the DDS Table", + "uri":"dli_08_0232.html", + "doc_type":"sqlreference", + "p_code":"68", + "code":"71" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Views", + "uri":"dli_08_0129.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"72" + }, + { + "desc":"This statement is used to create views.CREATE VIEW: creates views based on the given select statement. 
The result of the select statement will not be written into the dis", + "product_code":"dli", + "title":"Creating a View", + "uri":"dli_08_0130.html", + "doc_type":"sqlreference", + "p_code":"72", + "code":"73" + }, + { + "desc":"This statement is used to delete views.DROP: Deletes the metadata of a specified view. Although views and tables have many common points, the DROP TABLE statement cannot ", + "product_code":"dli", + "title":"Deleting a View", + "uri":"dli_08_0131.html", + "doc_type":"sqlreference", + "p_code":"72", + "code":"74" + }, + { + "desc":"This statement returns the logical plan and physical execution plan for the SQL statement.EXTENDED: After this keyword is specified, the logical and physical plans are ou", + "product_code":"dli", + "title":"Viewing the Execution Plan", + "uri":"dli_08_0138.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"75" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Permissions Management", + "uri":"dli_08_0139.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"76" + }, + { + "desc":"Table 1 describes the SQL statement permission matrix in DLI in terms of permissions on databases, tables, and roles.For privilege granting or revocation on databases and", + "product_code":"dli", + "title":"Data Permissions List", + "uri":"dli_08_0140.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"77" + }, + { + "desc":"This statement is used to create a role in the current database or a specified database.Only users with the CREATE_ROLE permission on the database can create roles. For e", + "product_code":"dli", + "title":"Creating a Role", + "uri":"dli_08_0141.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"78" + }, + { + "desc":"This statement is used to delete a role in the current database or a specified database.NoneThe role_name to be deleted must exist in the current database or the specifie", + "product_code":"dli", + "title":"Deleting a Role", + "uri":"dli_08_0148.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"79" + }, + { + "desc":"This statement is used to bind a user with a role.NoneThe role_name and username must exist. 
Otherwise, an error will be reported.", + "product_code":"dli", + "title":"Binding a Role", + "uri":"dli_08_0142.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"80" + }, + { + "desc":"This statement is used to unbind the user with the role.Nonerole_name and user_name must exist and user_name has been bound to role_name.To unbind the user_name1 from rol", + "product_code":"dli", + "title":"Unbinding a Role", + "uri":"dli_08_0147.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"81" + }, + { + "desc":"This statement is used to display all roles or roles bound to the user_name in the current database.ALL: Displays all roles.Keywords ALL and user_name cannot coexist.To d", + "product_code":"dli", + "title":"Displaying a Role", + "uri":"dli_08_0143.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"82" + }, + { + "desc":"This statement is used to grant permissions to a user or role.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must be a user.The privilege mu", + "product_code":"dli", + "title":"Granting a Permission", + "uri":"dli_08_0144.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"83" + }, + { + "desc":"This statement is used to revoke permissions granted to a user or role.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must be a user.The pri", + "product_code":"dli", + "title":"Revoking a Permission", + "uri":"dli_08_0146.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"84" + }, + { + "desc":"This statement is used to show the permissions granted to a user or role in the resource.ROLE: The subsequent role_name must be a role.USER: The subsequent user_name must", + "product_code":"dli", + "title":"Displaying the Granted Permissions", + "uri":"dli_08_0145.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"85" + }, + { + "desc":"This statement is used to display the binding relationship between roles and a user in the current database.NoneThe ROLE variable must exist.", + "product_code":"dli", + "title":"Displaying the Binding Relationship Between All Roles and Users", + "uri":"dli_08_0149.html", + "doc_type":"sqlreference", + "p_code":"76", + "code":"86" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Types", + "uri":"dli_08_0056.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"87" + }, + { + "desc":"Data type is a basic attribute of data. It is used to distinguish different types of data. Different data types occupy different storage space and support different opera", + "product_code":"dli", + "title":"Overview", + "uri":"dli_08_0057.html", + "doc_type":"sqlreference", + "p_code":"87", + "code":"88" + }, + { + "desc":"Table 1 lists the primitive data types supported by DLI.VARCHAR and CHAR data is stored in STRING type on DLI. 
Therefore, the string that exceeds the specified length wil", + "product_code":"dli", + "title":"Primitive Data Types", + "uri":"dli_08_0058.html", + "doc_type":"sqlreference", + "p_code":"87", + "code":"89" + }, + { + "desc":"Spark SQL supports complex data types, as shown in Table 1.When a table containing fields of the complex data type is created, the storage format of this table cannot be ", + "product_code":"dli", + "title":"Complex Data Types", + "uri":"dli_08_0059.html", + "doc_type":"sqlreference", + "p_code":"87", + "code":"90" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"User-Defined Functions", + "uri":"dli_08_0282.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"91" + }, + { + "desc":"DLI allows you to create and use user-defined functions (UDF) and user-defined table functions (UDTF) in Spark jobs.If a function with the same name exists in the databas", + "product_code":"dli", + "title":"Creating a Function", + "uri":"dli_08_0283.html", + "doc_type":"sqlreference", + "p_code":"91", + "code":"92" + }, + { + "desc":"This statement is used to delete functions.TEMPORARY: Indicates whether the function to be deleted is a temporary function.IF EXISTS: Used when the function to be deleted", + "product_code":"dli", + "title":"Deleting a Function", + "uri":"dli_08_0284.html", + "doc_type":"sqlreference", + "p_code":"91", + "code":"93" + }, + { + "desc":"Displays information about a specified function.EXTENDED: displays extended usage information.The metadata (implementation class and usage) of an existing function is ret", + "product_code":"dli", + "title":"Displaying Function Details", + "uri":"dli_08_0281.html", + "doc_type":"sqlreference", + "p_code":"91", + "code":"94" + }, + { + "desc":"View all functions in the current project.In the preceding statement, regex is a regular expression. For details about its parameters, see Table 1.For details about other", + "product_code":"dli", + "title":"Displaying All Functions", + "uri":"dli_08_0285.html", + "doc_type":"sqlreference", + "p_code":"91", + "code":"95" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Built-in Functions", + "uri":"dli_08_0064.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"96" + }, + { + "desc":"Table 1 lists the mathematical functions supported in DLI.", + "product_code":"dli", + "title":"Mathematical Functions", + "uri":"dli_08_0065.html", + "doc_type":"sqlreference", + "p_code":"96", + "code":"97" + }, + { + "desc":"Table 1 lists the date functions supported in DLI.", + "product_code":"dli", + "title":"Date Functions", + "uri":"dli_08_0066.html", + "doc_type":"sqlreference", + "p_code":"96", + "code":"98" + }, + { + "desc":"Table 1 lists the string functions supported by DLI.", + "product_code":"dli", + "title":"String Functions", + "uri":"dli_08_0067.html", + "doc_type":"sqlreference", + "p_code":"96", + "code":"99" + }, + { + "desc":"An aggregate function performs a calculation operation on a set of input values and returns a value. 
For example, the COUNT function counts the number of rows retrieved b", + "product_code":"dli", + "title":"Aggregate Functions", + "uri":"dli_08_0068.html", + "doc_type":"sqlreference", + "p_code":"96", + "code":"100" + }, + { + "desc":"A window function performs a calculation operation on a set of values related to the current value. A window function can be an aggregate function used in the GROUP BY cl", + "product_code":"dli", + "title":"Window Functions", + "uri":"dli_08_0069.html", + "doc_type":"sqlreference", + "p_code":"96", + "code":"101" + }, + { + "desc":"This statement is a basic query statement and is used to return the query results.The table to be queried must exist. Otherwise, an error is reported.To filter the record", + "product_code":"dli", + "title":"Basic SELECT Statements", + "uri":"dli_08_0150.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"102" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Filtering", + "uri":"dli_08_0151.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"103" + }, + { + "desc":"This statement is used to filter the query results using the WHERE clause.All is used to return repeated rows. By default, all repeated rows are returned. It is followed ", + "product_code":"dli", + "title":"WHERE Filtering Clause", + "uri":"dli_08_0152.html", + "doc_type":"sqlreference", + "p_code":"103", + "code":"104" + }, + { + "desc":"This statement is used to filter the query results using the HAVING clause.All is used to return repeated rows. By default, all repeated rows are returned. It is followed", + "product_code":"dli", + "title":"HAVING Filtering Clause", + "uri":"dli_08_0153.html", + "doc_type":"sqlreference", + "p_code":"103", + "code":"105" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Sorting", + "uri":"dli_08_0154.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"106" + }, + { + "desc":"This statement is used to order the result set of a query by the specified field.ASC/DESC: ASC sorts from the lowest value to the highest value. DESC sorts from the highe", + "product_code":"dli", + "title":"ORDER BY", + "uri":"dli_08_0155.html", + "doc_type":"sqlreference", + "p_code":"106", + "code":"107" + }, + { + "desc":"This statement is used to achieve the partial sorting of tables according to fields.ASC/DESC: ASC sorts from the lowest value to the highest value. DESC sorts from the hi", + "product_code":"dli", + "title":"SORT BY", + "uri":"dli_08_0156.html", + "doc_type":"sqlreference", + "p_code":"106", + "code":"108" + }, + { + "desc":"This statement is used to bucket a table and sort the table within buckets.CLUSTER BY: Buckets are created based on specified fields. Single fields and multiple fields ar", + "product_code":"dli", + "title":"CLUSTER BY", + "uri":"dli_08_0157.html", + "doc_type":"sqlreference", + "p_code":"106", + "code":"109" + }, + { + "desc":"This statement is used to bucket a table according to the field.DISTRIBUTE BY: Buckets are created based on specified fields. 
A single field or multiple fields are suppor", + "product_code":"dli", + "title":"DISTRIBUTE BY", + "uri":"dli_08_0158.html", + "doc_type":"sqlreference", + "p_code":"106", + "code":"110" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Grouping", + "uri":"dli_08_0159.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"111" + }, + { + "desc":"This statement is used to group a table based on columns.Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.Single-column GROU", + "product_code":"dli", + "title":"Column-Based GROUP BY", + "uri":"dli_08_0160.html", + "doc_type":"sqlreference", + "p_code":"111", + "code":"112" + }, + { + "desc":"This statement is used to group a table according to expressions.The groupby_expression can contain a single field or multiple fields, and also can call aggregate functio", + "product_code":"dli", + "title":"Expression-Based GROUP BY", + "uri":"dli_08_0161.html", + "doc_type":"sqlreference", + "p_code":"111", + "code":"113" + }, + { + "desc":"This statement filters a table after grouping it using the HAVING clause.The groupby_expression can contain a single field or multiple fields, and can also call aggregate", + "product_code":"dli", + "title":"GROUP BY Using HAVING", + "uri":"dli_08_0162.html", + "doc_type":"sqlreference", + "p_code":"111", + "code":"114" + }, + { + "desc":"This statement is used to generate the aggregate row, super-aggregate row, and the total row. The statement can achieve multi-layer statistics from right to left and disp", + "product_code":"dli", + "title":"ROLLUP", + "uri":"dli_08_0163.html", + "doc_type":"sqlreference", + "p_code":"111", + "code":"115" + }, + { + "desc":"This statement is used to generate the cross-table row and achieve the cross-statistics of the GROUP BY field.GROUPING SETS is the expansion of GROUP BY. For example:SELE", + "product_code":"dli", + "title":"GROUPING SETS", + "uri":"dli_08_0164.html", + "doc_type":"sqlreference", + "p_code":"111", + "code":"116" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"JOIN", + "uri":"dli_08_0165.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"117" + }, + { + "desc":"This statement is used to join and return the rows that meet the JOIN conditions from two tables as the result set.JOIN/INNER JOIN: Only the records that meet the JOIN co", + "product_code":"dli", + "title":"INNER JOIN", + "uri":"dli_08_0166.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"118" + }, + { + "desc":"Join the left table with the right table and return all joined records of the left table. If no joined record is found, NULL will be returned.LEFT OUTER JOIN: Returns all", + "product_code":"dli", + "title":"LEFT OUTER JOIN", + "uri":"dli_08_0167.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"119" + }, + { + "desc":"Match the right table with the left table and return all matched records of the right table. 
If no matched record is found, NULL will be returned.RIGHT OUTER JOIN: Return", + "product_code":"dli", + "title":"RIGHT OUTER JOIN", + "uri":"dli_08_0168.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"120" + }, + { + "desc":"Join all records from the right table and the left table and return all joined records. If no joined record is found, NULL will be returned.FULL OUTER JOIN: Matches all r", + "product_code":"dli", + "title":"FULL OUTER JOIN", + "uri":"dli_08_0169.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"121" + }, + { + "desc":"This statement has the same function as INNER JOIN, that is, the result set that meet the WHERE condition is returned. However, IMPLICIT JOIN does not use the condition s", + "product_code":"dli", + "title":"IMPLICIT JOIN", + "uri":"dli_08_0170.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"122" + }, + { + "desc":"Cartesian JOIN joins each record of table A with all records in table B. For example, if there are m records in table A and n records in table B, m x n records will be ge", + "product_code":"dli", + "title":"Cartesian JOIN", + "uri":"dli_08_0171.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"123" + }, + { + "desc":"This statement is used to query the records that meet the JOIN condition from the left table.LEFT SEMI JOIN: Indicates to only return the records from the left table. LEF", + "product_code":"dli", + "title":"LEFT SEMI JOIN", + "uri":"dli_08_0172.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"124" + }, + { + "desc":"This statement is used to join multiple tables using unequal values and return the result set that meet the condition.The non_equi_join_condition is similar to join_condi", + "product_code":"dli", + "title":"NON-EQUIJOIN", + "uri":"dli_08_0173.html", + "doc_type":"sqlreference", + "p_code":"117", + "code":"125" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Subquery", + "uri":"dli_08_0174.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"126" + }, + { + "desc":"Subqueries are nested in the WHERE clause, and the subquery result is used as the filtering condition.All is used to return repeated rows. By default, all repeated rows a", + "product_code":"dli", + "title":"Subquery Nested by WHERE", + "uri":"dli_08_0175.html", + "doc_type":"sqlreference", + "p_code":"126", + "code":"127" + }, + { + "desc":"This statement is used to nest subquery by FROM and use the subquery results as the data source of the external SELECT statement.All is used to return repeated rows. By d", + "product_code":"dli", + "title":"Subquery Nested by FROM", + "uri":"dli_08_0176.html", + "doc_type":"sqlreference", + "p_code":"126", + "code":"128" + }, + { + "desc":"This statement is used to embed a subquery in the HAVING clause. The subquery result is used as a part of the HAVING clause.All is used to return repeated rows. By defaul", + "product_code":"dli", + "title":"Subquery Nested by HAVING", + "uri":"dli_08_0177.html", + "doc_type":"sqlreference", + "p_code":"126", + "code":"129" + }, + { + "desc":"This statement is used to nest queries in the subquery.All is used to return repeated rows. By default, all repeated rows are returned. 
It is followed by asterisks (*) on", + "product_code":"dli", + "title":"Multi-Layer Nested Subquery", + "uri":"dli_08_0178.html", + "doc_type":"sqlreference", + "p_code":"126", + "code":"130" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Alias", + "uri":"dli_08_0179.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"131" + }, + { + "desc":"This statement is used to specify an alias for a table or the subquery result.table_reference: Can be a table, view, or subquery.As: Is used to connect to table_reference", + "product_code":"dli", + "title":"AS for Table", + "uri":"dli_08_0180.html", + "doc_type":"sqlreference", + "p_code":"131", + "code":"132" + }, + { + "desc":"This statement is used to specify an alias for a column.alias: gives an alias for the attr_expr field.AS: Whether to add AS does not affect the result.The to-be-queried t", + "product_code":"dli", + "title":"AS for Column", + "uri":"dli_08_0181.html", + "doc_type":"sqlreference", + "p_code":"131", + "code":"133" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Set Operations", + "uri":"dli_08_0182.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"134" + }, + { + "desc":"This statement is used to return the union set of multiple query results.UNION: The set operation is used to join the head and tail of a table based on certain conditions", + "product_code":"dli", + "title":"UNION", + "uri":"dli_08_0183.html", + "doc_type":"sqlreference", + "p_code":"134", + "code":"135" + }, + { + "desc":"This statement is used to return the intersection set of multiple query results.INTERSECT returns the intersection of multiple query results. The number of columns return", + "product_code":"dli", + "title":"INTERSECT", + "uri":"dli_08_0184.html", + "doc_type":"sqlreference", + "p_code":"134", + "code":"136" + }, + { + "desc":"This statement is used to return the difference set of two query results.EXCEPT minus the sets. A EXCEPT B indicates to remove the records that exist in both A and B from", + "product_code":"dli", + "title":"EXCEPT", + "uri":"dli_08_0185.html", + "doc_type":"sqlreference", + "p_code":"134", + "code":"137" + }, + { + "desc":"This statement is used to define the common table expression (CTE) using WITH...AS to simplify the query and make the result easier to read and maintain.cte_name: Name of", + "product_code":"dli", + "title":"WITH...AS", + "uri":"dli_08_0186.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"138" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"CASE...WHEN", + "uri":"dli_08_0187.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"139" + }, + { + "desc":"This statement is used to display result_expression according to the joined results of input_expression and when_expression.CASE: Subquery is supported in basic CASE stat", + "product_code":"dli", + "title":"Basic CASE Statement", + "uri":"dli_08_0188.html", + "doc_type":"sqlreference", + "p_code":"139", + "code":"140" + }, + { + "desc":"This statement is used to obtain the value of boolean_expression for each WHEN statement in a specified order. Then return the first result_expression with the value TRUE", + "product_code":"dli", + "title":"CASE Query Statement", + "uri":"dli_08_0189.html", + "doc_type":"sqlreference", + "p_code":"139", + "code":"141" + }, + { + "desc":"This statement is used together with the window function. The OVER statement is used to group data and sort the data within the group. The window function is used to gene", + "product_code":"dli", + "title":"OVER Clause", + "uri":"dli_08_0190.html", + "doc_type":"sqlreference", + "p_code":"1", + "code":"142" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink Opensource SQL 1.12 Syntax Reference", + "uri":"dli_08_0370.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"143" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Constraints and Definitions", + "uri":"dli_08_0371.html", + "doc_type":"sqlreference", + "p_code":"143", + "code":"144" + }, + { + "desc":"STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP,", + "product_code":"dli", + "title":"Supported Data Types", + "uri":"dli_08_0372.html", + "doc_type":"sqlreference", + "p_code":"144", + "code":"145" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Syntax", + "uri":"dli_08_0373.html", + "doc_type":"sqlreference", + "p_code":"144", + "code":"146" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Definition Language (DDL)", + "uri":"dli_08_0374.html", + "doc_type":"sqlreference", + "p_code":"146", + "code":"147" + }, + { + "desc":"Create a table with a specified name.COMPUTED COLUMNA computed column is a virtual column generated using column_name AS computed_column_expression. 
A computed column eva", + "product_code":"dli", + "title":"CREATE TABLE", + "uri":"dli_08_0375.html", + "doc_type":"sqlreference", + "p_code":"147", + "code":"148" + }, + { + "desc":"Create a view with multiple layers nested in it to simplify the development process.IF NOT EXISTSIf the view already exists, nothing happens.Create a view named viewName.", + "product_code":"dli", + "title":"CREATE VIEW", + "uri":"dli_08_0376.html", + "doc_type":"sqlreference", + "p_code":"147", + "code":"149" + }, + { + "desc":"Create a user-defined function.For details about how to create a user-defined function, see User-Defined Functions (UDFs).IF NOT EXISTSIf the function already exists, not", + "product_code":"dli", + "title":"CREATE FUNCTION", + "uri":"dli_08_0377.html", + "doc_type":"sqlreference", + "p_code":"147", + "code":"150" + }, + { + "desc":"SyntaxPrecautionsFlink SQL uses a lexical policy for identifier (table, attribute, function names) similar to Java:The case of identifiers is preserved whether or not the", + "product_code":"dli", + "title":"Data Manipulation Language (DML)", + "uri":"dli_08_0378.html", + "doc_type":"sqlreference", + "p_code":"146", + "code":"151" + }, + { + "desc":"This section describes the Flink open source SQL 1.12 syntax supported by DLI. For details about the parameters and examples, see the syntax description.", + "product_code":"dli", + "title":"Overview", + "uri":"dli_08_0379.html", + "doc_type":"sqlreference", + "p_code":"143", + "code":"152" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"DDL Syntax", + "uri":"dli_08_0380.html", + "doc_type":"sqlreference", + "p_code":"143", + "code":"153" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating Source Tables", + "uri":"dli_08_0381.html", + "doc_type":"sqlreference", + "p_code":"153", + "code":"154" + }, + { + "desc":"DataGen is used to generate random data for debugging and testing.NoneWhen you create a DataGen table, the table field type cannot be Array, Map, or Row. You can use COMP", + "product_code":"dli", + "title":"DataGen Source Table", + "uri":"dli_08_0382.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"155" + }, + { + "desc":"DLI reads data of Flink jobs from GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types an", + "product_code":"dli", + "title":"GaussDB(DWS) Source Table", + "uri":"dli_08_0383.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"156" + }, + { + "desc":"Create a source stream to obtain data from HBase as input for jobs. 
HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excell", + "product_code":"dli", + "title":"HBase Source Table", + "uri":"dli_08_0384.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"157" + }, + { + "desc":"The JDBC connector is a Flink's built-in connector to read data from a database.An enhanced datasource connection with the instances has been established, so that you can", + "product_code":"dli", + "title":"JDBC Source Table", + "uri":"dli_08_0385.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"158" + }, + { + "desc":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "product_code":"dli", + "title":"Kafka Source Table", + "uri":"dli_08_0386.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"159" + }, + { + "desc":"The MySQL CDC source table, that is, the MySQL streaming source table, reads all historical data in the database first and then smoothly switches data read to the Binlog ", + "product_code":"dli", + "title":"MySQL CDC Source Table", + "uri":"dli_08_0387.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"160" + }, + { + "desc":"The Postgres CDC source table, that is, Postgres streaming source table, is used to read the full snapshot data and changed data of the PostgreSQL database in sequence. T", + "product_code":"dli", + "title":"Postgres CDC Source Table", + "uri":"dli_08_0388.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"161" + }, + { + "desc":"Create a source stream to obtain data from Redis as input for jobs.An enhanced datasource connection has been created for DLI to connect to the Redis database, so that yo", + "product_code":"dli", + "title":"Redis Source Table", + "uri":"dli_08_0389.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"162" + }, + { + "desc":"Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provi", + "product_code":"dli", + "title":"Upsert Kafka Source Table", + "uri":"dli_08_0390.html", + "doc_type":"sqlreference", + "p_code":"154", + "code":"163" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating Result Tables", + "uri":"dli_08_0391.html", + "doc_type":"sqlreference", + "p_code":"153", + "code":"164" + }, + { + "desc":"The BlackHole connector allows for swallowing all input records. It is designed for high-performance testing and UDF output. It is not a substantive sink. The BlackHole r", + "product_code":"dli", + "title":"BlackHole Result Table", + "uri":"dli_08_0392.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"165" + }, + { + "desc":"DLI can output Flink job data to the ClickHouse database. ClickHouse is a column-based database oriented to online analysis and processing. It supports SQL query and prov", + "product_code":"dli", + "title":"ClickHouse Result Table", + "uri":"dli_08_0393.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"166" + }, + { + "desc":"DLI outputs the Flink job output data to GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. 
The PostgreSQL database can store data of more complex t", + "product_code":"dli", + "title":"GaussDB(DWS) Result Table", + "uri":"dli_08_0394.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"167" + }, + { + "desc":"DLI outputs Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "product_code":"dli", + "title":"Elasticsearch Result Table", + "uri":"dli_08_0395.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"168" + }, + { + "desc":"DLI outputs the job data to HBase. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scal", + "product_code":"dli", + "title":"HBase Result Table", + "uri":"dli_08_0396.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"169" + }, + { + "desc":"DLI outputs the Flink job output data to RDS through the JDBC result table.An enhanced datasource connection with the instances has been established, so that you can conf", + "product_code":"dli", + "title":"JDBC Result Table", + "uri":"dli_08_0397.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"170" + }, + { + "desc":"DLI outputs the Flink job output data to Kafka through the Kafka result table.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subs", + "product_code":"dli", + "title":"Kafka Result Table", + "uri":"dli_08_0398.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"171" + }, + { + "desc":"The Print connector is used to print output data to the error file or TaskManager file, making it easier for you to view the result in code debugging.NoneThe Print result", + "product_code":"dli", + "title":"Print Result Table", + "uri":"dli_08_0399.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"172" + }, + { + "desc":"DLI outputs the Flink job output data to Redis. Redis is a key-value storage system that supports multiple types of data structures. It can be used in scenarios such as c", + "product_code":"dli", + "title":"Redis Result Table", + "uri":"dli_08_0400.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"173" + }, + { + "desc":"Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provi", + "product_code":"dli", + "title":"Upsert Kafka Result Table", + "uri":"dli_08_0401.html", + "doc_type":"sqlreference", + "p_code":"164", + "code":"174" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating Dimension Tables", + "uri":"dli_08_0402.html", + "doc_type":"sqlreference", + "p_code":"153", + "code":"175" + }, + { + "desc":"Create a GaussDB(DWS) table to connect to source streams for wide table generation.Ensure that you have created a GaussDB(DWS) cluster using your account.A DWS database t", + "product_code":"dli", + "title":"GaussDB(DWS) Dimension Table", + "uri":"dli_08_0403.html", + "doc_type":"sqlreference", + "p_code":"175", + "code":"176" + }, + { + "desc":"Create a Hbase dimension table to connect to the source streams for wide table generation.An enhanced datasource connection has been created for DLI to connect to HBase, ", + "product_code":"dli", + "title":"HBase Dimension Table", + "uri":"dli_08_0404.html", + "doc_type":"sqlreference", + "p_code":"175", + "code":"177" + }, + { + "desc":"Create a JDBC dimension table to connect to the source stream.You have created a JDBC instance for your account.When you create a Flink OpenSource SQL job, set Flink Vers", + "product_code":"dli", + "title":"JDBC Dimension Table", + "uri":"dli_08_0405.html", + "doc_type":"sqlreference", + "p_code":"175", + "code":"178" + }, + { + "desc":"Create a Redis table to connect to source streams for wide table generation.An enhanced datasource connection with Redis has been established, so that you can configure s", + "product_code":"dli", + "title":"Redis Dimension Table", + "uri":"dli_08_0406.html", + "doc_type":"sqlreference", + "p_code":"175", + "code":"179" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Format", + "uri":"dli_08_0407.html", + "doc_type":"sqlreference", + "p_code":"153", + "code":"180" + }, + { + "desc":"Apache Avro is supported for you to read and write Avro data based on an Avro schema with Flink. The Avro schema is derived from the table schema.KafkaUpsert KafkaCurrent", + "product_code":"dli", + "title":"Avro", + "uri":"dli_08_0408.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"181" + }, + { + "desc":"Canal is a Changelog Data Capture (CDC) tool that can stream changes in real-time from MySQL into other systems. Canal provides a unified format schema for changelog and ", + "product_code":"dli", + "title":"Canal", + "uri":"dli_08_0409.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"182" + }, + { + "desc":"The Avro Schema Registry (avro-confluent) format allows you to read records that were serialized by the io.confluent.kafka.serializers.KafkaAvroSerializer and to write re", + "product_code":"dli", + "title":"Confluent Avro", + "uri":"dli_08_0410.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"183" + }, + { + "desc":"The CSV format allows you to read and write CSV data based on a CSV schema. Currently, the CSV schema is derived from table schema.KafkaUpsert KafkaUse Kafka to send data", + "product_code":"dli", + "title":"CSV", + "uri":"dli_08_0411.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"184" + }, + { + "desc":"Debezium is a Changelog Data Capture (CDC) tool that can stream changes in real-time from other databases into Kafka. 
Debezium provides a unified format schema for change", + "product_code":"dli", + "title":"Debezium", + "uri":"dli_08_0412.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"185" + }, + { + "desc":"The JSON format allows you to read and write JSON data based on a JSON schema. Currently, the JSON schema is derived from table schema.KafkaUpsert KafkaElasticsearchIn th", + "product_code":"dli", + "title":"JSON", + "uri":"dli_08_0413.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"186" + }, + { + "desc":"Flink supports to interpret Maxwell JSON messages as INSERT/UPDATE/DELETE messages into Flink SQL system. This is useful in many cases to leverage this feature,such as:Sy", + "product_code":"dli", + "title":"Maxwell", + "uri":"dli_08_0414.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"187" + }, + { + "desc":"The raw format allows you to read and write raw (byte based) values as a single column.Note: This format encodes null values as null of the byte[] type. This may have lim", + "product_code":"dli", + "title":"Raw", + "uri":"dli_08_0415.html", + "doc_type":"sqlreference", + "p_code":"180", + "code":"188" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"DML Snytax", + "uri":"dli_08_0416.html", + "doc_type":"sqlreference", + "p_code":"143", + "code":"189" + }, + { + "desc":"SyntaxDescriptionSELECT is used to select data from a table.ALL indicates that all results are returned.DISTINCT indicates that the duplicated results are removed.Precaut", + "product_code":"dli", + "title":"SELECT", + "uri":"dli_08_0417.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"190" + }, + { + "desc":"SyntaxDescriptionUNION is used to return the union set of multiple query results.INTERSECT is used to return the intersection of multiple query results.EXCEPT is used to ", + "product_code":"dli", + "title":"Set Operations", + "uri":"dli_08_0418.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"191" + }, + { + "desc":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:Array functionsArray functionsGroup", + "product_code":"dli", + "title":"Window", + "uri":"dli_08_0419.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"192" + }, + { + "desc":"SyntaxPrecautionsCurrently, only equi-joins are supported, for example, joins that have at least one conjunctive condition with an equality predicate. Arbitrary cross or ", + "product_code":"dli", + "title":"JOIN", + "uri":"dli_08_0420.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"193" + }, + { + "desc":"FunctionThis clause is used to sort data in ascending order on a time attribute.PrecautionsCurrently, only sorting by time attribute is supported.ExampleSort data in asce", + "product_code":"dli", + "title":"OrderBy & Limit", + "uri":"dli_08_0421.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"194" + }, + { + "desc":"Top-N queries ask for the N smallest or largest values ordered by columns. Both smallest and largest values sets are considered Top-N queries. 
Top-N queries are useful in", + "product_code":"dli", + "title":"Top-N", + "uri":"dli_08_0422.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"195" + }, + { + "desc":"Deduplication removes rows that duplicate over a set of columns, keeping only the first one or the last one.ROW_NUMBER(): Assigns a unique, sequential number to each row,", + "product_code":"dli", + "title":"Deduplication", + "uri":"dli_08_0423.html", + "doc_type":"sqlreference", + "p_code":"189", + "code":"196" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Functions", + "uri":"dli_08_0424.html", + "doc_type":"sqlreference", + "p_code":"143", + "code":"197" + }, + { + "desc":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "product_code":"dli", + "title":"User-Defined Functions (UDFs)", + "uri":"dli_08_0425.html", + "doc_type":"sqlreference", + "p_code":"197", + "code":"198" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Built-In Functions", + "uri":"dli_08_0426.html", + "doc_type":"sqlreference", + "p_code":"197", + "code":"199" + }, + { + "desc":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "product_code":"dli", + "title":"Mathematical Operation Functions", + "uri":"dli_08_0427.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"200" + }, + { + "desc":"SyntaxExampleTest input data.Test the data source kafka. The message content is as follows:{name:James,age:24,sex:male,grade:{math:95,science:[80,85],english:100}}\n{name:", + "product_code":"dli", + "title":"String Functions", + "uri":"dli_08_0428.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"201" + }, + { + "desc":"Table 1 lists the time functions supported by Flink OpenSource SQL.FunctionReturns a SQL date parsed from string in form of yyyy-MM-dd.Returns a SQL date parsed from stri", + "product_code":"dli", + "title":"Temporal Functions", + "uri":"dli_08_0429.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"202" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Conditional Functions", + "uri":"dli_08_0430.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"203" + }, + { + "desc":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.The following example converts the amount value to an integer.Flink jobs do not sup", + "product_code":"dli", + "title":"Type Conversion Functions", + "uri":"dli_08_0431.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"204" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Collection Functions", + "uri":"dli_08_0432.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"205" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Value Construction Functions", + "uri":"dli_08_0433.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"206" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Value Access Functions", + "uri":"dli_08_0434.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"207" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Hash Functions", + "uri":"dli_08_0435.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"208" + }, + { + "desc":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "product_code":"dli", + "title":"Aggregate Functions", + "uri":"dli_08_0436.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"209" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Table-Valued Functions", + "uri":"dli_08_0437.html", + "doc_type":"sqlreference", + "p_code":"199", + "code":"210" + }, + { + "desc":"The string_split function splits a target string into substrings based on the specified separator and returns a substring list.Create a Flink OpenSource SQL job by referr", + "product_code":"dli", + "title":"string_split", + "uri":"dli_08_0438.html", + "doc_type":"sqlreference", + "p_code":"210", + "code":"211" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink Opensource SQL 1.10 Syntax Reference", + "uri":"dli_08_0289.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"212" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Constraints and Definitions", + "uri":"dli_08_0290.html", + "doc_type":"sqlreference", + "p_code":"212", + "code":"213" + }, + { + "desc":"STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP,", + "product_code":"dli", + "title":"Supported Data Types", + "uri":"dli_08_0291.html", + "doc_type":"sqlreference", + "p_code":"213", + "code":"214" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Syntax Definition", + "uri":"dli_08_0292.html", + "doc_type":"sqlreference", + "p_code":"213", + "code":"215" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Definition Language (DDL)", + "uri":"dli_08_0293.html", + "doc_type":"sqlreference", + "p_code":"215", + "code":"216" + }, + { + "desc":"This clause is used to create a table with a specified name.COMPUTED COLUMNA computed column is a virtual column generated using column_name AS computed_column_expression", + "product_code":"dli", + "title":"CREATE TABLE", + "uri":"dli_08_0294.html", + "doc_type":"sqlreference", + "p_code":"216", + "code":"217" + }, + { + "desc":"Create a view with multiple layers nested in it to simplify the development process.IF NOT EXISTSIf the view already exists, nothing happens.Create a view named viewName.", + "product_code":"dli", + "title":"CREATE VIEW", + "uri":"dli_08_0295.html", + "doc_type":"sqlreference", + "p_code":"216", + "code":"218" + }, + { + "desc":"Create a user-defined function.IF NOT EXISTSIf the function already exists, nothing happens.LANGUAGE JAVA|SCALALanguage tag is used to instruct Flink runtime how to execu", + "product_code":"dli", + "title":"CREATE FUNCTION", + "uri":"dli_08_0296.html", + "doc_type":"sqlreference", + "p_code":"216", + "code":"219" + }, + { + "desc":"SyntaxPrecautionsFlink SQL uses a lexical policy for identifier (table, attribute, function names) similar to Java:The case of identifiers is preserved whether they are q", + "product_code":"dli", + "title":"Data Manipulation Language (DML)", + "uri":"dli_08_0297.html", + "doc_type":"sqlreference", + "p_code":"215", + "code":"220" + }, + { + "desc":"This section describes the Flink OpenSource SQL syntax supported by DLI. For details about the parameters and examples, see the syntax description.", + "product_code":"dli", + "title":"Flink OpenSource SQL 1.10 Syntax", + "uri":"dli_08_0298.html", + "doc_type":"sqlreference", + "p_code":"212", + "code":"221" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Definition Language (DDL)", + "uri":"dli_08_0299.html", + "doc_type":"sqlreference", + "p_code":"212", + "code":"222" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Source Table", + "uri":"dli_08_0300.html", + "doc_type":"sqlreference", + "p_code":"222", + "code":"223" + }, + { + "desc":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "product_code":"dli", + "title":"Kafka Source Table", + "uri":"dli_08_0301.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"224" + }, + { + "desc":"Create a source stream to read data from DIS. DIS accesses user data and Flink job reads data from the DIS stream as input data for jobs. 
Flink jobs can quickly remove da", + "product_code":"dli", + "title":"DIS Source Table", + "uri":"dli_08_0302.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"225" + }, + { + "desc":"The JDBC connector is a Flink's built-in connector to read data from a database.An enhanced datasource connection with the database has been established, so that you can ", + "product_code":"dli", + "title":"JDBC Source Table", + "uri":"dli_08_0303.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"226" + }, + { + "desc":"DLI reads data of Flink jobs from GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types an", + "product_code":"dli", + "title":"GaussDB(DWS) Source Table", + "uri":"dli_08_0304.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"227" + }, + { + "desc":"Create a source stream to obtain data from Redis as input for jobs.An enhanced datasource connection with Redis has been established, so that you can configure security g", + "product_code":"dli", + "title":"Redis Source Table", + "uri":"dli_08_0305.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"228" + }, + { + "desc":"Create a source stream to obtain data from HBase as input for jobs. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excell", + "product_code":"dli", + "title":"HBase Source Table", + "uri":"dli_08_0306.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"229" + }, + { + "desc":"You can call APIs to obtain data from the cloud ecosystem or an open source ecosystem and use the obtained data as input of Flink jobs.The customized source class needs t", + "product_code":"dli", + "title":"userDefined Source Table", + "uri":"dli_08_0358.html", + "doc_type":"sqlreference", + "p_code":"223", + "code":"230" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Result Table", + "uri":"dli_08_0307.html", + "doc_type":"sqlreference", + "p_code":"222", + "code":"231" + }, + { + "desc":"DLI exports Flink job data to ClickHouse result tables.ClickHouse is a column-based database oriented to online analysis and processing. It supports SQL query and provide", + "product_code":"dli", + "title":"ClickHouse Result Table", + "uri":"dli_08_0344.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"232" + }, + { + "desc":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "product_code":"dli", + "title":"Kafka Result Table", + "uri":"dli_08_0308.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"233" + }, + { + "desc":"DLI exports the output data of the Flink job to Kafka in upsert mode.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription ", + "product_code":"dli", + "title":"Upsert Kafka Result Table", + "uri":"dli_08_0309.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"234" + }, + { + "desc":"DLI writes the Flink job output data into DIS. 
The data is filtered and imported to the DIS stream for future processing.DIS addresses the challenge of transmitting data ", + "product_code":"dli", + "title":"DIS Result Table", + "uri":"dli_08_0310.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"235" + }, + { + "desc":"DLI exports the output data of the Flink job to RDS.An enhanced datasource connection with the database has been established, so that you can configure security group rul", + "product_code":"dli", + "title":"JDBC Result Table", + "uri":"dli_08_0311.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"236" + }, + { + "desc":"DLI outputs the Flink job output data to GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex t", + "product_code":"dli", + "title":"GaussDB(DWS) Result Table", + "uri":"dli_08_0312.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"237" + }, + { + "desc":"DLI exports the output data of the Flink job to Redis. Redis is a storage system that supports multiple types of data structures such as key-value. It can be used in scen", + "product_code":"dli", + "title":"Redis Result Table", + "uri":"dli_08_0313.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"238" + }, + { + "desc":"DLI exports Flink job output data to SMN.SMN provides reliable and flexible large-scale message notification services to DLI. It significantly simplifies system coupling ", + "product_code":"dli", + "title":"SMN Result Table", + "uri":"dli_08_0314.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"239" + }, + { + "desc":"DLI outputs the job data to HBase. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scal", + "product_code":"dli", + "title":"HBase Result Table", + "uri":"dli_08_0315.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"240" + }, + { + "desc":"DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "product_code":"dli", + "title":"Elasticsearch Result Table", + "uri":"dli_08_0316.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"241" + }, + { + "desc":"OpenTSDB is a distributed, scalable time series database based on HBase. OpenTSDB is designed to collect monitoring information of a large-scale cluster and query data in", + "product_code":"dli", + "title":"OpenTSDB Result Table", + "uri":"dli_08_0348.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"242" + }, + { + "desc":"Write your Java code to insert the processed data into a specified database supported by your cloud service.Implement the custom sink class :The custom sink class is inhe", + "product_code":"dli", + "title":"User-defined Result Table", + "uri":"dli_08_0347.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"243" + }, + { + "desc":"The print connector exports your data output to the error file or the out file of TaskManager. It is mainly used for code debugging and output viewing.Read data from Kafk", + "product_code":"dli", + "title":"Print Result Table", + "uri":"dli_08_0345.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"244" + }, + { + "desc":"You can create a file system result table to export data to a file system such as HDFS or OBS. 
After the data is generated, a non-DLI table can be created directly accord", + "product_code":"dli", + "title":"File System Result Table", + "uri":"dli_08_0346.html", + "doc_type":"sqlreference", + "p_code":"231", + "code":"245" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Dimension Table", + "uri":"dli_08_0317.html", + "doc_type":"sqlreference", + "p_code":"222", + "code":"246" + }, + { + "desc":"Create a JDBC dimension table to connect to the source stream.You have created a JDBC instance for your account.The RDS table is used to connect to the source stream.CREA", + "product_code":"dli", + "title":"JDBC Dimension Table", + "uri":"dli_08_0318.html", + "doc_type":"sqlreference", + "p_code":"246", + "code":"247" + }, + { + "desc":"Create a GaussDB(DWS) dimension table to connect to the input stream.You have created a GaussDB(DWS) instance for your account.Use an RDS table to connect to the source s", + "product_code":"dli", + "title":"GaussDB(DWS) Dimension Table", + "uri":"dli_08_0319.html", + "doc_type":"sqlreference", + "p_code":"246", + "code":"248" + }, + { + "desc":"Create a Hbase dimension table to connect to the source stream.An enhanced datasource connection has been created for DLI to connect to HBase, so that jobs can run on the", + "product_code":"dli", + "title":"HBase Dimension Table", + "uri":"dli_08_0320.html", + "doc_type":"sqlreference", + "p_code":"246", + "code":"249" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Manipulation Language (DML)", + "uri":"dli_08_0321.html", + "doc_type":"sqlreference", + "p_code":"212", + "code":"250" + }, + { + "desc":"SyntaxDescriptionThis clause is used to select data from a table.ALL indicates that all results are returned.DISTINCT indicates that the duplicated results are removed.Pr", + "product_code":"dli", + "title":"SELECT", + "uri":"dli_08_0322.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"251" + }, + { + "desc":"SyntaxDescriptionUNION is used to return the union set of multiple query results.INTERSECT is used to return the intersection of multiple query results.EXCEPT is used to ", + "product_code":"dli", + "title":"Set Operations", + "uri":"dli_08_0323.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"252" + }, + { + "desc":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:Array functionsArray functionsGroup", + "product_code":"dli", + "title":"Window", + "uri":"dli_08_0324.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"253" + }, + { + "desc":"SyntaxPrecautionsCurrently, only equi-joins are supported, for example, joins that have at least one conjunctive condition with an equality predicate. 
Arbitrary cross or ", + "product_code":"dli", + "title":"JOIN", + "uri":"dli_08_0325.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"254" + }, + { + "desc":"FunctionThis clause is used to sort data in ascending order on a time attribute.PrecautionsCurrently, only sorting by time attribute is supported.ExampleSort data in asce", + "product_code":"dli", + "title":"OrderBy & Limit", + "uri":"dli_08_0326.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"255" + }, + { + "desc":"Top-N queries ask for the N smallest or largest values ordered by columns. Both smallest and largest values sets are considered Top-N queries. Top-N queries are useful in", + "product_code":"dli", + "title":"Top-N", + "uri":"dli_08_0327.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"256" + }, + { + "desc":"Deduplication removes rows that duplicate over a set of columns, keeping only the first one or the last one.ROW_NUMBER(): Assigns a unique, sequential number to each row,", + "product_code":"dli", + "title":"Deduplication", + "uri":"dli_08_0328.html", + "doc_type":"sqlreference", + "p_code":"250", + "code":"257" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Functions", + "uri":"dli_08_0329.html", + "doc_type":"sqlreference", + "p_code":"212", + "code":"258" + }, + { + "desc":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "product_code":"dli", + "title":"User-Defined Functions", + "uri":"dli_08_0330.html", + "doc_type":"sqlreference", + "p_code":"258", + "code":"259" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Built-In Functions", + "uri":"dli_08_0331.html", + "doc_type":"sqlreference", + "p_code":"258", + "code":"260" + }, + { + "desc":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "product_code":"dli", + "title":"Mathematical Operation Functions", + "uri":"dli_08_0332.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"261" + }, + { + "desc":"SyntaxExampleTest input data.Test the data source kafka. The message content is as follows:\"{name:James,age:24,sex:male,grade:{math:95,science:[80,85],english:100}}\"\n\"{na", + "product_code":"dli", + "title":"String Functions", + "uri":"dli_08_0333.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"262" + }, + { + "desc":"Table 1 lists the temporal functions supported by Flink OpenSource SQL.FunctionReturns a date parsed from string in form of yyyy-MM-dd.Returns a date parsed from string i", + "product_code":"dli", + "title":"Temporal Functions", + "uri":"dli_08_0334.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"263" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Conditional Functions", + "uri":"dli_08_0335.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"264" + }, + { + "desc":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.The following example converts the amount value to an integer.Flink jobs do not sup", + "product_code":"dli", + "title":"Type Conversion Function", + "uri":"dli_08_0336.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"265" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Collection Functions", + "uri":"dli_08_0337.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"266" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Value Construction Functions", + "uri":"dli_08_0338.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"267" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Value Access Functions", + "uri":"dli_08_0339.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"268" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Hash Functions", + "uri":"dli_08_0340.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"269" + }, + { + "desc":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "product_code":"dli", + "title":"Aggregate Function", + "uri":"dli_08_0341.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"270" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Table-Valued Functions", + "uri":"dli_08_0342.html", + "doc_type":"sqlreference", + "p_code":"260", + "code":"271" + }, + { + "desc":"The split_cursor function can convert one row of records into multiple rows or convert one column of records into multiple columns. 
Table-valued functions can only be use", + "product_code":"dli", + "title":"split_cursor", + "uri":"dli_08_0357.html", + "doc_type":"sqlreference", + "p_code":"271", + "code":"272" + }, + { + "desc":"The string_split function splits a target string into substrings based on the specified separator and returns a substring list.Prepare test input data.Source table disSou", + "product_code":"dli", + "title":"string_split", + "uri":"dli_08_0356.html", + "doc_type":"sqlreference", + "p_code":"271", + "code":"273" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Historical Versions", + "uri":"dli_08_0450.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"274" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink SQL Syntax", + "uri":"dli_08_0233.html", + "doc_type":"sqlreference", + "p_code":"274", + "code":"275" + }, + { + "desc":"Currently, Flink SQL only supports the following operations: SELECT, FROM, WHERE, UNION, aggregation, window, JOIN between stream and table data, and JOIN between streams", + "product_code":"dli", + "title":"SQL Syntax Constraints and Definitions", + "uri":"dli_08_0075.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"276" + }, + { + "desc":"This section describes the Flink SQL syntax list provided by DLI. For details about the parameters and examples, see the syntax description.", + "product_code":"dli", + "title":"SQL Syntax Overview of Stream Jobs", + "uri":"dli_08_0275.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"277" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Source Stream", + "uri":"dli_08_0234.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"278" + }, + { + "desc":"Create a source stream to obtain data from HBase of CloudTable as input data of the job. HBase is a column-oriented distributed cloud storage system that features enhance", + "product_code":"dli", + "title":"CloudTable HBase Source Stream", + "uri":"dli_08_0237.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"279" + }, + { + "desc":"Create a source stream to read data from DIS. DIS accesses user data and Flink job reads data from the DIS stream as input data for jobs. Flink jobs can quickly remove da", + "product_code":"dli", + "title":"DIS Source Stream", + "uri":"dli_08_0235.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"280" + }, + { + "desc":"DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. 
It provides reliable, scalable, fully man", + "product_code":"dli", + "title":"DMS Source Stream", + "uri":"dli_08_0270.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"281" + }, + { + "desc":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "product_code":"dli", + "title":"MRS Kafka Source Stream", + "uri":"dli_08_0238.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"282" + }, + { + "desc":"Create a source stream to obtain data from Kafka as input data for jobs.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscripti", + "product_code":"dli", + "title":"Open-Source Kafka Source Stream", + "uri":"dli_08_0239.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"283" + }, + { + "desc":"Create a source stream to obtain data from OBS. DLI reads data stored by users in OBS as input data for jobs. OBS applies to various scenarios, such as big data analysis,", + "product_code":"dli", + "title":"OBS Source Stream", + "uri":"dli_08_0236.html", + "doc_type":"sqlreference", + "p_code":"278", + "code":"284" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Sink Stream", + "uri":"dli_08_0240.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"285" + }, + { + "desc":"DLI exports the job output data to HBase of CloudTable. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performa", + "product_code":"dli", + "title":"CloudTable HBase Sink Stream", + "uri":"dli_08_0243.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"286" + }, + { + "desc":"DLI exports the job output data to OpenTSDB of CloudTable. OpenTSDB is a distributed, scalable time series database based on HBase. It stores time series data. Time serie", + "product_code":"dli", + "title":"CloudTable OpenTSDB Sink Stream", + "uri":"dli_08_0244.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"287" + }, + { + "desc":"DLI exports the output data of the Flink job to OpenTSDB of MRS.OpenTSDB has been installed in the MRS cluster.In this scenario, jobs must run on the dedicated queue of D", + "product_code":"dli", + "title":"MRS OpenTSDB Sink Stream", + "uri":"dli_08_0286.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"288" + }, + { + "desc":"DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides th", + "product_code":"dli", + "title":"CSS Elasticsearch Sink Stream", + "uri":"dli_08_0252.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"289" + }, + { + "desc":"DLI exports the Flink job output data to Redis of DCS. Redis is a storage system that supports multiple types of data structures such as key-value. 
It can be used in scen", + "product_code":"dli", + "title":"DCS Sink Stream", + "uri":"dli_08_0253.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"290" + }, + { + "desc":"DLI outputs the job output data to Document Database Service (DDS).DDS is compatible with the MongoDB protocol and is secure, highly available, reliable, scalable, and ea", + "product_code":"dli", + "title":"DDS Sink Stream", + "uri":"dli_08_0249.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"291" + }, + { + "desc":"DLI writes the Flink job output data into DIS. This cloud ecosystem is applicable to scenarios where data is filtered and imported to the DIS stream for future processing", + "product_code":"dli", + "title":"DIS Sink Stream", + "uri":"dli_08_0241.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"292" + }, + { + "desc":"DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. It provides reliable, scalable, fully man", + "product_code":"dli", + "title":"DMS Sink Stream", + "uri":"dli_08_0271.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"293" + }, + { + "desc":"DLI outputs the Flink job output data to Data Warehouse Service (DWS). DWS database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more co", + "product_code":"dli", + "title":"DWS Sink Stream (JDBC Mode)", + "uri":"dli_08_0247.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"294" + }, + { + "desc":"Create a sink stream to export Flink job data to DWS through OBS-based dumping, specifically, output Flink job data to OBS and then import data from OBS to DWS. For detai", + "product_code":"dli", + "title":"DWS Sink Stream (OBS-based Dumping)", + "uri":"dli_08_0248.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"295" + }, + { + "desc":"DLI exports the output data of the Flink job to HBase of MRS.An MRS cluster has been created by using your account. DLI can interconnect with HBase clusters with Kerberos", + "product_code":"dli", + "title":"MRS HBase Sink Stream", + "uri":"dli_08_0255.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"296" + }, + { + "desc":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "product_code":"dli", + "title":"MRS Kafka Sink Stream", + "uri":"dli_08_0254.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"297" + }, + { + "desc":"DLI exports the output data of the Flink job to Kafka.Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It deli", + "product_code":"dli", + "title":"Open-Source Kafka Sink Stream", + "uri":"dli_08_0257.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"298" + }, + { + "desc":"You can create a sink stream to export data to a file system such as HDFS or OBS. After the data is generated, a non-DLI table can be created directly according to the ge", + "product_code":"dli", + "title":"File System Sink Stream (Recommended)", + "uri":"dli_08_0267.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"299" + }, + { + "desc":"Create a sink stream to export DLI data to OBS. DLI can export the job analysis results to OBS. 
OBS applies to various scenarios, such as big data analysis, cloud-native ", + "product_code":"dli", + "title":"OBS Sink Stream", + "uri":"dli_08_0242.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"300" + }, + { + "desc":"DLI outputs the Flink job output data to RDS. Currently, PostgreSQL and MySQL databases are supported. The PostgreSQL database can store data of more complex types and de", + "product_code":"dli", + "title":"RDS Sink Stream", + "uri":"dli_08_0245.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"301" + }, + { + "desc":"DLI exports Flink job output data to SMN.SMN provides reliable and flexible large-scale message notification services to DLI. It significantly simplifies system coupling ", + "product_code":"dli", + "title":"SMN Sink Stream", + "uri":"dli_08_0251.html", + "doc_type":"sqlreference", + "p_code":"285", + "code":"302" + }, + { + "desc":"The temporary stream is used to simplify SQL logic. If complex SQL logic is followed, write SQL statements concatenated with temporary streams. The temporary stream is ju", + "product_code":"dli", + "title":"Creating a Temporary Stream", + "uri":"dli_08_0258.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"303" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Creating a Dimension Table", + "uri":"dli_08_0259.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"304" + }, + { + "desc":"Create a Redis table to connect to the source stream.For details about the JOIN syntax, see JOIN Between Stream Data and Table Data.Redis clusters are not supported.Ensur", + "product_code":"dli", + "title":"Creating a Redis Table", + "uri":"dli_08_0260.html", + "doc_type":"sqlreference", + "p_code":"304", + "code":"305" + }, + { + "desc":"Create an RDS/DWS table to connect to the source stream.For details about the JOIN syntax, see JOIN.Ensure that you have created a PostgreSQL or MySQL RDS instance in RDS", + "product_code":"dli", + "title":"Creating an RDS Table", + "uri":"dli_08_0261.html", + "doc_type":"sqlreference", + "p_code":"304", + "code":"306" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Custom Stream Ecosystem", + "uri":"dli_08_0272.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"307" + }, + { + "desc":"Compile code to obtain data from the desired cloud ecosystem or open-source ecosystem as the input data of Flink jobs.The user-defined source class needs to inherit the R", + "product_code":"dli", + "title":"Custom Source Stream", + "uri":"dli_08_0273.html", + "doc_type":"sqlreference", + "p_code":"307", + "code":"308" + }, + { + "desc":"Compile code to write the data processed by DLI to a specified cloud ecosystem or open-source ecosystem.The user-defined sink class needs to inherit the RichSinkFunction ", + "product_code":"dli", + "title":"Custom Sink Stream", + "uri":"dli_08_0274.html", + "doc_type":"sqlreference", + "p_code":"307", + "code":"309" + }, + { + "desc":"Data type is a basic attribute of data and used to distinguish different types of data. Different data types occupy different storage space and support different operatio", + "product_code":"dli", + "title":"Data Type", + "uri":"dli_08_0207.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"310" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Built-In Functions", + "uri":"dli_08_0086.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"311" + }, + { + "desc":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "product_code":"dli", + "title":"Mathematical Operation Functions", + "uri":"dli_08_0191.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"312" + }, + { + "desc":"The common character string functions of DLI are as follows:FunctionConcatenates two character strings.Concatenates two character strings.SyntaxVARCHAR VARCHAR a || VARCH", + "product_code":"dli", + "title":"String Functions", + "uri":"dli_08_0096.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"313" + }, + { + "desc":"Table 1 lists the time functions supported by Flink SQL.None", + "product_code":"dli", + "title":"Temporal Functions", + "uri":"dli_08_0097.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"314" + }, + { + "desc":"This function is used to forcibly convert types.If the input is NULL, NULL is returned.Flink jobs do not support the conversion of bigint to timestamp using CAST. You can", + "product_code":"dli", + "title":"Type Conversion Functions", + "uri":"dli_08_0112.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"315" + }, + { + "desc":"An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved b", + "product_code":"dli", + "title":"Aggregate Functions", + "uri":"dli_08_0104.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"316" + }, + { + "desc":"Table-valued functions can convert one row of records into multiple rows or convert one column of records into multiple columns. 
Table-valued functions can only be used i", + "product_code":"dli", + "title":"Table-Valued Functions", + "uri":"dli_08_0206.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"317" + }, + { + "desc":"Example:The returned number of elements in the array is 3.HELLO WORLD is returned.", + "product_code":"dli", + "title":"Other Functions", + "uri":"dli_08_0101.html", + "doc_type":"sqlreference", + "p_code":"311", + "code":"318" + }, + { + "desc":"DLI supports the following three types of user-defined functions (UDFs):Regular UDF: takes in one or more input parameters and returns a single result.User-defined table-", + "product_code":"dli", + "title":"User-Defined Functions", + "uri":"dli_08_0099.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"319" + }, + { + "desc":"Table 1 describes the basic geospatial geometric elements.You can build complex geospatial geometries based on basic geospatial geometric elements. Table 2 describes the ", + "product_code":"dli", + "title":"Geographical Functions", + "uri":"dli_08_0209.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"320" + }, + { + "desc":"SyntaxDescriptionThe SELECT statement is used to select data from a table or insert constant data into a table.PrecautionsThe table to be queried must exist. Otherwise, a", + "product_code":"dli", + "title":"SELECT", + "uri":"dli_08_0102.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"321" + }, + { + "desc":"SyntaxorDescriptionIf the value of value is value1, result1 is returned. If the value is not any of the values listed in the clause, resultZ is returned. If no else state", + "product_code":"dli", + "title":"Condition Expression", + "uri":"dli_08_0103.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"322" + }, + { + "desc":"DescriptionGroup Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:time_attr can be processing-time or", + "product_code":"dli", + "title":"Window", + "uri":"dli_08_0218.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"323" + }, + { + "desc":"The JOIN operation allows you to query data from a table and write the query result to the sink stream. Currently, only RDSs and DCS Redis tables are supported. The ON ke", + "product_code":"dli", + "title":"JOIN Between Stream Data and Table Data", + "uri":"dli_08_0106.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"324" + }, + { + "desc":"Flink provides two time models: processing time and event time.DLI allows you to specify the time model during creation of the source stream and temporary stream.Processi", + "product_code":"dli", + "title":"Configuring Time Models", + "uri":"dli_08_0107.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"325" + }, + { + "desc":"Complex event processing (CEP) is used to detect complex patterns in endless data streams so as to identify and search patterns in various data rows. Pattern matching is ", + "product_code":"dli", + "title":"Pattern Matching", + "uri":"dli_08_0108.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"326" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"StreamingML", + "uri":"dli_08_0109.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"327" + }, + { + "desc":"Anomaly detection applies to various scenarios, including intrusion detection, financial fraud detection, sensor data monitoring, medical diagnosis, natural data detectio", + "product_code":"dli", + "title":"Anomaly Detection", + "uri":"dli_08_0110.html", + "doc_type":"sqlreference", + "p_code":"327", + "code":"328" + }, + { + "desc":"Modeling and forecasting time series is a common task in many business verticals. Modeling is used to extract meaningful statistics and other characteristics of the data.", + "product_code":"dli", + "title":"Time Series Forecasting", + "uri":"dli_08_0111.html", + "doc_type":"sqlreference", + "p_code":"327", + "code":"329" + }, + { + "desc":"Clustering algorithms belong to unsupervised algorithms. K-Means, a clustering algorithm, partitions data points into related clusters by calculating the distance between", + "product_code":"dli", + "title":"Real-Time Clustering", + "uri":"dli_08_0216.html", + "doc_type":"sqlreference", + "p_code":"327", + "code":"330" + }, + { + "desc":"Deep learning has a wide range of applications in many industries, such as image classification, image recognition, and speech recognition. DLI provides several functions", + "product_code":"dli", + "title":"Deep Learning Model Prediction", + "uri":"dli_08_0088.html", + "doc_type":"sqlreference", + "p_code":"327", + "code":"331" + }, + { + "desc":"Flink SQL reserves some strings as keywords. If you want to use the following character strings as field names, ensure that they are enclosed by back quotes, for example,", + "product_code":"dli", + "title":"Reserved Keywords", + "uri":"dli_08_0125.html", + "doc_type":"sqlreference", + "p_code":"275", + "code":"332" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Identifiers", + "uri":"dli_08_0001.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"333" + }, + { + "desc":"None.Aggregate function.", + "product_code":"dli", + "title":"aggregate_func", + "uri":"dli_08_0002.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"334" + }, + { + "desc":"None.Alias, which must be STRING type. It can be assigned to a field, table, view, or subquery.", + "product_code":"dli", + "title":"alias", + "uri":"dli_08_0003.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"335" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"attr_expr", + "uri":"dli_08_0004.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"336" + }, + { + "desc":"None.List of attr_expr, which is separated by commas (,).", + "product_code":"dli", + "title":"attr_expr_list", + "uri":"dli_08_0005.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"337" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"attrs_value_set_expr", + "uri":"dli_08_0006.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"338" + }, + { + "desc":"None.Return a boolean expression.", + "product_code":"dli", + "title":"boolean_expression", + "uri":"dli_08_0007.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"339" + }, + { + "desc":"None.Formal parameter for function call. It is usually a field name, which is the same as col_name.", + "product_code":"dli", + "title":"col", + "uri":"dli_08_0009.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"340" + }, + { + "desc":"None.Column (field) description, which must be STRING type and cannot exceed 256 bytes.", + "product_code":"dli", + "title":"col_comment", + "uri":"dli_08_0010.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"341" + }, + { + "desc":"None.Column name, which must be STRING type and cannot exceed 128 bytes.", + "product_code":"dli", + "title":"col_name", + "uri":"dli_08_0011.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"342" + }, + { + "desc":"None.Field list, which consists of one col_name or more. If there is more than one col_name, separate them by using a comma (,).", + "product_code":"dli", + "title":"col_name_list", + "uri":"dli_08_0012.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"343" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"condition", + "uri":"dli_08_0013.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"344" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"condition_list", + "uri":"dli_08_0014.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"345" + }, + { + "desc":"None.Common expression name.", + "product_code":"dli", + "title":"cte_name", + "uri":"dli_08_0015.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"346" + }, + { + "desc":"None.Data type. 
Currently, only the primitive data types are supported.", + "product_code":"dli", + "title":"data_type", + "uri":"dli_08_0016.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"347" + }, + { + "desc":"None.Database description, which must be STRING type and cannot exceed 256 characters.", + "product_code":"dli", + "title":"db_comment", + "uri":"dli_08_0017.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"348" + }, + { + "desc":"None.Database name, which must be STRING type and cannot exceed 128 bytes.", + "product_code":"dli", + "title":"db_name", + "uri":"dli_08_0018.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"349" + }, + { + "desc":"None.Returned result for the ELSE clause of the CASE WHEN statement.", + "product_code":"dli", + "title":"else_result_expression", + "uri":"dli_08_0019.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"350" + }, + { + "desc":"| AVRO| CSV| JSON| ORC| PARQUETCurrently, the preceding formats are supported.Both USING and STORED AS can be used for specifying the data format. You can specify the pre", + "product_code":"dli", + "title":"file_format", + "uri":"dli_08_0020.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"351" + }, + { + "desc":"None.File path, which is the OBS path", + "product_code":"dli", + "title":"file_path", + "uri":"dli_08_0021.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"352" + }, + { + "desc":"None.Function name, which must be STRING type.", + "product_code":"dli", + "title":"function_name", + "uri":"dli_08_0022.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"353" + }, + { + "desc":"None.Expression that includes GROUP BY.", + "product_code":"dli", + "title":"groupby_expression", + "uri":"dli_08_0023.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"354" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"having_condition", + "uri":"dli_08_0024.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"355" + }, + { + "desc":"None.Input expression of the CASE WHEN statement.", + "product_code":"dli", + "title":"input_expression", + "uri":"dli_08_0026.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"356" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"join_condition", + "uri":"dli_08_0029.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"357" + }, + { + "desc":"None.The condition of an inequality join.", + "product_code":"dli", + "title":"non_equi_join_condition", + "uri":"dli_08_0030.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"358" + }, + { + "desc":"None.Maximum number of output lines specified by LIMIT. 
Which must be INT type.", + "product_code":"dli", + "title":"number", + "uri":"dli_08_0031.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"359" + }, + { + "desc":"None.Partition column name, that is, partition field name, which must be STRING type.", + "product_code":"dli", + "title":"partition_col_name", + "uri":"dli_08_0034.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"360" + }, + { + "desc":"None.Partition column value, that is, partition field value.", + "product_code":"dli", + "title":"partition_col_value", + "uri":"dli_08_0035.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"361" + }, + { + "desc":"partition_specs : (partition_col_name = partition_col_value, partition_col_name = partition_col_value, ...);Table partition list, which is expressed by using key=value pa", + "product_code":"dli", + "title":"partition_specs", + "uri":"dli_08_0036.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"362" + }, + { + "desc":"None.Property name, which must be STRING type.", + "product_code":"dli", + "title":"property_name", + "uri":"dli_08_0037.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"363" + }, + { + "desc":"None.Property value, which must be STRING type.", + "product_code":"dli", + "title":"property_value", + "uri":"dli_08_0038.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"364" + }, + { + "desc":"None.Pattern matching string, which supports wildcard matching.", + "product_code":"dli", + "title":"regex_expression", + "uri":"dli_08_0039.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"365" + }, + { + "desc":"None.Returned result for the THEN clause of the CASE WHEN statement.", + "product_code":"dli", + "title":"result_expression", + "uri":"dli_08_0040.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"366" + }, + { + "desc":"None.Query clause for the basic SELECT statement.", + "product_code":"dli", + "title":"select_statement", + "uri":"dli_08_0042.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"367" + }, + { + "desc":"None.Separator, which can be customized by users, for example, comma (,), semicolon (;), and colon (:). Which must be CHAR type.", + "product_code":"dli", + "title":"separator", + "uri":"dli_08_0043.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"368" + }, + { + "desc":"None.SQL statement containing the common expression defined by cte_name.", + "product_code":"dli", + "title":"sql_containing_cte_name", + "uri":"dli_08_0045.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"369" + }, + { + "desc":"None.Subquery.", + "product_code":"dli", + "title":"sub_query", + "uri":"dli_08_0046.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"370" + }, + { + "desc":"None.Table description, which must be STRING type and cannot exceed 256 bytes.", + "product_code":"dli", + "title":"table_comment", + "uri":"dli_08_0047.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"371" + }, + { + "desc":"NoneTable name, which cannot exceed 128 bytes. The string type and \"$\" symbol are supported.", + "product_code":"dli", + "title":"table_name", + "uri":"dli_08_0048.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"372" + }, + { + "desc":"None.Table property list, which is expressed by using key=value pairs. key represents property_name, and value represents property_value. 
If there is more than one key=va", + "product_code":"dli", + "title":"table_properties", + "uri":"dli_08_0049.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"373" + }, + { + "desc":"None.Table or view name, which must be STRING type. It can also be a subquery. If it is subquery, an alias must also be provided.", + "product_code":"dli", + "title":"table_reference", + "uri":"dli_08_0050.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"374" + }, + { + "desc":"None.When expression of the CASE WHEN statement. It is used for matching with the input expression.", + "product_code":"dli", + "title":"when_expression", + "uri":"dli_08_0053.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"375" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"where_condition", + "uri":"dli_08_0054.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"376" + }, + { + "desc":"None.Analysis window function. For details, see Window Functions.", + "product_code":"dli", + "title":"window_function", + "uri":"dli_08_0055.html", + "doc_type":"sqlreference", + "p_code":"333", + "code":"377" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Operators", + "uri":"dli_08_0060.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"378" + }, + { + "desc":"All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.Relationship operators are binary operators. Two compared data ", + "product_code":"dli", + "title":"Relational Operators", + "uri":"dli_08_0061.html", + "doc_type":"sqlreference", + "p_code":"378", + "code":"379" + }, + { + "desc":"Arithmetic operators include binary operators and unary operators. For both types of operators, the returned results are numbers. Table 1 lists the arithmetic operators s", + "product_code":"dli", + "title":"Arithmetic Operators", + "uri":"dli_08_0062.html", + "doc_type":"sqlreference", + "p_code":"378", + "code":"380" + }, + { + "desc":"Common logical operators include AND, OR, and NOT. The operation result can be TRUE, FALSE, or NULL (which means unknown). The priorities of the operators are as follows:", + "product_code":"dli", + "title":"Logical Operators", + "uri":"dli_08_0063.html", + "doc_type":"sqlreference", + "p_code":"378", + "code":"381" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Change History", + "uri":"dli_08_00005.html", + "doc_type":"sqlreference", + "p_code":"", + "code":"382" + } +] \ No newline at end of file diff --git a/docs/dli/sqlreference/PARAMETERS.txt b/docs/dli/sqlreference/PARAMETERS.txt new file mode 100644 index 00000000..6da8d5f0 --- /dev/null +++ b/docs/dli/sqlreference/PARAMETERS.txt @@ -0,0 +1,3 @@ +version="" +language="en-us" +type="" \ No newline at end of file diff --git a/docs/dli/sqlreference/dli_08_00005.html b/docs/dli/sqlreference/dli_08_00005.html new file mode 100644 index 00000000..7d779899 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_00005.html @@ -0,0 +1,20 @@ + + +

Change History

+
+
+ + + + + + + +

Released On

+

Description

+

2023-05-11

+

This is the first official release.

+
+
+
+ diff --git a/docs/dli/sqlreference/dli_08_0001.html b/docs/dli/sqlreference/dli_08_0001.html new file mode 100644 index 00000000..fa3eac02 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0001.html @@ -0,0 +1,97 @@ + + +

Identifiers

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0002.html b/docs/dli/sqlreference/dli_08_0002.html new file mode 100644 index 00000000..b448b2ec --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0002.html @@ -0,0 +1,14 @@ + + +

aggregate_func

+

Syntax

None.

+
+

Description

Aggregate function.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0003.html b/docs/dli/sqlreference/dli_08_0003.html new file mode 100644 index 00000000..f12764b9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0003.html @@ -0,0 +1,14 @@ + + +

alias

+

Syntax

None.

+
+

Description

Alias, which must be STRING type. It can be assigned to a field, table, view, or subquery.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0004.html b/docs/dli/sqlreference/dli_08_0004.html new file mode 100644 index 00000000..ca5108c3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0004.html @@ -0,0 +1,93 @@ + + +

attr_expr

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

attr_expr

+

Attribute expression.

+

attr

+

Table field, which is the same as col_name.

+

const_value

+

Constant value.

+

case_expr

+

Case expression.

+

math_func

+

Mathematical function.

+

date_func

+

Date function.

+

string_func

+

String function.

+

aggregate_func

+

Aggregate function.

+

window_func

+

Analysis window function.

+

user_define_func

+

User-defined function.

+

general_binary_operator

+

General binary operator.

+

general_unary_operator

+

General unary operator.

+

(

+

Start of the specified subattribute expression.

+

)

+

End of the specified subattribute expression.

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0005.html b/docs/dli/sqlreference/dli_08_0005.html new file mode 100644 index 00000000..16a7911b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0005.html @@ -0,0 +1,14 @@ + + +

attr_expr_list

+

Syntax

None.

+
+

Description

List of attr_expr, which is separated by commas (,).

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0006.html b/docs/dli/sqlreference/dli_08_0006.html new file mode 100644 index 00000000..455ca5fc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0006.html @@ -0,0 +1,43 @@ + + +

attrs_value_set_expr

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + +

Syntax

+

Description

+

attrs_value_set_expr

+

Collection of attribute values.

+

sub_query

+

Subquery clause.

+

(

+

Start of the specified subquery expression.

+

)

+

End of the specified subquery expression.

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0007.html b/docs/dli/sqlreference/dli_08_0007.html new file mode 100644 index 00000000..ebbac892 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0007.html @@ -0,0 +1,14 @@ + + +

boolean_expression

+

Syntax

None.

+
+

Description

Expression that returns a BOOLEAN value.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0009.html b/docs/dli/sqlreference/dli_08_0009.html new file mode 100644 index 00000000..ef44a083 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0009.html @@ -0,0 +1,20 @@ + + + +

col

+ +
+

Syntax

None.

+
+ +

Description

Formal parameter for function call. It is usually a field name, which is the same as col_name.

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0010.html b/docs/dli/sqlreference/dli_08_0010.html new file mode 100644 index 00000000..251bb18f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0010.html @@ -0,0 +1,14 @@ + + +

col_comment

+

Syntax

None.

+
+

Description

Column (field) description, which must be STRING type and cannot exceed 256 bytes.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0011.html b/docs/dli/sqlreference/dli_08_0011.html new file mode 100644 index 00000000..46d9300a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0011.html @@ -0,0 +1,14 @@ + + +

col_name

+

Syntax

None.

+
+

Description

Column name, which must be STRING type and cannot exceed 128 bytes.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0012.html b/docs/dli/sqlreference/dli_08_0012.html new file mode 100644 index 00000000..862e1fc4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0012.html @@ -0,0 +1,20 @@ + + + +

col_name_list

+ +
+

Syntax

None.

+
+ +

Description

Field list, which consists of one or more col_name entries. If there is more than one col_name, separate them with commas (,).

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0013.html b/docs/dli/sqlreference/dli_08_0013.html new file mode 100644 index 00000000..baa42cf7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0013.html @@ -0,0 +1,123 @@ + + +

condition

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

condition

+

Judgment condition.

+

>

+

Relational operator: >

+

>=

+

Relational operator: ≥

+

<

+

Relational operator: <

+

<=

+

Relational operator: ≤

+

=

+

Relational operator: =

+

<>

+

Relational operator: <>

+

is

+

Relational operator: is

+

is not

+

Relational operator: is not

+

const_null

+

Constant value: null

+

like

+

Relational operator: used for wildcard matching.

+

pattern_string

+

Pattern matching string, which supports wildcard matching. In WHERE LIKE, the SQL wildcard characters "%" and "_" are supported: "%" matches zero or more characters, and "_" matches exactly one character (see the example after this table).

+

attr_expr

+

Attribute expression.

+

attrs_value_set_expr

+

Collection of attribute values.

+

in

+

Keyword used to determine whether a value is contained in a specified collection.

+

const_string

+

String constant.

+

const_int

+

Integer constant.

+

(

+

Start of the specified constant collection.

+

)

+

End of the specified constant collection.

+

,

+

Separator comma (,)

+
+
+
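As an illustration of the wildcard matching described above, here is a minimal sketch assuming a hypothetical student table with a name column (both names are assumptions, not part of this reference):
SELECT * FROM student WHERE name LIKE 'Zhang%';   -- matches any name that starts with "Zhang"
SELECT * FROM student WHERE name LIKE '_ang';     -- matches any four-character name that ends with "ang"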
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0014.html b/docs/dli/sqlreference/dli_08_0014.html new file mode 100644 index 00000000..dc7830e0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0014.html @@ -0,0 +1,58 @@ + + +

condition_list

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

condition_list

+

List of judgment conditions.

+

and

+

Logical operator: AND

+

or

+

Logical operator: OR

+

not

+

Logical operator: NOT

+

(

+

Start of the subjudgment condition.

+

)

+

End of the subjudgment condition.

+

condition

+

Judgment condition.

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0015.html b/docs/dli/sqlreference/dli_08_0015.html new file mode 100644 index 00000000..dcf7ff74 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0015.html @@ -0,0 +1,14 @@ + + +

cte_name

+

Syntax

None.

+
+

Description

Common expression name.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0016.html b/docs/dli/sqlreference/dli_08_0016.html new file mode 100644 index 00000000..7c8bef0a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0016.html @@ -0,0 +1,14 @@ + + +

data_type

+

Syntax

None.

+
+

Description

Data type. Currently, only the primitive data types are supported.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0017.html b/docs/dli/sqlreference/dli_08_0017.html new file mode 100644 index 00000000..0ec05b83 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0017.html @@ -0,0 +1,14 @@ + + +

db_comment

+

Syntax

None.

+
+

Description

Database description, which must be STRING type and cannot exceed 256 characters.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0018.html b/docs/dli/sqlreference/dli_08_0018.html new file mode 100644 index 00000000..9e8ece60 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0018.html @@ -0,0 +1,14 @@ + + +

db_name

+

Syntax

None.

+
+

Description

Database name, which must be STRING type and cannot exceed 128 bytes.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0019.html b/docs/dli/sqlreference/dli_08_0019.html new file mode 100644 index 00000000..4234f026 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0019.html @@ -0,0 +1,20 @@ + + + +

else_result_expression

+ +
+

Syntax

None.

+
+ +

Description

Returned result for the ELSE clause of the CASE WHEN statement.

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0020.html b/docs/dli/sqlreference/dli_08_0020.html new file mode 100644 index 00000000..ce79579d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0020.html @@ -0,0 +1,18 @@ + + +

file_format

+

Format

| AVRO

+

| CSV

+

| JSON

+

| ORC

+

| PARQUET

+
+

Description

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0021.html b/docs/dli/sqlreference/dli_08_0021.html new file mode 100644 index 00000000..800f6d4d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0021.html @@ -0,0 +1,14 @@ + + +

file_path

+

Syntax

None.

+
+

Description

File path, which is the OBS path

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0022.html b/docs/dli/sqlreference/dli_08_0022.html new file mode 100644 index 00000000..2213fa5e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0022.html @@ -0,0 +1,14 @@ + + +

function_name

+

Syntax

None.

+
+

Description

Function name, which must be STRING type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0023.html b/docs/dli/sqlreference/dli_08_0023.html new file mode 100644 index 00000000..5d847e41 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0023.html @@ -0,0 +1,14 @@ + + +

groupby_expression

+

Syntax

None.

+
+

Description

Expression that includes GROUP BY.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0024.html b/docs/dli/sqlreference/dli_08_0024.html new file mode 100644 index 00000000..4e59b126 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0024.html @@ -0,0 +1,93 @@ + + +

having_condition

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

having_condition

+

Judgment condition of having.

+

and

+

Logical operator: AND

+

or

+

Logical operator: OR

+

not

+

Logical operator: NOT

+

(

+

Start of the subjudgment condition.

+

)

+

End of the subjudgment condition.

+

condition

+

Judgment condition.

+

const_set

+

Collection of constants, which are separated by commas (,).

+

in

+

Keyword used to determine whether a value is contained in a specified collection.

+

attrs_value_set_expr

+

Collection of attribute values.

+

attr_expr

+

Attribute expression.

+

Equality and inequality

+

Equation and inequality. For details, see Relational Operators.

+

pattern_string

+

Pattern matching string, which supports wildcard matching. In WHERE LIKE, the SQL wildcard characters "%" and "_" are supported: "%" matches zero or more characters, and "_" matches exactly one character.

+

like

+

Relational operator: used for wildcard matching.

+
+
+
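As an illustration of how these elements combine in a HAVING clause, the following is a minimal sketch assuming a hypothetical sales table with store_id and amount columns:
SELECT store_id, sum(amount) AS total_amount
FROM sales
GROUP BY store_id
HAVING sum(amount) > 1000 AND store_id IN ('s01', 's02', 's03');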
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0026.html b/docs/dli/sqlreference/dli_08_0026.html new file mode 100644 index 00000000..de72e9f6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0026.html @@ -0,0 +1,20 @@ + + + +

input_expression

+ +
+

Syntax

None.

+
+ +

Description

Input expression of the CASE WHEN statement.

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0029.html b/docs/dli/sqlreference/dli_08_0029.html new file mode 100644 index 00000000..dfee6671 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0029.html @@ -0,0 +1,88 @@ + + +

join_condition

+

Syntax

+
+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

join_condition

+

Judgment condition of join.

+

and

+

Logical operator: AND

+

or

+

Logical operator: OR

+

not

+

Logical operator: NOT

+

(

+

Start of the subjudgment condition.

+

)

+

End of the subjudgment condition.

+

condition

+

Judgment condition.

+

const_set

+

Collection of constants, which are separated by commas (,).

+

in

+

Keyword used to determine whether a value is contained in a specified collection.

+

atrrs_value_set_expr

+

Collection of attribute values.

+

attr_expr

+

Attribute expression.

+

Equality and inequality

+

Equation and inequality. For details, see Relational Operators.

+

pattern_string

+

Pattern matching string, which supports wildcard matching. In WHERE LIKE, the SQL wildcard characters "%" and "_" are supported: "%" matches zero or more characters, and "_" matches exactly one character.

+
+
+
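As an illustration, the following minimal sketch assumes hypothetical orders and customers tables and combines an equality predicate with an additional judgment condition in the join condition:
SELECT o.order_id, c.name
FROM orders o
JOIN customers c
ON o.customer_id = c.customer_id AND o.amount > 100;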
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0030.html b/docs/dli/sqlreference/dli_08_0030.html new file mode 100644 index 00000000..15f0e5c5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0030.html @@ -0,0 +1,14 @@ + + +

non_equi_join_condition

+

Syntax

None.

+
+

Description

The condition of an inequality join.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0031.html b/docs/dli/sqlreference/dli_08_0031.html new file mode 100644 index 00000000..5a172e72 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0031.html @@ -0,0 +1,14 @@ + + +

number

+

Syntax

None.

+
+

Description

Maximum number of output rows specified by LIMIT, which must be INT type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0034.html b/docs/dli/sqlreference/dli_08_0034.html new file mode 100644 index 00000000..f540e708 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0034.html @@ -0,0 +1,14 @@ + + +

partition_col_name

+

Syntax

None.

+
+

Description

Partition column name, that is, partition field name, which must be STRING type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0035.html b/docs/dli/sqlreference/dli_08_0035.html new file mode 100644 index 00000000..31ce7aaf --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0035.html @@ -0,0 +1,14 @@ + + +

partition_col_value

+

Syntax

None.

+
+

Description

Partition column value, that is, partition field value.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0036.html b/docs/dli/sqlreference/dli_08_0036.html new file mode 100644 index 00000000..a7dccddc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0036.html @@ -0,0 +1,14 @@ + + +

partition_specs

+

Syntax

partition_specs : (partition_col_name = partition_col_value, partition_col_name = partition_col_value, ...);

+
+

Description

Table partition list, which is expressed as key=value pairs, where key represents partition_col_name and value represents partition_col_value. If there is more than one partition field, separate the key=value pairs with commas (,).

+
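For example, a minimal sketch that uses such a partition list when adding a partition (the student table and the dt and region partition columns are assumptions):
ALTER TABLE student ADD IF NOT EXISTS PARTITION (dt='2023-05-11', region='cn');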
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0037.html b/docs/dli/sqlreference/dli_08_0037.html new file mode 100644 index 00000000..50547b70 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0037.html @@ -0,0 +1,14 @@ + + +

property_name

+

Syntax

None.

+
+

Description

Property name, which must be STRING type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0038.html b/docs/dli/sqlreference/dli_08_0038.html new file mode 100644 index 00000000..47370ea4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0038.html @@ -0,0 +1,14 @@ + + +

property_value

+

Syntax

None.

+
+

Description

Property value, which must be STRING type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0039.html b/docs/dli/sqlreference/dli_08_0039.html new file mode 100644 index 00000000..b30f251f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0039.html @@ -0,0 +1,14 @@ + + +

regex_expression

+

Syntax

None.

+
+

Description

Pattern matching string, which supports wildcard matching.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0040.html b/docs/dli/sqlreference/dli_08_0040.html new file mode 100644 index 00000000..496d0a74 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0040.html @@ -0,0 +1,20 @@ + + + +

result_expression

+ +
+

Syntax

None.

+
+ +

Description

Returned result for the THEN clause of the CASE WHEN statement.

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0042.html b/docs/dli/sqlreference/dli_08_0042.html new file mode 100644 index 00000000..0f3a60f7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0042.html @@ -0,0 +1,14 @@ + + +

select_statement

+

Syntax

None.

+
+

Description

Query clause for the basic SELECT statement.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0043.html b/docs/dli/sqlreference/dli_08_0043.html new file mode 100644 index 00000000..aefee37f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0043.html @@ -0,0 +1,14 @@ + + +

separator

+

Syntax

None.

+
+

Description

User-defined separator, for example, a comma (,), semicolon (;), or colon (:), which must be CHAR type.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0045.html b/docs/dli/sqlreference/dli_08_0045.html new file mode 100644 index 00000000..b912ab92 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0045.html @@ -0,0 +1,14 @@ + + +

sql_containing_cte_name

+

Syntax

None.

+
+

Description

SQL statement containing the common expression defined by cte_name.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0046.html b/docs/dli/sqlreference/dli_08_0046.html new file mode 100644 index 00000000..b3662c0d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0046.html @@ -0,0 +1,14 @@ + + +

sub_query

+

Syntax

None.

+
+

Description

Subquery.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0047.html b/docs/dli/sqlreference/dli_08_0047.html new file mode 100644 index 00000000..ec4e2306 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0047.html @@ -0,0 +1,14 @@ + + +

table_comment

+

Syntax

None.

+
+

Description

Table description, which must be STRING type and cannot exceed 256 bytes.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0048.html b/docs/dli/sqlreference/dli_08_0048.html new file mode 100644 index 00000000..4e5740a1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0048.html @@ -0,0 +1,14 @@ + + +

table_name

+

Syntax

None

+
+

Description

Table name, which cannot exceed 128 bytes. The string type and "$" symbol are supported.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0049.html b/docs/dli/sqlreference/dli_08_0049.html new file mode 100644 index 00000000..3ef85188 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0049.html @@ -0,0 +1,14 @@ + + +

table_properties

+

Syntax

None.

+
+

Description

Table property list, which is expressed by using key=value pairs, where key represents property_name and value represents property_value. If there is more than one key=value pair, separate the pairs with commas (,).

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0050.html b/docs/dli/sqlreference/dli_08_0050.html new file mode 100644 index 00000000..e1a88b24 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0050.html @@ -0,0 +1,14 @@ + + +

table_reference

+

Syntax

None.

+
+

Description

Table or view name, which must be STRING type. It can also be a subquery. If it is a subquery, an alias must also be provided.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0053.html b/docs/dli/sqlreference/dli_08_0053.html new file mode 100644 index 00000000..c895e9df --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0053.html @@ -0,0 +1,20 @@ + + + +

when_expression

+ +
+

Syntax

None.

+
+ +

Description

When expression of the CASE WHEN statement. It is used for matching with the input expression.

+
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0054.html b/docs/dli/sqlreference/dli_08_0054.html new file mode 100644 index 00000000..63cefcf4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0054.html @@ -0,0 +1,137 @@ + + + +

where_condition

+ +
+

Syntax

+
+ +

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

+

Description

+

where_condition

+

Judgment condition of where.

+

and

+

Logical operator: AND

+

or

+

Logical operator: OR

+

not

+

Logical operator: NOT

+

(

+

Start of the subjudgment condition.

+

)

+

End of the subjudgment condition.

+

condition

+

Judgment condition.

+

exists

+

Keyword used to determine whether a non-empty collection exists. If exists is followed by a subquery, then the subquery must contain a judgment condition.

+

in

+

Keyword used to determine whether a value is contained in a specified collection.

+

attrs_value_set_expr

+

Collection of attribute values.

+

attr_expr

+

Attribute expression.

+
+
+
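As an illustration of the in and exists keywords described above, here is a minimal sketch assuming hypothetical orders and customers tables:
SELECT * FROM orders
WHERE amount > 100 AND customer_id IN (SELECT customer_id FROM customers WHERE region = 'cn');
SELECT * FROM customers c
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.customer_id = c.customer_id);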
+ +
+ +
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0055.html b/docs/dli/sqlreference/dli_08_0055.html new file mode 100644 index 00000000..e5e8d7d6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0055.html @@ -0,0 +1,14 @@ + + +

window_function

+

Syntax

None.

+
+

Description

Analysis window function. For details, see Window Functions.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0056.html b/docs/dli/sqlreference/dli_08_0056.html new file mode 100644 index 00000000..932e3b03 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0056.html @@ -0,0 +1,19 @@ + + +

Data Types

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0057.html b/docs/dli/sqlreference/dli_08_0057.html new file mode 100644 index 00000000..cbfa2709 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0057.html @@ -0,0 +1,12 @@ + + +

Overview

+

A data type is a basic attribute of data and is used to distinguish different kinds of data. Different data types occupy different storage space and support different operations. Data is stored in tables in the database, and a data type is specified for each column of a table. Therefore, data to be stored in a table must comply with the data type of its column. Otherwise, errors may occur.

+

DLI only supports primitive data types.

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0058.html b/docs/dli/sqlreference/dli_08_0058.html new file mode 100644 index 00000000..276beb5b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0058.html @@ -0,0 +1,266 @@ + + +

Primitive Data Types

+

Table 1 lists the primitive data types supported by DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Primitive data types

Data Type

+

Description

+

Storage Space

+

Value Range

+

Support by OBS Table

+

Support by DLI Table

+

INT

+

Signed integer

+

4 bytes

+

–2147483648 to 2147483647

+

Yes

+

Yes

+

STRING

+

Character string

+

-

+

-

+

Yes

+

Yes

+

FLOAT

+

Single-precision floating point

+

4 bytes

+

-

+

Yes

+

Yes

+

DOUBLE

+

Double-precision floating-point

+

8 bytes

+

-

+

Yes

+

Yes

+

DECIMAL(precision,scale)

+

Decimal number, a fixed-point data type with a specified precision (total number of digits) and scale (number of decimal places), for example, 3.5.

+
  • precision: indicates the maximum number of digits that can be displayed.
  • scale: indicates the number of decimal places.
+

-

+

1<=precision<=38

+

0<=scale<=38

+

If precision and scale are not specified, DECIMAL (38,38) is used by default.

+

Yes

+

Yes

+

BOOLEAN

+

Boolean

+

1 byte

+

TRUE/FALSE

+

Yes

+

Yes

+

SMALLINT/SHORT

+

Signed integer

+

2 bytes

+

-32768 to 32767

+

Yes

+

Yes

+

TINYINT

+

Signed integer

+

1 byte

+

-128 to 127

+

Yes

+

No

+

BIGINT/LONG

+

Signed integer

+

8 bytes

+

–9223372036854775808 to 9223372036854775807

+

Yes

+

Yes

+

TIMESTAMP

+

Timestamp in raw data format, indicating the date and time. Example: 1621434131222

+

-

+

-

+

Yes

+

Yes

+

CHAR

+

Fixed-length character string

+

-

+

-

+

Yes

+

Yes

+

VARCHAR

+

Variable-length character string

+

-

+

-

+

Yes

+

Yes

+

DATE

+

Date type in the format of yyyy-mm-dd, for example, 2014-05-29

+

-

+

DATE does not contain time information. Its value ranges from 0000-01-01 to 9999-12-31.

+

Yes

+

Yes

+
+
+
  • VARCHAR and CHAR data is stored in STRING type on DLI. Therefore, the string that exceeds the specified length will not be truncated.
  • FLOAT data is stored as DOUBLE data on DLI.
+
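For reference, a minimal table definition sketch that exercises several of the types above (the table and column names are assumptions, not part of this reference):
CREATE TABLE type_demo (
  id      INT,
  name    STRING,
  score   DOUBLE,
  price   DECIMAL(10,2),
  created TIMESTAMP,
  birth   DATE
) USING PARQUET;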
+

INT

Signed integer with a storage space of 4 bytes. Its value ranges from –2147483648 to 2147483647. If this field is NULL, value 0 is used by default.

+
+

STRING

Character string.

+
+

FLOAT

Single-precision floating point with a storage space of 4 bytes. If this field is NULL, value 0 is used by default.

+

Due to the limitation of storage methods of floating point data, do not use the formula a==b to check whether two floating point values are the same. You are advised to use the formula: absolute value of (a-b) <= EPSILON. EPSILON indicates the allowed error range which is usually 1.19209290E-07F. If the formula is satisfied, the compared two floating point values are considered the same.

+
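A minimal sketch of the recommended epsilon comparison, assuming a hypothetical table t with floating-point columns a and b:
SELECT * FROM t WHERE abs(a - b) <= 1.19209290E-07;   -- treat a and b as equal within the allowed error range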
+

DOUBLE

Double-precision floating point with a storage space of 8 bytes. If this field is NULL, value 0 is used by default.

+

Due to the limitation of storage methods of floating point data, do not use the formula a==b to check whether two floating point values are the same. You are advised to use the formula: absolute value of (a-b) <= EPSILON. EPSILON indicates the allowed error range which is usually 2.2204460492503131E-16. If the formula is satisfied, the compared two floating point values are considered the same.

+
+

DECIMAL

Decimal(p,s) indicates that the total digit length is p, including p – s integer digits and s fractional digits. p indicates the maximum number of decimal digits that can be stored, including the digits to both the left and right of the decimal point. The value of p ranges from 1 to 38. s indicates the maximum number of decimal digits that can be stored to the right of the decimal point. The fractional digits must be values ranging from 0 to p. The fractional digits can be specified only after significant digits are specified. Therefore, the following inequality is concluded: 0 ≤ s ≤ p. For example, decimal(10,6) indicates that the value contains 10 digits, in which there are four integer digits and six fractional digits.

+
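For example, a constant-only sketch of casting to a decimal with six fractional digits:
SELECT CAST(3.5 AS DECIMAL(10,6));   -- returns 3.500000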
+

BOOLEAN

Boolean, which can be TRUE or FALSE.

+
+

SMALLINT/SHORT

Signed integer with a storage space of 2 bytes. Its value ranges from –32768 to 32767. If this field is NULL, value 0 is used by default.

+
+

TINYINT

Signed integer with a storage space of 1 byte. Its value ranges from –128 to 127. If this field is NULL, value 0 is used by default.

+
+

BIGINT/LONG

Signed integer with a storage space of 8 bytes. Its value ranges from –9223372036854775808 to 9223372036854775807. It does not support scientific notation. If this field is NULL, value 0 is used by default.

+
+

TIMESTAMP

Legacy UNIX TIMESTAMP is supported, providing the precision up to the microsecond level. TIMESTAMP is defined by the difference between the specified time and UNIX epoch (UNIX epoch time: 1970-01-01 00:00:00) in seconds. Data of the STRING type supports implicit conversion to TIMESTAMP. (The STRING must be in the yyyy-MM-dd HH:mm:ss[.ffffff] format. The precision after the decimal point is optional.)

+
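A minimal sketch of converting a string in this format to TIMESTAMP (the literal is arbitrary):
SELECT CAST('2021-05-19 13:02:11.222' AS TIMESTAMP);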
+

CHAR

Character string with a fixed length. In DLI, the STRING type is used.

+
+

VARCHAR

VARCHAR is declared with a length that indicates the maximum number of characters in a string. During conversion from STRING to VARCHAR, if the number of characters in STRING exceeds the specified length, the excess characters of STRING are automatically trimmed. Similar to STRING, the spaces at the end of VARCHAR are meaningful and affect the comparison result. In DLI, the STRING type is used.

+
+

DATE

DATE supports only explicit conversion (cast) with DATE, TIMESTAMP, and STRING. For details, see Table 2.

+ +
+ + + + + + + + + + + + + + + + + + + +
Table 2 cast function conversion

Explicit Conversion

+

Conversion Result

+

cast(date as date)

+

Same as value of DATE.

+

cast(timestamp as date)

+

The date (yyyy-mm-dd) is obtained from TIMESTAMP based on the local time zone and returned as the value of DATE.

+

cast(string as date)

+

If the STRING is in the yyyy-MM-dd format, the corresponding date (yyyy-mm-dd) is returned as the value of DATE. If the STRING is not in the yyyy-MM-dd format, NULL is returned.

+

cast(date as timestamp)

+

Timestamp that maps to the zero hour of the date (yyyy-mm-dd) specified by DATE is generated based on the local time zone and returned as the value of DATE.

+

cast(date as string)

+

A STRING in the yyyy-MM-dd format is generated based on the date (yyyy-mm-dd) specified by DATE and returned as the value of DATE.

+
+
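The following constant-only sketch exercises two of the conversions in Table 2 (the literals are arbitrary):
SELECT CAST('2023-05-11' AS DATE);                               -- returns 2023-05-11
SELECT CAST(CAST('2023-05-11 10:20:30' AS TIMESTAMP) AS DATE);   -- returns 2023-05-11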
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0059.html b/docs/dli/sqlreference/dli_08_0059.html new file mode 100644 index 00000000..c7656b9e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0059.html @@ -0,0 +1,100 @@ + + +

Complex Data Types

+

Spark SQL supports complex data types, as shown in Table 1.

+ +
+ + + + + + + + + + + + + + + + + +
Table 1 Complex data types

Data Type

+

Description

+

Syntax

+

ARRAY

+

A set of ordered fields used to construct an ARRAY with the specified values. The values can be of any supported data type, but all elements in the ARRAY must be of the same type.

+

array(<value>,<value>[, ...])

+

For details, see Example of ARRAY.

+

MAP

+

A group of unordered key/value pairs used to generate a MAP. The key must be of a native data type, and the value can be of either a native or a complex data type. All keys in a MAP must be of the same type, and so must all values.

+

map(K <key1>, V <value1>, K <key2>, V <value2>[, ...])

+

For details, see Example of Map.

+

STRUCT

+

Indicates a group of named fields. The data types of the fields can be different.

+

struct(<value1>,<value2>[, ...])

+

For details, see Example of STRUCT.

+
+
+

Restrictions

+
+

Example of ARRAY

Create an array_test table, set id to ARRAY<INT>, and name to STRING. After the table is created, insert test data into array_test. The procedure is as follows:

+
  1. Create a table.

    CREATE TABLE array_test(name STRING, id ARRAY < INT >) USING PARQUET;

    +
  2. Run the following statements to insert test data:

    INSERT INTO array_test VALUES ('test',array(1,2,3,4));

    +

INSERT INTO array_test VALUES ('test2',array(4,5,6,7));

    +

    INSERT INTO array_test VALUES ('test3',array(7,8,9,0));

    +
  3. Query the result.

    To query all data in the array_test table, run the following statement:

    +

    SELECT * FROM array_test;

    +
    test3	[7,8,9,0]
    +test2	[4,5,6,7]
    +test	[1,2,3,4]
    +

    To query the data of element 0 in the id array in the array_test table, run the following statement:

    +

    SELECT id[0] FROM array_test;

    +
    7
    +4
    +1
    +
+
+

Example of Map

Create the map_test table and set score to map<STRING,INT>. The key is of the STRING type and the value is of the INT type. After the table is created, insert test data to map_test. The procedure is as follows:

+
  1. Create a table.

    CREATE TABLE map_test(id STRING, score map<STRING,INT>) USING PARQUET;

    +
  2. Run the following statements to insert test data:

    INSERT INTO map_test VALUES ('test4',map('math',70,'chemistry',84));

    +

    INSERT INTO map_test VALUES ('test5',map('math',85,'chemistry',97));

    +

    INSERT INTO map_test VALUES ('test6',map('math',88,'chemistry',80));

    +
  3. Query the result.

    To query all data in the map_test table, run the following statement:

    +

    SELECT * FROM map_test;

    +
    test6	{"chemistry":80,"math":88}
    +test5	{"chemistry":97,"math":85}
    +test4	{"chemistry":84,"math":70}
    +

    To query the math score in the map_test table, run the following statement:

    +

SELECT id, score['math'] FROM map_test;

    +
    test6	88
    +test5	85
    +test4	70
    +
+
+

Example of STRUCT

Create a struct_test table and set info to the STRUCT<name:STRING, age:INT> data type (the field consists of name and age, where the type of name is STRING and age is INT). After the table is created, insert test data into the struct_test table. The procedure is as follows:

+
  1. Create a table.

    CREATE TABLE struct_test(id INT, info STRUCT<name:STRING,age:INT>) USING PARQUET;

    +
  2. Run the following statements to insert test data:

    INSERT INTO struct_test VALUES (8, struct('zhang',23));

    +

    INSERT INTO struct_test VALUES (9, struct('li',25));

    +

    INSERT INTO struct_test VALUES (10, struct('wang',26));

    +
  3. Query the result.

    To query all data in the struct_test table, run the following statement:

    +

    SELECT * FROM struct_test;

    +
    8	{"name":"zhang","age":23}
    +10	{"name":"wang","age":26}
    +9	{"name":"li","age":25}
    +

    Query name and age in the struct_test table.

    +

    SELECT id,info.name,info.age FROM struct_test;

    +
    8	zhang	23
    +10	wang	26
    +9	li	25
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0060.html b/docs/dli/sqlreference/dli_08_0060.html new file mode 100644 index 00000000..6d0c39d5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0060.html @@ -0,0 +1,15 @@ + + +

Operators

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0061.html b/docs/dli/sqlreference/dli_08_0061.html new file mode 100644 index 00000000..7ded999b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0061.html @@ -0,0 +1,144 @@ + + +

Relational Operators

+

All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.

+

Relational operators are binary operators. The two values being compared must be of the same data type, or their types must support implicit conversion.

+

Table 1 lists the relational operators provided by DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Relational operators

Operator

+

Result Type

+

Description

+

A = B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. This operator is used for value assignment.

+

A == B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. This operator cannot be used for value assignment.

+

A <=> B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A and B are NULL, then TRUE is returned. If A or B is NULL, then FALSE is returned.

+

A <> B

+

BOOLEAN

+

If A is not equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned. This operator follows the standard SQL syntax.

+

A != B

+

BOOLEAN

+

This operator is the same as the <> logical operator. It follows the SQL Server syntax.

+

A < B

+

BOOLEAN

+

If A is less than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A <= B

+

BOOLEAN

+

If A is less than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A > B

+

BOOLEAN

+

If A is greater than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A >= B

+

BOOLEAN

+

If A is greater than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A BETWEEN B AND C

+

BOOLEAN

+

If A is greater than or equal to B and less than or equal to C, then TRUE is returned. Otherwise, FALSE is returned. If A, B, or C is NULL, then NULL is returned.

+

A NOT BETWEEN B AND C

+

BOOLEAN

+

If A is less than B or greater than C, TRUE is returned; otherwise, FALSE is returned. If A, B, or C is NULL, then NULL is returned.

+

A IS NULL

+

BOOLEAN

+

If A is NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS NOT NULL

+

BOOLEAN

+

If A is not NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A LIKE B

+

BOOLEAN

+

If A matches B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A NOT LIKE B

+

BOOLEAN

+

If A does not match B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A RLIKE B

+

BOOLEAN

+

This operator is used for the LIKE operation of JAVA. If A or its substring matches B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A REGEXP B

+

BOOLEAN

+

The result is the same as A RLIKE B.

+
+
+
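For illustration, a constant-only sketch showing the difference between = and the null-safe <=> operator, together with BETWEEN and LIKE:
SELECT NULL = 1, NULL <=> 1, NULL <=> NULL;   -- returns NULL, false, true
SELECT 5 BETWEEN 1 AND 10, 'abc' LIKE 'a%';   -- returns true, true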
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0062.html b/docs/dli/sqlreference/dli_08_0062.html new file mode 100644 index 00000000..6b7d9b88 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0062.html @@ -0,0 +1,86 @@ + + +

Arithmetic Operators

+

Arithmetic operators include binary operators and unary operators. For both types of operators, the returned results are numbers. Table 1 lists the arithmetic operators supported by DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Arithmetic operators

Operator

+

Result Type

+

Description

+

A + B

+

All numeric types

+

A plus B. The result type is associated with the operation data type. For example, if floating-point number is added to an integer, the result will be a floating-point number.

+

A–B

+

All numeric types

+

A minus B. The result type is associated with the operation data type.

+

A * B

+

All numeric types

+

Multiply A and B. The result type is associated with the operation data type.

+

A / B

+

All numeric types

+

Divide A by B. The result is a number of the double type (double-precision number).

+

A % B

+

All numeric types

+

A modulo B, that is, the remainder after dividing A by B. The result type is associated with the operation data type.

+

A & B

+

All numeric types

+

Check the values of the two parameters as binary numbers and perform the AND operation by bit. If the corresponding bit in both expressions is 1, the result bit is set to 1. Otherwise, the bit is 0.

+

A | B

+

All numeric types

+

Check the value of the two parameters in binary expressions and perform the OR operation by bit. If one bit of either expression is 1, then the bit is set to 1. Otherwise, the bit is set to 0.

+

A ^ B

+

All numeric types

+

Check the values of the two parameters as binary numbers and perform the XOR operation by bit. The result bit is 1 only when exactly one of the corresponding bits is 1. Otherwise, the bit is 0.

+

~A

+

All numeric types

+

Perform the NOT operation on one expression by bit.

+
+
+
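A constant-only sketch of several of the operators in Table 1:
SELECT 7 + 2, 7 - 2, 7 * 2, 7 / 2, 7 % 2;   -- returns 9, 5, 14, 3.5, 1
SELECT 7 & 3, 7 | 3, 7 ^ 3, ~7;             -- returns 3, 7, 4, -8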
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0063.html b/docs/dli/sqlreference/dli_08_0063.html new file mode 100644 index 00000000..d26e3b3d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0063.html @@ -0,0 +1,80 @@ + + +

Logical Operators

+

Common logical operators include AND, OR, and NOT. The operation result can be TRUE, FALSE, or NULL (which means unknown). The priorities of the operators are as follows: NOT > AND > OR.

+

Table 1 lists the calculation rules, where A and B represent logical expressions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Logical operators

Operator

+

Result Type

+

Description

+

A AND B

+

BOOLEAN

+

If A and B are TRUE, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A OR B

+

BOOLEAN

+

If A or B is TRUE, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned. If one is TRUE and the other is NULL, then TRUE is returned.

+

NOT A

+

BOOLEAN

+

If A is FALSE, then TRUE is returned. If A is NULL, then NULL is returned. Otherwise, FALSE is returned.

+

! A

+

BOOLEAN

+

Same as NOT A.

+

A IN (val1, val2, ...)

+

BOOLEAN

+

If A is equal to any value in (val1, val2, ...), then TRUE is returned. Otherwise, FALSE is returned.

+

A NOT IN (val1, val2, ...)

+

BOOLEAN

+

If A is not equal to any value in (val1, val2, ...), then TRUE is returned. Otherwise, FALSE is returned.

+

EXISTS (subquery)

+

BOOLEAN

+

If the subquery result contains at least one row, then TRUE is returned. Otherwise, FALSE is returned.

+

NOT EXISTS (subquery)

+

BOOLEAN

+

If the subquery output does not contain any row, TRUE is returned; otherwise, FALSE is returned.

+
+
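A constant-only sketch of the three-valued logic and the IN operator described above:
SELECT (NULL AND TRUE), (NULL OR TRUE);      -- returns NULL, true
SELECT 2 IN (1, 2, 3), 4 NOT IN (1, 2, 3);   -- returns true, true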
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0064.html b/docs/dli/sqlreference/dli_08_0064.html new file mode 100644 index 00000000..c1426d18 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0064.html @@ -0,0 +1,23 @@ + + +

Built-in Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0065.html b/docs/dli/sqlreference/dli_08_0065.html new file mode 100644 index 00000000..66f7001f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0065.html @@ -0,0 +1,309 @@ + + +

Mathematical Functions

+

Table 1 lists the mathematical functions supported in DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Mathematical functions

Function

+

Return Type

+

Description

+

round(DOUBLE a)

+

DOUBLE

+

Round a.

+

round(DOUBLE a, INT d)

+

DOUBLE

+

Round a to d decimal places. Example: round(21.263,2) = 21.26.

+

bround(DOUBLE a)

+

DOUBLE

+

Round off a figure using the HALF_EVEN rounding mode.

+

If the figure to be rounded off ends in 5, the HALF_EVEN rounding mode is as follows:

+
  • Round up if the digit in the place preceding 5 is odd.
  • Round down if the digit in the place preceding 5 is even.
+

Example: bround(7.5) = 8.0, bround(6.5) = 6.0.

+

bround(DOUBLE a, INT d)

+

DOUBLE

+

Retain d decimal places and round the d+1 decimal place using the HALF_EVEN rounding mode.

+

If the figure to be rounded off ends in 5, it will be rounded off as follows:

+
  • Round up if the d decimal digit is odd.
  • Round down if the d decimal digit is even.
+

Example: bround(8.25, 1) = 8.2, bround(8.35, 1) = 8.4.

+

floor(DOUBLE a)

+

BIGINT

+

Return the largest integer that is less than or equal to a. Example: floor(21.2) = 21.

+

ceil(DOUBLE a), ceiling(DOUBLE a)

+

BIGINT

+

Return the smallest integer that is greater than or equal to a. Example: ceil(21.2) = 22.

+

rand(), rand(INT seed)

+

DOUBLE

+

Return a random number that is distributed uniformly from 0 through 1 (1 is exclusive). If the seed is specified, a stable random number sequence is displayed.

+

exp(DOUBLE a), exp(DECIMAL a)

+

DOUBLE

+

Return the value of e raised to the power of a.

+

ln(DOUBLE a), ln(DECIMAL a)

+

DOUBLE

+

Return the natural logarithm of the argument a.

+

log10(DOUBLE a), log10(DECIMAL a)

+

DOUBLE

+

Return the base 10 logarithm of the argument a.

+

log2(DOUBLE a), log2(DECIMAL a)

+

DOUBLE

+

Return the base 2 logarithm of the argument a.

+

log(DOUBLE base, DOUBLE a)

+

log(DECIMAL base, DECIMAL a)

+

DOUBLE

+

Return the base base logarithm of the argument a.

+

pow(DOUBLE a, DOUBLE p), power(DOUBLE a, DOUBLE p)

+

DOUBLE

+

Return the value of a raised to the power of p.

+

sqrt(DOUBLE a), sqrt(DECIMAL a)

+

DOUBLE

+

Return the square root of a.

+

bin(BIGINT a)

+

STRING

+

Return a number in binary format.

+

hex(BIGINT a) hex(STRING a)

+

STRING

+

Convert an integer or character to its hexadecimal representation.

+

conv(BIGINT num, INT from_base, INT to_base), conv(STRING num, INT from_base, INT to_base)

+

STRING

+

Convert a number from from_base to to_base. Example: Convert 5 from decimal to quaternary using conv(5,10,4) = 11.

+

abs(DOUBLE a)

+

DOUBLE

+

Return the absolute value.

+

pmod(INT a, INT b), pmod(DOUBLE a, DOUBLE b)

+

INT or DOUBLE

+

Return the positive value of the remainder after division of a by b.

+

sin(DOUBLE a), sin(DECIMAL a)

+

DOUBLE

+

Return the sine value of a.

+

asin(DOUBLE a), asin(DECIMAL a)

+

DOUBLE

+

Return the arc sine value of a.

+

cos(DOUBLE a), cos(DECIMAL a)

+

DOUBLE

+

Return the cosine value of a.

+

acos(DOUBLE a), acos(DECIMAL a)

+

DOUBLE

+

Return the arc cosine value of a.

+

tan(DOUBLE a), tan(DECIMAL a)

+

DOUBLE

+

Return the tangent value of a.

+

atan(DOUBLE a), atan(DECIMAL a)

+

DOUBLE

+

Return the arc tangent value of a.

+

degrees(DOUBLE a), degrees(DECIMAL a)

+

DOUBLE

+

Convert the value of a from radians to degrees.

+

radians(DOUBLE a), radians(DECIMAL a)

+

DOUBLE

+

Convert the value of a from degrees to radians.

+

positive(INT a), positive(DOUBLE a)

+

INT or DOUBLE

+

Return a. Example: positive(2) = 2.

+

negative(INT a), negative(DOUBLE a)

+

INT or DOUBLE

+

Return –a. Example: negative(2) = –2.

+

sign(DOUBLE a), sign(DECIMAL a)

+

DOUBLE or INT

+

Return the sign of a. 1.0 is returned if a is positive. –1.0 is returned if a is negative. Otherwise, 0.0 is returned.

+

e()

+

DOUBLE

+

Return the value of e.

+

pi()

+

DOUBLE

+

Return the value of pi.

+

factorial(INT a)

+

BIGINT

+

Return the factorial of a.

+

cbrt(DOUBLE a)

+

DOUBLE

+

Return the cube root of a.

+

shiftleft(TINYINT|SMALLINT|INT a, INT b)

+

shiftleft(BIGINT a, INT b)

+

INT

+

BIGINT

+

Bitwise signed left shift. Interpret a as a binary number and shift the binary number b positions to the left.

+

shiftright(TINYINT|SMALLINT|INT a, INT b)

+

shiftright(BIGINT a, INT b)

+

INT

+

BIGINT

+

Bitwise signed right shift. Interpret a as a binary number and shift the binary number b positions to the right.

+

shiftrightunsigned(TINYINT|SMALLINT|INT a, INT b),

+

shiftrightunsigned(BIGINT a, INT b)

+

INT

+

BIGINT

+

Bitwise unsigned right shift. Interpret a as a binary number and shift the binary number b positions to the right.

+

greatest(T v1, T v2, ...)

+

T

+

Return the maximum value of a list of values.

+

least(T v1, T v2, ...)

+

T

+

Return the minimum value of a list of values.

+
+
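A constant-only sketch of a few of the functions above:
SELECT round(21.263, 2), bround(7.5), floor(21.2), ceil(21.2);   -- returns 21.26, 8.0, 21, 22
SELECT conv('5', 10, 4), pmod(-7, 3), greatest(1, 5, 3);         -- returns 11, 2, 5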
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0066.html b/docs/dli/sqlreference/dli_08_0066.html new file mode 100644 index 00000000..9dccc808 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0066.html @@ -0,0 +1,207 @@ + + +

Date Functions

+

Table 1 lists the date functions supported in DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Date/time functions

Function

+

Return Type

+

Description

+

from_unixtime(bigint unixtime[, string format])

+

STRING

+

Convert a timestamp to the time format "yyyy-MM-dd HH:mm:ss" or "yyyyMMddHHmmss.uuuuuu".

+

For example, select FROM_UNIXTIME(1608135036,'yyyy-MM-dd HH:mm:ss').

+

unix_timestamp()

+

BIGINT

+

Return a Unix timestamp (the number of seconds that have elapsed since 1970-01-01 00:00:00) represented by an unsigned integer when the function is called without arguments.

+

unix_timestamp(string date)

+

BIGINT

+

Return the number of seconds between a specified date and 1970-01-01 00:00:00.

+

unix_timestamp(string date, string pattern)

+

BIGINT

+

Convert a time string with a given pattern to a Unix timestamp. Example: unix_timestamp("2009-03-20", "yyyy-MM-dd") = 1237532400.

+

to_date(string timestamp)

+

STRING

+

Return the date part of a time string. Example: to_date("1970-01-01 00:00:00") = "1970-01-01".

+

year(string date)

+

INT

+

Return the year part of a date.

+

quarter(string date/timestamp/string)

+

INT

+

Return the quarter of the year for a date, timestamp, or string. Example: quarter('2015-04-01')=2.

+

month(string date)

+

INT

+

Return the month (from 1 to 12) part of a date.

+

day(string date) dayofmonth(string date)

+

INT

+

Return the day part of a date.

+

hour(string date)

+

INT

+

Return the hour (from 0 to 23) part of a date.

+

minute(string date)

+

INT

+

Return the minute (from 0 to 59) part of a date.

+

second(string date)

+

INT

+

Return the second (from 0 to 59) part of a date.

+

weekofyear(string date)

+

INT

+

Return the week number (from 0 to 53) of a date.

+

datediff(string enddate, string startdate)

+

INT

+

Return the number of days from startdate to enddate.

+

date_add(string startdate, int days)

+

STRING

+

Add a number of days to a date.

+

date_sub(string startdate, int days)

+

STRING

+

Subtract a number of days from a date.

+

from_utc_timestamp(string timestamp, string timezone)

+

TIMESTAMP

+

Convert a UTC timestamp to a timestamp in a given time zone. For example, from_utc_timestamp('1970-01-01 08:00:00','PST') returns 1970-01-01 00:00:00.

+

to_utc_timestamp(string timestamp, string timezone)

+

TIMESTAMP

+

Convert a timestamp in a given time zone to a UTC timestamp. For example, to_utc_timestamp('1970-01-01 00:00:00','PST') returns 1970-01-01 08:00:00.

+

current_date()

+

DATE

+

Return the current date, for example, 2016-07-04.

+

current_timestamp()

+

TIMESTAMP

+

Return the current time, for example, 2016-07-04 11:18:11.685.

+

add_months(string start_date, int num_months)

+

STRING

+

Return the date that is num_months after start_date.

+

last_day(string date)

+

STRING

+

Return the last day of the month to which a date belongs. The returned date is in the format of yyyy-MM-dd, for example, 2015-08-31.

+

next_day(string start_date, string day_of_week)

+

STRING

+

Return the first date that is later than start_date and nearest to day_of_week. The returned date in the format of yyyy-MM-dd. day_of_week specifies a day of a week. For example, the value of day_of_week can be Monday or FRIDAY.

+

trunc(string date, string format)

+

STRING

+

Reset the date in a specified format. Supported formats are MONTH/MON/MM and YEAR/YYYY/YY. Example: trunc('2015-03-17', 'MM') = 2015-03-01.

+

months_between(string date1, string date2)

+

DOUBLE

+

Return number of months between dates date1 and date2.

+

date_format(date/timestamp/string ts, string fmt)

+

STRING

+

Return the formatted value of date/timestamp/string. The Java SimpleDateFormat format is supported. Example: date_format('2015-04-08', 'y') = '2015'.

+

In the format, y indicates the year. Y indicates the year when the current week is located. A week starts from Sunday and ends on Saturday. If a week crosses years, this week is counted as the next year.

+
+
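A constant-only sketch of a few of the functions above (the dates and timestamp are arbitrary):
SELECT from_unixtime(1608135036, 'yyyy-MM-dd HH:mm:ss');
SELECT datediff('2023-05-11', '2023-05-01'), add_months('2023-05-11', 2), last_day('2023-05-11');
-- returns 10, 2023-07-11, 2023-05-31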
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0067.html b/docs/dli/sqlreference/dli_08_0067.html new file mode 100644 index 00000000..8199d1bb --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0067.html @@ -0,0 +1,235 @@ + + +

String Functions

+

Table 1 lists the string functions supported by DLI.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 String functions

Function

+

Return Type

+

Description

+

ascii(string str)

+

INT

+

Returns the numeric value of the first character in a string.

+

concat(string A, string B...)

+

STRING

+

Return a string resulting from concatenating the input strings. This function can take any number of input strings.

+

concat_ws(string SEP, string A, string B...)

+

STRING

+

Return a string resulting from concatenating the input strings, which are separated by specified separators.

+

encode(string src, string charset)

+

BINARY

+

Encode src in the encoding mode specified by charset.

+

find_in_set(string str, string strList)

+

INT

+

Return the position of the first occurrence of str in strList. If the value of any parameter is NULL, NULL is returned. If the first parameter contains a comma (,), 0 is returned.

+

get_json_object(string json_string, string path)

+

STRING

+

Parse the JSON object in a specified JSON path. The function will return NULL if the JSON object is invalid.

+

instr(string str, string substr)

+

INT

+

Return the position of the first occurrence of substr in str. Return NULL if NULL is contained in the parameters and return 0 if substr does not exist in str. Note that the subscripts start from 1.

+

length(string A)

+

INT

+

Return the length of a string.

+

locate(string substr, string str[, int pos])

+

INT

+

Return the position of the first occurrence of substr in str after position pos (starting from 1).

+

lower(string A) lcase(string A)

+

STRING

+

Convert all characters of a string to lower case.

+

lpad(string str, int len, string pad)

+

STRING

+

Return a string of a specified length. If the length of the given string (str) is shorter than the specified length (len), the given string is left-padded with pad to the specified length.

+

ltrim(string A)

+

STRING

+

Trim spaces from the left hand side of a string.

+

parse_url(string urlString, string partToExtract [, string keyToExtract])

+

STRING

+

Return the specified part of the specified URL. Valid values of partToExtract include HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO.

+

For example, parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'.

+

When the second parameter is QUERY, the third parameter can be used to extract the value of a specific parameter. For example, parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') returns 'v1'.

+

printf(String format, Obj... args)

+

STRING

+

Print the input according to a specified format.

+

regexp_extract(string subject, string pattern, int index)

+

STRING

+

Extract the substring that matches the specified capture group of the regular expression. For example, regexp_extract('foothebar', 'foo(.*?)(bar)', 2) returns 'bar'.

+

regexp_replace(string A, string B, string C)

+

STRING

+

Replace the substrings in string A that match the regular expression B with string C.

+

repeat(string str, int n)

+

STRING

+

Repeat a string N times.

+

reverse(string A)

+

STRING

+

Return the reversed string.

+

rpad(string str, int len, string pad)

+

STRING

+

Return a string of a specified length. If the length of the given string (str) is shorter than the specified length (len), the given string is right-padded with pad to the specified length.

+

rtrim(string A)

+

STRING

+

Trim spaces from the right hand side of a string.

+

space(int n)

+

STRING

+

Returns a specified number of spaces.

+

substr(string A, int start) substring(string A, int start)

+

STRING

+

Return the substring starting from the specified start position in string A till the end of the string.

+

substr(string A, int start, int len) substring(string A, int start, int len)

+

STRING

+

Return the substring of the specified length starting from the specified start position in string A.

+

substring_index(string A, string delim, int count)

+

STRING

+

Return the substring from string A before count occurrences of the delimiter delim.

+

translate(string|char|varchar input, string|char|varchar from, string|char|varchar to)

+

STRING

+

Translate the input string by replacing the characters specified by from with the corresponding characters specified by to. For example, translate("abcde", "bcd", "BCD") replaces bcd in abcde with BCD and returns 'aBCDe'.

+

trim(string A)

+

STRING

+

Trim spaces from both ends of a string.

+

upper(string A) ucase(string A)

+

STRING

+

Convert all characters of a string to upper case.

+

initcap(string A)

+

STRING

+

Convert the first letter of each word of a string to upper case and all other letters to lower case.

+

levenshtein(string A, string B)

+

INT

+

Return the Levenshtein distance between two strings. Example: levenshtein('kitten', 'sitting') = 3.

+

soundex(string A)

+

STRING

+

Return the soundex string of str. Example: soundex('Miller') = M460.

+
+
+
+
+ +
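As a quick reference, the following Spark SQL statement is an illustrative sketch that combines several of the string functions above; the literal values are arbitrary examples and the expected results are shown as comments.

-- Illustrative only; literal values are arbitrary examples
SELECT
  concat_ws('-', 'a', 'b', 'c'),                    -- a-b-c
  lpad('7', 3, '0'),                                -- 007
  regexp_extract('foothebar', 'foo(.*?)(bar)', 2),  -- bar
  substring_index('www.example.com', '.', 2),       -- www.example
  initcap('spark sql'),                             -- Spark Sql
  levenshtein('kitten', 'sitting');                 -- 3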
+ diff --git a/docs/dli/sqlreference/dli_08_0068.html b/docs/dli/sqlreference/dli_08_0068.html new file mode 100644 index 00000000..55c28034 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0068.html @@ -0,0 +1,123 @@ + + +

Aggregate Functions

+

An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved by an SQL statement. Table 1 lists aggregate functions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Aggregate functions

Function

+

Return Type

+

Description

+

count(*), count(expr), count(DISTINCT expr[, expr...])

+

BIGINT

+

Return the total number of retrieved records.

+

sum(col), sum(DISTINCT col)

+

DOUBLE

+

Return the sum of the values in a column.

+

avg(col), avg(DISTINCT col)

+

DOUBLE

+

Return the average of the values in a column.

+

min(col)

+

DOUBLE

+

Return the minimum value of a column.

+

max(col)

+

DOUBLE

+

Return the maximum value of a column.

+

variance(col), var_pop(col)

+

DOUBLE

+

Return the variance of a numeric column.

+

var_samp(col)

+

DOUBLE

+

Return the sample variance of a numeric column.

+

stddev_pop(col)

+

DOUBLE

+

Return the deviation of a numeric column.

+

stddev_samp(col)

+

DOUBLE

+

Return the sample deviation of a numeric column.

+

covar_pop(col1, col2)

+

DOUBLE

+

Return the covariance of a pair of numeric columns.

+

covar_samp(col1, col2)

+

DOUBLE

+

Return the sample covariance of a pair of numeric columns.

+

corr(col1, col2)

+

DOUBLE

+

Return the coefficient of correlation of a pair of numeric columns.

+

percentile(BIGINT col, p)

+

DOUBLE

+

Return the exact pth percentile of a column. p must be between 0 and 1. Otherwise, this function returns null. This function does not work with floating point types.

+

percentile_approx(DOUBLE col, p [, B])

+

DOUBLE

+

Return an approximate pth percentile of a numeric column (including floating point types) in a group. p must be between 0 and 1. B controls approximation accuracy. Higher values of B mean better approximations, and the default value is 10,000. When the number of distinct values in the numeric column is smaller than B, an exact percentile value is returned.

+
+
+

Functions such as var_pop, stddev_pop, var_samp, stddev_samp, covar_pop, covar_samp, corr, and percentile_approx do not support non-numeric data types, such as TIMESTAMP.

+
+
+
+ +
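As an illustration of how these functions are typically combined, the following sketch aggregates a hypothetical sales table; the table name sales and the columns dept and amount are assumptions, not objects defined in this reference.

-- Minimal sketch; sales, dept, and amount are hypothetical names
SELECT
  dept,
  count(*)                       AS order_cnt,
  sum(amount)                    AS total_amount,
  avg(amount)                    AS avg_amount,
  percentile_approx(amount, 0.5) AS median_amount
FROM sales
GROUP BY dept;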
+ diff --git a/docs/dli/sqlreference/dli_08_0069.html b/docs/dli/sqlreference/dli_08_0069.html new file mode 100644 index 00000000..f90a555e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0069.html @@ -0,0 +1,79 @@ + + +

Window Functions

+

A window function performs a calculation operation on a set of values related to the current value. A window function can be an aggregate function used in the GROUP BY clause, such as sum, max, min, count, and avg functions. The window functions also include the functions listed in Table 1. A window contains multiple rows defined by an OVER clause. A window function works on one window.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Functions

Function

+

Return Type

+

Description

+

first_value(col)

+

Data type of the argument.

+

Return the value of the first data record from a column.

+

last_value(col)

+

Data type of the argument.

+

Return the value of the last data record from a column.

+

lag (col,n,DEFAULT)

+

Data type of the argument.

+

Return the value from the nth row preceding the current row. The first argument specifies the column name. The second argument specifies the nth row preceding the current row. The configuration of the second argument is optional, and the default argument value is 1 if the argument is not specified. The third argument is set to a default value. If the nth row preceding the current row is null, the default value is used. The default value of the third argument is NULL if the argument is not specified.

+

lead (col,n,DEFAULT)

+

Data type of the argument.

+

Return the value from the nth row following the current row. The first argument specifies the column name. The second argument specifies the nth row following the current row. The configuration of the second argument is optional, and the default argument value is 1 if the argument is not specified. The third argument is set to a default value. If the nth row following the current row is null, the default value is used. The default value of the third argument is NULL if the argument is not specified.

+

row_number() over (order by col_1[,col_2 ...])

+

INT

+

Assign a unique number to each row.

+

rank()

+

INT

+

Return the rank of a value in a set of values. When multiple values share the same rank, the next rank in the sequence is not consecutive.

+

cume_dist()

+

DOUBLE

+

Calculate the relative position of a value in a row.

+

percent_rank()

+

DOUBLE

+

Return the rank of a value from the column specified by the ORDER BY clause of the window. The return value is a decimal between 0 and 1, calculated as (rank - 1)/(number of rows in the window partition - 1).

+
+
+
+
+ +
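As an illustration, the following sketch applies several of these window functions over a hypothetical orders table; the table name orders and the columns user_id, order_time, and amount are assumptions.

-- Minimal sketch; orders and its columns are hypothetical names
SELECT
  user_id,
  order_time,
  amount,
  row_number() OVER (PARTITION BY user_id ORDER BY order_time)      AS rn,
  lag(amount, 1, 0) OVER (PARTITION BY user_id ORDER BY order_time) AS prev_amount,
  rank() OVER (PARTITION BY user_id ORDER BY amount DESC)           AS amount_rank
FROM orders;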
+ diff --git a/docs/dli/sqlreference/dli_08_0070.html b/docs/dli/sqlreference/dli_08_0070.html new file mode 100644 index 00000000..a5e5518e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0070.html @@ -0,0 +1,21 @@ + + +

Databases

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0071.html b/docs/dli/sqlreference/dli_08_0071.html new file mode 100644 index 00000000..827c5786 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0071.html @@ -0,0 +1,64 @@ + + +

Creating a Database

+

Function

This statement is used to create a database.

+
+

Syntax

1
+2
+3
CREATE [DATABASE | SCHEMA] [IF NOT EXISTS] db_name
+  [COMMENT db_comment]
+  [WITH DBPROPERTIES (property_name=property_value, ...)];
+
+ +
+
+

Keyword

+ +
+

Parameters

+
+ + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

db_comment

+

Database description

+

property_name

+

Database property name

+

property_value

+

Database property value

+
+
+
+

Precautions

+
+

Example

  1. Create a queue. A queue is the basis for using DLI. Before executing SQL statements, you need to create a queue.
  2. On the DLI management console, click SQL Editor in the navigation pane on the left. The SQL Editor page is displayed.
  3. In the editing window on the right of the SQL Editor page, enter the following SQL statement for creating a database and click Execute. Read and agree to the privacy agreement, and click OK.

    If database testdb does not exist, run the following statement to create database testdb:

    +
    1
    CREATE DATABASE IF NOT EXISTS testdb;
    +
    + +
    +
+
+
+
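The optional COMMENT and DBPROPERTIES clauses from the syntax above can be combined in the same statement; the property key and value below are arbitrary illustrations.

CREATE DATABASE IF NOT EXISTS testdb
  COMMENT 'Test database'
  WITH DBPROPERTIES ('owner' = 'dli_user');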
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0072.html b/docs/dli/sqlreference/dli_08_0072.html new file mode 100644 index 00000000..8d7c3d1d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0072.html @@ -0,0 +1,43 @@ + + +

Deleting a Database

+

Function

This statement is used to delete a database.

+
+

Syntax

1
DROP [DATABASE | SCHEMA] [IF EXISTS] db_name [RESTRICT|CASCADE];
+
+ +
+
+

Keyword

IF EXISTS: Prevents system errors if the database to be deleted does not exist.

+
+

Precautions

+
+

Parameters

+
+ + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+
+
+
+

Example

  1. Create a database, for example, testdb, by referring to Example.
  2. Run the following statement to delete database testdb if it exists:
    1
    DROP DATABASE IF EXISTS testdb;
    +
    + +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0073.html b/docs/dli/sqlreference/dli_08_0073.html new file mode 100644 index 00000000..0f6c10d6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0073.html @@ -0,0 +1,43 @@ + + +

Viewing a Specified Database

+

Function

This syntax is used to view the information about a specified database, including the database name and database description.

+
+

Syntax

1
DESCRIBE DATABASE [EXTENDED] db_name;
+
+ +
+
+

Keyword

EXTENDED: Displays the database properties.

+
+

Parameters

+
+ + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+
+
+
+

Precautions

If the database to be viewed does not exist, the system reports an error.

+
+

Example

  1. Create a database, for example, testdb, by referring to Example.
  2. Run the following statement to query information about the testdb database:
    1
    DESCRIBE DATABASE testdb;
    +
    + +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0074.html b/docs/dli/sqlreference/dli_08_0074.html new file mode 100644 index 00000000..23567df1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0074.html @@ -0,0 +1,48 @@ + + +

Viewing All Databases

+

Function

This syntax is used to query all current databases.

+
+

Syntax

1
SHOW [DATABASES | SCHEMAS] [LIKE regex_expression];
+
+ +
+
+

Keyword

None

+
+

Parameters

+
+ + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

regex_expression

+

Database name

+
+
+
+

Precautions

Keyword DATABASES is equivalent to SCHEMAS. You can use either of them in this statement.

+
+

Example

View all the current databases.

+
1
SHOW DATABASES;
+
+ +
+

View all databases whose names start with test.

+
1
SHOW DATABASES LIKE "test.*";
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0075.html b/docs/dli/sqlreference/dli_08_0075.html new file mode 100644 index 00000000..3ce7aef8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0075.html @@ -0,0 +1,121 @@ + + +

SQL Syntax Constraints and Definitions

+

Syntax Constraints

+
+

Data Types Supported by Syntax

+
+

Syntax Definition

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
INSERT INTO stream_name query;
+query:
+  values
+  | {
+      select
+      | selectWithoutFrom
+      | query UNION [ ALL ] query
+    }
+
+orderItem:
+  expression [ ASC | DESC ]
+
+select:
+  SELECT 
+  { * | projectItem [, projectItem ]* }
+  FROM tableExpression [ JOIN tableExpression ]
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+selectWithoutFrom:
+  SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+
+projectItem:
+  expression [ [ AS ] columnAlias ]
+  | tableAlias . *
+
+tableExpression:
+  tableReference 
+
+tableReference:
+  tablePrimary
+  [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ]
+
+tablePrimary:
+  [ TABLE ] [ [ catalogName . ] schemaName . ] tableName
+  | LATERAL TABLE '(' functionName '(' expression [, expression ]* ')' ')'
+  | UNNEST '(' expression ')'
+
+values:
+  VALUES expression [, expression ]*
+
+groupItem:
+  expression
+  | '(' ')'
+  | '(' expression [, expression ]* ')'
+  | CUBE '(' expression [, expression ]* ')'
+  | ROLLUP '(' expression [, expression ]* ')'
+  | GROUPING SETS '(' groupItem [, groupItem ]* ')'
+
+ +
+
+
+
+ +
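The following statement is a minimal sketch that conforms to the grammar above; the stream names result_stream and car_infos and the columns car_id and speed are assumptions.

INSERT INTO result_stream
SELECT car_id, COUNT(*)
FROM car_infos
WHERE speed > 60
GROUP BY car_id;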
+ diff --git a/docs/dli/sqlreference/dli_08_0076.html b/docs/dli/sqlreference/dli_08_0076.html new file mode 100644 index 00000000..bfb5a9f8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0076.html @@ -0,0 +1,258 @@ + + +

Creating an OBS Table Using the DataSource Syntax

+

Function

Create an OBS table using the DataSource syntax.

+

The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of supported partitions. For details, see syntax and precautions.

+
+

Usage

+
+

Precautions

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 
+  [(col_name1 col_type1 [COMMENT col_comment1], ...)]
+  USING file_format 
+  [OPTIONS (path 'obs_path', key1=val1, key2=val2, ...)] 
+  [PARTITIONED BY (col_name1, col_name2, ...)]
+  [COMMENT table_comment]
+  [AS select_statement];
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name

+

The value can contain letters, numbers, and underscores (_), but cannot contain only numbers or start with a number or underscore (_).

+

table_name

+

Name of the table to be created in the database

+

The value can contain letters, numbers, and underscores (_), but cannot contain only numbers or start with a number or underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$.

+

Special characters must be enclosed in single quotation marks ('').

+

col_name

+

Column names with data types separated by commas (,)

+

The column name contains letters, digits, and underscores (_). It cannot contain only digits and must contain at least one letter.

+

col_type

+

Data type of a column field

+

col_comment

+

Column field description

+

file_format

+

Input format of the table. The value can be orc, parquet, json, csv, or avro.

+

path

+

OBS storage path where data files are stored

+

Format: obs://bucketName/tblPath

+

bucketName: bucket name

+

tblPath: directory name. You do not need to specify the file name following the directory.

+

For details about attribute names and values during table creation, see Table 2.

+

For details about the table attribute names and values when file_format is set to csv, see Table 2 and Table 3.

+

table_comment

+

Description of the table

+

select_statement

+

The CREATE TABLE AS statement is used to insert the SELECT query result of the source table or a data record to a new table in OBS bucket.

+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 2 OPTIONS parameter description

Parameter

+

Description

+

Default Value

+

path

+

Specified table storage location. Currently, only OBS is supported.

+

-

+

multiLevelDirEnable

+

Whether to iteratively query data in subdirectories when subdirectories are nested. When this parameter is set to true, all files in the table path, including files in subdirectories, are iteratively read when a table is queried.

+

false

+

dataDelegated

+

Whether to clear data in the path when deleting a table or partition

+

false

+

compression

+

Specified compression format. Generally, you need to set this parameter to zstd for parquet files.

+

-

+
+
+
When the file format is set to CSV, you can set the following OPTIONS parameters: +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 OPTIONS parameter description of the CSV data format

Parameter

+

Description

+

Default Value

+

delimiter

+

Data separator

+

Comma (,)

+

quote

+

Quotation character

+

Double quotation marks (" ")

+

escape

+

Escape character

+

Backslash (\)

+

multiLine

+

Whether the column data contains carriage return or line break characters. The value true indicates yes and the value false indicates no.

+

false

+

dateFormat

+

Date format of the date field in a CSV file

+

yyyy-MM-dd

+

timestampFormat

+

Date format of the timestamp field in a CSV file

+

yyyy-MM-dd HH:mm:ss

+

mode

+

Mode for parsing CSV files. The options are as follows:

+
  • PERMISSIVE: Permissive mode. If an incorrect field is encountered, set the line to Null.
  • DROPMALFORMED: When an incorrect field is encountered, the entire line is discarded.
  • FAILFAST: Error mode. If an error occurs, it is automatically reported.
+

PERMISSIVE

+

header

+

Whether CSV contains header information. The value true indicates that the table header information is contained, and the value false indicates that the information is not included.

+

false

+

nullValue

+

Character that represents the null value. For example, nullValue= "\\N" indicates that \N represents the null value.

+

-

+

comment

+

Character that indicates the beginning of the comment. For example, comment= '#' indicates that the line starting with # is a comment.

+

-

+

compression

+

Data compression format. Currently, gzip, bzip2, and deflate are supported. If you do not want to compress data, enter none.

+

none

+

encoding

+

Data encoding format. Available values are utf-8, gb2312, and gbk. Value utf-8 will be used if this parameter is left empty.

+

utf-8

+
+
+
+

Example

+
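The following statement is a minimal sketch of the DataSource syntax above for a CSV file stored on OBS; the database name, table name, column names, and OBS path are placeholders, and the delimiter and header options come from Table 3.

CREATE TABLE IF NOT EXISTS testdb.student_obs (
  name  STRING,
  score DOUBLE
)
USING csv
OPTIONS (path 'obs://bucketName/filePath/student', delimiter=',', header='true')
COMMENT 'OBS table created with the DataSource syntax';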
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0077.html b/docs/dli/sqlreference/dli_08_0077.html new file mode 100644 index 00000000..a9cf61da --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0077.html @@ -0,0 +1,175 @@ + + +

Creating an OBS Table Using the Hive Syntax

+

Function

This statement is used to create an OBS table using the Hive syntax. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of supported partitions. For details, see syntax and precautions.

+
+

Usage

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name 
+  [(col_name1 col_type1 [COMMENT col_comment1], ...)]
+  [COMMENT table_comment] 
+  [PARTITIONED BY (col_name2 col_type2, [COMMENT col_comment2], ...)] 
+  [ROW FORMAT row_format]
+  [STORED AS file_format] 
+  LOCATION 'obs_path'
+  [TBLPROPERTIES (key = value)]
+  [AS select_statement];
+
+row_format:
+  : SERDE serde_cls [WITH SERDEPROPERTIES (key1=val1, key2=val2, ...)]
+  | DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]]
+      [COLLECTION ITEMS TERMINATED BY char]
+      [MAP KEYS TERMINATED BY char]
+      [LINES TERMINATED BY char]
+      [NULL DEFINED AS char]
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

col_name

+

Field name

+

col_type

+

Field type

+

col_comment

+

Field description

+

row_format

+

Line data format

+

file_format

+

OBS table storage format. TEXTFILE, AVRO, ORC, SEQUENCEFILE, RCFILE, and PARQUET are supported.

+

table_comment

+

Table description

+

obs_path

+

OBS path

+

key = value

+

Set table properties and values.

+

For example, if you want to enable multiversion, you can set "dli.multi.version.enable"="true".

+

select_statement

+

The CREATE TABLE AS statement is used to insert the SELECT query result of the source table or a data record to a new table in OBS bucket.

+
+
+
+

Precautions

+
+

Example

+
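The following statement is a minimal sketch of the Hive syntax above; the database name, table name, columns, and OBS path are placeholders, and the multiversion property is taken from the parameter description above.

CREATE TABLE IF NOT EXISTS testdb.student_hive (
  name  STRING,
  score DOUBLE
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET
LOCATION 'obs://bucketName/filePath/student_hive'
TBLPROPERTIES ('dli.multi.version.enable' = 'true');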
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0079.html b/docs/dli/sqlreference/dli_08_0079.html new file mode 100644 index 00000000..7909e5d0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0079.html @@ -0,0 +1,57 @@ + + +

Updating Partitioned Table Data (Only OBS Tables Supported)

+

Function

This statement is used to update the partition information about a table in the Metastore.

+
+

Syntax

1
MSCK REPAIR TABLE table_name;
+
+ +
+

Or

+
ALTER TABLE table_name RECOVER PARTITIONS;
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

partition_specs

+

Partition fields

+

obs_path

+

OBS path

+
+
+
+

Precautions

+
+

Example

Run the following statements to update the partition information about table ptable in the Metastore:

+
1
MSCK REPAIR TABLE ptable;
+
+ +
+

Or

+
ALTER TABLE ptable RECOVER PARTITIONS;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0080.html b/docs/dli/sqlreference/dli_08_0080.html new file mode 100644 index 00000000..86459dd1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0080.html @@ -0,0 +1,27 @@ + + +

Syntax for Partitioning a Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0081.html b/docs/dli/sqlreference/dli_08_0081.html new file mode 100644 index 00000000..a01e080b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0081.html @@ -0,0 +1,95 @@ + + +

Adding Partition Data (Only OBS Tables Supported)

+

Function

After an OBS partitioned table is created, no partition information is generated for the table. Partition information is generated only after you:

+ +

The following describes how to use the ALTER TABLE statement to add a partition.

+
+

Syntax

1
+2
+3
+4
+5
ALTER TABLE table_name ADD [IF NOT EXISTS]
+  PARTITION partition_specs1
+  [LOCATION 'obs_path1']
+  PARTITION partition_specs2
+  [LOCATION 'obs_path2'];
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

partition_specs

+

Partition fields

+

obs_path

+

OBS path

+
+
+
+

Precautions

+
+

Example

+
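For instance, the following sketch adds a partition to the student table used elsewhere in this reference and binds it to an OBS path; the bucket and directory names are placeholders.

ALTER TABLE student ADD IF NOT EXISTS
  PARTITION (dt = '2008-08-08', city = 'xxx')
  LOCATION 'obs://bucketName/fileName/student/dt=2008-08-08/city=xxx';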
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0082.html b/docs/dli/sqlreference/dli_08_0082.html new file mode 100644 index 00000000..e6070366 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0082.html @@ -0,0 +1,56 @@ + + +

Renaming a Partition (Only OBS Tables Supported)

+

Function

This statement is used to rename partitions.

+
+

Syntax

1
+2
+3
ALTER TABLE table_name
+  PARTITION partition_specs
+  RENAME TO PARTITION partition_specs;
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

partition_specs

+

Partition fields

+
+
+
+

Precautions

+
+

Example

To modify the name of the city='xxx',dt='2008-08-08' partition in the student table to city='xxx',dt='2009-09-09', run the following statement:

+
1
+2
+3
ALTER TABLE student
+  PARTITION (city='xxx',dt='2008-08-08')
+  RENAME TO PARTITION (city='xxx',dt='2009-09-09');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0083.html b/docs/dli/sqlreference/dli_08_0083.html new file mode 100644 index 00000000..291c27c4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0083.html @@ -0,0 +1,61 @@ + + +

Deleting a Partition

+

Function

Deletes one or more partitions from a partitioned table.

+
+

Precautions

+
+

Syntax

1
+2
+3
ALTER TABLE [db_name.]table_name
+  DROP [IF EXISTS]
+  PARTITION partition_spec1[,PARTITION partition_spec2,...];
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits and cannot start with an underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). It cannot contain only digits and cannot start with an underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

partition_specs

+

Partition information, in the format of "key=value", where key indicates the partition field and value indicates the partition value. In a table partitioned using multiple fields, if you specify all the fields of a partition name, only the partition is deleted; if you specify only some fields of a partition name, all matching partitions will be deleted. By default, the partition_specs parameter contains (). For example: PARTITION (dt='2009-09-09',city='xxx')

+
+
+
+

Example

To delete the dt = '2008-08-08', city = 'xxx' partition in the student table, run the following statement:

+
1
+2
+3
ALTER TABLE student
+  DROP
+  PARTITION (dt = '2008-08-08', city = 'xxx');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0084.html b/docs/dli/sqlreference/dli_08_0084.html new file mode 100644 index 00000000..1193cbfc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0084.html @@ -0,0 +1,61 @@ + + +

Altering the Partition Location of a Table (Only OBS Tables Supported)

+

Function

This statement is used to modify the positions of table partitions.

+
+

Syntax

1
+2
+3
ALTER TABLE table_name
+  PARTITION partition_specs
+  SET LOCATION obs_path;
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

partition_specs

+

Partition fields

+

obs_path

+

OBS path

+
+
+
+

Precautions

+
+

Example

To set the OBS path of partition dt='2008-08-08',city='xxx' in table student to obs://bucketName/fileName/student/dt=2008-08-08/city=xxx, run the following statement:

+
1
+2
+3
ALTER TABLE student
+  PARTITION(dt='2008-08-08',city='xxx')
+  SET LOCATION 'obs://bucketName/fileName/student/dt=2008-08-08/city=xxx';
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0086.html b/docs/dli/sqlreference/dli_08_0086.html new file mode 100644 index 00000000..2d82dd0f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0086.html @@ -0,0 +1,27 @@ + + +

Built-In Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0087.html b/docs/dli/sqlreference/dli_08_0087.html new file mode 100644 index 00000000..63347607 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0087.html @@ -0,0 +1,48 @@ + + +

Deleting a Table

+

Function

This statement is used to delete tables.

+
+

Syntax

1
DROP TABLE [IF EXISTS] [db_name.]table_name;
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+
+
+
+

Precautions

The to-be-deleted table must exist in the current database. Otherwise, an error is reported. To avoid this error, add IF EXISTS in this statement.

+
+

Example

  1. Create a table. For details, see Creating an OBS Table or Creating a DLI Table.
  2. Run the following statement to delete table test from the current database:
    1
    DROP TABLE IF EXISTS test;
    +
    + +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0088.html b/docs/dli/sqlreference/dli_08_0088.html new file mode 100644 index 00000000..fd80981e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0088.html @@ -0,0 +1,122 @@ + + +

Deep Learning Model Prediction

+

Deep learning has a wide range of applications in many industries, such as image classification, image recognition, and speech recognition. DLI provides several functions to load deep learning models for prediction.

+

Currently, Deeplearning4j and Keras models are supported. In Keras, TensorFlow, CNTK, or Theano can serve as the backend engine. By importing neural network models through Keras, models of mainstream learning frameworks such as Theano, TensorFlow, Caffe, and CNTK can be imported.

+

Syntax

1
+2
+3
+4
+5
+6
+7
-- Image classification: returns the predicted category IDs used for image classification.
+DL_IMAGE_MAX_PREDICTION_INDEX(field_name, model_path, is_dl4j_model)
+DL_IMAGE_MAX_PREDICTION_INDEX(field_name, keras_model_config_path, keras_weights_path) -- Suitable for the Keras model
+
+--Text classification: returns the predicted category IDs used for text classification.
+DL_TEXT_MAX_PREDICTION_INDEX(field_name, model_path, is_dl4j_model) -- Use the default word2vec model.
+DL_TEXT_MAX_PREDICTION_INDEX(field_name, word2vec_path, model_path, is_dl4j_model)
+
+ +
+

Models and configuration files must be stored on OBS. The path format is obs://your_ak:your_sk@obs.your_obs_region.xxx.com:443/your_model_path.

+
+
+

Parameter Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

field_name

+

Yes

+

Name of the field in the data stream whose data is used for prediction.

+

In image classification, this parameter needs to be declared as ARRAY[TINYINT].

+

In text classification, this parameter needs to be declared as String.

+

model_path

+

Yes

+

Complete save path of the model on OBS, including the model structure and model weight.

+

is_dl4j_model

+

Yes

+

Whether the model is a Deeplearning4j model

+

Value true indicates that the model is a Deeplearning4j model, while value false indicates that the model is a Keras model.

+

keras_model_config_path

+

Yes

+

Complete save path of the model structure on OBS. In Keras, you can obtain the model structure by using model.to_json().

+

keras_weights_path

+

Yes

+

Complete save path of the model weight on OBS. In Keras, you can obtain the model weight by using model.save_weights(filepath).

+

word2vec_path

+

Yes

+

Complete save path of the word2vec model on OBS.

+
+
+
+

Example

For prediction in image classification, use the Mnist dataset as the input and load the pre-trained Deeplearning4j model or Keras model to predict the digit representing each image in real time.

+
1
+2
+3
+4
+5
+6
CREATE SOURCE STREAM Mnist(
+    image Array[TINYINT]
+)
+SELECT DL_IMAGE_MAX_PREDICTION_INDEX(image, 'your_dl4j_model_path', true) FROM Mnist
+SELECT DL_IMAGE_MAX_PREDICTION_INDEX(image, 'your_keras_model_path', false) FROM Mnist
+SELECT DL_IMAGE_MAX_PREDICTION_INDEX(image, 'your_keras_model_config_path', 'keras_weights_path') FROM Mnist
+
+ +
+

For prediction in text classification, use data of a group of news titles as the input and load the pre-trained Deeplearning4j model or Keras model to predict the category of each news title in real time, such as economy, sports, and entertainment.

+
1
+2
+3
+4
+5
+6
+7
CREATE SOURCE STREAM News(
+    title String
+)
+SELECT DL_TEXT_MAX_PREDICTION_INDEX(title, 'your_dl4j_word2vec_model_path','your_dl4j_model_path', true) FROM News
+SELECT DL_TEXT_MAX_PREDICTION_INDEX(title, 'your_keras_word2vec_model_path','your_keras_model_path', false) FROM News
+SELECT DL_TEXT_MAX_PREDICTION_INDEX(title, 'your_dl4j_model_path', true) FROM News
+SELECT DL_TEXT_MAX_PREDICTION_INDEX(title, 'your_keras_model_path', false) FROM News
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0089.html b/docs/dli/sqlreference/dli_08_0089.html new file mode 100644 index 00000000..cb6e1ad3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0089.html @@ -0,0 +1,25 @@ + + +

Viewing Tables

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0090.html b/docs/dli/sqlreference/dli_08_0090.html new file mode 100644 index 00000000..0d200bfa --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0090.html @@ -0,0 +1,52 @@ + + +

Viewing All Tables

+

Function

This statement is used to view all tables and views in the current database.

+
+

Syntax

1
SHOW TABLES [IN | FROM db_name] [LIKE regex_expression];
+
+ +
+
+

Keyword

FROM/IN: followed by the name of a database whose tables and views will be displayed.

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

regex_expression

+

Name of a database table.

+
+
+
+

Precautions

None

+
+

Example

  1. Create a table. For details, see Creating an OBS Table or Creating a DLI Table.
  2. To show all tables and views in the current database, run the following statement:
    1
    SHOW TABLES;
    +
    + +
    +
  3. To show all tables started with test in the testdb database, run the following statement:
    1
    SHOW TABLES IN testdb LIKE "test*";
    +
    + +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0091.html b/docs/dli/sqlreference/dli_08_0091.html new file mode 100644 index 00000000..85a40bc7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0091.html @@ -0,0 +1,44 @@ + + +

Viewing Table Creation Statements

+

Function

This statement is used to show the statements for creating a table.

+
+

Syntax

1
SHOW CREATE TABLE table_name;
+
+ +
+
+

Keyword

CREATE TABLE: statement for creating a table

+
+

Parameters

+
+ + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+
+
+
+

Precautions

The table specified in this statement must exist. Otherwise, an error will occur.

+
+

Example

  1. Create a table. For details, see Creating an OBS Table or Creating a DLI Table.
+
  1. Run the following statement to view the statement that is used to create table test:
    1
    SHOW CREATE TABLE test;
    +
    + +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0092.html b/docs/dli/sqlreference/dli_08_0092.html new file mode 100644 index 00000000..5917b5d1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0092.html @@ -0,0 +1,48 @@ + + +

Viewing Table Properties

+

Function

Check the properties of a table.

+
+

Syntax

1
SHOW TBLPROPERTIES table_name [('property_name')];
+
+ +
+
+

Keyword

TBLPROPERTIES: This statement allows you to add a key/value property to a table.

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

property_name

+
  • If this parameter is not specified, all properties and their values are returned.
  • If a property name is specified, only the specified property and its value are returned.
+
+
+
+

Precautions

property_name is case sensitive. You cannot specify multiple property_name attributes at the same time. Otherwise, an error occurs.

+
+

Example

To return the value of property_key1 in the test table, run the following statement:

+
1
SHOW TBLPROPERTIES test ('property_key1');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0093.html b/docs/dli/sqlreference/dli_08_0093.html new file mode 100644 index 00000000..36a911f7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0093.html @@ -0,0 +1,48 @@ + + +

Viewing All Columns in a Specified Table

+

Function

This statement is used to query all columns in a specified table.

+
+

Syntax

1
SHOW COLUMNS {FROM | IN} table_name [{FROM | IN} db_name];
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

table_name

+

Table name

+

db_name

+

Database name

+
+
+
+

Precautions

The specified table must exist in the database. If the table does not exist, an error is reported.

+
+

Example

Run the following statement to view all columns in the student table.

+
1
SHOW COLUMNS IN student;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0094.html b/docs/dli/sqlreference/dli_08_0094.html new file mode 100644 index 00000000..05467e6c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0094.html @@ -0,0 +1,59 @@ + + +

Viewing All Partitions in a Specified Table

+

Function

This statement is used to view all partitions in a specified table.

+
+

Syntax

1
+2
SHOW PARTITIONS [db_name.]table_name
+  [PARTITION partition_specs];
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits and cannot start with an underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). It cannot contain only digits and cannot start with an underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

partition_specs

+

Partition information, in the format of "key=value", where key indicates the partition field and value indicates the partition value. If a partition field contains multiple fields, the system displays all partition information that matches the partition field.

+
+
+
+

Precautions

The table specified in this statement must exist and must be a partitioned table. Otherwise, an error is reported.

+
+

Example

+
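For instance, the following sketches list the partitions of the student table used elsewhere in this reference; the second statement assumes dt is one of its partition fields.

SHOW PARTITIONS student;

SHOW PARTITIONS student PARTITION (dt = '2008-08-08');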
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0095.html b/docs/dli/sqlreference/dli_08_0095.html new file mode 100644 index 00000000..703ab465 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0095.html @@ -0,0 +1,105 @@ + + +

Inserting Data

+

Function

This statement is used to insert the SELECT query result or a certain data record into a table.

+
+

Syntax

+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 INSERT parameter description

Parameter

+

Description

+

db_name

+

Name of the database where the target table resides.

+

table_name

+

Name of the target table.

+

part_spec

+

Detailed partition information. If there are multiple partition fields, all fields must be contained, but the corresponding values are optional. The system matches the corresponding partition. A maximum of 100,000 partitions can be created in a single table.

+

select_statement

+

SELECT query on the source table (DLI and OBS tables).

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

Before importing data, you must create a table. For details, see Creating an OBS Table or Creating a DLI Table.

+
+ +
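For instance, the following sketches insert a single record and a SELECT query result into a table; test_table, source_table, and their columns are hypothetical names.

INSERT INTO test_table VALUES (1, 'Tom');

INSERT INTO test_table SELECT id, name FROM source_table;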
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0096.html b/docs/dli/sqlreference/dli_08_0096.html new file mode 100644 index 00000000..fd91fe5f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0096.html @@ -0,0 +1,735 @@ + + +

String Functions

+

The common character string functions of DLI are as follows:

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 String Operators

Operator

+

Returned Data Type

+

Description

+

||

+

VARCHAR

+

Concatenates two strings.

+

CHAR_LENGTH

+

INT

+

Returns the number of characters in a string.

+

CHARACTER_LENGTH

+

INT

+

Returns the number of characters in a string.

+

CONCAT

+

VARCHAR

+

Concatenates two or more string values to form a new string. If the value of any parameter is NULL, that parameter is skipped.

+

CONCAT_WS

+

VARCHAR

+

Concatenates each parameter value and the separator specified by the first parameter separator to form a new string. The length and type of the new string depend on the input value.

+

HASH_CODE

+

INT

+

Returns the absolute value of HASH_CODE() of a string. In addition to string, int, bigint, float, and double are also supported.

+

INITCAP

+

VARCHAR

+

Returns a string whose first letter is in uppercase and the other letters in lowercase. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.

+

IS_ALPHA

+

BOOLEAN

+

Checks whether a string contains only letters.

+

IS_DIGITS

+

BOOLEAN

+

Checks whether a string contains only digits.

+

IS_NUMBER

+

BOOLEAN

+

Checks whether a string is numeric.

+

IS_URL

+

BOOLEAN

+

Checks whether a string is a valid URL.

+

JSON_VALUE

+

VARCHAR

+

Obtains the value of a specified path in a JSON string.

+

KEY_VALUE

+

VARCHAR

+

Obtains the value of a key in a key-value pair string.

+

LOWER

+

VARCHAR

+

Returns a string of lowercase characters.

+

LPAD

+

VARCHAR

+

Concatenates the pad string to the left of the str string until the length of the new string reaches the specified length len.

+

MD5

+

VARCHAR

+

Returns the MD5 value of a string. If the parameter is an empty string (that is, the parameter is ''), an empty string is returned.

+

OVERLAY

+

VARCHAR

+

Replaces the substring of x with y. Replace length+1 characters starting from start_position.

+

POSITION

+

INT

+

Returns the position of the first occurrence of the target string x in the queried string y. If the target string x does not exist in the queried string y, 0 is returned.

+

REPLACE

+

VARCHAR

+

Replaces all str2 in the str1 string with str3.

+
  • str1: original character.
  • str2: target character.
  • str3: replacement character.
+

RPAD

+

VARCHAR

+

Concatenates the pad string to the right of the str string until the length of the new string reaches the specified length len.

+

SHA1

+

STRING

+

Returns the SHA1 value of the expr string.

+

SHA256

+

STRING

+

Returns the SHA256 value of the expr string.

+

STRING_TO_ARRAY

+

ARRAY[STRING]

+

Splits the value string into a string array by using the delimiter.

+

SUBSTRING

+

VARCHAR

+

Returns the substring starting from a fixed position of A. The start position starts from 1.

+

TRIM

+

STRING

+

Removes string A from the start, the end, or both ends of string B. By default, A is removed from both ends of B.

+

UPPER

+

VARCHAR

+

Returns a string converted to uppercase characters.

+
+
+

||

+
+

CHAR_LENGTH

+
+

CHARACTER_LENGTH

+
+

CONCAT

+
+

CONCAT_WS

+
+

HASH_CODE

+
+

INITCAP

+
+

IS_ALPHA

+
+

IS_DIGITS

+
+

IS_NUMBER

+
+

IS_URL

+
+

JSON_VALUE

+
+

KEY_VALUE

+
+

LOWER

+
+

LPAD

+
+

MD5

+
+

OVERLAY

+
+

POSITION

+
+

REPLACE

+
+

RPAD

+
+

SHA1

+
+

SHA256

+
+

STRING_TO_ARRAY

+
+

SUBSTRING

+
+

TRIM

+
+

UPPER

+
+
+
+ +
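As a quick illustration, the following statements follow the pattern of the temporal-function examples in this reference; the stream names (OrderA, temp, temp1, temp2) and the column name are assumptions.

insert into temp SELECT CONCAT_WS('-', 'dli', name) FROM OrderA;//Concatenated string is returned
insert into temp1 SELECT CHAR_LENGTH(name) FROM OrderA;//Number of characters is returned
insert into temp2 SELECT UPPER(name) FROM OrderA;//Uppercase string is returned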
+ diff --git a/docs/dli/sqlreference/dli_08_0097.html b/docs/dli/sqlreference/dli_08_0097.html new file mode 100644 index 00000000..88ed01b0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0097.html @@ -0,0 +1,166 @@ + + +

Temporal Functions

+

Table 1 lists the time functions supported by Flink SQL.

+

Function Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Time Function

Function

+

Return Type

+

Description

+

DATE string

+

DATE

+

Parse the date string (yyyy-MM-dd) to a SQL date.

+

TIME string

+

TIME

+

Parse the time string (HH:mm:ss) to the SQL time.

+

TIMESTAMP string

+

TIMESTAMP

+

Convert the time string into timestamp. The time string format is yyyy-MM-dd HH:mm:ss.fff.

+

INTERVAL string range

+

INTERVAL

+

There are two types of intervals: yyyy-MM and dd HH:mm:ss.fff. The range of yyyy-MM can be YEAR or YEAR TO MONTH, with a precision of month. The range of dd HH:mm:ss.fff can be DAY TO HOUR, DAY TO MINUTE, DAY TO SECOND, or DAY TO MILLISECONDS, with a precision of up to millisecond. For example, if the range is DAY TO SECOND, the day, hour, minute, and second are all valid and the precision is second. DAY TO MINUTE indicates that the precision is minute.

+

The following is an example:

+

INTERVAL '10 00:00:00.004' DAY TO milliseconds indicates that the interval is 10 days and 4 milliseconds.

+

INTERVAL '10' DAY indicates that the interval is 10 days and INTERVAL '2-10' YEAR TO MONTH indicates that the interval is 2 years and 10 months.

+

CURRENT_DATE

+

DATE

+

Return the SQL date of UTC time zone.

+

CURRENT_TIME

+

TIME

+

Return the SQL time of UTC time zone.

+

CURRENT_TIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of UTC time zone.

+

LOCALTIME

+

TIME

+

Return the SQL time of the current time zone.

+

LOCALTIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of the current time zone.

+

EXTRACT(timeintervalunit FROM temporal)

+

INT

+

Extract part of the time point or interval. Return the part in the int type.

+

For example, 5 is returned from EXTRACT(DAY FROM DATE "2006-06-05").

+

FLOOR(timepoint TO timeintervalunit)

+

TIME

+

Round a time point down to the given unit.

+

For example, 12:44:00 is returned from FLOOR(TIME '12:44:31' TO MINUTE).

+

CEIL(timepoint TO timeintervalunit)

+

TIME

+

Round a time point up to the given unit.

+

For example, 12:45:00 is returned from CEIL(TIME '12:44:31' TO MINUTE).

+

QUARTER(date)

+

INT

+

Return the quarter from the SQL date.

+

(timepoint, temporal) OVERLAPS (timepoint, temporal)

+

BOOLEAN

+

Check whether two intervals overlap. The time points and time are converted into a time range with a start point and an end point. The function is leftEnd >= rightStart && rightEnd >= leftStart. If leftEnd is greater than or equal to rightStart and rightEnd is greater than or equal to leftStart, true is returned. Otherwise, false is returned.

+

The following is an example:

+
  • If leftEnd is 3:55:00 (2:55:00+1:00:00), rightStart is 3:30:00, rightEnd is 5:30:00 (3:30:00+2:00:00), and leftStart is 2:55:00, true will be returned.

    Specifically, true is returned from (TIME '2:55:00', INTERVAL '1' HOUR) OVERLAPS (TIME '3:30:00', INTERVAL '2' HOUR).

    +
+
  • If leftEnd is 10:00:00, rightStart is 10:15:00, rightEnd is 13:15:00 (10:15:00+3:00:00), and leftStart is 9:00:00, false will be returned.

    Specifically, false is returned from (TIME '9:00:00', TIME '10:00:00') OVERLAPS (TIME '10:15:00', INTERVAL '3' HOUR).

    +
+

TO_TIMESTAMP(long expr)

+

TIMESTAMP

+

Convert a timestamp to time.

+

The input parameter of this function must be of the BIGINT type. Other data types, such as VARCHAR and STRING, are not supported.

+

For example, TO_TIMESTAMP (1628765159000) is converted to 2021-08-12 18:45:59.

+

UNIX_TIMESTAMP

+

BIGINT

+

Returns the timestamp of a specified parameter. The timestamp type is BIGINT and the unit is second.

+

The following methods are supported:

+
  • UNIX_TIMESTAMP(): returns the timestamp of the current time if no parameter is specified.
  • UNIX_TIMESTAMP(STRING datestr): returns the timestamp indicated by the parameter if only one parameter is contained. The format of datestr must be yyyy-MM-dd HH:mm:ss.
  • UNIX_TIMESTAMP(STRING datestr, STRING format): returns the timestamp indicated by the first parameter if two parameters are contained. The second parameter can specify the format of datestr.
+

UNIX_TIMESTAMP_MS

+

BIGINT

+

Returns the timestamp of a specified parameter. The timestamp type is BIGINT and the unit is millisecond.

+

The following methods are supported:

+
  • UNIX_TIMESTAMP_MS(): returns the timestamp of the current time if no parameter is specified.
  • UNIX_TIMESTAMP_MS(STRING datestr): returns the timestamp indicated by the parameter if only one parameter is contained. The format of datestr must be yyyy-MM-dd HH:mm:ss.SSS.
  • UNIX_TIMESTAMP_MS(STRING datestr, STRING format): returns the timestamp indicated by the first parameter if two parameters are contained. The second parameter can specify the format of datestr.
+
+
+
+

Precautions

None

+
+

Example

insert into temp SELECT Date '2015-10-11'  FROM  OrderA;//Date is returned
+insert into temp1 SELECT Time '12:14:50'  FROM  OrderA;//Time is returned
+insert into temp2 SELECT Timestamp '2015-10-11 12:14:50'  FROM  OrderA;//Timestamp is returned
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0098.html b/docs/dli/sqlreference/dli_08_0098.html new file mode 100644 index 00000000..0ac5766d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0098.html @@ -0,0 +1,127 @@ + + +

Creating a DLI Table Using the DataSource Syntax

+

Function

This DataSource syntax can be used to create a DLI table. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of supported partitions. For details, see syntax and precautions.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 
+  [(col_name1 col_type1 [COMMENT col_comment1], ...)]
+  USING file_format 
+  [OPTIONS (key1=val1, key2=val2, ...)] 
+  [PARTITIONED BY (col_name1, col_name2, ...)]
+  [COMMENT table_comment]
+  [AS select_statement];
+
+ +
+
+

Keyword

+
+

Parameter Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

col_name

+

Column names with data types separated by commas (,). The column name contains letters, digits, and underscores (_). It cannot contain only digits and must contain at least one letter.

+

col_type

+

Field type

+

col_comment

+

Field description

+

file_format

+

Data storage format of DLI tables. The value can be parquet only.

+

table_comment

+

Table description

+

select_statement

+

The CREATE TABLE AS statement is used to insert the SELECT query result of the source table or a data record to a newly created DLI table.

+
+
+ +
+ + + + + + + + + + + + + +
Table 2 OPTIONS parameter description

Parameter

+

Description

+

Default Value

+

multiLevelDirEnable

+

Whether to iteratively query data in subdirectories. When this parameter is set to true, all files in the table path, including files in subdirectories, are iteratively read when a table is queried.

+

false

+

compression

+

Specified compression format. Generally, you need to set this parameter to zstd for parquet files.

+

-

+
+
+
+

Precautions

+ +
+

Example

+
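The following statement is a minimal sketch of the DataSource syntax above for a DLI table; the database name, table name, and columns are placeholders, and the compression option is taken from Table 2.

CREATE TABLE IF NOT EXISTS testdb.student_dli (
  name  STRING,
  score DOUBLE
)
USING parquet
OPTIONS (compression='zstd')
COMMENT 'DLI table created with the DataSource syntax';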
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0099.html b/docs/dli/sqlreference/dli_08_0099.html new file mode 100644 index 00000000..70372b89 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0099.html @@ -0,0 +1,196 @@ + + +

User-Defined Functions

+

Overview

DLI supports the following three types of user-defined functions (UDFs):

+
  • UDF: a regular user-defined function that processes one row of input and returns a single value.
  • UDTF: a user-defined table-valued function that can return multiple rows and columns for each input row.
  • UDAF: a user-defined aggregate function that aggregates multiple records into a single value.
+ +

UDFs can only be used in dedicated queues.

+
+

POM Dependency

<dependency>
+   <groupId>org.apache.flink</groupId>
+   <artifactId>flink-table_2.11</artifactId>
+   <version>1.7.2</version>
+   <scope>provided</scope>
+</dependency>
+<dependency>
+        <groupId>org.apache.flink</groupId>
+        <artifactId>flink-streaming-java_2.11</artifactId>
+        <version>1.7.2</version>
+        <scope>provided</scope>
+</dependency>
+
+

Precautions

+
+

Using UDFs

  1. Write the code of custom functions. For details about the code examples, see UDF, UDTF, or UDAF.
  2. Compile the UDF code, pack it into a JAR package, and upload the package to OBS.
  3. In the left navigation pane of the DLI management console, click Job Management > Flink Jobs. Locate the row where the target job resides and click Edit in the Operation column to switch to the page where you can edit the job.
  4. On the Running Parameters tab page, select an exclusive queue for Queue. The UDF Jar parameter is displayed. Select the JAR file stored on OBS and click Save.

    Before selecting a user-defined function JAR package, upload the JAR package to the created OBS bucket.

    +
    +

    After the JAR package is selected, add the UDF statement to the SQL statement.

    +
+
+

UDF

The regular UDF must inherit the ScalarFunction function and implement the eval method. The open and close functions are optional.

+
+

Example code

+
import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.ScalarFunction;
+public class UdfScalarFunction extends ScalarFunction {
+  private int factor = 12;
+  public UdfScalarFunction() {
+    this.factor = 12;
+  }
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  /**
+   * Custom logic
+   * @param s
+   * @return
+   */
+   public int eval(String s) {
+     return s.hashCode() * factor;
+   }
+   /**
+    * Optional
+    */
+   @Override
+   public void close() {}
+}
+

Example

+
1
+2
CREATE FUNCTION udf_test AS 'com.xxx.udf.UdfScalarFunction';
+INSERT INTO sink_stream select udf_test(attr) FROM source_stream;
+
+ +
+

UDTF

The UDTF must inherit the TableFunction function and implement the eval method. The open and close functions are optional. If the UDTF needs to return multiple columns, you only need to declare the returned value as Tuple or Row. If Row is used, you need to overload the getResultType method to declare the returned field type.

+
+

Example code

+
import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.TableFunction;
+import org.apache.flink.types.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+public class UdfTableFunction extends TableFunction<Row> {
+  private Logger log = LoggerFactory.getLogger(TableFunction.class);
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  public void eval(String str, String split) {
+    for (String s : str.split(split)) {
+      Row row = new Row(2);
+      row.setField(0, s);
+      row.setField(1, s.length());
+      collect(row);
+    }
+  }
+  /**
+   * Declare the type returned by the function
+   * @return
+   */
+  @Override
+  public TypeInformation<Row> getResultType() {
+  return Types.ROW(Types.STRING, Types.INT);
+  }
+  /**
+    * Optional
+   */
+  @Override
+  public void close() {}
+ }
+

Example

+

The UDTF supports CROSS JOIN and LEFT JOIN. When the UDTF is used, the LATERAL and TABLE keywords must be included.

+ +
1
+2
+3
+4
+5
+6
+7
CREATE FUNCTION udtf_test AS 'com.xxx.udf.TableFunction';
+// CROSS JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream, LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length);
+// LEFT JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream LEFT JOIN LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length) ON TRUE;
+
+ +
+

UDAF

A UDAF must extend the AggregateFunction class. You need to define an accumulator to store the intermediate computing result, for example, WeightedAvgAccum in the following example code.

+
+

Example code

+
public class WeightedAvgAccum {
+public long sum = 0;
+public int count = 0;
+}
+

+
import org.apache.flink.table.functions.AggregateFunction;
+import java.util.Iterator;
+/**
+* The first type variable is the type returned by the aggregation function, and the second type variable is of the Accumulator type.
+* Weighted Average user-defined aggregate function.
+*/
+public class UdfAggFunction extends AggregateFunction<Long, WeightedAvgAccum> {
+// Initialize the accumulator.
+  @Override
+  public WeightedAvgAccum createAccumulator() {
+    return new WeightedAvgAccum();
+  }
+// Return the intermediate computing value stored in the accumulator.
+  @Override
+  public Long getValue(WeightedAvgAccum acc) {
+    if (acc.count == 0) {
+       return null;
+    } else {
+      return acc.sum / acc.count;
+ }
+}
+// Update the intermediate computing value according to the input.
+public void accumulate(WeightedAvgAccum acc, long iValue) {
+acc.sum += iValue;
+acc.count += 1;
+}
+// Perform the retraction operation, which is opposite to the accumulate operation.
+public void retract(WeightedAvgAccum acc, long iValue) {
+acc.sum -= iValue;
+acc.count -= 1;
+}
+// Combine multiple accumulator values.
+public void merge(WeightedAvgAccum acc, Iterable<WeightedAvgAccum> it) {
+Iterator<WeightedAvgAccum> iter = it.iterator();
+while (iter.hasNext()) {
+WeightedAvgAccum a = iter.next();
+acc.count += a.count;
+acc.sum += a.sum;
+}
+}
+// Reset the intermediate computing value.
+public void resetAccumulator(WeightedAvgAccum acc) {
+acc.count = 0;
+acc.sum = 0L;
+}
+}
+

Example

+
1
+2
CREATE FUNCTION udaf_test AS 'com.xxx.udf.UdfAggFunction';
+INSERT INTO sink_stream SELECT udaf_test(attr2) FROM source_stream GROUP BY attr1;
+
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0100.html b/docs/dli/sqlreference/dli_08_0100.html new file mode 100644 index 00000000..7d930dac --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0100.html @@ -0,0 +1,233 @@ + + +

Importing Data to the Table

+

Function

The LOAD DATA function can be used to import data in CSV, Parquet, ORC, JSON, and Avro formats. The data is converted into the Parquet data format for storage.

+
+

Syntax

1
+2
LOAD DATA INPATH 'folder_path' INTO TABLE [db_name.]table_name
+  OPTIONS(property_name=property_value, ...);
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

folder_path

+

OBS path of the file or folder used for storing the raw data.

+

db_name

+

Enter the database name. If this parameter is not specified, the current database is used.

+

table_name

+

Name of the DLI table to which data is to be imported.

+
+
+

The following configuration options can be used during data import:

+ +
+ +

Precautions

+
+

Example

Before importing data, you must create a table. For details, see Creating an OBS Table or Creating a DLI Table.

+
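For illustration, the following is a minimal sketch of importing a CSV file; the database name testdb, the table name test_table, and the OBS path are placeholders, and the table is assumed to have been created with columns matching the file.

LOAD DATA INPATH 'obs://bucket-name/input/test.csv' INTO TABLE testdb.test_table;

Additional import options, if needed, are passed through the OPTIONS clause shown in the syntax above; the available option names depend on the format of the file being imported.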
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0101.html b/docs/dli/sqlreference/dli_08_0101.html new file mode 100644 index 00000000..8fabf3ce --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0101.html @@ -0,0 +1,69 @@ + + +

Other Functions

+

Array Functions

+
+ + + + + + + + + + + + + +
Table 1 Array functions

Function

+

Return Data Type

+

Description

+

CARDINALITY(ARRAY)

+

INT

+

Return the element count of an array.

+

ELEMENT(ARRAY)

+

-

+

Return the sole element of an array with a single element. If the array contains no elements, null is returned. If the array contains multiple elements, an exception is reported.

+
+
+

Example:

+

The returned number of elements in the array is 3.

+
insert into temp select CARDINALITY(ARRAY[TRUE, TRUE, FALSE]) from source_stream;
+

HELLO WORLD is returned.

+
insert into temp select ELEMENT(ARRAY['HELLO WORLD']) from source_stream;
+
+

Attribute Access Functions

+
+ + + + + + + + + + + + + +
Table 2 Attribute access functions

Function

+

Return Data Type

+

Description

+

tableName.compositeType.field

+

-

+

Selects a single field. Use the name to access a field of an Apache Flink composite type, such as Tuple or POJO, and return its value.

+

tableName.compositeType.*

+

-

+

Selects all fields and converts an Apache Flink composite type, such as Tuple or POJO, and all of its direct subtypes into a flat representation in which each subtype is a separate field.

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0102.html b/docs/dli/sqlreference/dli_08_0102.html new file mode 100644 index 00000000..d3cd84bb --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0102.html @@ -0,0 +1,194 @@ + + +

SELECT

+

SELECT

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]  { * | projectItem [, projectItem ]* }  
+  FROM tableExpression  
+  [ WHERE booleanExpression ]  
+  [ GROUP BY { groupItem [, groupItem ]* } ]  
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

The SELECT statement is used to select data from a table or insert constant data into a table.

+

Precautions

+ +

Example

+

Select the orders with more than three units.

+
1
insert into temp SELECT  * FROM Orders WHERE units > 3; 
+
+ +
+

Insert a group of constant data.

+
1
insert into temp select 'Lily', 'male', 'student', 17;
+
+ +
+
+

WHERE Filtering Clause

Syntax

+
1
+2
+3
SELECT   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+
+ +
+

Description

+

This statement is used to filter the query results using the WHERE clause.

+

Precautions

+ +

Example

+

Filter the orders with more than 3 and fewer than 10 units.

+
1
+2
insert into temp SELECT  * FROM Orders
+  WHERE units > 3 and units < 10; 
+
+ +
+
+

HAVING Filtering Clause

Function

+

This statement is used to filter the query results using the HAVING clause.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. GROUP BY applies first for grouping, and HAVING then applies for filtering. The HAVING clause supports arithmetic operations and aggregate functions.

+

Precautions

+

If the filtering condition depends on the results of GROUP BY, use the HAVING clause rather than the WHERE clause for filtering.

+

Example

+

Group the student table according to the name field and filter the records in which the maximum score is higher than 95 based on groups.

+
1
+2
+3
insert into temp SELECT name, max(score) FROM student
+  GROUP BY name
+  HAVING max(score) > 95;
+
+ +
+
+

Column-Based GROUP BY

Function

+

This statement is used to group a table based on columns.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.

+ +

Precautions

+

None

+

Example

+

Group the student table according to the score and name fields and return the grouping results.

+
1
+2
insert into temp SELECT name,score, max(score) FROM student 
+  GROUP BY name,score;
+
+ +
+
+

Expression-Based GROUP BY

Function

+

This statement is used to group a table according to expressions.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

groupItem can contain one or more fields. The fields can be referenced by string functions, but not by aggregate functions.

+

Precautions

+

None

+

Example

+

Use the substring function to obtain a substring of the name field, group the student table by the obtained substring, and return each substring and the number of records for it.

+
1
+2
insert into temp SELECT substring(name,6),count(name) FROM student
+  GROUP BY substring(name,6);
+
+ +
+
+

GROUP BY Using HAVING

Function

+

This statement filters a table after grouping it using the HAVING clause.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. GROUP BY applies first for grouping and HAVING then applies for filtering.

+

Precautions

+ +

Example

+

Group the transactions by num, use the HAVING clause to keep the groups in which the maximum value of price multiplied by amount is higher than 5000, and return the filtered results.

+
1
+2
+3
+4
insert into temp SELECT num, max(price*amount) FROM transactions
+  WHERE time > '2016-06-01'
+  GROUP BY num
+  HAVING max(price*amount)>5000;
+
+ +
+
+

UNION

Syntax

+
1
query UNION [ ALL ] query
+
+ +
+

Description

+

This statement is used to return the union set of multiple query results.

+

Precautions

+ +

Example

+

Output the union set of Orders1 and Orders2 without duplicate records.

+
1
+2
insert into temp SELECT  * FROM Orders1
+  UNION SELECT  * FROM Orders2;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0103.html b/docs/dli/sqlreference/dli_08_0103.html new file mode 100644 index 00000000..fcc0340a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0103.html @@ -0,0 +1,77 @@ + + +

Condition Expression

+

CASE Expression

Syntax

+
1
+2
+3
CASE value WHEN value1 [, value11 ]* THEN result1
+  [ WHEN valueN [, valueN1 ]* THEN resultN ]* [ ELSE resultZ ]
+  END
+
+ +
+

or

+
1
+2
+3
CASE WHEN condition1 THEN result1
+  [ WHEN conditionN THEN resultN ]* [ ELSE resultZ ]
+  END
+
+ +
+

Description

+ +

Precautions

+ +

Example

+

If the value of units equals 5, 1 is returned. Otherwise, 0 is returned.

+

Example 1:

+
1
insert into temp SELECT  CASE units WHEN 5 THEN 1 ELSE 0 END FROM Orders;
+
+ +
+

Example 2:

+
1
insert into temp SELECT CASE WHEN units = 5 THEN 1 ELSE 0 END FROM Orders;
+
+ +
+
+

NULLIF Expression

Syntax

+
1
NULLIF(value, value)
+
+ +
+

Description

+

If the two values are equal, NULL is returned; otherwise, the first value is returned. For example, NULLIF(5,5) returns NULL and NULLIF(5,0) returns 5.

+

Precautions

+

None

+

Example

+

If the value of units equals 3, null is returned. Otherwise, the value of units is returned.

+
1
insert into temp SELECT  NULLIF(units, 3) FROM Orders;
+
+ +
+
+

COALESCE Expression

Syntax

+
1
COALESCE(value, value [, value ]* )
+
+ +
+

Description

+

Return the first value that is not NULL, counting from left to right.

+

Precautions

+

All values must be of the same type.

+

Example

+

5 is returned from the following example:

+
1
insert into temp SELECT  COALESCE(NULL, 5) FROM Orders;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0104.html b/docs/dli/sqlreference/dli_08_0104.html new file mode 100644 index 00000000..c32b9003 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0104.html @@ -0,0 +1,433 @@ + + +

Aggregate Functions

+

An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved by an SQL statement. Table 1 lists aggregate functions.

+
Sample data: Table T1
|score|
+|81   |
+|100  |
+|60   |
+|95   |
+|86   |
+
+

Common Aggregate Functions

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Common aggregation functions

Function

+

Return Data Type

+

Description

+

COUNT(*)

+

BIGINT

+

Return count of tuples.

+

COUNT([ ALL ] expression...

+

BIGINT

+

Returns the number of input rows for which the expression is not NULL. Use DISTINCT for a unique instance of each value.

+

AVG(numeric)

+

DOUBLE

+

Return average (arithmetic mean) of all input values.

+

SUM(numeric)

+

DOUBLE

+

Return the sum of all input numerical values.

+

MAX(value)

+

DOUBLE

+

Return the maximum value of all input values.

+

MIN(value)

+

DOUBLE

+

Return the minimum value of all input values.

+

STDDEV_POP(value)

+

DOUBLE

+

Return the population standard deviation of all numeric fields of all input values.

+

STDDEV_SAMP(value)

+

DOUBLE

+

Return the sample standard deviation of all numeric fields of all input values.

+

VAR_POP(value)

+

DOUBLE

+

Return the population variance (square of population standard deviation) of numeral fields of all input values.

+

VAR_SAMP(value)

+

DOUBLE

+

Return the sample variance (square of the sample standard deviation) of numeric fields of all input values.

+
+
+
+

Example

+
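The following is a minimal sketch based on the sample table T1 above; the result table name temp is assumed for illustration.

insert into temp SELECT COUNT(score), SUM(score), AVG(score), MAX(score), MIN(score) FROM T1;

For the five sample rows, COUNT(score) is 5, SUM(score) is 422, AVG(score) is 84.4, MAX(score) is 100, and MIN(score) is 60.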
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0105.html b/docs/dli/sqlreference/dli_08_0105.html new file mode 100644 index 00000000..f377aa39 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0105.html @@ -0,0 +1,48 @@ + + +

Viewing Table Statistics

+

Function

This statement is used to view the table statistics. The names and data types of all columns in a specified table will be returned.

+
+

Syntax

1
DESCRIBE [EXTENDED|FORMATTED] [db_name.]table_name;
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+
+
+
+

Precautions

The to-be-queried table must exist. If this statement is used to query the information about a table that does not exist, an error is reported.

+
+

Example

To query the names and data types of all columns in the student table, run the following statement:

+
1
DESCRIBE student;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0106.html b/docs/dli/sqlreference/dli_08_0106.html new file mode 100644 index 00000000..6adce888 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0106.html @@ -0,0 +1,146 @@ + + +

JOIN Between Stream Data and Table Data

+

The JOIN operation allows you to query data from a table and write the query result to the sink stream. Currently, only RDS and DCS Redis tables are supported. The ON keyword specifies the key used for the query, and the queried value field is then written to the sink stream.

+

For details about the data definition statements of RDS tables, see Creating an RDS Table.

+

For details about the data definition statements of Redis tables, see Creating a Redis Table.

+

Syntax

1
+2
FROM tableExpression JOIN tableExpression
+  ON value11 = value21 [ AND value12 = value22]
+
+ +
+
+

Syntax Description

The ON keyword supports only equality conditions on table attributes. If a level-2 key exists (that is, the Redis value type is HASH), use the AND keyword to express the equality conditions on both the key and the hash key.

+
+

Precautions

None

+
+

Example

Perform an equi-join between the vehicle information source stream and the vehicle price table, obtain the vehicle price data, and write it into the vehicle information sink stream.

+
+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
CREATE SOURCE STREAM car_infos (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_detail_type STRING
+)
+WITH (
+  type = "dis",
+  region = "",
+  channel = "dliinput",
+  partition_count = "1",
+  encode = "csv",
+  field_delimiter = ","
+);
+
+/** Create a data dimension table to connect to the source stream to fulfill field backfill.
+  *
+  * Reconfigure the following options according to actual conditions:
+  * value_type: indicates the value type of the Redis key value. The value can be STRING, HASH, SET, ZSET, or LIST. For the HASH type, you need to specify hash_key_column as the layer-2 primary key. For the SET type, you need to concatenate all queried values using commas (,).
+  * key_column: indicates the column name corresponding to the primary key of the dimension table.
+  * hash_key_column: indicates the column name corresponding to the KEY of the HASHMAP when value_type is HASH. If value_type is not HASH, you do not need to set this option.
+  * cluster_address: indicates the DCS Redis cluster address.
+  * password: indicates the DCS Redis cluster password.
+  **/
+CREATE TABLE car_price_table (
+  car_brand STRING,
+  car_detail_type STRING,
+  car_price STRING
+)
+WITH (
+  type = "dcs_redis",
+  value_type = "hash",
+  key_column = "car_brand",
+  hash_key_column = "car_detail_type",
+  cluster_address = "192.168.1.238:6379",
+  password = "xxxxxxxx"
+);
+
+CREATE SINK STREAM audi_car_owner_info (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_detail_type STRING,
+  car_price STRING
+)
+WITH (
+  type = "dis",
+  region = "",
+  channel = "dlioutput",
+  partition_key = "car_owner",
+  encode = "csv",
+  field_delimiter = ","
+);
+
+INSERT INTO audi_car_owner_info
+SELECT t1.car_id, t1.car_owner, t2.car_brand, t1.car_detail_type, t2.car_price
+FROM car_infos as t1 join car_price_table as t2
+ON t2.car_brand = t1.car_brand and t2.car_detail_type = t1.car_detail_type
+WHERE t1.car_brand = "audi";
+
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0107.html b/docs/dli/sqlreference/dli_08_0107.html new file mode 100644 index 00000000..b1a75c83 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0107.html @@ -0,0 +1,186 @@ + + +

Configuring Time Models

+

Flink provides two time models: processing time and event time.

+

DLI allows you to specify the time model during creation of the source stream and temporary stream.

+

Configuring Processing Time

Processing time refers to the system time, which is independent of the timestamp carried in the data.

+

Syntax

+
1
+2
+3
+4
CREATE SOURCE STREAM stream_name(...) WITH (...)
+TIMESTAMP BY proctime.proctime;
+CREATE TEMP STREAM stream_name(...)
+TIMESTAMP BY proctime.proctime;
+
+ +
+

Description

+

To set the processing time, you only need to add proctime.proctime following TIMESTAMP BY. You can directly use the proctime field later.

+

Precautions

+

None

+

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
CREATE SOURCE STREAM student_scores (
+  student_number STRING, /* Student ID */
+  student_name STRING, /* Name */
+  subject STRING, /* Subject */
+  score INT /* Score */
+)
+WITH (
+  type = "dis",
+  region = "",
+  channel = "dliinput",
+  partition_count = "1",
+  encode = "csv",
+  field_delimiter=","
+)TIMESTAMP BY proctime.proctime;
+
+INSERT INTO score_greate_90
+SELECT student_name, sum(score) over (order by proctime RANGE UNBOUNDED PRECEDING) 
+FROM student_scores;
+
+ +
+
+

Configuring Event Time

Event time refers to the time when an event occurs, that is, the timestamp generated when the data is produced.

+

Syntax

+
1
+2
+3
CREATE SOURCE STREAM stream_name(...) WITH (...)
+TIMESTAMP BY {attr_name}.rowtime
+SET WATERMARK (RANGE {time_interval} | ROWS {literal}, {time_interval});
+
+ +
+

Description

+

To set the event time, you need to select a certain attribute in the stream as the timestamp and set the watermark policy.

+

Out-of-order or late events may occur due to network faults. A watermark must be configured so that a window waits for a certain period of time before it is triggered for calculation. Watermarks are mainly used to handle out-of-order data that arrives at DLI during stream processing.

+

The following two watermark policies are available:

+ +

Parameters are separated by commas (,). The first parameter indicates the watermark sending interval and the second indicates the maximum event delay.

+
+

Precautions

+

None

+

Example

+ +
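The following is a hedged sketch of configuring the event time: the stream definition reuses the DIS source pattern from the processing-time example, and the field name car_timestamp as well as the watermark interval values are illustrative assumptions.

CREATE SOURCE STREAM car_infos (
  car_id STRING,
  car_speed INT,
  car_timestamp LONG /* Timestamp carried in the data (assumed field) */
)
WITH (
  type = "dis",
  region = "",
  channel = "dliinput",
  encode = "csv",
  field_delimiter = ","
)
TIMESTAMP BY car_timestamp.rowtime
SET WATERMARK (RANGE interval 30 second, interval 10 second);

Following the RANGE form of the syntax above, the watermark is sent every 30 seconds and events up to 10 seconds late are still assigned to their window.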
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0108.html b/docs/dli/sqlreference/dli_08_0108.html new file mode 100644 index 00000000..2e621783 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0108.html @@ -0,0 +1,223 @@ + + +

Pattern Matching

+

Complex event processing (CEP) is used to detect complex patterns in unbounded data streams so as to identify and search for patterns across rows of data. Pattern matching is a powerful aid to complex event handling.

+

CEP is used in a collection of event-driven business processes, such as abnormal behavior detection in secure applications and the pattern of searching for prices, transaction volume, and other behavior in financial applications. It also applies to fraud detection and sensor data analysis.

+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
MATCH_RECOGNIZE (
+      [ PARTITION BY expression [, expression ]* ]
+      [ ORDER BY orderItem [, orderItem ]* ]
+      [ MEASURES measureColumn [, measureColumn ]* ]
+      [ ONE ROW PER MATCH | ALL ROWS PER MATCH ]
+      [ AFTER MATCH
+            ( SKIP TO NEXT ROW
+            | SKIP PAST LAST ROW
+            | SKIP TO FIRST variable
+            | SKIP TO LAST variable
+            | SKIP TO variable )
+      ]
+      PATTERN ( pattern )
+      [ WITHIN intervalLiteral ]
+      DEFINE variable AS condition [, variable AS condition ]*
+) MR
+
+ +
+

Pattern matching in SQL is performed using the MATCH_RECOGNIZE clause. MATCH_RECOGNIZE enables you to do the following tasks:

+
  • Logically partition and order the data that is used in the MATCH_RECOGNIZE clause with its PARTITION BY and ORDER BY clauses.
  • Define patterns of rows to seek using the PATTERN clause of the MATCH_RECOGNIZE clause. These patterns use regular expression syntax.
  • Specify the logical conditions required to map a row to a row pattern variable in the DEFINE clause.
  • Define measures, which are expressions usable in other parts of the SQL query, in the MEASURES clause.
+
+
+

Syntax description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Syntax description

Parameter

+

Mandatory

+

Description

+

PARTITION BY

+

No

+

Logically divides the rows into groups.

+

ORDER BY

+

No

+

Logically orders the rows in a partition.

+

[ONE ROW | ALL ROWS] PER MATCH

+

No

+

Chooses summaries or details for each match.

+
  • ONE ROW PER MATCH: Each match produces one summary row.
  • ALL ROWS PER MATCH: A match spanning multiple rows will produce one output row for each row in the match.
+

The following provides an example:

+
		SELECT * FROM MyTable MATCH_RECOGNIZE
+		(
+		  MEASURES AVG(B.id) as Bid
+		  ALL ROWS PER MATCH
+		  PATTERN (A B C)
+		  DEFINE
+			A AS A.name = 'a',
+			B AS B.name = 'b',
+			C as C.name = 'c'
+		) MR
+

Example description

+

Assume that the format of MyTable is (id, name) and there are three data records: (1, a), (2, b), and (3, c).

+

ONE ROW PER MATCH outputs one summary row, in which the average value of B is 2.

+

ALL ROWS PER MATCH outputs each record and the average value of B, specifically, (1,a, null), (2,b,2), (3,c,2).

+

MEASURES

+

No

+

Defines calculations for export from the pattern matching.

+

PATTERN

+

Yes

+

Defines the row pattern that will be matched.

+
  • PATTERN (A B C) indicates to detect concatenated events A, B, and C.
  • PATTERN (A | B) indicates to detect A or B.
+
  • Modifiers
    • *: 0 or more iterations. For example, A* indicates to match A for 0 or more times.
    • +: 1 or more iterations. For example, A+ indicates to match A for 1 or more times.
    • ? : 0 or 1 iteration. For example, A? indicates to match A for 0 times or once.
    • {n}: n iterations (n > 0). For example, A{5} indicates to match A for five times.
    • {n,}: n or more iterations (n ≥ 0). For example, A{5,} indicates to match A for five or more times.
    • {n, m}: between n and m (inclusive) iterations (0 ≤ n ≤ m, 0 < m). For example, A{3,6} indicates to match A for 3 to 6 times.
    • {, m}: between 0 and m (inclusive) iterations (m > 0). For example, A{,4} indicates to match A for 0 to 4 times.
    +
+

DEFINE

+

Yes

+

Defines primary pattern variables.

+

AFTER MATCH SKIP

+

No

+

Defines where to restart the matching process after a match is found.

+
  • SKIP TO NEXT ROW: Resumes pattern matching at the row after the first row of the current match.
  • SKIP PAST LAST ROW: Resumes pattern matching at the next row after the last row of the current match.
  • SKIP TO FIRST variable: Resumes pattern matching at the first row that is mapped to the pattern variable.
  • SKIP TO LAST variable: Resumes pattern matching at the last row that is mapped to the pattern variable.
  • SKIP TO variable: Same as SKIP TO LAST variable.
+
+
+
+

Functions Supported by CEP

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 2 Function description

Function

+

Description

+

MATCH_NUMBER()

+

Finds which rows are in which match. It can be used in the MEASURES and DEFINE clauses.

+

CLASSIFIER()

+

Finds which pattern variable applies to which rows. It can be used in the MEASURES and DEFINE clauses.

+

FIRST()/LAST()

+

FIRST returns the value of an expression evaluated in the first row of the group of rows mapped to a pattern variable. LAST returns the value of an expression evaluated in the last row of the group of rows mapped to a pattern variable. In PATTERN (A B+ C), FIRST (B.id) indicates the ID of the first B in the match, and LAST (B.id) indicates the ID of the last B in the match.

+

NEXT()/PREV()

+

Relative offset, which can be used in DEFINE. For example, PATTERN (A B+) DEFINE B AS B.price > PREV(B.price)

+

RUNNING/FINAL

+

RUNNING indicates to match the middle value, while FINAL indicates to match the final result value. Generally, RUNNING/FINAL is valid only in ALL ROWS PER MATCH. For example, if there are three records (a, 2), (b, 6), and (c, 12), then the values of RUNNING AVG (A.price) and FINAL AVG (A.price) are (2,6), (4,6), (6,6).

+

Aggregate functions (COUNT, SUM, AVG, MAX, MIN)

+

Aggregation operations. These functions can be used in the MEASURES and DEFINE clauses. For details, see Aggregate Functions.

+
+
+
+

Example

+

CEP performs pattern matching on vehicle data collected within 5 minutes by cameras installed on urban roads or highways in different areas, based on license plate switchover features.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
INSERT INTO fake_licensed_car
+SELECT * FROM camera_license_data MATCH_RECOGNIZE
+(
+  PARTITION BY car_license_number
+  ORDER BY proctime
+  MEASURES A.car_license_number as car_license_number, A.camera_zone_number as first_zone, B.camera_zone_number as second_zone
+  ONE ROW PER MATCH
+  AFTER MATCH SKIP TO LAST C
+  PATTERN (A B+ C)
+  WITHIN interval '5' minute
+  DEFINE
+    B AS B.camera_zone_number <> A.camera_zone_number,
+	C AS C.camera_zone_number = A.camera_zone_number
+) MR;
+
+ +
+

+

According to this rule, if a vehicle with a given license plate number drives from area A to area B, but another vehicle with the same license plate number is detected in area A within 5 minutes, the vehicle in area A is considered to carry a fake license plate.

+

Input data:

+
Zhejiang B88888, zone_A
+Zhejiang AZ626M, zone_A
+Zhejiang B88888, zone_A
+Zhejiang AZ626M, zone_A
+Zhejiang AZ626M, zone_A
+Zhejiang B88888, zone_B
+Zhejiang B88888, zone_B
+Zhejiang AZ626M, zone_B
+Zhejiang AZ626M, zone_B
+Zhejiang AZ626M, zone_C
+Zhejiang B88888, zone_A
+Zhejiang B88888, zone_A
+

The output is as follows:

+
Zhejiang B88888, zone_A, zone_B
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0109.html b/docs/dli/sqlreference/dli_08_0109.html new file mode 100644 index 00000000..3e52dc99 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0109.html @@ -0,0 +1,21 @@ + + +

StreamingML

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0110.html b/docs/dli/sqlreference/dli_08_0110.html new file mode 100644 index 00000000..285dc8dd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0110.html @@ -0,0 +1,115 @@ + + +

Anomaly Detection

+

Anomaly detection applies to various scenarios, including intrusion detection, financial fraud detection, sensor data monitoring, medical diagnosis, natural data detection, and more. The typical algorithms for anomaly detection include the statistical modeling method, distance-based calculation method, linear model, and nonlinear model.

+

DLI uses an anomaly detection method based on the random forest, which has the following characteristics:

+ +

Syntax

1
SRF_UNSUP(ARRAY[Field 1, Field 2, ...], 'Optional parameter list')
+
+ +
+
  • The anomaly score returned by the function is a DOUBLE value in the range of [0, 1].
  • The fields must be of the same type. If the field types are different, you can use the CAST function to convert them, for example, [a, CAST(b as DOUBLE)].
  • The syntax of the optional parameter list is as follows: "key1=value,key2=value2,..."
+
+
+

Parameter Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter Description

Parameter

+

Mandatory

+

Description

+

Default Value

+

transientThreshold

+

No

+

Threshold at which a change in the histogram indicates a change in the data.

+

5

+

numTrees

+

No

+

Number of trees composing the random forest.

+

15

+

maxLeafCount

+

No

+

Maximum number of leaf nodes one tree can have.

+

15

+

maxTreeHeight

+

No

+

Maximum height of the tree.

+

12

+

seed

+

No

+

Random seed value used by the algorithm.

+

4010

+

numClusters

+

No

+

Number of types of data to be detected. By default, the following two data types are available: anomalous and normal data.

+

2

+

dataViewMode

+

No

+

Algorithm learning mode.

+
  • Value history indicates that all historical data is considered.
  • Value horizon indicates that only historical data of a recent time period (typically a size of 4 windows) is considered.
+

history

+
+
+
+

Example

Anomaly detection is conducted on the c field in data stream MyTable. If the anomaly score is greater than 0.8, the record is considered anomalous.

+
1
+2
+3
+4
+5
+6
SELECT c,
+	CASE WHEN SRF_UNSUP(ARRAY[c], "numTrees=15,seed=4010") OVER (ORDER BY proctime RANGE BETWEEN INTERVAL '99' SECOND PRECEDING AND CURRENT ROW) > 0.8
+		 THEN 'anomaly' 
+		 ELSE 'not anomaly' 
+	END
+FROM MyTable  
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0111.html b/docs/dli/sqlreference/dli_08_0111.html new file mode 100644 index 00000000..10ab8ed1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0111.html @@ -0,0 +1,138 @@ + + +

Time Series Forecasting

+

Modeling and forecasting time series is a common task in many business verticals. Modeling is used to extract meaningful statistics and other characteristics of the data. Forecasting is the use of a model to predict future data. DLI provides a series of stochastic linear models to help users conduct online modeling and forecasting in real time.

+

ARIMA (Non-Seasonal)

Auto-Regressive Integrated Moving Average (ARIMA) is a classical model used for time series forecasting and is closely correlated with the AR, MA, and ARMA models.

+
+ +

Syntax

+
1
+2
+3
+4
+5
AR_PRED(field, degree): Use the AR model to forecast new data.
+AR_COEF(field, degree): Return the weight of the AR model.
+ARMA_PRED(field, degree): Use the ARMA model to forecast new data.
+ARMA_COEF(field, degree): Return the weight of the ARMA model.
+ARIMA_PRED(field, degree, derivativeOrder): Use ARIMA to forecast new data.
+
+ +
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter Description

Parameter

+

Mandatory

+

Description

+

Default Value

+

field

+

Yes

+

Name of the field in the data stream whose data is used for prediction.

+

-

+

degree

+

No

+

Defines how many steps in the past are going to be considered for the next prediction. Currently, only "p = q = degree" is allowed.

+

5

+

derivativeOrder

+

No

+

Derivative order. Generally, this parameter is set to 1 or 2.

+

1

+
+
+

Example

+

Separately use AR, ARMA, and ARIMA to forecast the time series ordered by rowtime.

+
1
+2
+3
+4
+5
SELECT b, 
+	AR_PRED(b) OVER (ORDER BY rowtime ROWS  BETWEEN 5 PRECEDING AND CURRENT ROW) AS ar, 
+	ARMA_PRED(b) OVER (ORDER BY rowtime ROWS  BETWEEN 5 PRECEDING AND CURRENT ROW) AS arma,  
+	ARIMA_PRED(b) OVER (ORDER BY rowtime ROWS  BETWEEN 5 PRECEDING AND CURRENT ROW) AS arima 
+FROM MyTable
+
+ +
+

Holt Winters

The Holt-Winters algorithm is one of the Exponential smoothing methods used to forecast seasonal data in time series.

+
+

Syntax

+
1
HOLT_WINTERS(field, seasonality, forecastOrder)
+
+ +
+ +
+ + + + + + + + + + + + + + + + + +
Table 2 Parameter Description

Parameter

+

Mandatory

+

Description

+

field

+

Yes

+

Name of the field in the data stream whose data is used for prediction.

+

seasonality

+

Yes

+

Seasonality space used to perform the prediction. For example, if data samples are collected daily, and the season space to consider is a week, then seasonality is 7.

+

forecastOrder

+

No

+

Value to be forecast, specifically, the number of steps to be considered in the future for producing the forecast.

+

If forecastOrder is set to 1, the algorithm forecasts the next value.

+

If forecastOrder is set to 2, the algorithm forecasts the value of 2 steps ahead in the future. The default value is 1.

+

When using this parameter, ensure that the OVER window size is greater than the value of this parameter.

+
+
+

Example

+

Use Holt-Winters to forecast time series ordered by rowtime.

+
1
+2
+3
+4
SELECT b, 
+	HOLT_WINTERS(b, 5) OVER (ORDER BY rowtime ROWS  BETWEEN 5 PRECEDING AND CURRENT ROW) AS a1, 
+	HOLT_WINTERS(b, 5, 2) OVER (ORDER BY rowtime ROWS  BETWEEN 5 PRECEDING AND CURRENT ROW) AS a2
+FROM MyTable  
+
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0112.html b/docs/dli/sqlreference/dli_08_0112.html new file mode 100644 index 00000000..72217190 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0112.html @@ -0,0 +1,202 @@ + + +

Type Conversion Functions

+

Syntax

CAST(value AS type)
+
+

Syntax Description

This function is used to forcibly convert types.

+
+

Precautions

+
+

Example

Convert amount into a character string. The length specified for the string does not take effect after the conversion.

+
insert into temp select cast(amount as VARCHAR(10)) from source_stream;
+
+

Common Type Conversion Functions

+
+ + + + + + + + + + + + + + + + +
Table 1 Common type conversion functions

Function

+

Description

+

cast(v1 as varchar)

+

Converts v1 to a string. The value of v1 can be of the numeric type or of the timestamp, date, or time type.

+

cast (v1 as int)

+

Converts v1 to the int type. The value of v1 can be a number or a character.

+

cast(v1 as timestamp)

+

Converts v1 to the timestamp type. The value of v1 can be of the string, date, or time type.

+

cast(v1 as date)

+

Converts v1 to the date type. The value of v1 can be of the string or timestamp type.

+
+
+
+ +

Detailed Sample Code

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
/** source **/
+CREATE
+SOURCE STREAM car_infos (cast_int_to_varchar int, cast_String_to_int string,
+case_string_to_timestamp string, case_timestamp_to_date timestamp) WITH (
+  type = "dis",
+  region = "xxxxx",
+  channel = "dis-input",
+  partition_count = "1",
+  encode = "json",
+  offset = "13",
+  json_config =
+"cast_int_to_varchar=cast_int_to_varchar;cast_String_to_int=cast_String_to_int;case_string_to_timestamp=case_string_to_timestamp;case_timestamp_to_date=case_timestamp_to_date"
+ 
+);
+/** sink **/
+CREATE
+SINK STREAM cars_infos_out (cast_int_to_varchar varchar, cast_String_to_int
+int, case_string_to_timestamp timestamp, case_timestamp_to_date date) WITH (
+  type = "dis",
+  region = "xxxxx",
+  channel = "dis-output",
+  partition_count = "1",
+  encode = "json",
+  offset = "4",
+  json_config =
+"cast_int_to_varchar=cast_int_to_varchar;cast_String_to_int=cast_String_to_int;case_string_to_timestamp=case_string_to_timestamp;case_timestamp_to_date=case_timestamp_to_date",
+  enable_output_null="true"
+);
+/** Statistics on static car information**/
+INSERT
+INTO
+  cars_infos_out
+SELECT
+  cast(cast_int_to_varchar as varchar),
+  cast(cast_String_to_int as int),
+  cast(case_string_to_timestamp as timestamp),
+  cast(case_timestamp_to_date as date)
+FROM
+  car_infos;
+
+ +
+

Returned data

+
{"case_string_to_timestamp":1514736001000,"cast_int_to_varchar":"5","case_timestamp_to_date":"2018-01-01","cast_String_to_int":100}
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0118.html b/docs/dli/sqlreference/dli_08_0118.html new file mode 100644 index 00000000..d741ee1c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0118.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with an HBase Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0119.html b/docs/dli/sqlreference/dli_08_0119.html new file mode 100644 index 00000000..4b9903a1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0119.html @@ -0,0 +1,120 @@ + + +

Creating a DLI Table and Associating It with HBase

+

Function

This statement is used to create a DLI table and associate it with an existing HBase table.

+
+

Prerequisites

+
+

Syntax

+
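The following sketch shows the general form, inferred from the keyword table and the example at the end of this section; treat it as an approximation rather than the exact grammar.

CREATE TABLE [IF NOT EXISTS] TABLE_NAME (
  ATTR1 TYPE,
  ATTR2 TYPE,
  ATTR3 TYPE)
USING [CLOUDTABLE | HBASE] OPTIONS (
  'ZKHost' = 'xx',
  'TableName' = 'TABLE_IN_HBASE',
  'RowKey' = 'ATTR1',
  'Cols' = 'ATTR2:CF1.C1, ATTR3:CF1.C2');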
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

USING [CLOUDTABLE | HBASE]

+

Specifies whether the HBase datasource is CLOUDTABLE or HBASE. The value is case-insensitive.

+

ZKHost

+

ZooKeeper IP address of the HBase cluster.

+

Before obtaining the ZooKeeper IP address, you need to create a datasource connection first.

+
  • Access the CloudTable cluster and enter the ZooKeeper IP address (internal network).
  • To access the MRS cluster, enter the IP address of the node where the ZooKeeper is located and the external port number of the ZooKeeper. The format is ZK_IP1:ZK_PORT1,ZK_IP2:ZK_PORT2.
+
NOTE:
+

TableName

+

Specifies the name of a table that has been created in the HBase cluster.

+

RowKey

+

Specifies the row key field of the table connected to DLI. Single and composite row keys are supported. A single row key can be of the numeric or string type; its length does not need to be specified. A composite row key supports only fixed-length data of the string type, in the format attribute name 1:length, attribute name 2:length.

+

Cols

+

Provides mappings between fields in the DLI table and columns in the HBase table. The mappings are separated by commas (,). In a mapping, the field in the DLI table is located before the colon (:) and information about the HBase table follows the colon (:). In the HBase table information, the column family and column name are separated using a dot (.).

+
+
+
+

Precautions

+
+

Example

1
+2
+3
+4
+5
+6
+7
+8
+9
CREATE TABLE test_hbase(
+ATTR1 int,
+ATTR2 int,
+ATTR3 string)
+using hbase OPTIONS (
+'ZKHost'='to-hbase-1174405101-CE1bDm5B.datasource.com:2181',
+'TableName'='HBASE_TABLE',
+'RowKey'='ATTR1',
+'Cols'='ATTR2:CF1.C1, ATTR3:CF1.C2');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0120.html b/docs/dli/sqlreference/dli_08_0120.html new file mode 100644 index 00000000..9a57ab5d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0120.html @@ -0,0 +1,99 @@ + + +

Inserting Data to an HBase Table

+

Function

This statement is used to insert data in a DLI table to the associated HBase table.

+
+

Syntax

+
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Indicates the table that contains the data to be queried.

+

field1,field2..., field

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

+
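The following is a hedged sketch that reuses the test_hbase table created in the previous section; the source table dli_test and the inserted values are placeholders.

INSERT INTO test_hbase SELECT ATTR1, ATTR2, ATTR3 FROM dli_test;
INSERT INTO test_hbase VALUES(1, 100, 'test_value');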
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0121.html b/docs/dli/sqlreference/dli_08_0121.html new file mode 100644 index 00000000..4019514a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0121.html @@ -0,0 +1,72 @@ + + +

Querying an HBase Table

+

This statement is used to query data in an HBase table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. The number parameter supports only the INT type.

+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

Query data in the test_ct table.

+
1
SELECT * FROM test_hbase limit 100;
+
+ +
+
+

Query Pushdown

Query pushdown implements data filtering using HBase. Specifically, the HBase client sends the filter criteria to the HBase server, and the HBase server returns only the required data, speeding up your Spark SQL queries. For filter criteria that HBase does not support, for example, queries on a composite row key, Spark SQL performs the data filtering.

+ +
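For example, in the following hedged sketch based on the test_hbase table defined earlier, the filter on the row key column ATTR1 can be evaluated by HBase, so only matching rows are returned to Spark SQL:

SELECT * FROM test_hbase WHERE ATTR1 = 10;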
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0122.html b/docs/dli/sqlreference/dli_08_0122.html new file mode 100644 index 00000000..fefe1a2f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0122.html @@ -0,0 +1,71 @@ + + +

Creating a DLI Table and Associating It with OpenTSDB

+

Function

Run the CREATE TABLE statement to create a DLI table and associate it with an existing metric in OpenTSDB. This syntax supports both CloudTable OpenTSDB and MRS OpenTSDB.

+
+

Prerequisites

Before creating a DLI table and associating it with OpenTSDB, you need to create a datasource connection. For details about operations on the management console, see

+
+

Syntax

1
+2
+3
+4
+5
CREATE TABLE [IF NOT EXISTS] UQUERY_OPENTSDB_TABLE_NAME
+  USING OPENTSDB OPTIONS (
+  'host' = 'xx;xx',
+  'metric' = 'METRIC_NAME',
+  'tags' = 'TAG1,TAG2');
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

host

+

OpenTSDB IP address.

+

Before obtaining the OpenTSDB IP address, you need to create a datasource connection first.

+
  • After successfully creating a connection, you can access the CloudTable OpenTSDB by entering the IP address of the OpenTSDB.
  • You can also access the MRS OpenTSDB. If you have created an enhanced datasource connection, enter the IP address and port number of the node where the OpenTSDB is located. The format is IP:PORT. If the OpenTSDB has multiple nodes, enter one of the node IP addresses.
+

metric

+

Name of the metric in OpenTSDB corresponding to the DLI table to be created.

+

tags

+

Tags corresponding to the metric. The tags are used for classification, filtering, and quick retrieval. You can set 1 to 8 tags, which are separated by commas (,). The parameter value must include all tag keys (tagk) of the corresponding metric.

+
+
+
+

Precautions

When creating a DLI table, you do not need to specify the timestamp and value fields. The system automatically builds the following fields based on the specified tags. The fields TAG1 and TAG2 are specified by tags.

+ +
+

Example

1
+2
+3
+4
+5
CREATE table opentsdb_table
+  USING OPENTSDB OPTIONS (
+  'host' = 'opentsdb-3xcl8dir15m58z3.cloudtable.com:4242',
+  'metric' = 'city.temp',
+  'tags' = 'city,location');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0123.html b/docs/dli/sqlreference/dli_08_0123.html new file mode 100644 index 00000000..1649663c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0123.html @@ -0,0 +1,51 @@ + + +

Inserting Data to the OpenTSDB Table

+

Function

Run the INSERT INTO statement to insert the data in the DLI table to the associated OpenTSDB metric.

+

If no metric exists on the OpenTSDB, a new metric is automatically created on the OpenTSDB when data is inserted.

+
+
+

Syntax

1
INSERT INTO TABLE TABLE_NAME SELECT * FROM DLI_TABLE;
+
+ +
+
1
INSERT INTO TABLE TABLE_NAME VALUES(XXX);
+
+ +
+
+

Keyword

+
+ + + + + + + + + + +
Table 1 INSERT INTO parameter description

Parameter

+

Description

+

TABLE_NAME

+

Name of the associated OpenTSDB table.

+

DLI_TABLE

+

Name of the DLI table created.

+
+
+
+

Precautions

+
+

Example

1
INSERT INTO TABLE opentsdb_table VALUES('xxx','xxx','2018-05-03 00:00:00',21);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0124.html b/docs/dli/sqlreference/dli_08_0124.html new file mode 100644 index 00000000..fdd325d0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0124.html @@ -0,0 +1,28 @@ + + +

Querying an OpenTSDB Table

+

This SELECT command is used to query data in an OpenTSDB table.

+
  • If no metric exists in OpenTSDB, an error will be reported when the corresponding DLI table is queried.
  • If the security mode is enabled, you need to set conf:dli.sql.mrs.opentsdb.ssl.enabled to true when connecting to OpenTSDB.
+
+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. The number parameter supports only the INT type.

+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

Query data in the opentsdb_table table.

+
1
SELECT * FROM opentsdb_table limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0125.html b/docs/dli/sqlreference/dli_08_0125.html new file mode 100644 index 00000000..c7223e8e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0125.html @@ -0,0 +1,63 @@ + + +

Reserved Keywords

+

Flink SQL reserves some strings as keywords. If you want to use the following character strings as field names, ensure that they are enclosed by back quotes, for example, `value` and `count`.

+
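For example, in the following minimal sketch (the stream names temp and source_stream are assumed), the reserved words count and value used as field names are escaped with back quotes:

insert into temp select `count`, `value` from source_stream;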

A

+
+

B

+
+

C

+
+

D

+
+

E

+
+

F

+
+

G

+
+

H

+
+

I

+
+

J

+
+

K

+
+

L

+
+

M

+
+

N

+
+

O

+
+

P

+
+

Q

+
+

R

+
+

S

+
+

T

+
+

U

+
+

V

+
+

W

+
+

X

+
+

Y

+
+

Z

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0129.html b/docs/dli/sqlreference/dli_08_0129.html new file mode 100644 index 00000000..b6a2948b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0129.html @@ -0,0 +1,17 @@ + + +

Views

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0130.html b/docs/dli/sqlreference/dli_08_0130.html new file mode 100644 index 00000000..b60fbfad --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0130.html @@ -0,0 +1,27 @@ + + +

Creating a View

+

Function

This statement is used to create views.

+
+

Syntax

1
CREATE [OR REPLACE] VIEW view_name AS select_statement;
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

To create a view named student_view for the queried ID and name of the student table, run the following statement:

+
1
CREATE VIEW student_view AS SELECT id, name FROM student;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0131.html b/docs/dli/sqlreference/dli_08_0131.html new file mode 100644 index 00000000..0f5ce938 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0131.html @@ -0,0 +1,27 @@ + + +

Deleting a View

+

Function

This statement is used to delete views.

+
+

Syntax

1
DROP VIEW [IF EXISTS] [db_name.]view_name;
+
+ +
+
+

Keyword

DROP: Deletes the metadata of a specified view. Although views and tables have many common points, the DROP TABLE statement cannot be used to delete views.

+
+

Precautions

The to-be-deleted view must exist. If you run this statement to delete a view that does not exist, an error is reported. To avoid such an error, you can add IF EXISTS in this statement.

+
+

Example

To delete a view named student_view, run the following statement:

+
1
DROP VIEW student_view;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0138.html b/docs/dli/sqlreference/dli_08_0138.html new file mode 100644 index 00000000..4c3ca3e7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0138.html @@ -0,0 +1,28 @@ + + +

Viewing the Execution Plan

+

Function

This statement returns the logical plan and physical execution plan for the SQL statement.

+
+

Syntax

1
EXPLAIN [EXTENDED | CODEGEN] statement;
+
+ +
+
+

Keyword

EXTENDED: After this keyword is specified, the logical and physical plans are outputted at the same time.

+

CODEGEN: After this keyword is specified, code generated by using the Codegen is also outputted.

+
+

Precautions

None

+
+

Example

To return the logical and physical plans of SELECT * FROM test, run the following statement:

+
1
EXPLAIN EXTENDED select * from test;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0139.html b/docs/dli/sqlreference/dli_08_0139.html new file mode 100644 index 00000000..c8c69a26 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0139.html @@ -0,0 +1,33 @@ + + +

Data Permissions Management

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0140.html b/docs/dli/sqlreference/dli_08_0140.html new file mode 100644 index 00000000..604c6be5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0140.html @@ -0,0 +1,203 @@ + + +

Data Permissions List

+

Table 1 describes the SQL statement permission matrix in DLI in terms of permissions on databases, tables, and roles.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Permission matrix

Category

+

SQL statement

+

Permission

+

Description

+

Database

+

DROP DATABASE db1

+

The DROP_DATABASE permission of database.db1

+

-

+

CREATE TABLE tb1(...)

+

The CREATE_TABLE permission of database.db1

+

-

+

CREATE VIEW v1

+

The CREATE_VIEW permission of database.db1

+

-

+

EXPLAIN query

+

The EXPLAIN permission of database.db1

+

Depending on the permissions required by query statements.

+

Table

+

SHOW CREATE TABLE tb1

+

The SHOW_CREATE_TABLE permission of database.db1.tables.tb1

+

-

+

DESCRIBE [EXTENDED|FORMATTED] tb1

+

The DESCRIBE_TABLE permission of databases.db1.tables.tb1

+

-

+

DROP TABLE [IF EXISTS] tb1

+

The DROP_TABLE permission of database.db1.tables.tb1

+

-

+

SELECT * FROM tb1

+

The SELECT permission of database.db1.tables.tb1

+

-

+

SELECT count(*) FROM tb1

+

The SELECT permission of database.db1.tables.tb1

+

-

+

SELECT * FROM view1

+

The SELECT permission of database.db1.tables.view1

+

-

+

SELECT count(*) FROM view1

+

The SELECT permission of database.db1.tables.view1

+

-

+

LOAD DLI TABLE

+

The INSERT_INTO_TABLE permission of database.db1.tables.tb1

+

-

+

INSERT INTO TABLE

+

The INSERT_INTO_TABLE permission of database.db1.tables.tb1

+

-

+

INSERT OVERWRITE TABLE

+

The INSERT_OVERWRITE_TABLE permission of database.db1.tables.tb1

+

-

+

ALTER TABLE ADD COLUMNS

+

The ALTER_TABLE_ADD_COLUMNS permission of database.db1.tables.tb1

+

-

+

ALTER TABLE RENAME

+

The ALTER_TABLE_RENAME permission of database.db1.tables.tb1

+

-

+

ROLE&PRIVILEGE

+

CREATE ROLE

+

The CREATE_ROLE permission of db

+

-

+

DROP ROLE

+

The DROP_ROLE permission of db

+

-

+

SHOW ROLES

+

The SHOW_ROLES permission of db

+

-

+

GRANT ROLES

+

The GRANT_ROLE permission of db

+

-

+

REVOKE ROLES

+

The REVOKE_ROLE permission of db

+

-

+

GRANT PRIVILEGE

+

The GRANT_PRIVILEGE permission of db or table

+

-

+

REVOKE PRIVILEGE

+

The REVOKE_PRIVILEGE permission of db or table

+

-

+

SHOW GRANT

+

The SHOW_GRANT permission of db or table

+

-

+
+
+

For privilege granting or revocation on databases and tables, DLI supports the following permissions:

+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0141.html b/docs/dli/sqlreference/dli_08_0141.html new file mode 100644 index 00000000..b3eb8af8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0141.html @@ -0,0 +1,26 @@ + + +

Creating a Role

+

Function

+
+

Syntax

1
CREATE ROLE [db_name].role_name;
+
+ +
+
+

Keyword

None

+
+

Precautions

+
+

Example

1
CREATE ROLE role1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0142.html b/docs/dli/sqlreference/dli_08_0142.html new file mode 100644 index 00000000..e0c731de --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0142.html @@ -0,0 +1,26 @@ + + +

Binding a Role

+

Function

This statement is used to bind a user with a role.

+
+

Syntax

1
GRANT ([db_name].role_name,...) TO (user_name,...);
+
+ +
+
+

Keyword

None

+
+

Precautions

The role_name and user_name must exist. Otherwise, an error will be reported.

+
+

Example

1
GRANT role1 TO user_name1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0143.html b/docs/dli/sqlreference/dli_08_0143.html new file mode 100644 index 00000000..5e4c7965 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0143.html @@ -0,0 +1,37 @@ + + +

Displaying a Role

+

Function

This statement is used to display all roles or roles bound to the user_name in the current database.

+
+

Syntax

1
SHOW [ALL] ROLES [user_name];
+
+ +
+
+

Keyword

ALL: Displays all roles.

+
+

Precautions

Keywords ALL and user_name cannot coexist.

+
+

Example

+
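For example (user_name1 is a placeholder), the following statements display the roles in the current database, all roles, and the roles bound to a specific user, respectively:

SHOW ROLES;
SHOW ALL ROLES;
SHOW ROLES user_name1;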
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0144.html b/docs/dli/sqlreference/dli_08_0144.html new file mode 100644 index 00000000..7344fc4b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0144.html @@ -0,0 +1,94 @@ + + +

Granting a Permission

+

Function

This statement is used to grant permissions to a user or role.

+
+

Syntax

1
GRANT (privilege,...) ON (resource,..) TO ((ROLE [db_name].role_name) | (USER user_name)),...);
+
+ +
+
+

Keyword

ROLE: The subsequent role_name must be a role.

+

USER: The subsequent user_name must be a user.

+
+

Precautions

+
+

Example

Run the following statement to grant user_name1 the permission to delete the db1 database:

+
1
GRANT DROP_DATABASE ON databases.db1 TO USER user_name1;
+
+ +
+

Run the following statement to grant user_name1 the SELECT permission of data table tb1 in the db1 database:

+
1
GRANT SELECT ON databases.db1.tables.tb1 TO USER user_name1;
+
+ +
+

Run the following statement to grant role_name the SELECT permission of data table tb1 in the db1 database:

+
1
GRANT SELECT ON databases.db1.tables.tb1 TO ROLE role_name;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0145.html b/docs/dli/sqlreference/dli_08_0145.html new file mode 100644 index 00000000..ede38800 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0145.html @@ -0,0 +1,34 @@ + + +

Displaying the Granted Permissions

+

Function

This statement is used to show the permissions granted to a user or role in the resource.

+
+

Syntax

1
SHOW GRANT ((ROLE [db_name].role_name) | (USER user_name)) ON resource;
+
+ +
+
+

Keyword

ROLE: The subsequent role_name must be a role.

+

USER: The subsequent user_name must be a user.

+
+

Precautions

The resource can be a queue, database, table, view, or column. The formats are as follows:

+ +
+

Example

Run the following statement to show permissions of user_name1 in the db1 database:

+
1
SHOW GRANT USER user_name1 ON databases.db1;
+
+ +
+

Run the following statement to show permissions of role_name on table tb1 in the db1 database:

+
1
SHOW GRANT ROLE role_name ON databases.db1.tables.tb1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0146.html b/docs/dli/sqlreference/dli_08_0146.html new file mode 100644 index 00000000..28d3434e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0146.html @@ -0,0 +1,39 @@ + + +

Revoking a Permission

+

Function

This statement is used to revoke permissions granted to a user or role.

+
+

Syntax

1
REVOKE (privilege,...) ON (resource,..) FROM ((ROLE [db_name].role_name) | (USER user_name)),...);
+
+ +
+
+

Keyword

ROLE: The subsequent role_name must be a role.

+

USER: The subsequent user_name must be a user.

+
+

Precautions

+
+

Example

To revoke the permission of user user_name1 to delete database db1, run the following statement:

+
1
REVOKE DROP_DATABASE ON databases.db1 FROM USER user_name1;
+
+ +
+

To revoke the SELECT permission of user user_name1 on table tb1 in database db1, run the following statement:

+
1
REVOKE SELECT ON databases.db1.tables.tb1 FROM USER user_name1;
+
+ +
+

To revoke the SELECT permission of role role_name on table tb1 in database db1, run the following statement:

+
1
REVOKE SELECT ON databases.db1.tables.tb1 FROM ROLE role_name;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0147.html b/docs/dli/sqlreference/dli_08_0147.html new file mode 100644 index 00000000..d2d840ea --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0147.html @@ -0,0 +1,27 @@ + + +

Unbinding a Role

+

Function

This statement is used to unbind a user from a role.

+
+

Syntax

1
REVOKE ([db_name].role_name,...) FROM (user_name,...);
+
+ +
+
+

Keyword

None

+
+

Precautions

The role_name and user_name must exist, and user_name must have been bound to role_name.

+
+

Example

To unbind the user_name1 from role1, run the following statement:

+
1
REVOKE role1 FROM user_name1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0148.html b/docs/dli/sqlreference/dli_08_0148.html new file mode 100644 index 00000000..19a64a27 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0148.html @@ -0,0 +1,26 @@ + + +

Deleting a Role

+

Function

This statement is used to delete a role in the current database or a specified database.

+
+

Syntax

1
DROP ROLE [db_name].role_name;
+
+ +
+
+

Keyword

None

+
+

Precautions

+
+

Example

1
DROP ROLE role1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0149.html b/docs/dli/sqlreference/dli_08_0149.html new file mode 100644 index 00000000..2d5ef5f4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0149.html @@ -0,0 +1,26 @@ + + +

Displaying the Binding Relationship Between All Roles and Users

+

Function

This statement is used to display the binding relationship between a specified role and users in the current database.

+
+

Syntax

1
SHOW PRINCIPALS ROLE;
+
+ +
+
+

Keyword

None

+
+

Precautions

The specified role must exist.

+
+

Example

1
SHOW PRINCIPALS role1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0150.html b/docs/dli/sqlreference/dli_08_0150.html new file mode 100644 index 00000000..1fe83967 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0150.html @@ -0,0 +1,112 @@ + + +

Basic SELECT Statements

+

Function

This statement is a basic query statement and is used to return the query results.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
SELECT [ALL | DISTINCT] attr_expr_list FROM table_reference
+  [WHERE where_condition]
+  [GROUP BY col_name_list]
+  [ORDER BY col_name_list][ASC | DESC]
+  [CLUSTER BY col_name_list | DISTRIBUTE BY col_name_list]
+  [SORT BY col_name_list]]
+  [LIMIT number];
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 SELECT parameter description

Parameter

+

Description

+

ALL

+

Returns all rows, including duplicates; this is the default behavior. ALL can be followed only by an asterisk (*); otherwise, an error occurs.

+

DISTINCT

+

Removes duplicate rows from the result set.

+

WHERE

+

Specifies the filter criteria for a query. Arithmetic operators, relational operators, and logical operators are supported.

+

where_condition

+

Filter criteria.

+

GROUP BY

+

Specifies the grouping field. Single-field grouping and multi-field grouping are supported.

+

col_name_list

+

Field list

+

ORDER BY

+

Sorts the query results.

+

ASC/DESC

+

ASC sorts from the lowest value to the highest value. DESC sorts from the highest value to the lowest value. ASC is the default sort order.

+

CLUSTER BY

+

CLUSTER BY is used to bucket the table according to the bucketing fields and then sort within the bucketed table. If the field of DISTRIBUTE BY is the same as the field of SORT BY and the sorting is in descending order, the combination of DISTRIBUTE BY and SORT BY achieves the same function as CLUSTER BY.

+

DISTRIBUTE BY

+

Specifies the bucketing fields without sorting the table.

+

SORT BY

+

Sorts data within each bucket.

+

LIMIT

+

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+
+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

To filter the records whose name is Mike from the student table and sort the results in ascending order of score, run the following statement:

+
1
+2
+3
SELECT * FROM student
+  WHERE name = 'Mike'
+  ORDER BY score;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0151.html b/docs/dli/sqlreference/dli_08_0151.html new file mode 100644 index 00000000..2783ff66 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0151.html @@ -0,0 +1,17 @@ + + +

Filtering

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0152.html b/docs/dli/sqlreference/dli_08_0152.html new file mode 100644 index 00000000..f6a762c4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0152.html @@ -0,0 +1,31 @@ + + +

WHERE Filtering Clause

+

Function

This statement is used to filter the query results using the WHERE clause.

+
+

Syntax

1
+2
SELECT [ALL | DISTINCT] attr_expr_list FROM table_reference
+  WHERE where_condition;
+
+ +
+
+

Keyword

+
+

Precautions

The to-be-queried table must exist.

+
+

Example

To filter the records in which the scores are higher than 90 and lower than 95 in the student table, run the following statement:

+
1
+2
SELECT * FROM student
+  WHERE score > 90 AND score < 95;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0153.html b/docs/dli/sqlreference/dli_08_0153.html new file mode 100644 index 00000000..75369367 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0153.html @@ -0,0 +1,37 @@ + + +

HAVING Filtering Clause

+

Function

This statement is used to filter the query results using the HAVING clause.

+
+

Syntax

1
+2
+3
+4
SELECT [ALL | DISTINCT] attr_expr_list FROM table_reference
+  [WHERE where_condition]
+  [GROUP BY col_name_list]
+  HAVING having_condition;
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

Group the student table by the name field and return the groups whose maximum score is higher than 95.

+
1
+2
+3
SELECT name, max(score) FROM student
+  GROUP BY name
+  HAVING max(score) >95;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0154.html b/docs/dli/sqlreference/dli_08_0154.html new file mode 100644 index 00000000..69b10cf9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0154.html @@ -0,0 +1,21 @@ + + +

Sorting

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0155.html b/docs/dli/sqlreference/dli_08_0155.html new file mode 100644 index 00000000..9987946f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0155.html @@ -0,0 +1,33 @@ + + +

ORDER BY

+

Function

This statement is used to order the result set of a query by the specified field.

+
+

Syntax

1
+2
+3
SELECT attr_expr_list FROM table_reference
+  ORDER BY col_name
+  [ASC | DESC] [,col_name [ASC | DESC],...];
+
+ +
+
+

Keyword

+
+

Precautions

The to-be-sorted table must exist. If this statement is used to sort a table that does not exist, an error is reported.

+
+

Example

To sort table student in ascending order according to field score and return the sorting result, run the following statement:

+
1
+2
SELECT * FROM student
+  ORDER BY score;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0156.html b/docs/dli/sqlreference/dli_08_0156.html new file mode 100644 index 00000000..70cda4fd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0156.html @@ -0,0 +1,33 @@ + + +

SORT BY

+

Function

This statement is used to sort data within each Reducer by the specified fields (partial sorting).

+
+

Syntax

1
+2
+3
SELECT attr_expr_list FROM table_reference
+  SORT BY col_name
+  [ASC | DESC] [,col_name [ASC | DESC],...];
+
+ +
+
+

Keyword

+
+

Precautions

The to-be-sorted table must exist. If this statement is used to sort a table that does not exist, an error is reported.

+
+

Example

To sort the student table in ascending order of the score field within each Reducer, run the following statement:

+
1
+2
SELECT * FROM student
+  SORT BY score;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0157.html b/docs/dli/sqlreference/dli_08_0157.html new file mode 100644 index 00000000..577fa685 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0157.html @@ -0,0 +1,31 @@ + + +

CLUSTER BY

+

Function

This statement is used to bucket a table and sort the table within buckets.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  CLUSTER BY col_name [,col_name ,...];
+
+ +
+
+

Keyword

CLUSTER BY: Buckets are created based on specified fields. Single fields and multiple fields are supported, and data is sorted in buckets.

+
+

Precautions

The to-be-sorted table must exist. If this statement is used to sort a table that does not exist, an error is reported.

+
+

Example

To bucket the student table according to the score field and sort tables within buckets in descending order, run the following statement:

+
1
+2
SELECT * FROM student
+  CLUSTER BY score;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0158.html b/docs/dli/sqlreference/dli_08_0158.html new file mode 100644 index 00000000..c84ba13e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0158.html @@ -0,0 +1,31 @@ + + +

DISTRIBUTE BY

+

Function

This statement is used to bucket a table according to the field.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  DISTRIBUTE BY col_name [,col_name ,...];
+
+ +
+
+

Keyword

DISTRIBUTE BY: Buckets are created based on specified fields. A single field or multiple fields are supported, and the fields are not sorted in the bucket. This parameter is used together with SORT BY to sort data after bucket division.

+
+

Precautions

The to-be-sorted table must exist. If this statement is used to sort a table that does not exist, an error is reported.

+
+

Example

To bucket the student table according to the score field, run the following statement:

+
1
+2
SELECT * FROM student
+  DISTRIBUTE BY score;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0159.html b/docs/dli/sqlreference/dli_08_0159.html new file mode 100644 index 00000000..a98601ef --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0159.html @@ -0,0 +1,23 @@ + + +

Grouping

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0160.html b/docs/dli/sqlreference/dli_08_0160.html new file mode 100644 index 00000000..714e5ef6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0160.html @@ -0,0 +1,32 @@ + + +

Column-Based GROUP BY

+

Function

This statement is used to group a table based on columns.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  GROUP BY col_name_list;
+
+ +
+
+

Keyword

Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.

+ +
+

Precautions

The to-be-grouped table must exist. Otherwise, an error is reported.

+
+

Example

Group the student table according to the score and name fields and return the grouping results.

+
1
+2
SELECT score, count(name) FROM student
+  GROUP BY score,name;
+
+ +
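For comparison, a minimal single-column sketch on the same student table, grouping by score only:

SELECT score, count(name) FROM student
  GROUP BY score;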
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0161.html b/docs/dli/sqlreference/dli_08_0161.html new file mode 100644 index 00000000..30b0e79c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0161.html @@ -0,0 +1,31 @@ + + +

Expression-Based GROUP BY

+

Function

This statement is used to group a table according to expressions.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  GROUP BY groupby_expression [, groupby_expression, ...];
+
+ +
+
+

Keyword

The groupby_expression can contain a single field or multiple fields, and also can call aggregate functions or string functions.

+
+

Precautions

+
+

Example

To group the student table by the substring of the name field obtained using the substr function, and return each substring together with its number of records, run the following statement:

+
1
+2
SELECT substr(name,6),count(name) FROM student
+  GROUP BY substr(name,6);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0162.html b/docs/dli/sqlreference/dli_08_0162.html new file mode 100644 index 00000000..c712014d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0162.html @@ -0,0 +1,37 @@ + + +

GROUP BY Using HAVING

+

Function

This statement filters a table after grouping it using the HAVING clause.

+
+

Syntax

1
+2
+3
SELECT attr_expr_list FROM table_reference
+  GROUP BY groupby_expression [, groupby_expression...]
+  HAVING having_expression;
+
+ +
+
+

Keyword

The groupby_expression can contain a single field or multiple fields, and can also call aggregate functions or string functions.

+
+

Precautions

+
+

Example

Group the transactions according to num, use the HAVING clause to filter the records in which the maximum value derived from multiplying price with amount is higher than 5000, and return the filtered results.

+
1
+2
+3
+4
SELECT num, max(price*amount) FROM transactions
+  WHERE time > '2016-06-01'
+  GROUP BY num
+  HAVING max(price*amount)>5000;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0163.html b/docs/dli/sqlreference/dli_08_0163.html new file mode 100644 index 00000000..377cf2eb --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0163.html @@ -0,0 +1,58 @@ + + +

ROLLUP

+

Function

This statement is used to generate aggregate rows, super-aggregate rows, and a grand total row. It performs multi-level aggregation from right to left and displays the aggregation results at each level.

+
+

Syntax

1
+2
+3
SELECT attr_expr_list FROM table_reference
+  GROUP BY col_name_list
+  WITH ROLLUP;
+
+ +
+
+

Keyword

ROLLUP is the expansion of GROUP BY. For example, SELECT a, b, c, SUM(expression) FROM table GROUP BY a, b, c WITH ROLLUP; can be transformed into the following query statements:
  • Counting the (a, b, c) combinations
    1
    +2
    SELECT a, b, c, sum(expression) FROM table
    +  GROUP BY a, b, c;
    +
    + +
    +
  • Counting the (a, b) combinations
    1
    +2
    SELECT a, b, NULL, sum(expression) FROM table
    +  GROUP BY a, b;
    +
    + +
    +
  • Counting the (a) combinations
    1
    +2
    SELECT a, NULL, NULL, sum(expression) FROM table
    +  GROUP BY a;
    +
    + +
    +
  • Total
    1
    SELECT NULL, NULL, NULL, sum(expression) FROM table;
    +
    + +
    +
+
+
+

Precautions

The to-be-grouped table must exist. If this statement is used to group a table that does not exist, an error is reported.

+
+

Example

To generate the aggregate row, super-aggregate row, and total row according to the group_id and job fields and return the total salary on each aggregation condition, run the following statement:

+
1
+2
+3
SELECT group_id, job, SUM(salary) FROM group_test
+  GROUP BY group_id, job
+  WITH ROLLUP;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0164.html b/docs/dli/sqlreference/dli_08_0164.html new file mode 100644 index 00000000..14c8eb2e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0164.html @@ -0,0 +1,78 @@ + + +

GROUPING SETS

+

Function

This statement is used to generate cross-tabulation rows so that statistics can be computed for each GROUP BY field independently.

+
+

Syntax

1
+2
+3
SELECT attr_expr_list FROM table_reference
+  GROUP BY col_name_list
+  GROUPING SETS(col_name_list);
+
+ +
+
+

Keyword

GROUPING SETS is the expansion of GROUP BY. For example:

+ +
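By analogy with the ROLLUP expansion described earlier, a hedged sketch of how GROUPING SETS (group_id, job) in the example below can be understood, namely as the union of the following per-field aggregations (an illustration, not the literal execution plan):

SELECT group_id, NULL, SUM(salary) FROM group_test
  GROUP BY group_id;
SELECT NULL, job, SUM(salary) FROM group_test
  GROUP BY job;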
+

Precautions

+
+

Example

To generate the cross-table row according to the group_id and job fields and return the total salary on each aggregation condition, run the following statement:

+
1
+2
+3
SELECT group_id, job, SUM(salary) FROM group_test
+  GROUP BY group_id, job
+  GROUPING SETS (group_id, job);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0165.html b/docs/dli/sqlreference/dli_08_0165.html new file mode 100644 index 00000000..d362f0c2 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0165.html @@ -0,0 +1,29 @@ + + +

JOIN

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0166.html b/docs/dli/sqlreference/dli_08_0166.html new file mode 100644 index 00000000..0eded336 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0166.html @@ -0,0 +1,31 @@ + + +

INNER JOIN

+

Function

This statement is used to join and return the rows that meet the JOIN conditions from two tables as the result set.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  {JOIN | INNER JOIN} table_reference ON join_condition;
+
+ +
+
+

Keyword

JOIN/INNER JOIN: Only the records that meet the JOIN conditions in joined tables will be displayed.

+
+

Precautions

+
+

Example

To join the course IDs from the student_info and course_info tables and check the mapping between student names and courses, run the following statement:

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info
+  JOIN course_info ON (student_info.courseId = course_info.courseId);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0167.html b/docs/dli/sqlreference/dli_08_0167.html new file mode 100644 index 00000000..1961db9b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0167.html @@ -0,0 +1,31 @@ + + +

LEFT OUTER JOIN

+

Function

Join the left table with the right table and return all records of the left table. If no matching record is found in the right table, NULL will be returned.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  LEFT OUTER JOIN table_reference ON join_condition;
+
+ +
+
+

Keyword

LEFT OUTER JOIN: Returns all joined records of the left table. If no record is matched, NULL is returned.

+
+

Precautions

The to-be-joined table must exist. Otherwise, an error is reported.

+
+

Example

To left outer join the student_info table with the course_info table on courseId and return student names together with the names of the courses they have selected, run the following statement. If no matching course is found, NULL will be returned.

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info
+  LEFT OUTER JOIN course_info ON (student_info.courseId = course_info.courseId);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0168.html b/docs/dli/sqlreference/dli_08_0168.html new file mode 100644 index 00000000..143801a4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0168.html @@ -0,0 +1,31 @@ + + +

RIGHT OUTER JOIN

+

Function

Match the right table with the left table and return all records of the right table. If no matching record is found in the left table, NULL will be returned.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  RIGHT OUTER JOIN table_reference ON join_condition;
+
+ +
+
+

Keyword

RIGHT OUTER JOIN: Return all matched records of the right table. If no record is matched, NULL is returned.

+
+

Precautions

The to-be-joined table must exist. Otherwise, an error is reported.

+
+

Example

To right outer join the student_info table with the course_info table on courseId and return all records in the course_info table, run the following statement. If no matching record is found in the student_info table, NULL will be returned.

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info
+  RIGHT OUTER JOIN course_info ON (student_info.courseId = course_info.courseId);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0169.html b/docs/dli/sqlreference/dli_08_0169.html new file mode 100644 index 00000000..43a87218 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0169.html @@ -0,0 +1,31 @@ + + +

FULL OUTER JOIN

+

Function

Join the left table with the right table and return all records from both tables. If no matching record is found, NULL will be returned.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  FULL OUTER JOIN table_reference ON join_condition;
+
+ +
+
+

Keyword

FULL OUTER JOIN: Matches all records in the left and right tables. If no record is matched, NULL is returned.

+
+

Precautions

The to-be-joined table must exist. Otherwise, an error is reported.

+
+

Example

To join all records from the right table and the left table and return all joined records, run the following statement. If no joined record is found, NULL will be returned.

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info
+  FULL OUTER JOIN course_info ON (student_info.courseId = course_info.courseId);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0170.html b/docs/dli/sqlreference/dli_08_0170.html new file mode 100644 index 00000000..6117aa61 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0170.html @@ -0,0 +1,31 @@ + + +

IMPLICIT JOIN

+

Function

This statement has the same function as INNER JOIN, that is, the result set that meets the WHERE condition is returned. However, IMPLICIT JOIN does not use the condition specified by JOIN.

+
+

Syntax

1
+2
SELECT table_reference.col_name, table_reference.col_name, ... FROM table_reference, table_reference
+  WHERE table_reference.col_name = table_reference.col_name;
+
+ +
+
+

Keyword

The WHERE keyword achieves the same function as JOIN...ON..., and the matched records are returned. The syntax above shows WHERE filtering based on an equality condition; filtering based on an inequality condition is also supported.

+
+

Precautions

+
+

Example

To return the student names and course names that match courseId, run the following statement:

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info,course_info
+  WHERE student_info.courseId = course_info.courseId;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0171.html b/docs/dli/sqlreference/dli_08_0171.html new file mode 100644 index 00000000..3d55fcea --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0171.html @@ -0,0 +1,31 @@ + + +

Cartesian JOIN

+

Function

Cartesian JOIN joins each record of table A with all records in table B. For example, if there are m records in table A and n records in table B, m x n records will be generated by Cartesian JOIN.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  CROSS JOIN table_reference ON join_condition;
+
+ +
+
+

Keyword

The join_condition is the condition for joining. If join_condition is always true, for example 1=1, the join is a Cartesian JOIN. Therefore, the number of records output by a Cartesian join equals the product of the numbers of records in the joined tables. If a Cartesian join is required, use the special keyword CROSS JOIN. CROSS JOIN is the standard way to calculate the Cartesian product.

+
+

Precautions

The to-be-joined table must exist. Otherwise, an error is reported.

+
+

Example

To return all the JOIN results of the student name and course name from the student_info and course_info tables, run the following statement:

+
1
+2
SELECT student_info.name, course_info.courseName FROM student_info
+  CROSS JOIN course_info ON (1 = 1);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0172.html b/docs/dli/sqlreference/dli_08_0172.html new file mode 100644 index 00000000..8a20c9db --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0172.html @@ -0,0 +1,31 @@ + + +

LEFT SEMI JOIN

+

Function

This statement is used to query the records that meet the JOIN condition from the left table.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  LEFT SEMI JOIN table_reference ON join_condition;
+
+ +
+
+

Keyword

LEFT SEMI JOIN: Returns only the records from the left table that meet the JOIN condition. The same result can also be achieved by nesting a subquery in a WHERE...IN or WHERE EXISTS clause (see the sketch after the example below). LEFT SEMI JOIN returns only the left-table records that meet the JOIN condition, while LEFT OUTER JOIN returns all records from the left table, with NULL for rows that have no match.

+
+

Precautions

+
+

Example

To return the names of students who select the courses and the course IDs, run the following statement:

+
1
+2
SELECT student_info.name, student_info.courseId FROM student_info
+  LEFT SEMI JOIN course_info ON (student_info.courseId = course_info.courseId);
+
+ +
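A hedged sketch of the equivalent WHERE...IN form mentioned in the Keyword description, using the same tables as the example above:

SELECT name, courseId FROM student_info
  WHERE courseId IN (SELECT courseId FROM course_info);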
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0173.html b/docs/dli/sqlreference/dli_08_0173.html new file mode 100644 index 00000000..d1edb237 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0173.html @@ -0,0 +1,31 @@ + + +

NON-EQUIJOIN

+

Function

This statement is used to join multiple tables using inequality conditions and return the result set that meets the condition.

+
+

Syntax

1
+2
SELECT attr_expr_list FROM table_reference
+  JOIN table reference ON non_equi_join_condition;
+
+ +
+
+

Keyword

The non_equi_join_condition is similar to join_condition. The only difference is that the JOIN condition is an inequality.

+
+

Precautions

The to-be-joined table must exist. Otherwise, an error is reported.

+
+

Example

To return all the pairs of different student names from the student_info_1 and student_info_2 tables, run the following statement:

+
1
+2
SELECT student_info_1.name, student_info_2.name FROM student_info_1
+  JOIN student_info_2 ON (student_info_1.name <> student_info_2.name);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0174.html b/docs/dli/sqlreference/dli_08_0174.html new file mode 100644 index 00000000..e1a11203 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0174.html @@ -0,0 +1,21 @@ + + +

Subquery

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0175.html b/docs/dli/sqlreference/dli_08_0175.html new file mode 100644 index 00000000..c546922c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0175.html @@ -0,0 +1,32 @@ + + +

Subquery Nested by WHERE

+

Function

Subqueries are nested in the WHERE clause, and the subquery result is used as the filtering condition.

+
+

Syntax

1
+2
SELECT [ALL | DISTINCT] attr_expr_list FROM table_reference
+  WHERE {col_name operator (sub_query) | [NOT] EXISTS sub_query};
+
+ +
+
+

Keyword

+
+

Precautions

The to-be-queried table must exist. If this statement is used to query a table that does not exist, an error is reported.

+
+

Example

To query the courseId of Biology from the course_info table, and then query the student name matched the courseId from the student_info table, run the following statement:

+
1
+2
SELECT name FROM student_info
+  WHERE courseId = (SELECT courseId FROM course_info WHERE courseName = 'Biology');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0176.html b/docs/dli/sqlreference/dli_08_0176.html new file mode 100644 index 00000000..6cf6da82 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0176.html @@ -0,0 +1,29 @@ + + +

Subquery Nested by FROM

+

Function

This statement is used to nest subquery by FROM and use the subquery results as the data source of the external SELECT statement.

+
+

Syntax

1
SELECT [ALL | DISTINCT] attr_expr_list FROM (sub_query) [alias];
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

To return the names of students who select the courses in the course_info table and remove the repeated records using DISTINCT, run the following statement:

+
1
+2
SELECT DISTINCT name FROM (SELECT name FROM student_info
+  JOIN course_info ON student_info.courseId = course_info.courseId) temp;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0177.html b/docs/dli/sqlreference/dli_08_0177.html new file mode 100644 index 00000000..68a36a0b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0177.html @@ -0,0 +1,36 @@ + + +

Subquery Nested by HAVING

+

Function

This statement is used to embed a subquery in the HAVING clause. The subquery result is used as a part of the HAVING clause.

+
+

Syntax

1
+2
+3
SELECT [ALL | DISTINCT] attr_expr_list FROM table_reference
+  GROUP BY groupby_expression
+  HAVING aggregate_func(col_name) operator (sub_query);
+
+ +
+
+

Keyword

+ +
+

Precautions

+
+

Example

To group the student_info table by the name field, count the records in each group, and return the names whose record count equals the total number of records in the course_info table, run the following statement:

+
1
+2
+3
SELECT name FROM student_info
+  GROUP BY name
+  HAVING count(name) = (SELECT count(*) FROM course_info);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0178.html b/docs/dli/sqlreference/dli_08_0178.html new file mode 100644 index 00000000..b19e1757 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0178.html @@ -0,0 +1,27 @@ + + +

Multi-Layer Nested Subquery

+

Function

This statement is used to nest subqueries within subqueries, forming multiple layers of nesting.

+
+

Syntax

1
SELECT attr_expr FROM ( SELECT attr_expr FROM ( SELECT attr_expr FROM... ... ) [alias] ) [alias];
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

To return the name field from the user_info table after three queries, run the following statement:

+
1
SELECT name FROM ( SELECT name, acc_num FROM ( SELECT name, acc_num, password FROM ( SELECT name, acc_num, password, bank_acc FROM user_info) a ) b ) c;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0179.html b/docs/dli/sqlreference/dli_08_0179.html new file mode 100644 index 00000000..99a4a7b7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0179.html @@ -0,0 +1,17 @@ + + +

Alias

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0180.html b/docs/dli/sqlreference/dli_08_0180.html new file mode 100644 index 00000000..7b79352c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0180.html @@ -0,0 +1,31 @@ + + +

AS for Table

+

Function

This statement is used to specify an alias for a table or the subquery result.

+
+

Syntax

1
SELECT attr_expr_list FROM table_reference [AS] alias;
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

+
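A minimal sketch, reusing the student table from neighboring sections (the alias s is arbitrary):

SELECT s.name, s.score FROM student AS s
  WHERE s.score > 90;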
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0181.html b/docs/dli/sqlreference/dli_08_0181.html new file mode 100644 index 00000000..19a689ed --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0181.html @@ -0,0 +1,27 @@ + + +

AS for Column

+

Function

This statement is used to specify an alias for a column.

+
+

Syntax

1
SELECT attr_expr [AS] alias, attr_expr [AS] alias, ... FROM table_reference;
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

Run SELECT name AS n FROM simple_table WHERE score > 90 to obtain the subquery result. The alias n for name can then be used by the outer SELECT statement.

+
1
SELECT n FROM (SELECT name AS n FROM simple_table WHERE score > 90) m WHERE n = "xiaoming";
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0182.html b/docs/dli/sqlreference/dli_08_0182.html new file mode 100644 index 00000000..d180fc6c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0182.html @@ -0,0 +1,19 @@ + + +

Set Operations

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0183.html b/docs/dli/sqlreference/dli_08_0183.html new file mode 100644 index 00000000..4609a28c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0183.html @@ -0,0 +1,27 @@ + + +

UNION

+

Function

This statement is used to return the union set of multiple query results.

+
+

Syntax

1
select_statement UNION [ALL] select_statement;
+
+ +
+
+

Keyword

UNION: Concatenates the result sets of multiple queries, appending one result set after another. The number of columns returned by each SELECT statement must be the same. The column types and column names do not have to be the same.

+
+

Precautions

+
+

Example

To return the union set of the query results of the SELECT * FROM student_1 and SELECT * FROM student_2 commands with the repeated records removed, run the following statement:

+
1
SELECT * FROM student_1 UNION SELECT * FROM student_2;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0184.html b/docs/dli/sqlreference/dli_08_0184.html new file mode 100644 index 00000000..668a45ca --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0184.html @@ -0,0 +1,27 @@ + + +

INTERSECT

+

Function

This statement is used to return the intersection set of multiple query results.

+
+

Syntax

1
select_statement INTERSECT select_statement;
+
+ +
+
+

Keyword

INTERSECT returns the intersection of multiple query results. The number of columns returned by each SELECT statement must be the same. The column type and column name may not be the same. By default, INTERSECT deduplication is used.

+
+

Precautions

Do not add brackets between multiple set operations, such as UNION, INTERSECT, and EXCEPT. Otherwise, an error is reported.

+
+

Example

To return the intersection set of the query results of the SELECT * FROM student_1 and SELECT * FROM student_2 commands with the repeated records removed, run the following statement:

+
1
SELECT * FROM student_1 INTERSECT SELECT * FROM student_2;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0185.html b/docs/dli/sqlreference/dli_08_0185.html new file mode 100644 index 00000000..278f6fb7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0185.html @@ -0,0 +1,27 @@ + + +

EXCEPT

+

Function

This statement is used to return the difference set of two query results.

+
+

Syntax

1
select_statement EXCEPT select_statement;
+
+ +
+
+

Keyword

EXCEPT returns the set difference. A EXCEPT B removes from A the records that also exist in B and returns the result. By default, EXCEPT does not remove duplicate records from the returned result. The number of columns returned by each SELECT statement must be the same. The types and names of the columns do not have to be the same.

+
+

Precautions

Do not add brackets between multiple set operations, such as UNION, INTERSECT, and EXCEPT. Otherwise, an error is reported.

+
+

Example

To remove the records that exist in both SELECT * FROM student_1 and SELECT * FROM student_2 from SELECT * FROM student_1 and return the results, run the following statement:

+
1
SELECT * FROM student_1 EXCEPT SELECT * FROM student_2;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0186.html b/docs/dli/sqlreference/dli_08_0186.html new file mode 100644 index 00000000..87cdef54 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0186.html @@ -0,0 +1,27 @@ + + +

WITH...AS

+

Function

This statement is used to define the common table expression (CTE) using WITH...AS to simplify the query and make the result easier to read and maintain.

+
+

Syntax

1
WITH cte_name AS (select_statement) sql_containing_cte_name;
+
+ +
+
+

Keyword

+
+

Precautions

+
+

Example

Define SELECT courseId FROM course_info WHERE courseName = 'Biology' as the CTE nv, and then reference nv as a table in the subsequent query.

+
1
WITH nv AS (SELECT courseId FROM course_info WHERE courseName = 'Biology') SELECT DISTINCT courseId FROM nv;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0187.html b/docs/dli/sqlreference/dli_08_0187.html new file mode 100644 index 00000000..7bed677b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0187.html @@ -0,0 +1,17 @@ + + +

CASE...WHEN

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0188.html b/docs/dli/sqlreference/dli_08_0188.html new file mode 100644 index 00000000..55d029a9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0188.html @@ -0,0 +1,28 @@ + + +

Basic CASE Statement

+

Function

This statement is used to return result_expression based on the result of comparing input_expression with when_expression.

+
+

Syntax

1
CASE input_expression WHEN when_expression THEN result_expression [...n] [ELSE else_result_expression] END;
+
+ +
+
+

Keyword

CASE: Subqueries are supported in the basic CASE statement. However, input_expression and when_expression must be comparable.

+
+

Precautions

If there is no input_expression = when_expression with the TRUE value, else_result_expression will be returned when the ELSE clause is specified. If the ELSE clause is not specified, NULL will be returned.

+
+

Example

To return the name field and the character that is matched to id from the student table with the following matching rules, run the following statement:

  • If id is 1, 'a' is returned.
  • If id is 2, 'b' is returned.
  • If id is 3, 'c' is returned.
  • Otherwise, NULL is returned.
1
SELECT name, CASE id WHEN 1 THEN 'a' WHEN 2 THEN 'b' WHEN 3 THEN 'c' ELSE NULL END FROM student;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0189.html b/docs/dli/sqlreference/dli_08_0189.html new file mode 100644 index 00000000..4f159174 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0189.html @@ -0,0 +1,27 @@ + + +

CASE Query Statement

+

Function

This statement is used to obtain the value of boolean_expression for each WHEN statement in a specified order. Then return the first result_expression with the value TRUE of boolean_expression.

+
+

Syntax

1
CASE WHEN boolean_expression THEN result_expression [...n] [ELSE else_result_expression] END;
+
+ +
+
+

Keyword

boolean_expression: can include subquery. However, the return value of boolean_expression can only be of Boolean type.

+
+

Precautions

If there is no Boolean_expression with the TRUE value, else_result_expression will be returned when the ELSE clause is specified. If the ELSE clause is not specified, NULL will be returned.

+
+

Example

To query the student table and return the related results for the name and score fields: EXCELLENT if the score is higher than 90, GOOD if the score ranges from 80 to 90, and BAD if the score is lower than 80, run the following statement:

+
1
SELECT name, CASE WHEN score >= 90 THEN 'EXCELLENT' WHEN 80 < score AND score < 90 THEN 'GOOD' ELSE 'BAD' END AS level FROM student;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0190.html b/docs/dli/sqlreference/dli_08_0190.html new file mode 100644 index 00000000..09177dd4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0190.html @@ -0,0 +1,57 @@ + + +

OVER Clause

+

Function

This statement is used together with the window function. The OVER statement is used to group data and sort the data within the group. The window function is used to generate serial numbers for values within the group.

+
+

Syntax

1
+2
+3
+4
+5
SELECT window_func(args) OVER
+  ([PARTITION BY col_name, col_name, ...]
+   [ORDER BY col_name, col_name, ...]
+   [ROWS | RANGE BETWEEN (CURRENT ROW | (UNBOUNDED |[num]) PRECEDING)
+  AND (CURRENT ROW | ( UNBOUNDED | [num]) FOLLOWING)]);
+
+ +
+
+

Keyword

+
+

Precautions

The three options of the OVER clause are PARTITION BY, ORDER BY, and WINDOW. They are optional and can be used together. If the OVER clause is empty, the window is the entire table.

+
+

Example

To start the window from the beginning of the table or partition and end the window at the current row, sort the over_test table according to the id field, and return the sorted id fields and corresponding serial numbers, run the following statement:

+
+
1
SELECT id, count(id) OVER (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM over_test;
+
+ +
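A further hedged sketch of the PARTITION BY option (the deptid column of over_test is an assumption): the count is computed separately within each partition.

SELECT id, count(id) OVER (PARTITION BY deptid ORDER BY id) FROM over_test;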
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0191.html b/docs/dli/sqlreference/dli_08_0191.html new file mode 100644 index 00000000..bb95f1ec --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0191.html @@ -0,0 +1,445 @@ + + +

Mathematical Operation Functions

+

Relational Operators

All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.

+

Relational operators are binary operators. The two compared operands must be of the same data type or must support implicit conversion.

+

Table 1 lists all relational operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Relational Operators

Operator

+

Returned Data Type

+

Description

+

A = B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. This operator is used for value assignment.

+

A <> B

+

BOOLEAN

+

If A is not equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned. This operator follows the standard SQL syntax.

+

A < B

+

BOOLEAN

+

If A is less than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A <= B

+

BOOLEAN

+

If A is less than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A > B

+

BOOLEAN

+

If A is greater than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A >= B

+

BOOLEAN

+

If A is greater than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A IS NULL

+

BOOLEAN

+

If A is NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS NOT NULL

+

BOOLEAN

+

If A is not NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS DISTINCT FROM B

+

BOOLEAN

+

If A is not equal to B, TRUE is returned. NULL values are considered equal to each other.

+

A IS NOT DISTINCT FROM B

+

BOOLEAN

+

If A is equal to B, TRUE is returned. NULL values are considered equal to each other.

+

A BETWEEN [ASYMMETRIC | SYMMETRIC] B AND C

+

BOOLEAN

+

If A is greater than or equal to B but less than or equal to C, TRUE is returned.

+
  • ASYMMETRIC: indicates that the order of B and C matters.

    For example, "A BETWEEN ASYMMETRIC B AND C" is equivalent to "A BETWEEN B AND C".

    +
  • SYMMETRIC: indicates that the order of B and C does not matter.

    For example, "A BETWEEN SYMMETRIC B AND C" is equivalent to "A BETWEEN B AND C) OR (A BETWEEN C AND B".

    +
+

A NOT BETWEEN B AND C

+

BOOLEAN

+

If A is less than B or greater than C, TRUE is returned.

+

A LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A matches pattern B, TRUE is returned. The escape character C can be defined as required.

+

A NOT LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match pattern B, TRUE is returned. The escape character C can be defined as required.

+

A SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A matches regular expression B, TRUE is returned. The escape character C can be defined as required.

+

A NOT SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match regular expression B, TRUE is returned. The escape character C can be defined as required.

+

value IN (value [, value]* )

+

BOOLEAN

+

If the value is equal to any value in the list, TRUE is returned.

+

value NOT IN (value [, value]* )

+

BOOLEAN

+

If the value is not equal to any value in the list, TRUE is returned.

+
+
+
  • Values of the double, real, and float types may differ in precision. Using the equal sign (=) to compare two values of the double type is not recommended. Instead, subtract the two values, take the absolute value of the difference, and regard the two values as equal if that absolute value is small enough (see the sketch after this list). For example:
    abs(0.9999999999 - 1.0000000000) < 0.000000001 //0.9999999999 and 1.0000000000 have 10 decimal places of precision, while 0.000000001 has 9. Therefore, 0.9999999999 can be regarded as equal to 1.0000000000.
    +
+
  • Comparison between data of the numeric type and character strings is allowed. During comparison using relational operators, including >, <, ≤, and ≥, data of the string type is converted to numeric type by default. No characters other than numeric characters are allowed.
  • Character strings can be compared using relational operators.
+
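A minimal sketch of the recommended absolute-difference comparison (the double_test table and its col_a and col_b columns are assumptions):

SELECT * FROM double_test
  WHERE abs(col_a - col_b) < 0.000000001;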
+
+

Logical Operators

Common logical operators are AND, OR, and NOT. Their priority order is NOT > AND > OR.

+

Table 2 lists the calculation rules. A and B indicate logical expressions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Logical Operators

Operator

+

Result Type

+

Description

+

A OR B

+

BOOLEAN

+

If A or B is TRUE, TRUE is returned. Three-valued logic is supported.

+

A AND B

+

BOOLEAN

+

If both A and B are TRUE, TRUE is returned. Three-valued logic is supported.

+

NOT A

+

BOOLEAN

+

If A is not TRUE, TRUE is returned. If A is UNKNOWN, UNKNOWN is returned.

+

A IS FALSE

+

BOOLEAN

+

If A is TRUE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT FALSE

+

BOOLEAN

+

If A is not FALSE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS TRUE

+

BOOLEAN

+

If A is TRUE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT TRUE

+

BOOLEAN

+

If A is not TRUE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS UNKNOWN

+

BOOLEAN

+

If A is UNKNOWN, TRUE is returned.

+

A IS NOT UNKNOWN

+

BOOLEAN

+

If A is not UNKNOWN, TRUE is returned.

+
+
+

Only data of the Boolean type can be used for calculation using logical operators. Implicit type conversion is not supported.

+
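A short hedged example combining logical and relational operators on the student table used in earlier sections (the name values are placeholders); NOT binds tighter than AND, which binds tighter than OR:

SELECT * FROM student
  WHERE NOT (score < 60) AND (name = 'Mike' OR name = 'John');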
+
+

Arithmetic Operators

Arithmetic operators include binary operators and unary operators, for all of which, the returned results are of the numeric type. Table 3 lists arithmetic operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 Arithmetic Operators

Operator

+

Result Type

+

Description

+

+ numeric

+

All numeric types

+

Returns the number itself (unary plus).

+

- numeric

+

All numeric types

+

Returns the negation of the number (unary minus).

+

A + B

+

All numeric types

+

A plus B. The result type is associated with the operation data type. For example, if floating-point number is added to an integer, the result will be a floating-point number.

+

A - B

+

All numeric types

+

A minus B. The result type is associated with the operation data type.

+

A * B

+

All numeric types

+

Multiply A and B. The result type is associated with the operation data type.

+

A / B

+

All numeric types

+

Divide A by B. The result is a number of the double type (double-precision number).

+

POWER(A, B)

+

All numeric types

+

Returns the value of A raised to the power B.

+

ABS(numeric)

+

All numeric types

+

Returns the absolute value of a specified value.

+

MOD(A, B)

+

All numeric types

+

Returns the remainder (modulus) of A divided by B. A negative value is returned only when A is a negative value.

+

SQRT(A)

+

All numeric types

+

Returns the square root of A.

+

LN(A)

+

All numeric types

+

Returns the natural logarithm of A (base e).

+

LOG10(A)

+

All numeric types

+

Returns the base-10 logarithm of A.

+

EXP(A)

+

All numeric types

+

Returns the value of e raised to the power of A.

+

CEIL(A)

+

CEILING(A)

+

All numeric types

+

Return the smallest integer that is greater than or equal to a. For example: ceil(21.2) = 22.

+

FLOOR(A)

+

All numeric types

+

Return the largest integer that is less than or equal to a. For example: floor(21.2) = 21.

+

SIN(A)

+

All numeric types

+

Returns the sine value of A.

+

COS(A)

+

All numeric types

+

Returns the cosine value of A.

+

TAN(A)

+

All numeric types

+

Returns the tangent value of A.

+

COT(A)

+

All numeric types

+

Returns the cotangent value of A.

+

ASIN(A)

+

All numeric types

+

Returns the arc sine value of A.

+

ACOS(A)

+

All numeric types

+

Returns the arc cosine value of A.

+

ATAN(A)

+

All numeric types

+

Returns the arc tangent value of A.

+

DEGREES(A)

+

All numeric types

+

Convert the value of a from radians to degrees.

+

RADIANS(A)

+

All numeric types

+

Convert the value of a from degrees to radians.

+

SIGN(A)

+

All numeric types

+

Returns the sign of A. 1 is returned if A is positive. –1 is returned if A is negative. Otherwise, 0 is returned.

+

ROUND(A, d)

+

All numeric types

+

Rounds A to d decimal places. d is of the INT type. For example: round(21.263, 2) = 21.26.

+

PI()

+

All numeric types

+

Return the value of pi.

+
+
+

Data of the string type is not allowed in arithmetic operations.

+
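A short hedged example exercising several of the functions above; the literals are arbitrary:

SELECT ROUND(21.263, 2), CEIL(21.2), FLOOR(21.2), MOD(10, 3), POWER(2, 10), ABS(-5);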
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0192.html b/docs/dli/sqlreference/dli_08_0192.html new file mode 100644 index 00000000..bf54f783 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0192.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with a DWS table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0193.html b/docs/dli/sqlreference/dli_08_0193.html new file mode 100644 index 00000000..fd27ab90 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0193.html @@ -0,0 +1,146 @@ + + +

Creating a DLI Table and Associating It with DWS

+

Function

This statement is used to create a DLI table and associate it with an existing DWS table.

+
+

Prerequisites

Before creating a DLI table and associating it with DWS, you need to create a datasource connection. For details about operations on the management console, see

+
+

Syntax

1
+2
+3
+4
+5
+6
CREATE TABLE [IF NOT EXISTS] TABLE_NAME
+  USING JDBC OPTIONS (
+  'url'='xx',
+  'dbtable'='db_name_in_DWS.table_name_in_DWS',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

url

+

To obtain the DWS IP address, you need to create a datasource connection first.

+

If you have created an enhanced datasource connection, you can use the JDBC Connection String (intranet) provided by DWS, or the intranet address and port number, to access DWS. The format is protocol header://internal IP address:internal network port/database name, for example: jdbc:postgresql://192.168.0.77:8000/postgres.

+
NOTE:

The DWS IP address is in the following format: protocol header://IP address:port number/database name

+

The following is an example:

+

jdbc:postgresql://to-dws-1174405119-ihlUr78j.datasource.com:8000/postgres

+

If you want to connect to a database created in DWS, change postgres to the corresponding database name in this connection.

+
+

dbtable

+

Specifies the DWS table to associate with, in the format Schema_name.Table_name. For example: public.table_name.

+

user

+

(Discarded) DWS username.

+

password

+

User password of the DWS cluster.

+

passwdauth

+

Datasource password authentication name. For details about how to create datasource authentication, see Datasource Authentication in the Data Lake Insight User Guide.

+

encryption

+

Set this parameter to true when datasource password authentication is used.

+

partitionColumn

+

This parameter is used to set the numeric field used concurrently when data is read.

+
NOTE:
  • The partitionColumn, lowerBound, upperBound, and numPartitions parameters must be set at the same time.
  • To improve the concurrent read performance, you are advised to use auto-increment columns.
+
+

lowerBound

+

Minimum value of a column specified by partitionColumn. The value is contained in the returned result.

+

upperBound

+

Maximum value of a column specified by partitionColumn. The value is not contained in the returned result.

+

numPartitions

+

Number of concurrent read operations.

+
NOTE:

When data is read, the number of concurrent operations are evenly allocated to each task according to the lowerBound and upperBound to obtain data. The following is an example:

+
'partitionColumn'='id',
+'lowerBound'='0',
+'upperBound'='100',
+'numPartitions'='2'
+

Two concurrent tasks are started in DLI. The execution ID of one task is greater than or equal to 0 and the ID is less than 50, and the execution ID of the other task is greater than or equal to 50 and the ID is less than 100.

+
+

fetchsize

+

Number of data records obtained in each batch during data reading. The default value is 1000. A larger value improves performance but occupies more memory and may cause memory overflow.

+

batchsize

+

Number of data records written in each batch. The default value is 1000. A larger value improves performance but occupies more memory and may cause memory overflow.

+

truncate

+

Indicates whether to clear the table without deleting the original table when overwrite is executed. The options are as follows:

+
  • true
  • false
+

The default value is false, indicating that the original table is deleted and then a new table is created when the overwrite operation is performed.

+

isolationLevel

+

Transaction isolation level. The options are as follows:

+
  • NONE
  • READ_UNCOMMITTED
  • READ_COMMITTED
  • REPEATABLE_READ
  • SERIALIZABLE
+

The default value is READ_UNCOMMITTED.

+
+
+
+

Precautions

When creating a table associated with DWS, you do not need to specify the schema of the associated table. DLI automatically obtains the schema of the DWS table specified by the dbtable parameter.

+
+

Example

1
+2
+3
+4
+5
+6
CREATE TABLE IF NOT EXISTS dli_to_dws
+  USING JDBC OPTIONS (
+  'url'='jdbc:postgresql://to-dws-1174405119-ih1Ur78j.datasource.com:8000/postgres',
+  'dbtable'='test_dws',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
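A hedged sketch of the same statement with the concurrent-read options from Table 1 added; the id column and the bound values are assumptions, and the url and passwdauth values are the placeholders from the example above:

CREATE TABLE IF NOT EXISTS dli_to_dws
  USING JDBC OPTIONS (
  'url'='jdbc:postgresql://to-dws-1174405119-ih1Ur78j.datasource.com:8000/postgres',
  'dbtable'='test_dws',
  'passwdauth' = 'xxx',
  'encryption' = 'true',
  'partitionColumn'='id',
  'lowerBound'='0',
  'upperBound'='100',
  'numPartitions'='2');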
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0194.html b/docs/dli/sqlreference/dli_08_0194.html new file mode 100644 index 00000000..5d401935 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0194.html @@ -0,0 +1,99 @@ + + +

Inserting Data to the DWS Table

+

Function

This statement is used to insert data in a DLI table to the associated DWS table.

+
+

Syntax

+
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Indicates the table that contains the data to be queried.

+

field1,field2..., field

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

+
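Two hedged sketches based on the parameters in Table 1; dli_to_dws is the table created in the preceding section, while the source table dli_test_src, the column names, and the inserted values are assumptions:

INSERT INTO dli_to_dws SELECT id, name FROM dli_test_src WHERE id > 0;
INSERT INTO dli_to_dws VALUES (1, 'John');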
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0195.html b/docs/dli/sqlreference/dli_08_0195.html new file mode 100644 index 00000000..82204650 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0195.html @@ -0,0 +1,26 @@ + + +

Querying the DWS Table

+

This statement is used to query data in a DWS table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

To query data in the dli_to_dws table, enter the following statement:

+
1
SELECT * FROM dli_to_dws limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0196.html b/docs/dli/sqlreference/dli_08_0196.html new file mode 100644 index 00000000..b0655872 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0196.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with an RDS Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0197.html b/docs/dli/sqlreference/dli_08_0197.html new file mode 100644 index 00000000..a0069259 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0197.html @@ -0,0 +1,174 @@ + + +

Creating a DLI Table and Associating It with RDS

+

Function

This statement is used to create a DLI table and associate it with an existing RDS table. This function supports access to the MySQL and PostgreSQL clusters of RDS.

+
+

Prerequisites

Before creating a DLI table and associating it with RDS, you need to create a datasource connection. For details about operations on the management console, see

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
CREATE TABLE [IF NOT EXISTS] TABLE_NAME
+  USING JDBC OPTIONS (
+  'url'='xx',
+  'driver'='DRIVER_NAME',
+  'dbtable'='db_name_in_RDS.table_name_in_RDS',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
+
+

Keywords

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

url

+

Before obtaining the RDS IP address, you need to create a datasource connection first.

+

After an enhanced datasource connection is created, use the internal network domain name or internal network address and database port number provided by RDS to connect to DLI. If MySQL is used, the format is protocol header://internal IP address:internal network port number. If PostgreSQL is used, the format is protocol header://internal IP address:internal network port number/database name.

+

For example: jdbc:mysql://192.168.0.193:3306 or jdbc:postgresql://192.168.0.193:3306/postgres.

+

driver

+

JDBC driver class name. To connect to a MySQL cluster, enter com.mysql.jdbc.Driver. To connect to a PostgreSQL cluster, enter org.postgresql.Driver.

+

dbtable

+
  • To access the MySQL cluster, enter Database name.Table name.
    CAUTION:

    The name of the RDS database cannot contain hyphens (-) or ^. Otherwise, the table fails to be created.

    +
    +
  • To access the PostgreSQL cluster, enter Schema name.Table name.
    NOTE:

    The schema name is the name of the database schema. A schema is a collection of database objects, including tables and views.

    +
    +
+

user

+

(Discarded) Specifies the RDS username.

+

password

+

(Discarded) Specifies the RDS password.

+

passwdauth

+

Datasource password authentication name. For details about how to create datasource authentication, see Datasource Authentication in the Data Lake Insight User Guide.

+

encryption

+

Set this parameter to true when datasource password authentication is used.

+

partitionColumn

+

Specifies the numeric column used to partition the data for concurrent reading.

+
NOTE:
  • The partitionColumn, lowerBound, upperBound, and numPartitions parameters must be set at the same time.
  • To improve the concurrent read performance, you are advised to use auto-increment columns.
+
+

lowerBound

+

Minimum value of a column specified by partitionColumn. The value is contained in the returned result.

+

upperBound

+

Maximum value of a column specified by partitionColumn. The value is not contained in the returned result.

+

numPartitions

+

Number of concurrent read operations.

+
NOTE:

When data is read, the value range between lowerBound and upperBound is evenly divided among the concurrent tasks, and each task reads its own range. The following is an example:

+
'partitionColumn'='id',
+'lowerBound'='0',
+'upperBound'='100',
+'numPartitions'='2'
+

Two concurrent tasks are started in DLI. One task reads rows whose id is greater than or equal to 0 and less than 50, and the other reads rows whose id is greater than or equal to 50 and less than 100.

+
+

fetchsize

+

Number of data records obtained in each batch during data reading. The default value is 1000. A larger value improves performance but occupies more memory. If the value is too large, memory overflow may occur.

+

batchsize

+

Number of data records written in each batch. The default value is 1000. A larger value improves performance but occupies more memory. If the value is too large, memory overflow may occur.

+

truncate

+

Indicates whether to clear the table without deleting the original table when overwrite is executed. The options are as follows:

+
  • true
  • false
+

The default value is false, indicating that the original table is deleted and then a new table is created when the overwrite operation is performed.

+

isolationLevel

+

Transaction isolation level. The options are as follows:

+
  • NONE
  • READ_UNCOMMITTED
  • READ_COMMITTED
  • REPEATABLE_READ
  • SERIALIZABLE
+

The default value is READ_UNCOMMITTED.

+
+
+
+

Precautions

When creating a table associated with RDS, you do not need to specify the schema of the associated table. DLI automatically obtains the schema of the table specified by the dbtable parameter in RDS.

+
+

Example

Accessing MySQL:

+
1
+2
+3
+4
+5
+6
+7
CREATE TABLE IF NOT EXISTS dli_to_rds
+  USING JDBC OPTIONS (
+  'url'='jdbc:mysql://to-rds-117405104-3eAHxnlz.datasource.com:3306',
+  'driver'='com.mysql.jdbc.Driver',
+  'dbtable'='rds_test.test1',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
+

+

Accessing PostgreSQL:

+
1
+2
+3
+4
+5
+6
+7
CREATE TABLE IF NOT EXISTS dli_to_rds
+  USING JDBC OPTIONS (
+  'url'='jdbc:postgresql://to-rds-1174405119-oLRHAGE7.datasource.com:3306/postgreDB',
+  'driver'='org.postgresql.Driver',
+  'dbtable'='pg_schema.test1',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0198.html b/docs/dli/sqlreference/dli_08_0198.html new file mode 100644 index 00000000..609559c5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0198.html @@ -0,0 +1,99 @@ + + +

Inserting Data to the RDS Table

+

Function

This statement is used to insert data in a DLI table to the associated RDS table.

+
+

Syntax

+
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Indicates the table that contains the data to be queried.

+

field1,field2..., field

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

+
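The following is a minimal sketch of the insertion syntax described above, assuming the dli_to_rds table created in the previous section and hypothetical columns id and name together with a hypothetical source table dli_test; replace them with the columns of your associated RDS table.

-- Insert the query result of a source table into the associated RDS table
INSERT INTO dli_to_rds
  SELECT id, name
  FROM dli_test
  WHERE id > 100;

-- Insert literal rows directly
INSERT INTO dli_to_rds
  VALUES (1, 'Jordan'), (2, 'Kobe');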
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0199.html b/docs/dli/sqlreference/dli_08_0199.html new file mode 100644 index 00000000..c3a650de --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0199.html @@ -0,0 +1,26 @@ + + +

Querying the RDS Table

+

This statement is used to query data in an RDS table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

Query data in the dli_to_rds table.

+
1
SELECT * FROM dli_to_rds limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0200.html b/docs/dli/sqlreference/dli_08_0200.html new file mode 100644 index 00000000..36684622 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0200.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with a CSS Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0201.html b/docs/dli/sqlreference/dli_08_0201.html new file mode 100644 index 00000000..436988a7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0201.html @@ -0,0 +1,128 @@ + + +

Creating a DLI Table and Associating It with CSS

+

Function

This statement is used to create a DLI table and associate it with an existing CSS table.

+
+

Prerequisites

Before creating a DLI table and associating it with CSS, you need to create a datasource connection. For details about operations on the management console, see

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
CREATE TABLE [IF NOT EXISTS] TABLE_NAME(
+  FIELDNAME1 FIELDTYPE1,
+  FIELDNAME2 FIELDTYPE2)
+  USING CSS OPTIONS (
+  'es.nodes'='xx',
+  'resource'='type_path_in_CSS',
+  'pushdown'='true',
+  'strict'='false',
+  'batch.size.entries'= '1000',
+  'batch.size.bytes'= '1mb',
+  'es.nodes.wan.only' = 'true',
+  'es.mapping.id' = 'FIELDNAME');
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

es.nodes

+

Before obtaining the CSS IP address, you need to create a datasource connection first.

+

If you have created an enhanced datasource connection, you can use the internal IP address provided by CSS. The format is IP1:PORT1,IP2:PORT2.

+

resource

+

The resource is used to specify the CSS datasource connection name. You can use /index/type to specify the resource location (for easier understanding, the index can be seen as database and type as table).

+
NOTE:
  • In ES 6.X, a single index supports only one type, and the type name can be customized.
  • In ES 7.X, a single index uses _doc as the type name and cannot be customized. To access ES 7.X, set this parameter to index.
+
+

pushdown

+

Indicates whether the pushdown function of CSS is enabled. The default value is true. For tables with heavy I/O, enabling pushdown reduces I/O when the WHERE filter conditions are met.

+

strict

+

Indicates whether the CSS pushdown is strict. The default value is false. In exact-match scenarios, strict pushdown reduces more I/O than ordinary pushdown.

+

batch.size.entries

+

Maximum number of entries that can be inserted in a single batch. The default value is 1000. If individual records are so large that the buffered data volume reaches the limit of a single batch first, the system stops buffering and submits the batch according to batch.size.bytes.

+

batch.size.bytes

+

Maximum amount of data in a single batch. The default value is 1 MB. If individual records are so small that the number of buffered records reaches the limit of a single batch first, the system stops buffering and submits the batch according to batch.size.entries.

+

es.nodes.wan.only

+

Indicates whether to access the Elasticsearch node using only the domain name. The default value is false. If the original internal IP address provided by CSS is used as the es.nodes, you do not need to set this parameter or set it to false.

+

es.mapping.id

+

Specifies a field whose value is used as the document ID in the Elasticsearch node.

+
NOTE:
  • The document ID in the same /index/type is unique. If a field that functions as a document ID has duplicate values, the document with the duplicate ID will be overwritten when data is inserted into Elasticsearch.
  • This feature can be used for fault tolerance. If a DLI job fails during insertion after some data has already been written to Elasticsearch, that data becomes redundant. If a document ID is set, the redundant data is overwritten when the DLI job is executed again.
+
+

es.net.ssl

+

Whether to connect to the secure CSS cluster. The default value is false.

+

es.certificate.name

+

Name of the datasource authentication used to connect to the secure CSS cluster. For details about how to create datasource authentication, see Datasource Authentication in the Data Lake Insight User Guide.

+
+
+

batch.size.entries and batch.size.bytes limit the number of data records and data volume respectively.

+
+
+

Example

1
+2
+3
+4
+5
+6
+7
+8
CREATE TABLE IF NOT EXISTS dli_to_css (doc_id String, name string, age int)
+  USING CSS OPTIONS (
+  es.nodes 'to-css-1174404703-LzwpJEyx.datasource.com:9200',
+  resource '/dli_index/dli_type',
+  pushdown 'false',
+  strict 'true',
+  es.nodes.wan.only 'true',
+  es.mapping.id 'doc_id');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0202.html b/docs/dli/sqlreference/dli_08_0202.html new file mode 100644 index 00000000..02e3e32d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0202.html @@ -0,0 +1,99 @@ + + +

Inserting Data to the CSS Table

+

Function

This statement is used to insert data in a DLI table to the associated CSS table.

+
+

Syntax

+
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Indicates the table that contains the data to be queried.

+

field1,field2..., field

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

+
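The following is a minimal sketch based on the dli_to_css table (doc_id, name, age) created in the previous section; the inserted values and the dli_test source table are illustrative.

-- Insert literal rows directly; doc_id is used as the Elasticsearch document ID (es.mapping.id)
INSERT INTO dli_to_css
  VALUES ('d001', 'Jordan', 30), ('d002', 'Kobe', 31);

-- Insert the query result of a source table
INSERT INTO dli_to_css
  SELECT doc_id, name, age
  FROM dli_test
  WHERE age > 18;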
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0203.html b/docs/dli/sqlreference/dli_08_0203.html new file mode 100644 index 00000000..91afb0dd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0203.html @@ -0,0 +1,26 @@ + + +

Querying the CSS Table

+

This statement is used to query data in a CSS table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+

Precautions

The table to be queried must exist. Otherwise, an error is reported.

+
+

Example

To query data in the dli_to_css table, enter the following statement:

+
1
SELECT * FROM dli_to_css limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0204.html b/docs/dli/sqlreference/dli_08_0204.html new file mode 100644 index 00000000..95310f11 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0204.html @@ -0,0 +1,138 @@ + + +

Creating a DLI Table Using the Hive Syntax

+

Function

This Hive syntax is used to create a DLI table. The main differences between the DataSource and the Hive syntax lie in the supported data formats and the number of supported partitions. For details, see syntax and precautions.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 
+  [(col_name1 col_type1 [COMMENT col_comment1], ...)]
+  [COMMENT table_comment] 
+  [PARTITIONED BY (col_name2 col_type2, [COMMENT col_comment2], ...)] 
+  [ROW FORMAT row_format]
+  STORED AS file_format 
+  [TBLPROPERTIES (key1=val1, key2=val2, ...)]
+  [AS select_statement];
+
+row_format:
+  : SERDE serde_cls [WITH SERDEPROPERTIES (key1=val1, key2=val2, ...)]
+  | DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]]
+      [COLLECTION ITEMS TERMINATED BY char]
+      [MAP KEYS TERMINATED BY char]
+      [LINES TERMINATED BY char]
+      [NULL DEFINED AS char]
+
+ +
+
+

Keyword

+
+

Parameter Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). The value cannot contain only digits and cannot start with a digit or underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

col_name

+

Column names with data types separated by commas (,). The column name contains letters, digits, and underscores (_). It cannot contain only digits and must contain at least one letter.

+

col_type

+

Field type

+

col_comment

+

Field description

+

row_format

+

Line data format

+

file_format

+

Data storage format: TEXTFILE, AVRO, ORC, SEQUENCEFILE, RCFILE, PARQUET.

+

table_comment

+

Table description

+

select_statement

+

The CREATE TABLE AS statement is used to insert the SELECT query result of the source table or a data record to a newly created DLI table.

+
+
+
+

Precautions

+
+

Example

+
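The following is a minimal sketch of the syntax above; the table name, columns, and partition column are hypothetical.

CREATE TABLE IF NOT EXISTS student (
  name STRING COMMENT 'student name',
  score DOUBLE COMMENT 'total score')
  COMMENT 'student score table'
  PARTITIONED BY (classNo INT)
  STORED AS PARQUET;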
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0205.html b/docs/dli/sqlreference/dli_08_0205.html new file mode 100644 index 00000000..3248d403 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0205.html @@ -0,0 +1,64 @@ + + +

Exporting Search Results

+

Function

This statement is used to directly write query results to a specified directory. The query results can be stored in CSV, Parquet, ORC, JSON, or Avro format.

+
+

Syntax

1
+2
+3
+4
INSERT OVERWRITE DIRECTORY path
+  USING file_format
+  [OPTIONS(key1=value1)]
+  select_statement;
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + +
Table 1 INSERT OVERWRITE DIRECTORY parameter description

Parameter

+

Description

+

path

+

The OBS path to which the query result is to be written.

+

file_format

+

Format of the file to be written. The value can be CSV, Parquet, ORC, JSON, or Avro.

+
+
+

If the file format is set to CSV, see Table 3 for the OPTIONS parameters.

+
+
+

Precautions

+
+

Example

1
+2
+3
+4
INSERT OVERWRITE DIRECTORY 'obs://bucket/dir'
+  USING csv
+  OPTIONS(key1=value1)
+  select * from db1.tb1;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0206.html b/docs/dli/sqlreference/dli_08_0206.html new file mode 100644 index 00000000..00debeb8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0206.html @@ -0,0 +1,34 @@ + + +

Table-Valued Functions

+

Table-valued functions can convert one row of records into multiple rows or convert one column of records into multiple columns. Table-valued functions can only be used in JOIN LATERAL TABLE.

+ +
+ + + + + + + + + +
Table 1 Table-valued functions

Function

+

Return Data Type

+

Description

+

split_cursor(value, delimiter)

+

cursor

+

Separates the "value" string into multiple rows of strings by using the delimiter.

+
+
+

Example

Input one record ("student1", "student2, student3") and output two records ("student1", "student2") and ("student1", "student3").

+
create source stream s1(attr1 string, attr2 string) with (......);
+insert into s2 select  attr1, b1 from s1 left join lateral table(split_cursor(attr2, ',')) as T(b1) on true;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0207.html b/docs/dli/sqlreference/dli_08_0207.html new file mode 100644 index 00000000..d127ad78 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0207.html @@ -0,0 +1,368 @@ + + +

Data Type

+

Overview

Data type is a basic attribute of data and used to distinguish different types of data. Different data types occupy different storage space and support different operations. Data is stored in data tables in the database. Each column of a data table defines the data type. During storage, data must be stored according to data types.

+

Similar to the open source community, Flink SQL of the big data platform supports both native data types and complex data types.

+
+

Primitive Data Types

Table 1 lists native data types supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Primitive Data Types

Data Type

+

Description

+

Storage Space

+

Value Range

+

VARCHAR

+

Character with a variable length

+

-

+

-

+

BOOLEAN

+

Boolean

+

-

+

TRUE/FALSE

+

TINYINT

+

Signed integer

+

1 byte

+

-128 to 127

+

SMALLINT

+

Signed integer

+

2 bytes

+

-32768 to 32767

+

INT

+

Signed integer

+

4 bytes

+

–2147483648 to 2147483647

+

INTEGER

+

Signed integer

+

4 bytes

+

–2147483648 to 2147483647

+

BIGINT

+

Signed integer

+

8 bytes

+

–9223372036854775808 to 9223372036854775807

+

REAL

+

Single-precision floating point

+

4 bytes

+

-

+

FLOAT

+

Single-precision floating point

+

4 bytes

+

-

+

DOUBLE

+

Double-precision floating-point

+

8 bytes

+

-

+

DECIMAL

+

Data type of valid fixed places and decimal places

+

-

+

-

+

DATE

+

Date type in the format of yyyy-MM-dd, for example, 2014-05-29

+

-

+

DATE does not contain time information. Its value ranges from 0000-01-01 to 9999-12-31.

+

TIME

+

Time type in the format of HH:MM:SS

+

For example, 20:17:40

+

-

+

-

+

TIMESTAMP(3)

+

Timestamp of date and time

+

For example, 1969-07-20 20:17:40

+

-

+

-

+

INTERVAL timeUnit [TO timeUnit]

+

Time interval

+

For example, INTERVAL '1:5' YEAR TO MONTH, INTERVAL '45' DAY

+

-

+

-

+
+
+
+

Complex Data Types

Flink SQL supports complex data types and complex type nesting. Table 2 describes complex data types.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Complex Data Types

Data Type

+

Description

+

Declaration Method

+

Reference Method

+

Construction Method

+

ARRAY

+

Indicates a group of ordered fields that are of the same data type.

+

ARRAY[TYPE]

+

Variable name [subscript]. The subscript starts from 1, for example, v1[1].

+

Array[value1, value2, ...] as v1

+

MAP

+

Indicates a group of unordered key/value pairs. The key must be of a native data type, but the value can be of either a native or a complex data type. All keys in a MAP must be of the same type, and all values must be of the same type.

+

MAP [TYPE, TYPE]

+

Variable name [key], for example, v1[key]

+

Map[key, value, key2, value2, key3, value3.......] as v1

+

ROW

+

Indicates a group of named fields. The data types of the fields can be different.

+

ROW<a1 TYPE1, a2 TYPE2>

+

Variable name. Field name, for example, v1.a1.

+

Row('1',2) as v1

+
+
+
Here is a sample code:
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
CREATE SOURCE STREAM car_infos (
+  car_id STRING,
+  address ROW<city STRING, province STRING, country STRING>,
+  average_speed MAP[STRING, LONG],
+  speeds ARRAY[LONG]
+) 
+  WITH (
+    type = "dis",
+    region = "xxx",
+    channel = "dliinput",
+    encode = "json"
+);
+
+CREATE temp STREAM car_speed_infos (
+  car_id STRING,
+  province STRING,
+  average_speed LONG,
+  start_speed LONG
+);
+
+INSERT INTO car_speed_infos SELECT
+   car_id,
+   address.province,
+   average_speed[address.city],
+   speeds[1]
+FROM car_infos;
+
+ +
+
+
+

Complex Type Nesting

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0209.html b/docs/dli/sqlreference/dli_08_0209.html new file mode 100644 index 00000000..2b48273f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0209.html @@ -0,0 +1,359 @@ + + +

Geographical Functions

+

Function description

Table 1 describes the basic geospatial geometric elements.

+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 1 Basic geospatial geometric element table

Geospatial geometric elements

+

Description

+

Example Value

+

ST_POINT(latitude, longitude)

+

Indicates a geographical point, including the longitude and latitude.

+

ST_POINT(1.12012, 1.23401)

+

ST_LINE(array[point1...pointN])

+

Indicates a geographical line formed by connecting multiple geographical points (ST_POINT) in sequence. The line can be a polygonal line or a straight line.

+

ST_LINE(ARRAY[ST_POINT(1.12, 2.23), ST_POINT(1.13, 2.44), ST_POINT(1.13, 2.44)])

+

ST_POLYGON(array[point1...point1])

+

Indicates a geographical polygon, which is a closed polygon area formed by connecting multiple geographical points (ST_POINT) with the same start and end points in sequence.

+

ST_POLYGON(ARRAY[ST_POINT(1.0, 1.0), ST_POINT(2.0, 1.0), ST_POINT(2.0, 2.0), ST_POINT(1.0, 1.0)])

+

ST_CIRCLE(point, radius)

+

Indicates a geographical circle that consists of ST_POINT and a radius.

+

ST_CIRCLE(ST_POINT(1.0, 1.0), 1.234)

+
+
+

You can build complex geospatial geometries based on basic geospatial geometric elements. Table 2 describes the related transformation methods.

+ +
+ + + + + + + + + + + + + + + + + +
Table 2 Transformation methods for building complex geometric elements based on basic geospatial geometric elements

Transformation Method

+

Description

+

Example Value

+

ST_BUFFER(geometry, distance)

+

Creates a polygon that surrounds the geospatial geometric elements at a given distance. Generally, this function is used to build the road area of a certain width for yaw detection.

+

ST_BUFFER(ST_LINE(ARRAY[ST_POINT(1.12, 2.23), ST_POINT(1.13, 2.44), ST_POINT(1.13, 2.44)]),1.0)

+

ST_INTERSECTION(geometry, geometry)

+

Creates a polygon that delimits the overlapping area of two given geospatial geometric elements.

+

ST_INTERSECTION(ST_CIRCLE(ST_POINT(1.0, 1.0), 2.0), ST_CIRCLE(ST_POINT(3.0, 1.0), 1.234))

+

ST_ENVELOPE(geometry)

+

Creates the minimal rectangle polygon including the given geospatial geometric elements.

+

ST_ENVELOPE(ST_CIRCLE(ST_POINT(1.0, 1.0), 2.0))

+
+
+

DLI provides multiple functions used for performing operations on and determining locations of geospatial geometric elements. Table 3 describes the SQL scalar functions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 SQL scalar function table

Function

+

Return Type

+

Description

+

ST_DISTANCE(point_1, point_2)

+

DOUBLE

+

Calculates the Euclidean distance between the two geographical points.

+

The following provides an example:

+

Select ST_DISTANCE(ST_POINT(x1, y1), ST_POINT(x2, y2)) FROM input

+

ST_GEODESIC_DISTANCE(point_1, point_2)

+

DOUBLE

+

Calculates the shortest distance along the surface between two geographical points.

+

The following provides an example:

+

Select ST_GEODESIC_DISTANCE(ST_POINT(x1, y1), ST_POINT(x2, y2)) FROM input

+

ST_PERIMETER(polygon)

+

DOUBLE

+

Calculates the perimeter of a polygon.

+

The following provides an example:

+

Select ST_PERIMETER(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]) FROM input

+

ST_AREA(polygon)

+

DOUBLE

+

Calculates the area of a polygon.

+

The following provides an example:

+

Select ST_AREA(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]) FROM input

+

ST_OVERLAPS(polygon_1, polygon_2)

+

BOOLEAN

+

Checks whether one polygon overlaps with another.

+

The following provides an example:

+

SELECT ST_OVERLAPS(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]), ST_POLYGON(ARRAY[ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23), ST_POINT(x21, y21)])) FROM input

+

ST_INTERSECT(line1, line2)

+

BOOLEAN

+

Checks whether the two line segments themselves intersect, rather than the two straight lines on which they lie.

+

The following provides an example:

+

SELECT ST_INTERSECT(ST_LINE(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12)]), ST_LINE(ARRAY[ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23)])) FROM input

+

ST_WITHIN(point, polygon)

+

BOOLEAN

+

Checks whether one point is contained inside a geometry (polygon or circle).

+

The following provides an example:

+

SELECT ST_WITHIN(ST_POINT(x11, y11), ST_POLYGON(ARRAY[ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23), ST_POINT(x21, y21)])) FROM input

+

ST_CONTAINS(polygon_1, polygon_2)

+

BOOLEAN

+

Checks whether the first geometry contains the second geometry.

+

The following provides an example:

+

SELECT ST_CONTAINS(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]), ST_POLYGON(ARRAY[ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23), ST_POINT(x21, y21)])) FROM input

+

ST_COVERS(polygon_1, polygon_2)

+

BOOLEAN

+

Checks whether the first geometry covers the second geometry. This function is similar to ST_CONTAINS except when judging the relationship between a polygon and its boundary line, for which ST_COVERS returns TRUE and ST_CONTAINS returns FALSE.

+

The following provides an example:

+

SELECT ST_COVERS(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]), ST_POLYGON([ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23), ST_POINT(x21, y21)])) FROM input

+

ST_DISJOINT(polygon_1, polygon_2)

+

BOOLEAN

+

Checks whether one polygon is disjoint (not overlapped) with the other polygon.

+

The following provides an example:

+

SELECT ST_DISJOINT(ST_POLYGON(ARRAY[ST_POINT(x11, y11), ST_POINT(x12, y12), ST_POINT(x11, y11)]), ST_POLYGON(ARRAY[ST_POINT(x21, y21), ST_POINT(x22, y22), ST_POINT(x23, y23), ST_POINT(x21, y21)])) FROM input

+
+
+

The World Geodetic System 1984 (WGS84) is used as the reference coordinate system for geographical functions. Due to offsets, the GPS coordinates cannot be directly used in the Baidu Map (compliant with BD09) and the Google Map (compliant with GCJ02). To implement switchover between different geographical coordinate systems, DLI provides a series of functions related to coordinate system conversion as well as functions related to conversion between geographical distances and the unit meter. For details, see Table 4.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 4 Functions for geographical coordinate system conversion and distance-unit conversion

Function

+

Return Type

+

Description

+

WGS84_TO_BD09(geometry)

+

Geospatial geometric elements in the Baidu Map coordinate system

+

Converts the geospatial geometric elements in the GPS coordinate system into those in the Baidu Map coordinate system. The following provides an example:

+

WGS84_TO_BD09(ST_CIRCLE(ST_POINT(x, y), r))

+

WGS84_TO_CJ02(geometry)

+

Geospatial geometric elements in the Google Map coordinate system

+

Converts the geospatial geometric elements in the GPS coordinate system into those in the Google Map coordinate system. The following provides an example:

+

WGS84_TO_CJ02(ST_CIRCLE(ST_POINT(x, y), r))

+

BD09_TO_WGS84(geometry)

+

Geospatial geometric elements in the GPS coordinate system

+

Converts the geospatial geometric elements in the Baidu Map coordinate system into those in the GPS coordinate system. The following provides an example:

+

BD09_TO_WGS84(ST_CIRCLE(ST_POINT(x, y), r))

+

BD09_TO_CJ02(geometry)

+

Geospatial geometric elements in the Google Map coordinate system

+

Converts the geospatial geometric elements in the Baidu Map coordinate system into those in the Google Map coordinate system. The following provides an example:

+

BD09_TO_CJ02(ST_CIRCLE(ST_POINT(x, y), r))

+

CJ02_TO_WGS84(geometry)

+

Geospatial geometric elements in the GPS coordinate system

+

Converts the geospatial geometric elements in the Google Map coordinate system into those in the GPS coordinate system. The following provides an example:

+

CJ02_TO_WGS84(ST_CIRCLE(ST_POINT(x, y), r))

+

CJ02_TO_BD09(geometry)

+

Geospatial geometric elements in the Baidu Map coordinate system

+

Converts the geospatial geometric elements in the Google Map coordinate system into those in the Baidu Map coordinate system. The following provides an example:

+

CJ02_TO_BD09(ST_CIRCLE(ST_POINT(x, y), r))

+

DEGREE_TO_METER(distance)

+

DOUBLE

+

Converts the distance value of the geographical function to a value in the unit of meter. In the following example, you calculate the circumference of a triangle in the unit of meter.

+

DEGREE_TO_METER(ST_PERIMETER(ST_POLYGON(ARRAY[ST_POINT(x1,y1), ST_POINT(x2,y2), ST_POINT(x3,y3), ST_POINT(x1,y1)])))

+

METER_TO_DEGREE(numerical_value)

+

DOUBLE

+

Converts a value in meters to a distance value that can be used by the geographical functions. In the following example, you draw a circle which takes a specified geographical point as the center and has a radius of 1 km.

+

ST_CIRCLE(ST_POINT(x,y), METER_TO_DEGREE(1000))

+
+
+

DLI also provides window-based SQL geographical aggregation functions specific for scenarios where SQL logic involves windows and aggregation. For details about the functions, see Table 5.

+ +
+ + + + + + + + + + + + + +
Table 5 Time-related SQL geographical aggregation function table

Function

+

Description

+

Example Value

+

AGG_DISTANCE(point)

+

Distance aggregation function, which is used to calculate the total distance of all adjacent geographical points in the window.

+

SELECT AGG_DISTANCE(ST_POINT(x,y)) FROM input GROUP BY HOP(rowtime, INTERVAL '1' HOUR, INTERVAL '1' DAY)

+

AVG_SPEED(point)

+

Average speed aggregation function, which is used to calculate the average speed of moving tracks formed by all geographical points in a window. The average speed is in the unit of m/s.

+

SELECT AVG_SPEED(ST_POINT(x,y)) FROM input GROUP BY TUMBLE(proctime, INTERVAL '1' DAY)

+
+
+
+

Precautions

None

+
+

Example

Example of yaw detection:

+
1
+2
+3
+4
INSERT INTO yaw_warning
+SELECT "The car is yawing"
+FROM driver_behavior
+WHERE NOT ST_WITHIN(ST_POINT(cast(Longitude as DOUBLE), cast(Latitude as DOUBLE)), ST_BUFFER(ST_LINE(ARRAY[ST_POINT(34.585555,105.725221),ST_POINT(34.586729,105.735974),ST_POINT(34.586492,105.740538),ST_POINT(34.586388,105.741651),ST_POINT(34.586135,105.748712),ST_POINT(34.588691,105.74997)]),0.001));
+
+ +
+
+

IP Functions

Currently, only IPv4 addresses are supported.

+
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 6 IP functions

Function

+

Return Type

+

Description

+

IP_TO_COUNTRY

+

STRING

+

Obtains the name of the country where the IP address is located.

+

IP_TO_PROVINCE

+

STRING

+

Obtains the province where the IP address is located.

+

Usage:

+
  • IP_TO_PROVINCE(STRING ip): Determines the province where the IP address is located and returns the province name.
  • IP_TO_PROVINCE(STRING ip, STRING lang): Determines the province where the IP is located and returns the province name of the specified language.
    NOTE:
    • If the province where the IP address is located cannot be obtained through IP address parsing, the country where the IP address is located is returned. If the IP address cannot be parsed, Unknown is returned.
    • The name returned by the function for the province is the short name.
    +
    +
+

IP_TO_CITY

+

STRING

+

Obtains the name of the city where the IP address is located.

+
NOTE:

If the city where the IP address is located cannot be obtained through IP address parsing, the province or the country where the IP address is located is returned. If the IP address cannot be parsed, Unknown is returned.

+
+

IP_TO_CITY_GEO

+

STRING

+

Obtains the longitude and latitude of the city where the IP address is located. The return value is in the following format: Latitude,Longitude.

+

Usage:

+

IP_TO_CITY_GEO(STRING ip): Returns the longitude and latitude of the city where the IP address is located.

+
+
+
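As a usage illustration for the IP functions above, the following is a minimal sketch that assumes a stream named input with a STRING field ip, consistent with the FROM input pattern used in the geographical function examples.

//Look up the location attributes of each IP address in the stream.
SELECT
  IP_TO_COUNTRY(ip) AS country,
  IP_TO_PROVINCE(ip) AS province,
  IP_TO_CITY(ip) AS city,
  IP_TO_CITY_GEO(ip) AS city_geo
FROM input;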

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0216.html b/docs/dli/sqlreference/dli_08_0216.html new file mode 100644 index 00000000..6d5966b1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0216.html @@ -0,0 +1,76 @@ + + +

Real-Time Clustering

+

Clustering algorithms belong to unsupervised algorithms. K-Means, a clustering algorithm, partitions data points into related clusters by calculating the distance between data points based on the predefined cluster quantity. For offline static datasets, we can determine the clusters based on field knowledge and run K-Means to achieve a better clustering effect. However, online real-time streaming data is always changing and evolving, and the cluster quantity is likely to change. To address clustering issues on online real-time streaming data, DLI provides a low-delay online clustering algorithm that does not require predefined cluster quantity.

+

The algorithm works as follows: Given a distance function, if the distance between two data points is less than a threshold, both data points will be partitioned into the same cluster. If the distances between a data point and the centroids of several clusters are all less than the threshold, the related clusters are merged. When data in a data stream arrives, the algorithm computes the distance between each data point and the centroids of all clusters to determine whether the data point should be partitioned into an existing cluster or a new cluster.

+

Syntax

1
+2
+3
+4
CENTROID(ARRAY[field_names], distance_threshold): Compute the centroid of the cluster where the current data point is assigned.
+CLUSTER_CENTROIDS(ARRAY[field_names], distance_threshold): Compute all centroids after the data point is assigned.
+ALL_POINTS_OF_CLUSTER(ARRAY[field_names], distance_threshold): Compute all data points in the cluster where the current data point is assigned.
+ALL_CLUSTERS_POINTS(ARRAY[field_names], distance_threshold): Computes all data points in each cluster after the current data point is assigned.
+
+ +
+
  • Clustering algorithms can be applied in unbounded streams.
+
+
+

Parameter Description

+
+ + + + + + + + + + + + + +
Table 1 Parameter Description

Parameter

+

Mandatory

+

Description

+

field_names

+

Yes

+

Name of the field where the data is located in the data stream. Multiple fields are separated by commas (,). For example, ARRAY[a, b, c].

+

distance_threshold

+

Yes

+

Distance threshold. When the distance between two data points is less than the threshold, both data points are placed in the same cluster.

+
+
+
+

Example

Use four functions to compute information related to clusters over windows.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
SELECT 
+  CENTROID(ARRAY[c,e], 1.0) OVER (ORDER BY proctime RANGE UNBOUNDED PRECEDING) AS centroid,
+  CLUSTER_CENTROIDS(ARRAY[c,e], 1.0) OVER (ORDER BY proctime RANGE UNBOUNDED PRECEDING) AS centroids
+FROM MyTable
+
+SELECT 
+  CENTROID(ARRAY[c,e], 1.0) OVER (ORDER BY proctime RANGE BETWEEN INTERVAL '60' MINUTE PRECEDING AND CURRENT ROW) AS centroidCE, 
+  ALL_POINTS_OF_CLUSTER(ARRAY[c,e], 1.0) OVER (ORDER BY proctime RANGE BETWEEN INTERVAL '60' MINUTE PRECEDING AND CURRENT ROW) AS itemList,
+  ALL_CLUSTERS_POINTS(ARRAY[c,e], 1.0) OVER (ORDER BY proctime RANGE  BETWEEN INTERVAL '60' MINUTE PRECEDING AND CURRENT ROW) AS listoflistofpoints
+FROM MyTable
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0217.html b/docs/dli/sqlreference/dli_08_0217.html new file mode 100644 index 00000000..eb462306 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0217.html @@ -0,0 +1,45 @@ + + +

Clearing Data

+

Function

This statement is used to delete data from the DLI or OBS table.

+
+

Syntax

1
TRUNCATE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)];
+
+ +
+
+

Keyword

+
+ + + + + + + + + + +
Table 1 Parameter

Parameter

+

Description

+

tablename

+

Name of the DLI or OBS table on which the TRUNCATE statement is executed.

+

partcol1

+

Partition column of the DLI or OBS table whose data is to be cleared.

+
+
+
+

Precautions

Only data in the DLI or OBS table can be deleted.

+
+

Example

1
truncate table test PARTITION (class = 'test');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0218.html b/docs/dli/sqlreference/dli_08_0218.html new file mode 100644 index 00000000..5f003702 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0218.html @@ -0,0 +1,222 @@ + + +

Window

+

GROUP WINDOW

Description

+

Group Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:

+
  • time_attr can be processing-time or event-time.
    • event-time: Set the data type to bigint or timestamp.
    • processing-time: No need to specify the type.
    +
  • interval specifies the window period.
+
+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
//Calculate the SUM every day (event time).
+insert into temp SELECT name,
+    TUMBLE_START(ts, INTERVAL '1' DAY) as wStart,
+    SUM(amount)
+    FROM Orders
+    GROUP BY TUMBLE(ts, INTERVAL '1' DAY), name;
+
+//Calculate the SUM every day (processing time). 
+insert into temp SELECT name, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY TUMBLE(proctime, INTERVAL '1' DAY), name;
+
+//Calculate the SUM over the recent 24 hours every hour (event time).
+insert into temp SELECT product, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY HOP(ts, INTERVAL '1' HOUR, INTERVAL '1' DAY), product;
+
+//Calculate the SUM of each session with an inactivity gap of 12 hours (event time).
+insert into temp SELECT name, 
+    SESSION_START(ts, INTERVAL '12' HOUR) AS sStart,
+    SESSION_END(ts, INTERVAL '12' HOUR) AS sEnd,
+    SUM(amount)
+    FROM Orders
+    GROUP BY SESSION(ts, INTERVAL '12' HOUR), name;
+
+ +
+

+
+

OVER WINDOW

The difference between Over Window and Group Window is that one record is generated from one row in Over Window.

+

Syntax

+
1
+2
+3
+4
OVER (
+  [PARTITION BY partition_name]
+  ORDER BY proctime|rowtime(ROWS number PRECEDING) |(RANGE (BETWEEN INTERVAL '1' SECOND PRECEDING AND CURRENT ROW | UNBOUNDED preceding))
+)
+
+ +
+

Description

+ +
+ + + + + + + + + + + + + + + + +
Table 3 Parameter description

Parameter

+

Parameter Description

+

PARTITION BY

+

Specifies the key by which the data is grouped. Calculation is performed separately for each group.

+

ORDER BY

+

Indicates the processing time or event time as the timestamp for data.

+

ROWS

+

Indicates the count window.

+

RANGE

+

Indicates the time window.

+
+
+

Precautions

+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
//Calculate the count and total number from syntax rules enabled to now (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt2
+    FROM Orders;
+  
+//Calculate the count and total number of the recent four records (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+//Calculate the count and total number last 60s (in eventtime). Process the events based on event time, which is the timeattr field in Orders.
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+ +
+

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0219.html b/docs/dli/sqlreference/dli_08_0219.html new file mode 100644 index 00000000..fbcfbfac --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0219.html @@ -0,0 +1,269 @@ + + +

SQL Syntax Overview of Batch Jobs

+

This section describes the Spark SQL syntax list provided by DLI. For details about the parameters and examples, see the syntax description.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 SQL syntax of batch jobs

Classification

+

Function

+

Database-related Syntax

+

Creating a Database

+

Deleting a Database

+

Viewing a Specified Database

+

Viewing All Databases

+

Syntax for Creating an OBS Table

+

Creating an OBS Table Using the Datasource Syntax

+

Creating an OBS Table Using the Hive Syntax

+

Syntax for Creating a DLI Table

+

Creating a DLI Table Using the Datasource Syntax

+

Creating a DLI Table Using the Hive Syntax

+

Syntax for Deleting a Table

+

Deleting a Table

+

Syntax for Viewing a Table

+

Viewing All Tables

+

Viewing Table Creation Statements

+

Viewing Table Properties

+

Viewing All Columns in a Specified Table

+

Viewing All Partitions in a Specified Table

+

Viewing Table Statistics

+

Syntax for Modifying a Table

+

Adding a Column

+

Syntax for Partitioning a Table

+

Adding a Partition (Only OBS Tables Supported)

+

Renaming a Partition

+

Deleting a Partition

+

Altering the Partition Location of a Table (Only OBS Tables Supported)

+

Updating Partitioned Table Data (Only OBS Tables Supported)

+

Syntax for Importing Data

+

Importing Data

+

Syntax for Inserting Data

+

Inserting Data

+

Syntax for Clearing Data

+

Clearing Data

+

Syntax for Exporting Query Results

+

Exporting Query Result

+

Syntax for Datasource Connection to an HBase Table

+

Creating a Table and Associating It with HBase

+

Inserting Data to an HBase Table

+

Querying an HBase Table

+

Syntax for Datasource Connection to an OpenTSDB Table

+

Creating a Table and Associating It with OpenTSDB

+

Inserting Data to an OpenTSDB Table

+

Querying an OpenTSDB Table

+

Syntax for Datasource Connection to a DWS Table

+

Creating a Table and Associating It with DWS

+

Inserting Data to a DWS Table

+

Querying a DWS Table

+

Syntax for Datasource Connection to an RDS Table

+

Creating a Table and Associating It with RDS

+

Inserting Data to an RDS Table

+

Querying an RDS Table

+

Syntax for Datasource Connection to a CSS Table

+

Creating a Table and Associating It with CSS

+

Inserting Data to a CSS Table

+

Querying a CSS Table

+

Syntax for Datasource Connection to a DCS Table

+

+

+

Creating a Table and Associating It with DCS

+

Inserting Data to a DCS Table

+

Querying a DCS Table

+

Syntax for Datasource Connection to a DDS Table

+

+

+

Creating a Table and Associating It with DDS

+

Inserting Data to a DDS Table

+

Querying a DDS Table

+

View-related Syntax

+

Creating a View

+

Deleting a View

+

Syntax for Viewing the Execution Plan

+

Viewing the Execution Plan

+

Syntax Related to Data Permissions

+

Creating a Role

+

Deleting a Role

+

Binding a Role

+

Unbinding a Role

+

Displaying a Role

+

Granting a Permission

+

Revoking a Permission

+

Displaying the Granted Permissions

+

Displaying the Binding Relationship Between All Roles and Users

+

UDF-related Syntax

+

+

+

+

Creating a Function

+

Deleting a Function

+

Displaying Function Details

+

Displaying All Functions

+

Multiversion-related Syntax

+

Enabling Multiversion Backup When Creating an OBS Table

+

Enabling or Disabling Multiversion Backup When Modifying a Table

+

Setting the Retention Period for Multiversion Backup Data

+

Viewing Multiversion Backup Data

+

Restoring Multiversion Backup Data

+

Configuring the Trash Bin for Expired Multiversion Data

+

Deleting Multiversion Backup Data

+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0220.html b/docs/dli/sqlreference/dli_08_0220.html new file mode 100644 index 00000000..81ad9292 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0220.html @@ -0,0 +1,20 @@ + + +

Creating a Datasource Connection with an OpenTSDB Table

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0221.html b/docs/dli/sqlreference/dli_08_0221.html new file mode 100644 index 00000000..e4c5a683 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0221.html @@ -0,0 +1,86 @@ + + +

Spark SQL Syntax Reference

+

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0223.html b/docs/dli/sqlreference/dli_08_0223.html new file mode 100644 index 00000000..594aec57 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0223.html @@ -0,0 +1,18 @@ + + +

Creating an OBS Table

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0224.html b/docs/dli/sqlreference/dli_08_0224.html new file mode 100644 index 00000000..fafa74dc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0224.html @@ -0,0 +1,18 @@ + + +

Creating a DLI Table

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0225.html b/docs/dli/sqlreference/dli_08_0225.html new file mode 100644 index 00000000..e8a7b6c8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0225.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with a DCS Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0226.html b/docs/dli/sqlreference/dli_08_0226.html new file mode 100644 index 00000000..6c40086d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0226.html @@ -0,0 +1,180 @@ + + +

Creating a DLI Table and Associating It with DCS

+

Function

This statement is used to create a DLI table and associate it with an existing DCS key.

+
+

Prerequisites

Before creating a DLI table and associating it with DCS, you need to create a datasource connection and bind it to a queue. For details about operations on the management console, see

+
+

Syntax

+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

host

+

To connect to DCS, you need to create a datasource connection first.

+

After creating an enhanced datasource connection, use the connection address provided by DCS. If there are multiple connection addresses, select one of them.

+
NOTE:

Currently, only enhanced datasource is supported.

+
+

port

+

DCS connection port, for example, 6379.

+

password

+

Password entered during DCS cluster creation. You do not need to set this parameter when accessing a non-secure Redis cluster.

+

passwdauth

+

Datasource password authentication name. For details about how to create datasource authentication, see Datasource Authentication in the Data Lake Insight User Guide.

+

encryption

+

Set this parameter to true when datasource password authentication is used.

+

table

+

The key or hash key in Redis.

+
  • This parameter is mandatory when Redis data is inserted.
  • Either this parameter or the keys.pattern parameter is required when Redis data is queried.
+

keys.pattern

+

Use a regular expression to match multiple keys or hash keys. This parameter is used only for query. Either this parameter or table is used to query Redis data.

+

key.column

+

(Optional) Specifies a field in the schema as the key ID in Redis. This parameter is used together with the table parameter when data is inserted.

+

partitions.number

+

Number of concurrent tasks during data reading.

+

scan.count

+

Number of data records read in each batch. The default value is 100. If the CPU usage of the Redis cluster still needs to be improved during data reading, increase the value of this parameter.

+

iterator.grouping.size

+

Number of data records inserted in each batch. The default value is 100. If the CPU usage of the Redis cluster still needs to be improved during the insertion, increase the value of this parameter.

+

timeout

+

Timeout interval for connecting to the Redis, in milliseconds. The default value is 2000 (2 seconds).

+
+
+

When connecting to DCS, complex data types such as Array, Struct, and Map are not supported.

+

The following methods can be used to process complex data:

+
  • Flatten nested fields so that they appear as fields at the same level of the schema.
  • Write and read data in binary mode, and encode and decode it using user-defined functions.
+
+
+

Example

+
1
+2
+3
+4
+5
+6
+7
create table test_redis(name string, age int) using redis options(
+  'host' = '192.168.4.199',
+  'port' = '6379',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true',
+  'table' = 'person'
+);
+
+ +
+ +
1
+2
+3
+4
+5
+6
+7
+8
create table test_redis_keys_patten(id string, name string, age int) using redis options(
+  'host' = '192.168.4.199',
+  'port' = '6379',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true',
+  'keys.pattern' = 'p*:*',
+  'key.column' = 'id'
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0227.html b/docs/dli/sqlreference/dli_08_0227.html new file mode 100644 index 00000000..57c03bac --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0227.html @@ -0,0 +1,128 @@ + + +

Inserting Data to a DCS Table

+

Function

This statement is used to insert data in a DLI table to the DCS key.

+
+

Syntax

+
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Indicates the table that contains the data to be queried.

+

field1,field2..., field

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted to a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

+
+

Example

1
+2
INSERT INTO test_redis
+  VALUES("James", 35), ("Michael", 22);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0228.html b/docs/dli/sqlreference/dli_08_0228.html new file mode 100644 index 00000000..1dbb8bc1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0228.html @@ -0,0 +1,24 @@ + + +

Querying the DCS Table

+

This statement is used to query data in a DCS table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+

Example

Query data in the test_redis table.

+
1
SELECT * FROM test_redis limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0229.html b/docs/dli/sqlreference/dli_08_0229.html new file mode 100644 index 00000000..327199ed --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0229.html @@ -0,0 +1,19 @@ + + +

Creating a Datasource Connection with a DDS Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0230.html b/docs/dli/sqlreference/dli_08_0230.html new file mode 100644 index 00000000..4bcdb6e6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0230.html @@ -0,0 +1,103 @@ + + +

Creating a DLI Table and Associating It with DDS

+

Function

This statement is used to create a DLI table and associate it with an existing DDS collection.

+
+

Prerequisites

Before creating a DLI table and associating it with DDS, you need to create a datasource connection and bind it to a queue. For details about operations on the management console, see

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
CREATE TABLE [IF NOT EXISTS] TABLE_NAME(
+    FIELDNAME1 FIELDTYPE1,
+    FIELDNAME2 FIELDTYPE2)
+  USING MONGO OPTIONS (
+  'url'='IP:PORT[,IP:PORT]/[DATABASE][.COLLECTION][AUTH_PROPERTIES]',
+  'database'='xx',
+  'collection'='xx',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true'
+);
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 CREATE TABLE parameter description

Parameter

+

Description

+

url

+

Before obtaining the DDS IP address, you need to create a datasource connection first.

+

After creating an enhanced datasource connection, use the random connection address provided by DDS. The format is as follows:

+

"IP:PORT[,IP:PORT]/[DATABASE][.COLLECTION][AUTH_PROPERTIES]"

+

Example: "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin"

+

database

+

DDS database name. If the database name is specified in the URL, the database name in the URL does not take effect.

+

collection

+

Collection name in the DDS. If the collection is specified in the URL, the collection in the URL does not take effect.

+

user

+

(Discarded) Username for accessing the DDS cluster.

+

password

+

(Discarded) Password for accessing the DDS cluster.

+

passwdauth

+

Datasource password authentication name. For details about how to create datasource authentication, see Datasource Authentication in the Data Lake Insight User Guide.

+

encryption

+

Set this parameter to true when datasource password authentication is used.

+
+
+

If a collection already exists in DDS, you do not need to specify schema information when creating a table. DLI automatically generates schema information based on data in the collection.

+
+
+

Example

1
+2
+3
+4
+5
+6
create table 1_datasource_mongo.test_mongo(id string, name string, age int) using mongo options(
+  'url' = '192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin',
+  'database' = 'test',
+  'collection' = 'test',
+  'passwdauth' = 'xxx',
+  'encryption' = 'true');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0231.html b/docs/dli/sqlreference/dli_08_0231.html new file mode 100644 index 00000000..3f66569e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0231.html @@ -0,0 +1,116 @@ + + +

Inserting Data to the DDS Table

+

Function

This statement is used to insert data in a DLI table to the associated DDS table.

+
+

Syntax

+ +
+

Keywords

For details about the SELECT keywords, see Basic SELECT Statements.

+
+

Parameter description

+
+ + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

DLI_TABLE

+

Name of the DLI table for which a datasource connection has been created.

+

DLI_TEST

+

Table that contains the data to be queried.

+

field1, field2, ...

+

Column values in the DLI_TEST table must match the column values and types in the DLI_TABLE table.

+

where_condition

+

Query condition.

+

num

+

Limit the query result. The num parameter supports only the INT type.

+

values_row

+

Value to be inserted into a table. Use commas (,) to separate columns.

+
+
+
+

Precautions

The target DLI table must exist.

+
+

Example
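The original example is not included in this extract. The following is a minimal sketch based on the parameters above, assuming the test_mongo association table from the previous section and a hypothetical source table dli_test_source with matching column types.

/* Insert the query result into the associated DDS table. */
INSERT INTO test_mongo
  SELECT id, name, age
  FROM dli_test_source
  WHERE age > 18
  LIMIT 100;

/* Insert a single row of values into the associated DDS table. */
INSERT INTO test_mongo
  VALUES ('1', 'Jack', 20);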

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0232.html b/docs/dli/sqlreference/dli_08_0232.html new file mode 100644 index 00000000..063a7b29 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0232.html @@ -0,0 +1,26 @@ + + +

Querying the DDS Table

+

This statement is used to query data in a DDS table.

+

Syntax

1
SELECT * FROM table_name LIMIT number;
+
+ +
+
+

Keyword

LIMIT is used to limit the query results. Only INT type is supported by the number parameter.

+
+

Precautions

If no schema information is specified during table creation, the query result contains the _id field, which stores the _id of each document.

+
+

Example

Query data in the test_mongo table.

+
1
SELECT * FROM test_mongo limit 100;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0233.html b/docs/dli/sqlreference/dli_08_0233.html new file mode 100644 index 00000000..cc2a0b08 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0233.html @@ -0,0 +1,52 @@ + + +

Flink SQL Syntax

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0234.html b/docs/dli/sqlreference/dli_08_0234.html new file mode 100644 index 00000000..1da0b7e3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0234.html @@ -0,0 +1,26 @@ + + +

Creating a Source Stream

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0235.html b/docs/dli/sqlreference/dli_08_0235.html new file mode 100644 index 00000000..4d73ef49 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0235.html @@ -0,0 +1,387 @@ + + +

DIS Source Stream

+

Function

Create a source stream to read data from DIS. DIS ingests user data, and the Flink job reads data from the DIS stream as its input. With a DIS source stream, Flink jobs can quickly consume data from producers for continuous processing. This applies to scenarios where data outside the cloud service is imported to the cloud service for filtering, real-time analysis, monitoring reports, and dumping.

+

DIS addresses the challenge of transmitting data outside cloud services to cloud services. DIS builds data intake streams for custom applications capable of processing or analyzing streaming data. DIS continuously captures, transmits, and stores terabytes of data from hundreds of thousands of sources every hour, such as logs, Internet of Things (IoT) data, social media feeds, website clickstreams, and location-tracking events. For more information about DIS, see the Data Ingestion Service User Guide.

+
+

Syntax

CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "dis",
+    region = "",
+    channel = "",
+    partition_count = "",
+    encode = "",
+    field_delimiter = "",
+    offset= "");
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. dis indicates that the data source is DIS.

+

region

+

Yes

+

Region where the DIS stream for storing the data is located.

+

ak

+

No

+

Access Key ID (AK).

+

sk

+

No

+

Specifies the secret access key used together with the ID of the access key.

+

channel

+

Yes

+

Name of the DIS stream where data is located.

+

partition_count

+

No

+

Number of partitions of the DIS stream where data is located. This parameter and partition_range cannot be configured at the same time. If this parameter is not specified, data of all partitions is read by default.

+

partition_range

+

No

+

Range of partitions of a DIS stream, data in which is ingested by the DLI job. This parameter and partition_count cannot be configured at the same time. If this parameter is not specified, data of all partitions is read by default.

+

If you set this parameter to [0:2], data will be read from partitions 1, 2, and 3.

+

encode

+

Yes

+

Data encoding format. The value can be csv, json, xml, email, blob, or user_defined.

+
  • field_delimiter must be specified if this parameter is set to csv.
  • json_config must be specified if this parameter is set to json.
  • xml_config must be specified if this parameter is set to xml.
  • email_key must be specified if this parameter is set to email.
  • If this parameter is set to blob, the received data is not parsed, only one stream attribute exists, and the data format is ARRAY[TINYINT].
  • encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.
+

field_delimiter

+

No

+

Attribute delimiter. This parameter is mandatory only when the CSV encoding format is used. You can set this parameter, for example, to a comma (,).

+

quote

+

No

+

Quoted symbol in a data format. The attribute delimiters between two quoted symbols are treated as common characters.

+
  • If double quotation marks are used as the quoted symbol, set this parameter to \u005c\u0022 for character conversion.
  • If a single quotation mark is used as the quoted symbol, set this parameter to a single quotation mark (').
+
NOTE:
  • Currently, only the CSV format is supported.
  • After this parameter is specified, ensure that each field does not contain quoted symbols or contains an even number of quoted symbols. Otherwise, parsing will fail.
+
+

json_config

+

No

+

When the encoding format is JSON, you need to use this parameter to specify the mapping between JSON fields and stream definition fields. The format is field1=data_json.field1; field2=data_json.field2; field3=$, where field3=$ indicates that the content of field3 is the entire JSON string.

+

xml_config

+

No

+

If encode is set to xml, you need to set this parameter to specify the mapping between the xml field and the stream definition field. An example of the format is as follows: field1=data_xml.field1; field2=data_xml.field2.

+

email_key

+

No

+

If encode is set to email, you need to set the parameter to specify the information to be extracted. You need to list the key values that correspond to stream definition fields. Multiple key values are separated by commas (,), for example, "Message-ID, Date, Subject, body". There is no keyword in the email body and DLI specifies "body" as the keyword.

+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+

offset

+

No

+
  • If data is imported to the DIS stream after the job is started, this parameter will become invalid.
  • If the job is started after data is imported to the DIS stream, you can set the parameter as required.

    For example, if offset is set to 100, DLI starts from the 100th data record in DIS.

    +
+

start_time

+

No

+

Start time for reading DIS data.

+
  • If this parameter is specified, DLI reads data from the specified time. The format is yyyy-MM-dd HH:mm:ss.
  • If neither start_time nor offset is specified, DLI reads the latest data.
  • If start_time is not specified but offset is specified, DLI reads data from the data record specified by offset.
+

enable_checkpoint

+

No

+

Whether to enable the checkpoint function. The value can be true (enabled) or false (disabled). The default value is false.

+

checkpoint_app_name

+

No

+

ID of a DIS consumer. If a DIS stream is consumed by different jobs, you need to configure the consumer ID for each job to avoid checkpoint confusion.

+

checkpoint_interval

+

No

+

Interval of checkpoint operations on the DIS source operator. The value is in the unit of seconds. The default value is 60.

+
+
+
+

Precautions

When creating a source stream, you can specify a time model for subsequent calculation. Currently, DLI supports two time models: Processing Time and Event Time. For details about the syntax, see Configuring Time Models.

+
+

Example
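The original example is not shown in this extract. The following is a minimal sketch of a CSV-encoded DIS source stream based on the syntax above; the stream name car_infos, the region value, and the channel name csinput are placeholders.

CREATE SOURCE STREAM car_infos (
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_price INT
)
  WITH (
    type = "dis",
    region = "xxx",
    channel = "csinput",
    partition_count = "1",
    encode = "csv",
    field_delimiter = ","
  );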

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0236.html b/docs/dli/sqlreference/dli_08_0236.html new file mode 100644 index 00000000..9690f527 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0236.html @@ -0,0 +1,181 @@ + + +

OBS Source Stream

+

Function

Create a source stream to obtain data from OBS. DLI reads data stored by users in OBS as input data for jobs. OBS applies to various scenarios, such as big data analysis, cloud-native application program data, static website hosting, backup/active archive, and deep/cold archive.

+

OBS is an object-based storage service. It provides massive, secure, highly reliable, and low-cost data storage capabilities. For more information about OBS, see the Object Storage Service Console Operation Guide.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "obs",
+    region = "",
+    bucket = "",
+    object_name = "",
+    row_delimiter = "\n",
+    field_delimiter = '',
+    version_id = ""
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. obs indicates that the data source is OBS.

+

region

+

Yes

+

Region to which OBS belongs.

+

encode

+

No

+

Data encoding format. The value can be csv or json. The default value is csv.

+

ak

+

No

+

Access Key ID (AK).

+

sk

+

No

+

Secret access key used together with the ID of the access key.

+

bucket

+

Yes

+

Name of the OBS bucket where data is located.

+

object_name

+

Yes

+

Name of the object stored in the OBS bucket where data is located. If the object is not in the OBS root directory, you need to specify the folder name, for example, test/test.csv. For the object file format, see the encode parameter.

+

row_delimiter

+

Yes

+

Separator used to separate every two rows.

+

field_delimiter

+

No

+

Separator used to separate every two attributes.

+
  • This parameter is mandatory when encode is set to csv. You can use a custom attribute separator.
  • If encode is json, you do not need to set this parameter.
+

quote

+

No

+

Quoted symbol in a data format. The attribute delimiters between two quoted symbols are treated as common characters.

+
  • If double quotation marks are used as the quoted symbol, set this parameter to \u005c\u0022 for character conversion.
  • If a single quotation mark is used as the quoted symbol, set this parameter to a single quotation mark (').
+
NOTE:
  • Currently, only the CSV format is supported.
  • After this parameter is specified, ensure that each field does not contain quoted symbols or contains an even number of quoted symbols. Otherwise, parsing will fail.
+
+

version_id

+

No

+

Version number. This parameter is required only when the OBS bucket or object has version settings.

+
+
+
+

Precautions

When creating a source stream, you can specify a time model for subsequent calculation. Currently, DLI supports two time models: Processing Time and Event Time. For details about the syntax, see Configuring Time Models.

+
+

Example
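The original example is not shown in this extract. The following is a minimal sketch of a CSV-encoded OBS source stream based on the syntax above; the bucket name obs-source and the object path input/car_infos.csv are placeholders.

CREATE SOURCE STREAM car_infos (
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_price INT
)
  WITH (
    type = "obs",
    region = "xxx",
    bucket = "obs-source",
    object_name = "input/car_infos.csv",
    encode = "csv",
    row_delimiter = "\n",
    field_delimiter = ","
  );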

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0237.html b/docs/dli/sqlreference/dli_08_0237.html new file mode 100644 index 00000000..652ba7c3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0237.html @@ -0,0 +1,119 @@ + + +

CloudTable HBase Source Stream

+

Function

Create a source stream to obtain data from HBase of CloudTable as input data of the job. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. DLI can read data from HBase for filtering, analysis, and data dumping.

+

CloudTable is a distributed, scalable, and fully-hosted key-value data storage service based on Apache HBase. It provides DLI with high-performance random read and write capabilities, which are helpful when applications need to store and query a massive amount of structured data, semi-structured data, and time series data. CloudTable applies to IoT scenarios and storage and query of massive volumes of key-value data. For more information about CloudTable, see the CloudTable Service User Guide.

+
+

Prerequisites

In this scenario, jobs must run on the dedicated queue of DLI. Therefore, DLI must interconnect with the enhanced datasource connection that has been connected with CloudTable HBase. You can also set the security group rules as required.

+

For details about how to create an enhanced datasource connection, see Enhanced Datasource Connections in the Data Lake Insight User Guide.

+

For details about how to configure security group rules, see Security Group in the Virtual Private Cloud User Guide.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "cloudtable",
+    region = "",
+    cluster_id = "",
+    table_name = "",
+    table_columns = ""
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. cloudtable indicates that the data source is CloudTable.

+

region

+

Yes

+

Region to which CloudTable belongs.

+

cluster_id

+

Yes

+

ID of the cluster to which the data table to be read belongs.

+

For details about how to view the ID of the CloudTable cluster, see section "Viewing Basic Cluster Information" in the CloudTable Service User Guide.

+

table_name

+

Yes

+

Name of the table from which data is to be read. If a namespace needs to be specified, set it to namespace_name:table_name.

+

table_columns

+

Yes

+

Column to be read. The format is rowKey,f1:c1,f1:c2,f2:c1. The number of columns must be the same as the number of attributes specified in the source stream.

+
+
+
+

Precautions

When creating a source stream, you can specify a time model for subsequent calculation. Currently, DLI supports two time models: Processing Time and Event Time. For details about the syntax, see Configuring Time Models.

+
+

Example

Read the car_infos table from HBase of CloudTable.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
CREATE SOURCE STREAM car_infos (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT
+)
+  WITH (
+    type = "cloudtable",
+    region = "xxx",
+    cluster_id = "209ab1b6-de25-4c48-8e1e-29e09d02de28",
+    table_name = "carinfo",
+    table_columns = "rowKey,info:owner,info:age,car:speed,car:miles"
+); 
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0238.html b/docs/dli/sqlreference/dli_08_0238.html new file mode 100644 index 00000000..7b990eb5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0238.html @@ -0,0 +1,211 @@ + + +

MRS Kafka Source Stream

+

Function

Create a source stream to obtain data from Kafka as input data for jobs.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages. Kafka clusters are deployed and hosted on MRS, which is powered by Apache Kafka.

+
+

Prerequisites

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "kafka",
+    kafka_bootstrap_servers = "",
+    kafka_group_id = "",
+    kafka_topic = "",
+    encode = "json"
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. kafka indicates that the data source is Kafka.

+

kafka_bootstrap_servers

+

Yes

+

Port that connects DLI to Kafka. Use enhanced datasource connections to connect DLI queues with Kafka clusters.

+

kafka_group_id

+

No

+

Group ID

+

kafka_topic

+

Yes

+

Kafka topic to be read. Currently, only one topic can be read at a time.

+

encode

+

Yes

+

Data encoding format. The value can be csv, json, blob, or user_defined.

+
  • field_delimiter must be specified if this parameter is set to csv.
  • json_config must be specified if this parameter is set to json.
  • If this parameter is set to blob, the received data is not parsed, only one stream attribute exists, and the stream attribute is of the Array[TINYINT] type.
  • encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.
+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+

krb_auth

+

No

+

Name of the datasource authentication created for the connection. This parameter is mandatory when Kerberos authentication is enabled.

+
NOTE:

Ensure that the /etc/hosts information of the master node in the MRS cluster is added to the host file of the DLI queue.

+
+

json_config

+

No

+

If encode is set to json, you can use this parameter to specify the mapping between JSON fields and stream attributes.

+

The format is field1=json_field1;field2=json_field2.

+

field1 and field2 indicate the names of the created table fields. json_field1 and json_field2 are key fields of the JSON strings in the Kafka input data.

+

For details, see the example.

+

field_delimiter

+

No

+

If encode is set to csv, you can use this parameter to specify the separator between CSV fields. By default, the comma (,) is used.

+

quote

+

No

+

Quoted symbol in a data format. The attribute delimiters between two quoted symbols are treated as common characters.

+
  • If double quotation marks are used as the quoted symbol, set this parameter to \u005c\u0022 for character conversion.
  • If a single quotation mark is used as the quoted symbol, set this parameter to a single quotation mark (').
+
NOTE:
  • Currently, only the CSV format is supported.
  • After this parameter is specified, ensure that each field does not contain quoted symbols or contains an even number of quoted symbols. Otherwise, parsing will fail.
+
+

start_time

+

No

+

Start time when Kafka data is ingested.

+

If this parameter is specified, DLI reads data from the specified time. The format is yyyy-MM-dd HH:mm:ss. Ensure that the value of start_time is not later than the current time. Otherwise, no data will be obtained.

+

kafka_properties

+

No

+

This parameter is used to configure the native attributes of Kafka. The format is key1=value1;key2=value2.

+

kafka_certificate_name

+

No

+

Specifies the name of the datasource authentication information. This parameter is valid only when the datasource authentication type is set to Kafka_SSL.

+
NOTE:
  • If this parameter is specified, the service loads only the specified file and password under the authentication. The system automatically sets this parameter to kafka_properties.
  • Other configuration information required for Kafka SSL authentication needs to be manually configured in the kafka_properties attribute.
+
+
+
+
+

Precautions

When creating a source stream, you can specify a time model for subsequent calculation. Currently, DLI supports two time models: Processing Time and Event Time. For details about the syntax, see Configuring Time Models.

+
+

Example
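The original example is not shown in this extract. The following is a minimal sketch of a JSON-encoded MRS Kafka source stream based on the syntax above; the broker addresses, group ID, and topic name are placeholders, and the JSON keys are assumed to match the stream attribute names.

CREATE SOURCE STREAM car_infos (
  car_id STRING,
  car_owner STRING,
  car_price INT
)
  WITH (
    type = "kafka",
    kafka_bootstrap_servers = "192.168.0.1:9092,192.168.0.2:9092",
    kafka_group_id = "sourcegroup1",
    kafka_topic = "car_infos_topic",
    encode = "json",
    json_config = "car_id=car_id;car_owner=car_owner;car_price=car_price"
  );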

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0239.html b/docs/dli/sqlreference/dli_08_0239.html new file mode 100644 index 00000000..8d43f192 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0239.html @@ -0,0 +1,220 @@ + + +

Open-Source Kafka Source Stream

+

Function

Create a source stream to obtain data from Kafka as input data for jobs.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "kafka",
+    kafka_bootstrap_servers = "",
+    kafka_group_id = "",
+    kafka_topic = "",
+    encode = "json",
+    json_config=""
+  );
+
+ +
+
+

Keywords

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. kafka indicates that the data source is Kafka.

+

kafka_bootstrap_servers

+

Yes

+

Port that connects DLI to Kafka. Use enhanced datasource connections to connect DLI queues with Kafka clusters.

+

kafka_group_id

+

No

+

Group ID.

+

kafka_topic

+

Yes

+

Kafka topic to be read. Currently, only one topic can be read at a time.

+

encode

+

Yes

+

Data encoding format. The value can be csv, json, blob, or user_defined.

+
  • field_delimiter must be specified if this parameter is set to csv.
  • json_config must be specified if this parameter is set to json.
  • If this parameter is set to blob, the received data will not be parsed, and only one Array[TINYINT] field exists in the table.
  • encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.
+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+

json_config

+

No

+

If encode is set to json, you can use this parameter to specify the mapping between JSON fields and stream attributes.

+

The format is field1=json_field1;field2=json_field2.

+

field1 and field2 indicate the names of the created table fields. json_field1 and json_field2 are key fields of the JSON strings in the Kafka input data.

+

For details, see Example.

+
NOTE:

If the attribute names in the source stream are the same as those in JSON fields, you do not need to set this parameter.

+
+

field_delimiter

+

No

+

If encode is set to csv, you can use this parameter to specify the separator between CSV fields. By default, the comma (,) is used.

+

quote

+

No

+

Quoted symbol in a data format. The attribute delimiters between two quoted symbols are treated as common characters.

+
  • If double quotation marks are used as the quoted symbol, set this parameter to \u005c\u0022 for character conversion.
  • If a single quotation mark is used as the quoted symbol, set this parameter to a single quotation mark (').
+
NOTE:
  • Currently, only the CSV format is supported.
  • After this parameter is specified, ensure that each field does not contain quoted symbols or contains an even number of quoted symbols. Otherwise, parsing will fail.
+
+

start_time

+

No

+

Start time when Kafka data is ingested.

+

If this parameter is specified, DLI reads data from the specified time. The format is yyyy-MM-dd HH:mm:ss. Ensure that the value of start_time is not later than the current time. Otherwise, no data will be obtained.

+

If you set this parameter, only the data generated after the specified time for the Kafka topic will be read.

+

kafka_properties

+

No

+

Native properties of Kafka. The format is key1=value1;key2=value2. For details about the property values, see the description in Apache Kafka.

+

kafka_certificate_name

+

No

+

Name of the datasource authentication information. This parameter is valid only when the datasource authentication type is set to Kafka_SSL.

+
NOTE:
  • If this parameter is specified, the service loads only the specified file and password under the authentication. The system automatically sets this parameter to kafka_properties.
  • Other configuration information required for Kafka SSL authentication needs to be manually configured in the kafka_properties attribute.
+
+
+
+
+

Precautions

When creating a source stream, you can specify a time model for subsequent calculation. Currently, DLI supports two time models: Processing Time and Event Time. For details about the syntax, see Configuring Time Models.

+
+

Example
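The original example is not shown in this extract. The following is a minimal sketch based on the syntax above; the broker addresses, group ID, topic name, and the JSON keys person_name and person_age are placeholders used to illustrate the json_config mapping.

CREATE SOURCE STREAM kafka_source (
  name STRING,
  age INT
)
  WITH (
    type = "kafka",
    kafka_bootstrap_servers = "192.168.0.1:9092,192.168.0.2:9092",
    kafka_group_id = "sourcegroup1",
    kafka_topic = "sourcetopic",
    encode = "json",
    json_config = "name=person_name;age=person_age"
  );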

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0240.html b/docs/dli/sqlreference/dli_08_0240.html new file mode 100644 index 00000000..082f726b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0240.html @@ -0,0 +1,48 @@ + + +

Creating a Sink Stream

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0241.html b/docs/dli/sqlreference/dli_08_0241.html new file mode 100644 index 00000000..8550f079 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0241.html @@ -0,0 +1,198 @@ + + +

DIS Sink Stream

+

Function

DLI writes the Flink job output data to DIS. This applies to scenarios where data is filtered and imported to a DIS stream for future processing.

+

DIS addresses the challenge of transmitting data outside cloud services to cloud services. DIS builds data intake streams for custom applications capable of processing or analyzing streaming data. DIS continuously captures, transmits, and stores terabytes of data from hundreds of thousands of sources every hour, such as logs, Internet of Things (IoT) data, social media feeds, website clickstreams, and location-tracking events. For more information about DIS, see the Data Ingestion Service User Guide.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "dis",
+    region = "",
+    channel = "",
+    partition_key = "",
+    encode= "",
+    field_delimiter= ""
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. dis indicates that data is exported to DIS.

+

region

+

Yes

+

Region where the DIS stream for storing the data is located.

+

ak

+

No

+

Access Key ID (AK).

+

sk

+

No

+

Specifies the secret access key used together with the ID of the access key.

+

channel

+

Yes

+

Name of the DIS stream to which data is exported.

+

partition_key

+

No

+

Group primary key. Multiple primary keys are separated by commas (,). If this parameter is not specified, data is randomly written to DIS partitions.

+

encode

+

Yes

+

Data encoding format. The value can be csv, json, or user_defined.

+
NOTE:
  • field_delimiter must be specified if this parameter is set to csv.
  • If the encoding format is json, you need to configure enable_output_null to determine whether to generate an empty field. For details, see the examples.
  • encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.
+
+

field_delimiter

+

Yes

+

Separator used to separate every two attributes.

+
  • This parameter needs to be configured if the CSV encoding format is adopted. It can be user-defined, for example, a comma (,).
  • This parameter is not required if the JSON encoding format is adopted.
+

json_config

+

No

+

If encode is set to json, you can set this parameter to specify the mapping between the JSON field and the stream definition field. An example of the format is as follows: field1=data_json.field1; field2=data_json.field2.

+

enable_output_null

+

No

+

If encode is set to json, you need to specify this parameter to determine whether to generate an empty field.

+

If this parameter is set to true, an empty field (the value is null) is generated. If set to false, no empty field is generated. The default value is true.

+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+
+
+
+

Precautions

None

+
+

Example
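The original example is not shown in this extract. The following is a minimal sketch of a CSV-encoded DIS sink stream based on the syntax above; the stream name and the channel name csoutput are placeholders.

CREATE SINK STREAM audi_cheaper_than_30w (
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_price INT
)
  WITH (
    type = "dis",
    region = "xxx",
    channel = "csoutput",
    encode = "csv",
    field_delimiter = ","
  );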

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0242.html b/docs/dli/sqlreference/dli_08_0242.html new file mode 100644 index 00000000..82374f80 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0242.html @@ -0,0 +1,284 @@ + + +

OBS Sink Stream

+

Function

Create a sink stream to export DLI data to OBS. DLI can export the job analysis results to OBS. OBS applies to various scenarios, such as big data analysis, cloud-native application program data, static website hosting, backup/active archive, and deep/cold archive.

+

OBS is an object-based storage service. It provides massive, secure, highly reliable, and low-cost data storage capabilities. For more information about OBS, see the Object Storage Service Console Operation Guide.

+

You are advised to use the File System Sink Stream (Recommended).

+
+
+

Prerequisites

Before data exporting, check the version of the OBS bucket. The OBS sink stream supports data exporting to an OBS bucket running OBS 3.0 or a later version.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+           type = "obs",
+           region = "",
+           encode = "",
+           field_delimiter = "",
+           row_delimiter = "",
+           obs_dir = "",
+           file_prefix = "",
+           rolling_size = "",
+           rolling_interval = "",
+           quote = "",
+           array_bracket = "",
+           append = "",
+           max_record_num_per_file = "",
+           dump_interval = "",
+           dis_notice_channel = ""
+  )
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. obs indicates that data is exported to OBS.

+

region

+

Yes

+

Region to which OBS belongs.

+

ak

+

No

+

Access Key ID (AK).

+

sk

+

No

+

Secret access key used together with the ID of the access key.

+

encode

+

Yes

+

Encoding format. Currently, formats CSV, JSON, ORC, Avro, Avro-Merge, and Parquet are supported.

+

field_delimiter

+

No

+

Separator used to separate every two attributes.

+

This parameter is mandatory only when the CSV encoding format is adopted. If this parameter is not specified, the default separator comma (,) is used.

+

row_delimiter

+

No

+

Row delimiter. This parameter does not need to be configured if the CSV or JSON encoding format is adopted.

+

json_config

+

No

+

If encode is set to json, you can set this parameter to specify the mapping between the JSON field and the stream definition field. An example of the format is as follows: field1=data_json.field1;field2=data_json.field2.

+

obs_dir

+

Yes

+

Directory for storing files. The directory is in the format of {Bucket name}/{Directory name}, for example, obs-a1/dir1/subdir. If encode is set to csv (append is false), json (append is false), avro_merge, or parquet, parameterization is supported.

+

file_prefix

+

No

+

Prefix of the data export file name. The generated file is named in the format of file_prefix.x, for example, file_prefix.1 and file_prefix.2. If this parameter is not specified, the file prefix is temp by default.

+

rolling_size

+

No

+

Maximum size of a file.

+
NOTE:
  • One or both of rolling_size and rolling_interval must be configured.
  • When the size of a file exceeds the specified size, a new file is generated.
  • The unit can be KB, MB, or GB. If no unit is specified, the byte unit is used.
  • This parameter does not need to be configured if the ORC encoding format is adopted.
+
+

rolling_interval

+

No

+

Time mode, in which data is saved to the corresponding directory.

+
NOTE:
  • One or both of rolling_size and rolling_interval must be configured.
  • After this parameter is specified, data is written to the corresponding directories according to the output time.
  • The parameter value can be in the format of yyyy/MM/dd/HH/mm, which is case sensitive. The minimum unit is minute. If this parameter is set to yyyy/MM/dd/HH, data is written to the directory that is generated at the hour time. For example, data generated at 2018-09-10 16:00 will be written to the {obs_dir}/2018-09-10_16 directory.
  • If both rolling_size and rolling_interval are set, a new file is generated when the size of a single file exceeds the specified size in the directory corresponding to each time point.
+
+

quote

+

No

+

Modifier, which is added before and after each attribute only when the CSV encoding format is adopted. You are advised to use invisible characters, such as \u0007, as the parameter value.

+

array_bracket

+

No

+

Array bracket, which can be configured only when the CSV encoding format is adopted. The available options are (), {}, and []. For example, if you set this parameter to {}, the array output format is {a1, a2}.

+

append

+

No

+

The value can be true or false. The default value is true.

+

If OBS does not support the append mode and the encoding format is CSV or JSON, set this parameter to false. If append is set to false, max_record_num_per_file and dump_interval must be set.

+

max_record_num_per_file

+

No

+

Maximum number of records in a file. This parameter needs to be set if encode is csv (append is false), json (append is false), orc, avro, avro_merge, or parquet. If the maximum number of records has been reached, a new file is generated.

+

dump_interval

+

No

+

Triggering period. This parameter needs to be configured when the ORC encoding format is adopted or notification to DIS is enabled.

+
  • If the ORC encoding format is specified, this parameter indicates that files will be uploaded to OBS when the triggering period arrives even if the number of file records does not reach the maximum value.
  • If notification to DIS is enabled, this parameter specifies that a notification is sent to DIS every period to indicate that no more files will be generated in the directory.
+

dis_notice_channel

+

No

+

DIS channel to which DLI sends a record containing the OBS directory. DLI periodically sends such a record to the DIS channel, indicating that no more new files will be generated in the directory.

+

encoded_data

+

No

+

Data to be encoded. This parameter is set if encode is json (append is false), avro_merge, or parquet. The format is ${field_name}, indicating that the stream field content is encoded as a complete record.

+
+
+
+

Precautions

If a configuration item can be specified through parameter configurations, one or more columns in the record can be used as part of the configuration item. For example, if the configuration item is set to car_${car_brand} and the value of car_brand in a record is BMW, the value of this configuration item is car_BMW in the record.

+
+

Example
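The original example is not shown in this extract. The following is a minimal sketch of a CSV-encoded OBS sink stream based on the syntax above; the bucket and directory obs-sink/car_infos and the file prefix are placeholders.

CREATE SINK STREAM car_infos_out (
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_price INT
)
  WITH (
    type = "obs",
    region = "xxx",
    encode = "csv",
    field_delimiter = ",",
    row_delimiter = "\n",
    obs_dir = "obs-sink/car_infos",
    file_prefix = "flink_output",
    rolling_size = "128MB"
  );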

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0243.html b/docs/dli/sqlreference/dli_08_0243.html new file mode 100644 index 00000000..ccd0b678 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0243.html @@ -0,0 +1,147 @@ + + +

CloudTable HBase Sink Stream

+

Function

DLI exports the job output data to HBase of CloudTable. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. Structured and semi-structured key-value data can be stored, including messages, reports, recommendation data, risk control data, logs, and orders. With DLI, you can write massive volumes of data to HBase at a high speed and with low latency.

+

CloudTable is a distributed, scalable, and fully-hosted key-value data storage service based on Apache HBase. It provides DLI with high-performance random read and write capabilities, which are helpful when applications need to store and query a massive amount of structured data, semi-structured data, and time series data. CloudTable applies to IoT scenarios and storage and query of massive volumes of key-value data. For more information about CloudTable, see the CloudTable Service User Guide.

+
+

Prerequisites

In this scenario, jobs must run on the dedicated queue of DLI. Therefore, DLI must interconnect with the enhanced datasource connection that has been connected with CloudTable HBase. You can also set the security group rules as required.

+

For details about how to create an enhanced datasource connection, see Enhanced Datasource Connections in the Data Lake Insight User Guide.

+

For details about how to configure security group rules, see Security Group in the Virtual Private Cloud User Guide.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "cloudtable",
+    region = "",
+    cluster_id = "",
+    table_name = "",
+    table_columns = "",
+    create_if_not_exist = ""
+  )
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. cloudtable indicates that data is exported to CloudTable (HBase).

+

region

+

Yes

+

Region to which CloudTable belongs.

+

cluster_id

+

Yes

+

ID of the cluster to which the data you want to insert belongs.

+

table_name

+

Yes

+

Name of the table, into which data is to be inserted. It can be specified through parameter configurations. For example, if you want one or more certain columns as part of the table name, use car_pass_inspect_with_age_${car_age}, where car_age is the column name.

+

table_columns

+

Yes

+

Columns to be inserted. The format is rowKey, f1:c1, f1:c2, f2:c1, where rowKey must be specified. If you do not want to add a column (for example, the third column) to the database, set this parameter to rowKey,f1:c1,,f2:c1.

+

illegal_data_table

+

No

+

If this parameter is specified, abnormal data (for example, rowKey does not exist) will be written into the table. If not specified, abnormal data will be discarded. The rowKey value is a timestamp followed by six random digits, and the schema is info:data, info:reason.

+

create_if_not_exist

+

No

+

Whether to create a table or column into which the data is written when this table or column does not exist. The value can be true or false. The default value is false.

+

batch_insert_data_num

+

No

+

Number of data records to be written in batches at a time. The value must be a positive integer. The upper limit is 100. The default value is 10.

+
+
+
+

Precautions

+
+

Example

Output data of stream qualified_cars to CloudTable (HBase).

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
CREATE SINK STREAM qualified_cars (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT
+)
+  WITH (
+    type = "cloudtable",
+    region = "xxx",
+    cluster_id = "209ab1b6-de25-4c48-8e1e-29e09d02de28",
+    table_name = "car_pass_inspect_with_age_${car_age}",
+    table_columns = "rowKey,info:owner,,car:speed,car:miles",
+    illegal_data_table = "illegal_data",
+    create_if_not_exist = "true",
+    batch_insert_data_num = "20"
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0244.html b/docs/dli/sqlreference/dli_08_0244.html new file mode 100644 index 00000000..d827986b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0244.html @@ -0,0 +1,158 @@ + + +

CloudTable OpenTSDB Sink Stream

+

Function

DLI exports the job output data to OpenTSDB of CloudTable. OpenTSDB is a distributed, scalable time series database based on HBase. It stores time series data. Time series data refers to the data collected at different time points. This type of data reflects the change status or degree of an object over time. OpenTSDB supports data collection and monitoring in seconds, permanent storage, index, and queries. It can be used for system monitoring and measurement as well as collection and monitoring of IoT data, financial data, and scientific experimental results.

+

CloudTable is a distributed, scalable, and fully-hosted key-value data storage service based on Apache HBase. It provides DLI with high-performance random read and write capabilities, which are helpful when applications need to store and query a massive amount of structured data, semi-structured data, and time series data. CloudTable applies to IoT scenarios and storage and query of massive volumes of key-value data. For more information about CloudTable, see the CloudTable Service User Guide.

+
+

Prerequisites

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "opentsdb",
+    region = "",
+    cluster_id = "",
+    tsdb_metrics = "",
+    tsdb_timestamps = "",
+    tsdb_values = "",
+    tsdb_tags = "",
+    batch_insert_data_num = ""
+  )
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. opentsdb indicates that data is exported to CloudTable (OpenTSDB).

+

region

+

Yes

+

Region to which CloudTable belongs.

+

cluster_id

+

No

+

ID of the cluster to which the data to be inserted belongs. Either this parameter or tsdb_link_address must be specified.

+

tsdb_metrics

+

Yes

+

Metric of a data point, which can be specified through parameter configurations.

+

tsdb_timestamps

+

Yes

+

Timestamp of a data point. The data type can be LONG, INT, SHORT, or STRING. Only dynamic columns are supported.

+

tsdb_values

+

Yes

+

Value of a data point. The data type can be SHORT, INT, LONG, FLOAT, DOUBLE, or STRING. Dynamic columns or constant values are supported.

+

tsdb_tags

+

Yes

+

Tags of a data point. Each data point contains at least one tag and at most eight tags. Tags of the data point can be specified through parameter configurations.

+

batch_insert_data_num

+

No

+

Number of data records to be written in batches at a time. The value must be a positive integer. The upper limit is 65536. The default value is 8.

+

tsdb_link_address

+

No

+

OpenTSDB link of the cluster to which the data to be inserted belongs. If this parameter is used, the job must run in a dedicated DLI queue, and the DLI queue must be connected to the CloudTable cluster through an enhanced datasource connection. Either this parameter or cluster_id must be specified.

+
NOTE:

For details about how to create an enhanced datasource connection, see Enhanced Datasource Connections in the Data Lake Insight User Guide.

+
+
+
+
+

Precautions

If a configuration item can be specified through parameter configurations, one or more columns in the record can be used as part of the configuration item. For example, if the configuration item is set to car_${car_brand} and the value of car_brand in a record is BMW, the value of this configuration item is car_BMW in the record.

+
+

Example

Output data of stream weather_out to CloudTable (OpenTSDB).

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
CREATE SINK STREAM weather_out (
+  timestamp_value LONG, /* Time */
+  temperature FLOAT, /* Temperature value */
+  humidity FLOAT, /* Humidity */
+  location STRING /* Location */
+)
+  WITH (
+    type = "opentsdb",
+    region = "xxx",
+    cluster_id = "e05649d6-00e2-44b4-b0ff-7194adaeab3f",
+    tsdb_metrics = "weather",
+    tsdb_timestamps = "${timestamp_value}",
+    tsdb_values = "${temperature}; ${humidity}",
+    tsdb_tags = "location:${location},signify:temperature; location:${location},signify:humidity",
+    batch_insert_data_num = "10"
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0245.html b/docs/dli/sqlreference/dli_08_0245.html new file mode 100644 index 00000000..a33e06c8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0245.html @@ -0,0 +1,164 @@ + + +

RDS Sink Stream

+

Function

DLI outputs the Flink job output data to RDS. Currently, PostgreSQL and MySQL databases are supported. The PostgreSQL database can store data of more complex types and delivers space information services, multi-version concurrent control (MVCC), and high concurrency. It applies to location applications, financial insurance, and e-commerce. The MySQL database reduces IT deployment and maintenance costs in various scenarios, such as web applications, e-commerce, enterprise applications, and mobile applications.

+

RDS is a cloud-based web service.

+

+

For more information about RDS, see the Relational Database Service User Guide.

+
+

Prerequisites

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "rds",
+    username = "",
+    password = "",
+    db_url = "",
+    table_name = ""
+  );
+
+ +
+

+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. rds indicates that data is exported to RDS.

+

username

+

Yes

+

Username for connecting to a database.

+

password

+

Yes

+

Password for connecting to a database.

+

db_url

+

Yes

+

Database connection address, for example, {database_type}://ip:port/database.

+

Currently, two types of database connections are supported: MySQL and PostgreSQL.

+
  • MySQL: 'mysql://ip:port/database'
  • PostgreSQL: 'postgresql://ip:port/database'
+

table_name

+

Yes

+

Name of the table where data will be inserted.

+

db_columns

+

No

+

Mapping between attributes in the output stream and those in the database table. This parameter must be configured based on the sequence of attributes in the output stream.

+

Example:

+
create sink stream a3(student_name  string, student_age int) with (
+type = "rds",
+username = "root",
+password = "xxxxxxxx",
+db_url = "mysql://192.168.0.102:8635/test1",
+db_columns = "name,age",
+table_name = "t1"
+);
+

In the example, student_name corresponds to the name attribute in the database, and student_age corresponds to the age attribute in the database.

+
NOTE:
  • If db_columns is not configured, it is normal that the number of attributes in the output stream is less than that of attributes in the database table and the extra attributes in the database table are all nullable or have default values.
+
+

primary_key

+

No

+

To update data in the table in real time by using the primary key, add the primary_key configuration item (c_timeminute in the following example) when creating a table. During the data writing operation, data is updated if the specified primary_key exists. Otherwise, data is inserted.

+

Example:

+
CREATE SINK STREAM test(c_timeminute LONG, c_cnt LONG)
+WITH (
+type = "rds",
+username = "root",
+password = "xxxxxxxx",
+db_url = "mysql://192.168.0.12:8635/test",
+table_name = "test",
+primary_key = "c_timeminute");
+

operation_field

+

No

+

Processing method of the specified data, in the format of ${field_name}. The value of field_name must be of the string type. If the value of field_name is D or DELETE, the record is deleted from the database; otherwise, the record is inserted by default.

+
+
+
+

Precautions

The stream format defined by stream_id must be the same as the table format.

+
+

Example

Data of stream audi_cheaper_than_30w is exported to the audi_cheaper_than_30w table in the test database.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
CREATE SINK STREAM audi_cheaper_than_30w (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "rds",
+    username = "root",
+    password = "xxxxxx",
+    db_url = "mysql://192.168.1.1:8635/test",
+    table_name = "audi_cheaper_than_30w"
+); 
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0247.html b/docs/dli/sqlreference/dli_08_0247.html new file mode 100644 index 00000000..aa8f0e78 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0247.html @@ -0,0 +1,180 @@ + + +

DWS Sink Stream (JDBC Mode)

+

Function

DLI outputs the Flink job output data to Data Warehouse Service (DWS). DWS database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types and delivers space information services, multi-version concurrent control (MVCC), and high concurrency. It applies to location applications, financial insurance, and e-commerce.

+

DWS is an online data processing database based on the cloud infrastructure and platform and helps you mine and analyze massive sets of data. For more information about DWS, see the Data Warehouse Service documentation.

+
+

Prerequisites

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "rds",
+    username = "",
+    password = "",
+    db_url = "",
+    table_name = ""
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. rds indicates that data is exported to RDS or DWS.

+

username

+

Yes

+

Username for connecting to a database.

+

password

+

Yes

+

Password for connecting to a database.

+

db_url

+

Yes

+

Database connection address, for example, postgresql://ip:port/database.

+

table_name

+

Yes

+

Name of the table where data will be inserted. You need to create the database table in advance.

+

db_columns

+

No

+

Mapping between attributes in the output stream and those in the database table. This parameter must be configured based on the sequence of attributes in the output stream.

+

Example:

+
1
+2
+3
+4
+5
+6
+7
+8
+9
create sink stream a3(student_name  string, student_age int) 
+  with (
+    type = "rds",
+    username = "root",
+    password = "xxxxxxxx",
+    db_url = "postgresql://192.168.0.102:8000/test1",
+    db_columns = "name,age",
+    table_name = "t1"
+  );
+
+ +
+

In the example, student_name corresponds to the name attribute in the database, and student_age corresponds to the age attribute in the database.

+
NOTE:
  • If db_columns is not configured, it is normal that the number of attributes in the output stream is less than that of attributes in the database table and the extra attributes in the database table are all nullable or have default values.
+
+

primary_key

+

No

+

To update data in the table in real time by using the primary key, add the primary_key configuration item (c_timeminute in the following example) when creating a table. During the data writing operation, data is updated if the specified primary_key exists. Otherwise, data is inserted.

+

Example:

+
1
+2
+3
+4
+5
+6
+7
+8
+9
CREATE SINK STREAM test(c_timeminute LONG, c_cnt LONG)
+  WITH (
+    type = "rds",
+    username = "root",
+    password = "xxxxxxxx",
+    db_url = "postgresql://192.168.0.12:8000/test",
+    table_name = "test",
+    primary_key = "c_timeminute"
+  );
+
+ +
+
+
+
+

Precautions

The stream format defined by stream_id must be the same as the table format.

+
+

Example

Data of stream audi_cheaper_than_30w is exported to the audi_cheaper_than_30w table in the test database.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
CREATE SINK STREAM audi_cheaper_than_30w (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "rds",
+    username = "root",
+    password = "xxxxxx",
+    db_url = "postgresql://192.168.1.1:8000/test",
+    table_name = "audi_cheaper_than_30w"
+  ); 
+
+insert into audi_cheaper_than_30w select "1","2","3",4;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0248.html b/docs/dli/sqlreference/dli_08_0248.html new file mode 100644 index 00000000..98379b5b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0248.html @@ -0,0 +1,290 @@ + + +

DWS Sink Stream (OBS-based Dumping)

+

Function

Create a sink stream to export Flink job data to DWS through OBS-based dumping, specifically, output Flink job data to OBS and then import data from OBS to DWS. For details about how to import OBS data to DWS, see Concurrently Importing Data from OBS in the Data Warehouse Service Development Guide.

+

DWS is an online data processing database based on the cloud infrastructure and platform and helps you mine and analyze massive sets of data. For more information about DWS, see the Data Warehouse Service documentation.

+
+

Precautions

+
+

Prerequisites

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+          type = "dws",
+          region = "",
+          ak = "",
+          sk = "",
+          encode = "",
+          field_delimiter = "",
+          quote = "",
+          db_obs_server = "",
+          obs_dir = "",
+          username = "",
+          password =  "",
+          db_url = "",
+          table_name = "",
+          max_record_num_per_file = "",
+          dump_interval = ""
+  );
+
+ +
+
+

Keyword

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. dws indicates that data is exported to DWS.

+

region

+

Yes

+

Region where DWS is located.

+

ak

+

Yes

+

Access Key ID (AK).

+

sk

+

Yes

+

Secret access key used together with the ID of the AK.

+

encode

+

Yes

+

Encoding format. Currently, CSV and ORC are supported.

+

field_delimiter

+

No

+

Separator used to separate every two attributes. This parameter needs to be configured if the CSV encoding mode is used. It is recommended that you use invisible characters as separators, for example, \u0006\u0002.

+

quote

+

No

+

Single byte. It is recommended that invisible characters be used, for example, \u0007.

+

db_obs_server

+

No

+

Foreign server (for example, obs_server) that has been created in the database.

+

You need to specify this parameter if the ORC encoding mode is adopted.

+

obs_dir

+

Yes

+

Directory for storing intermediate files. The directory is in the format of {Bucket name}/{Directory name}, for example, obs-a1/dir1/subdir.

+

username

+

Yes

+

Username for connecting to a database.

+

password

+

Yes

+

Password for connecting to a database.

+

db_url

+

Yes

+

Database connection address. The format is ip:port/database, for example, 192.168.1.21:8000/test1.

+

table_name

+

Yes

+

Data table name. If no table is available, a table is automatically created.

+

max_record_num_per_file

+

Yes

+

Maximum number of records that can be stored in a file. If the number of records in a file is less than the maximum value, the file will be dumped to OBS after one dumping period.

+

dump_interval

+

Yes

+

Dumping period. The unit is second.

+

delete_obs_temp_file

+

No

+

Whether to delete temporary files on OBS. The default value is true. If this parameter is set to false, files on OBS will not be deleted. You need to manually clear the files.

+

max_dump_file_num

+

No

+

Maximum number of files that can be dumped at a time. If the number of files to be dumped is less than the maximum value, the files will be dumped to OBS after one dumping period.

+
+
+
+

Example

+
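The following is a minimal sketch assembled only from the keywords described above; the region, AK/SK, OBS directory, and database connection values are placeholders and must be replaced with your own.

CREATE SINK STREAM car_infos_dws (
  car_id STRING,
  car_owner STRING,
  car_price INT
)
  WITH (
    type = "dws",
    region = "xxx",
    ak = "xxxxxxxxxx",
    sk = "xxxxxxxxxx",
    encode = "csv",
    field_delimiter = "\u0006\u0002",
    quote = "\u0007",
    obs_dir = "obs-bucket/car_infos_dir",
    username = "dbadmin",
    password = "xxxxxxxx",
    db_url = "192.168.1.21:8000/testdb",
    table_name = "car_infos",
    max_record_num_per_file = "100",
    dump_interval = "10"
  );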
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0249.html b/docs/dli/sqlreference/dli_08_0249.html new file mode 100644 index 00000000..d1412e61 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0249.html @@ -0,0 +1,129 @@ + + +

DDS Sink Stream

+

Function

DLI outputs the job output data to Document Database Service (DDS).

+

DDS is compatible with the MongoDB protocol and is secure, highly available, reliable, scalable, and easy to use. It provides DB instance creation, scaling, redundancy, backup, restoration, monitoring, and alarm reporting functions with just a few clicks on the DDS console.

+

For more information about DDS, see the Document Database Service User Guide.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "dds",
+    username = "",
+    password = "",
+    db_url = "",
+    field_names = ""
+  );
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. dds indicates that data is exported to DDS.

+

username

+

Yes

+

Username for connecting to a database.

+

password

+

Yes

+

Password for connecting to a database.

+

db_url

+

Yes

+

DDS instance access address, for example, ip1:port,ip2:port/database/collection.

+

field_names

+

Yes

+

Key of the data field to be inserted. The format is f1,f2,f3. Ensure that the key corresponds to the data column in the sink stream.

+

batch_insert_data_num

+

No

+

Amount of data to be written in batches at a time. The value must be a positive integer. The default value is 10.

+
+
+
+

Example

Output data in the qualified_cars stream to the collectionTest collection of the dbtest database in DDS.

+
CREATE SINK STREAM qualified_cars (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT
+)
+  WITH (
+    type = "dds",
+    region = "xxx",
+    db_url = "192.168.0.8:8635,192.168.0.130:8635/dbtest/collectionTest",
+    username = "xxxxxxxxxx",
+    password =  "xxxxxxxxxx",
+    field_names = "car_id,car_owner,car_age,average_speed,total_miles",
+    batch_insert_data_num = "10"
+  );
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0251.html b/docs/dli/sqlreference/dli_08_0251.html new file mode 100644 index 00000000..d6660a9b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0251.html @@ -0,0 +1,147 @@ + + +

SMN Sink Stream

+

Function

DLI exports Flink job output data to SMN.

+

SMN provides reliable and flexible large-scale message notification services to DLI. It significantly simplifies system coupling and pushes messages to subscription endpoints based on requirements. SMN can be connected to other cloud services or integrated with any application that uses or generates message notifications to push messages over multiple protocols.

+

For more information about SMN, see the Simple Message Notification documentation.

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* ) 
+  WITH(
+    type = "smn",
+    region = "",
+    topic_urn = "",
+    urn_column = "",
+    message_subject = "",
+    message_column = ""
+  )
+
+ +
+

+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. smn indicates that data is exported to SMN.

+

region

+

Yes

+

Region to which SMN belongs.

+

topic_urn

+

No

+

URN of an SMN topic, which is used for the static topic URN configuration. The SMN topic serves as the destination for short message notification and needs to be created in SMN.

+

One of topic_urn and urn_column must be configured. If both of them are configured, the topic_urn setting takes precedence.

+

urn_column

+

No

+

Field name of the topic URN content, which is used for the dynamic topic URN configuration.

+

One of topic_urn and urn_column must be configured. If both of them are configured, the topic_urn setting takes precedence.

+

message_subject

+

Yes

+

Message subject sent to SMN. This parameter can be user-defined.

+

message_column

+

Yes

+

Field name in the sink stream. Contents of the field name serve as the message contents, which are user-defined. Currently, only text messages (default) are supported.

+
+
+
+

Precautions

None

+
+

Example

Data of stream over_speed_warning is exported to SMN.

+
//Static topic configuration
+CREATE SINK STREAM over_speed_warning (
+  over_speed_message STRING /* over speed message */
+)
+  WITH (
+    type = "smn",
+    region = "xxx",
+    topic_Urn = "xxx",
+    message_subject = "message title",
+    message_column = "over_speed_message"
+  );
+
+ +
+
//Dynamic topic configuration
+CREATE SINK STREAM over_speed_warning2 (  
+    over_speed_message STRING, /* over speed message */  
+    over_speed_urn STRING
+)
+  WITH (  
+    type = "smn",  
+    region = "xxx",
+    urn_column = "over_speed_urn",  
+    message_subject = "message title",  
+    message_column = "over_speed_message"
+  );
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0252.html b/docs/dli/sqlreference/dli_08_0252.html new file mode 100644 index 00000000..d5c1fb32 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0252.html @@ -0,0 +1,177 @@ + + +

CSS Elasticsearch Sink Stream

+

Function

DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server that provides distributed multi-user capabilities. It delivers multiple functions, including full-text retrieval, structured search, analytics, aggregation, and highlighting. With Elasticsearch, you can achieve stable, reliable, real-time search. Elasticsearch applies to diversified scenarios, such as log analysis and site search.

+

CSS is a fully managed, distributed search service. It is fully compatible with open-source Elasticsearch and provides DLI with structured and unstructured data search, statistics, and report capabilities.

+

+

For more information about CSS, see the Cloud Search Service User Guide.

+

If the security mode is enabled when you create a CSS cluster, it cannot be disabled later.

+
+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "es",
+    region = "",
+    cluster_address = "",
+    es_index = "",
+    es_type= "",
+    es_fields= "",
+    batch_insert_data_num= ""
+  );
+
+ +
+

+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. es indicates that data is exported to CSS.

+

region

+

Yes

+

Region where CSS is located.

+

cluster_address

+

Yes

+

Private access address of the CSS cluster, for example: x.x.x.x:x. Use commas (,) to separate multiple addresses.

+

es_index

+

Yes

+

Index of the data to be inserted. This parameter corresponds to CSS index.

+

es_type

+

Yes

+

Type of the document to which data is to be inserted. This parameter corresponds to the CSS type.

+

If the Elasticsearch version is 6.x, the value cannot start with an underscore (_).

+

If the Elasticsearch version is 7.x and the type of CSS is preset, the value must be _doc. Otherwise, the value must comply with CSS specifications.

+

es_fields

+

Yes

+

Key of the data field to be inserted. The format is id,f1,f2,f3,f4. Ensure that the key corresponds to the data column in the sink. If a random attribute field instead of a key is used, the keyword id does not need to be used, for example, f1,f2,f3,f4,f5. This parameter corresponds to the CSS field.

+

batch_insert_data_num

+

Yes

+

Amount of data to be written in batches at a time. The value must be a positive integer. The unit is 10 records. The maximum value allowed is 65536, and the default value is 10.

+

action

+

No

+

If the value is add, data is forcibly overwritten when the same ID is encountered. If the value is upsert, data is updated when the same ID is encountered. (If upsert is selected, id in the es_fields field must be specified.) The default value is add.

+

enable_output_null

+

No

+

This parameter is used to configure whether to generate an empty field. If this parameter is set to true, an empty field (the value is null) is generated. If set to false, no empty field is generated. The default value is false.

+

max_record_num_cache

+

No

+

Maximum number of records that can be cached.

+

es_certificate_name

+

No

+

Name of the datasource authentication information

+

If the security mode is enabled and HTTPS is used by the Elasticsearch cluster, the certificate is required for access. In this case, set the datasource authentication type to CSS.

+

If the security mode is enabled for the Elasticsearch cluster but HTTPS is disabled, the certificate and username and password are required for access. In this case, set the datasource authentication type to Password.

+
+
+
+

Precautions

If a configuration item can be specified through parameter configurations, one or more columns in the record can be used as part of the configuration item. For example, if the configuration item is set to car_${car_brand} and the value of car_brand in a record is BMW, the value of this configuration item is car_BMW for that record.

+
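For instance, assuming the es_index keyword accepts such parameter configurations, the following sketch (all connection values are placeholders) would route each record to an index derived from its car_brand column:

CREATE SINK STREAM cars_by_brand (
  car_id STRING,
  car_brand STRING
)
  WITH (
    type = "es",
    region = "xxx",
    cluster_address = "192.168.0.212:9200",
    es_index = "car_${car_brand}", /* becomes car_BMW for a record whose car_brand is BMW */
    es_type = "information",
    es_fields = "id,brand",
    batch_insert_data_num = "10"
  );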
+

Example

Data of stream qualified_cars is exported to the cluster on CSS.
CREATE SINK STREAM qualified_cars (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT
+)
+  WITH (
+    type = "es",
+    region = "xxx",
+    cluster_address = "192.168.0.212:9200",
+    es_index = "car",
+    es_type = "information",
+    es_fields = "id,owner,age,speed,miles",
+    batch_insert_data_num = "10"
+); 
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0253.html b/docs/dli/sqlreference/dli_08_0253.html new file mode 100644 index 00000000..af9cf0a7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0253.html @@ -0,0 +1,131 @@ + + +

DCS Sink Stream

+

Function

DLI exports the Flink job output data to Redis of DCS. Redis is a storage system that supports multiple types of data structures such as key-value. It can be used in scenarios such as caching, event pub/sub, and high-speed queuing. Redis supports direct read/write of strings, hashes, lists, queues, and sets. Redis works with an in-memory dataset and provides persistence. For more information about Redis, visit https://redis.io/.

+

DCS provides Redis-compatible, secure, reliable, out-of-the-box, distributed cache capabilities allowing elastic scaling and convenient management. It meets users' requirements for high concurrency and fast data access.

+

+

For more information about DCS, see the Distributed Cache Service User Guide.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "dcs_redis",
+    region = "",
+    cluster_address = "",
+    password = "",
+    value_type= "",key_value= ""
+  );
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. dcs_redis indicates that data is exported to DCS Redis.

+

region

+

Yes

+

Region where DCS for storing the data is located.

+

cluster_address

+

Yes

+

Redis instance connection address.

+

password

+

No

+

Redis instance connection password. This parameter is not required if password-free access is used.

+

value_type

+

Yes

+

This parameter can be set to any or the combination of the following options:

+
  • Data types, including string, list, hash, set, and zset
  • Commands used to set the expiration time of a key, including expire, pexpire, expireAt, and pexpireAt
  • Commands used to delete a key, including del and hdel
+

Use commas (,) to separate multiple commands.

+

key_value

+

Yes

+

Key and value. The number of key_value pairs must be the same as the number of types specified by value_type, and key_value pairs are separated by semicolons (;). Both key and value can be specified through parameter configurations. The dynamic column name is represented by ${column name}.

+
+
+
+

Precautions

+
+

Example

Data of stream qualified_cars is exported to the Redis cache instance on DCS.
CREATE SINK STREAM qualified_cars (
+  car_id STRING, 
+  car_owner STRING, 
+  car_age INT, 
+  average_speed DOUBLE, 
+  total_miles DOUBLE
+)
+  WITH (
+    type = "dcs_redis",
+    cluster_address = "192.168.0.34:6379",
+    password = "xxxxxxxx",
+    value_type = "string; list; hash; set; zset",
+    key_value = "${car_id}_str: ${car_owner}; name_list: ${car_owner}; ${car_id}_hash: {name:${car_owner}, age: ${car_age}}; name_set:   ${car_owner}; math_zset: {${car_owner}:${average_speed}}"
+  );
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0254.html b/docs/dli/sqlreference/dli_08_0254.html new file mode 100644 index 00000000..7d850609 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0254.html @@ -0,0 +1,167 @@ + + +

MRS Kafka Sink Stream

+

Function

DLI exports the output data of the Flink job to Kafka.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages. The Kafka clusters are deployed and hosted on MRS and are powered by Apache Kafka.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH(
+    type = "kafka",
+    kafka_bootstrap_servers = "",
+    kafka_topic = "",
+    encode = "json"
+)
+
+ +
+

+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. kafka indicates that data is exported to Kafka.

+

kafka_bootstrap_servers

+

Yes

+

Address and port of the Kafka brokers that DLI connects to. Use enhanced datasource connections to connect DLI queues with Kafka clusters.

+

kafka_topic

+

Yes

+

Kafka topic into which DLI writes data.

+

encode

+

Yes

+

Encoding format. Currently, json and user_defined are supported.

+

encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.

+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+

krb_auth

+

No

+

Authentication name for creating a datasource connection authentication. This parameter is mandatory when Kerberos authentication is enabled. If Kerberos authentication is not enabled for the created MRS cluster, ensure that the /etc/hosts information of the master node in the MRS cluster is added to the host file of the DLI queue.

+

kafka_properties

+

No

+

This parameter is used to configure the native attributes of Kafka. The format is key1=value1;key2=value2.

+

kafka_certificate_name

+

No

+

Specifies the name of the datasource authentication information. This parameter is valid only when the datasource authentication type is set to Kafka_SSL.

+
NOTE:
  • If this parameter is specified, the service loads only the specified file and password under the authentication. The system automatically sets this parameter to kafka_properties.
  • Other configuration information required for Kafka SSL authentication needs to be manually configured in the kafka_properties attribute.
+
+
+
+
+

Precautions

None

+
+

Example

Output data to Kafka.
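A minimal sketch based on the syntax above is shown below; the broker addresses and topic are placeholders, and krb_auth would be added when Kerberos authentication is enabled for the MRS cluster.

CREATE SINK STREAM kafka_sink (name STRING)
  WITH (
    type = "kafka",
    kafka_bootstrap_servers = "ip1:port1,ip2:port2",
    kafka_topic = "testsink",
    encode = "json"
  );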

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0255.html b/docs/dli/sqlreference/dli_08_0255.html new file mode 100644 index 00000000..1c4bec7b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0255.html @@ -0,0 +1,163 @@ + + +

MRS HBase Sink Stream

+

Function

DLI exports the output data of the Flink job to HBase of MRS.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "mrs_hbase",
+    region = "",
+    cluster_address = "",
+    table_name = "",
+    table_columns = "",
+    illegal_data_table = "",
+    batch_insert_data_num = "",
+    action = ""
+)
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. mrs_hbase indicates that data is exported to HBase of MRS.

+

region

+

Yes

+

Region where MRS resides.

+

cluster_address

+

Yes

+

ZooKeeper address of the cluster to which the data table to be inserted belongs. The format is ip1,ip2:port.

+

table_name

+

Yes

+

Name of the table where data is to be inserted.

+

It can be specified through parameter configurations. For example, if you want one or more certain columns as part of the table name, use car_pass_inspect_with_age_${car_age}, where car_age is the column name.

+

table_columns

+

Yes

+

Columns to be inserted. The format is rowKey, f1:c1, f1:c2, f2:c1, where rowKey must be specified. If you do not want to add a column (for example, the third column) to the database, set this parameter to rowKey,f1:c1,,f2:c1.

+

illegal_data_table

+

No

+

If this parameter is specified, abnormal data (for example, rowKey does not exist) will be written into the table. If not specified, abnormal data will be discarded. The rowKey value is taskNo_Timestamp followed by six random digits, and the schema is info:data, info:reason.

+

batch_insert_data_num

+

No

+

Number of data records to be written in batches at a time. The value must be a positive integer. The upper limit is 1000. The default value is 10.

+

action

+

No

+

Whether data is added or deleted. Available options include add and delete. The default value is add.

+

krb_auth

+

No

+

Authentication name for creating a datasource connection authentication. This parameter is mandatory when Kerberos authentication is enabled. Set this parameter to the corresponding cross-source authentication name.

+
NOTE:

Ensure that the /etc/hosts information of the master node in the MRS cluster is added to the host file of the DLI queue.

+
+
+
+
+

Precautions

None

+
+

Example

Output data to HBase of MRS.

+
CREATE SINK STREAM qualified_cars (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT
+)
+  WITH (
+    type = "mrs_hbase",
+    region = "xxx",
+    cluster_address = "192.16.0.88,192.87.3.88:2181",
+    table_name = "car_pass_inspect_with_age_${car_age}",
+    table_columns = "rowKey,info:owner,,car:speed,car:miles",
+    illegal_data_table = "illegal_data",
+    batch_insert_data_num = "20",
+    action = "add",
+    krb_auth = "KRB_AUTH_NAME"
+  );
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0257.html b/docs/dli/sqlreference/dli_08_0257.html new file mode 100644 index 00000000..b5af03d5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0257.html @@ -0,0 +1,133 @@ + + +

Open-Source Kafka Sink Stream

+

Function

DLI exports the output data of the Flink job to Kafka.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH(
+    type = "kafka",
+    kafka_bootstrap_servers = "",
+    kafka_topic = "",
+    encode = "json"
+  )
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. kafka indicates that data is exported to Kafka.

+

kafka_bootstrap_servers

+

Yes

+

Address and port of the Kafka brokers that DLI connects to. Use enhanced datasource connections to connect DLI queues with Kafka clusters.

+

kafka_topic

+

Yes

+

Kafka topic into which DLI writes data.

+

encode

+

Yes

+

Data encoding format. The value can be csv, json, or user_defined.

+
  • field_delimiter must be specified if this parameter is set to csv.
  • encode_class_name and encode_class_parameter must be specified if this parameter is set to user_defined.
+

filed_delimiter

+

No

+

If encode is set to csv, you can use this parameter to specify the separator between CSV fields. By default, the comma (,) is used.

+

encode_class_name

+

No

+

If encode is set to user_defined, you need to set this parameter to the name of the user-defined decoding class (including the complete package path). The class must inherit the DeserializationSchema class.

+

encode_class_parameter

+

No

+

If encode is set to user_defined, you can set this parameter to specify the input parameter of the user-defined decoding class. Only one parameter of the string type is supported.

+

kafka_properties

+

No

+

This parameter is used to configure the native attributes of Kafka. The format is key1=value1;key2=value2.

+

kafka_certificate_name

+

No

+

Name of the datasource authentication information. This parameter is valid only when the datasource authentication type is set to Kafka_SSL.

+
NOTE:
  • If this parameter is specified, the service loads only the specified file and password under the authentication. The system automatically sets this parameter to kafka_properties.
  • Other configuration information required for Kafka SSL authentication needs to be manually configured in the kafka_properties attribute.
+
+
+
+
+

Precautions

None

+
+

Example

Output the data in the kafka_sink stream to Kafka.

+
CREATE SINK STREAM kafka_sink (name STRING) 
+  WITH (
+    type="kafka",
+    kafka_bootstrap_servers =  "ip1:port1,ip2:port2",
+    kafka_topic = "testsink",
+    encode = "json" 
+  );
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0258.html b/docs/dli/sqlreference/dli_08_0258.html new file mode 100644 index 00000000..0b1ac951 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0258.html @@ -0,0 +1,22 @@ + + +

Creating a Temporary Stream

+

Function

The temporary stream is used to simplify SQL logic. If the SQL logic is complex, you can split it into multiple SQL statements joined by temporary streams. The temporary stream is just a logical concept and does not generate any data.

+
+

Syntax

CREATE TEMP STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+
+ +
+
+

Example

create temp stream a2(attr1 int, attr2 string);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0259.html b/docs/dli/sqlreference/dli_08_0259.html new file mode 100644 index 00000000..6622f951 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0259.html @@ -0,0 +1,18 @@ + + +

Creating a Dimension Table

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0260.html b/docs/dli/sqlreference/dli_08_0260.html new file mode 100644 index 00000000..162f887b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0260.html @@ -0,0 +1,111 @@ + + +

Creating a Redis Table

+

Create a Redis table to connect to the source stream.

+

For details about the JOIN syntax, see JOIN Between Stream Data and Table Data.

+

Syntax

CREATE TABLE table_id (key_attr_name STRING(, hash_key_attr_name STRING)?, value_attr_name STRING)
+  WITH (
+    type = "dcs_redis",
+    cluster_address = ""(,password = "")?,
+    value_type= "",
+    key_column= ""(,hash_key_column="")?);
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. Value dcs_redis indicates that data is exported to DCS Redis.

+

cluster_address

+

Yes

+

Redis instance connection address.

+

password

+

No

+

Redis instance connection password. This parameter is not required if password-free access is used.

+

value_type

+

Yes

+

Indicates the field data type. Supported data types include string, list, hash, set, and zset.

+

key_column

+

Yes

+

Indicates the column name of the Redis key attribute.

+

hash_key_column

+

No

+

If value_type is set to hash, this field must be specified as the column name of the level-2 key attribute.

+

cache_max_num

+

No

+

Indicates the maximum number of cached query results. The default value is 32768.

+

cache_time

+

No

+

Indicates the maximum duration for caching database query results in the memory. The unit is millisecond. The default value is 10000. The value 0 indicates that caching is disabled.

+
+
+
+

Precautions

+
+

Example

The Redis table is used to connect to the source stream.
CREATE TABLE table_a (attr1 string, attr2 string, attr3 string)
+  WITH (
+    type = "dcs_redis",
+    value_type = "hash",
+    key_column = "attr1",
+    hash_key_column = "attr2",
+    cluster_address = "192.168.1.238:6379",
+    password = "xxxxxxxx"
+ );
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0261.html b/docs/dli/sqlreference/dli_08_0261.html new file mode 100644 index 00000000..0244e656 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0261.html @@ -0,0 +1,218 @@ + + +

Creating an RDS Table

+

Create an RDS/DWS table to connect to the source stream.

+

For details about the JOIN syntax, see JOIN.

+

Prerequisites

+
+

Syntax

CREATE TABLE  table_id (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "rds",
+    username = "",
+    password = "",
+    db_url = "",
+    table_name = ""
+  );
+
+ +
+

+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output channel type. Value rds indicates that data is stored to RDS.

+

username

+

Yes

+

Username for connecting to a database.

+

password

+

Yes

+

Password for connecting to a database.

+

db_url

+

Yes

+

Database connection address, for example, {database_type}://ip:port/database.

+

Currently, two types of database connections are supported: MySQL and PostgreSQL.

+
  • MySQL: 'mysql://ip:port/database'
  • PostgreSQL: 'postgresql://ip:port/database'
    NOTE:

    To create a DWS dimension table, set the database connection address to a DWS database address. If the DWS database version is later than 8.1.0, the open-source PostgreSQL driver cannot be used for connection. You need to use the GaussDB driver for connection.

    +
    +
+

table_name

+

Yes

+

Indicates the name of the database table for data query.

+

db_columns

+

No

+

Indicates the mapping of stream attribute fields between the sink stream and database table. This parameter is mandatory when the stream attribute fields in the sink stream do not match those in the database table. The parameter value is in the format of dbtable_attr1,dbtable_attr2,dbtable_attr3.

+

cache_max_num

+

No

+

Indicates the maximum number of cached query results. The default value is 32768.

+

cache_time

+

No

+

Indicates the maximum duration for caching database query results in the memory. The unit is millisecond. The default value is 10000. The value 0 indicates that caching is disabled.

+
+
+
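As an illustration of the db_columns keyword above, the sketch below (connection values are placeholders) assumes the mapping is positional, so the table attributes car_id, car_owner, and car_price are read from the database columns id, owner, and price:

CREATE TABLE db_dim (
  car_id STRING,
  car_owner STRING,
  car_price INT
)
  WITH (
    type = "rds",
    username = "root",
    password = "xxxxxxxx",
    db_url = "mysql://192.168.0.0:3306/test1",
    table_name = "car",
    db_columns = "id,owner,price"
  );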
+

Example

The RDS table is used to connect to the source stream.
CREATE SOURCE STREAM car_infos (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "dis",
+    region = "",
+    channel = "dliinput",
+    encode = "csv",
+    field_delimiter = ","
+  );
+
+CREATE TABLE  db_info (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "rds",
+    username = "root",
+    password = "******",
+    db_url = "postgresql://192.168.0.0:2000/test1",
+    table_name = "car"
+);
+
+CREATE SINK STREAM audi_cheaper_than_30w (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+    type = "dis",
+    region = "",
+    channel = "dlioutput",
+    partition_key = "car_owner",
+    encode = "csv",
+    field_delimiter = ","
+  );
+
+INSERT INTO audi_cheaper_than_30w
+SELECT a.car_id, b.car_owner, b.car_brand, b.car_price 
+FROM car_infos as a join db_info as b on a.car_id = b.car_id;
+
+ +
+
+

To create a DWS dimension table, set the database connection address to a DWS database address. If the DWS database version is later than 8.1.0, the open-source PostgreSQL driver cannot be used for connection. You need to use the GaussDB driver for connection.

+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0262.html b/docs/dli/sqlreference/dli_08_0262.html new file mode 100644 index 00000000..79cf5ef4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0262.html @@ -0,0 +1,18 @@ + + +

Modifying a Table

+

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0263.html b/docs/dli/sqlreference/dli_08_0263.html new file mode 100644 index 00000000..2c82c1ad --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0263.html @@ -0,0 +1,62 @@ + + +

Adding a Column

+

Function

This statement is used to add one or more new columns to a table.

+
+

Syntax

ALTER TABLE [db_name.]table_name ADD COLUMNS (col_name1 col_type1 [COMMENT col_comment1], ...);
+
+ +
+
+

Keyword

+
+

Parameters

+
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_).

+

table_name

+

Table name

+

col_name

+

Column name

+

col_type

+

Field type

+

col_comment

+

Column description

+
+
+
+

Precautions

Do not run this SQL statement concurrently. Otherwise, columns may be overwritten.

+
+

Example

ALTER TABLE t1 ADD COLUMNS (column2 int, column3 string);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0266.html b/docs/dli/sqlreference/dli_08_0266.html new file mode 100644 index 00000000..af441755 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0266.html @@ -0,0 +1,68 @@ + + +

Common Configuration Items of Batch SQL Jobs

+

This section describes the common configuration items of the SQL syntax for DLI batch jobs.

+ +
Table 1 Common configuration items

Item

+

Default Value

+

Description

+

spark.sql.files.maxRecordsPerFile

+

0

+

Maximum number of records to be written into a single file. If the value is zero or negative, there is no limit.

+

spark.sql.autoBroadcastJoinThreshold

+

209715200

+

Maximum size (in bytes) of a table that can be broadcast to all worker nodes when a join is executed. Set this parameter to -1 to disable broadcasting.

+
NOTE:

Currently, statistics are supported only for Hive metastore tables where the ANALYZE TABLE COMPUTE STATISTICS noscan command has been run, and for file-based data source tables whose statistics are computed directly from the data files.

+
+

spark.sql.shuffle.partitions

+

200

+

Default number of partitions used when shuffling data for joins or aggregations.

+

spark.sql.dynamicPartitionOverwrite.enabled

+

false

+

Whether DLI overwrites only the partitions into which data is written at runtime. If this parameter is set to false, all partitions that meet the specified condition will be deleted before the overwrite starts. For example, if you set this parameter to false and use INSERT OVERWRITE to write partition 2021-02 to a partitioned table that already contains the 2021-01 partition, the 2021-01 partition will also be deleted.

+

If you set this parameter to true, DLI does not delete partitions before overwrite starts.

+

spark.sql.files.maxPartitionBytes

+

134217728

+

Maximum number of bytes to be packed into a single partition when a file is read.

+

spark.sql.badRecordsPath

+

-

+

Path of bad records.

+
+
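As a sketch, assuming the standard Spark SQL SET statement is accepted in the batch job, these items can be adjusted per job as shown below; the table and partition names are hypothetical.

SET spark.sql.shuffle.partitions = 64;
SET spark.sql.dynamicPartitionOverwrite.enabled = true;

/* With dynamic partition overwrite enabled, only the partitions that receive
   data (here, month 2021-02) are replaced; existing partitions such as 2021-01
   are kept. */
INSERT OVERWRITE TABLE sales_partitioned PARTITION (month)
SELECT id, amount, '2021-02' AS month
FROM sales_stage;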
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0267.html b/docs/dli/sqlreference/dli_08_0267.html new file mode 100644 index 00000000..1a06d0ca --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0267.html @@ -0,0 +1,195 @@ + + +

File System Sink Stream (Recommended)

+

Function

You can create a sink stream to export data to a file system such as HDFS or OBS. After the data is generated, a non-DLI table can be created directly according to the generated directory. The table can be processed through DLI SQL, and the output data directory can be stored in partitioned tables. It is applicable to scenarios such as data dumping, big data analysis, data backup, and active, deep, or cold archiving.

+

OBS is an object-based storage service. It provides massive, secure, highly reliable, and low-cost data storage capabilities.

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  [PARTITIONED BY (attr_name (',' attr_name)*]
+  WITH (
+    type = "filesystem",
+    file.path = "obs://bucket/xx",
+    encode = "parquet",
+    ak = "",
+    sk = ""
+  );
+
+ +
+
+

Keywords

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Output stream type. If type is set to filesystem, data is exported to the file system.

+

file.path

+

Yes

+

Output directory in the form: schema://file.path.

+

Currently, Schema supports only OBS and HDFS.

+
  • If schema is set to obs, data is stored to OBS.
  • If schema is set to hdfs, data is exported to HDFS. A proxy user needs to be configured for HDFS. For details, see HDFS Proxy User Configuration.

    Example: hdfs://node-master1sYAx:9820/user/car_infos, where node-master1sYAx:9820 is the name of the node where the NameNode is located.

    +
+

encode

+

Yes

+

Output data encoding format. Currently, only the parquet and csv formats are supported.

+
  • When schema is set to obs, the encoding format of the output data can only be parquet.
  • When schema is set to hdfs, the output data can be encoded in Parquet or CSV format.
+

ak

+

No

+

Access key. This parameter is mandatory when data is exported to OBS. Global variables can be used to mask the access key used for OBS authentication.

+

sk

+

No

+

Secret access key used together with the AK. This parameter is mandatory when data is exported to OBS. Global variables can be used to mask sensitive information.

+

krb_auth

+

No

+

Authentication name for creating a datasource connection authentication. This parameter is mandatory when Kerberos authentication is enabled. If Kerberos authentication is not enabled for the created MRS cluster, ensure that the /etc/hosts information of the master node in the MRS cluster is added to the host file of the DLI queue.

+

field_delimiter

+

No

+

Separator used to separate every two attributes.

+

This parameter needs to be configured if the CSV encoding format is adopted. It can be user-defined, for example, a comma (,).

+
+
+
+

Precautions

+
+

HDFS Proxy User Configuration

  1. Log in to the MRS management page.
  2. Select the HDFS NameNode configuration of MRS and add configuration parameters in the Customization area.

    In the preceding information, myname in the core-site values hadoop.proxyuser.myname.hosts and hadoop.proxyuser.myname.groups is the name of the krb authentication user.

    +

    Ensure that the permission on the HDFS data write path is 777.

    +
    +
  3. After the configuration is complete, click Save.
+
+

Example

+
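A minimal sketch based on the keywords above, writing Parquet files to OBS partitioned by day; the bucket path and AK/SK values are placeholders (global variables can be used instead of literal keys).

CREATE SINK STREAM car_infos_fs (
  car_id STRING,
  car_owner STRING,
  car_price INT,
  day STRING
)
  PARTITIONED BY (day)
  WITH (
    type = "filesystem",
    file.path = "obs://obs-bucket/car_infos",
    encode = "parquet",
    ak = "xxxxxxxxxx",
    sk = "xxxxxxxxxx"
  );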
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0270.html b/docs/dli/sqlreference/dli_08_0270.html new file mode 100644 index 00000000..9595940f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0270.html @@ -0,0 +1,12 @@ + + +

DMS Source Stream

+

DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. It provides reliable, scalable, fully managed queues for sending, receiving, and storing messages. DMS for Kafka is a message queuing service based on Apache Kafka. This service provides Kafka premium instances.

+

The source stream can read data from a Kafka instance as the input data of jobs. The syntax for creating a Kafka source stream is the same as that for creating an open source Apache Kafka source stream. For details, see Open-Source Kafka Source Stream.

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0271.html b/docs/dli/sqlreference/dli_08_0271.html new file mode 100644 index 00000000..98d87e46 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0271.html @@ -0,0 +1,12 @@ + + +

DMS Sink Stream

+

DMS (Distributed Message Service) is a message middleware service based on distributed, high-availability clustering technology. It provides reliable, scalable, fully managed queues for sending, receiving, and storing messages. DMS for Kafka is a message queuing service based on Apache Kafka. This service provides Kafka premium instances.

+

DLI can write the job output data into the Kafka instance. The syntax for creating a Kafka sink stream is the same as that for creating an open-source Apache Kafka sink stream. For details, see Open-Source Kafka Sink Stream.

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0272.html b/docs/dli/sqlreference/dli_08_0272.html new file mode 100644 index 00000000..035546fe --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0272.html @@ -0,0 +1,21 @@ + + + +

Custom Stream Ecosystem

+ +

+
+ +
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0273.html b/docs/dli/sqlreference/dli_08_0273.html new file mode 100644 index 00000000..ba9d7fbe --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0273.html @@ -0,0 +1,101 @@ + + +

Custom Source Stream

+

Compile code to obtain data from the desired cloud ecosystem or open-source ecosystem as the input data of Flink jobs.

+

Syntax

CREATE SOURCE STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "user_defined",
+    type_class_name = "",
+    type_class_parameter = ""
+  )
+  (TIMESTAMP BY timeindicator (',' timeindicator)?);timeindicator:PROCTIME '.' PROCTIME| ID '.' ROWTIME
+
+ +
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. The value user_defined indicates that the data source is a user-defined data source.

+

type_class_name

+

Yes

+

Name of the source class for obtaining source data. The value must contain the complete package path.

+

type_class_parameter

+

Yes

+

Input parameter of the user-defined source class. Only one parameter of the string type is supported.

+
+
+
+

Precautions

The user-defined source class needs to inherit the RichParallelSourceFunction class and specify the data type as Row. For example, define the MySource class: public class MySource extends RichParallelSourceFunction<Row>{}. It must implement the open, run, and close methods.

+

Dependency pom:

+
<dependency>
+	<groupId>org.apache.flink</groupId>
+	<artifactId>flink-streaming-java_2.11</artifactId>
+	<version>${flink.version}</version>
+	<scope>provided</scope>
+</dependency>
+<dependency>
+	<groupId>org.apache.flink</groupId>
+	<artifactId>flink-core</artifactId>
+	<version>${flink.version}</version>
+	<scope>provided</scope>
+</dependency>
+
+

Example

A data record is generated in each period. The data record contains only one field of the INT type. The initial value is 1 and the period is 60 seconds. The period is specified by an input parameter.

+
+
CREATE SOURCE STREAM user_in_data (
+	count INT
+     )
+  WITH (  
+	type = "user_defined", 
+	type_class_name = "mySourceSink.MySource", 
+	type_class_parameter = "60"
+      )
+      TIMESTAMP BY car_timestamp.rowtime;		
+
+ +
+

To customize the implementation of the source class, you need to pack the class in a JAR package and upload the UDF function on the SQL editing page.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0274.html b/docs/dli/sqlreference/dli_08_0274.html new file mode 100644 index 00000000..4bb9610b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0274.html @@ -0,0 +1,89 @@ + + +

Custom Sink Stream

+

Compile code to write the data processed by DLI to a specified cloud ecosystem or open-source ecosystem.

+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "user_defined",
+    type_class_name = "",
+    type_class_parameter = ""
+  );
+
+

Keyword

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Data source type. The value user_defined indicates that the data source is a user-defined data source.

+

type_class_name

+

Yes

+

Name of the sink class for exporting data. The value must contain the complete package path.

+

type_class_parameter

+

Yes

+

Input parameter of the user-defined sink class. Only one parameter of the string type is supported.

+
+
+
+

Precautions

The user-defined sink class needs to inherit the RichSinkFunction class and specify the data type as Row. For example, define the MySink class: public class MySink extends RichSinkFunction<Row>{}. It must implement the open, invoke, and close methods.

+

Dependency pom:

+
<dependency>
+	<groupId>org.apache.flink</groupId>
+	<artifactId>flink-streaming-java_2.11</artifactId>
+	<version>${flink.version}</version>
+	<scope>provided</scope>
+</dependency>
+<dependency>
+	<groupId>org.apache.flink</groupId>
+	<artifactId>flink-core</artifactId>
+	<version>${flink.version}</version>
+	<scope>provided</scope>
+</dependency>
+
+

Example

Writing data encoded in CSV format to a DIS stream is used as an example.

+
+
CREATE SINK STREAM user_out_data (
+	count INT
+)
+  WITH (  
+	type = "user_defined", 
+	type_class_name = "mySourceSink.MySink", 
+	type_class_parameter = ""
+      );
+
+ +
+

To customize the implementation of the sink class, you need to pack the class in a JAR package and upload the UDF function on the SQL editing page.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0275.html b/docs/dli/sqlreference/dli_08_0275.html new file mode 100644 index 00000000..c8eaaaa9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0275.html @@ -0,0 +1,132 @@ + + +

SQL Syntax Overview of Stream Jobs

+

This section describes the Flink SQL syntax list provided by DLI. For details about the parameters and examples, see the syntax description.

+ +
Table 1 SQL Syntax of stream jobs

Classification

+

Function

+

Creating a Source Stream

+

CloudTable HBase Source Stream

+

Creating a Source Stream

+

DIS Source Stream

+

DMS Source Stream

+

Creating a Source Stream

+

MRS Kafka Source Stream

+

Open-Source Kafka Source Stream

+

OBS Source Stream

+

Creating a Sink Stream

+

CloudTable HBase Sink Stream

+

Creating a Sink Stream

+

CloudTable OpenTSDB Sink Stream

+

Creating a Sink Stream

+

+

+

+

+

+

+

CSS Elasticsearch Sink Stream

+

DCS Sink Stream

+

DDS Sink Stream

+

DIS Sink Stream

+

DMS Sink Stream

+

DWS Sink Stream (JDBC Mode)

+

DWS Sink Stream (OBS-based Dumping)

+

Creating a Sink Stream

+

MRS HBase Sink Stream

+

MRS Kafka Sink Stream

+

Open-Source Kafka Sink Stream

+

OBS Sink Stream

+

RDS Sink Stream

+

Creating a Sink Stream

+

SMN Sink Stream

+

File System Sink Stream (Recommended)

+

Creating a Temporary Stream

+

Creating a Temporary Stream

+

Creating a Dimension Table

+

+

Creating a Redis Table

+

Creating an RDS Table

+

Custom Stream Ecosystem

+

+

Custom Source Stream

+

Custom Sink Stream

+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0281.html b/docs/dli/sqlreference/dli_08_0281.html new file mode 100644 index 00000000..8428e83b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0281.html @@ -0,0 +1,27 @@ + + +

Displaying Function Details

+

Function

Displays information about a specified function.

+
+

Syntax

DESCRIBE FUNCTION [EXTENDED] [db_name.] function_name;
+
+ +
+
+

Keywords

EXTENDED: displays extended usage information.

+
+

Precautions

The metadata (implementation class and usage) of an existing function is returned. If the function does not exist, the system reports an error.

+
+

Example

Displays information about the mergeBill function.

+
DESCRIBE FUNCTION mergeBill;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0282.html b/docs/dli/sqlreference/dli_08_0282.html new file mode 100644 index 00000000..c0f469e0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0282.html @@ -0,0 +1,25 @@ + + + +

User-Defined Functions

+ +

+
+ +
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0283.html b/docs/dli/sqlreference/dli_08_0283.html new file mode 100644 index 00000000..92f8b2e7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0283.html @@ -0,0 +1,37 @@ + + +

Creating a Function

+

Function

DLI allows you to create and use user-defined functions (UDF) and user-defined table functions (UDTF) in Spark jobs.

+
+

Syntax

CREATE [TEMPORARY] FUNCTION [db_name.]function_name AS class_name
+  [USING resource,...]
+
+resource: 
+  : (JAR|FILE|ARCHIVE)file_uri
+
+ +
+
+

Precautions

+
+

Keywords

+
+

Example

Create the mergeBill function.

+
CREATE FUNCTION mergeBill AS 'com.xxx.hiveudf.MergeBill'
+  using jar 'obs://onlyci-7/udf/MergeBill.jar';
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0284.html b/docs/dli/sqlreference/dli_08_0284.html new file mode 100644 index 00000000..07f32b28 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0284.html @@ -0,0 +1,27 @@ + + +

Deleting a Function

+

Function

This statement is used to delete functions.

+
+

Syntax

DROP [TEMPORARY] FUNCTION [IF EXISTS] [db_name.] function_name;
+
+ +
+
+

Keywords

+
+

Precautions

+
+

Example

The mergeBill function is deleted.

+
DROP FUNCTION mergeBill;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0285.html b/docs/dli/sqlreference/dli_08_0285.html new file mode 100644 index 00000000..a7b45259 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0285.html @@ -0,0 +1,57 @@ + + +

Displaying All Functions

+

Function

View all functions in the current project.

+
+

Syntax

SHOW [USER|SYSTEM|ALL] FUNCTIONS ([LIKE] regex | [db_name.] function_name);
+
+ +
+

+

In the preceding statement, regex is a regular expression. For details about its parameters, see Table 1.

+ +
Table 1 Parameter examples

Expression

+

Description

+

'xpath*'

+

Matches all functions whose names start with xpath.

+

Example: SHOW FUNCTIONS LIKE 'xpath*';

+

Matches functions whose names start with xpath, including xpath, xpath_int, and xpath_string.

+

'x[a-z]+'

+

Matches functions whose names start with x and are followed by one or more characters from a to z. For example, xpath and xtest can be matched.

+

'x.*h'

+

Matches functions whose names start with x, end with h, and contain one or more characters in the middle. For example, xpath and xtesth can be matched.

+
+
+

For details about other expressions, see the official website.

+
+

Keywords

LIKE: This qualifier is used only for compatibility and has no actual effect.

+
+

Precautions

The functions that match the given regular expression or function name are displayed. If no regular expression or name is provided, all functions are displayed. If USER or SYSTEM is specified, user-defined Spark SQL functions or system-defined Spark SQL functions are displayed, respectively.

+
+

Example

This statement is used to view all functions.

+
SHOW FUNCTIONS;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0286.html b/docs/dli/sqlreference/dli_08_0286.html new file mode 100644 index 00000000..662c75ac --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0286.html @@ -0,0 +1,148 @@ + + +

MRS OpenTSDB Sink Stream

+

Function

DLI exports the output data of the Flink job to OpenTSDB of MRS.

+
+

Prerequisites

+
+

Syntax

CREATE SINK STREAM stream_id (attr_name attr_type (',' attr_name attr_type)* )
+  WITH (
+    type = "opentsdb",
+    region = "",
+    tsdb_metrics = "",
+    tsdb_timestamps = "",
+    tsdb_values = "",
+    tsdb_tags = "",
+    batch_insert_data_num = ""
+  )
+
+ +
+
+

Keywords

+
Table 1 Keyword description

Parameter

+

Mandatory

+

Description

+

type

+

Yes

+

Sink channel type. opentsdb indicates that data is exported to OpenTSDB of MRS.

+

region

+

Yes

+

Region where MRS resides.

+

tsdb_link_address

+

Yes

+

Service address of the OpenTSDB instance in MRS. The format is http://ip:port or https://ip:port.

+
NOTE:

If tsd.https.enabled is set to true, HTTPS must be used. Note that HTTPS does not support certificate authentication.

+
+

tsdb_metrics

+

Yes

+

Metric of a data point, which can be specified through parameter configurations.

+

tsdb_timestamps

+

Yes

+

Timestamp of a data point. The data type can be LONG, INT, SHORT, or STRING. Only dynamic columns are supported.

+

tsdb_values

+

Yes

+

Value of a data point. The data type can be SHORT, INT, LONG, FLOAT, DOUBLE, or STRING. Dynamic columns or constant values are supported.

+

tsdb_tags

+

Yes

+

Tags of a data point. Each data point can have one to eight tags. Tags of the data point can be specified through parameter configurations.

+

batch_insert_data_num

+

No

+

Number of data records to be written in batches at a time. The value must be a positive integer. The upper limit is 65536. The default value is 8.

+
+
+
+

Precautions

If a configuration item can be specified through parameter configurations, one or more columns in the record can be used as part of the configuration item. For example, if the configuration item is set to car_${car_brand} and the value of car_brand in a record is BMW, the value of this configuration item is car_BMW for that record.

+
+

Example

Output data of stream weather_out to OpenTSDB of MRS.

+
CREATE SINK STREAM weather_out (
+  timestamp_value LONG, /* Time */
+  temperature FLOAT, /* Temperature value */
+  humidity FLOAT, /* Humidity */
+  location STRING /* Location */
+)
+  WITH (
+    type = "opentsdb",
+    region = "xxx",
+    tsdb_link_address = "https://x.x.x.x:4242",
+    tsdb_metrics = "weather",
+    tsdb_timestamps = "${timestamp_value}",
+    tsdb_values = "${temperature}; ${humidity}",
+    tsdb_tags = "location:${location},signify:temperature; location:${location},signify:humidity",
+    batch_insert_data_num = "10"
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0289.html b/docs/dli/sqlreference/dli_08_0289.html new file mode 100644 index 00000000..e1973ed2 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0289.html @@ -0,0 +1,20 @@ + + +

Flink Opensource SQL 1.10 Syntax Reference

+

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0290.html b/docs/dli/sqlreference/dli_08_0290.html new file mode 100644 index 00000000..72908259 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0290.html @@ -0,0 +1,17 @@ + + +

Constraints and Definitions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0291.html b/docs/dli/sqlreference/dli_08_0291.html new file mode 100644 index 00000000..6c00f550 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0291.html @@ -0,0 +1,11 @@ + + +

Supported Data Types

+

STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP, ROW

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0292.html b/docs/dli/sqlreference/dli_08_0292.html new file mode 100644 index 00000000..15139873 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0292.html @@ -0,0 +1,17 @@ + + +

Syntax Definition

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0293.html b/docs/dli/sqlreference/dli_08_0293.html new file mode 100644 index 00000000..05d54931 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0293.html @@ -0,0 +1,19 @@ + + +

Data Definition Language (DDL)

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0294.html b/docs/dli/sqlreference/dli_08_0294.html new file mode 100644 index 00000000..c0edbb13 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0294.html @@ -0,0 +1,75 @@ + + +

CREATE TABLE

+

Syntax

CREATE TABLE table_name
+  (
+    { <column_definition> | <computed_column_definition> }[ , ...n]
+    [ <watermark_definition> ]
+    [ <table_constraint> ][ , ...n]
+  )
+  [COMMENT table_comment]
+  [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)]
+  WITH (key1=val1, key2=val2, ...)
+
+<column_definition>:
+  column_name column_type [ <column_constraint> ] [COMMENT column_comment]
+
+<column_constraint>:
+  [CONSTRAINT constraint_name] PRIMARY KEY NOT ENFORCED
+
+<table_constraint>:
+  [CONSTRAINT constraint_name] PRIMARY KEY (column_name, ...) NOT ENFORCED
+
+<computed_column_definition>:
+  column_name AS computed_column_expression [COMMENT column_comment]
+
+<watermark_definition>:
+  WATERMARK FOR rowtime_column_name AS watermark_strategy_expression
+
+<source_table>:
+  [catalog_name.][db_name.]table_name
+
+

Function

This clause is used to create a table with a specified name.

+
+

Description

COMPUTED COLUMN

+

A computed column is a virtual column generated using column_name AS computed_column_expression. A computed column evaluates an expression that can reference other columns declared in the same table. The column itself is not physically stored within the table. A computed column could be defined using cost AS price * quantity. This expression can contain any combination of physical columns, constants, functions, or variables, but cannot contain any subquery.

+

In Flink, a computed column is used to define the time attribute in CREATE TABLE statements. A processing time attribute can be defined easily via proc AS PROCTIME() using the system's PROCTIME() function. The event time column may be obtained from an existing field. In this case, you can use the computed column to obtain event time. For example, if the original field is not of the TIMESTAMP(3) type or is nested in a JSON string, you can use computed columns.

+

Notes:

+ +
+

WATERMARK

+

The WATERMARK clause defines the event time attribute of a table and takes the form WATERMARK FOR rowtime_column_name AS watermark_strategy_expression.

+

rowtime_column_name defines an existing column that is marked as the event time attribute of the table. The column must be of the TIMESTAMP(3) type and must be a top-level column in the schema. It can also be a computed column.

+

watermark_strategy_expression defines the watermark generation strategy. It allows arbitrary non-query expressions, including computed columns, to calculate the watermark. The expression return type must be TIMESTAMP(3), which represents the timestamp since the Epoch. The returned watermark will be emitted only if it is non-null and its value is larger than the previously emitted local watermark (to preserve the contract of ascending watermarks). The watermark generation expression is evaluated by the framework for every record, and the framework periodically emits the largest generated watermark. If the current watermark is still identical to the previous one, is null, or is smaller than the last emitted one, no new watermark is emitted. Watermarks are emitted at the interval defined by the pipeline.auto-watermark-interval configuration. If the watermark interval is 0 ms, a generated watermark is emitted per record, provided it is not null and is greater than the last emitted one.

+

When using event time semantics, tables must contain an event time attribute and a watermarking strategy.

+

Flink provides several commonly used watermark strategies.

+ +
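As a sketch of the time attributes described above (the table name, columns, and the 5-second out-of-orderness bound are illustrative assumptions, and the connector options are placeholders):
create table orders (
  order_id STRING,
  price DOUBLE,
  order_time TIMESTAMP(3),
  -- processing time attribute defined through a computed column
  proc AS PROCTIME(),
  -- event time attribute: emit watermarks that lag the observed event time by 5 seconds
  WATERMARK FOR order_time AS order_time - INTERVAL '5' SECOND
)
with (
  'connector.type' = ''
);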

PRIMARY KEY

+

A primary key constraint is a hint that Flink can leverage for optimizations. It indicates that a column or a set of columns of a table or a view is unique and does not contain nulls. None of the columns in a primary key can be nullable. The primary key therefore uniquely identifies a row in a table.

+

A primary key constraint can be declared either along with a column definition (a column constraint) or as a single line (a table constraint). In both cases, it must be declared as a singleton; defining multiple primary key constraints at the same time throws an exception.

+

Validity Check

+

The SQL standard specifies that a constraint can be either ENFORCED or NOT ENFORCED, which controls whether constraint checks are performed on the incoming/outgoing data. Because Flink does not own the data, the only mode it supports is NOT ENFORCED. It is up to the user to ensure that the query enforces key integrity.

+

Flink assumes the primary key is correct by assuming that the columns' nullability is aligned with the columns in the primary key. Connectors should ensure those are aligned.

+

Notes: In a CREATE TABLE statement, creating a primary key constraint alters the column's nullability; that is, a column with a primary key constraint is not nullable.

+

PARTITIONED BY

+

Partition the created table by the specified columns. A directory is created for each partition if this table is used as a filesystem sink.

+

WITH OPTIONS

+

Table properties used to create a table source/sink. The properties are usually used to find and create the underlying connector.

+

The key and value of the expression key1=val1 should both be string literals.

+

Notes: A table registered with a CREATE TABLE statement can be used as both a table source and a table sink. Whether it is used as a source or a sink cannot be determined until it is referenced in a DML statement.

+
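Putting the clauses together, a minimal sketch of a CREATE TABLE statement with a table-level primary key constraint, a comment, and WITH options; the connector values below reuse the Upsert Kafka options shown later in this reference and are placeholders rather than a recommended configuration:
create table userTable (
  user_id STRING,
  user_name STRING,
  balance DECIMAL(10, 2),
  PRIMARY KEY (user_id) NOT ENFORCED
)
COMMENT 'user balance table'
with (
  'connector.type' = 'upsert-kafka',
  'connector.version' = '0.11',
  'connector.topic' = 'user-topic',
  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
  'format.type' = 'csv'
);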
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0295.html b/docs/dli/sqlreference/dli_08_0295.html new file mode 100644 index 00000000..336cf90b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0295.html @@ -0,0 +1,22 @@ + + +

CREATE VIEW

+

Syntax

CREATE VIEW [IF NOT EXISTS] view_name
+  [{columnName [, columnName ]* }] [COMMENT view_comment]
+  AS query_expression
+
+

Function

Create a view with multiple layers nested in it to simplify the development process.

+
+

Description

IF NOT EXISTS

+

If the view already exists, nothing happens.

+
+

Example

Create a view named viewName.

+
create view viewName as select * from dataSource
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0296.html b/docs/dli/sqlreference/dli_08_0296.html new file mode 100644 index 00000000..6dc733bd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0296.html @@ -0,0 +1,24 @@ + + +

CREATE FUNCTION

+

Syntax

CREATE FUNCTION
+  [IF NOT EXISTS] function_name
+  AS identifier [LANGUAGE JAVA|SCALA]
+
+

Function

Create a user-defined function.

+
+

Description

IF NOT EXISTS

+

If the function already exists, nothing happens.

+

LANGUAGE JAVA|SCALA

+

The language tag instructs the Flink runtime how to execute the function. Currently, only JAVA and SCALA are supported, and the default language for a function is JAVA.

+
+

Example

Create a function named STRINGBACK.

+
create function STRINGBACK as 'com.dli.StringBack'
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0297.html b/docs/dli/sqlreference/dli_08_0297.html new file mode 100644 index 00000000..8300b27a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0297.html @@ -0,0 +1,138 @@ + + +

Data Manipulation Language (DML)

+

Statements

Syntax

+
+
INSERT INTO table_name [PARTITION part_spec] query
+
+part_spec:  (part_col_name1=val1 [, part_col_name2=val2, ...])
+
+query:
+  values
+  | {
+      select
+      | selectWithoutFrom
+      | query UNION [ ALL ] query
+      | query EXCEPT query
+      | query INTERSECT query
+    }
+    [ ORDER BY orderItem [, orderItem ]* ]
+    [ LIMIT { count | ALL } ]
+    [ OFFSET start { ROW | ROWS } ]
+    [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY]
+
+orderItem:
+  expression [ ASC | DESC ]
+
+select:
+  SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+  [ WINDOW windowName AS windowSpec [, windowName AS windowSpec ]* ]
+
+selectWithoutFrom:
+  SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+
+projectItem:
+  expression [ [ AS ] columnAlias ]
+  | tableAlias . *
+
+tableExpression:
+  tableReference [, tableReference ]*
+  | tableExpression [ NATURAL ] [ LEFT | RIGHT | FULL ] JOIN tableExpression [ joinCondition ]
+
+joinCondition:
+  ON booleanExpression
+  | USING '(' column [, column ]* ')'
+
+tableReference:
+  tablePrimary
+  [ matchRecognize ]
+  [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ]
+
+tablePrimary:
+  [ TABLE ] [ [ catalogName . ] schemaName . ] tableName
+  | LATERAL TABLE '(' functionName '(' expression [, expression ]* ')' ')'
+  | UNNEST '(' expression ')'
+
+values:
+  VALUES expression [, expression ]*
+
+groupItem:
+  expression
+  | '(' ')'
+  | '(' expression [, expression ]* ')'
+  | CUBE '(' expression [, expression ]* ')'
+  | ROLLUP '(' expression [, expression ]* ')'
+  | GROUPING SETS '(' groupItem [, groupItem ]* ')'
+
+windowRef:
+    windowName
+  | windowSpec
+
+windowSpec:
+    [ windowName ]
+    '('
+    [ ORDER BY orderItem [, orderItem ]* ]
+    [ PARTITION BY expression [, expression ]* ]
+    [
+        RANGE numericOrIntervalExpression {PRECEDING}
+      | ROWS numericExpression {PRECEDING}
+    ]
+    ')'
+
+matchRecognize:
+      MATCH_RECOGNIZE '('
+      [ PARTITION BY expression [, expression ]* ]
+      [ ORDER BY orderItem [, orderItem ]* ]
+      [ MEASURES measureColumn [, measureColumn ]* ]
+      [ ONE ROW PER MATCH ]
+      [ AFTER MATCH
+            ( SKIP TO NEXT ROW
+            | SKIP PAST LAST ROW
+            | SKIP TO FIRST variable
+            | SKIP TO LAST variable
+            | SKIP TO variable )
+      ]
+      PATTERN '(' pattern ')'
+      [ WITHIN intervalLiteral ]
+      DEFINE variable AS condition [, variable AS condition ]*
+      ')'
+
+measureColumn:
+      expression AS alias
+
+pattern:
+      patternTerm [ '|' patternTerm ]*
+
+patternTerm:
+      patternFactor [ patternFactor ]*
+
+patternFactor:
+      variable [ patternQuantifier ]
+
+patternQuantifier:
+      '*'
+  |   '*?'
+  |   '+'
+  |   '+?'
+  |   '?'
+  |   '??'
+  |   '{' { [ minRepeat ], [ maxRepeat ] } '}' ['?']
+  |   '{' repeat '}'
+

Precautions

+

Flink SQL uses a lexical policy for identifiers (table, attribute, and function names) similar to Java:

+ +

String literals must be enclosed in single quotes (for example, SELECT 'Hello World'). Two single quotation marks are used for escaping (for example, SELECT 'It''s me.'). Unicode characters are supported in string literals. If explicit Unicode code points are required, use the following syntax:

+ +
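A short sketch of these rules in a query (the table and column names are assumptions); note the escaped single quote in the string literal:
INSERT INTO resultTable
SELECT
  car_id,
  'It''s me.' AS message  -- two single quotes escape one quote inside a string literal
FROM sourceTable
WHERE car_owner = 'Hello World';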
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0298.html b/docs/dli/sqlreference/dli_08_0298.html new file mode 100644 index 00000000..4d700de6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0298.html @@ -0,0 +1,107 @@ + + +

Flink OpenSource SQL 1.10 Syntax

+

This section describes the Flink OpenSource SQL syntax supported by DLI. For details about the parameters and examples, see the syntax description.

+

Creating Tables

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Syntax for creating tables

Classification

+

Function

+

Creating a Source Table

+

+

+

+

+

+

+

Kafka Source Table

+

DIS Source Table

+

JDBC Source Table

+

GaussDB(DWS) Source Table

+

Redis Source Table

+

HBase Source Table

+

userDefined Source Table

+

Creating a Result Table

+

+

+

+

+

+

+

+

+

ClickHouse Result Table

+

Kafka Result Table

+

Upsert Kafka Result Table

+

DIS Result Table

+

JDBC Result Table

+

GaussDB(DWS) Result Table

+

Redis Result Table

+

SMN Result Table

+

HBase Result Table

+

Elasticsearch Result Table

+

User-defined Result Table

+

Creating a Dimension Table

+

+

+

JDBC Dimension Table

+

GaussDB(DWS) Dimension Table

+

HBase Dimension Table

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0299.html b/docs/dli/sqlreference/dli_08_0299.html new file mode 100644 index 00000000..1f341f87 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0299.html @@ -0,0 +1,19 @@ + + +

Data Definition Language (DDL)

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0300.html b/docs/dli/sqlreference/dli_08_0300.html new file mode 100644 index 00000000..9693018a --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0300.html @@ -0,0 +1,27 @@ + + +

Creating a Source Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0301.html b/docs/dli/sqlreference/dli_08_0301.html new file mode 100644 index 00000000..33775000 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0301.html @@ -0,0 +1,251 @@ + + +

Kafka Source Table

+

Function

Create a source stream to obtain data from Kafka as input data for jobs.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

Kafka is an offline cluster. You have built an enhanced datasource connection to connect Flink jobs to Kafka. You have set security group rules as required.

+
+

Precautions

SASL_SSL cannot be enabled for the interconnected Kafka cluster.

+
+

Syntax

create table kafkaSource(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' WATERMARK FOR rowtime_column_name AS watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'kafka',
+  'connector.version' = '',
+  'connector.topic' = '',
+  'connector.properties.bootstrap.servers' = '',
+  'connector.properties.group.id' = '',
+  'connector.startup-mode' = '',
+  'format.type' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to kafka.

+

connector.version

+

Yes

+

Kafka version. The value can be '0.10' or '0.11', which corresponds to Kafka 2.11 to 2.4.0 and other historical versions, respectively.

+

format.type

+

Yes

+

Data deserialization format. The value can be csv, json, or avro.

+

format.field-delimiter

+

No

+

Attribute delimiter. You can customize the attribute delimiter only when the encoding format is CSV. The default delimiter is a comma (,).

+

connector.topic

+

Yes

+

Kafka topic name. Either this parameter or connector.topic-pattern is used.

+

connector.topic-pattern

+

No

+

Regular expression for matching the Kafka topic name. Either this parameter or connector.topic is used.

+

Example:

+

'topic.*'

+

'(topic-c|topic-d)'

+

'(topic-a|topic-b|topic-\\d*)'

+

'(topic-a|topic-b|topic-[0-9]*)'

+

connector.properties.bootstrap.servers

+

Yes

+

Kafka broker addresses. Use commas (,) to separate them.

+

connector.properties.group.id

+

No

+

Consumer group name

+

connector.startup-mode

+

No

+

Consumer startup mode. The value can be earliest-offset, latest-offset, group-offsets, specific-offsets or timestamp. The default value is group-offsets.

+

connector.specific-offsets

+

No

+

Consumption offset. This parameter is mandatory when startup-mode is specific-offsets. The value is in the 'partition:0,offset:42;partition:1,offset:300' format.

+

connector.startup-timestamp-millis

+

No

+

Consumption start timestamp. This parameter is mandatory when startup-mode is timestamp.

+

connector.properties.*

+

No

+

Native Kafka property

+
+
+
+

Example

+
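A minimal sketch based on the syntax and parameters above, reading CSV data from a Kafka topic; the topic name, broker address, consumer group, and columns are illustrative placeholders:
create table kafkaSource(
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_speed INT
)
with (
  'connector.type' = 'kafka',
  'connector.version' = '0.11',
  'connector.topic' = 'test-topic',
  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
  'connector.properties.group.id' = 'test-group',
  'connector.startup-mode' = 'latest-offset',
  'format.type' = 'csv'
);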
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0302.html b/docs/dli/sqlreference/dli_08_0302.html new file mode 100644 index 00000000..7acb4a5c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0302.html @@ -0,0 +1,180 @@ + + +

DIS Source Table

+

Function

Create a source stream to read data from DIS. DIS accesses user data, and Flink jobs read data from the DIS stream as input. Flink jobs can quickly consume data from producers through DIS sources for continuous processing. This is applicable to scenarios where data outside the cloud service is imported to the cloud service for filtering, real-time analysis, monitoring reports, and dumping.

+

DIS addresses the challenge of transmitting data outside cloud services to cloud services. DIS builds data intake streams for custom applications capable of processing or analyzing streaming data. DIS continuously captures, transmits, and stores terabytes of data from hundreds of thousands of sources every hour, such as logs, Internet of Things (IoT) data, social media feeds, website clickstreams, and location-tracking events. For more information about DIS, see the Data Ingestion Service User Guide.

+
+

Syntax

create table disSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = '',
+  'format-type' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Data source type. Set this parameter to dis.

+

connector.region

+

Yes

+

Region where the DIS stream for storing the data is located.

+

connector.ak

+

No

+

Access key ID. This parameter must be set in pair with sk.

+

connector.sk

+

No

+

Secret access key. This parameter must be set in pair with ak.

+

connector.channel

+

Yes

+

Name of the DIS stream where data is located.

+

connector.partition-count

+

No

+

Number of partitions where data will be read. Data in partition 0 to partition-count will be read.

+

+

This parameter cannot be configured together with partition-range.

+

If neither of the two parameters is set, all partition data will be read by default.

+

connector.partition-range

+

No

+

Range of partitions where data will be read. This parameter cannot be configured together with partition-count. If neither of the two parameters is set, all partition data will be read by default.

+

+

For example, if you set partition-range to [0:2], data in partitions 1, 2, and 3 will be read. The range must be within the DIS stream.

+

connector.offset

+

No

+

Start position from which data will be read. Either this parameter or start-time can be configured.

+

connector.start-time

+

No

+

Time from which DLI reads data

+

+

If this parameter is specified, DLI reads data starting from the specified time. The format is yyyy-MM-dd HH:mm:ss.

+

If neither start-time nor offset is specified, the latest data is read.

+

connector.enable-checkpoint

+

No

+

Whether to enable the checkpoint function. The value can be true (enabled) or false (disabled). The default value is false.

+

+

Do not set this parameter when offset or start-time is set. If this parameter is set to true, checkpoint-app-name must be configured.

+

connector.checkpoint-app-name

+

No

+

ID of a DIS consumer. If a DIS stream is consumed by different jobs, you need to configure the consumer ID for each job to avoid checkpoint confusion.

+

+

Do not set this parameter when offset or start-time is set. If enable-checkpoint is set to true, this parameter is mandatory.

+

connector.checkpoint-interval

+

No

+

Interval of checkpoint operations on the DIS source operator. The default value is 60s. Available units: d/day, h/hour, min/minute, s/sec/second.

+

+

Do not set this parameter when offset or start-time is configured.

+

format.type

+

Yes

+

Data coding format. The value can be csv or json.

+

format.field-delimiter

+

No

+

Attribute delimiter. You can customize the attribute delimiter only when the encoding format is CSV. The default delimiter is a comma (,).

+
+
+
+

Precautions

None

+
+

Example

create table disCsvSource (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT)
+with (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disInput',
+  'format.type' = 'csv'
+);
+
+ +
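If the checkpoint-related parameters are used instead of offset or start-time, the declaration might look like the following sketch (the channel and application names are assumptions):
create table disCsvSource (
  car_id STRING,
  car_owner STRING,
  car_age INT,
  average_speed INT,
  total_miles INT)
with (
  'connector.type' = 'dis',
  'connector.region' = '',
  'connector.channel' = 'disInput',
  'connector.enable-checkpoint' = 'true',
  'connector.checkpoint-app-name' = 'dis-app-demo',
  'format.type' = 'csv'
);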
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0303.html b/docs/dli/sqlreference/dli_08_0303.html new file mode 100644 index 00000000..2c5c65c3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0303.html @@ -0,0 +1,145 @@ + + +

JDBC Source Table

+

Function

The JDBC connector is Flink's built-in connector for reading data from a database.

+
+

Prerequisites

+
+

Syntax

create table jdbcSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'jdbc',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Data source type. Set this parameter to jdbc.

+

connector.url

+

Yes

+

Database URL

+

connector.table

+

Yes

+

Name of the table where the data to be read from the database is located

+

connector.driver

+

No

+

Driver required for connecting to the database. If you do not set this parameter, the driver is automatically extracted from the URL.

+

connector.username

+

No

+

Database authentication username. This parameter must be configured in pair with connector.password.

+

connector.password

+

No

+

Database authentication password. This parameter must be configured in pair with connector.username.

+

connector.read.partition.column

+

No

+

Name of the column used to partition the input

+

+

This parameter is mandatory if connector.read.partition.lower-bound, connector.read.partition.upper-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.lower-bound

+

No

+

Lower bound of values to be fetched for the first partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.upper-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.upper-bound

+

No

+

Upper bound of values to be fetched for the last partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.num

+

No

+

Number of partitions to be created

+

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and

+

connector.read.partition.upper-bound are configured.

+

connector.read.fetch-size

+

No

+

Number of rows fetched from the database each time. The default value is 0, indicating that the hint is ignored.

+
+
+
+

Precautions

None

+
+

Example

create table jdbcSource (
+  car_id STRING,
+  car_owner STRING,
+  car_age INT,
+  average_speed INT,
+  total_miles INT)
+with (
+  'connector.type' = 'jdbc',
+  'connector.url' = 'jdbc:mysql://xx.xx.xx.xx:3306/xx',
+  'connector.table' = 'jdbc_table_name',
+  'connector.driver' = 'com.mysql.jdbc.Driver',
+  'connector.username' = 'xxx',
+  'connector.password' = 'xxxxxx'
+);
+
+
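As a sketch of how the partitioned-scan parameters in Table 1 work together, the following variant splits the read of jdbc_table_name into five parallel partitions over car_id values 0 to 10000; the bounds, partition count, and credentials are illustrative assumptions:
create table jdbcParallelSource (
  car_id INT,
  car_owner STRING,
  car_age INT,
  average_speed INT,
  total_miles INT)
with (
  'connector.type' = 'jdbc',
  'connector.url' = 'jdbc:mysql://xx.xx.xx.xx:3306/xx',
  'connector.table' = 'jdbc_table_name',
  'connector.driver' = 'com.mysql.jdbc.Driver',
  'connector.username' = 'xxx',
  'connector.password' = 'xxxxxx',
  'connector.read.partition.column' = 'car_id',
  'connector.read.partition.lower-bound' = '0',
  'connector.read.partition.upper-bound' = '10000',
  'connector.read.partition.num' = '5'
);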
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0304.html b/docs/dli/sqlreference/dli_08_0304.html new file mode 100644 index 00000000..4b9c0111 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0304.html @@ -0,0 +1,200 @@ + + +

GaussDB(DWS) Source Table

+

Function

DLI reads data of Flink jobs from GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types and delivers space information services, multi-version concurrent control (MVCC), and high concurrency. It applies to location applications, financial insurance, and e-commerce.

+

GaussDB(DWS) is an online data processing database based on the cloud infrastructure and platform and helps you mine and analyze massive sets of data.

+
+

Prerequisites

+
+

Syntax

create table dwsSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'gaussdb',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to gaussdb.

+

connector.url

+

Yes

+

JDBC connection address. The format is jdbc:postgresql://${ip}:${port}/${dbName}. If the database version is later than 8.1.0, the value format is jdbc:gaussdb://${ip}:${port}/${dbName}.

+

connector.table

+

Yes

+

Name of the table to be operated. If the GaussDB(DWS) table is in a schema, the format is schema\".\"Table name. For details, see the Example.

+

connector.driver

+

No

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

connector.username

+

No

+

Database authentication user name. This parameter must be configured in pair with connector.password.

+

connector.password

+

No

+

Database authentication password. This parameter must be configured in pair with connector.username.

+

connector.read.partition.column

+

No

+

Name of the column used to partition the input

+

This parameter is mandatory if connector.read.partition.lower-bound, connector.read.partition.upper-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.lower-bound

+

No

+

Lower bound of values to be fetched for the first partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.upper-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.upper-bound

+

No

+

Upper bound of values to be fetched for the last partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and

+

connector.read.partition.num are configured.

+

connector.read.partition.num

+

No

+

Number of partitions to be created

+

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and

+

connector.read.partition.upper-bound are configured.

+

connector.read.fetch-size

+

No

+

Number of rows fetched from the database each time. The default value is 0, indicating that the hint is ignored.

+
+
+
+

Example

+
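A minimal sketch based on the syntax and parameters above; the connection address, table name, and credentials are placeholders. For a table inside a schema, follow the schema\".\"table format described for connector.table.
create table dwsSource(
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_speed INT
)
with (
  'connector.type' = 'gaussdb',
  'connector.url' = 'jdbc:postgresql://xx.xx.xx.xx:8000/xx',
  'connector.table' = 'car_info',
  'connector.username' = 'xxx',
  'connector.password' = 'xxxxxx'
);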
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0305.html b/docs/dli/sqlreference/dli_08_0305.html new file mode 100644 index 00000000..e801132e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0305.html @@ -0,0 +1,123 @@ + + +

Redis Source Table

+

Function

Create a source stream to obtain data from Redis as input for jobs.

+
+

Prerequisites

An enhanced datasource connection with Redis has been established, so that you can configure security group rules as required.

+ +
+

Syntax

create table redisSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'redis',
+  'connector.host' = '',
+  'connector.port' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to redis.

+

connector.host

+

Yes

+

Redis connector address

+

connector.port

+

Yes

+

Redis connector port

+

connector.password

+

No

+

Redis authentication password

+

connector.deploy-mode

+

No

+

Redis deployment mode. The value can be standalone or cluster. The default value is standalone.

+

connector.table-name

+

No

+

Name of the table stored in the Redis. This parameter is mandatory in the Redis Hashmap storage pattern. In this pattern, data is stored to Redis in hashmaps. The hash key is ${table-name}:${ext-key}, and the field name is the column name.

+
NOTE:

Table storage pattern: connector.table-name and connector.key-column are used as Redis keys. For the Redis hash type, each key corresponds to a hashmap. A hash key is a field name of the source table, and a hash value is a field value of the source table.

+
+

connector.use-internal-schema

+

No

+

Whether to use the existing schema in the Redis. This parameter is optional in the Redis Hashmap storage pattern. The default value is false.

+

connector.key-column

+

No

+

This parameter is optional in table storage pattern. The value is used as the value of ext-key in the Redis. If this parameter is not set, the value of ext-key is the generated UUID.

+
+
+
+

Example

Reads data from Redis.

+
create table redisSource(
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_speed INT
+) with (
+ 'connector.type' = 'redis',
+  'connector.host' = 'xx.xx.xx.xx',
+  'connector.port' = '6379',
+  'connector.password' = 'xx',
+  'connector.table-name' = 'car_info'
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0306.html b/docs/dli/sqlreference/dli_08_0306.html new file mode 100644 index 00000000..fb01d083 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0306.html @@ -0,0 +1,98 @@ + + +

HBase Source Table

+

Function

Create a source stream to obtain data from HBase as input for jobs. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. DLI can read data from HBase for filtering, analysis, and data dumping.

+
+

Prerequisites

+
+

Syntax

create table hbaseSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector.type' = 'hbase',
+  'connector.version' = '1.4.3',
+  'connector.table-name' = '',
+  'connector.zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to hbase.

+

connector.version

+

Yes

+

The value must be 1.4.3.

+

connector.table-name

+

Yes

+

HBase table name

+

connector.zookeeper.quorum

+

Yes

+

ZooKeeper address

+

connector.zookeeper.znode.parent

+

No

+

Root directory for ZooKeeper. The default value is /hbase.

+

connector.rowkey

+

No

+

Content of a compound rowkey to be assigned. The content is assigned to a new field based on the configuration.

+

Example: rowkey1:3,rowkey2:3,...

+

The value 3 indicates the first three bytes of the field. The number cannot be greater than the byte size of the field and cannot be less than 1. rowkey1:3,rowkey2:3 indicates that the first three bytes of the compound rowkey are assigned to rowkey1, and the last three bytes are assigned to rowkey2.

+
+
+
+

Example

create table hbaseSource(
+  rowkey1 string,
+  rowkey2 string,
+  info Row<owner string>,
+  car ROW<miles string, speed string>
+ ) with (
+   'connector.type' = 'hbase',
+   'connector.version' = '1.4.3',
+   'connector.table-name' = 'carinfo',
+   'connector.rowkey' = 'rowkey1:1,rowkey2:3',
+   'connector.zookeeper.quorum' = 'xxxx:2181'
+ );
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0307.html b/docs/dli/sqlreference/dli_08_0307.html new file mode 100644 index 00000000..00110bc4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0307.html @@ -0,0 +1,41 @@ + + +

Creating a Result Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0308.html b/docs/dli/sqlreference/dli_08_0308.html new file mode 100644 index 00000000..e11aa540 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0308.html @@ -0,0 +1,157 @@ + + +

Kafka Result Table

+

Function

DLI exports the output data of the Flink job to Kafka.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

Kafka is an offline cluster. You have built an enhanced datasource connection to connect Flink jobs to Kafka. You have set security group rules as required.

+
+

Precautions

SASL_SSL cannot be enabled for the interconnected Kafka cluster.

+
+

Syntax

create table kafkaSink(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'kafka',
+  'connector.version' = '',
+  'connector.topic' = '',
+  'connector.properties.bootstrap.servers' = '',
+  'format.type' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to kafka.

+

connector.version

+

No

+

Kafka version. The value can be '0.10' or '0.11', which corresponds to Kafka 2.11 to 2.4.0 and other historical versions, respectively.

+

format.type

+

Yes

+

Data serialization format. The value can be csv, json, or avro.

+

format.field-delimiter

+

No

+

Attribute delimiter. You can customize the attribute delimiter only when the encoding format is CSV. The default delimiter is a comma (,).

+

connector.topic

+

Yes

+

Kafka topic name.

+

connector.properties.bootstrap.servers

+

Yes

+

Kafka broker addresses. Use commas (,) to separate them.

+

connector.sink-partitioner

+

No

+

Partitioner type. The value can be fixed, round-robin, or custom.

+

connector.sink-partitioner-class

+

No

+

Custom partitioner. This parameter is mandatory when sink-partitioner is custom, for example, org.mycompany.MyPartitioner.

+

update-mode

+

No

+

Data update mode. Three write modes are supported: append, retract, and upsert.

+

connector.properties.*

+

No

+

Native properties of Kafka

+
+
+
+

Example

Output the data in kafkaSink to Kafka.
create table kafkaSink(
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_speed INT)
+with (
+  'connector.type' = 'kafka',
+  'connector.version' = '0.10',
+  'connector.topic' = 'test-topic',
+  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
+  'connector.sink-partitioner' = 'round-robin',
+  'format.type' = 'csv'
+);
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0309.html b/docs/dli/sqlreference/dli_08_0309.html new file mode 100644 index 00000000..3cecdd74 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0309.html @@ -0,0 +1,142 @@ + + +

Upsert Kafka Result Table

+

Function

DLI exports the output data of the Flink job to Kafka in upsert mode.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

Kafka is an offline cluster. You have built an enhanced datasource connection to connect Flink jobs to Kafka. You have set security group rules as required.

+
+

Precautions

SASL_SSL cannot be enabled for the interconnected Kafka cluster.

+
+

Syntax

create table upsertKafkaSink(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'upsert-kafka',
+  'connector.version' = '',
+  'connector.topic' = '',
+  'connector.properties.bootstrap.servers' = '',
+   'format.type' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to upsert-kafka.

+

connector.version

+

No

+

Kafka version. The value can only be 0.11.

+

format.type

+

Yes

+

Data serialization format. The value can be csv, json, or avro.

+

connector.topic

+

Yes

+

Kafka topic name

+

connector.properties.bootstrap.servers

+

Yes

+

Kafka broker addresses. Use commas (,) to separate them.

+

connector.sink-partitioner

+

No

+

Partitioner type. The value can be fixed, round-robin, or custom.

+

connector.sink-partitioner-class

+

No

+

Custom partitioner. This parameter is mandatory when sink-partitioner is custom, for example, org.mycompany.MyPartitioner.

+

connector.sink.ignore-retraction

+

No

+

Whether to ignore the retraction message. The default value is false, indicating that the retraction message is written to Kafka as null.

+

update-mode

+

No

+

Data update mode. Three write modes are supported: append, retract, and upsert.

+

connector.properties.*

+

No

+

Native properties of Kafka

+
+
+
+

Example

create table upsertKafkaSink(
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_speed INT,
+  primary key (car_id) not enforced
+)
+with (
+  'connector.type' = 'upsert-kafka',
+  'connector.version' = '0.11',
+  'connector.topic' = 'test-topic',
+  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
+  'format.type' = 'csv'
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0310.html b/docs/dli/sqlreference/dli_08_0310.html new file mode 100644 index 00000000..c580673c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0310.html @@ -0,0 +1,139 @@ + + +

DIS Result Table

+

Function

DLI writes the Flink job output data into DIS. The data is filtered and imported to the DIS stream for future processing.

+

DIS addresses the challenge of transmitting data outside cloud services to cloud services. DIS builds data intake streams for custom applications capable of processing or analyzing streaming data. DIS continuously captures, transmits, and stores terabytes of data from hundreds of thousands of sources every hour, such as logs, Internet of Things (IoT) data, social media feeds, website clickstreams, and location-tracking events. For more information about DIS, see the Data Ingestion Service User Guide.

+
+

Syntax

create table disSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = '',
+  'format.type' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Data source type. Set this parameter to dis.

+

connector.region

+

Yes

+

Region where the DIS stream for storing the data is located.

+

connector.ak

+

No

+

Access key ID. This parameter must be set in pair with sk.

+

connector.sk

+

No

+

Secret access key. This parameter must be set in pair with ak.

+

connector.channel

+

Yes

+

Name of the DIS stream where data is located.

+

format.type

+

Yes

+

Data coding format. The value can be csv or json.

+

format.field-delimiter

+

No

+

Attribute delimiter. You can customize the attribute delimiter only when the encoding format is CSV. The default delimiter is a comma (,).

+

connector.partition-key

+

No

+

Group primary key. Multiple primary keys are separated by commas (,). If this parameter is not specified, data is randomly written to DIS partitions.

+
+
+
+

Precautions

None

+
+

Example

Output the data in the disSink stream to DIS.

+
create table disSink(
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_speed INT
+)
+with (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disOutput',
+  'connector.partition-key' = 'car_id,car_owner',
+  'format.type' = 'csv'
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0311.html b/docs/dli/sqlreference/dli_08_0311.html new file mode 100644 index 00000000..28506a8b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0311.html @@ -0,0 +1,160 @@ + + +

JDBC Result Table

+

Function

DLI exports the output data of the Flink job to RDS.

+
+

Prerequisites

+
+

Syntax

create table jdbcSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'jdbc',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.driver' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Data source type. Set this parameter to jdbc.

+

connector.url

+

Yes

+

Database URL

+

connector.table

+

Yes

+

Name of the table where the data to be read from the database is located

+

connector.driver

+

No

+

Driver required for connecting to the database. If you do not set this parameter, the driver is automatically extracted from the URL.

+

connector.username

+

No

+

Username for accessing the database

+

connector.password

+

No

+

Password for accessing the database

+

connector.write.flush.max-rows

+

No

+

Maximum number of rows to be updated when data is written. The default value is 5000.

+

connector.write.flush.interval

+

No

+

Interval for data update. The unit can be ms/milli/millisecond, s/sec/second, or min/minute. If this parameter is not set, the value is not updated based on the interval by default.

+

connector.write.max-retries

+

No

+

Maximum number of attempts to write data if failed. The default value is 3.

+

connector.write.exclude-update-columns

+

No

+

Columns excluded for data update. The default value is empty, indicating that when data with the same primary key is updated, the update of the specified field is ignored. The primary key column is ignored by default.

+
+
+
+

Precautions

None

+
+

Example

Output data from stream jdbcSink to the MySQL database.

+
create table jdbcSink(
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_speed INT
+)
+with (
+  'connector.type' = 'jdbc',
+  'connector.url' = 'jdbc:mysql://xx.xx.xx.xx:3306/xx',
+  'connector.table' = 'jdbc_table_name',
+  'connector.driver' = 'com.mysql.jdbc.Driver',
+  'connector.username' = 'xxx',
+  'connector.password' = 'xxxxxx'
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0312.html b/docs/dli/sqlreference/dli_08_0312.html new file mode 100644 index 00000000..3ffccda5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0312.html @@ -0,0 +1,198 @@ + + +

GaussDB(DWS) Result Table

+

Function

DLI outputs the Flink job output data to GaussDB(DWS). GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types and delivers space information services, multi-version concurrent control (MVCC), and high concurrency. It applies to location applications, financial insurance, and e-commerce.

+

GaussDB(DWS) is an online data processing database based on the cloud infrastructure and platform and helps you mine and analyze massive sets of data.

+
+

Prerequisites

+
+

Syntax

create table dwsSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'gaussdb',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.driver' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to gaussdb.

+

connector.url

+

Yes

+

JDBC connection address. The format is jdbc:postgresql://${ip}:${port}/${dbName}.

+

connector.table

+

Yes

+

Name of the table to be operated. If the GaussDB(DWS) table is in a schema, the format is schema\".\"Table name. For details, see the Example.

+

connector.driver

+

No

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

connector.username

+

No

+

Database authentication user name. This parameter must be configured in pair with connector.password.

+

connector.password

+

No

+

Database authentication password. This parameter must be configured in pair with connector.username.

+

connector.write.mode

+

No

+

Data write mode. The value can be copy, insert, or upsert. The default value is upsert.

+

This parameter must be configured depending on primary key.

+
  • If primary key is not configured, data can be appended in copy and insert modes.
  • If primary key is configured, all the three modes are available.
+

Note: GaussDB(DWS) does not support the update of distribution columns. The primary keys of columns to be updated must cover all distribution columns defined in the GaussDB(DWS) table.

+

connector.write.flush.max-rows

+

No

+

Maximum rows allowed for data flush. If the data size exceeds the value, data flush is triggered. The default value is 5000.

+

connector.write.flush.interval

+

No

+

Data flush period. Data flush is triggered periodically. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit). If this parameter is not set, the value is not updated based on the interval by default.

+

connector.write.max-retries

+

No

+

Maximum number of attempts to write data. The default value is 3.

+

connector.write.merge.filter-key

+

No

+

Column to be merged. This parameter takes effects only when PRIMARY KEY is configured and connector.write.mode is set to copy.

+

connector.write.escape-string-value

+

No

+

Whether to escape values of the string type. The default value is false.

+
+
+
+

Precautions

None

+
+

Example

+
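A minimal sketch based on the syntax and parameters above, appending data to a GaussDB(DWS) table in insert mode; the connection address, table name, and credentials are placeholders:
create table dwsSink(
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_speed INT
)
with (
  'connector.type' = 'gaussdb',
  'connector.url' = 'jdbc:postgresql://xx.xx.xx.xx:8000/xx',
  'connector.table' = 'car_info',
  'connector.username' = 'xxx',
  'connector.password' = 'xxxxxx',
  'connector.write.mode' = 'insert'
);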
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0313.html b/docs/dli/sqlreference/dli_08_0313.html new file mode 100644 index 00000000..96a15133 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0313.html @@ -0,0 +1,235 @@ + + +

Redis Result Table

+

Function

DLI exports the output data of the Flink job to Redis. Redis is a storage system that supports multiple types of data structures such as key-value. It can be used in scenarios such as caching, event pub/sub, and high-speed queuing. Redis supports direct read/write of strings, hashes, lists, queues, and sets. Redis works with in-memory dataset and provides persistence. For more information about Redis, visit https://redis.io/.

+
+

Prerequisites

An enhanced datasource connection with Redis has been established, so that you can configure security group rules as required.

+ +
+

Syntax

create table redisSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+'connector.type' = 'redis',
+  'connector.host' = '',
+  'connector.port' = '',
+  'connector.password' = '',
+  'connector.table-name' = '',
+  'connector.key-column' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to redis.

+

connector.host

+

Yes

+

Redis connector address

+

connector.port

+

Yes

+

Redis connector port

+

connector.password

+

No

+

Redis authentication password

+

connector.deploy-mode

+

No

+

Redis deployment mode. The value can be standalone or cluster. The default value is standalone.

+

connector.table-name

+

No

+

Name of the table stored in the Redis. This parameter is mandatory in the Redis Hashmap storage pattern. In this pattern, data is stored to Redis in hashmaps. The hash key is ${table-name}:${ext-key}, and the field name is the column name.

+
NOTE:

Table storage pattern: connector.table-name and connector.key-column are used as Redis keys. For the Redis hash type, each key corresponds to a hashmap. A hash key is a field name of the source table, and a hash value is a field value of the source table.

+
+

connector.key-column

+

No

+

This parameter is optional in table storage pattern. The value is used as the value of ext-key in the Redis. If this parameter is not set, the value of ext-key is the generated UUID.

+

connector.write-schema

+

No

+

Whether to write the current schema to the Redis. This parameter is available in table storage pattern. The default value is false.

+

connector.data-type

+

No

+

Data types for storage. This parameter is mandatory for a custom storage pattern. Supported values include string, list, hash, and set. In a string, list or set, the number of schema fields must be 2, and the number of hash fields must be 3.

+

connector.ignore-retraction

+

No

+

Whether to ignore the retraction message. The default value is false.

+
+
+
+

Precautions

Either connector.table-name or connector.data-type must be set.

+
+

Example

+
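A minimal sketch based on the syntax and parameters above, writing data to Redis in the table storage pattern; the host, password, and key column are placeholders:
create table redisSink(
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_speed INT
)
with (
  'connector.type' = 'redis',
  'connector.host' = 'xx.xx.xx.xx',
  'connector.port' = '6379',
  'connector.password' = 'xx',
  'connector.table-name' = 'car_info',
  'connector.key-column' = 'car_id'
);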
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0314.html b/docs/dli/sqlreference/dli_08_0314.html new file mode 100644 index 00000000..e0ab29dc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0314.html @@ -0,0 +1,100 @@ + + +

SMN Result Table

+

Function

DLI exports Flink job output data to SMN.

+

SMN provides reliable and flexible large-scale message notification services to DLI. It significantly simplifies system coupling and pushes messages to subscription endpoints based on requirements. SMN can be connected to other cloud services or integrated with any application that uses or generates message notifications to push messages over multiple protocols.

+
+

Syntax

create table smnSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'smn',
+  'connector.region' = '',
+  'connector.topic-urn' = '',
+  'connector.message-subject' = '',
+  'connector.message-column' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Sink data type. Set this parameter to smn, which means that data is stored to SMN.

+

connector.region

+

Yes

+

Region where SMN belongs

+

connector.topic-urn

+

No

+

URN of an SMN topic, which is used for the static topic URN configuration. The SMN topic serves as the destination for short message notification and needs to be created in SMN.

+

+

Either connector.topic-urn or connector.urn-column must be configured. If both are configured, the connector.topic-urn setting takes precedence.

+

connector.urn-column

+

No

+

Field name of the topic URN content, which is used for the dynamic topic URN configuration.

+

+

One of connector.topic-urn and connector.urn-column must be configured. If both are configured, the connector.topic-urn setting takes precedence.

+

connector.message-subject

+

Yes

+

Message subject sent by SMN. This parameter can be customized.

+

connector.message-column

+

Yes

+

Column name in the current table. Data in this column is the message content and is customized. Currently, only text messages are supported.

+
+
+
+

Precautions

None

+
+

Example

Write data to the target SMN topic. The subject of the message sent by SMN is test, and the message content is the data in the attr1 column.

+
create table smnSink (
+  attr1 STRING,
+  attr2 STRING
+)
+with (
+  'connector.type' = 'smn',
+  'connector.region' = '',
+  'connector.topic-urn' = 'xxxxxx',
+  'connector.message-subject' = 'test',
+  'connector.message-column' = 'attr1'
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0315.html b/docs/dli/sqlreference/dli_08_0315.html new file mode 100644 index 00000000..308384cc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0315.html @@ -0,0 +1,119 @@ + + +

HBase Result Table

+

Function

DLI outputs the job data to HBase. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. Structured and semi-structured key-value data can be stored, including messages, reports, recommendation data, risk control data, logs, and orders. With DLI, you can write massive volumes of data to HBase at a high speed and with low latency.

+
+

Prerequisites

An enhanced datasource connection has been created for DLI to connect to HBase, so that jobs can run on the dedicated queue of DLI and you can set the security group rules as required.

+ +
+

Syntax

create table hbaseSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'hbase',
+  'connector.version' = '1.4.3',
+  'connector.table-name' = '',
+  'connector.zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to hbase.

+

connector.version

+

Yes

+

The value must be 1.4.3.

+

connector.table-name

+

Yes

+

HBase table name

+

connector.zookeeper.quorum

+

Yes

+

ZooKeeper address

+

connector.zookeeper.znode.parent

+

No

+

Root directory for ZooKeeper. The default value is /hbase.

+

connector.write.buffer-flush.max-size

+

No

+

Maximum buffer size for each data write. The default value is 2 MB. The unit is MB.

+

connector.write.buffer-flush.max-rows

+

No

+

Maximum number of data records that can be updated each time

+

connector.write.buffer-flush.interval

+

No

+

Update time. The default value is 0s. Example value: 2s.

+

connector.rowkey

+

No

+

Content of a compound rowkey to be assigned. The content is assigned to a new field based on the configuration.

+

Example: rowkey1:3,rowkey2:3, ...

+

The value 3 indicates the first three bytes of the field. The number cannot be greater than the byte size of the field and cannot be less than 1.

+
+
+
+

Example

 create table hbaseSink(
+  rowkey string,
+  name string,
+  i Row<geneder string, age int>,
+  j Row<address string>
+ ) with (
+   'connector.type' = 'hbase',
+   'connector.version' = '1.4.3',
+   'connector.table-name' = 'sink',
+   'connector.rowkey' = 'rowkey:1,name:3',
+   'connector.write.buffer-flush.max-rows' = '5',
+   'connector.zookeeper.quorum' = 'xxxx:2181'
+ );
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0316.html b/docs/dli/sqlreference/dli_08_0316.html new file mode 100644 index 00000000..bd95b726 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0316.html @@ -0,0 +1,188 @@ + + +

Elasticsearch Result Table

+

Function

DLI exports Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server and provides the distributed multi-user capabilities. It delivers multiple functions, including full-text retrieval, structured search, analytics, aggregation, and highlighting. With Elasticsearch, you can achieve stable, reliable, real-time search. Elasticsearch applies to diversified scenarios, such as log analysis and site search.

+

CSS is a fully managed, distributed search service. It is fully compatible with open-source Elasticsearch and provides DLI with structured and unstructured data search, statistics, and report capabilities. For more information about CSS, see .

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table esSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector.type' = 'elasticsearch',
+  'connector.version' = '7',
+  'connector.hosts' = 'http://xxxx:9200',
+  'connector.index' = '',
+  'connector.document-type' = '',
+  'update-mode' = '',
+  'format.type' = 'json'
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to elasticsearch.

+

connector.version

+

Yes

+

Elasticsearch version

+

Currently, only version 7 can be used. That is, the value of this parameter can only be 7.

+

connector.hosts

+

Yes

+

Host name of the cluster where Elasticsearch is located. Use semicolons (;) to separate multiple host names. Ensure that the host name starts with http, for example, http://x.x.x.x:9200.

+

connector.index

+

Yes

+

Elasticsearch index name

+

connector.document-type

+

Yes

+

Elasticsearch type name

+

This attribute is invalid because Elasticsearch 7 uses the default _doc type.

+

update-mode

+

Yes

+

Data update mode of the sink. The value can be append or upsert.

+

connector.key-delimiter

+

No

+

Delimiter of compound primary keys. The default value is _.

+

connector.key-null-literal

+

No

+

Character used to replace null in keys.

+

connector.failure-handler

+

No

+

Policy used when an Elasticsearch request fails. The default value is fail.

+

fail: An exception is thrown when the request fails and the job fails.

+

ignore: The failed request is ignored.

+

retry-rejected: If the request fails because the queue running the Elasticsearch node is full, the request is resent and no failure is reported.

+

custom: A custom policy is used.

+

connector.failure-handler-class

+

No

+

Custom processing mode used to handle a failure

+

connector.flush-on-checkpoint

+

No

+

Whether the connector waits for all pending action requests to be acknowledged by Elasticsearch on checkpoints.

+

The default value is true, indicating that the connector waits for all pending action requests on checkpoints. If you set this parameter to false, the connector does not wait for the requests.

+

connector.bulk-flush.max-actions

+

No

+

Maximum number of records that can be written in a batch

+

connector.bulk-flush.max-size

+

No

+

Maximum total amount of data to be written in a batch. Specify the unit when you configure this parameter; the unit is MB.

+

connector.bulk-flush.interval

+

No

+

Interval for batch writing, in milliseconds. The unit does not need to be specified.

+

format.type

+

Yes

+

Data format. Currently, only JSON is supported.

+

connector.username

+

No

+

Account of the cluster where Elasticsearch is deployed. This parameter must be configured together with connector.password.

+

If the account and password are used, the security mode must be enabled and HTTPS must be disabled for the created CSS cluster.

+

connector.password

+

No

+

Password of the cluster where Elasticsearch is deployed. This parameter must be configured together with connector.username.

+
+
+
+

+

Example

create table sink1(
+  attr1 string,
+  attr2 int
+) with (
+  'connector.type' = 'elasticsearch',
+  'connector.version' = '7', 
+  'connector.hosts' = 'http://xxxx:9200',
+  'connector.index' = 'es',
+  'connector.document-type' = 'one',
+  'update-mode' = 'append',
+  'format.type' = 'json'
+);
+
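The example above writes in append mode. For upsert mode, declare a primary key so that Elasticsearch can update documents by ID, as shown in the syntax above. The following is a minimal sketch, assuming a hypothetical index es_users on the same CSS cluster:
create table esUpsertSink(
  user_id string,
  user_name string,
  PRIMARY KEY (user_id) NOT ENFORCED
) with (
  'connector.type' = 'elasticsearch',
  'connector.version' = '7',
  'connector.hosts' = 'http://xxxx:9200',
  'connector.index' = 'es_users',
  'connector.document-type' = '_doc',
  'update-mode' = 'upsert',
  'format.type' = 'json'
);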
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0317.html b/docs/dli/sqlreference/dli_08_0317.html new file mode 100644 index 00000000..0bcd2581 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0317.html @@ -0,0 +1,19 @@ + + +

Creating a Dimension Table

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0318.html b/docs/dli/sqlreference/dli_08_0318.html new file mode 100644 index 00000000..0ee1840d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0318.html @@ -0,0 +1,255 @@ + + +

JDBC Dimension Table

+

Create a JDBC dimension table to connect to the source stream.

+

Prerequisites

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
CREATE TABLE  table_id (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+  WITH (
+  'connector.type' = 'jdbc',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Data source type. Set this parameter to jdbc.

+

connector.url

+

Yes

+

Database URL

+

connector.table

+

Yes

+

Name of the table where the data to be read from the database is located

+

connector.driver

+

No

+

Driver required for connecting to the database. If you do not set this parameter, the driver is automatically extracted from the URL.

+

connector.username

+

No

+

Database authentication user name. This parameter must be configured in pair with connector.password.

+

connector.password

+

No

+

Database authentication password. This parameter must be configured in pair with connector.username.

+

connector.read.partition.column

+

No

+

Name of the column used to partition the input

+

+

This parameter is mandatory if connector.read.partition.lower-bound, connector.read.partition.upper-bound, and connector.read.partition.num are configured.

+

connector.read.partition.lower-bound

+

No

+

Lower bound of values to be fetched for the first partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.upper-bound, and connector.read.partition.num are configured.

+

connector.read.partition.upper-bound

+

No

+

Upper bound of values to be fetched for the last partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and connector.read.partition.num are configured.

+

connector.read.partition.num

+

No

+

Number of partitions

+

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and connector.read.partition.upper-bound are configured.

+

connector.read.fetch-size

+

No

+

Number of rows fetched from the database each time. The default value is 0, indicating that this hint is ignored.

+

connector.lookup.cache.max-rows

+

No

+

Maximum number of cached rows in a dimension table. If the number of cached rows exceeds this value, the oldest data is deleted. The value -1 indicates that the data cache is disabled.

+

connector.lookup.cache.ttl

+

No

+

Time To Live (TTL) of dimension table cache. Caches exceeding the TTL will be deleted. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit).

+

connector.lookup.max-retries

+

No

+

Maximum number of attempts to obtain data from the dimension table. The default value is 3.

+
+
+
+

Example

The RDS table is used to connect to the source stream.
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
CREATE TABLE car_infos (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT,
+  proctime as PROCTIME()
+)
+  WITH (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disInput',
+  'format.type' = 'csv'
+  );
+
+CREATE TABLE  db_info (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+  'connector.type' = 'jdbc',
+  'connector.url' = 'jdbc:mysql://xx.xx.xx.xx:3306/xx',
+  'connector.table' = 'jdbc_table_name',
+  'connector.driver' = 'com.mysql.jdbc.Driver',
+  'connector.username' = 'xxx',
+  'connector.password' = 'xxxxx'
+);
+
+CREATE TABLE audi_cheaper_than_30w (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disOutput',
+  'connector.partition-key' = 'car_id,car_owner',
+  'format.type' = 'csv'
+  );
+
+INSERT INTO audi_cheaper_than_30w
+SELECT a.car_id, b.car_owner, b.car_brand, b.car_price 
+FROM car_infos as a join db_info FOR SYSTEM_TIME AS OF a.proctime AS b on a.car_id = b.car_id;
+
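The partitioned scan and lookup cache parameters described above can be combined in the dimension table definition. The following is a minimal sketch, assuming a hypothetical MySQL table jdbc_table_name whose numeric car_price column is used for partitioning:
CREATE TABLE db_info (
  car_id STRING,
  car_owner STRING,
  car_brand STRING,
  car_price INT
)
  WITH (
  'connector.type' = 'jdbc',
  'connector.url' = 'jdbc:mysql://xx.xx.xx.xx:3306/xx',
  'connector.table' = 'jdbc_table_name',
  'connector.driver' = 'com.mysql.jdbc.Driver',
  'connector.username' = 'xxx',
  'connector.password' = 'xxxxx',
  'connector.read.partition.column' = 'car_price',
  'connector.read.partition.lower-bound' = '0',
  'connector.read.partition.upper-bound' = '1000000',
  'connector.read.partition.num' = '4',
  'connector.lookup.cache.max-rows' = '10000',
  'connector.lookup.cache.ttl' = '1h'
);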
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0319.html b/docs/dli/sqlreference/dli_08_0319.html new file mode 100644 index 00000000..739359be --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0319.html @@ -0,0 +1,259 @@ + + +

GaussDB(DWS) Dimension Table

+

Create a GaussDB(DWS) dimension table to connect to the input stream.

+

Prerequisites

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
create table dwsSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'gaussdb',
+  'connector.url' = '',
+  'connector.table' = '',
+  'connector.username' = '',
+  'connector.password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to gaussdb.

+

connector.url

+

Yes

+

JDBC connection address. The format is jdbc:postgresql://${ip}:${port}/${dbName}.

+

connector.table

+

Yes

+

Name of the table where the data to be read from the database is located

+

connector.driver

+

No

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

connector.username

+

No

+

Database authentication user name. This parameter must be configured in pair with connector.password.

+

connector.password

+

No

+

Database authentication password. This parameter must be configured in pair with connector.username.

+

connector.read.partition.column

+

No

+

Name of the column used to partition the input

+

This parameter is mandatory if connector.read.partition.lower-bound, connector.read.partition.upper-bound, and connector.read.partition.num are configured.

+

connector.read.partition.lower-bound

+

No

+

Lower bound of values to be fetched for the first partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.upper-bound, and connector.read.partition.num are configured.

+

connector.read.partition.upper-bound

+

No

+

Upper bound of values to be fetched for the last partition

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and connector.read.partition.num are configured.

+

connector.read.partition.num

+

No

+

Number of partitions

+

+

This parameter is mandatory if connector.read.partition.column, connector.read.partition.lower-bound, and connector.read.partition.upper-bound are configured.

+

connector.read.fetch-size

+

No

+

Number of rows fetched from the database each time. The default value is 0, indicating that this hint is ignored.

+

connector.lookup.cache.max-rows

+

No

+

Maximum number of cached rows in a dimension table. If the number of cached rows exceeds this value, the oldest data is deleted. The value -1 indicates that the data cache is disabled.

+

connector.lookup.cache.ttl

+

No

+

+

Time To Live (TTL) of dimension table cache. Caches exceeding the TTL will be deleted. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit).

+

connector.lookup.max-retries

+

No

+

Maximum number of attempts to obtain data from the dimension table. The default value is 3.

+
+
+
+

Example

Use a GaussDB(DWS) table to connect to the source stream.
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
CREATE TABLE car_infos (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT,
+  proctime as PROCTIME()
+)
+  WITH (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disInput',
+  'format.type' = 'csv'
+  );
+
+CREATE TABLE  db_info (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+  'connector.type' = 'gaussdb',
+  'connector.driver' = 'org.postgresql.Driver',
+  'connector.url' = 'jdbc:gaussdb://xx.xx.xx.xx:8000/xx',
+  'connector.table' = 'car_info',
+  'connector.username' = 'xx',
+  'connector.password' = 'xx',
+  'connector.lookup.cache.max-rows' = '10000',
+  'connector.lookup.cache.ttl' = '24h'
+);
+
+CREATE TABLE audi_cheaper_than_30w (
+  car_id STRING,
+  car_owner STRING,
+  car_brand STRING,
+  car_price INT
+)
+  WITH (
+  'connector.type' = 'dis',
+  'connector.region' = '',
+  'connector.channel' = 'disOutput',
+  'connector.partition-key' = 'car_id,car_owner',
+  'format.type' = 'csv'
+  );
+
+INSERT INTO audi_cheaper_than_30w
+SELECT a.car_id, b.car_owner, b.car_brand, b.car_price 
+FROM car_infos as a join db_info FOR SYSTEM_TIME AS OF a.proctime AS b on a.car_id = b.car_id;
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0320.html b/docs/dli/sqlreference/dli_08_0320.html new file mode 100644 index 00000000..b418787e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0320.html @@ -0,0 +1,114 @@ + + +

HBase Dimension Table

+

Function

Create an HBase dimension table to connect to the source stream.

+
+

Prerequisites

+
+

Syntax

create table hbaseSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+ )
+with (
+  'connector.type' = 'hbase',
+  'connector.version' = '1.4.3',
+  'connector.table-name' = '',
+  'connector.zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to hbase.

+

connector.version

+

Yes

+

The value must be 1.4.3.

+

connector.table-name

+

Yes

+

Table name in HBase

+

connector.zookeeper.quorum

+

Yes

+

ZooKeeper address

+

connector.zookeeper.znode.parent

+

No

+

Root directory for ZooKeeper. The default value is /hbase.

+
+
+
+

Example

create table hbaseSource(
+  id string,
+  i Row<score string>
+ ) with (
+   'connector.type' = 'hbase',
+   'connector.version' = '1.4.3',
+   'connector.table-name' = 'user',
+   'connector.zookeeper.quorum' = 'xxxx:2181'
+ );
+create table source1(
+  id string,
+  name string,
+  gender string,
+  age int,
+  address string,
+  proctime as PROCTIME()
+) with (
+  "connector.type" = "dis",
+  "connector.region" = "",
+  "connector.channel" = "read",
+  "connector.ak" = "xxxxxx",
+  "connector.sk" = "xxxxxx",
+  "format.type" = 'csv'
+);
+
+ create table hbaseSink(
+  rowkey string,
+  i Row<name string, gender string, age int, address string>,
+  j ROW<score string>
+ ) with (
+   'connector.type' = 'hbase',
+   'connector.version' = '1.4.3',
+   'connector.table-name' = 'score',
+   'connector.write.buffer-flush.max-rows' = '1',
+   'connector.zookeeper.quorum' = 'xxxx:2181'
+ );
+ insert into hbaseSink select d.id, ROW(name, gender, age, address), ROW(score) from source1 as d join hbaseSource for system_time as of d.proctime as h on d.id = h.id;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0321.html b/docs/dli/sqlreference/dli_08_0321.html new file mode 100644 index 00000000..51b2799b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0321.html @@ -0,0 +1,27 @@ + + +

Data Manipulation Language (DML)

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0322.html b/docs/dli/sqlreference/dli_08_0322.html new file mode 100644 index 00000000..795e749c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0322.html @@ -0,0 +1,197 @@ + + +

SELECT

+

SELECT

Syntax

+
1
+2
+3
+4
+5
+6
SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

This clause is used to select data from a table.

+

ALL indicates that all results are returned.

+

DISTINCT indicates that the duplicated results are removed.

+

Precautions

+ +

Example

+

Select the orders in which the number of units is greater than 3.

+
1
insert into temp SELECT  * FROM Orders WHERE units > 3; 
+
+ +
+

Insert a group of constant data.

+
1
insert into temp select 'Lily', 'male', 'student', 17;
+
+ +
+
+

WHERE Filtering Clause

Syntax

+
1
+2
+3
SELECT   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+
+ +
+

Description

+

This clause is used to filter the query results using the WHERE clause.

+

Precautions

+ +

Example

+

Filter the orders in which the number of units is greater than 3 and less than 10.

+
1
+2
insert into temp SELECT  * FROM Orders
+  WHERE units > 3 and units < 10; 
+
+ +
+
+

HAVING Filtering Clause

Function

+

This clause is used to filter the query results using the HAVING clause.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. GROUP BY applies first for grouping, and HAVING then applies for filtering. Arithmetic operations and aggregate functions are supported in the HAVING clause.

+

Precautions

+

If the filtering condition is subject to the query results of GROUP BY, the HAVING clause, rather than the WHERE clause, must be used for filtering.

+

Example

+

Group the student table by the name field and return the groups whose maximum score is higher than 95.

+
1
+2
+3
insert into temp SELECT name, max(score) FROM student
+  GROUP BY name
+  HAVING max(score) >95;
+
+ +
+
+

Column-Based GROUP BY

Function

+

This clause is used to group a table based on columns.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.

+ +

Precautions

+

In stream processing, GROUP BY generates continuously updated results.

+

Example

+

Group the student table according to the score and name fields and return the grouping results.

+
1
+2
insert into temp SELECT name,score, max(score) FROM student 
+  GROUP BY name,score;
+
+ +
+
+

Expression-Based GROUP BY

Function

+

This clause is used to group a table according to expressions.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

groupItem can contain one or more fields. The fields can be wrapped in string functions, but cannot be wrapped in aggregate functions.

+

Precautions

+

None

+

Example

+

Use the substring function to obtain a substring from the name field, group the student table by the obtained substring, and return each substring and the number of records.

+
1
+2
insert into temp SELECT substring(name,6),count(name) FROM student
+  GROUP BY substring(name,6);
+
+ +
+
+

Grouping sets, Rollup, Cube

Function

+ +
Syntax
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY groupingItem]
+
+

Description

+

Values of groupingItem can be Grouping sets(columnName [, columnName]*), Rollup(columnName [, columnName]*), and Cube(columnName [, columnName]*).

+

Precautions

+

None

+

Example

+

Return the total amount grouped by user and by product separately.

+
INSERT INTO temp SELECT SUM(amount)
+FROM Orders
+GROUP BY GROUPING SETS ((user), (product));
+
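ROLLUP and CUBE follow the same pattern as GROUPING SETS. The following is a minimal sketch using the same Orders table:
-- ROLLUP groups by (user, product), (user), and the empty group.
INSERT INTO temp SELECT SUM(amount)
FROM Orders
GROUP BY ROLLUP (user, product);

-- CUBE groups by every combination of the listed columns.
INSERT INTO temp SELECT SUM(amount)
FROM Orders
GROUP BY CUBE (user, product);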
+

GROUP BY Using HAVING

Function

+

This statement filters a table after grouping it using the HAVING clause.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. GROUP BY applies first for grouping and HAVING then applies for filtering.

+

Precautions

+ +

Example

+

Group the transactions by num, use the HAVING clause to keep the groups in which the maximum value of price multiplied by amount is higher than 5000, and return the filtered results.

+
1
+2
+3
+4
insert into temp SELECT num, max(price*amount) FROM transactions
+  WHERE time > '2016-06-01'
+  GROUP BY num
+  HAVING max(price*amount)>5000;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0323.html b/docs/dli/sqlreference/dli_08_0323.html new file mode 100644 index 00000000..56de6382 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0323.html @@ -0,0 +1,56 @@ + + +

Set Operations

+

UNION/UNION ALL/INTERSECT/EXCEPT

Syntax

+
1
query UNION [ ALL ] | Intersect | Except query
+
+ +
+

Description

+ +

Precautions

+ +

Example

+

Output the union set of Orders1 and Orders2 without duplicate records.

+
1
+2
insert into temp SELECT  * FROM Orders1
+  UNION SELECT  * FROM Orders2;
+
+ +
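INTERSECT and EXCEPT use the same syntax as UNION. The following is a minimal sketch based on the same Orders1 and Orders2 tables:
-- Records that appear in both Orders1 and Orders2, without duplicates.
insert into temp SELECT * FROM Orders1
  INTERSECT SELECT * FROM Orders2;

-- Records that appear in Orders1 but not in Orders2, without duplicates.
insert into temp SELECT * FROM Orders1
  EXCEPT SELECT * FROM Orders2;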
+
+

IN

Syntax

+
1
+2
+3
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  WHERE column_name IN (value (, value)* ) | query
+
+ +
+

Description

+

The IN operator allows multiple values to be specified in the WHERE clause. It returns true if the expression exists in the given table subquery.

+

Precautions

+

The subquery table must consist of a single column, and the data type of the column must be the same as that of the expression.

+

Example

+

Return the user and amount of the orders in the Orders table whose products appear in the NewProducts table.

+
1
+2
+3
+4
+5
insert into temp SELECT user, amount
+FROM Orders
+WHERE product IN (
+    SELECT product FROM NewProducts
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0324.html b/docs/dli/sqlreference/dli_08_0324.html new file mode 100644 index 00000000..a5cc4e30 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0324.html @@ -0,0 +1,293 @@ + + +

Window

+

GROUP WINDOW

Description

+

Group Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:

+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
// Calculate the SUM every day (event time).
+insert into temp SELECT name,
+    TUMBLE_START(ts, INTERVAL '1' DAY) as wStart,
+    SUM(amount)
+    FROM Orders
+    GROUP BY TUMBLE(ts, INTERVAL '1' DAY), name;
+
+//Calculate the SUM every day (processing time). 
+insert into temp SELECT name, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY TUMBLE(proctime, INTERVAL '1' DAY), name;
+
+//Calculate the SUM over the recent 24 hours every hour (event time).
+insert into temp SELECT product, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY HOP(ts, INTERVAL '1' HOUR, INTERVAL '1' DAY), product;
+
+//Calculate the SUM of each session, with a 12-hour inactivity gap between sessions (event time).
+insert into temp SELECT name, 
+    SESSION_START(ts, INTERVAL '12' HOUR) AS sStart,
+    SESSION_END(ts, INTERVAL '12' HOUR) AS sEnd,
+    SUM(amount)
+    FROM Orders
+    GROUP BY SESSION(ts, INTERVAL '12' HOUR), name;
+
+ +
+
+

TUMBLE WINDOW Extension

Function

+
The extension functions of the DLI tumbling window are as follows:
  • Periodical tumbling windows for lower latency

    Before the tumbling window ends, the window can be periodically triggered based on the configured frequency. The compute result from the start to the current time is output, which does not affect the final output. The latest result can be viewed in each period before the window ends.

    +
  • Custom latency for higher data accuracy

You can set a latency for the end of the window. The output of the window is updated according to the configured latency each time a piece of late data arrives.

    +
+
+

Precautions

+

If you use insert to write results into the sink, the sink must support the upsert mode.

+

Syntax

+
TUMBLE(time_attr, window_interval, period_interval, lateness_interval)
+

Example

+
If the current time_attr attribute column is testtime and the window interval is 10 seconds, the statement is as follows:
TUMBLE(testtime, INTERVAL '10' SECOND, INTERVAL '10' SECOND, INTERVAL '10' SECOND)
+
+
+

Description

+ +
+ + + + + + + + + + + + + + + + + + + +
Table 3 Parameter description

Parameter

+

Description

+

Format

+

time_attr

+

Event time or processing time attribute column

+

-

+

window_interval

+

Duration of the window

+
  • Format 1: INTERVAL '10' SECOND

    The window interval is 10 seconds. You can change the value as needed.

    +
  • Format 2: INTERVAL '10' MINUTE

    The window interval is 10 minutes. You can change the value as needed.

    +
  • Format 3: INTERVAL '10' DAY

    The window interval is 10 days. You can change the value as needed.

    +
+

period_interval

+

Frequency of periodic triggering within the window range. That is, before the window ends, the output result is updated at an interval specified by period_interval from the time when the window starts. If this parameter is not set, the periodic triggering policy is not used by default.

+

lateness_interval

+

Time to postpone the end of the window. The system continues to collect data that reaches the window within lateness_interval after the window ends. The output is updated for each late record that arrives within lateness_interval.

+
NOTE:

If the time window is for processing time, lateness_interval does not take effect.

+
+
+
+
Values of period_interval and lateness_interval cannot be negative numbers.
  • If period_interval is set to 0, periodic triggering is disabled for the window.
  • If lateness_interval is set to 0, the latency after the window ends is disabled.
  • If neither of the two parameters is set, both periodic triggering and latency are disabled and only the regular tumbling window functions are available.
  • If only the latency function needs to be used, set period_interval to INTERVAL '0' SECOND.
+
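Putting the parameters together, the following is a minimal sketch of a periodic tumbling-window aggregation, assuming an Orders source with an event-time attribute ts and a sink that supports upsert:
-- A one-day window whose result is refreshed every hour while the window is open;
-- late data arriving within 10 minutes after the window ends updates the result again.
insert into temp SELECT name,
    SUM(amount)
    FROM Orders
    GROUP BY TUMBLE(ts, INTERVAL '1' DAY, INTERVAL '1' HOUR, INTERVAL '10' MINUTE), name;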
+
+

OVER WINDOW

The difference between Over Window and Group Window is that Over Window generates one output record for each input row.

+

Syntax

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
SELECT agg1(attr1) OVER (
+  [PARTITION BY partition_name]
+  ORDER BY proctime|rowtime 
+  ROWS  
+ BETWEEN (UNBOUNDED|rowCOUNT) PRECEDING AND CURRENT ROW) FROM TABLENAME
+
+SELECT agg1(attr1) OVER (
+  [PARTITION BY partition_name]
+  ORDER BY proctime|rowtime 
+  RANGE  
+  BETWEEN (UNBOUNDED|timeInterval) PRECEDING AND CURRENT ROW) FROM TABLENAME
+
+ +
+

Description

+ +
+ + + + + + + + + + + + + + + + +
Table 4 Parameter description

Parameter

+

Parameter Description

+

PARTITION BY

+

Specifies the key used to partition the data into groups. Each group performs the calculation separately.

+

ORDER BY

+

Indicates the processing time or event time as the timestamp for data.

+

ROWS

+

Indicates the count window.

+

RANGE

+

Indicates the time window.

+
+
+

Precautions

+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
// Calculate the count and sum of amount from the start to now (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt2
+    FROM Orders;
+  
+//Calculate the count and sum of amount over the current row and the four preceding rows (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+//Calculate the count and sum of amount over the last 60 seconds (in event time). Events are processed based on event time, which is the timeattr field in Orders.
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0325.html b/docs/dli/sqlreference/dli_08_0325.html new file mode 100644 index 00000000..dbc57033 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0325.html @@ -0,0 +1,102 @@ + + +

JOIN

+

Equi-join

Syntax

+
1
+2
FROM tableExpression INNER | LEFT | RIGHT | FULL JOIN tableExpression
+  ON value11 = value21 [ AND value12 = value22]
+
+ +
+
+

Precautions

+ +

Example

+
SELECT *
+FROM Orders INNER JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders LEFT JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders RIGHT JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders FULL OUTER JOIN Product ON Orders.productId = Product.id;
+

Time-Windowed Join

Function

+

Each piece of data in a stream is joined with data within a specified time range in another stream.

+

Syntax

+
from t1 JOIN t2 ON t1.key = t2.key AND TIMEBOUND_EXPRESSION
+

Description

+

TIMEBOUND_EXPRESSION can be in either of the following formats:

+ +

Precautions

+

A time window join requires at least one equi join predicate and a join condition that limits the time of both streams.

+

For example, use two range predicates (<, <=, >=, or >), a BETWEEN predicate, or an equal predicate that compares the same type of time attributes (such as processing time and event time) in two input tables.

+

For example, the following predicate is a valid window join condition:

+ +
+

Example

+

Join all orders shipped within 4 hours with their associated shipments.

+
SELECT *
+FROM Orders o, Shipments s
+WHERE o.id = s.orderId AND
+      o.ordertime BETWEEN s.shiptime - INTERVAL '4' HOUR AND s.shiptime;
+

Array Expansion

Precautions

+

This clause is used to return a new row for each element in the given array. Unnesting WITH ORDINALITY is not yet supported.

+

Example

+
SELECT users, tag
+FROM Orders CROSS JOIN UNNEST(tags) AS t (tag);
+
+

User-Defined Table Functions

Function

+

This clause is used to join a table with the results of a table function. Each row of the left (outer) table is joined with all rows produced by the corresponding call of the table function.

+

Precautions

+

A left outer join against a lateral table requires a TRUE literal in the ON clause.

+

Example

+

A row of the left (outer) table is dropped if its table function call returns an empty result.

+
SELECT users, tag
+FROM Orders, LATERAL TABLE(unnest_udtf(tags)) t AS tag;
+

If a table function call returns an empty result, the corresponding outer row is preserved, and the result is padded with null values.

+
SELECT users, tag
+FROM Orders LEFT JOIN LATERAL TABLE(unnest_udtf(tags)) t AS tag ON TRUE;
+
+

Temporal Table Function Join

Function

+

Precautions

+

Currently only inner join and left outer join with temporal tables are supported.

+

Example

+

Assuming Rates is a temporal table function, the join can be expressed in SQL as follows:

+
SELECT
+  o_amount, r_rate
+FROM
+  Orders,
+  LATERAL TABLE (Rates(o_proctime))
+WHERE
+  r_currency = o_currency;
+
+

Join Temporal Tables

Function

+

This clause is used to join the Temporal table.

+

Syntax

+
SELECT column-names
+FROM table1  [AS <alias1>]
+[LEFT] JOIN table2 FOR SYSTEM_TIME AS OF table1.proctime [AS <alias2>]
+ON table1.column-name1 = table2.key-name1
+

Description

+ +

Precautions

+

Only inner and left joins are supported for temporal tables with processing time attributes.

+

Example

+

LatestRates is a temporal table that is materialized with the latest rate.

+
SELECT
+  o.amount, o.currency, r.rate, o.amount * r.rate
+FROM
+  Orders AS o
+  JOIN LatestRates FOR SYSTEM_TIME AS OF o.proctime AS r
+  ON r.currency = o.currency;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0326.html b/docs/dli/sqlreference/dli_08_0326.html new file mode 100644 index 00000000..008f65a1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0326.html @@ -0,0 +1,30 @@ + + +

OrderBy & Limit

+

OrderBy

Function

+

This clause is used to sort data in ascending order on a time attribute.

+

Precautions

+

Currently, only sorting by time attribute is supported.

+

Example

+

Sort data in ascending order on the time attribute.

+
SELECT *
+FROM Orders
+ORDER BY orderTime;
+
+

Limit

Function

+

This clause is used to constrain the number of rows returned.

+

Precautions

+

This clause is used in conjunction with ORDER BY to ensure that the results are deterministic.

+

Example

+
SELECT *
+FROM Orders
+ORDER BY orderTime
+LIMIT 3;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0327.html b/docs/dli/sqlreference/dli_08_0327.html new file mode 100644 index 00000000..ba59216b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0327.html @@ -0,0 +1,32 @@ + + +

Top-N

+

Function

Top-N queries return the N smallest or largest values ordered by the specified columns. Both the smallest and largest value sets are considered Top-N queries. Top-N queries are useful when you need to display only the N bottom-most or N top-most records of a batch or streaming table based on a condition.

+
+

Syntax

SELECT [column_list]
+FROM (
+   SELECT [column_list],
+     ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]]
+       ORDER BY col1 [asc|desc][, col2 [asc|desc]...]) AS rownum
+   FROM table_name)
+WHERE rownum <= N [AND conditions]
+
+

Description

+
+

Important Notes

+
+

Example

The following example obtains, in real time, the top five products per category with the highest sales.

+
SELECT * 
+  FROM ( 
+     SELECT *,
+         ROW_NUMBER() OVER (PARTITION BY category ORDER BY sales DESC) as row_num
+     FROM ShopSales)
+  WHERE row_num <= 5;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0328.html b/docs/dli/sqlreference/dli_08_0328.html new file mode 100644 index 00000000..9df115e3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0328.html @@ -0,0 +1,32 @@ + + +

Deduplication

+

Function

Deduplication removes rows that are duplicated over a set of columns, keeping only the first one or the last one.

+
+

Syntax

SELECT [column_list]
+FROM (
+   SELECT [column_list],
+     ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]]
+       ORDER BY time_attr [asc|desc]) AS rownum
+   FROM table_name)
+WHERE rownum = 1
+
+

Description

+
+

Precautions

None

+
+

Example

The following example shows how to remove duplicate rows on order_id. Here, proctime is a processing time attribute.

+
SELECT order_id, user, product, number
+  FROM (
+     SELECT *,
+         ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY proctime ASC) as row_num
+     FROM Orders)
+  WHERE row_num = 1;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0329.html b/docs/dli/sqlreference/dli_08_0329.html new file mode 100644 index 00000000..c551439e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0329.html @@ -0,0 +1,17 @@ + + +

Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0330.html b/docs/dli/sqlreference/dli_08_0330.html new file mode 100644 index 00000000..5ff303ec --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0330.html @@ -0,0 +1,187 @@ + + +

User-Defined Functions

+

Overview

DLI supports the following three types of user-defined functions (UDFs):

+
+ +

UDFs can only be used in dedicated queues.

+
+

POM Dependency

<dependency>
+        <groupId>org.apache.flink</groupId>
+        <artifactId>flink-table-common</artifactId>
+        <version>1.10.0</version>
+        <scope>provided</scope>
+</dependency>
+
+

Important Notes

+
+

Using UDFs

  1. Encapsulate the implemented UDFs into a JAR package and upload the package to OBS.
  2. In the navigation pane of the DLI management console, choose Data Management > Package Management. On the displayed page, click Create and use the JAR package uploaded to OBS to create a package.
  3. In the left navigation, choose Job Management and click Flink Jobs. Locate the row containing the target job and click Edit in the Operation column to switch to the page where you can edit the job.
  4. Click the Running Parameters tab of your job, select the UDF JAR and click Save.
  5. Add the following statement to the SQL statements to use the functions:
+
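A minimal sketch of the statement referenced in step 5, assuming the function class com.company.udf.UdfScalarFunction from the example below is packaged in the uploaded JAR:
CREATE FUNCTION udf_test AS 'com.company.udf.UdfScalarFunction';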
+

UDF

The regular UDF must inherit the ScalarFunction function and implement the eval method. The open and close functions are optional.

+
+

Example code

+
import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.ScalarFunction;
+public class UdfScalarFunction extends ScalarFunction {
+  private int factor = 12;
+  public UdfScalarFunction() {
+    this.factor = 12;
+  }
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  /**
+   * Custom logic
+   * @param s
+   * @return
+   */
+   public int eval(String s) {
+     return s.hashCode() * factor;
+   }
+   /**
+    * Optional
+    */
+   @Override
+   public void close() {}
+}
+

Example

+
1
+2
CREATE FUNCTION udf_test AS 'com.company.udf.UdfScalarFunction';
+INSERT INTO sink_stream select udf_test(attr) FROM source_stream;
+
+ +
+

UDTF

The UDTF must inherit the TableFunction function and implement the eval method. The open and close functions are optional. If the UDTF needs to return multiple columns, you only need to declare the returned value as Tuple or Row. If Row is used, you need to overload the getResultType method to declare the returned field type.

+
+

Example code

+
import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.TableFunction;
+import org.apache.flink.types.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+public class UdfTableFunction extends TableFunction<Row> {
+  private Logger log = LoggerFactory.getLogger(TableFunction.class);
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  public void eval(String str, String split) {
+    for (String s : str.split(split)) {
+      Row row = new Row(2);
+      row.setField(0, s);
+      row.setField(1, s.length());
+      collect(row);
+    }
+  }
+  /**
+   * Declare the type returned by the function
+   * @return
+   */
+  @Override
+  public TypeInformation<Row> getResultType() {
+  return Types.ROW(Types.STRING, Types.INT);
+  }
+  /**
+    * Optional
+   */
+  @Override
+  public void close() {}
+ }
+

Example

+

The UDTF supports CROSS JOIN and LEFT JOIN. When the UDTF is used, the LATERAL and TABLE keywords must be included.

+ +
1
+2
+3
+4
+5
+6
+7
CREATE FUNCTION udtf_test AS 'com.company.udf.TableFunction';
+// CROSS JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream, LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length);
+// LEFT JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream LEFT JOIN LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length) ON TRUE;
+
+ +
+

UDAF

The UDAF must inherit the AggregateFunction function. You need to create an accumulator for storing the computing result, for example, WeightedAvgAccum in the following example code.

+
+

Example code

+
public class WeightedAvgAccum {
+public long sum = 0;
+public int count = 0;
+}
+

+
import org.apache.flink.table.functions.AggregateFunction;
+import java.util.Iterator;
+/**
+* The first type variable is the type returned by the aggregation function, and the second type variable is of the Accumulator type.
+* Weighted Average user-defined aggregate function.
+*/
+public class UdfAggFunction extends AggregateFunction<Long, WeightedAvgAccum> {
+// Initialize the accumulator.
+  @Override
+  public WeightedAvgAccum createAccumulator() {
+    return new WeightedAvgAccum();
+  }
+// Return the intermediate computing value stored in the accumulator.
+  @Override
+  public Long getValue(WeightedAvgAccum acc) {
+    if (acc.count == 0) {
+       return null;
+    } else {
+      return acc.sum / acc.count;
+ }
+}
+// Update the intermediate computing value according to the input.
+public void accumulate(WeightedAvgAccum acc, long iValue) {
+acc.sum += iValue;
+acc.count += 1;
+}
+// Perform the retraction operation, which is opposite to the accumulate operation.
+public void retract(WeightedAvgAccum acc, long iValue) {
+acc.sum -= iValue;
+acc.count -= 1;
+}
+// Combine multiple accumulator values.
+public void merge(WeightedAvgAccum acc, Iterable<WeightedAvgAccum> it) {
+Iterator<WeightedAvgAccum> iter = it.iterator();
+while (iter.hasNext()) {
+WeightedAvgAccum a = iter.next();
+acc.count += a.count;
+acc.sum += a.sum;
+}
+}
+// Reset the intermediate computing value.
+public void resetAccumulator(WeightedAvgAccum acc) {
+acc.count = 0;
+acc.sum = 0L;
+}
+}
+

Example

+
1
+2
CREATE FUNCTION udaf_test AS 'com.company.udf.UdfAggFunction';
+INSERT INTO sink_stream SELECT udaf_test(attr2) FROM source_stream GROUP BY attr1;
+
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0331.html b/docs/dli/sqlreference/dli_08_0331.html new file mode 100644 index 00000000..75de0d7f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0331.html @@ -0,0 +1,35 @@ + + +

Built-In Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0332.html b/docs/dli/sqlreference/dli_08_0332.html new file mode 100644 index 00000000..4f855083 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0332.html @@ -0,0 +1,573 @@ + + +

Mathematical Operation Functions

+

Relational Operators

All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.

+

Relational operators are binary operators. The two compared data types must be the same or must support implicit conversion.

+

Table 1 lists all relational operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Relational Operators

Operator

+

Returned Data Type

+

Description

+

A = B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. This operator is used for value assignment.

+

A <> B

+

BOOLEAN

+

If A is not equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned. This operator follows the standard SQL syntax.

+

A < B

+

BOOLEAN

+

If A is less than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A <= B

+

BOOLEAN

+

If A is less than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A > B

+

BOOLEAN

+

If A is greater than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A >= B

+

BOOLEAN

+

If A is greater than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A IS NULL

+

BOOLEAN

+

If A is NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS NOT NULL

+

BOOLEAN

+

If A is not NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS DISTINCT FROM B

+

BOOLEAN

+

If A is not equal to B, TRUE is returned. NULL values are treated as identical; NULL IS DISTINCT FROM NULL returns FALSE.

+

A IS NOT DISTINCT FROM B

+

BOOLEAN

+

If A is equal to B, TRUE is returned. NULL values are treated as identical; NULL IS NOT DISTINCT FROM NULL returns TRUE.

+

A BETWEEN [ASYMMETRIC | SYMMETRIC] B AND C

+

BOOLEAN

+

If A is greater than or equal to B but less than or equal to C, TRUE is returned.

+
  • ASYMMETRIC: indicates that B and C are location-related.

    For example, "A BETWEEN ASYMMETRIC B AND C" is equivalent to "A BETWEEN B AND C".

    +
  • SYMMETRIC: indicates that B and C are not location-related.

    For example, "A BETWEEN SYMMETRIC B AND C" is equivalent to "A BETWEEN B AND C) OR (A BETWEEN C AND B".

    +
+

A NOT BETWEEN [ASYMMETRIC | SYMMETRIC] B AND C

+

BOOLEAN

+

If A is less than B or greater than C, TRUE is returned.

+
  • ASYMMETRIC: indicates that B and C are location-related.

    For example, "A NOT BETWEEN ASYMMETRIC B AND C" is equivalent to "A NOT BETWEEN B AND C".

    +
  • SYMMETRIC: indicates that B and C are not location-related.

    For example, "A NOT BETWEEN SYMMETRIC B AND C" is equivalent to "(A NOT BETWEEN B AND C) OR (A NOT BETWEEN C AND B)".

    +
+

A LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A matches pattern B, TRUE is returned. The escape character C can be defined as required.

+

A NOT LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match pattern B, TRUE is returned. The escape character C can be defined as required.

+

A SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A matches regular expression B, TRUE is returned. The escape character C can be defined as required.

+

A NOT SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match regular expression B, TRUE is returned. The escape character C can be defined as required.

+

value IN (value [, value]* )

+

BOOLEAN

+

If the value is equal to any value in the list, TRUE is returned.

+

value NOT IN (value [, value]* )

+

BOOLEAN

+

If the value is not equal to any value in the list, TRUE is returned.

+

EXISTS (sub-query)

+

BOOLEAN

+

If sub-query returns at least one row, TRUE is returned.

+

value IN (sub-query)

+

BOOLEAN

+

If value is equal to a row returned by subquery, TRUE is returned.

+

value NOT IN (sub-query)

+

BOOLEAN

+

If value is not equal to a row returned by subquery, TRUE is returned.

+
+
+

Precautions

+ + +
+

Logical Operators

Common logical operators are AND, OR, and NOT. Their priority order is NOT > AND > OR.

+

Table 2 lists the calculation rules. A and B indicate logical expressions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Logical Operators

Operator

+

Result Type

+

Description

+

A OR B

+

BOOLEAN

+

If A or B is TRUE, TRUE is returned. Three-valued logic is supported.

+

A AND B

+

BOOLEAN

+

If both A and B are TRUE, TRUE is returned. Three-valued logic is supported.

+

NOT A

+

BOOLEAN

+

If A is FALSE, TRUE is returned. If A is TRUE, FALSE is returned. If A is UNKNOWN, UNKNOWN is returned.

+

A IS FALSE

+

BOOLEAN

+

If A is FALSE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT FALSE

+

BOOLEAN

+

If A is not FALSE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS TRUE

+

BOOLEAN

+

If A is TRUE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT TRUE

+

BOOLEAN

+

If A is not TRUE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS UNKNOWN

+

BOOLEAN

+

If A is UNKNOWN, TRUE is returned.

+

A IS NOT UNKNOWN

+

BOOLEAN

+

If A is not UNKNOWN, TRUE is returned.

+
+
+

Precautions

+

Only data of the Boolean type can be used for calculation using logical operators. Implicit type conversion is not supported.

+
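As a short illustration of the three-valued logic above, the following sketch assumes a hypothetical table t with a BOOLEAN column flag that may contain NULL values:
-- flag IS TRUE returns only the rows where flag is TRUE;
-- flag IS NOT TRUE also returns the rows where flag is FALSE or NULL (UNKNOWN).
SELECT * FROM t WHERE flag IS TRUE;
SELECT * FROM t WHERE flag IS NOT TRUE;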
+

Arithmetic Operators

Arithmetic operators include binary operators and unary operators, for all of which, the returned results are of the numeric type. Table 3 lists arithmetic operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 Arithmetic Operators

Operator

+

Result Type

+

Description

+

+ numeric

+

All numeric types

+

Returns numbers.

+

- numeric

+

All numeric types

+

Returns negative numbers.

+

A + B

+

All numeric types

+

A plus B. The result type is associated with the operation data type. For example, if a floating-point number is added to an integer, the result is a floating-point number.

+

A - B

+

All numeric types

+

A minus B. The result type is associated with the operation data type.

+

A * B

+

All numeric types

+

Multiply A and B. The result type is associated with the operation data type.

+

A / B

+

All numeric types

+

Divide A by B. The result is a double-precision number.

+

POWER(A, B)

+

All numeric types

+

Returns the value of A raised to the power B.

+

ABS(numeric)

+

All numeric types

+

Returns the absolute value of a specified value.

+

MOD(A, B)

+

All numeric types

+

Returns the remainder (modulus) of A divided by B. A negative value is returned only when A is a negative value.

+

SQRT(A)

+

All numeric types

+

Returns the square root of A.

+

LN(A)

+

All numeric types

+

Returns the natural logarithm of A (base e).

+

LOG10(A)

+

All numeric types

+

Returns the base 10 logarithms of A.

+

LOG2(A)

+

All numeric types

+

Returns the base 2 logarithm of A.

+

LOG(B)

+

LOG(A, B)

+

All numeric types

+

When called with one argument, returns the natural logarithm of B.

+

When called with two arguments, this function returns the logarithm of B to the base A.

+

B must be greater than 0 and A must be greater than 1.

+

EXP(A)

+

All numeric types

+

Return the value of e raised to the power of a.

+

CEIL(A)

+

CEILING(A)

+

All numeric types

+

Return the smallest integer that is greater than or equal to a. For example: ceil(21.2) = 22.

+

FLOOR(A)

+

All numeric types

+

Return the largest integer that is less than or equal to a. For example: floor(21.2) = 21.

+

SIN(A)

+

All numeric types

+

Returns the sine value of A.

+

COS(A)

+

All numeric types

+

Returns the cosine value of A.

+

TAN(A)

+

All numeric types

+

Returns the tangent value of A.

+

COT(A)

+

All numeric types

+

Returns the cotangent value of A.

+

ASIN(A)

+

All numeric types

+

Returns the arc sine value of A.

+

ACOS(A)

+

All numeric types

+

Returns the arc cosine value of A.

+

ATAN(A)

+

All numeric types

+

Returns the arc tangent value of A.

+

ATAN2(A, B)

+

All numeric types

+

Returns the arc tangent of a coordinate (A, B).

+

COSH(A)

+

All numeric types

+

Returns the hyperbolic cosine of A. Return value type is DOUBLE.

+

DEGREES(A)

+

All numeric types

+

Convert the value of a from radians to degrees.

+

RADIANS(A)

+

All numeric types

+

Convert the value of a from degrees to radians.

+

SIGN(A)

+

All numeric types

+

Returns the sign of A. 1 is returned if A is positive. –1 is returned if A is negative. Otherwise, 0 is returned.

+

ROUND(A, d)

+

All numeric types

+

Returns a number rounded to d decimal places for A. For example: round(21.263,2) = 21.26.

+

PI

+

All numeric types

+

Returns the value of pi.

+

E()

+

All numeric types

+

Returns the value of e.

+

RAND()

+

All numeric types

+

Returns a pseudorandom double value in the range [0.0, 1.0)

+

RAND(A)

+

All numeric types

+

Returns a pseudorandom double value in the range [0.0, 1.0) with an initial seed A. Two RAND functions will return identical sequences of numbers if they have the same initial seed.

+

RAND_INTEGER(A)

+

All numeric types

+

Returns a pseudorandom integer value in the range [0, A).

+

RAND_INTEGER(A, B)

+

All numeric types

+

Returns a pseudorandom integer value in the range [0, B) with an initial seed A.

+

UUID()

+

All numeric types

+

Returns a UUID string.

+

BIN(A)

+

All numeric types

+

Returns a string representation of integer A in binary format. Returns NULL if A is NULL.

+

HEX(A)

+

HEX(B)

+

All numeric types

+

Returns a string representation of an integer A value or a string B in hex format. Returns NULL if the A or B is NULL.

+

TRUNCATE(A, d)

+

All numeric types

+

Returns A truncated to d decimal places. Returns NULL if A or d is NULL.

+

Example: truncate (42.345, 2) = 42.340

+

truncate(42.345) = 42.000

+

PI()

+

All numeric types

+

Returns the value of pi.

+
+
+

Precautions

+

Data of the string type is not allowed in arithmetic operations.

+
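As a short illustration of some of the functions above, the following sketch assumes a hypothetical table t with a numeric column price:
SELECT
  ABS(price)      AS abs_price,      -- absolute value
  MOD(price, 10)  AS price_mod_10,   -- remainder of price divided by 10
  ROUND(price, 2) AS rounded_price,  -- rounded to two decimal places
  CEIL(price)     AS ceil_price,     -- smallest integer >= price
  FLOOR(price)    AS floor_price     -- largest integer <= price
FROM t;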
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0333.html b/docs/dli/sqlreference/dli_08_0333.html new file mode 100644 index 00000000..5a660120 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0333.html @@ -0,0 +1,439 @@ + + +

String Functions

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 String functions

SQL Function

+

Return Type

+

Description

+

string1 || string2

+

STRING

+

Returns the concatenation of string1 and string2.

+

CHAR_LENGTH(string)

+

CHARACTER_LENGTH(string)

+

INT

+

Returns the number of characters in the string.

+

UPPER(string)

+

STRING

+

Returns the string in uppercase.

+

LOWER(string)

+

STRING

+

Returns the string in lowercase.

+

POSITION(string1 IN string2)

+

INT

+

Returns the position (start from 1) of the first occurrence of string1 in string2; returns 0 if string1 cannot be found in string2.

+

TRIM([ BOTH | LEADING | TRAILING ] string1 FROM string2)

+

STRING

+

Returns a string that removes leading and/or trailing string1 characters from string2.

+

LTRIM(string)

+

STRING

+

Returns a string that removes the left whitespaces from the specified string.

+

For example, LTRIM(' This is a test String.') returns "This is a test String.".

+

RTRIM(string)

+

STRING

+

Returns a string that removes the right whitespaces from the specified string.

+

For example, RTRIM('This is a test String. ') returns "This is a test String.".

+

REPEAT(string, integer)

+

STRING

+

Returns a string that repeats the base string integer times.

+

For example, REPEAT('This is a test String.', 2) returns "This is a test String.This is a test String.".

+

REGEXP_REPLACE(string1, string2, string3)

+

STRING

+

Returns a string from string1 with all the substrings that match a regular expression string2 consecutively being replaced with string3.

+

For example, REGEXP_REPLACE('foobar', 'oo|ar', '') returns "fb".

+

REGEXP_REPLACE('ab\ab', '\\', 'e') returns "abeab".

+

OVERLAY(string1 PLACING string2 FROM integer1 [ FOR integer2 ])

+

STRING

+

Returns a string that replaces integer2 characters of STRING1 with STRING2 from position integer1.

+

The default value of integer2 is the length of string2.

+

For example, OVERLAY('This is an old string' PLACING ' new' FROM 10 FOR 5) returns "This is a new string".

+

SUBSTRING(string FROM integer1 [ FOR integer2 ])

+

STRING

+

Returns a substring of the specified string starting from position integer1 with length integer2 (to the end by default). If integer2 is not configured, the substring from integer1 to the end is returned by default.

+

REPLACE(string1, string2, string3)

+

STRING

+

Returns a new string which replaces all the occurrences of string2 with string3 (non-overlapping) from string1.

+

For example, REPLACE('hello world', 'world', 'flink') returns "hello flink"; REPLACE('ababab', 'abab', 'z') returns "zab".

+

REPLACE('ab\\ab', '\\', 'e') returns "abeab".

+

REGEXP_EXTRACT(string1, string2[, integer])

+

STRING

+

Returns a string from string1 which extracted with a specified regular expression string2 and a regex match group index integer.

+

Returns NULL, if the parameter is NULL or the regular expression is invalid.

+

For example, REGEXP_EXTRACT('foothebar', 'foo(.*?)(bar)', 2)" returns "bar".

+

INITCAP(string)

+

STRING

+

Returns a new form of STRING with the first character of each word converted to uppercase and the rest characters to lowercase.

+

CONCAT(string1, string2,...)

+

STRING

+

Returns a string that concatenates string1, string2, ….

+

For example, CONCAT('AA', 'BB', 'CC') returns "AABBCC".

+

CONCAT_WS(string1, string2, string3,...)

+

STRING

+

Returns a string that concatenates string2, string3, … with a separator string1. The separator is added between the strings to be concatenated. Returns NULL if string1 is NULL. If other arguments are NULL, this function automatically skips NULL arguments.

+

For example, CONCAT_WS('~', 'AA', NULL, 'BB', '', 'CC') returns "AA~BB~~CC".

+

LPAD(string1, integer, string2)

+

STRING

+

Returns a new string from string1 left-padded with string2 to a length of integer characters.

+

If any argument is NULL, NULL is returned.

+

If integer is negative, NULL is returned.

+

If the length of string1 is shorter than integer, returns string1 shortened to integer characters.

+

For example, LPAD('hi',4,'??') returns "??hi".

+

LPAD('hi',1,'??') returns "h".

+

RPAD(string1, integer, string2)

+

STRING

+

Returns a new string from string1 right-padded with string2 to a length of integer characters.

+

If any argument is NULL, NULL is returned.

+

If integer is negative, NULL is returned.

+

If the length of string1 is shorter than integer, returns string1 shortened to integer characters.

+

For example, RPAD('hi',4,'??') returns "hi??".

+

RPAD('hi',1,'??') returns "h".

+

FROM_BASE64(string)

+

STRING

+

Returns the base64-decoded result from string.

+

Returns NULL if string is NULL.

+

For example, FROM_BASE64('aGVsbG8gd29ybGQ=') returns "hello world".

+

TO_BASE64(string)

+

STRING

+

Returns the base64-encoded result from string.

+

Returns NULL if string is NULL.

+

For example, TO_BASE64('hello world') returns "aGVsbG8gd29ybGQ=".

+

ASCII(string)

+

INT

+

Returns the numeric value of the first character of string.

+

Returns NULL if string is NULL.

+

For example, ascii('abc') returns 97.

+

ascii(CAST(NULL AS VARCHAR)) returns NULL.

+

CHR(integer)

+

STRING

+

Returns the ASCII character having the binary equivalent to integer.

+

If integer is larger than 255, the modulus of integer divided by 255 is calculated first, and the CHR of the modulus is returned.

+

Returns NULL if integer is NULL.

+

chr(97) returns a.

+

chr(353) returns a.

+

DECODE(binary, string)

+

STRING

+

Decodes the first argument into a String using the provided character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').

+

If either argument is NULL, the result will also be NULL.

+

ENCODE(string1, string2)

+

STRING

+

Encodes the string1 into a BINARY using the provided string2 character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').

+

If either argument is NULL, the result will also be NULL.

+

INSTR(string1, string2)

+

INT

+

Returns the position of the first occurrence of string2 in string1.

+

Returns NULL if any argument is NULL.

+

LEFT(string, integer)

+

STRING

+

Returns the leftmost integer characters from the string.

+

Returns an empty string if integer is negative.

+

Returns NULL if any argument is NULL.

+

RIGHT(string, integer)

+

STRING

+

Returns the rightmost integer characters from the string.

+

Returns an empty string if integer is negative.

+

Returns NULL if any argument is NULL.

+

LOCATE(string1, string2[, integer])

+

INT

+

Returns the position of the first occurrence of string1 in string2 after position integer.

+

Returns 0 if not found.

+

The value of integer defaults to 0.

+

Returns NULL if any argument is NULL.

+

PARSE_URL(string1, string2[, string3])

+

STRING

+

Returns the specified part from the URL.

+

Valid values for string2 include 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'AUTHORITY', 'FILE', and 'USERINFO'.

+

Returns NULL if any argument is NULL.

+

If string2 is QUERY, the key in QUERY can be specified as string3.

+

Example:

+

parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'.

+

parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') returns 'v1'.

+

REGEXP(string1, string2)

+

BOOLEAN

+

Returns TRUE if any (possibly empty) substring of string1 matches the regular expression string2; otherwise, returns FALSE. string1 indicates the specified string, and string2 indicates the regular expression.

+

Returns NULL if any argument is NULL.

+

REVERSE(string)

+

STRING

+

Returns the reversed string.

+

Returns NULL if string is NULL.

+

SPLIT_INDEX(string1, string2, integer1)

+

STRING

+

Splits string1 by the delimiter string2 and returns the integer1-th (zero-based) substring of the split strings.

+

Returns NULL if integer1 is negative.

+

Returns NULL if any argument is NULL.

+

STR_TO_MAP(string1[, string2, string3])

+

MAP

+

Returns a map after splitting the string1 into key/value pairs using delimiters.

+

The default value of string2 is ','.

+

The default value of string3 is '='.

+

SUBSTR(string[, integer1[, integer2]])

+

STRING

+

Returns a substring of string starting from position integer1 with length integer2.

+

If integer2 is not specified, the string is truncated to the end.

+

JSON_VAL(STRING json_string, STRING json_path)

+

STRING

+

Returns the value of the specified json_path from the json_string. For details about how to use the function, see JSON_VAL Function.

+
NOTE:

The following rules are listed in descending order of priority.

+
  1. The two arguments json_string and json_path cannot be NULL.
  2. The value of json_string must be a valid JSON string. Otherwise, the function returns NULL.
  3. If json_string is an empty string, the function returns an empty string.
  4. If json_path is an empty string or the path does not exist, the function returns NULL.
+
+
+
+

JSON_VAL Function

+
STRING JSON_VAL(STRING json_string, STRING json_path)
+ +
+ + + + + + + + + + + + + +
Table 2 Parameter description

Parameter

+

Type

+

Description

+

json_string

+

STRING

+

JSON object to be parsed

+

json_path

+

STRING

+

Path expression for parsing the JSON string. For the supported expressions, see Table 3.

+
+
+ +
+ + + + + + + + + + + + + + + + +
Table 3 Expressions supported

Expression

+

Description

+

$

+

Root node in the path

+

[]

+

Access array elements

+

*

+

Array wildcard

+

.

+

Access child elements

+
+
+ +
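The following is a minimal usage sketch of JSON_VAL based on the signature and path expressions above. The table names (jsonSource, jsonSink) and the column message are placeholders for illustration only, not part of the reference.

insert into
  jsonSink
select
  JSON_VAL(message, '$.name'),
  JSON_VAL(message, '$.grades[0]')
from
  jsonSource;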
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0334.html b/docs/dli/sqlreference/dli_08_0334.html new file mode 100644 index 00000000..f4f0f130 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0334.html @@ -0,0 +1,1704 @@ + + +

Temporal Functions

+

Table 1 lists the temporal functions supported by Flink OpenSource SQL.

+

Function Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Temporal functions

Function

+

Return Type

+

Description

+

DATE string

+

DATE

+

Parse the date string (yyyy-MM-dd) to a SQL date.

+

TIME string

+

TIME

+

Parse the time string (HH:mm:ss[.fff]) to a SQL time.

+

TIMESTAMP string

+

TIMESTAMP

+

Convert the time string into a timestamp. The time string format is yyyy-MM-dd HH:mm:ss[.fff].

+

INTERVAL string range

+

INTERVAL

+

Parse an interval string in the following two forms:

+
  • yyyy-MM for SQL intervals of months. An interval range might be YEAR or YEAR TO MONTH.
  • dd hh:mm:ss.fff for SQL intervals of milliseconds. An interval range might be DAY, MINUTE, DAY TO HOUR, or DAY TO SECOND.
+

Example:

+

INTERVAL '10 00:00:00.004' DAY TO SECOND indicates that the interval is 10 days and 4 milliseconds.

+

INTERVAL '10' DAY indicates that the interval is 10 days.

+

INTERVAL '2-10' YEAR TO MONTH indicates that the interval is two years and ten months.

+

CURRENT_DATE

+

DATE

+

Return the SQL date of UTC time zone.

+

CURRENT_TIME

+

TIME

+

Return the SQL time of UTC time zone.

+

CURRENT_TIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of UTC time zone.

+

LOCALTIME

+

TIME

+

Return the SQL time of the local time zone.

+

LOCALTIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of the local time zone.

+

EXTRACT(timeintervalunit FROM temporal)

+

BIGINT

+

Extract a part of the time point or interval. Return the part as a BIGINT.

+

For example, extract the day from the date 2006-06-05 to return 5.

+

EXTRACT(DAY FROM DATE '2006-06-05') returns 5.

+

YEAR(date)

+

BIGINT

+

Return the year from a SQL date.

+

For example, YEAR(DATE'1994-09-27') returns 1994.

+

QUARTER(date)

+

BIGINT

+

Return the quarter of a year from a SQL date.

+

MONTH(date)

+

BIGINT

+

+

Return the month of a year from a SQL date.

+

For example, MONTH(DATE '1994-09-27') returns 9.

+

WEEK(date)

+

BIGINT

+

Return the week of a year from a SQL date.

+

For example, WEEK(DATE'1994-09-27') returns 39.

+

DAYOFYEAR(date)

+

BIGINT

+

Return the day of a year from a SQL date.

+

For example, DAYOFYEAR(DATE '1994-09-27') is 270.

+

DAYOFMONTH(date)

+

BIGINT

+

Return the day of a month from a SQL date.

+

For example, DAYOFMONTH(DATE'1994-09-27') returns 27.

+

DAYOFWEEK(date)

+

BIGINT

+

Return the day of a week from a SQL date.

+

Sunday is set to 1.

+

For example, DAYOFWEEK(DATE'1994-09-27') returns 3.

+

HOUR(timestamp)

+

BIGINT

+

Return the hour of a day (an integer between 0 and 23) from a SQL timestamp.

+

For example, HOUR(TIMESTAMP '1994-09-27 13:14:15') returns 13.

+

MINUTE(timestamp)

+

BIGINT

+

Return the minute of an hour (an integer between 0 and 59) from a SQL timestamp.

+

For example, MINUTE(TIMESTAMP '1994-09-27 13:14:15') returns 14.

+

SECOND(timestamp)

+

BIGINT

+

Returns the second of a minute (an integer between 0 and 59) from a SQL timestamp.

+

For example, SECOND(TIMESTAMP '1994-09-27 13:14:15') returns 15.

+

FLOOR(timepoint TO timeintervalunit)

+

TIME

+

Round a time point down to the given unit.

+

For example, 12:44:00 is returned from FLOOR(TIME '12:44:31' TO MINUTE).

+

CEIL(timepoint TO timeintervalunit)

+

TIME

+

Round a time point up to the given unit.

+

For example, CEIL(TIME '12:44:31' TO MINUTE) returns 12:45:00.

+

(timepoint1, temporal1) OVERLAPS (timepoint2, temporal2)

+

BOOLEAN

+

Return TRUE if two time intervals overlap.

+

Example:

+

(TIME '2:55:00', INTERVAL '1' HOUR) OVERLAPS (TIME '3:30:00', INTERVAL '2' HOUR) returns TRUE.

+

(TIME '9:00:00', TIME '10:00:00') OVERLAPS (TIME '10:15:00', INTERVAL '3' HOUR) returns FALSE.

+

DATE_FORMAT(timestamp, string)

+

STRING

+

Convert a timestamp to a value of string in the format specified by the date format string.

+

TIMESTAMPADD(timeintervalunit, interval, timepoint)

+

TIMESTAMP/DATE/TIME

+

Return the date and time added to timepoint based on the result of interval and timeintervalunit.

+

For example, TIMESTAMPADD(WEEK, 1, DATE '2003-01-02') returns 2003-01-09.

+

TIMESTAMPDIFF(timepointunit, timepoint1, timepoint2)

+

INT

+

Return the (signed) number of timepointunit between timepoint1 and timepoint2.

+

The unit for the interval is given by the first argument, which should be one of the following values: SECOND, MINUTE, HOUR, DAY, MONTH, and YEAR.

+

For example, TIMESTAMPDIFF(DAY, TIMESTAMP '2003-01-02 10:00:00', TIMESTAMP '2003-01-03 10:00:00') returns 1.

+

CONVERT_TZ(string1, string2, string3)

+

TIMESTAMP

+

Convert a datetime string1 from time zone string2 to time zone string3.

+

For example, CONVERT_TZ('1970-01-01 00:00:00', 'UTC', 'America/Los_Angeles') returns '1969-12-31 16:00:00'.

+

FROM_UNIXTIME(numeric[, string])

+

STRING

+

Return a representation of the numeric argument as a value in string format.

+

The default string format is yyyy-MM-dd HH:mm:ss.

+

For example, FROM_UNIXTIME(44) returns 1970-01-01 09:00:44.

+

UNIX_TIMESTAMP()

+

BIGINT

+

Get current Unix timestamp in seconds.

+

+

UNIX_TIMESTAMP(string1[, string2])

+

BIGINT

+

Convert date time string string1 in format string2 to Unix timestamp (in seconds), using the specified timezone in table config.

+

The default format of string2 is yyyy-MM-dd HH:mm:ss.

+

TO_DATE(string1[, string2])

+

DATE

+

Convert a date string string1 with format string2 to a date.

+

The default format of string2 is yyyy-MM-dd.

+

TO_TIMESTAMP(string1[, string2])

+

TIMESTAMP

+

Convert date time string string1 with format string2 to a timestamp.

+

The default format of string2 is yyyy-MM-dd HH:mm:ss.

+
+
+
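The following query sketches how several of the temporal functions above can be combined; the table temporalSource and its column order_time are assumptions made for illustration only.

select
  EXTRACT(DAY FROM DATE '2006-06-05'),
  FLOOR(TIME '12:44:31' TO MINUTE),
  TIMESTAMPDIFF(DAY, TIMESTAMP '2003-01-02 10:00:00', TIMESTAMP '2003-01-03 10:00:00'),
  DATE_FORMAT(order_time, 'yyyy-MM-dd')
from
  temporalSource;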
+

DATE

+
+

TIME

+
+

TIMESTAMP

+
+

INTERVAL

+
+

CURRENT_DATE

+
+

CURRENT_TIME

+
+

CURRENT_TIMESTAMP

+
+

LOCALTIME

+
+

LOCALTIMESTAMP

+
+

EXTRACT

+
+

YEAR

+
+

QUARTER

+
+

MONTH

+
+

WEEK

+
+

DAYOFYEAR

+
+

DAYOFMONTH

+
+

DAYOFWEEK

+
+

HOUR

+
+

MINUTE

+
+

SECOND

+
+

FLOOR

+
+

CEIL

+
+

OVERLAPS

+
+

DATE_FORMAT

+
+

TIMESTAMPADD

+
+

TIMESTAMPDIFF

+
+

CONVERT_TZ

+
+

FROM_UNIXTIME

+
+

UNIX_TIMESTAMP

+
+

UNIX_TIMESTAMP(string1[, string2])

+
+

TO_DATE

+
+

TO_TIMESTAMP

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0335.html b/docs/dli/sqlreference/dli_08_0335.html new file mode 100644 index 00000000..460824ca --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0335.html @@ -0,0 +1,77 @@ + + +

Conditional Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Conditional functions

Function

+

Description

+

CASE value

+

WHEN value1_1 [, value1_2 ]* THEN result1

+

[ WHEN value2_1 [, value2_2 ]* THEN result2 ]*

+

[ ELSE resultZ ]

+

END

+

Returns resultX when the value is contained in (valueX_1, valueX_2, …).

+

Only the first matched value is returned.

+

When no value matches, returns resultZ if it is provided and returns NULL otherwise.

+

CASE

+

WHEN condition1 THEN result1

+

[ WHEN condition2 THEN result2 ]*

+

[ ELSE resultZ ]

+

END

+

Returns resultX when the first conditionX is met.

+

Only the first matched value is returned.

+

When no condition is met, returns resultZ if it is provided and returns NULL otherwise.

+

NULLIF(value1, value2)

+

Returns NULL if value1 is equal to value2; returns value1 otherwise.

+

For example, NULLIF(5, 5) returns NULL.

+

NULLIF(5, 0) returns 5.

+

COALESCE(value1, value2 [, value3 ]* )

+

Returns the first value (from left to right) that is not NULL from value1, value2, ….

+

For example, COALESCE(NULL, 5) returns 5.

+

IF(condition, true_value, false_value)

+

Returns the true_value if condition is met, otherwise false_value.

+

For example, IF(5 > 3, 5, 3) returns 5.

+

IS_ALPHA(string)

+

Returns TRUE if all characters in the string are letters, otherwise FALSE.

+

IS_DECIMAL(string)

+

Returns TRUE if string can be parsed to a valid numeric, otherwise FALSE.

+

IS_DIGIT(string)

+

Returns TRUE if all characters in the string are digits, otherwise FALSE.

+
+
+
+
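A short illustrative query combining the conditional functions above; the table condSource and its columns (score, nickname, name) are placeholders.

select
  case when score >= 60 then 'pass' else 'fail' end,
  NULLIF(score, 0),
  COALESCE(nickname, name, 'anonymous'),
  IF(score > 90, 'excellent', 'normal')
from
  condSource;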
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0336.html b/docs/dli/sqlreference/dli_08_0336.html new file mode 100644 index 00000000..be6edca2 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0336.html @@ -0,0 +1,131 @@ + + +

Type Conversion Function

+

Syntax

CAST(value AS type)
+
+

Syntax Description

This function is used to forcibly convert types.

+
+

Precautions

If the input is NULL, NULL is returned.

+
+

Example

The following example converts the amount value to an integer.

+
insert into temp select cast(amount as INT) from source_stream;
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 1 Examples of type conversion functions

Example

+

Description

+

Example

+

cast(v1 as string)

+

Converts v1 to a string. The value of v1 can be of the numeric type or of the timestamp, date, or time type.

+

Table T1:

+
| content (INT)           |
+| -------------           |
+| 5                       |
+

Statement:

+
SELECT
+  cast(content as varchar)
+FROM
+  T1;
+

Result:

+
"5"
+

cast (v1 as int)

+

Converts v1 to the int type. The value of v1 can be of the numeric or string type.

+

Table T1:

+
| content  (STRING)           |
+| -------------               |
+| "5"                         |
+

Statement:

+
SELECT
+  cast(content as int)
+FROM
+  T1;
+

Result:

+
5
+

cast(v1 as timestamp)

+

Converts v1 to the timestamp type. The value of v1 can be of the string, date, or time type.

+

Table T1:

+
| content  (STRING)          |
+| -------------              |
+| "2018-01-01 00:00:01"     |
+

Statement:

+
SELECT
+  cast(content as timestamp)
+FROM
+  T1;
+

Result:

+
1514736001000
+

cast(v1 as date)

+

Converts v1 to the date type. The value of v1 can be of the string or timestamp type.

+

Table T1:

+
| content  (TIMESTAMP)     |
+| -------------            |
+| 1514736001000            |
+

Statement:

+
SELECT
+  cast(content as date)
+FROM
+  T1;
+

Result:

+
"2018-01-01"
+
+
+

Flink jobs do not support the conversion of bigint to timestamp using CAST. You can convert it using to_timestamp.

+
+
+

Detailed Sample Code

/** source **/
+CREATE
+TABLE car_infos (cast_int_to_string int, cast_String_to_int string,
+case_string_to_timestamp string, case_timestamp_to_date timestamp(3)) WITH (
+  'connector.type' = 'dis',
+  'connector.region' = 'xxxxx',
+  'connector.channel' = 'dis-input',
+  'format.type' = 'json'
+);
+/** sink **/
+CREATE
+TABLE cars_infos_out (cast_int_to_string string, cast_String_to_int
+int, case_string_to_timestamp timestamp(3), case_timestamp_to_date date) WITH (
+  'connector.type' = 'dis',
+  'connector.region' = 'xxxxx',
+  'connector.channel' = 'dis-output',
+  'format.type' = 'json'
+);
+/** Statistics on static car information**/
+INSERT
+INTO
+  cars_infos_out
+SELECT
+  cast(cast_int_to_string as string),
+  cast(cast_String_to_int as int),
+  cast(case_string_to_timestamp as timestamp),
+  cast(case_timestamp_to_date as date)
+FROM
+  car_infos;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0337.html b/docs/dli/sqlreference/dli_08_0337.html new file mode 100644 index 00000000..8cfe23ac --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0337.html @@ -0,0 +1,48 @@ + + +

Collection Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 Collection functions

Function

+

Description

+

CARDINALITY(array)

+

Returns the number of elements in array.

+

array '[' integer ']'

+

Returns the element at position INT in array. The index starts from 1.

+

ELEMENT(array)

+

Returns the sole element of array (whose cardinality should be one).

+

Returns NULL if array is empty.

+

Throws an exception if array has more than one element.

+

CARDINALITY(map)

+

Returns the number of entries in map.

+

map '[' key ']'

+

Returns the value specified by key value in map.

+
+
+
+
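A minimal sketch of the collection functions above; the table collectionSource and its columns attr_array and attr_map are assumptions for illustration.

select
  CARDINALITY(attr_array),
  attr_array[1],
  ELEMENT(attr_array),   -- attr_array must contain exactly one element for ELEMENT()
  CARDINALITY(attr_map),
  attr_map['key1']
from
  collectionSource;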
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0338.html b/docs/dli/sqlreference/dli_08_0338.html new file mode 100644 index 00000000..1bbcd2ab --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0338.html @@ -0,0 +1,38 @@ + + +

Value Construction Functions

+

Description

+
+ + + + + + + + + + + + + +
Table 1 Value construction functions

Function

+

Description

+

ROW(value1, [, value2]*)

+

(value1, [, value2]*)

+

Returns a row created from a list of values (value1, value2,…).

+

ARRAY '[' value1 [, value2 ]* ']'

+

Returns an array created from a list of values (value1, value2, …).

+

MAP '[' key1, value1 [, key2, value2]* ']'

+

Returns a map created from a list of key-value pairs ((value1, value2), (value3, value4), …).

+

The key-value pair is (key1, value1),(key2, value2).

+
+
+
+
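A minimal sketch showing the value construction syntax above; the table constructSource is a placeholder.

select
  ROW('Jack', 25),
  ARRAY[1, 2, 3],
  MAP['k1', 'v1', 'k2', 'v2']
from
  constructSource;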
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0339.html b/docs/dli/sqlreference/dli_08_0339.html new file mode 100644 index 00000000..e0b05dd8 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0339.html @@ -0,0 +1,31 @@ + + +

Value Access Functions

+

Description

+
+ + + + + + + + + + +
Table 1 Value access functions

Function

+

Description

+

tableName.compositeType.field

+

Returns the value of a field from a Flink composite type (e.g., Tuple, POJO) by name.

+

tableName.compositeType.*

+

Returns a flat representation of a Flink composite type (e.g., Tuple, POJO) that converts each of its direct subtypes into a separate field.

+
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0340.html b/docs/dli/sqlreference/dli_08_0340.html new file mode 100644 index 00000000..f401dc8e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0340.html @@ -0,0 +1,64 @@ + + +

Hash Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Hash functions

Function

+

Description

+

MD5(string)

+

Returns the MD5 hash as a string that contains 32 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA1(string)

+

Returns the SHA-1 hash as a string that contains 40 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA224(string)

+

Returns the SHA-224 hash as a string that contains 56 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA256(string)

+

Returns the SHA-256 hash as a string that contains 64 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA384(string)

+

Returns the SHA-384 hash as a string that contains 96 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA512(string)

+

Returns the SHA-512 hash as a string that contains 128 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA2(string, hashLength)

+

Returns the hash using the SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, or SHA-512).

+

The first argument string is the string to be hashed and the second argument hashLength is the bit length of the result (224, 256, 384, or 512).

+

Returns NULL if string or hashLength is NULL.

+
+
+
+
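A minimal sketch of the hash functions above; the table hashSource and column attr0 are placeholders.

select
  MD5(attr0),
  SHA256(attr0),
  SHA2(attr0, 512)
from
  hashSource;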
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0341.html b/docs/dli/sqlreference/dli_08_0341.html new file mode 100644 index 00000000..56a2b889 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0341.html @@ -0,0 +1,124 @@ + + +

Aggregate Function

+

An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved by an SQL statement. Table 1 lists aggregate functions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Aggregate functions

Function

+

Return Data Type

+

Description

+

COUNT([ ALL ] expression | DISTINCT expression1 [, expression2]*)

+

BIGINT

+

Returns the number of input rows for which the expression is not NULL. Use DISTINCT for one unique instance of each value.

+

COUNT(*)

+

COUNT(1)

+

BIGINT

+

Returns the number of input rows.

+

AVG([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the average (arithmetic mean) of expression across all input rows.

+

Use DISTINCT for one unique instance of each value.

+

SUM([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sum of expression across all input rows.

+

Use DISTINCT for one unique instance of each value.

+

MAX([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the maximum value of expression across all input rows.

+

MIN([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the minimum value of expression across all input rows.

+

STDDEV_POP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the population standard deviation of expression across all input rows.

+

STDDEV_SAMP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample standard deviation of expression across all input rows.

+

VAR_POP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the population variance (square of the population standard deviation) of expression across all input rows.

+

VAR_SAMP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample variance (square of the sample standard deviation) of expression across all input rows.

+

COLLECT([ ALL | DISTINCT ] expression)

+

MULTISET

+

Returns a multiset of expression across all input rows.

+

VARIANCE([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample variance (square of the sample standard deviation) of expression across all input rows.

+

FIRST_VALUE(expression)

+

Actual type

+

Returns the first value in an ordered set of values.

+

LAST_VALUE(expression)

+

Actual type

+

Returns the last value in an ordered set of values.

+
+
+
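A short illustrative aggregation query using several of the functions above; the table aggSource and its columns attr0 and attr1 are placeholders.

select
  attr0,
  COUNT(*),
  COUNT(DISTINCT attr1),
  AVG(attr1),
  SUM(attr1),
  MAX(attr1),
  STDDEV_POP(attr1)
from
  aggSource
group by
  attr0;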
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0342.html b/docs/dli/sqlreference/dli_08_0342.html new file mode 100644 index 00000000..27fc475b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0342.html @@ -0,0 +1,17 @@ + + +

Table-Valued Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0343.html b/docs/dli/sqlreference/dli_08_0343.html new file mode 100644 index 00000000..3ad617fb --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0343.html @@ -0,0 +1,76 @@ + + +

Deleting Partitions by Specifying Filter Criteria (Only OBS Tables Supported)

+

Function

This statement is used to delete one or more partitions based on specified conditions.

+
+

Precautions

+
+

Syntax

1
+2
+3
ALTER TABLE [db_name.]table_name
+  DROP [IF EXISTS]
+  PARTITIONS partition_filtercondition;
+
+ +
+
+

Keyword

+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+

This statement is used for OBS table operations.

+

partition_filtercondition

+

Condition used to search partitions to be deleted. The format is as follows:

+
  • Partition column name Operator Value to compare

    Example: start_date < '201911'

    +
  • <partition_filtercondition1> AND|OR <partition_filtercondition2>

    Example: start_date < '201911' OR start_date >= '202006'

    +
  • (<partition_filtercondition1>) [,partitions (<partition_filtercondition2>), ...]

    Example: (start_date <> '202007'), partitions(start_date < '201912')

    +
+
+
+
+

Example

You can run the following statements to delete partitions of the student table using different conditions:

+
1
+2
+3
+4
+5
+6
+7
+8
alter table student drop partitions(start_date < '201911');
+alter table student drop partitions(start_date >= '202007');
+alter table student drop partitions(start_date BETWEEN '202001' AND '202007');
+alter table student drop partitions(start_date < '201912' OR start_date >= '202006');
+alter table student drop partitions(start_date > '201912' AND start_date <= '202004');
+alter table student drop partitions(start_date != '202007');
+alter table student drop partitions(start_date <> '202007');
+alter table student drop partitions(start_date <> '202007'), partitions(start_date < '201912');
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0344.html b/docs/dli/sqlreference/dli_08_0344.html new file mode 100644 index 00000000..20d67279 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0344.html @@ -0,0 +1,221 @@ + + +

ClickHouse Result Table

+

Function

DLI exports Flink job data to ClickHouse result tables.

+

ClickHouse is a column-oriented database for online analytical processing (OLAP). It supports SQL queries and delivers good query performance. Its aggregation analysis and query performance on large, wide tables is excellent, often an order of magnitude faster than that of other analytical databases.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
create table clickhouseSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'clickhouse',
+  'connector.url' = '',
+  'connector.table' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Result table type. Set this parameter to clickhouse.

+

connector.url

+

Yes

+

ClickHouse URL.

+

Parameter format: jdbc:clickhouse://ClickHouseBalancer instance IP address:HTTP port number for ClickHouseBalancer instances/Database name

+
  • IP address of a ClickHouseBalancer instance:

    Log in to the MRS management console, click a cluster name, and choose Components > ClickHouse > Instance to obtain the service IP address of the ClickHouseBalancer instance.

    +
  • HTTP port of a ClickHouseBalancer instance:

    Log in to the MRS management console, click the target cluster name. On the displayed page, choose Components > ClickHouse. In the Service Configuration tab, choose ClickHouseBalancer from the All Roles dropdown list and search for lb_http_port to configure the parameter. The default value is 21425.

    +
  • The database name is the name of the database created for the ClickHouse cluster.
+

connector.table

+

Yes

+

Name of the ClickHouse table to be created

+

connector.driver

+

No

+

Driver required for connecting to the database

+
  • If this parameter is not specified during table creation, the driver automatically extracts the value from the ClickHouse URL.
  • If this parameter is specified during table creation, the value must be ru.yandex.clickhouse.ClickHouseDriver.
+

connector.username

+

No

+

Account for connecting to the ClickHouse database

+

connector.password

+

No

+

Password for accessing the ClickHouse database

+

connector.write.flush.max-rows

+

No

+

Maximum number of rows to be updated when data is written. The default value is 5000.

+

connector.write.flush.interval

+

No

+

Interval for data update. The unit can be ms (milli, millisecond), s (sec, second), or min (minute).

+

connector.write.max-retries

+

No

+

Maximum number of attempts to write data if failed. The default value is 3.

+
+
+
+

Example

Read data from a DIS table and insert the data into the test table of ClickHouse database flinktest.

+
  1. Create a DIS source table disSource.
     1
    + 2
    + 3
    + 4
    + 5
    + 6
    + 7
    + 8
    + 9
    +10
    +11
    +12
    +13
    +14
    +15
    +16
    +17
    +18
    +19
    +20
    +21
    create table disSource(
    +  attr0 string,
    +  attr1 TINYINT,
    +  attr2 smallint,
    +  attr3 int,
    +  attr4 bigint,
    +  attr5 float,
    +  attr6 double,
    +  attr7 String,
    +  attr8 string,
    +  attr9 timestamp(3),
    +  attr10 timestamp(3),
    +  attr11 date,
    +  attr12 decimal(38, 18),
    +  attr13 decimal(38, 18)
    +) with (
    +  "connector.type" = "dis",
    +  "connector.region" = "cn-xxxx-x",
    +  "connector.channel" = "xxxx",
    +  "format.type" = 'csv'
    +);
    +
    + +
    +
  2. Create ClickHouse result table clickhouse and insert the data from the disSource table to the result table.
    create table clickhouse(
    +  attr0 string,
    +  attr1 TINYINT,
    +  attr2 smallint,
    +  attr3 int,
    +  attr4 bigint,
    +  attr5 float,
    +  attr6 double,
    +  attr7 String,
    +  attr8 string,
    +  attr9 timestamp(3),
    +  attr10 timestamp(3),
    +  attr11 date,
    +  attr12 decimal(38, 18),
    +  attr13 decimal(38, 18),
    +  attr14 array < int >,
    +  attr15 array < bigint >,
    +  attr16 array < float >,
    +  attr17 array < double >,
    +  attr18 array < varchar >,
    +  attr19 array < String >
    +) with (
    +  'connector.type' = 'clickhouse',
    +  'connector.url' = 'jdbc:clickhouse://xx.xx.xx.xx:xx/flinktest',
    +  'connector.table' = 'test'
    +);
    +
    +insert into
    +  clickhouse
    +select
    +  attr0,
    +  attr1,
    +  attr2,
    +  attr3,
    +  attr4,
    +  attr5,
    +  attr6,
    +  attr7,
    +  attr8,
    +  attr9,
    +  attr10,
    +  attr11,
    +  attr12,
    +  attr13,
    +  array [attr3, attr3+1],
    +  array [cast(attr4 as bigint), cast(attr4+1 as bigint)],
    +  array [cast(attr12 as float), cast(attr12+1 as float)],
    +  array [cast(attr13 as double), cast(attr13+1 as double)],
    +  array ['TEST1', 'TEST2'],
    +  array [attr7, attr7]
    +from
    +  disSource;
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0345.html b/docs/dli/sqlreference/dli_08_0345.html new file mode 100644 index 00000000..555f18dd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0345.html @@ -0,0 +1,132 @@ + + +

Print Result Table

+

Function

The print connector exports your data output to the error file or the out file of TaskManager. It is mainly used for code debugging and output viewing.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
create table printSink (
+  attr_name attr_type (',' attr_name attr_type) * (',' PRIMARY KEY (attr_name,...) NOT ENFORCED)
+) with (
+  'connector' = 'print',
+  'print-identifier' = '',
+  'standard-error' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector

+

Yes

+

The value is fixed to print.

+

print-identifier

+

No

+

Message that identifies the print output and is used as a prefix of each printed value.

+

standard-error

+

No

+

The value can be only true or false. The default value is false.

+
  • If the value is true, data is output to the error file of the TaskManager.
  • If the value is false, data is output to the out file of the TaskManager.
+
+
+
+

Example

Read data from Kafka and export the data to the out file of TaskManager. You can view the output in the exported file.

+
create table kafkaSource(
+  attr0 string,
+  attr1 boolean,
+  attr3 decimal(38, 18),
+  attr4 TINYINT,
+  attr5 smallint,
+  attr6 int,
+  attr7 bigint,
+  attr8 float,
+  attr9 double,
+  attr10 date,
+  attr11 time,
+  attr12 timestamp(3)
+) with (
+  'connector.type' = 'kafka',
+  'connector.version' = '0.11',
+  'connector.topic' = 'test_json',
+  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
+  'connector.properties.group.id' = 'test_print',
+  'connector.startup-mode' = 'latest-offset',
+  'format.type' = 'csv'
+);
+
+create table printTable(
+  attr0 string,
+  attr1 boolean,
+  attr3 decimal(38,18),
+  attr4 TINYINT,
+  attr5 smallint,
+  attr6 int,
+  attr7 bigint,
+  attr8 float,
+  attr9 double,
+  attr10 date,
+  attr11 time,
+  attr12 timestamp(3),
+  attr13 array<string>,
+  attr14 row<attr15 float, attr16 timestamp(3)>,
+  attr17 map<int, bigint>
+) with (
+  "connector" = "print"
+);
+
+insert into
+  printTable
+select
+  attr0,
+  attr1,
+  attr3,
+  attr4,
+  attr5,
+  attr6,
+  attr7,
+  attr8,
+  attr9,
+  attr10,
+  attr11,
+  attr12,
+  array [cast(attr0 as string), cast(attr0 as string)],
+  row(
+    cast(attr8 as float),
+    cast(attr12 as timestamp(3))
+  ),
+  map [cast(attr6 as int), cast(attr7 as bigint)]
+from
+  kafkaSource;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0346.html b/docs/dli/sqlreference/dli_08_0346.html new file mode 100644 index 00000000..fb18acc1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0346.html @@ -0,0 +1,161 @@ + + +

File System Result Table

+

Function

You can create a file system result table to export data to a file system such as HDFS or OBS. After the data is generated, a non-DLI table can be created directly on the generated directory and processed through DLI SQL, and the output data can be organized into partitioned directories. This is applicable to scenarios such as data dumping, big data analysis, data backup, and active, deep, or cold archiving.

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
create table filesystemSink (
+  attr_name attr_type (',' attr_name attr_type) *
+) with (
+  'connector.type' = 'filesystem',
+  'connector.file-path' = '',
+  'format.type' = ''
+);
+
+ +
+
+

Important Notes

+
+

Parameter

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

The value is fixed to filesystem.

+

connector.file-path

+

Yes

+

Data output directory. The format is schema://file.path.

+
NOTE:
Currently, Schema supports only OBS and HDFS.
  • If schema is set to obs, data is stored to OBS. Note that OBS directory must be a parallel file system and must not be an OBS bucket.

    For example, obs://bucketName/fileName indicates that data is exported to the fileName directory in the bucketName bucket.

    +
  • If schema is set to hdfs, data is exported to HDFS.

Example: hdfs://node-master1sYAx:9820/user/car_infos, where node-master1sYAx:9820 is the name of the node where the NameNode is located.

    +
+
+
+

format.type

+

Yes

+

Output data encoding format. Only parquet and csv are supported.

+
  • When schema is set to obs, the encoding format of the output data can only be parquet.
  • When schema is set to hdfs, the output data can be encoded in Parquet or CSV format.
+

format.field-delimiter

+

No

+

Delimiter used to separate every two attributes.

+

This parameter needs to be configured if the CSV encoding format is adopted. It can be user-defined, for example, a comma (,).

+

connector.ak

+

No

+

Access key for accessing OBS

+

This parameter is mandatory when data is written to OBS.

+

connector.sk

+

No

+

Secret key for accessing OBS

+

This parameter is mandatory when data is written to OBS.

+

connector.partitioned-by

+

No

+

Partitioning field. Use commas (,) to separate multiple fields.

+
+
+
+

Example

Read data from Kafka and write the data in Parquet format to the fileName directory in the bucketName bucket.

+
create table kafkaSource(
+  attr0 string,
+  attr1 boolean,
+  attr2 TINYINT,
+  attr3 smallint,
+  attr4 int,
+  attr5 bigint,
+  attr6 float,
+  attr7 double,
+  attr8 timestamp(3),
+  attr9 time
+) with (
+  'connector.type' = 'kafka',
+  'connector.version' = '0.11',
+  'connector.topic' = 'test_json',
+  'connector.properties.bootstrap.servers' = 'xx.xx.xx.xx:9092',
+  'connector.properties.group.id' = 'test_filesystem',
+  'connector.startup-mode' = 'latest-offset',
+  'format.type' = 'csv'
+);
+
+create table filesystemSink(
+  attr0 string,
+  attr1 boolean,
+  attr2 TINYINT,
+  attr3 smallint,
+  attr4 int,
+  attr5 bigint,
+  attr6 float,
+  attr7 double,
+  attr8 map < string,  string >,
+  attr9 timestamp(3),
+  attr10 time
+) with (
+  "connector.type" = "filesystem",
+  "connector.file-path" = "obs://bucketName/fileName",
+  "format.type" = "parquet",
+  "connector.ak" = "xxxx",
+  "connector.sk" = "xxxxxx"
+);
+
+insert into
+  filesystemSink
+select
+  attr0,
+  attr1,
+  attr2,
+  attr3,
+  attr4,
+  attr5,
+  attr6,
+  attr7,
+  map [attr0,attr0],
+  attr8,
+  attr9
+from
+  kafkaSource;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0347.html b/docs/dli/sqlreference/dli_08_0347.html new file mode 100644 index 00000000..e50d0166 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0347.html @@ -0,0 +1,98 @@ + + +

User-defined Result Table

+

Function

Write your Java code to insert the processed data into a specified database supported by your cloud service.

+
+

Prerequisites

Implement the custom sink class:

+

The custom sink class is inherited from Flink open-source class RichSinkFunction. The data type is Tuple2<Boolean, Row>.

+
For example, define the MySink class by public class MySink extends RichSinkFunction< Tuple2<Boolean, Row>>{}, and implement the open, invoke, and close functions. A code example is as follows:
public class MySink extends RichSinkFunction<Tuple2<Boolean, Row>> {
+    // Initialize the object.
+    @Override
+    public void open(Configuration parameters) throws Exception {}
+
+    @Override
+    // Implement the data processing logic.
+    /* The in parameter contains two values. The first value is of the Boolean type. The value true indicates the insert or update operation, and the value false indicates the delete operation. If the interconnected sink does not support the delete operation, the deletion will not be executed. The second value indicates the data to be operated.*/
+    public void invoke(Tuple2<Boolean, Row> in, Context context) throws Exception {}
+
+    @Override
+    public void close() throws Exception {}
+}
+
+

Content of the dependent pom configuration file is as follows:

+
<dependency>
+    <groupId>org.apache.flink</groupId> 
+    <artifactId>flink-streaming-java_2.11</artifactId> 
+    <version>${flink.version}</version> 
+    <scope>provided</scope> 
+</dependency> 
+
+<dependency> 
+    <groupId>org.apache.flink</groupId> 
+    <artifactId>flink-core</artifactId> 
+    <version>${flink.version}</version> 
+    <scope>provided</scope> 
+</dependency>
+

Compile the implemented class into a JAR file and upload it using the UDF Jar parameter on the editing page of your Flink OpenSource SQL job.

+
+

Syntax

create table userDefinedSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'user-defined',
+  'connector.class-name' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. The value can only be user-defined, indicating a custom sink.

+

connector.class-name

+

Yes

+

Fully qualified class name of the sink class. For details about the implementation of the sink class, see Prerequisites.

+

connector.class-parameter

+

No

+

Parameter of the constructor of the sink class. Only one parameter of the string type is supported.

+
+
+
+

Precautions

connector.class-name must be a fully qualified class name.

+
+

Example

create table userDefinedSink (
+  attr1 int,
+  attr2 int
+)
+with (
+  'connector.type' = 'user-defined',
+  'connector.class-name' = 'xx.xx.MySink'
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0348.html b/docs/dli/sqlreference/dli_08_0348.html new file mode 100644 index 00000000..f813c8b9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0348.html @@ -0,0 +1,125 @@ + + +

OpenTSDB Result Table

+

Function

OpenTSDB is a distributed, scalable time series database based on HBase. It is designed to collect monitoring information from large-scale clusters and query data in seconds, making it easier to store and query massive amounts of monitoring data than with common databases. OpenTSDB can be used for system monitoring and measurement as well as collection and monitoring of IoT data, financial data, and scientific experimental results.

+

DLI uses enhanced datasource connections to write the output of Flink jobs to OpenTSDB.

+
+

Prerequisites

+
+

Syntax

create table tsdbSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'opentsdb',
+  'connector.region' = '',
+  'connector.tsdb-metrics' = '',
+  'connector.tsdb-timestamps' = '',
+  'connector.tsdb-values' = '',
+  'connector.tsdb-tags' = '',
+  'connector.tsdb-link-address' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Connector type. Set this parameter to opentsdb.

+

connector.region

+

Yes

+

Region where OpenTSDB is located

+

connector.tsdb-metrics

+

Yes

+

Metrics of data points, which can be specified through parameter configurations.

+

The number of metrics must be 1 or the same as the number of connector.tsdb-values.

+

Use semicolons (;) to separate multiple metrics.

+

connector.tsdb-timestamps

+

Yes

+

Timestamps of data points. Only dynamic columns are supported.

+

The data type can be int, bigint, or string. Only numbers are supported.

+

The number of metrics must be 1 or the same as the number of connector.tsdb-values.

+

Use semicolons (;) to separate multiple timestamps.

+

connector.tsdb-values

+

Yes

+

Values of data points. You can specify dynamic columns or constant values.

+

Separate multiple values with semicolons (;).

+

connector.tsdb-tags

+

Yes

+

Tags of data points. Each tag contains at least one tag value and a maximum of eight tag values. Separate multiple tags by commas (,). You can specify the tags by parameters.

+

The number of metrics must be 1 or the same as the number of connector.tsdb-values.

+

Separate multiple tags with semicolons (;).

+

connector.batch-insert-data-num

+

No

+

Number of data records to be written in batches at a time. The value must be a positive integer. The default value is 8.

+

connector.tsdb-link-address

+

Yes

+

OpenTSDB address used to connect to the cluster where the data is to be inserted.

+
+
+
+

Precautions

+ +
+

Example

create table sink1(
+  attr1 bigint,
+  attr2 int,
+  attr3 int
+) with (
+  'connector.type' = 'opentsdb',
+  'connector.region' = '',
+  'connector.tsdb-metrics' = '',
+  'connector.tsdb-timestamps' = '${attr1}',
+  'connector.tsdb-values' = '${attr2};10',
+  'connector.tsdb-tags' = 'key1:value1,key2:value2;key3:value3',
+  'connector.tsdb-link-address' = ''
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0349.html b/docs/dli/sqlreference/dli_08_0349.html new file mode 100644 index 00000000..1a567ee5 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0349.html @@ -0,0 +1,27 @@ + + + +

Backing Up and Restoring Data of Multiple Versions

+ +

+
+ +
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0350.html b/docs/dli/sqlreference/dli_08_0350.html new file mode 100644 index 00000000..8a47933b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0350.html @@ -0,0 +1,58 @@ + + +

Setting the Retention Period for Multiversion Backup Data

+

Function

After multiversion is enabled, backup data is retained for seven days by default. You can change the retention period by setting system parameter dli.multi.version.retention.days. Multiversion data out of the retention period will be automatically deleted when the insert overwrite or truncate statement is executed. You can also set table attribute dli.multi.version.retention.days to adjust the retention period when adding a column or modifying a partitioned table. For details about the syntax for enabling or disabling the multiversion function, see Enabling or Disabling Multiversion Backup.

+

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Syntax

1
+2
ALTER TABLE [db_name.]table_name 
+SET TBLPROPERTIES ("dli.multi.version.retention.days"="days");
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+

days

+

Number of days for which multiversion backup data is retained. The default value is 7 days. The value ranges from 1 to 7 days.

+
+
+
+

Precautions

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Example

Set the retention period of multiversion backup data to 5 days.
1
+2
ALTER TABLE test_table 
+SET TBLPROPERTIES ("dli.multi.version.retention.days"="5");
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0351.html b/docs/dli/sqlreference/dli_08_0351.html new file mode 100644 index 00000000..91ba2088 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0351.html @@ -0,0 +1,63 @@ + + +

Viewing Multiversion Backup Data

+

Function

After the multiversion function is enabled, you can run the SHOW HISTORY command to view the backup data of a table. For details about the syntax for enabling or disabling the multiversion function, see Enabling or Disabling Multiversion Backup.

+

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Syntax

+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+

column

+

Partition column name

+

value

+

Value corresponding to the partition column name

+
+
+
+

Precautions

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Example

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0352.html b/docs/dli/sqlreference/dli_08_0352.html new file mode 100644 index 00000000..ba3c2183 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0352.html @@ -0,0 +1,68 @@ + + +

Restoring Multiversion Backup Data

+

Function

After the multiversion function is enabled, you can run the RESTORE TABLE statement to restore a table or partition of a specified version. For details about the syntax for enabling or disabling the multiversion function, see Enabling or Disabling Multiversion Backup.

+

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Syntax

+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+

column

+

Partition column name

+

value

+

Value corresponding to the partition column name

+

version_id

+

Target version of the backup data to be restored. You can run the SHOW HISTORY command to obtain the version number. For details, see Viewing Multiversion Backup Data.

+
+
+
+

Precautions

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Example

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0353.html b/docs/dli/sqlreference/dli_08_0353.html new file mode 100644 index 00000000..af3372ff --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0353.html @@ -0,0 +1,59 @@ + + +

Configuring the Trash Bin for Expired Multiversion Data

+

Function

After the multiversion function is enabled, expired backup data will be directly deleted by the system when the insert overwrite or truncate statement is executed. You can configure the trash bin of the OBS parallel file system to accelerate the deletion of expired backup data. To enable the trash bin, add dli.multi.version.trash.dir to the table properties. For details about the syntax for enabling or disabling the multiversion function, see Enabling or Disabling Multiversion Backup.

+

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Syntax

1
+2
ALTER TABLE [db_name.]table_name 
+SET TBLPROPERTIES ("dli.multi.version.trash.dir"="OBS bucket for expired multiversion backup data");
+
+ +
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+

OBS bucket for expired multiversion backup data

+

A directory in the bucket where the current OBS table is located. You can change the directory path as needed. For example, if the current OBS table directory is obs://bucketName/filePath and a Trash directory has been created in the OBS table directory, you can set the trash bin directory to obs://bucketName/filePath/Trash.

+
+
+
+

Precautions

+
+

Example

Configure the trash bin to accelerate the deletion of expired backup data. The data is dumped to the /.Trash directory in OBS.
1
+2
ALTER TABLE test_table 
+SET TBLPROPERTIES ("dli.multi.version.trash.dir"="/.Trash");
+
+ +
+
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0354.html b/docs/dli/sqlreference/dli_08_0354.html new file mode 100644 index 00000000..8010d210 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0354.html @@ -0,0 +1,66 @@ + + +

Enabling or Disabling Multiversion Backup

+

Function

DLI controls multiple versions of backup data for restoration. After the multiversion function is enabled, the system automatically backs up table data when you delete or modify the data using insert overwrite or truncate, and retains the data for a certain period. You can quickly restore data within the retention period. For details about the syntax related to the multiversion function, see Backing Up and Restoring Data of Multiple Versions.

+

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Syntax

+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_).

+

table_name

+

Table name

+
+
+
+

Precautions

Currently, the multiversion function supports only OBS tables created using the Hive syntax. For details about the syntax for creating a table, see Creating an OBS Table Using the Hive Syntax.

+
+

Example

+
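The exact statements are not shown in this extract. The following is a hedged sketch, assuming multiversion backup is controlled by a table property in the dli.multi.version.* family used throughout this chapter (the property name dli.multi.version.enable and the table name test_table are assumptions):

ALTER TABLE test_table
SET TBLPROPERTIES ("dli.multi.version.enable"="true");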
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0355.html b/docs/dli/sqlreference/dli_08_0355.html new file mode 100644 index 00000000..24caa70f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0355.html @@ -0,0 +1,49 @@ + + +

Deleting Multiversion Backup Data

+

Function

The retention period of multiversion backup data takes effect each time the insert overwrite or truncate statement is executed. If neither statement is executed for the table, multiversion backup data out of the retention period will not be automatically deleted. You can run the SQL commands described in this section to manually delete multiversion backup data.

+
+

Syntax

Delete multiversion backup data out of the retention period.
clear history for table [db_name.]table_name older_than 'timestamp';
+
+
+

Keyword

+
+

Parameter

+
+ + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name, which consists of letters, digits, and underscores (_). The value cannot contain only digits or start with a digit or underscore (_).

+

table_name

+

Table name

+

Timestamp

+

Multiversion backup data generated before the timestamp will be deleted. Timestamp format: yyyy-MM-dd HH:mm:ss

+
+
+
+

Precautions

+
+

Example

Delete the multiversion backup data generated before 2021-09-25 23:59:59 in the dliTable table. When the multiversion backup data is generated, a timestamp is generated.
clear history for table dliTable older_than '2021-09-25 23:59:59';
+
+

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0356.html b/docs/dli/sqlreference/dli_08_0356.html new file mode 100644 index 00000000..cb2d6a9d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0356.html @@ -0,0 +1,141 @@ + + +

string_split

+

The string_split function splits a target string into substrings based on the specified separator and returns a substring list.

+

Description

string_split(target, separator)
+ +
+ + + + + + + + + + + + + +
Table 1 string_split parameters

Parameter

+

Type

+

Description

+

target

+

STRING

+

Target string to be processed

+
NOTE:
  • If target is NULL, an empty line is returned.
  • If target contains two or more consecutive separators, an empty substring is returned.
  • If target does not contain a specified separator, the original string passed to target is returned.
+
+

separator

+

VARCHAR

+

Delimiter. Currently, only single-character delimiters are supported.

+
+
+
+

Example

  1. Prepare test input data. +
    + + + + + + + + + + + + + +
    Table 2 Source table disSource

    target (STRING)

    +

    separator (VARCHAR)

    +

    test-flink

    +

    -

    +

    flink

    +

    -

    +

    one-two-ww-three

    +

    -

    +
    +
    +
  2. Write test SQL statements.
    create table disSource(
    +  target STRING,
    +  separator  VARCHAR
    +) with (
    +  "connector.type" = "dis",
    +  "connector.region" = "xxx",
    +  "connector.channel" = "ygj-dis-in",
    +  "format.type" = 'csv'
    +);
    +
    +create table disSink(
    +  target STRING,
    +  item STRING
    +) with (
    +  'connector.type' = 'dis',
    +  'connector.region' = 'xxx',
    +  'connector.channel' = 'ygj-dis-out',
    +  'format.type' = 'csv'
    +);
    +
    +insert into
    +  disSink
    +select
    +  target,
    +  item
    +from
    +  disSource,
    +lateral table(string_split(target, separator)) as T(item);
    +
  3. Check test results. +
    + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 3 disSink result table

    target (STRING)

    +

    item (STRING)

    +

    test-flink

    +

    test

    +

    test-flink

    +

    flink

    +

    flink

    +

    flink

    +

    one-two-ww-three

    +

    one

    +

    one-two-ww-three

    +

    two

    +

    one-two-ww-three

    +

    ww

    +

    one-two-ww-three

    +

    three

    +
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0357.html b/docs/dli/sqlreference/dli_08_0357.html new file mode 100644 index 00000000..97390b3c --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0357.html @@ -0,0 +1,34 @@ + + +

split_cursor

+

The split_cursor function can convert one row of records into multiple rows or convert one column of records into multiple columns. Table-valued functions can only be used in JOIN LATERAL TABLE.

+ +
+ + + + + + + + + +
Table 1 split_cursor function

Function

+

Return Type

+

Description

+

split_cursor(value, delimiter)

+

cursor

+

Separates the "value" string into multiple rows of strings by using the delimiter.

+
+
+

Example

Input one record ("student1", "student2, student3") and output two records ("student1", "student2") and ("student1", "student3").

+
create table s1(attr1 string, attr2 string) with (......);
+insert into s2 select  attr1, b1 from s1 left join lateral table(split_cursor(attr2, ',')) as T(b1) on true;
+
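A more complete sketch of the same job is shown below, modeled on the DIS tables used in the string_split example. The region, channel names, and the WITH options are placeholders, not verified settings.

create table s1(
  attr1 string,
  attr2 string
) with (
  'connector.type' = 'dis',
  'connector.region' = 'xxx',
  'connector.channel' = 'dis-input-channel',   -- placeholder channel name
  'format.type' = 'csv'
);

create table s2(
  attr1 string,
  attr2 string
) with (
  'connector.type' = 'dis',
  'connector.region' = 'xxx',
  'connector.channel' = 'dis-output-channel',  -- placeholder channel name
  'format.type' = 'csv'
);

insert into s2
select attr1, b1
from s1
left join lateral table(split_cursor(attr2, ',')) as T(b1) on true;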
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0358.html b/docs/dli/sqlreference/dli_08_0358.html new file mode 100644 index 00000000..5b9b82fe --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0358.html @@ -0,0 +1,83 @@ + + +

userDefined Source Table

+

Function

You can call APIs to obtain data from the cloud ecosystem or an open source ecosystem and use the obtained data as input of Flink jobs.

+
+

Prerequisites

The custom source class must extend the RichParallelSourceFunction class and specify Row as the output data type.

+

For example, declare the custom class MySource as public class MySource extends RichParallelSourceFunction<Row>{} and implement its open, run, close, and cancel methods. Package the class into a JAR file and upload it through the UDF JAR option on the SQL editing page.

+
Content of the dependent pom configuration file is as follows:
<dependency>
+    <groupId>org.apache.flink</groupId> 
+    <artifactId>flink-streaming-java_2.11</artifactId> 
+    <version>${flink.version}</version> 
+    <scope>provided</scope> 
+</dependency> 
+
+<dependency> 
+    <groupId>org.apache.flink</groupId> 
+    <artifactId>flink-core</artifactId> 
+    <version>${flink.version}</version> 
+    <scope>provided</scope> 
+</dependency>
+
+
+

Syntax

create table userDefinedSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'user-defined',
+  'connector.class-name' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Description

+

connector.type

+

Yes

+

Source type. The value can only be user-defined, indicating a custom source.

+

connector.class-name

+

Yes

+

Fully qualified class name of the source class

+

connector.class-parameter

+

No

+

Parameter of the constructor of the source class. Only one parameter of the string type is supported.

+
+
+
+

Precautions

connector.class-name must be a fully qualified class name.

+
+

Example

create table userDefinedSource (
+  attr1 int,
+  attr2 int
+)
+with (
+  'connector.type' = 'user-defined',
+  'connector.class-name' = 'xx.xx.MySource'
+);
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0359.html b/docs/dli/sqlreference/dli_08_0359.html new file mode 100644 index 00000000..c6e88d1b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0359.html @@ -0,0 +1,50 @@ + + +

Updating Table Metadata with REFRESH TABLE

+

Function

Spark caches Parquet metadata to improve performance. If you update a Parquet table, the cached metadata is not updated. Spark SQL cannot find the newly inserted data, and an error similar to the following is reported:
DLI.0002: FileNotFoundException: getFileStatus on  error message
+
+

You can use REFRESH TABLE to solve this problem. REFRESH TABLE reorganizes files of a partition and reuses the original table metadata information to detect the increase or decrease of table fields. This statement is mainly used when the metadata in a table is not modified but the table data is modified.

+
+

Syntax

1
REFRESH TABLE [db_name.]table_name;
+
+ +
+
+

Keyword

None

+
+

Parameter

+
+ + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Description

+

db_name

+

Database name that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_).

+

table_name

+

Table name of a database that contains letters, digits, and underscores (_). It cannot contain only digits or start with an underscore (_). The matching rule is ^(?!_)(?![0-9]+$)[A-Za-z0-9_$]*$. If special characters are required, use single quotation marks ('') to enclose them.

+
+
+
+

Precautions

None

+
+

Example

Update metadata of the test table.

+
1
REFRESH TABLE test;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0370.html b/docs/dli/sqlreference/dli_08_0370.html new file mode 100644 index 00000000..ee72b1ad --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0370.html @@ -0,0 +1,20 @@ + + +

Flink Opensource SQL 1.12 Syntax Reference

+

+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0371.html b/docs/dli/sqlreference/dli_08_0371.html new file mode 100644 index 00000000..d3f6a1cb --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0371.html @@ -0,0 +1,17 @@ + + +

Constraints and Definitions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0372.html b/docs/dli/sqlreference/dli_08_0372.html new file mode 100644 index 00000000..14418a45 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0372.html @@ -0,0 +1,11 @@ + + +

Supported Data Types

+

STRING, BOOLEAN, BYTES, DECIMAL, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, TIMESTAMP WITH LOCAL TIME ZONE, INTERVAL, ARRAY, MULTISET, MAP, ROW

+
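As an illustration only, a table declaration exercising several of these types might look as follows; the table name, field names, and the print connector are assumptions, not part of the type list above.

CREATE TABLE type_demo (
  id          BIGINT,
  name        STRING,
  is_valid    BOOLEAN,
  score       DECIMAL(10, 2),
  ratio       DOUBLE,
  birthday    DATE,
  login_time  TIMESTAMP(3),
  tags        ARRAY<STRING>,
  extras      MAP<STRING, STRING>,
  address     ROW<city STRING, zip INTEGER>
) WITH (
  'connector' = 'print'   -- placeholder connector; any connector that supports these types can be used
);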
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0373.html b/docs/dli/sqlreference/dli_08_0373.html new file mode 100644 index 00000000..24430126 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0373.html @@ -0,0 +1,17 @@ + + +

Syntax

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0374.html b/docs/dli/sqlreference/dli_08_0374.html new file mode 100644 index 00000000..a70ea328 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0374.html @@ -0,0 +1,19 @@ + + +

Data Definition Language (DDL)

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0375.html b/docs/dli/sqlreference/dli_08_0375.html new file mode 100644 index 00000000..caa06afd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0375.html @@ -0,0 +1,75 @@ + + +

CREATE TABLE

+

Syntax

CREATE TABLE table_name
+  (
+    { <column_definition> | <computed_column_definition> }[ , ...n]
+    [ <watermark_definition> ]
+    [ <table_constraint> ][ , ...n]
+  )
+  [COMMENT table_comment]
+  [PARTITIONED BY (partition_column_name1, partition_column_name2, ...)]
+  WITH (key1=val1, key2=val2, ...)
+
+<column_definition>:
+  column_name column_type [ <column_constraint> ] [COMMENT column_comment]
+
+<column_constraint>:
+  [CONSTRAINT constraint_name] PRIMARY KEY NOT ENFORCED
+
+<table_constraint>:
+  [CONSTRAINT constraint_name] PRIMARY KEY (column_name, ...) NOT ENFORCED
+
+<computed_column_definition>:
+  column_name AS computed_column_expression [COMMENT column_comment]
+
+<watermark_definition>:
+  WATERMARK FOR rowtime_column_name AS watermark_strategy_expression
+
+<source_table>:
+  [catalog_name.][db_name.]table_name
+
+

Function

Create a table with a specified name.

+
+

Description

COMPUTED COLUMN

+

A computed column is a virtual column generated using column_name AS computed_column_expression. A computed column evaluates an expression that can reference other columns declared in the same table. The column itself is not physically stored within the table. A computed column could be defined using cost AS price * quantity. This expression can contain any combination of physical columns, constants, functions, or variables, but cannot contain any subquery.

+

In Flink, a computed column is used to define the time attribute in CREATE TABLE statements. A processing time attribute can be defined easily via proc AS PROCTIME() using the system's PROCTIME() function. The event time column may be obtained from an existing field. In this case, you can use the computed column to obtain event time. For example, if the original field is not of the TIMESTAMP(3) type or is nested in a JSON string, you can use computed columns.

+

Note:

+ +
+

WATERMARK

+

The WATERMARK clause defines the event time attribute of a table and takes the form WATERMARK FOR rowtime_column_name AS watermark_strategy_expression.

+

rowtime_column_name defines an existing column that is marked as the event time attribute of the table. The column must be of the TIMESTAMP(3) type and must be the top-level column in the schema. It can also be a computed column.

+

watermark_strategy_expression defines the watermark generation strategy. It allows arbitrary non-query expressions, including computed columns, to calculate the watermark. The expression return type must be TIMESTAMP(3), which represents the timestamp since the Epoch. The returned watermark will be emitted only if it is non-null and its value is greater than the previously emitted local watermark (to preserve the contract of ascending watermarks). The watermark generation expression is evaluated by the framework for every record. The framework will periodically emit the largest generated watermark. If the current watermark is still identical to the previous one, or is null, or the value of the returned watermark is smaller than that of the last emitted one, then no new watermark will be emitted. A watermark is emitted in an interval defined by pipeline.auto-watermark-interval. If the watermark interval is 0 ms, a watermark will be emitted per record if it is not null and greater than the last emitted one.

+

When using event time semantics, tables must contain an event time attribute and watermark strategy.

+

Flink provides several commonly used watermark strategies.

+ +
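As one commonly used pattern, a bounded out-of-orderness watermark can be combined with the computed columns described above. The sketch below is an assumption for illustration: the table, fields, the 5-second delay, and the datagen connector are not prescribed by this section.

CREATE TABLE events (
  user_id  STRING,
  ts_raw   BIGINT,                                  -- epoch seconds from the source
  ts       AS TO_TIMESTAMP(FROM_UNIXTIME(ts_raw)),  -- computed column deriving the event time
  proc     AS PROCTIME(),                           -- processing time attribute
  WATERMARK FOR ts AS ts - INTERVAL '5' SECOND      -- tolerate 5 seconds of out-of-order data
) WITH (
  'connector' = 'datagen'                           -- placeholder connector; a real job would use an actual source such as Kafka
);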

PRIMARY KEY

+

The primary key constraint is a hint that Flink can leverage for optimizations. It declares that a column or a set of columns of a table or a view is unique and does not contain null values. None of the columns in a primary key can be nullable. The primary key therefore uniquely identifies a row in a table.

+

The primary key constraint can be declared either along with a column definition (a column constraint) or as a single line (a table constraint). In both cases, it should be declared at most once; defining multiple primary key constraints at the same time throws an exception.

+

Validity Check

+

The SQL standard specifies that a constraint can be either ENFORCED or NOT ENFORCED, which controls whether constraint checks are performed on the incoming/outgoing data. Flink does not own the data, so the only mode it supports is NOT ENFORCED. It is up to the user to ensure that the query enforces key integrity.

+

Flink assumes the primary key is correct by assuming that the columns' nullability is aligned with the columns in the primary key. Connectors should ensure the two are aligned.

+

Note: In a CREATE TABLE statement, creating a primary key constraint alters the columns' nullability, which means a column with a primary key constraint is not nullable.

+

PARTITIONED BY

+

Partition the created table by the specified columns. A directory is created for each partition if this table is used as a file system sink.

+

WITH OPTIONS

+

Table properties used to create a table source/sink. The properties are usually used to find and create the underlying connector.

+

The key and value of the expression key1=val1 should both be string literals.

+

Note: The table registered with the CREATE TABLE statement can be used as both a table source and a table sink. Whether it is used as a source or a sink cannot be determined until it is referenced in a DML statement.

+
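Putting these clauses together, a sketch of a full declaration might look as follows. Treat it purely as a syntax illustration: the filesystem connector, the OBS path, and the field names are assumptions, and whether a particular connector accepts a primary key or partitioning depends on that connector.

CREATE TABLE orders_sink (
  order_id  STRING,
  user_id   STRING,
  amount    DOUBLE,
  dt        STRING,
  PRIMARY KEY (order_id) NOT ENFORCED      -- table constraint; Flink does not enforce it
)
PARTITIONED BY (dt)                        -- one directory per dt value for a filesystem sink
WITH (
  'connector' = 'filesystem',              -- placeholder connector; keys and values are string literals
  'path' = 'obs://bucket/path/orders/',    -- placeholder path
  'format' = 'csv'
);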
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0376.html b/docs/dli/sqlreference/dli_08_0376.html new file mode 100644 index 00000000..d0a8d161 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0376.html @@ -0,0 +1,22 @@ + + +

CREATE VIEW

+

Syntax

CREATE VIEW [IF NOT EXISTS] view_name
+  [{columnName [, columnName ]* }] [COMMENT view_comment]
+  AS query_expression
+
+

Function

Create a view to encapsulate multi-layer nested queries and simplify the development process.

+
+

Description

IF NOT EXISTS

+

If the view already exists, nothing happens.

+
+

Example

Create a view named viewName.

+
create view viewName as select * from dataSource
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0377.html b/docs/dli/sqlreference/dli_08_0377.html new file mode 100644 index 00000000..24ae788f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0377.html @@ -0,0 +1,25 @@ + + +

CREATE FUNCTION

+

Syntax

CREATE FUNCTION
+  [IF NOT EXISTS] function_name
+  AS identifier [LANGUAGE JAVA|SCALA]
+
+

Function

Create a user-defined function.

+

For details about how to create a user-defined function, see User-Defined Functions (UDFs).

+
+

Description

IF NOT EXISTS

+

If the function already exists, nothing happens.

+

LANGUAGE JAVA|SCALA

+

The language tag instructs the Flink runtime how to execute the function. Currently, only the JAVA and SCALA language tags are supported; the default language for a function is JAVA.

+
+

Example

Create a function named STRINGBACK.

+
create function STRINGBACK as 'com.dli.StringBack'
+
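If the implementation is written in Scala, the LANGUAGE tag described above can be added explicitly; the function and class names below are hypothetical.

create function if not exists STRINGBACK_SCALA as 'com.dli.StringBackScala' language scala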
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0378.html b/docs/dli/sqlreference/dli_08_0378.html new file mode 100644 index 00000000..23d2b454 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0378.html @@ -0,0 +1,138 @@ + + +

Data Manipulation Language (DML)

+

DML Statements

Syntax

+
+
INSERT INTO table_name [PARTITION part_spec] query
+
+part_spec:  (part_col_name1=val1 [, part_col_name2=val2, ...])
+
+query:
+  values
+  | {
+      select
+      | selectWithoutFrom
+      | query UNION [ ALL ] query
+      | query EXCEPT query
+      | query INTERSECT query
+    }
+    [ ORDER BY orderItem [, orderItem ]* ]
+    [ LIMIT { count | ALL } ]
+    [ OFFSET start { ROW | ROWS } ]
+    [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY]
+
+orderItem:
+  expression [ ASC | DESC ]
+
+select:
+  SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+  [ WINDOW windowName AS windowSpec [, windowName AS windowSpec ]* ]
+
+selectWithoutFrom:
+  SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+
+projectItem:
+  expression [ [ AS ] columnAlias ]
+  | tableAlias . *
+
+tableExpression:
+  tableReference [, tableReference ]*
+  | tableExpression [ NATURAL ] [ LEFT | RIGHT | FULL ] JOIN tableExpression [ joinCondition ]
+
+joinCondition:
+  ON booleanExpression
+  | USING '(' column [, column ]* ')'
+
+tableReference:
+  tablePrimary
+  [ matchRecognize ]
+  [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ]
+
+tablePrimary:
+  [ TABLE ] [ [ catalogName . ] schemaName . ] tableName
+  | LATERAL TABLE '(' functionName '(' expression [, expression ]* ')' ')'
+  | UNNEST '(' expression ')'
+
+values:
+  VALUES expression [, expression ]*
+
+groupItem:
+  expression
+  | '(' ')'
+  | '(' expression [, expression ]* ')'
+  | CUBE '(' expression [, expression ]* ')'
+  | ROLLUP '(' expression [, expression ]* ')'
+  | GROUPING SETS '(' groupItem [, groupItem ]* ')'
+
+windowRef:
+    windowName
+  | windowSpec
+
+windowSpec:
+    [ windowName ]
+    '('
+    [ ORDER BY orderItem [, orderItem ]* ]
+    [ PARTITION BY expression [, expression ]* ]
+    [
+        RANGE numericOrIntervalExpression {PRECEDING}
+      | ROWS numericExpression {PRECEDING}
+    ]
+    ')'
+
+matchRecognize:
+      MATCH_RECOGNIZE '('
+      [ PARTITION BY expression [, expression ]* ]
+      [ ORDER BY orderItem [, orderItem ]* ]
+      [ MEASURES measureColumn [, measureColumn ]* ]
+      [ ONE ROW PER MATCH ]
+      [ AFTER MATCH
+            ( SKIP TO NEXT ROW
+            | SKIP PAST LAST ROW
+            | SKIP TO FIRST variable
+            | SKIP TO LAST variable
+            | SKIP TO variable )
+      ]
+      PATTERN '(' pattern ')'
+      [ WITHIN intervalLiteral ]
+      DEFINE variable AS condition [, variable AS condition ]*
+      ')'
+
+measureColumn:
+      expression AS alias
+
+pattern:
+      patternTerm [ '|' patternTerm ]*
+
+patternTerm:
+      patternFactor [ patternFactor ]*
+
+patternFactor:
+      variable [ patternQuantifier ]
+
+patternQuantifier:
+      '*'
+  |   '*?'
+  |   '+'
+  |   '+?'
+  |   '?'
+  |   '??'
+  |   '{' { [ minRepeat ], [ maxRepeat ] } '}' ['?']
+  |   '{' repeat '}'
+

Precautions

+

Flink SQL uses a lexical policy for identifiers (table, attribute, and function names) similar to Java:

+ +

String literals must be enclosed in single quotes (for example, SELECT 'Hello World'). Duplicate a single quote for escaping (for example, SELECT 'It''s me.'). Unicode characters are supported in string literals. If explicit Unicode points are required, use the following syntax:

+ +
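A minimal illustration of these quoting rules, assuming the standard Flink back-quote syntax for identifiers; the table and column names are hypothetical.

-- Back-quoted identifiers preserve case and allow reserved words;
-- a single quote inside a string literal is escaped by doubling it.
SELECT
  `order`.`userName` AS `User`,
  'It''s me.'        AS greeting
FROM `order`;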
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0379.html b/docs/dli/sqlreference/dli_08_0379.html new file mode 100644 index 00000000..1b11eed9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0379.html @@ -0,0 +1,124 @@ + + +

Overview

+

This section describes the Flink open source SQL 1.12 syntax supported by DLI. For details about the parameters and examples, see the syntax description.

+

Creating Tables

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0380.html b/docs/dli/sqlreference/dli_08_0380.html new file mode 100644 index 00000000..216823b9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0380.html @@ -0,0 +1,21 @@ + + +

DDL Syntax

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0381.html b/docs/dli/sqlreference/dli_08_0381.html new file mode 100644 index 00000000..bde8db08 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0381.html @@ -0,0 +1,31 @@ + + +

Creating Source Tables

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0382.html b/docs/dli/sqlreference/dli_08_0382.html new file mode 100644 index 00000000..dce74a79 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0382.html @@ -0,0 +1,163 @@ + + +

DataGen Source Table

+

Function

DataGen is used to generate random data for debugging and testing.

+
+

Prerequisites

None

+
+

Precautions

+
+

Syntax

create table dataGenSource(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' WATERMARK FOR rowtime_column_name AS watermark-strategy_expression)
+)
+with (
+  'connector' = 'datagen'
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to datagen.

+

rows-per-second

+

No

+

10000

+

Long

+

Number of rows generated per second, which is used to control the emit rate.

+

fields.#.kind

+

No

+

random

+

String

+

Generator of the # field. The # field must be an actual field in the DataGen table; replace # with the corresponding field name. The same applies to the # placeholder in the other parameters.

+

The value can be sequence or random.

+
  • random is the default generator. You can use the fields.#.max and fields.#.min parameters to specify the maximum and minimum values that are randomly generated.

    If the specified field type is char, varchar, or string, you can also use the fields.#.length field to specify the length. A random generator is an unbounded generator.

    +
  • sequence is a bounded generator. Use fields.#.start and fields.#.end to specify the start and end values of the sequence; when the sequence reaches the end value, reading ends (see the sketch after this table).
+

fields.#.min

+

No

+

Minimum value of the field type specified by #

+

Field type specified by #

+

This parameter is valid only when fields.#.kind is set to random.

+

Minimum value of the random generator. It applies only to numeric field types specified by #.

+

fields.#.max

+

No

+

Maximum value of the field type specified by #

+

Field type specified by #

+

This parameter is valid only when fields.#.kind is set to random.

+

Maximum value of the random generator. It applies only to numeric field types specified by #.

+

fields.#.length

+

No

+

100

+

Integer

+

This parameter is valid only when fields.#.kind is set to random.

+

Length of the characters generated by the random generator. It applies only to char, varchar, and string types specified by #.

+

fields.#.start

+

No

+

None

+

Field type specified by #

+

This parameter is valid only when fields.#.kind is set to sequence.

+

Start value of a sequence generator.

+

fields.#.end

+

No

+

None

+

Field type specified by #

+

This parameter is valid only when fields.#.kind is set to sequence.

+

End value of a sequence generator.

+
+
+
+
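For comparison with the random generator used in the example below, a bounded sequence source could be sketched as follows; the table and field names are made up for illustration, and the source ends once id reaches the end value.

create table seqSource(
  id    bigint,
  name  string
) with (
  'connector' = 'datagen',
  'rows-per-second' = '5',
  'fields.id.kind' = 'sequence',   -- bounded generator
  'fields.id.start' = '1',
  'fields.id.end' = '1000',
  'fields.name.kind' = 'random',
  'fields.name.length' = '5'
);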

Example

Create a Flink OpenSource SQL job. Run the following script to generate random data through the DataGen table and output the data to the Print result table.

+

When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs.

+
create table dataGenSource(
+  user_id string,
+  amount int
+) with (
+  'connector' = 'datagen',
+  'rows-per-second' = '1', -- Generates one record per second.
+  'fields.user_id.kind' = 'random', -- Specifies a random generator for the user_id field.
+  'fields.user_id.length' = '3' -- Limits the length of user_id to 3 characters.
+);
+
+create table printSink(
+  user_id string,
+  amount int
+) with (
+  'connector' = 'print'
+);
+
+insert into printSink select * from dataGenSource;
+

After the job is submitted, the job status changes to Running. You can perform the following operations to view the output result:

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0383.html b/docs/dli/sqlreference/dli_08_0383.html new file mode 100644 index 00000000..81824fb6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0383.html @@ -0,0 +1,286 @@ + + +

GaussDB(DWS) Source Table

+

Function

DLI reads data of Flink jobs from GaussDB(DWS). The GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types and deliver spatial information services, multi-version concurrency control (MVCC), and high concurrency. It applies to location-based applications, finance and insurance, and e-commerce.

+

GaussDB(DWS) is an online data processing database built on the cloud infrastructure and platform. It helps you mine and analyze massive sets of data.

+
+

Prerequisites

+
+

Precautions

When creating a Flink OpenSource SQL job, you need to set Flink Version to 1.12 on the Running Parameters tab of the job editing page, select Save Job Log, and set the OBS bucket for saving job logs.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
create table dwsSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+)
+with (
+  'connector' = 'gaussdb',
+  'url' = '',
+  'table-name' = '',
+  'username' = '',
+  'password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to gaussdb.

+

url

+

Yes

+

None

+

String

+

JDBC connection address. Set the IP address in this parameter to the internal IP address of GaussDB(DWS).

+

If you use the gsjdbc4 driver, set the value in jdbc:postgresql://${ip}:${port}/${dbName} format.

+

If you use the gsjdbc200 driver, set the value in jdbc:gaussdb://${ip}:${port}/${dbName} format.

+

table-name

+

Yes

+

None

+

String

+

Name of the GaussDB(DWS) table to be operated. If the GaussDB(DWS) table is in a schema, refer to the description of GaussDB(DWS) table in a schema.

+

driver

+

No

+

org.postgresql.Driver

+

String

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

username

+

No

+

None

+

String

+

Username for GaussDB(DWS) database authentication. This parameter must be configured in pair with password.

+

password

+

No

+

None

+

String

+

Password for GaussDB(DWS) database authentication. This parameter must be configured in pair with username.

+

scan.partition.column

+

No

+

None

+

String

+

Name of the column used to partition the input.

+

Note: This parameter must be used together with scan.partition.lower-bound, scan.partition.upper-bound, and scan.partition.num.

+

scan.partition.lower-bound

+

No

+

None

+

Integer

+

Lower bound of values to be fetched for the first partition.

+

This parameter must be used together with scan.partition.column, scan.partition.upper-bound, and scan.partition.num.

+

scan.partition.upper-bound

+

No

+

None

+

Integer

+

Upper bound of values to be fetched for the last partition.

+

This parameter must be used together with scan.partition.column, scan.partition.lower-bound, and scan.partition.num.

+

scan.partition.num

+

No

+

None

+

Integer

+

Number of partitions to be created.

+

This parameter must be used together with scan.partition.column, scan.partition.lower-bound, and scan.partition.upper-bound.

+

scan.fetch-size

+

No

+

0

+

Integer

+

Number of rows fetched from the database each time. The default value is 0, indicating that the number of rows is not limited.

+
+
+
+

Example

In this example, data is read from the GaussDB(DWS) data source and written to the Print result table. The procedure is as follows:

+
  1. Create a table named dws_order in GaussDB(DWS).
    create table public.dws_order(
    +  order_id VARCHAR,
    +  order_channel VARCHAR,
    +  order_time VARCHAR,
    +  pay_amount FLOAT8,
    +  real_pay FLOAT8,
    +  pay_time VARCHAR,
    +  user_id VARCHAR,
    +  user_name VARCHAR,
    +  area_id VARCHAR);
    +
    Insert data into the dws_order table.
    insert into public.dws_order
    +  (order_id,
    +  order_channel,
    +  order_time,
    +  pay_amount,
    +  real_pay,
    +  pay_time,
    +  user_id,
    +  user_name,
    +  area_id) values
    +  ('202103241000000001', 'webShop', '2021-03-24 10:00:00', '100.00', '100.00', '2021-03-24 10:02:03', '0001', 'Alice', '330106'),
    +  ('202103251202020001', 'miniAppShop', '2021-03-25 12:02:02', '60.00', '60.00', '2021-03-25 12:03:00', '0002', 'Bob', '330110');
    +
    +
  2. Create an enhanced datasource connection in the VPC and subnet where GaussDB(DWS) is located, and bind the connection to the required Flink elastic resource pool.
  3. Set GaussDB(DWS) security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the GaussDB(DWS) address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses the GaussDB(DWS) data source and the Print result table.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE dwsSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'gaussdb',
    +  'url' = 'jdbc:postgresql://DWSIP:DWSPort/DWSdbName',
    +  'table-name' = 'dws_order',
    +  'driver' = 'org.postgresql.Driver',
    +  'username' = 'DWSUserName',
    +  'password' = 'DWSPassword'
    +);
    +
    +CREATE TABLE printSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from dwsSource;
    +
    +
  5. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106)
    ++I(202103251202020001,miniAppShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    +
+
+

FAQ

+
+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0384.html b/docs/dli/sqlreference/dli_08_0384.html new file mode 100644 index 00000000..b5f6ead9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0384.html @@ -0,0 +1,267 @@ + + +

HBase Source Table

+

Function

Create a source stream to obtain data from HBase as input for jobs. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. DLI can read data from HBase for filtering, analysis, and data dumping.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table hbaseSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+  ','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'hbase-2.2',
+  'table-name' = '',
+  'zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to hbase-2.2.

+

table-name

+

Yes

+

None

+

String

+

Name of the HBase table to connect.

+

zookeeper.quorum

+

Yes

+

None

+

String

+

HBase ZooKeeper quorum, in the format of "ZookeeperAddress:ZookeeperPort".

+

The following uses an MRS HBase cluster as an example to describe how to obtain the IP address and port number of ZooKeeper used by this parameter:

+
  • On MRS Manager, choose Cluster and click the name of the desired cluster. Choose Services > ZooKeeper > Instance, and obtain the IP address of the ZooKeeper instance.
  • On MRS Manager, choose Cluster and click the name of the desired cluster. Choose Services > ZooKeeper > Configurations > All Configurations, search for the clientPort parameter, and obtain its value, that is, the ZooKeeper port number.
+

zookeeper.znode.parent

+

No

+

/hbase

+

String

+

Root directory in ZooKeeper. The default value is /hbase.

+

null-string-literal

+

No

+

None

+

String

+

Representation for null values for string fields.

+

HBase source encodes/decodes empty bytes as null values for all types except the string type.

+
+
+
+

Data Type Mapping

HBase stores all data as byte arrays. The data needs to be serialized and deserialized during read and write operations.

+

When serializing and de-serializing, Flink HBase connector uses utility class org.apache.hadoop.hbase.util.Bytes provided by HBase (Hadoop) to convert Flink data types to and from byte arrays.

+

The Flink HBase connector encodes null values as empty bytes and decodes empty bytes as null values for all data types except the string type. For the string type, the null literal is determined by the null-string-literal option.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data type mapping

Flink SQL Type

+

HBase Conversion

+

CHAR/VARCHAR/STRING

+

byte[] toBytes(String s)

+

String toString(byte[] b)

+

BOOLEAN

+

byte[] toBytes(boolean b)

+

boolean toBoolean(byte[] b)

+

BINARY/VARBINARY

+

Returns byte[] as is.

+

DECIMAL

+

byte[] toBytes(BigDecimal v)

+

BigDecimal toBigDecimal(byte[] b)

+

TINYINT

+

new byte[] { val }

+

bytes[0] // returns first and only byte from bytes

+

SMALLINT

+

byte[] toBytes(short val)

+

short toShort(byte[] bytes)

+

INT

+

byte[] toBytes(int val)

+

int toInt(byte[] bytes)

+

BIGINT

+

byte[] toBytes(long val)

+

long toLong(byte[] bytes)

+

FLOAT

+

byte[] toBytes(float val)

+

float toFloat(byte[] bytes)

+

DOUBLE

+

byte[] toBytes(double val)

+

double toDouble(byte[] bytes)

+

DATE

+

Stores the number of days since epoch as an int value.

+

TIME

+

Stores the number of milliseconds of the day as an int value.

+

TIMESTAMP

+

Stores the milliseconds since epoch as a long value.

+

ARRAY

+

Not supported

+

MAP/MULTISET

+

Not supported

+

ROW

+

Not supported

+
+
+
+

Example

In this example, data is read from the HBase data source and written to the Print result table. The procedure is as follows (the HBase versions used in this example are 1.3.1, 2.1.1, and 2.2.3):

+
  1. Create an enhanced datasource connection in the VPC and subnet where HBase is located, and bind the connection to the required Flink queue.
  2. Set HBase cluster security groups and add inbound rules to allow access from the Flink job queue. Test the connectivity using the HBase address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Use the HBase shell to create HBase table order that has only one column family detail. The creation statement is as follows:
    create 'order', {NAME => 'detail'}
    +
  4. Run the following command in the HBase shell to insert a data record:
    put 'order', '202103241000000001', 'detail:order_channel','webShop'
    +put 'order', '202103241000000001', 'detail:order_time','2021-03-24 10:00:00'
    +put 'order', '202103241000000001', 'detail:pay_amount','100.00'
    +put 'order', '202103241000000001', 'detail:real_pay','100.00'
    +put 'order', '202103241000000001', 'detail:pay_time','2021-03-24 10:02:03'
    +put 'order', '202103241000000001', 'detail:user_id','0001'
    +put 'order', '202103241000000001', 'detail:user_name','Alice'
    +put 'order', '202103241000000001', 'detail:area_id','330106'
    +
  5. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses the HBase data source and the Print result table.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    create table hbaseSource (
    +  order_id string,-- Indicates the unique rowkey.
    +  detail Row( -- Indicates the column family.
    +    order_channel string,
    +    order_time string,
    +    pay_amount string,
    +    real_pay string,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string),
    +  primary key (order_id) not enforced
    +) with (
    +  'connector' = 'hbase-2.2',
    +   'table-name' = 'order',
    +   'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort'
    +) ;
    +
    +create table printSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount string,
    +  real_pay string,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) with (
    + 'connector' = 'print'
    +);
    +
    +insert into printSink select order_id, detail.order_channel,detail.order_time,detail.pay_amount,detail.real_pay,
    +detail.pay_time,detail.user_id,detail.user_name,detail.area_id from hbaseSource;
    +
    +
  6. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103241000000001,webShop,2021-03-24 10:00:00,100.00,100.00,2021-03-24 10:02:03,0001,Alice,330106)
    +
+
+

FAQ

+
+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0385.html b/docs/dli/sqlreference/dli_08_0385.html new file mode 100644 index 00000000..43827ded --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0385.html @@ -0,0 +1,403 @@ + + +

JDBC Source Table

+

Function

The JDBC connector is a built-in Flink connector for reading data from a database.

+
+

Prerequisites

+
+

Precautions

When creating a Flink OpenSource SQL job, you need to set Flink Version to 1.12 on the Running Parameters tab of the job editing page, select Save Job Log, and set the OBS bucket for saving job logs.

+
+

Syntax

create table jbdcSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+) with (
+  'connector' = 'jdbc',
+  'url' = '',
+  'table-name' = '',
+  'username' = '',
+  'password' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to jdbc.

+

url

+

Yes

+

None

+

String

+

Database URL.

+

table-name

+

Yes

+

None

+

String

+

Name of the table where the data will be read from the database.

+

driver

+

No

+

None

+

String

+

Driver required for connecting to the database. If you do not set this parameter, it will be automatically derived from the URL.

+

username

+

No

+

None

+

String

+

Database authentication username. This parameter must be configured in pair with password.

+

password

+

No

+

None

+

String

+

Database authentication password. This parameter must be configured in pair with username.

+

scan.partition.column

+

No

+

None

+

String

+

Name of the column used to partition the input. For details, see Partitioned Scan.

+

scan.partition.num

+

No

+

None

+

Integer

+

Number of partitions to be created. For details, see Partitioned Scan.

+

scan.partition.lower-bound

+

No

+

None

+

Integer

+

Lower bound of values to be fetched for the first partition. For details, see Partitioned Scan.

+

scan.partition.upper-bound

+

No

+

None

+

Integer

+

Upper bound of values to be fetched for the last partition. For details, see Partitioned Scan.

+

scan.fetch-size

+

No

+

0

+

Integer

+

Number of rows fetched from the database each time. If this parameter is set to 0, the SQL hint is ignored.

+

scan.auto-commit

+

No

+

true

+

Boolean

+

Whether each statement is committed in a transaction automatically.

+
+
+
+

Partitioned Scan

To accelerate reading data in parallel across Source task instances, Flink provides the partitioned scan feature for JDBC tables. The following parameters describe how to partition the table when it is read in parallel from multiple tasks; see the sketch after this list.

+ +
  • When a table is created, the preceding partitioned scan parameters must all be specified if any of them is specified.
  • The scan.partition.lower-bound and scan.partition.upper-bound parameters are used to decide the partition stride instead of filtering rows in the table. All rows in the table are partitioned and returned.
+
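A sketch of a partitioned scan declaration is shown below. The connection settings mirror the placeholders used in the example that follows, and the numeric partition column, bounds, and partition count are assumptions for illustration.

CREATE TABLE jdbcSourcePartitioned (
  id          int,
  order_id    string,
  pay_amount  double
) WITH (
  'connector' = 'jdbc',
  'url' = 'jdbc:mysql://MySQLAddress:MySQLPort/flink',  -- placeholder address
  'table-name' = 'orders',
  'username' = 'MySQLUsername',                         -- placeholder credentials
  'password' = 'MySQLPassword',
  'scan.partition.column' = 'id',        -- numeric column used to split the read
  'scan.partition.num' = '4',            -- four parallel partitions
  'scan.partition.lower-bound' = '1',    -- stride bounds, not row filters
  'scan.partition.upper-bound' = '10000'
);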
+
+

Data Type Mapping

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data type mapping

MySQL Type

+

PostgreSQL Type

+

Flink SQL Type

+

TINYINT

+

-

+

TINYINT

+

SMALLINT

+

TINYINT UNSIGNED

+

SMALLINT

+

INT2

+

SMALLSERIAL

+

SERIAL2

+

SMALLINT

+

INT

+

MEDIUMINT

+

SMALLINT UNSIGNED

+

INTEGER

+

SERIAL

+

INT

+

BIGINT

+

INT UNSIGNED

+

BIGINT

+

BIGSERIAL

+

BIGINT

+

BIGINT UNSIGNED

+

-

+

DECIMAL(20, 0)

+

BIGINT

+

BIGINT

+

BIGINT

+

FLOAT

+

REAL

+

FLOAT4

+

FLOAT

+

DOUBLE

+

DOUBLE PRECISION

+

FLOAT8

+

DOUBLE PRECISION

+

DOUBLE

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

DECIMAL(p, s)

+

BOOLEAN

+

TINYINT(1)

+

BOOLEAN

+

BOOLEAN

+

DATE

+

DATE

+

DATE

+

TIME [(p)]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

DATETIME [(p)]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

CHAR(n)

+

VARCHAR(n)

+

TEXT

+

CHAR(n)

+

CHARACTER(n)

+

VARCHAR(n)

+

CHARACTER

+

VARYING(n)

+

TEXT

+

STRING

+

BINARY

+

VARBINARY

+

BLOB

+

BYTEA

+

BYTES

+

-

+

ARRAY

+

ARRAY

+
+
+
+

Example

This example uses JDBC as the data source and Print as the sink to read data from the RDS MySQL database and write the data to the Print result table.

+
  1. Create an enhanced datasource connection in the VPC and subnet where RDS MySQL is located, and bind the connection to the required Flink elastic resource pool.
  2. Set RDS MySQL security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the RDS address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Log in to the RDS MySQL database, create table orders in the Flink database, and insert data.

    Create table orders in the Flink database.

    +
    CREATE TABLE `flink`.`orders` (
    +	`order_id` VARCHAR(32) NOT NULL,
    +	`order_channel` VARCHAR(32) NULL,
    +	`order_time` VARCHAR(32) NULL,
    +	`pay_amount` DOUBLE UNSIGNED NOT NULL,
    +	`real_pay` DOUBLE UNSIGNED NULL,
    +	`pay_time` VARCHAR(32) NULL,
    +	`user_id` VARCHAR(32) NULL,
    +	`user_name` VARCHAR(32) NULL,
    +	`area_id` VARCHAR(32) NULL,
    +	PRIMARY KEY (`order_id`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4
    +	COLLATE = utf8mb4_general_ci;
    +
    Insert data into the table.
    insert into orders(
    +  order_id,
    +  order_channel,
    +  order_time,
    +  pay_amount,
    +  real_pay,
    +  pay_time,
    +  user_id,
    +  user_name,
    +  area_id) values
    +  ('202103241000000001', 'webShop', '2021-03-24 10:00:00', '100.00', '100.00', '2021-03-24 10:02:03', '0001', 'Alice', '330106'),  
    +  ('202103251202020001', 'miniAppShop', '2021-03-25 12:02:02', '60.00', '60.00', '2021-03-25 12:03:00', '0002', 'Bob', '330110');
    +
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE jdbcSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'jdbc',
    +  'url' = 'jdbc:mysql://MySQLAddress:MySQLPort/flink',--flink is the database name created in RDS MySQL.
    +  'table-name' = 'orders',
    +  'username' = 'MySQLUsername',
    +  'password' = 'MySQLPassword'
    +);
    +
    +CREATE TABLE printSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from jdbcSource;
    +
    +
  5. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106)
    ++I(202103251202020001,miniAppShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    +
+
+

FAQ

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0386.html b/docs/dli/sqlreference/dli_08_0386.html new file mode 100644 index 00000000..987720dd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0386.html @@ -0,0 +1,696 @@ + + +

Kafka Source Table

+

Function

Create a source stream to obtain data from Kafka as input data for jobs.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput, provides built-in partitioning, data replicas, and fault tolerance, and is applicable to scenarios that involve handling massive volumes of messages.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
create table kafkaSource(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+  (',' WATERMARK FOR rowtime_column_name AS watermark-strategy_expression)
+)
+with (
+  'connector' = 'kafka',
+  'topic' = '',
+  'properties.bootstrap.servers' = '',
+  'properties.group.id' = '',
+  'scan.startup.mode' = '',
+  'format' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to kafka.

+

topic

+

Yes

+

None

+

String

+

Topic name of the Kafka record.

+

Note:

+
  • Only one of topic and topic-pattern can be specified.
  • If there are multiple topics, separate them with semicolons (;), for example, topic-1;topic-2.
+

topic-pattern

+

No

+

None

+

String

+

Regular expression for a pattern of topic names to read from.

+

Only one of topic and topic-pattern can be specified.

+

For example:

+

'topic.*'

+

'(topic-c|topic-d)'

+

'(topic-a|topic-b|topic-\\d*)'

+

'(topic-a|topic-b|topic-[0-9]*)'

+

properties.bootstrap.servers

+

Yes

+

None

+

String

+

Comma separated list of Kafka brokers.

+

properties.group.id

+

Yes

+

None

+

String

+

ID of the consumer group for the Kafka source.

+

properties.*

+

No

+

None

+

String

+

This parameter can set and pass arbitrary Kafka configurations.

+

Note:

+
  • The suffix to properties. must match the configuration key in Apache Kafka.

    For example, you can disable automatic topic creation via 'properties.allow.auto.create.topics' = 'false'.

    +
  • Some configurations are not supported, for example, 'key.deserializer' and 'value.deserializer'.
+

format

+

Yes

+

None

+

String

+

Format used to deserialize and serialize the value part of Kafka messages. Note: Either this parameter or the value.format parameter is required.

+

Refer to Format for more details and format parameters.

+

key.format

+

No

+

None

+

String

+

Format used to deserialize and serialize the key part of Kafka messages.

+

Note:

+
  • If a key format is defined, the key.fields parameter is required as well. Otherwise, the Kafka records will have an empty key.
  • Refer to Format for more details and format parameters.
+

key.fields

+

No

+

[]

+

List<String>

+

Defines the columns in the table as the list of keys. This parameter must be configured in pair with key.format.

+

This parameter is left empty by default. Therefore, no key is defined.

+

The format is like field1;field2.

+

key.fields-prefix

+

No

+

None

+

String

+

Defines a custom prefix for all fields of the key format to avoid name clashes with fields of the value format.

+

value.format

+

Yes

+

None

+

String

+

Format used to deserialize and serialize the value part of Kafka messages.

+

Note:

+
  • Either this parameter or the format parameter is required. If two parameters are configured, a conflict occurs.
  • Refer to Format for more details and format parameters.
+

value.fields-include

+

No

+

ALL

+

Enum

+

Possible values: [ALL, EXCEPT_KEY]

+

Whether to contain the key field when parsing the message body.

+

Possible values are:

+
  • ALL (default): All defined fields are included in the value of Kafka messages.
  • EXCEPT_KEY: All the fields except those defined by key.fields are included in the value of Kafka messages.
+

scan.startup.mode

+

No

+

group-offsets

+

String

+

Start position for Kafka to read data.

+

Possible values are:

+
  • earliest-offset: Data is read from the earliest Kafka offset.
  • latest-offset: Data is read from the latest Kafka offset.
  • group-offsets (default): Data is read based on the consumer group.
  • timestamp: Data is read from a user-supplied timestamp. When setting this option, you also need to specify scan.startup.timestamp-millis in WITH.
  • specific-offsets: Data is read from user-supplied specific offsets for each partition. When setting this option, you also need to specify scan.startup.specific-offsets in WITH.
+

scan.startup.specific-offsets

+

No

+

None

+

String

+

This parameter takes effect only when scan.startup.mode is set to specific-offsets. It specifies the offsets for each partition, for example, partition:0,offset:42;partition:1,offset:300.

+

scan.startup.timestamp-millis

+

No

+

None

+

Long

+

Startup timestamp. This parameter takes effect when scan.startup.mode is set to timestamp.

+

scan.topic-partition-discovery.interval

+

No

+

None

+

Duration

+

Interval for a consumer to periodically discover dynamically created Kafka topics and partitions.

+
+
+
+

Metadata Column

You can define metadata columns in the source table to obtain the metadata of Kafka messages. For example, if multiple topics are defined in the WITH parameter and the metadata column is defined in the Kafka source table, the data read by Flink is labeled with the topic from which the data is read.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Metadata column

Key

+

Data Type

+

R/W

+

Description

+

topic

+

STRING NOT NULL

+

R

+

Topic name of the Kafka record.

+

partition

+

INT NOT NULL

+

R

+

Partition ID of the Kafka record.

+

headers

+

MAP<STRING, BYTES> NOT NULL

+

R/W

+

Headers of Kafka messages.

+

leader-epoch

+

INT NULL

+

R

+

Leader epoch of the Kafka record.

+

For details, see example 1.

+

offset

+

BIGINT NOT NULL

+

R

+

Offset of the Kafka record.

+

timestamp

+

TIMESTAMP(3) WITH LOCAL TIME ZONE NOT NULL

+

R/W

+

Timestamp of the Kafka record.

+

timestamp-type

+

STRING NOT NULL

+

R

+

Timestamp type of the Kafka record. The options are as follows:

+
  • NoTimestampType: No timestamp is defined in the message.
  • CreateTime: time when the message is generated.
  • LogAppendTime: time when the message is added to the Kafka broker.

    For details, see example 1.

    +
+
+
+
+
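A sketch of a source table that reads several of these metadata keys is shown below. It follows the standard Flink 1.12 METADATA column syntax; the topic, broker addresses, group ID, and format are placeholders.

CREATE TABLE kafkaSourceWithMeta (
  `topic`     STRING METADATA VIRTUAL,                                      -- topic the record was read from
  `partition` INT    METADATA VIRTUAL,                                      -- partition ID of the record
  `offset`    BIGINT METADATA VIRTUAL,                                      -- offset of the record
  event_time  TIMESTAMP(3) WITH LOCAL TIME ZONE METADATA FROM 'timestamp',  -- record timestamp
  order_id    STRING,
  pay_amount  DOUBLE
) WITH (
  'connector' = 'kafka',
  'topic' = 'KafkaTopic',                                                   -- placeholder topic
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',  -- placeholder brokers
  'properties.group.id' = 'GroupId',                                        -- placeholder group ID
  'scan.startup.mode' = 'latest-offset',
  'format' = 'json'
);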

Example (SASL_SSL Disabled for the Kafka Cluster)

+
+

Example (SASL_SSL Enabled for the Kafka Cluster)

+
+

FAQ

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0387.html b/docs/dli/sqlreference/dli_08_0387.html new file mode 100644 index 00000000..4cbf58f6 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0387.html @@ -0,0 +1,236 @@ + + +

MySQL CDC Source Table

+

Function

The MySQL CDC source table, that is, the MySQL streaming source table, reads all historical data in the database first and then smoothly switches to reading the binlog to ensure data integrity.

+
+

Prerequisites

+
+

Precautions

+ +
+

Syntax

create table mySqlCdcSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'mysql-cdc',   
+  'hostname' = 'mysqlHostname',
+  'username' = 'mysqlUsername',
+  'password' = 'mysqlPassword',
+  'database-name' = 'mysqlDatabaseName',
+  'table-name' = 'mysqlTableName'
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to mysql-cdc.

+

hostname

+

Yes

+

None

+

String

+

IP address or hostname of the MySQL database.

+

username

+

Yes

+

None

+

String

+

Username of the MySQL database.

+

password

+

Yes

+

None

+

String

+

Password of the MySQL database.

+

database-name

+

Yes

+

None

+

String

+

Name of the database to connect.

+

The database name supports regular expressions to read data from multiple databases. For example, flink(.)* indicates all database names starting with flink.

+

table-name

+

Yes

+

None

+

String

+

Name of the table to read data from.

+

The table name supports regular expressions to read data from multiple tables. For example, cdc_order(.)* indicates all table names starting with cdc_order.

+

port

+

No

+

3306

+

Integer

+

Port number of the MySQL database.

+

server-id

+

No

+

A random value from 5400 to 6400

+

String

+

A numeric ID of the database client, which must be globally unique in the MySQL cluster. You are advised to set a unique ID for each job in the same database.

+

By default, a random value ranging from 5400 to 6400 is generated.

+

scan.startup.mode

+

No

+

initial

+

String

+

Startup mode for consuming data.

+
  • initial (default): In the first startup, the database scans all historical data and then reads the latest Binlog data.
  • latest-offset: In the first startup, the database reads data directly from the end of the Binlog (the latest Binlog) instead of scanning all historical data. That is, it reads only the latest changes after the connector is started.
+

server-time-zone

+

No

+

None

+

String

+

Time zone of the session used by the database.

+
+
+
+

Example

In this example, MySQL-CDC is used to read data from RDS for MySQL in real time and write the data to the Print result table. The procedure is as follows (MySQL 5.7.32 is used in this example):

+
  1. Create an enhanced datasource connection in the VPC and subnet where MySQL is located, and bind the connection to the required Flink elastic resource pool.
  2. Set MySQL security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the MySQL address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Create a table named cdc_order in database flink of the MySQL database.
    CREATE TABLE `flink`.`cdc_order` (
    +	`order_id` VARCHAR(32) NOT NULL,
    +	`order_channel` VARCHAR(32) NULL,
    +	`order_time` VARCHAR(32) NULL,
    +	`pay_amount` DOUBLE  NULL,
    +	`real_pay` DOUBLE  NULL,
    +	`pay_time` VARCHAR(32) NULL,
    +	`user_id` VARCHAR(32) NULL,
    +	`user_name` VARCHAR(32) NULL,
    +	`area_id` VARCHAR(32) NULL,
    +	PRIMARY KEY (`order_id`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4
    +	COLLATE = utf8mb4_general_ci;
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    create table mysqlCdcSource(
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id STRING
    +) with (
    +  'connector' = 'mysql-cdc',
    +  'hostname' = 'mysqlHostname',
    +  'username' = 'mysqlUsername',
    +  'password' = 'mysqlPassword',
    +  'database-name' = 'mysqlDatabaseName',
    +  'table-name' = 'mysqlTableName'
    +);
    +
    +create table printSink(
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id STRING,
    +  primary key(order_id) not enforced
    +) with (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from mysqlCdcSource;
    +
    +
  5. Insert test data in MySQL.
    insert into cdc_order values
    +('202103241000000001','webShop','2021-03-24 10:00:00','100.00','100.00','2021-03-24 10:02:03','0001','Alice','330106'),
    +('202103241606060001','appShop','2021-03-24 16:06:06','200.00','180.00','2021-03-24 16:10:06','0001','Alice','330106');
    +
    +delete from cdc_order  where order_channel = 'webShop';
    +
    +insert into cdc_order values('202103251202020001','miniAppShop','2021-03-25 12:02:02','60.00','60.00','2021-03-25 12:03:00','0002','Bob','330110');
    +
  6. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103241000000001,webShop,2021-03-2410:00:00,100.0,100.0,2021-03-2410:02:03,0001,Alice,330106)
    ++I(202103241606060001,appShop,2021-03-2416:06:06,200.0,180.0,2021-03-2416:10:06,0001,Alice,330106)
    +-D(202103241000000001,webShop,2021-03-2410:00:00,100.0,100.0,2021-03-2410:02:03,0001,Alice,330106)
    ++I(202103251202020001,miniAppShop,2021-03-2512:02:02,60.0,60.0,2021-03-2512:03:00,0002,Bob,330110)
    +
+
+

FAQ

Q: How do I perform window aggregation if the MySQL CDC source table does not support definition of watermarks?

+

A: You can use the non-window aggregation method. That is, convert the time field into a window value, and then use GROUP BY to perform aggregation based on the window value.

+

For example, you can use the following script to collect statistics on the number of orders per minute (order_time indicates the order time, in the string format):

+
insert into printSink select DATE_FORMAT(order_time, 'yyyy-MM-dd HH:mm'), count(*) from mysqlCdcSource group by DATE_FORMAT(order_time, 'yyyy-MM-dd HH:mm');
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0388.html b/docs/dli/sqlreference/dli_08_0388.html new file mode 100644 index 00000000..2908c89b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0388.html @@ -0,0 +1,250 @@ + + +

Postgres CDC Source Table

+

Function

The Postgres CDC source table, that is, Postgres streaming source table, is used to read the full snapshot data and changed data of the PostgreSQL database in sequence. The exactly-once processing semantics is used to ensure data accuracy even if a failure occurs.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table postgresCdcSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'postgres-cdc',   
+  'hostname' = 'PostgresHostname',
+  'username' = 'PostgresUsername',
+  'password' = 'PostgresPassword',
+  'database-name' = 'PostgresDatabaseName',
+  'schema-name' = 'PostgresSchemaName',
+  'table-name' = 'PostgresTableName'
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to postgres-cdc.

+

hostname

+

Yes

+

None

+

String

+

IP address or hostname of the Postgres database.

+

username

+

Yes

+

None

+

String

+

Username of the Postgres database.

+

password

+

Yes

+

None

+

String

+

Password of the Postgres database.

+

database-name

+

Yes

+

None

+

String

+

Database name.

+

schema-name

+

Yes

+

None

+

String

+

Postgres schema name.

+

The schema name supports regular expressions to read data from multiple schemas. For example, test(.)* indicates all schema names starting with test.

+

table-name

+

Yes

+

None

+

String

+

Postgres table name.

+

The table name supports regular expressions to read data from multiple tables. For example, cdc_order(.)* indicates all table names starting with cdc_order.

+

port

+

No

+

5432

+

Integer

+

Port number of the Postgres database.

+

decoding.plugin.name

+

No

+

decoderbufs

+

String

+

Determined based on the plug-in that is installed in the PostgreSQL database. The value can be:

+
  • decoderbufs (default)
  • wal2json
  • wal2json_rds
  • wal2json_streaming
  • wal2json_rds_streaming
  • pgoutput
+

debezium.*

+

No

+

None

+

String

+

Fine-grained control over the behavior of Debezium clients, for example, 'debezium.snapshot.mode' = 'never'. For details, see Connector configuration properties.

+

You are advised to set the debezium.slot.name parameter for each table to avoid the following error (see the sketch after this table): "PSQLException: ERROR: replication slot "debezium" is active for PID 974"

+
+
+
+
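
The decoding.plugin.name and debezium.* options above are passed in the same WITH clause as the mandatory options. The following is a minimal sketch; the connection values are placeholders, and the slot name is a hypothetical example of the per-job naming recommended above.

create table postgresCdcSource (
+  order_id string,
+  order_channel string,
+  primary key (order_id) not enforced
+)
+with (
+  'connector' = 'postgres-cdc',
+  'hostname' = 'PostgresHostname',
+  'username' = 'PostgresUsername',
+  'password' = 'PostgresPassword',
+  'database-name' = 'PostgresDatabaseName',
+  'schema-name' = 'PostgresSchemaName',
+  'table-name' = 'PostgresTableName',
+  'decoding.plugin.name' = 'pgoutput',          -- choose the plug-in installed in your PostgreSQL instance
+  'debezium.slot.name' = 'dli_slot_cdc_order'   -- hypothetical slot name; use a unique name per job
+);
+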

Example

In this example, Postgres-CDC is used to read data from RDS for PostgreSQL in real time and write the data to the Print result table. The procedure is as follows (PostgreSQL 11.11 is used in this example):

+
  1. Create an enhanced datasource connection in the VPC and subnet where PostgreSQL is located, and bind the connection to the required Flink elastic resource pool.
  2. Set PostgreSQL security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the PostgreSQL address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. In PostgreSQL, create database flink and schema test.
  4. Create table cdc_order in the schema test of database flink in PostgreSQL.
    create table test.cdc_order(
    +  order_id VARCHAR,
    +  order_channel VARCHAR,
    +  order_time VARCHAR,
    +  pay_amount FLOAT8,
    +  real_pay FLOAT8,
    +  pay_time VARCHAR,
    +  user_id VARCHAR,
    +  user_name VARCHAR,
    +  area_id VARCHAR,
    +  primary key(order_id)
    +);
    +
  5. Run the following SQL statement in PostgreSQL. If you do not run this statement, an error will be reported when the Flink job is executed. For details, see the error message in FAQ.
    ALTER TABLE test.cdc_order REPLICA IDENTITY FULL
    +
  6. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    create table postgresCdcSource(
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id STRING,
    +  primary key (order_id) not enforced
    +) with (
    +  'connector' = 'postgres-cdc',
    +  'hostname' = 'PostgresHostname',
    +  'username' = 'PostgresUsername',
    +  'password' = 'PostgresPassword',
    +  'database-name' = 'flink',
    +  'schema-name' = 'test',
    +  'table-name' = 'cdc_order'
    +);
    +
    +create table printSink(
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id STRING,
    +  primary key(order_id) not enforced
    +) with (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from postgresCdcSource;
    +
    +
  7. Run the following command in PostgreSQL:
    insert into test.cdc_order
    +  (order_id,
    +  order_channel,
    +  order_time,
    +  pay_amount,
    +  real_pay,
    +  pay_time,
    +  user_id,
    +  user_name,
    +  area_id) values
    +  ('202103241000000001', 'webShop', '2021-03-24 10:00:00', '100.00', '100.00', '2021-03-24 10:02:03', '0001', 'Alice', '330106'),
    +  ('202103251202020001', 'miniAppShop', '2021-03-25 12:02:02', '60.00', '60.00', '2021-03-25 12:03:00', '0002', 'Bob', '330110');
    +
    +update test.cdc_order set order_channel = 'webShop' where order_id = '202103251202020001';
    +
    +delete from test.cdc_order where order_id = '202103241000000001';
    +
  8. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106)
    ++I(202103251202020001,miniAppShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    +-U(202103251202020001,miniAppShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    ++U(202103251202020001,webShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    +-D(202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106)
    +
+
+

FAQ

+
+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0389.html b/docs/dli/sqlreference/dli_08_0389.html new file mode 100644 index 00000000..1edf4d40 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0389.html @@ -0,0 +1,415 @@ + + +

Redis Source Table

+

Function

Create a source stream to obtain data from Redis as input for jobs.

+
+

Prerequisites

An enhanced datasource connection has been created for DLI to connect to the Redis database, so that you can configure security group rules as required.

+ +
+

Precautions

+
+

Syntax

create table redisSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+  ,PRIMARY KEY (attr_name, ...) NOT ENFORCED
+)
+with (
+  'connector' = 'redis',
+  'host' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to redis.

+

host

+

Yes

+

None

+

String

+

Redis connector address.

+

port

+

No

+

6379

+

Integer

+

Redis connector port.

+

password

+

No

+

None

+

String

+

Redis authentication password.

+

namespace

+

No

+

None

+

String

+

Redis key namespace.

+

delimiter

+

No

+

:

+

String

+

Delimiter between the Redis key and namespace.

+

data-type

+

No

+

hash

+

String

+

Redis data type. Available values are as follows:

+
  • hash
  • list
  • set
  • sorted-set
  • string
+

For details about the constraints, see Constraints on data-type.

+

schema-syntax

+

No

+

fields

+

String

+

Redis schema semantics. Available values are as follows (for details, see Precautions and FAQ):

+
  • fields: applicable to all data types
  • fields-scores: applicable to sorted-set data
  • array: applicable to list, set, and sorted-set data
  • array-scores: applicable to sorted-set data
  • map: applicable to hash and sorted-set data
+

For details about the constraints, see Constraints on schema-syntax.

+

deploy-mode

+

No

+

standalone

+

String

+

Deployment mode of the Redis cluster. The value can be standalone, master-replica, or cluster. The default value is standalone.

+

retry-count

+

No

+

5

+

Integer

+

Number of attempts to connect to the Redis cluster.

+

connection-timeout-millis

+

No

+

10000

+

Integer

+

Maximum timeout (in milliseconds) for connecting to the Redis cluster.

+

commands-timeout-millis

+

No

+

2000

+

Integer

+

Maximum time (in milliseconds) to wait for a command completion response.

+

rebalancing-timeout-millis

+

No

+

15000

+

Integer

+

Sleep time (in milliseconds) when the Redis cluster fails.

+

scan-keys-count

+

No

+

1000

+

Integer

+

Number of data records read in each scan.

+

default-score

+

No

+

0

+

Double

+

Default score when data-type is sorted-set.

+

deserialize-error-policy

+

No

+

fail-job

+

Enum

+

Policy of how to process a data parsing failure. Available values are as follows:

+
  • fail-job: Fail the job.
  • skip-row: Skip the current data.
  • null-field: Set the current data to null.
+

skip-null-values

+

No

+

true

+

Boolean

+

Whether null values will be skipped.

+
+
+
+
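
The following sketch shows how namespace and delimiter from the table above are typically combined, assuming the Redis keys are stored in the namespace:key form (for example, shop:redisSource); the host, password, and namespace values are placeholders.

CREATE TABLE redisSource (
+  redisKey string,
+  order_id string,
+  primary key (redisKey) not enforced
+) WITH (
+  'connector' = 'redis',
+  'host' = 'RedisIP',            -- placeholder
+  'password' = 'RedisPassword',  -- placeholder
+  'namespace' = 'shop',          -- hypothetical namespace
+  'delimiter' = ':',             -- default delimiter between the namespace and the key
+  'data-type' = 'hash',
+  'deploy-mode' = 'master-replica'
+);
+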

Example

In this example, data is read from the DCS Redis data source and written to the Print result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where Redis is located, and bind the connection to the required Flink elastic resource pool.
  2. Set Redis security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Redis address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Run the following commands on the Redis client to insert data into different keys and store the data in hash format:
    HMSET redisSource order_id 202103241000000001 order_channel webShop order_time "2021-03-24 10:00:00" pay_amount 100.00 real_pay 100.00 pay_time "2021-03-24 10:02:03" user_id 0001 user_name Alice area_id 330106
    +
    +HMSET redisSource1 order_id 202103241606060001 order_channel appShop order_time "2021-03-24 16:06:06" pay_amount 200.00 real_pay 180.00 pay_time "2021-03-24 16:10:06" user_id 0001 user_name Alice area_id 330106
    +
    +HMSET redisSource2 order_id 202103251202020001 order_channel miniAppShop order_time "2021-03-25 12:02:02" pay_amount 60.00 real_pay 60.00 pay_time "2021-03-25 12:03:00" user_id 0002 user_name Bob area_id 330110
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script to read data in hash format from Redis.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE redisSource (
    +  redisKey string,
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  primary key (redisKey) not enforced  --Obtains the key value from Redis.
    +) WITH (
    +  'connector' = 'redis',
    +  'host' = 'RedisIP',
    +  'password' = 'RedisPassword',
    +  'data-type' = 'hash',
    +  'deploy-mode' = 'master-replica'
    +);
    +
    +CREATE TABLE printSink (
    +  redisKey string,
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from redisSource;
    +
    +
  5. Perform the following operations to view the data result in the taskmanager.out file:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(redisSource1,202103241606060001,appShop,2021-03-24 16:06:06,200.0,180.0,2021-03-24 16:10:06,0001,Alice,330106)
    ++I(redisSource,202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106)
    ++I(redisSource2,202103251202020001,miniAppShop,2021-03-25 12:02:02,60.0,60.0,2021-03-25 12:03:00,0002,Bob,330110)
    +
+
+

FAQ

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0390.html b/docs/dli/sqlreference/dli_08_0390.html new file mode 100644 index 00000000..7886b6b4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0390.html @@ -0,0 +1,213 @@ + + +

Upsert Kafka Source Table

+

Function

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput, built-in partitioning, data replication, and fault tolerance, and is suitable for scenarios that involve massive volumes of messages.

+

As a source, the upsert-kafka connector produces a changelog stream, where each data record represents an update or delete event. More precisely, the value in a data record is interpreted as an UPDATE of the last value for the same key, if any (if a corresponding key does not exist yet, the UPDATE will be considered an INSERT). Using the table analogy, a data record in a changelog stream is interpreted as an UPSERT, also known as INSERT/UPDATE, because any existing row with the same key is overwritten. Also, null values are interpreted in a special way: A record with a null value represents a DELETE.

+
+

Prerequisites

An enhanced datasource connection has been created for DLI to connect to Kafka clusters, so that jobs can run on the dedicated queue of DLI and you can set the security group rules as required. +
+
+

Precautions

+
+

Syntax

create table kafkaSource(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'upsert-kafka',
+  'topic' = '',
+  'properties.bootstrap.servers' = '',
+  'key.format' = '',
+  'value.format' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to upsert-kafka.

+

topic

+

Yes

+

None

+

String

+

Kafka topic name.

+

properties.bootstrap.servers

+

Yes

+

None

+

String

+

Comma-separated list of Kafka brokers.

+

key.format

+

Yes

+

None

+

String

+

Format used to deserialize and serialize the key part of Kafka messages. The key fields are specified by the PRIMARY KEY syntax. The following formats are supported:

+
  • csv
  • json
  • avro
+

Refer to Format for more details and format parameters.

+

key.fields-prefix

+

No

+

None

+

String

+

Defines a custom prefix for all fields of the key format to avoid name clashes with fields of the value format.

+

By default, the prefix is empty. If a custom prefix is defined, both the table schema and key.fields work with the prefixed names. When constructing the data type of the key format, the prefix is removed and the non-prefixed names are used within the key format. Note that this option requires value.fields-include to be set to EXCEPT_KEY (see the sketch after this table).

+

value.format

+

Yes

+

None

+

String

+

Format used to deserialize and serialize the value part of Kafka messages. The following formats are supported:

+
  • csv
  • json
  • avro
+

Refer to Format for more details and format parameters.

+

value.fields-include

+

Yes

+

ALL

+

String

+

Controls which fields should appear in the value part. Possible values are:

+
  • ALL: All fields in the schema, including the primary key field, are included in the value part.
  • EXCEPT_KEY: All the fields of the table schema are included, except the primary key field.
+

properties.*

+

No

+

None

+

String

+

This option can set and pass arbitrary Kafka configurations.

+

The suffix to properties. must match the parameter defined in Kafka Configuration documentation. Flink will remove the properties. key prefix and pass the transformed key and value to the underlying KafkaClient.

+

For example, you can disable automatic topic creation via 'properties.allow.auto.create.topics' = 'false'.

+

However, some configurations cannot be set in this way because Flink will override them, for example, 'key.deserializer' and 'value.deserializer'.

+
+
+
+
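
The following sketch illustrates key.fields-prefix and value.fields-include; the topic and broker addresses are placeholders. The key field carries the k_ prefix in the table schema, the prefix is removed inside the key format, and value.fields-include must be EXCEPT_KEY.

CREATE TABLE upsertKafkaSource (
+  k_order_id string,                      -- key field; carries the k_ prefix in the table schema
+  order_channel string,
+  pay_amount double,
+  PRIMARY KEY (k_order_id) NOT ENFORCED
+) WITH (
+  'connector' = 'upsert-kafka',
+  'topic' = 'KafkaTopic',                                      -- placeholder
+  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort',  -- placeholder
+  'key.format' = 'json',
+  'key.fields-prefix' = 'k_',
+  'value.format' = 'json',
+  'value.fields-include' = 'EXCEPT_KEY'   -- required when key.fields-prefix is set
+);
+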

Example

In this example, data is read from the Kafka data source and written to the Print result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where Kafka is located, and bind the connection to the required Flink elastic resource pool.
  2. Set Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE upsertKafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,  
    +  area_id string,
    +  PRIMARY KEY (order_id) NOT ENFORCED
    +) WITH (
    +  'connector' = 'upsert-kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' =  'KafkaAddress1:KafkaPort,KafkAddress2:KafkaPort',
    +  'key.format' = 'csv',
    +  'value.format' = 'json'
    +);
    +
    +CREATE TABLE printSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,  
    +  area_id string,
    +  PRIMARY KEY (order_id) NOT ENFORCED
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +INSERT INTO printSink
    +SELECT * FROM upsertKafkaSource;
    +
    +
  4. Insert the following data to the specified topics in Kafka. (Note: Specify the key when inserting data to Kafka.)
    {"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
  5. Perform the following operations to view the output:
    1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
    2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
    3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
    +

    The data result is as follows:

    +
    +I(202103251202020001,miniAppShop,2021-03-2512:02:02,60.0,60.0,2021-03-2512:03:00,0002,Bob,330110)
    ++I(202103251505050001,qqShop,2021-03-2515:05:05,500.0,400.0,2021-03-2515:10:00,0003,Cindy,330108)
    +-U(202103251202020001,miniAppShop,2021-03-2512:02:02,60.0,60.0,2021-03-2512:03:00,0002,Bob,330110)
    ++U(202103251202020001,miniAppShop,2021-03-2512:02:02,60.0,60.0,2021-03-2512:03:00,0002,Bob,330110)
    +
+
+

FAQ

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0391.html b/docs/dli/sqlreference/dli_08_0391.html new file mode 100644 index 00000000..6a57a488 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0391.html @@ -0,0 +1,33 @@ + + +

Creating Result Tables

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0392.html b/docs/dli/sqlreference/dli_08_0392.html new file mode 100644 index 00000000..8f307c6b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0392.html @@ -0,0 +1,69 @@ + + +

BlackHole Result Table

+

Function

The BlackHole connector allows for swallowing all input records. It is designed for high-performance testing and UDF output. It is not a substantive sink. The BlackHole result table is a built-in connector.

+

For example, if an error is reported when you register a result table of another type, but you are not sure whether it is caused by a system fault or an invalid setting of the WITH parameters for that result table, you can change the value of connector to blackhole and click Run. If no error is reported, the system is normal and the WITH parameter settings of the original result table need to be checked.

+
+
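
As a hedged sketch of that check, only the connector value changes while the schema stays the same as the result table being debugged (the columns below are stand-ins for whatever that table declares):

create table debugSink (
+  order_id string,
+  pay_amount double
+) with (
+  'connector' = 'blackhole'   -- temporarily replaces the original connector; restore it after troubleshooting
+);
+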

Prerequisites

None

+
+

Precautions

When creating a Flink OpenSource SQL job, you need to set Flink Version to 1.12 on the Running Parameters tab of the job editing page, select Save Job Log, and set the OBS bucket for saving job logs.

+
+

Syntax

create table blackhole_table (
+ attr_name attr_type (',' attr_name attr_type) *
+) with (
+ 'connector' = 'blackhole'
+)
+
+

Parameters

+
+ + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to blackhole.

+
+
+
+

Example

The DataGen source table generates data, and the BlackHole result table receives the data.

+
create table datagenSource (
+ user_id string,
+ user_name string,
+ user_age int
+) with (
+ 'connector' = 'datagen',
+ 'rows-per-second'='1'
+);
+create table blackholeSink (
+ user_id string,
+ user_name string,
+ user_age int
+) with (
+ 'connector' = 'blackhole'
+);
+insert into blackholeSink select * from datagenSource;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0393.html b/docs/dli/sqlreference/dli_08_0393.html new file mode 100644 index 00000000..3abdf736 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0393.html @@ -0,0 +1,223 @@ + + +

ClickHouse Result Table

+

Function

DLI can output Flink job data to the ClickHouse database. ClickHouse is a column-based database oriented to online analysis and processing. It supports SQL queries and provides good query performance. Its aggregation analysis and query performance on large, wide tables is excellent, often an order of magnitude faster than that of other analytical databases.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table clickhouseSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector.type' = 'clickhouse',
+  'connector.url' = '',
+  'connector.table' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector.type

+

Yes

+

None

+

String

+

Result table type. Set this parameter to clickhouse.

+

connector.url

+

Yes

+

None

+

String

+

ClickHouse URL.

+

Parameter format: jdbc:clickhouse://ClickHouseBalancer instance IP address:HTTP port number for ClickHouseBalancer instances/Database name

+
  • IP address of a ClickHouseBalancer instance:

    Log in to the MRS console and choose Clusters > Active Clusters in the navigation pane. Click a cluster name, and choose Components > ClickHouse > Instances to obtain the business IP address of the ClickHouseBalancer instance.

    +
  • HTTP port of a ClickHouseBalancer instance:

    Log in to the MRS console and choose Clusters > Active Clusters in the navigation pane. Click a cluster name, and choose Components > ClickHouse > Service Configuration. On the Service Configuration page, select ClickHouseBalancer from the All Roles drop-down list, search for lb_http_port, and obtain the parameter value. The default value is 21425.

    +
  • The database name is the name of the database created for the ClickHouse cluster.
+

connector.table

+

Yes

+

None

+

String

+

Name of the ClickHouse table to be created.

+

connector.driver

+

No

+

ru.yandex.clickhouse.ClickHouseDriver

+

String

+

Driver required for connecting to the database.

+
  • If this parameter is not specified during table creation, the driver automatically extracts the value from the ClickHouse URL.
  • If this parameter is specified during table creation, the value must be ru.yandex.clickhouse.ClickHouseDriver.
+

connector.username

+

No

+

None

+

String

+

Username for connecting to the ClickHouse database.

+

connector.password

+

No

+

None

+

String

+

Password for connecting to the ClickHouse database.

+

connector.write.flush.max-rows

+

No

+

5000

+

Integer

+

Maximum number of rows to be updated when data is written. The default value is 5000.

+

connector.write.flush.interval

+

No

+

0

+

Duration

+

Interval for data update. The supported time units are ms (milli, millisecond), s (sec, second), and min (minute).

+

Value 0 indicates that data is not updated.

+

connector.write.max-retries

+

No

+

3

+

Integer

+

Maximum number of retries for writing data to the result table. The default value is 3.

+
+
+
+
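
The buffered-write options above can be set together with the mandatory options. The following is a sketch with placeholder connection values, tuning the flush thresholds to 500 rows and 5 seconds:

create table clickhouseSink (
+  order_id string,
+  pay_amount double
+)
+with (
+  'connector.type' = 'clickhouse',
+  'connector.url' = 'jdbc:clickhouse://ClickhouseAddress:ClickhousePort/flink',  -- placeholder
+  'connector.table' = 'order',
+  'connector.username' = 'ClickhouseUser',      -- placeholder
+  'connector.password' = 'ClickhousePassword',  -- placeholder
+  'connector.write.flush.max-rows' = '500',
+  'connector.write.flush.interval' = '5s',
+  'connector.write.max-retries' = '3'
+);
+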

Example

In this example, data is from Kafka and inserted to table order in ClickHouse database flink. The procedure is as follows (the ClickHouse version is 21.3.4.25 in MRS):

+
  1. Create an enhanced datasource connection in the VPC and subnet where the ClickHouse and Kafka clusters are located, and bind the connection to the required Flink queue.
  2. Set ClickHouse and Kafka cluster security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the ClickHouse address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Use the ClickHouse client to connect to the ClickHouse server and run the following command to query other environment parameters such as the cluster ID:
    select cluster,shard_num,replica_num,host_name from system.clusters;
    +
    The following information is displayed:
    ┌─cluster─────────┬─shard_num─┬─replica_num─┐
    +│ default_cluster │         1 │           1 │
    +│ default_cluster │         1 │           2 │
    +└─────────────────┴───────────┴─────────────┘
    +
    +
  4. Run the following command to create database flink on a node of the ClickHouse cluster based on the obtained cluster ID, for example, default_cluster:
    CREATE DATABASE flink ON CLUSTER default_cluster;
    +
  5. Run the following command to create the ReplicatedMergeTree table named order on the node of cluster default_cluster and on database flink:
    CREATE TABLE flink.order ON CLUSTER default_cluster(order_id String,order_channel String,order_time String,pay_amount Float64,real_pay Float64,pay_time String,user_id String,user_name String,area_id String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/flink/order', '{replica}') ORDER BY order_id;
    +
  6. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses the Kafka data source and the ClickHouse result table.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +create table clickhouseSink(
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) with (
    +  'connector.type' = 'clickhouse',
    +  'connector.url' = 'jdbc:clickhouse://ClickhouseAddress:ClickhousePort/flink',
    +  'connector.table' = 'order',
    +  'connector.write.flush.max-rows' = '1'
    +);
    +
    +insert into clickhouseSink select * from orders;
    +
    +
  7. Connect to the Kafka cluster and insert the following test data into Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
  8. Use the ClickHouse client to connect to the ClickHouse and run the following command to query the data written to table order in database flink:
    select * from flink.order;
    +
    The query result is as follows:
    202103241000000001 webShop 2021-03-24 10:00:00 100 100 2021-03-24 10:02:03 0001 Alice 330106
    +
    +202103241606060001 appShop 2021-03-24 16:06:06 200 180 2021-03-24 16:10:06 0001 Alice 330106 
    +
    +202103251202020001 miniAppShop 2021-03-25 12:02:02 60 60 2021-03-25 12:03:00 0002 Bob 330110 
    +
    +
+
+

FAQ

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0394.html b/docs/dli/sqlreference/dli_08_0394.html new file mode 100644 index 00000000..436a4144 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0394.html @@ -0,0 +1,309 @@ + + +

GaussDB(DWS) Result Table

+

Function

DLI outputs the Flink job output data to GaussDB(DWS). The GaussDB(DWS) database kernel is compliant with PostgreSQL. The PostgreSQL database can store data of more complex types and deliver space information services, multi-version concurrency control (MVCC), and high concurrency. It applies to location-based services, finance and insurance, and e-commerce.

+

GaussDB(DWS) is an online data processing database based on the cloud infrastructure and platform and helps you mine and analyze massive sets of data.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table dwsSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'gaussdb',
+  'url' = '',
+  'table-name' = '',
+  'driver' = '',
+  'username' = '',
+  'password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to gaussdb.

+

url

+

Yes

+

None

+

String

+

JDBC connection address.

+

If you use the gsjdbc4 driver, set the value in jdbc:postgresql://${ip}:${port}/${dbName} format.

+

If you use the gsjdbc200 driver, set the value in jdbc:gaussdb://${ip}:${port}/${dbName} format.

+

table-name

+

Yes

+

None

+

String

+

Name of the table to be operated. If the GaussDB(DWS) table is in a schema, the format is schema\".\"Table name. For details, see FAQ.

+

driver

+

No

+

org.postgresql.Driver

+

String

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

username

+

No

+

None

+

String

+

Username for GaussDB(DWS) database authentication. This parameter must be configured in pair with password.

+

password

+

No

+

None

+

String

+

Password for GaussDB(DWS) database authentication. This parameter must be configured in pair with username.

+

write.mode

+

No

+

upsert

+

String

+

Data write mode. The value can be copy, insert, or upsert. The default value is upsert.

+

This parameter must be configured depending on primary key.

+
  • If primary key is not configured, data can be appended in copy and insert modes.
  • If primary key is configured, all the three modes are available.
+

Note: GaussDB(DWS) does not support the update of distribution columns. The primary keys of columns to be updated must cover all distribution columns defined in the GaussDB(DWS) table.

+

sink.buffer-flush.max-rows

+

No

+

100

+

Integer

+

Maximum rows allowed for data flush. If the data size exceeds the value, data flush is triggered. The default value is 100.

+

If this parameter is set to 0, this configuration is disabled, and data is flushed in real time.

+

sink.buffer-flush.interval

+

No

+

1s

+

Duration

+

Data flush period. Data flush is triggered periodically. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit).

+

sink.max-retries

+

No

+

3

+

Integer

+

Maximum number of write retries.

+

write.escape-string-value

+

No

+

false

+

Boolean

+

Whether to escape values of the string type. This parameter is used only when write.mode is set to copy.

+
+
+
+
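
As noted above, upsert mode requires a primary key in the table DDL, and that key must cover the distribution columns of the GaussDB(DWS) table. The following is a minimal sketch with placeholder connection values:

create table dwsSink (
+  order_id string,
+  pay_amount double,
+  primary key (order_id) not enforced   -- must cover the distribution columns of the GaussDB(DWS) table
+)
+with (
+  'connector' = 'gaussdb',
+  'url' = 'jdbc:postgresql://DWSAddress:DWSPort/DWSdbName',  -- placeholder
+  'table-name' = 'dws_order',
+  'username' = 'DWSUserName',           -- placeholder
+  'password' = 'DWSPassword',           -- placeholder
+  'write.mode' = 'upsert',
+  'sink.buffer-flush.max-rows' = '100',
+  'sink.buffer-flush.interval' = '1s'
+);
+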

Example

In this example, data is read from the Kafka data source and written to the GaussDB(DWS) result table in insert mode. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where GaussDB(DWS) and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set GaussDB(DWS) and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the GaussDB(DWS) and Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Connect to the GaussDB(DWS) database and create a table named dws_order.
    create table public.dws_order(
    +  order_id VARCHAR,
    +  order_channel VARCHAR,
    +  order_time VARCHAR,
    +  pay_amount FLOAT8,
    +  real_pay FLOAT8,
    +  pay_time VARCHAR,
    +  user_id VARCHAR,
    +  user_name VARCHAR,
    +  area_id VARCHAR);
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses the Kafka data source and the GaussDB(DWS) result table.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +CREATE TABLE dwsSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'gaussdb',
    +  'url' = 'jdbc:postgresql://DWSAddress:DWSPort/DWSdbName',
    +  'table-name' = 'dws_order',
    +  'driver' = 'org.postgresql.Driver',
    +  'username' = 'DWSUserName',
    +  'password' = 'DWSPassword',
    +  'write.mode' = 'insert'
    +);
    +
    +insert into dwsSink select * from kafkaSource;
    +
    +
  5. Connect to the Kafka cluster and enter the following test data to Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
  6. Run the following SQL statement in GaussDB(DWS) to view the data result:
     select * from dws_order
    +
    The data result is as follows:
    202103241000000001	webShop	2021-03-24 10:00:00	100.0	100.0	2021-03-24 10:02:03	0001	Alice	330106
    +
    +
+
+

FAQ

+ +
+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0395.html b/docs/dli/sqlreference/dli_08_0395.html new file mode 100644 index 00000000..d268aeec --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0395.html @@ -0,0 +1,380 @@ + + +

Elasticsearch Result Table

+

Function

DLI outputs Flink job output data to Elasticsearch of Cloud Search Service (CSS). Elasticsearch is a popular enterprise-class Lucene-powered search server that provides distributed multi-user capabilities. It delivers multiple functions, including full-text retrieval, structured search, analytics, aggregation, and highlighting. With Elasticsearch, you can achieve stable, reliable, real-time search. Elasticsearch applies to diversified scenarios, such as log analysis and site search.

+

CSS is a fully managed, distributed search service. It is fully compatible with open-source Elasticsearch and provides DLI with structured and unstructured data search, statistics, and report capabilities.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table esSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'elasticsearch-7',
+  'hosts' = '',
+  'index' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to elasticsearch-7, indicating to connect to a cluster of Elasticsearch 7.x or later.

+

hosts

+

Yes

+

None

+

String

+

Host name of the cluster where Elasticsearch is located. Use semicolons (;) to separate multiple host names. Ensure that the host name starts with http, for example, http://x.x.x.x:9200.

+

index

+

Yes

+

None

+

String

+

Elasticsearch index for every record. The index can be a static index (for example, 'myIndex') or a dynamic index (for example, 'index-{log_ts|yyyy-MM-dd}').

+

username

+

No

+

None

+

String

+

Username of the cluster where Elasticsearch is located. This parameter must be configured in pair with password.

+

If the username and password are used, the security mode must be enabled and HTTPS must be disabled for the created CSS cluster.

+

password

+

No

+

None

+

String

+

Password of the cluster where Elasticsearch is located. This parameter must be configured in pair with username.

+

document-id.key-delimiter

+

No

+

_

+

String

+

Delimiter of composite primary keys. The default value is _.

+

failure-handler

+

No

+

fail

+

String

+

Failure handling strategy in case a request to Elasticsearch fails. Valid strategies are:

+
  • fail: throws an exception if a request fails and thus causes a job failure.
  • ignore: ignores failures and drops the request.
  • retry-rejected: re-adds requests that have failed due to queue capacity saturation.
  • Custom class name: for failure handling with an ActionRequestFailureHandler subclass.
+

sink.flush-on-checkpoint

+

No

+

true

+

Boolean

+

Whether to flush on checkpoint.

+

If this parameter is set to false, the connector will not wait for all pending action requests to be acknowledged by Elasticsearch on checkpoints. Therefore, the connector does not provide any strong guarantees for at-least-once delivery of action requests.

+

sink.bulk-flush.max-actions

+

No

+

1000

+

Integer

+

Maximum number of buffered actions per bulk request. You can set this parameter to 0 to disable it.

+

sink.bulk-flush.max-size

+

No

+

2mb

+

MemorySize

+

Maximum size in memory of buffered actions per bulk request. It must be in MB granularity. You can set this parameter to 0 to disable it.

+

sink.bulk-flush.interval

+

No

+

1s

+

Duration

+

Interval for flushing buffered actions. You can set this parameter to 0 to disable it.

+

Note:

+

Both sink.bulk-flush.max-size and sink.bulk-flush.max-actions can be set to 0 with the flush interval set allowing for complete asynchronous processing of buffered actions.

+

sink.bulk-flush.backoff.strategy

+

No

+

DISABLED

+

String

+

Specifies how to perform retries if any flush actions failed due to a temporary request error. Valid strategies are:

+
  • DISABLED: no retry performed, that is, fail after the first request error.
  • CONSTANT: wait for backoff delay between retries.
  • EXPONENTIAL: initially wait for backoff delay and increase exponentially between retries.
+

sink.bulk-flush.backoff.max-retries

+

No

+

8

+

Integer

+

Maximum number of backoff retries.

+

sink.bulk-flush.backoff.delay

+

No

+

50ms

+

Duration

+

Delay between each backoff attempt.

+

For CONSTANT backoff, this is simply the delay between each retry.

+

For EXPONENTIAL backoff, this is the initial base delay.

+

connection.max-retry-timeout

+

No

+

None

+

Duration

+

Maximum timeout between retries.

+

connection.path-prefix

+

No

+

None

+

String

+

Prefix string to be added to every REST communication, for example, '/v1'.

+

format

+

No

+

json

+

String

+

The Elasticsearch connector supports specifying a format. The format must produce a valid JSON document. By default, the built-in JSON format is used.

+

Refer to Format for more details and format parameters.

+
+
+
+
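
The following sketch shows an authenticated sink with bulk-flush tuning; the address and credentials are placeholders, and, as noted above, the username/password pair requires a security-mode CSS cluster with HTTPS disabled. The primary key is used to derive the Elasticsearch document ID.

create table esSink (
+  order_id string,
+  pay_amount double,
+  primary key (order_id) not enforced   -- used to derive the document ID
+)
+with (
+  'connector' = 'elasticsearch-7',
+  'hosts' = 'http://ElasticsearchAddress:9200',  -- placeholder
+  'index' = 'orders',
+  'username' = 'EsUserName',                     -- placeholder
+  'password' = 'EsPassword',                     -- placeholder
+  'sink.bulk-flush.max-actions' = '1000',
+  'sink.bulk-flush.interval' = '1s'
+);
+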

Example

In this example, data is read from the Kafka data source and written to the Elasticsearch result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where Elasticsearch and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set Elasticsearch and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Elasticsearch and Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Log in to Kibana of the Elasticsearch cluster, select Dev Tools, enter and execute the following statement to create an index whose value is orders:
    PUT /orders
    +{
    +  "settings": {
    +    "number_of_shards": 1
    +  },
    +	"mappings": {
    +	  "properties": {
    +	    "order_id": {
    +	      "type": "text"
    +	    },
    +	    "order_channel": {
    +	      "type": "text"
    +	    },
    +	    "order_time": {
    +	      "type": "text"
    +	    },
    +	    "pay_amount": {
    +	      "type": "double"
    +	    },
    +	    "real_pay": {
    +	      "type": "double"
    +	    },
    +	    "pay_time": {
    +	      "type": "text"
    +	    },
    +	    "user_id": {
    +	      "type": "text"
    +	    },
    +	    "user_name": {
    +	      "type": "text"
    +	    },
    +	    "area_id": {
    +	      "type": "text"
    +	    }
    +	  }
    +	}
    +}
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +CREATE TABLE elasticsearchSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'elasticsearch-7',
    +  'hosts' = 'ElasticsearchAddress:ElasticsearchPort',
    +  'index' = 'orders'
    +);
    +
    +insert into elasticsearchSink select * from kafkaSource;
    +
    +
  5. Connect to the Kafka cluster and insert the following test data into Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
  6. Enter the following statement in Kibana of the Elasticsearch cluster and view the result:
    GET orders/_search
    +
    {
    +  "took" : 1,
    +  "timed_out" : false,
    +  "_shards" : {
    +    "total" : 1,
    +    "successful" : 1,
    +    "skipped" : 0,
    +    "failed" : 0
    +  },
    +  "hits" : {
    +    "total" : {
    +      "value" : 2,
    +      "relation" : "eq"
    +    },
    +    "max_score" : 1.0,
    +    "hits" : [
    +      {
    +        "_index" : "orders",
    +        "_type" : "_doc",
    +        "_id" : "ae7wpH4B1dV9conjpXeB",
    +        "_score" : 1.0,
    +        "_source" : {
    +          "order_id" : "202103241000000001",
    +          "order_channel" : "webShop",
    +          "order_time" : "2021-03-24 10:00:00",
    +          "pay_amount" : 100.0,
    +          "real_pay" : 100.0,
    +          "pay_time" : "2021-03-24 10:02:03",
    +          "user_id" : "0001",
    +          "user_name" : "Alice",
    +          "area_id" : "330106"
    +        }
    +      },
    +      {
    +        "_index" : "orders",
    +        "_type" : "_doc",
    +        "_id" : "au7xpH4B1dV9conjn3er",
    +        "_score" : 1.0,
    +        "_source" : {
    +          "order_id" : "202103241606060001",
    +          "order_channel" : "appShop",
    +          "order_time" : "2021-03-24 16:06:06",
    +          "pay_amount" : 200.0,
    +          "real_pay" : 180.0,
    +          "pay_time" : "2021-03-24 16:10:06",
    +          "user_id" : "0001",
    +          "user_name" : "Alice",
    +          "area_id" : "330106"
    +        }
    +      }
    +    ]
    +  }
    +}
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0396.html b/docs/dli/sqlreference/dli_08_0396.html new file mode 100644 index 00000000..c80b31b7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0396.html @@ -0,0 +1,359 @@ + + +

HBase Result Table

+

Function

DLI outputs the job data to HBase. HBase is a column-oriented distributed cloud storage system that features enhanced reliability, excellent performance, and elastic scalability. It applies to the storage of massive amounts of data and distributed computing. You can use HBase to build a storage system capable of storing TB- or even PB-level data. With HBase, you can filter and analyze data with ease and get responses in milliseconds, rapidly mining data value. Structured and semi-structured key-value data can be stored, including messages, reports, recommendation data, risk control data, logs, and orders. With DLI, you can write massive volumes of data to HBase at a high speed and with low latency.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table hbaseSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+) with (
+  'connector' = 'hbase-2.2',
+  'table-name' = '',
+  'zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to hbase-2.2.

+

table-name

+

Yes

+

None

+

String

+

Name of the HBase table to connect.

+

zookeeper.quorum

+

Yes

+

None

+

String

+

HBase ZooKeeper instance information, in the format of ZookeeperAddress:ZookeeperPort.

+

The following uses an MRS HBase cluster as an example to describe how to obtain the IP address and port number of ZooKeeper used by this parameter:

+
  • On MRS Manager, choose Cluster and click the name of the desired cluster. Choose Services > ZooKeeper > Instance, and obtain the IP address of the ZooKeeper instance.
  • On MRS Manager, choose Cluster and click the name of the desired cluster. Choose Services > ZooKeeper > Configurations > All Configurations, search for the clientPort parameter, and obtain its value, that is, the ZooKeeper port number.
+

zookeeper.znode.parent

+

No

+

/hbase

+

String

+

Root directory in ZooKeeper. The default value is /hbase.

+

null-string-literal

+

No

+

null

+

String

+

Representation for null values for string fields.

+

The HBase sink encodes/decodes empty bytes as null values for all types except the string type.

+

sink.buffer-flush.max-size

+

No

+

2mb

+

MemorySize

+

Maximum size in memory of buffered rows for each write request.

+

This can improve performance for writing data to the HBase database, but may increase the latency.

+

You can set this parameter to 0 to disable it.

+

sink.buffer-flush.max-rows

+

No

+

1000

+

Integer

+

Maximum number of rows to buffer for each write request.

+

This can improve performance for writing data to the HBase database, but may increase the latency.

+

You can set this parameter to 0 to disable it.

+

sink.buffer-flush.interval

+

No

+

1s

+

Duration

+

Interval for flushing any buffered rows.

+

This can improve performance for writing data to the HBase database, but may increase the latency.

+

You can set this parameter to 0 to disable it.

+

Note: Both sink.buffer-flush.max-size and sink.buffer-flush.max-rows can be set to 0 with the flush interval set allowing for complete asynchronous processing of buffered actions.

+

sink.parallelism

+

No

+

None

+

Integer

+

Defines the parallelism of the HBase sink operator.

+

By default, the parallelism is determined by the framework using the same parallelism of the upstream chained operator.

+
+
+
+
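The following is a minimal sketch of how the buffer-flush options above might be combined for an HBase sink; the table name, column family, and ZooKeeper address are placeholders. With both thresholds set to 0 and only the interval set, buffered rows are flushed purely on the timer, which is the fully asynchronous mode described in the note above.

create table hbaseSinkBuffered (
  order_id string,
  detail Row(order_channel string, pay_amount double)
) with (
  'connector' = 'hbase-2.2',
  'table-name' = 'order',                                -- placeholder HBase table
  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort',
  'sink.buffer-flush.max-size' = '0',                    -- disable the size-based flush trigger
  'sink.buffer-flush.max-rows' = '0',                    -- disable the row-count flush trigger
  'sink.buffer-flush.interval' = '2s'                    -- flush buffered rows every 2 seconds
);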

Data Type Mapping

HBase stores all data as byte arrays. The data needs to be serialized and deserialized during read and write operations.

+

When serializing and deserializing, the Flink HBase connector uses the utility class org.apache.hadoop.hbase.util.Bytes provided by HBase (Hadoop) to convert Flink data types to and from byte arrays.

+

The Flink HBase connector encodes null values to empty bytes and decodes empty bytes to null values for all data types except the string type. For the string type, the null literal is determined by the null-string-literal option.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data type mapping

Flink SQL Type

+

HBase Conversion

+

CHAR / VARCHAR / STRING

+

byte[] toBytes(String s)

+

String toString(byte[] b)

+

BOOLEAN

+

byte[] toBytes(boolean b)

+

boolean toBoolean(byte[] b)

+

BINARY / VARBINARY

+

Returns byte[] as is.

+

DECIMAL

+

byte[] toBytes(BigDecimal v)

+

BigDecimal toBigDecimal(byte[] b)

+

TINYINT

+

new byte[] { val }

+

bytes[0] // returns first and only byte from bytes

+

SMALLINT

+

byte[] toBytes(short val)

+

short toShort(byte[] bytes)

+

INT

+

byte[] toBytes(int val)

+

int toInt(byte[] bytes)

+

BIGINT

+

byte[] toBytes(long val)

+

long toLong(byte[] bytes)

+

FLOAT

+

byte[] toBytes(float val)

+

float toFloat(byte[] bytes)

+

DOUBLE

+

byte[] toBytes(double val)

+

double toDouble(byte[] bytes)

+

DATE

+

Stores the number of days since epoch as an int value.

+

TIME

+

Stores the number of milliseconds of the day as an int value.

+

TIMESTAMP

+

Stores the milliseconds since epoch as a long value.

+

ARRAY

+

Not supported

+

MAP / MULTISET

+

Not supported

+

ROW

+

Not supported

+
+
+
+
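As a hedged illustration of the mappings above, the sketch below declares a column family with INT, DOUBLE, and TIMESTAMP fields; each value is serialized with the corresponding Bytes conversion listed in Table 2. The table name, field names, and ZooKeeper address are placeholders.

create table hbaseTypedSink (
  rowkey string,                      -- row key, stored via toBytes(String s)
  stats Row(
    click_count int,                  -- stored via toBytes(int val)
    avg_price double,                 -- stored via toBytes(double val)
    update_time timestamp(3)          -- stored as milliseconds since epoch (long)
  ),
  PRIMARY KEY (rowkey) NOT ENFORCED
) with (
  'connector' = 'hbase-2.2',
  'table-name' = 'stats',             -- placeholder HBase table with column family stats
  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort'
);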

Example

In this example, data is read from the Kafka data source and written to the HBase result table. The procedure is as follows (the HBase versions used in this example are 1.3.1 and 2.2.3):

+
  1. Create an enhanced datasource connection in the VPC and subnet where HBase and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set HBase and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the HBase and Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Use the HBase shell to create an HBase table named order with only one column family, detail.
    create 'order', {NAME => 'detail'}
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses Kafka as the data source and HBase as the result table (the Rowkey is order_id and the column family name is detail).
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +create table hbaseSink(
    +  order_id string,
    +  detail Row(
    +    order_channel string,
    +    order_time string,
    +    pay_amount double,
    +    real_pay double,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string)
    +) with (
    +  'connector' = 'hbase-2.2',
    +  'table-name' = 'order',
    +  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort',
    +  'sink.buffer-flush.max-rows' = '1'
    +);
    +
    +insert into hbaseSink select order_id, Row(order_channel,order_time,pay_amount,real_pay,pay_time,user_id,user_name,area_id) from orders;
    +
    +
  5. Connect to the Kafka cluster and enter the following data to Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
  6. Run the following statement on the HBase shell to view the data result:
     scan 'order'
    +
    The data result is as follows:
    202103241000000001   column=detail:area_id, timestamp=2021-12-16T21:30:37.954, value=330106
    +
    +202103241000000001   column=detail:order_channel, timestamp=2021-12-16T21:30:37.954, value=webShop
    +
    +202103241000000001   column=detail:order_time, timestamp=2021-12-16T21:30:37.954, value=2021-03-24 10:00:00
    +
    +202103241000000001   column=detail:pay_amount, timestamp=2021-12-16T21:30:37.954, value=@Y\x00\x00\x00\x00\x00\x00
    +
    +202103241000000001   column=detail:pay_time, timestamp=2021-12-16T21:30:37.954, value=2021-03-24 10:02:03
    +
    +202103241000000001   column=detail:real_pay, timestamp=2021-12-16T21:30:37.954, value=@Y\x00\x00\x00\x00\x00\x00
    +
    +202103241000000001   column=detail:user_id, timestamp=2021-12-16T21:30:37.954, value=0001
    +
    +202103241000000001   column=detail:user_name, timestamp=2021-12-16T21:30:37.954, value=Alice
    +
    +202103241606060001   column=detail:area_id, timestamp=2021-12-16T21:30:44.842, value=330106
    +
    +202103241606060001   column=detail:order_channel, timestamp=2021-12-16T21:30:44.842, value=appShop
    +
    +202103241606060001   column=detail:order_time, timestamp=2021-12-16T21:30:44.842, value=2021-03-24 16:06:06
    +
    +202103241606060001   column=detail:pay_amount, timestamp=2021-12-16T21:30:44.842, value=@i\x00\x00\x00\x00\x00\x00
    +
    +202103241606060001   column=detail:pay_time, timestamp=2021-12-16T21:30:44.842, value=2021-03-24 16:10:06
    +
    +202103241606060001   column=detail:real_pay, timestamp=2021-12-16T21:30:44.842, value=@f\x80\x00\x00\x00\x00\x00
    +
    +202103241606060001   column=detail:user_id, timestamp=2021-12-16T21:30:44.842, value=0001
    +
    +202103241606060001   column=detail:user_name, timestamp=2021-12-16T21:30:44.842, value=Alice
    +
    +202103251202020001   column=detail:area_id, timestamp=2021-12-16T21:30:52.181, value=330110
    +
    +202103251202020001   column=detail:order_channel, timestamp=2021-12-16T21:30:52.181, value=miniAppShop
    +
    +202103251202020001   column=detail:order_time, timestamp=2021-12-16T21:30:52.181, value=2021-03-25 12:02:02
    +
    +202103251202020001   column=detail:pay_amount, timestamp=2021-12-16T21:30:52.181, value=@N\x00\x00\x00\x00\x00\x00
    +
    +202103251202020001   column=detail:pay_time, timestamp=2021-12-16T21:30:52.181, value=2021-03-25 12:03:00
    +
    +202103251202020001   column=detail:real_pay, timestamp=2021-12-16T21:30:52.181, value=@N\x00\x00\x00\x00\x00\x00
    +
    +202103251202020001   column=detail:user_id, timestamp=2021-12-16T21:30:52.181, value=0002
    +
    +202103251202020001   column=detail:user_name, timestamp=2021-12-16T21:30:52.181, value=Bob
    +
    +
+
+

FAQ

Q: What should I do if the Flink job execution fails and the log contains the following error information?

+
org.apache.zookeeper.ClientCnxn$SessionTimeoutException: Client session timed out, have not heard from server in 90069ms for connection id 0x0
+

A: The datasource connection is not bound or the binding fails. Configure the datasource connection or configure the security group of the MRS HBase cluster to allow access from the DLI queue.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0397.html b/docs/dli/sqlreference/dli_08_0397.html new file mode 100644 index 00000000..593ef459 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0397.html @@ -0,0 +1,379 @@ + + +

JDBC Result Table

+

Function

DLI outputs the Flink job output data to RDS through the JDBC result table.

+
+

Prerequisites

An enhanced datasource connection with the database instance has been established, so that you can configure security group rules as required.

+ +
+

Precautions

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
create table jdbcSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'jdbc',
+  'url' = '',
+  'table-name' = '',
+  'driver' = '',
+  'username' = '',
+  'password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to jdbc.

+

url

+

Yes

+

None

+

String

+

Database URL.

+

table-name

+

Yes

+

None

+

String

+

Name of the table in the database to which data will be written.

+

driver

+

No

+

None

+

String

+

Driver required for connecting to the database. If you do not set this parameter, it will be automatically derived from the URL.

+

username

+

No

+

None

+

String

+

Database authentication username. This parameter must be configured in pair with password.

+

password

+

No

+

None

+

String

+

Database authentication password. This parameter must be configured in pair with username.

+

sink.buffer-flush.max-rows

+

No

+

100

+

Integer

+

Maximum size of buffered records before flush. You can set this parameter to 0 to disable it.

+

sink.buffer-flush.interval

+

No

+

1s

+

Duration

+

Flush interval, during which asynchronous threads will flush data. You can set this parameter to 0 to disable it.

+

Note that sink.buffer-flush.max-rows can be set to 0 while the flush interval is set, allowing for complete asynchronous processing of buffered actions (see the sketch after this table).

+

sink.max-retries

+

No

+

3

+

Integer

+

Maximum number of retries if writing records to the database fails.

+
+
+
+
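A minimal sketch combining the optional parameters above for a MySQL target; the URL, credentials, and table name are placeholders, and the driver class shown is an assumption for MySQL 5.x (it can normally be omitted because it is derived from the URL). The flush settings illustrate the timer-only asynchronous mode described in the note above.

create table jdbcSinkTuned (
  order_id string,
  pay_amount double,
  PRIMARY KEY (order_id) NOT ENFORCED
) with (
  'connector' = 'jdbc',
  'url' = 'jdbc:mysql://MySQLAddress:MySQLPort/flink',
  'table-name' = 'orders',
  'driver' = 'com.mysql.jdbc.Driver',   -- optional; assumed MySQL 5.x driver class
  'username' = 'MySQLUsername',
  'password' = 'MySQLPassword',
  'sink.buffer-flush.max-rows' = '0',   -- disable the row-count flush trigger
  'sink.buffer-flush.interval' = '2s',  -- flush buffered records on a timer only
  'sink.max-retries' = '5'              -- retry a failed write up to five times
);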

Data Type Mapping

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Data type mapping

MySQL Type

+

PostgreSQL Type

+

Flink SQL Type

+

TINYINT

+

-

+

TINYINT

+

SMALLINT

+

TINYINT UNSIGNED

+

SMALLINT

+

INT2

+

SMALLSERIAL

+

SERIAL2

+

SMALLINT

+

INT

+

MEDIUMINT

+

SMALLINT UNSIGNED

+

INTEGER

+

SERIAL

+

INT

+

BIGINT

+

INT UNSIGNED

+

BIGINT

+

BIGSERIAL

+

BIGINT

+

BIGINT UNSIGNED

+

-

+

DECIMAL(20, 0)

+

BIGINT

+

BIGINT

+

BIGINT

+

FLOAT

+

REAL

+

FLOAT4

+

FLOAT

+

DOUBLE

+

DOUBLE PRECISION

+

FLOAT8

+

DOUBLE PRECISION

+

DOUBLE

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

DECIMAL(p, s)

+

BOOLEAN

+

TINYINT(1)

+

BOOLEAN

+

BOOLEAN

+

DATE

+

DATE

+

DATE

+

TIME [(p)]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

DATETIME [(p)]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

CHAR(n)

+

VARCHAR(n)

+

TEXT

+

CHAR(n)

+

CHARACTER(n)

+

VARCHAR(n)

+

CHARACTER

+

VARYING(n)

+

TEXT

+

STRING

+

BINARY

+

VARBINARY

+

BLOB

+

BYTEA

+

BYTES

+

-

+

ARRAY

+

ARRAY

+
+
+
+

Example

In this example, Kafka is used to send data, and Kafka data is written to the MySQL database through the JDBC result table.

+
  1. Create an enhanced datasource connection in the VPC and subnet where MySQL and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set MySQL and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the MySQL and Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Log in to the MySQL database and create table orders in database flink.
    CREATE TABLE `flink`.`orders` (
    +	`order_id` VARCHAR(32) NOT NULL,
    +	`order_channel` VARCHAR(32) NULL,
    +	`order_time` VARCHAR(32) NULL,
    +	`pay_amount` DOUBLE UNSIGNED NOT NULL,
    +	`real_pay` DOUBLE UNSIGNED NULL,
    +	`pay_time` VARCHAR(32) NULL,
    +	`user_id` VARCHAR(32) NULL,
    +	`user_name` VARCHAR(32) NULL,
    +	`area_id` VARCHAR(32) NULL,
    +	PRIMARY KEY (`order_id`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4
    +	COLLATE = utf8mb4_general_ci;
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +CREATE TABLE jdbcSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'jdbc',
    +  'url' = 'jdbc:mysql://MySQLAddress:MySQLPort/flink', -- flink is the MySQL database where the orders table is located.
    +  'table-name' = 'orders',
    +  'username' = 'MySQLUsername',
    +  'password' = 'MySQLPassword',
    +  'sink.buffer-flush.max-rows' = '1'
    +);
    +
    +insert into jdbcSink select * from kafkaSource;
    +
    +
  5. Connect to the Kafka cluster and send the following test data to the Kafka topics:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"} 
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
  6. Run the SQL statement in the MySQL database to view data in the table:
    select * from orders;
    +
    The following is an example of the result (the data below was copied from the MySQL database; its display format may differ from what the MySQL client shows):
    202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106
    +202103241606060001,appShop,2021-03-24 16:06:06,200.0,180.0,2021-03-24 16:10:06,0001,Alice,330106
    +
    +
+
+

FAQ

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0398.html b/docs/dli/sqlreference/dli_08_0398.html new file mode 100644 index 00000000..f774e418 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0398.html @@ -0,0 +1,495 @@ + + +

Kafka Result Table

+

Function

DLI outputs the Flink job output data to Kafka through the Kafka result table.

+

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
create table kafkaSink(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'kafka',
+  'topic' = '',
+  'properties.bootstrap.servers' = '',
+  'format' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

string

+

Connector to be used. Set this parameter to kafka.

+

topic

+

Yes

+

None

+

string

+

Topic name of the Kafka result table.

+

properties.bootstrap.servers

+

Yes

+

None

+

string

+

Kafka broker address. The value is in the format of host:port,host:port,host:port. Multiple host:port pairs are separated with commas (,).

+

format

+

Yes

+

None

+

string

+

Format used by the Flink Kafka connector to serialize Kafka messages. Either this parameter or the value.format parameter is required.

+

The following formats are supported:

+
  • csv
  • json
  • avro
+

Refer to Format for more details and format parameters.

+

topic-pattern

+

No

+

None

+

String

+

Regular expression for matching the Kafka topic name.

+

Only one of topic and topic-pattern can be specified.

+

Example: 'topic.*'

+

'(topic-c|topic-d)'

+

'(topic-a|topic-b|topic-\\d*)'

+

'(topic-a|topic-b|topic-[0-9]*)'

+

properties.*

+

No

+

None

+

String

+

This parameter can be used to set and pass arbitrary Kafka configurations.

+

Note:

+
  • Suffix names must match the configuration key defined in Apache Kafka.

    For example, you can disable automatic topic creation via 'properties.allow.auto.create.topics' = 'false'.

    +
  • Some configurations are not supported, for example, 'key.deserializer' and 'value.deserializer'.
+

key.format

+

No

+

None

+

String

+

Format used to deserialize and serialize the key part of Kafka messages.

+

Note:

+
  • If a key format is defined, the key.fields parameter is required as well. Otherwise, the Kafka records will have an empty key.
  • Possible values are:

    csv

    +

    json

    +

    avro

    +

    debezium-json

    +

    canal-json

    +

    maxwell-json

    +

    avro-confluent

    +

    raw

    +

    Refer to Format for more details and format parameters.

    +
+

key.fields

+

+

No

+

+

[]

+

List<String>

+

Defines the columns in the table as the list of keys. This parameter must be configured in pair with key.format.

+

This parameter is left empty by default. Therefore, no key is defined.

+

The format is like field1;field2.

+

key.fields-prefix

+

No

+

None

+

String

+

Defines a custom prefix for all fields of the key format to avoid name clashes with fields of the value format.

+

value.format

+

Yes

+

None

+

String

+

Format used to deserialize and serialize the value part of Kafka messages.

+

Note:

+
  • Either this parameter or the format parameter is required. If both are configured, a conflict occurs.
  • Refer to Format for more details and format parameters.
+

value.fields-include

+

No

+

ALL

+

Enum

+

Possible values: [ALL, EXCEPT_KEY]

+

Whether to contain the key field when parsing the message body.

+

Possible values are:

+
  • ALL (default): All defined fields are included in the value of Kafka messages.
  • EXCEPT_KEY: All the fields except those defined by key.fields are included in the value of Kafka messages.
+

sink.partitioner

+

No

+

None

+

string

+

Mapping from Flink's partitions into Kafka's partitions. Valid values are as follows:

+
  • fixed (default): Each Flink partition ends up in at most one Kafka partition.
  • round-robin: A Flink partition is distributed to Kafka partitions in a round-robin manner.
  • Custom FlinkKafkaPartitioner subclass: If fixed and round-robin do not meet your requirements, you can create subclass FlinkKafkaPartitioner to customize the partition mapping, for example, org.mycompany.MyPartitioner.
+

sink.semantic

+

No

+

at-least-once

+

String

+

Defines the delivery semantic for the Kafka sink.

+

Valid values are as follows:

+
  • at-least-once
  • exactly-once
  • none
+

sink.parallelism

+

No

+

None

+

Integer

+

Defines the parallelism of the Kafka sink operator.

+

By default, the parallelism is determined by the framework using the same parallelism of the upstream chained operator.

+
+
+
+
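The example below writes only message values. As a hedged sketch, the DDL here shows how key.format, key.fields, value.fields-include, and sink.semantic from the table above might be combined so that order_id is written as the Kafka message key; the topic and broker addresses are placeholders.

create table kafkaKeyedSink (
  order_id string,
  order_channel string,
  pay_amount double
) with (
  'connector' = 'kafka',
  'topic' = 'KafkaSinkTopic',
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
  'key.format' = 'json',
  'key.fields' = 'order_id',             -- order_id becomes the Kafka message key
  'value.format' = 'json',
  'value.fields-include' = 'EXCEPT_KEY', -- the value carries only the non-key fields
  'sink.semantic' = 'at-least-once'
);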

Example (SASL_SSL Disabled for the Kafka Cluster)

In this example, data is read from a Kafka topic and written to another using a Kafka result table.

+
  1. Create an enhanced datasource connection in the VPC and subnet where Kafka is located, and bind the connection to the required Flink elastic resource pool.
  2. Set Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "json"
    +);
    +
    +CREATE TABLE kafkaSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSinkTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  "format" = "json"
    +);
    +
    +insert into kafkaSink select * from kafkaSource;
    +
    +
  4. Connect to the Kafka cluster and insert the following test data into the source topic in Kafka:
    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
  5. Connect to the Kafka cluster and read data from the sink topic of Kafka.
    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
+
+

Example (SASL_SSL Enabled for the Kafka Cluster)

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0399.html b/docs/dli/sqlreference/dli_08_0399.html new file mode 100644 index 00000000..37aa7d83 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0399.html @@ -0,0 +1,154 @@ + + +

Print Result Table

+

Function

The Print connector is used to print output data to the TaskManager's error file or out file, making it easier for you to view results during code debugging.

+
+

Prerequisites

None

+
+

Precautions

+ +
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
create table printSink (
+  attr_name attr_type 
+  (',' attr_name attr_type) * 
+  (',' PRIMARY KEY (attr_name,...) NOT ENFORCED)
+) with (
+  'connector' = 'print',
+  'print-identifier' = '',
+  'standard-error' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to print.

+

print-identifier

+

No

+

None

+

String

+

Identifier message that is prefixed to the printed output.

+

standard-error

+

No

+

false

+

Boolean

+

The value can be only true or false. The default value is false.

+
  • If the value is true, data is output to the error file of the TaskManager.
  • If the value is false, data is output to the out file of the TaskManager.
+
+
+
+
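A minimal sketch of a Print sink that uses the two optional parameters above; the identifier text is arbitrary.

create table printDebugSink (
  user_id string,
  amount int
) with (
  'connector' = 'print',
  'print-identifier' = 'debug>',  -- prefix added to every printed row
  'standard-error' = 'true'       -- write to the TaskManager error file instead of the out file
);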

Example

Create a Flink OpenSource SQL job. Run the following script to generate random data through the DataGen table and output the data to the Print result table.

+

When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs.

+
create table dataGenSource(
+  user_id string,
+  amount int
+) with (
+  'connector' = 'datagen',
+  'rows-per-second' = '1', -- Generate one row per second.
+  'fields.user_id.kind' = 'random', --Specify a random generator for the user_id field.
+  'fields.user_id.length' = '3' --Limit the length of user_id to 3.
+);
+
+create table printSink(
+  user_id string,
+  amount int
+) with (
+  'connector' = 'print'
+);
+
+insert into printSink select * from dataGenSource;
+

After the job is submitted, the job status changes to Running. You can use either of the following methods to view the output result:

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0400.html b/docs/dli/sqlreference/dli_08_0400.html new file mode 100644 index 00000000..2bd55ec4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0400.html @@ -0,0 +1,564 @@ + + +

Redis Result Table

+

Function

DLI outputs the Flink job output data to Redis. Redis is a key-value storage system that supports multiple types of data structures. It can be used in scenarios such as caching, event publish/subscribe, and high-speed queuing. Redis supports direct read/write of strings, hashes, lists, queues, and sets. Redis works with in-memory datasets and provides persistence. For more information about Redis, visit https://redis.io/.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

1
+2
+3
+4
+5
+6
+7
+8
+9
create table redisSink (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'redis',
+  'host' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector to be used. Set this parameter to redis.

+

host

+

Yes

+

None

+

String

+

Redis connector address.

+

port

+

No

+

6379

+

Integer

+

Redis connector port.

+

password

+

No

+

None

+

String

+

Redis authentication password.

+

namespace

+

No

+

None

+

String

+

Redis key namespace.

+

For example, if the value is set to "person" and the key is "jack", the key stored in Redis is person:jack.

+

delimiter

+

No

+

:

+

String

+

Delimiter between the Redis key and namespace.

+

data-type

+

No

+

hash

+

String

+

Redis data type. Available values are as follows:

+
  • hash
  • list
  • set
  • sorted-set
  • string
+

For details about the constraints, see Constraints on data-type.

+

schema-syntax

+

No

+

fields

+

String

+

Redis schema semantics. Available values are as follows:

+
  • fields: applicable to all data types. This value indicates that multiple fields can be set and the value of each field is read when data is written.
  • fields-scores: applicable to sorted-set data, indicating that each field is read as an independent score.
  • array: applicable to list, set, and sorted-set data.
  • array-scores: applicable to sorted-set data.
  • map: applicable to hash and sorted-set data.
+

For details about the constraints, see Constraints on schema-syntax.

+

deploy-mode

+

No

+

standalone

+

String

+

Deployment mode of the Redis cluster. The value can be standalone, master-replica, or cluster. The default value is standalone.

+

For details about the setting, see the instance type description of the Redis cluster.

+

retry-count

+

No

+

5

+

Integer

+

Number of attempts to connect to the Redis cluster.

+

connection-timeout-millis

+

No

+

10000

+

Integer

+

Maximum timeout for connecting to the Redis cluster.

+

commands-timeout-millis

+

No

+

2000

+

Integer

+

Maximum time for waiting for a completion response.

+

rebalancing-timeout-millis

+

No

+

15000

+

Integer

+

Sleep time when the Redis cluster fails.

+

default-score

+

No

+

0

+

Double

+

Default score when data-type is sorted-set.

+

ignore-retraction

+

No

+

false

+

Boolean

+

Whether to ignore Retract messages.

+

skip-null-values

+

No

+

true

+

Boolean

+

Whether null values will be skipped. If this parameter is false, null is written for null values.

+

key-ttl-mode

+

No

+

no-ttl

+

String

+

Whether the Redis sink TTL function will be enabled. The value can be no-ttl, expire-msec, expire-at-date or expire-at-timestamp.

+
  • no-ttl: No expiration time is set.
  • expire-msec: validity period of the key. The parameter is a long string, in milliseconds.
  • expire-at-date: Date and time when the key expires. The value is in UTC time format.
  • expire-at-timestamp: Timestamp when the key expires.
+

key-ttl

+

No

+

None

+

String

+

Supplementary parameter of key-ttl-mode. Available values are as follows:

+
  • If key-ttl-mode is no-ttl, this parameter does not need to be configured.
  • If key-ttl-mode is expire-msec, set this parameter to a string that can be parsed into the Long type. For example, 5000 indicates that the key will expire in 5000 ms.
  • If key-ttl-mode is expire-at-date, set this parameter to a date. For example, 2011-12-03T10:15:30 indicates that the expiration time is 2011-12-03 18:15:30 (UTC+8).
  • If key-ttl-mode is expire-at-timestamp, set this parameter to a timestamp, in milliseconds. For example, 1679385600000 indicates that the expiration time is 2023-03-21 16:00:00.
+
+
+
+
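As a hedged sketch of the options above, the DDL below writes hash values under a namespace and lets each key expire after one hour; the Redis address and password are placeholders.

create table redisTtlSink (
  order_id string,
  pay_amount double,
  primary key (order_id) not enforced  -- order_id becomes the Redis key
) with (
  'connector' = 'redis',
  'host' = 'RedisAddress',
  'port' = '6379',
  'password' = 'RedisPassword',
  'namespace' = 'order',               -- keys are stored as order:<order_id>
  'data-type' = 'hash',
  'schema-syntax' = 'fields',
  'key-ttl-mode' = 'expire-msec',
  'key-ttl' = '3600000'                -- 3600000 ms = 1 hour
);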

Example

In this example, data is read from the Kafka data source and written to the Redis result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where Redis is located, and bind the connection to the required Flink elastic resource pool.
  2. Set Redis security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Redis address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourTopic>',
    +  'properties.bootstrap.servers' = '<yourKafka>:<port>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +--In the following redisSink table, data-type is set to default value hash, schema-syntax is fields, and order_id is defined as the primary key. Therefore, the value of this field is used as the Redis key.
    +CREATE TABLE redisSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  primary key (order_id) not enforced
    +) WITH (
    +  'connector' = 'redis',
    +  'host' = '<yourRedis>',
    +  'password' = '<yourPassword>',
    +  'deploy-mode' = 'master-replica',
    +  'schema-syntax' = 'fields'
    +);
    +
    +insert into redisSink select * from orders;
    +
    +
  4. Connect to the Kafka cluster and insert the following test data into Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
  5. Run the following commands in Redis and view the result:
    • Obtain the result whose key is 202103241606060001.

      Run the following command:

      +
      HGETALL 202103241606060001
      +
      Command output:
       1) "user_id"
      + 2) "0001"
      + 3) "user_name"
      + 4) "Alice"
      + 5) "pay_amount"
      + 6) "200.0"
      + 7) "real_pay"
      + 8) "180.0"
      + 9) "order_time"
      +10) "2021-03-24 16:06:06"
      +11) "area_id"
      +12) "330106"
      +13) "order_channel"
      +14) "appShop"
      +15) "pay_time"
      +16) "2021-03-24 16:10:06"
      +
      +
    • Obtain the result whose key is 202103241000000001.

      Run the following command:

      +
      HGETALL 202103241000000001
      +
      Command output:
       1) "user_id"
      + 2) "0001"
      + 3) "user_name"
      + 4) "Alice"
      + 5) "pay_amount"
      + 6) "100.0"
      + 7) "real_pay"
      + 8) "100.0"
      + 9) "order_time"
      +10) "2021-03-24 10:00:00"
      +11) "area_id"
      +12) "330106"
      +13) "order_channel"
      +14) "webShop"
      +15) "pay_time"
      +16) "2021-03-24 10:02:03"
      +
      +
    +
+
+

FAQ

+
+ +
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0401.html b/docs/dli/sqlreference/dli_08_0401.html new file mode 100644 index 00000000..fc57477b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0401.html @@ -0,0 +1,224 @@ + + +

Upsert Kafka Result Table

+

Function

Apache Kafka is a fast, scalable, and fault-tolerant distributed message publishing and subscription system. It delivers high throughput and built-in partitions and provides data replicas and fault tolerance. Apache Kafka is applicable to scenarios of handling massive messages. DLI outputs the Flink job output data to Kafka in upsert mode.

+

The Upsert Kafka connector allows for reading data from and writing data into Kafka topics in the upsert fashion.

+

As a sink, the Upsert Kafka connector can consume a changelog stream. It writes INSERT/UPDATE_AFTER data as normal Kafka message values, and writes DELETE data as Kafka messages with null values (indicating tombstones for the keys). Flink guarantees message ordering on the primary key by partitioning data on the values of the primary key columns, so UPDATE/DELETE messages on the same key fall into the same partition.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
create table upsertKafkaSink(
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (','PRIMARY KEY (attr_name, ...) NOT ENFORCED)
+)
+with (
+  'connector' = 'upsert-kafka',
+  'topic' = '',
+  'properties.bootstrap.servers' = '',
+  'key.format' = '',
+  'value.format' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Type

+

Description

+

connector

+

Yes

+

(none)

+

String

+

Connector to be used. Set this parameter to upsert-kafka.

+

topic

+

Yes

+

(none)

+

String

+

Kafka topic name.

+

properties.bootstrap.servers

+

Yes

+

(none)

+

String

+

Comma-separated list of Kafka brokers.

+

key.format

+

Yes

+

(none)

+

String

+

Format used to deserialize and serialize the key part of Kafka messages. The key fields are specified by the PRIMARY KEY syntax. The following formats are supported:

+
  • csv
  • json
  • avro
+

Refer to Format for more details and format parameters.

+

key.fields-prefix

+

No

+

(none)

+

String

+

Defines a custom prefix for all fields of the key format to avoid name clashes with fields of the value format.

+

By default, the prefix is empty. If a custom prefix is defined, both the table schema and key.fields work with prefixed names. When constructing the data type of the key format, the prefix is removed and the non-prefixed names are used within the key format. Note that this option requires value.fields-include to be set to EXCEPT_KEY (see the sketch after this table).

+

value.format

+

Yes

+

(none)

+

String

+

Format used to deserialize and serialize the value part of Kafka messages. The following formats are supported:

+
  • csv
  • json
  • avro
+

Refer to Format for more details and format parameters.

+

value.fields-include

+

No

+

'ALL'

+

String

+

Controls which fields should appear in the value part. Options:

+
  • ALL: All fields in the schema, including the primary key field, are included in the value part.
  • EXCEPT_KEY: All the fields of the table schema are included, except the primary key field.
+

sink.parallelism

+

No

+

(none)

+

Integer

+

Defines the parallelism of the Upsert Kafka sink operator. By default, the parallelism is determined by the framework using the same parallelism of the upstream chained operator.

+

properties.*

+

No

+

(none)

+

String

+

This option can be used to set and pass arbitrary Kafka configurations.

+

The suffix of this parameter must match a parameter defined in the Kafka configuration documentation. Flink removes the properties. prefix and passes the transformed key and value to the underlying KafkaClient.

+

For example, you can disable automatic topic creation via 'properties.allow.auto.create.topics' = 'false'. However, some configurations cannot be set because Flink overrides them, for example, 'key.deserializer' and 'value.deserializer'.

+
+
+
+
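A minimal sketch showing key.fields-prefix together with value.fields-include set to EXCEPT_KEY, as the note above requires; the topic and broker addresses are placeholders. The key column carries the k_ prefix in the table schema, and the prefix is stripped when the key is serialized.

create table upsertKafkaPrefixedSink (
  k_order_id string,
  order_channel string,
  pay_amount double,
  PRIMARY KEY (k_order_id) NOT ENFORCED  -- the primary key defines the Kafka message key
) with (
  'connector' = 'upsert-kafka',
  'topic' = 'KafkaSinkTopic',
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
  'key.format' = 'json',
  'key.fields-prefix' = 'k_',
  'value.format' = 'json',
  'value.fields-include' = 'EXCEPT_KEY'  -- the value part excludes the prefixed key column
);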

Example

In this example, Kafka source topic data is read from the Kafka source table and written to the Kafka sink topic through the Upsert Kafka result table.

+
  1. Create an enhanced datasource connection in the VPC and subnet where Kafka is located, and bind the connection to the required Flink elastic resource pool.
  2. Set Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  3. Create a Flink OpenSource SQL job. Enter the following job script and submit the job.
    When you create a job, set Flink Version to 1.12 on the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "json"
    +);
    +CREATE TABLE UPSERTKAFKASINK (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  PRIMARY KEY (order_id) NOT ENFORCED
    +) WITH (
    +  'connector' = 'upsert-kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' =  'KafkaAddress1:KafkaPort,KafkAddress2:KafkaPort',
    +  'key.format' = 'json',
    +  'value.format' = 'json'
    +);
    +insert into UPSERTKAFKASINK
    +select * from orders;
    +
    +
  4. Connect to the Kafka cluster and send the following test data to the Kafka source topic:
    {"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
  5. Connect to the Kafka cluster and read data from the Kafka sink topic. The result is as follows:
    {"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
+
+

FAQ

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0402.html b/docs/dli/sqlreference/dli_08_0402.html new file mode 100644 index 00000000..d2015c5f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0402.html @@ -0,0 +1,21 @@ + + +

Creating Dimension Tables

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0403.html b/docs/dli/sqlreference/dli_08_0403.html new file mode 100644 index 00000000..d85cd575 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0403.html @@ -0,0 +1,358 @@ + + +

GaussDB(DWS) Dimension Table

+

Function

Create a GaussDB(DWS) table to connect to source streams for wide table generation.

+
+

Prerequisites

+
+

Precautions

When you create a Flink OpenSource SQL job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
create table dwsSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+with (
+  'connector' = 'gaussdb',
+  'url' = '',
+  'table-name' = '',
+  'username' = '',
+  'password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Types

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector type. Set this parameter to gaussdb.

+

url

+

Yes

+

None

+

String

+

JDBC connection address.

+

If you use the gsjdbc4 driver, set the value in jdbc:postgresql://${ip}:${port}/${dbName} format.

+

If you use the gsjdbc200 driver, set the value in jdbc:gaussdb://${ip}:${port}/${dbName} format.

+

table-name

+

Yes

+

None

+

String

+

Name of the database table from which data will be read.

+

driver

+

No

+

None

+

String

+

JDBC connection driver. The default value is org.postgresql.Driver.

+

username

+

No

+

None

+

String

+

Database authentication user name. This parameter must be configured in pair with password.

+

password

+

No

+

None

+

String

+

Database authentication password. This parameter must be configured in pair with username.

+

scan.partition.column

+

No

+

None

+

String

+

Name of the column used to partition the input

+

This parameter must be set when scan.partition.lower-bound, scan.partition.upper-bound, and scan.partition.num are all configured, and should not be set when the other three parameters are not (see the sketch after this table).

+

scan.partition.lower-bound

+

No

+

None

+

Integer

+

Lower bound of values to be fetched for the first partition

+

This parameter must be set when scan.partition.column, scan.partition.upper-bound, and scan.partition.num are all configured, and should not be set when the other three parameters are not.

+

scan.partition.upper-bound

+

No

+

None

+

Integer

+

Upper bound of values to be fetched for the last partition

+

This parameter must be set when scan.partition.column, scan.partition.lower-bound, and scan.partition.num are all configured, and should not be set when the other three parameters are not.

+

scan.partition.num

+

No

+

None

+

Integer

+

Number of partitions to be created

+

This parameter must be set when scan.partition.column, scan.partition.lower-bound, and scan.partition.upper-bound are all configured, and should not be set when the other three parameters are not.

+

scan.fetch-size

+

No

+

0

+

Integer

+

Number of rows fetched from the database each time. The default value 0 indicates that the number of rows is not limited.

+

scan.auto-commit

+

No

+

true

+

Boolean

+

Automatic commit flag.

+

It determines whether each statement is committed in a transaction automatically.

+

lookup.cache.max-rows

+

No

+

None

+

Integer

+

The max number of rows of lookup cache. Caches exceeding the TTL will be expired.

+

Lookup cache is disabled by default.

+

lookup.cache.ttl

+

No

+

+

None

+

Duration

+

Maximum time to live (TTL) for each row in the lookup cache. Cached rows older than the TTL expire. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit).

+

Lookup cache is disabled by default.

+

lookup.max-retries

+

No

+

3

+

Integer

+

Maximum retry times if lookup database failed.

+
+
+
+
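The example below relies on the lookup cache only. As a hedged sketch, the DDL here shows how the four scan.partition parameters above might be set together when the table is also read as a bounded source; the connection values are placeholders, and the numeric partition column id is a hypothetical column introduced for illustration.

create table area_info_partitioned (
  id int,
  area_id string,
  region_name string
) with (
  'connector' = 'gaussdb',
  'url' = 'jdbc:postgresql://DwsAddress:DwsPort/DwsDbName',
  'table-name' = 'area_info',
  'username' = 'DwsUserName',
  'password' = 'DwsPassword',
  'scan.partition.column' = 'id',         -- hypothetical numeric column used for splitting
  'scan.partition.lower-bound' = '1',
  'scan.partition.upper-bound' = '10000',
  'scan.partition.num' = '4',             -- read the table with four parallel partitions
  'scan.fetch-size' = '500'               -- fetch 500 rows from the database at a time
);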

Example

Read data from a Kafka source table and use a GaussDB(DWS) table as the dimension table. Write the wide table generated by joining the source and dimension tables to a Kafka result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where DWS and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set GaussDB(DWS) and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the DWS and Kafka address. If the connection passes the test, it is bound to the queue.
  3. Connect to the GaussDB(DWS) database instance, create a table as a dimension table, and name the table area_info. Example SQL statements are as follows:
    create table public.area_info(
    +  area_id VARCHAR,
    +  area_province_name VARCHAR,
    +  area_city_name VARCHAR,
    +  area_county_name VARCHAR,
    +  area_street_name VARCHAR,
    +  region_name VARCHAR);
    +
  4. Connect to the database and run the following statement to insert test data into the dimension table area_info:
      insert into area_info
    +  (area_id, area_province_name, area_city_name, area_county_name, area_street_name, region_name) 
    +  values
    +  ('330102', 'a1', 'b1', 'c1', 'd1', 'e1'),
    +  ('330106', 'a1', 'b1', 'c2', 'd2', 'e1'),
    +  ('330108', 'a1', 'b1', 'c3', 'd3', 'e1'),
    +  ('330110', 'a1', 'b1', 'c4', 'd4', 'e1');
    +
  5. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses Kafka as the data source and a GaussDB(DWS) table as the dimension table. Data is output to a Kafka result table.
    When you create a job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Set the values of the parameters in bold in the following script as needed.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  proctime as Proctime()
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSourceTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'dws-order',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +-- Create an address dimension table
    +create table area_info (
    +    area_id string, 
    +    area_province_name string, 
    +    area_city_name string, 
    +    area_county_name string,
    +    area_street_name string, 
    +    region_name string 
    +) WITH (
    +  'connector' = 'gaussdb',
    +  'driver' = 'org.postgresql.Driver',
    +  'url' = 'jdbc:gaussdb://DwsAddress:DwsPort/DwsDbName',
    +  'table-name' = 'area_info',
    +  'username' = 'DwsUserName',
    +  'password' = 'DwsPassword',
    +  'lookup.cache.max-rows' = '10000',
    +  'lookup.cache.ttl' = '2h'
    +);
    +
    +-- Generate a wide table based on the address dimension table containing detailed order information.
    +create table order_detail(
    +    order_id string,
    +    order_channel string,
    +    order_time string,
    +    pay_amount double,
    +    real_pay double,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string,
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string,
    +    area_street_name string,
    +    region_name string
    +) with (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSinkTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'format' = 'json'
    +);
    +
    +insert into order_detail
    +    select orders.order_id, orders.order_channel, orders.order_time, orders.pay_amount, orders.real_pay, orders.pay_time, orders.user_id, orders.user_name,
    +           area.area_id, area.area_province_name, area.area_city_name, area.area_county_name,
    +           area.area_street_name, area.region_name  from orders
    +    left join area_info for system_time as of orders.proctime as area on orders.area_id = area.area_id;
    +
    +
  6. Connect to the Kafka cluster and insert the following test data into the source topic in Kafka:
    {"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
  7. Connect to the Kafka cluster and read data from the sink topic of Kafka. The result is as follows:
    {"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106","area_province_name":"a1","area_city_name":"b1","area_county_name":"c2","area_street_name":"d2","region_name":"e1"}
    +
    +{"order_id":"202103251202020001","order_channel":"miniAppShop","order_time":"2021-03-25 12:02:02","pay_amount":60.0,"real_pay":60.0,"pay_time":"2021-03-25 12:03:00","user_id":"0002","user_name":"Bob","area_id":"330110","area_province_name":"a1","area_city_name":"b1","area_county_name":"c4","area_street_name":"d4","region_name":"e1"}
    +
    +{"order_id":"202103251505050001","order_channel":"qqShop","order_time":"2021-03-25 15:05:05","pay_amount":500.0,"real_pay":400.0,"pay_time":"2021-03-25 15:10:00","user_id":"0003","user_name":"Cindy","area_id":"330108","area_province_name":"a1","area_city_name":"b1","area_county_name":"c3","area_street_name":"d3","region_name":"e1"}
    +
+
+

FAQs

+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0404.html b/docs/dli/sqlreference/dli_08_0404.html new file mode 100644 index 00000000..6e676394 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0404.html @@ -0,0 +1,337 @@ + + +

HBase Dimension Table

+

Function

Create an HBase dimension table to connect to the source streams for wide table generation.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table hbaseSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+ )
+with (
+  'connector' = 'hbase-2.2',
+  'table-name' = '',
+  'zookeeper.quorum' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector type. Set this parameter to hbase-2.2.

+

table-name

+

Yes

+

None

+

String

+

Name of the HBase table

+

zookeeper.quorum

+

Yes

+

None

+

String

+

HBase Zookeeper quorum. The format is ZookeeperAddress:ZookeeperPort.

+

The following describes how to obtain the ZooKeeper IP address and port number:

+
  • On the MRS Manager console, choose Cluster > Name of the desired cluster > Service > ZooKeeper > Instance. On the displayed page, obtain the IP address of the ZooKeeper instance.
  • On the MRS Manager console, choose Cluster > Name of the desired cluster > Service > ZooKeeper > Configuration, and click All Configurations. Search for the clientPort parameter, and obtain the ZooKeeper port number.
+

zookeeper.znode.parent

+

No

+

/hbase

+

String

+

Root directory in ZooKeeper for the HBase cluster.

+

lookup.async

+

No

+

false

+

Boolean

+

Whether async lookup is enabled.

+

lookup.cache.max-rows

+

No

+

-1

+

Long

+

The max number of rows of lookup cache. Caches exceeding the TTL will be expired.

+

Lookup cache is disabled by default.

+

lookup.cache.ttl

+

No

+

-1

+

Long

+

Maximum time to live (TTL) for each row in the lookup cache. Cached rows older than the TTL expire. The format is {length value}{time unit label}, for example, 123ms, 321s. The supported time units include d, h, min, s, and ms (default unit).

+

Lookup cache is disabled by default.

+

lookup.max-retries

+

No

+

3

+

Integer

+

Maximum retry times if lookup database failed.

+
+
+
+
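A hedged sketch of an HBase dimension table that enables asynchronous lookup and the lookup cache described above; the table name, column family, and ZooKeeper address are placeholders.

create table area_info_cached (
  area_id string,                     -- row key used for the lookup join
  detail Row(region_name string)
) with (
  'connector' = 'hbase-2.2',
  'table-name' = 'area_info',
  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort',
  'lookup.async' = 'true',            -- enable asynchronous lookup
  'lookup.cache.max-rows' = '10000',  -- cache at most 10000 rows
  'lookup.cache.ttl' = '2h',          -- cached rows expire after two hours
  'lookup.max-retries' = '3'
);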

Data Type Mapping

HBase stores all data as byte arrays. The data needs to be serialized and deserialized during read and write operations.

+

When serializing and deserializing, the Flink HBase connector uses the utility class org.apache.hadoop.hbase.util.Bytes provided by HBase (Hadoop) to convert Flink data types to and from byte arrays.

+

The Flink HBase connector encodes null values as empty bytes and decodes empty bytes as null values for all data types except the string type. For the string type, the null literal is determined by the null-string-literal option.
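In Flink SQL, the row key is declared as an atomic column and each HBase column family is declared as a ROW type whose fields correspond to the column qualifiers, as in the example later in this section. A minimal sketch (table, family, and qualifier names are illustrative):

create table hbaseDim (
  area_id string,                        -- maps to the HBase row key
  detail row(                            -- column family 'detail'
    area_province_name string,           -- qualifier detail:area_province_name
    region_name string                   -- qualifier detail:region_name
  )
) with (
  'connector' = 'hbase-2.2',
  'table-name' = 'area_info',
  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort'
);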

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data type mapping

Flink SQL Type

+

HBase Conversion

+

CHAR / VARCHAR / STRING

+

byte[] toBytes(String s)

+

String toString(byte[] b)

+

BOOLEAN

+

byte[] toBytes(boolean b)

+

boolean toBoolean(byte[] b)

+

BINARY / VARBINARY

+

Return byte[] as is.

+

DECIMAL

+

byte[] toBytes(BigDecimal v)

+

BigDecimal toBigDecimal(byte[] b)

+

TINYINT

+

new byte[] { val }

+

bytes[0] // returns first and only byte from bytes

+

SMALLINT

+

byte[] toBytes(short val)

+

short toShort(byte[] bytes)

+

INT

+

byte[] toBytes(int val)

+

int toInt(byte[] bytes)

+

BIGINT

+

byte[] toBytes(long val)

+

long toLong(byte[] bytes)

+

FLOAT

+

byte[] toBytes(float val)

+

float toFloat(byte[] bytes)

+

DOUBLE

+

byte[] toBytes(double val)

+

double toDouble(byte[] bytes)

+

DATE

+

Number of days since 1970-01-01 00:00:00 UTC. The value is an integer.

+

TIME

+

Number of milliseconds since 1970-01-01 00:00:00 UTC. The value is an integer.

+

TIMESTAMP

+

Number of milliseconds since 1970-01-01 00:00:00 UTC. The value is of the long type.

+

ARRAY

+

Not supported

+

MAP / MULTISET

+

Not supported

+

ROW

+

Not supported

+
+
+
+

Example

In this example, data is read from a Kafka data source, an HBase table is used as a dimension table to generate a wide table, and the result is written to a Kafka result table. The procedure is as follows (the HBase versions in this example are 1.3.1 and 2.2.3):

+
  1. Create an enhanced datasource connection in the VPC and subnet where HBase and Kafka are located, and bind the connection to the required Flink elastic resource pool. Add MRS host information to the enhanced datasource connection.
  2. Set HBase and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the HBase and Kafka addresses. If the test passes, the connection is bound to the queue.
  3. Create an HBase table named area_info using the HBase shell. The table has only one column family, detail. The creation statement is as follows:
    create 'area_info', {NAME => 'detail'}
    +
  4. Run the following statement in the HBase shell to insert dimension table data:
    put 'area_info', '330106', 'detail:area_province_name', 'a1'
    +put 'area_info', '330106', 'detail:area_city_name', 'b1'
    +put 'area_info', '330106', 'detail:area_county_name', 'c2'
    +put 'area_info', '330106', 'detail:area_street_name', 'd2'
    +put 'area_info', '330106', 'detail:region_name', 'e1'
    +
    +put 'area_info', '330110', 'detail:area_province_name', 'a1'
    +put 'area_info', '330110', 'detail:area_city_name', 'b1'
    +put 'area_info', '330110', 'detail:area_county_name', 'c4'
    +put 'area_info', '330110', 'detail:area_street_name', 'd4'
    +put 'area_info', '330110', 'detail:region_name', 'e1'
    +
  5. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses Kafka as the data source and an HBase table as the dimension table. Data is output to a Kafka result table.
    When you create a job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Set the values of the parameters in bold in the following script as needed.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  proctime as Proctime()
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSourceTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +-- Create an address dimension table
    +create table area_info (
    +  area_id string,   
    +  detail row(
    +    area_province_name string, 
    +    area_city_name string, 
    +    area_county_name string, 
    +    area_street_name string, 
    +    region_name string) 
    +) WITH (
    +  'connector' = 'hbase-2.2',
    +  'table-name' = 'area_info',
    +  'zookeeper.quorum' = 'ZookeeperAddress:ZookeeperPort',
    +  'lookup.async' = 'true',
    +  'lookup.cache.max-rows' = '10000',
    +  'lookup.cache.ttl' = '2h'
    +);
    +
    +-- Generate a wide table based on the address dimension table containing detailed order information.
    +create table order_detail(
    +    order_id string,
    +    order_channel string,
    +    order_time string,
    +    pay_amount double,
    +    real_pay double,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string,
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string,
    +    area_street_name string,
    +    region_name string
    +) with (
    +  'connector' = 'kafka',
    +  'topic' = '<yourSinkTopic>',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'format' = 'json'
    +);
    +
    +insert into order_detail
    +    select orders.order_id, orders.order_channel, orders.order_time, orders.pay_amount, orders.real_pay, orders.pay_time, orders.user_id, orders.user_name,
    +           area.area_id, area.area_province_name, area.area_city_name, area.area_county_name,
    +           area.area_street_name, area.region_name  from orders
    +    left join area_info for system_time as of orders.proctime as area on orders.area_id = area.area_id;
    +
    +
  6. Connect to the Kafka cluster and insert the following test data into the source topic in Kafka:
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
  7. Connect to the Kafka cluster and read data from the sink topic of Kafka. The result data is as follows:
    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106","area_province_name":"a1","area_city_name":"b1","area_county_name":"c2","area_street_name":"d2","region_name":"e1"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106","area_province_name":"a1","area_city_name":"b1","area_county_name":"c2","area_street_name":"d2","region_name":"e1"}
    +
    +{"order_id":"202103251202020001","order_channel":"miniAppShop","order_time":"2021-03-25 12:02:02","pay_amount":60.0,"real_pay":60.0,"pay_time":"2021-03-25 12:03:00","user_id":"0002","user_name":"Bob","area_id":"330110","area_province_name":"a1","area_city_name":"b1","area_county_name":"c4","area_street_name":"d4","region_name":"e1"}
    +
+
+

FAQs

Q: What should I do if Flink job logs contain the following error information?

+
org.apache.zookeeper.ClientCnxn$SessionTimeoutException: Client session timed out, have not heard from server in 90069ms for connection id 0x0
+

A: The datasource connection is not bound or the binding fails. Configure the datasource connection or configure the security group of the Kafka cluster to allow access from the DLI queue.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0405.html b/docs/dli/sqlreference/dli_08_0405.html new file mode 100644 index 00000000..0aab9b20 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0405.html @@ -0,0 +1,469 @@ + + +

JDBC Dimension Table

+

Create a JDBC dimension table to connect to the source stream.

+

Prerequisites

You have created a JDBC instance for your account.

+
+

Precautions

When you create a Flink OpenSource SQL job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs.

+
+

Syntax

 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
CREATE TABLE  table_id (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+)
+  WITH (
+  'connector' = 'jdbc',
+  'url' = '',
+  'table-name' = '',
+  'driver' = '',
+  'username' = '',
+  'password' = ''
+);
+
+ +
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter descriptions

Parameter

+

Mandatory

+

Description

+

connector

+

Yes

+

Data source type. The value is fixed to jdbc.

+

url

+

Yes

+

Database URL

+

table-name

+

Yes

+

Name of the database table from which data will be read

+

driver

+

No

+

Driver class required for connecting to the database. If you do not set this parameter, the driver is automatically derived from the URL.

+

username

+

No

+

Database authentication username. This parameter must be configured together with password.

+

password

+

No

+

Database authentication password. This parameter must be configured together with username.

+

scan.partition.column

+

No

+

Name of the column used to partition the input

+

This parameter must be set when scan.partition.lower-bound, scan.partition.upper-bound, and scan.partition.num are all configured, and must not be set when the other three parameters are not. A usage sketch follows this table.

+

scan.partition.lower-bound

+

No

+

Lower bound of values to be fetched for the first partition

+

This parameter must be set when scan.partition.column, scan.partition.upper-bound, and scan.partition.num are all configured, and must not be set when the other three parameters are not.

+

scan.partition.upper-bound

+

No

+

Upper bound of values to be fetched for the last partition

+

This parameter must be set when scan.partition.column, scan.partition.lower-bound, and scan.partition.num are all configured, and must not be set when the other three parameters are not.

+

scan.partition.num

+

No

+

Number of partitions to be created

+

This parameter must be set when scan.partition.column, scan.partition.lower-bound, and scan.partition.upper-bound are all configured, and must not be set when the other three parameters are not.

+

scan.fetch-size

+

No

+

Number of rows fetched from the database each time. The default value is 0, indicating the hint is ignored.

+

lookup.cache.max-rows

+

No

+

Maximum number of cached rows in a dimension table. If the number of cached rows exceeds this value, the oldest rows are deleted. The value -1 indicates that the data cache is disabled.

+

lookup.cache.ttl

+

No

+

Maximum time to live (TTL) for every row in the lookup cache. Rows older than the TTL are expired. The format is {length value}{time unit label}, for example, 123ms or 321s. The supported time units are d, h, min, s, and ms (default unit).

+

lookup.max-retries

+

No

+

Maximum number of attempts to obtain data from the dimension table. The default value is 3.

+
+
+
+
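As described in the table above, the four scan.partition options must be configured together. The following is a minimal sketch of a partitioned JDBC scan; the URL, table name, credentials, and bounds are placeholder values.

create table jdbcSource (
  id int,
  name string
) with (
  'connector' = 'jdbc',
  'url' = 'jdbc:mysql://JDBCAddress:JDBCPort/flink',
  'table-name' = 'demo_table',              -- placeholder table name
  'username' = 'JDBCUserName',
  'password' = 'JDBCPassWord',
  'scan.partition.column' = 'id',           -- column used to partition the input
  'scan.partition.lower-bound' = '1',       -- lower bound of the first partition
  'scan.partition.upper-bound' = '10000',   -- upper bound of the last partition
  'scan.partition.num' = '4'                -- number of partitions to create
);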

Data Type Mapping

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data type mapping

MySQL Type

+

PostgreSQL Type

+

Flink SQL Type

+

TINYINT

+

-

+

TINYINT

+

SMALLINT

+

TINYINT UNSIGNED

+

SMALLINT

+

INT2

+

SMALLSERIAL

+

SERIAL2

+

SMALLINT

+

INT

+

MEDIUMINT

+

SMALLINT UNSIGNED

+

INTEGER

+

SERIAL

+

INT

+

BIGINT

+

INT UNSIGNED

+

BIGINT

+

BIGSERIAL

+

BIGINT

+

BIGINT UNSIGNED

+

-

+

DECIMAL(20, 0)

+

BIGINT

+

BIGINT

+

BIGINT

+

FLOAT

+

REAL

+

FLOAT4

+

FLOAT

+

DOUBLE

+

DOUBLE PRECISION

+

FLOAT8

+

DOUBLE PRECISION

+

DOUBLE

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

NUMERIC(p, s)

+

DECIMAL(p, s)

+

DECIMAL(p, s)

+

BOOLEAN

+

TINYINT(1)

+

BOOLEAN

+

BOOLEAN

+

DATE

+

DATE

+

DATE

+

TIME [(p)]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

TIME [(p)] [WITHOUT TIMEZONE]

+

DATETIME [(p)]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

TIMESTAMP [(p)] [WITHOUT TIMEZONE]

+

CHAR(n)

+

VARCHAR(n)

+

TEXT

+

CHAR(n)

+

CHARACTER(n)

+

VARCHAR(n)

+

CHARACTER VARYING(n)

+

TEXT

+

STRING

+

BINARY

+

VARBINARY

+

BLOB

+

BYTEA

+

BYTES

+

-

+

ARRAY

+

ARRAY

+
+
+
+

Example

Read data from a Kafka source table, use a JDBC table as the dimension table, and write the wide table information generated by the source and dimension tables to a Kafka result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where MySQL and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set MySQL and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the MySQL and Kafka addresses. If the test passes, the connection is bound to the queue.
  3. Connect to the MySQL database instance, create a table in the flink database as a dimension table, and name the table area_info. Example SQL statements are as follows:
    CREATE TABLE `flink`.`area_info` (
    +	`area_id` VARCHAR(32) NOT NULL,
    +	`area_province_name` VARCHAR(32) NOT NULL,
    +	`area_city_name` VARCHAR(32) NOT NULL,
    +	`area_county_name` VARCHAR(32) NOT NULL,
    +	`area_street_name` VARCHAR(32) NOT NULL,
    +	`region_name` VARCHAR(32) NOT NULL,
    +	PRIMARY KEY (`area_id`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4
    +	COLLATE = utf8mb4_general_ci;
    +
  4. Connect to the MySQL database and run the following statement to insert test data into the JDBC dimension table area_info:
    insert into flink.area_info
    +  (area_id, area_province_name, area_city_name, area_county_name, area_street_name, region_name) 
    +  values
    +  ('330102', 'a1', 'b1', 'c1', 'd1', 'e1'),
    +  ('330106', 'a1', 'b1', 'c2', 'd2', 'e1'),
    +  ('330108', 'a1', 'b1', 'c3', 'd3', 'e1'),  ('330110', 'a1', 'b1', 'c4', 'd4', 'e1');
    +
  5. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses Kafka as the data source and a JDBC table as the dimension table. Data is output to a Kafka result table.
    When you create a job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Set the values of the parameters in bold in the following script as needed.
     1
    + 2
    + 3
    + 4
    + 5
    + 6
    + 7
    + 8
    + 9
    +10
    +11
    +12
    +13
    +14
    +15
    +16
    +17
    +18
    +19
    +20
    +21
    +22
    +23
    +24
    +25
    +26
    +27
    +28
    +29
    +30
    +31
    +32
    +33
    +34
    +35
    +36
    +37
    +38
    +39
    +40
    +41
    +42
    +43
    +44
    +45
    +46
    +47
    +48
    +49
    +50
    +51
    +52
    +53
    +54
    +55
    +56
    +57
    +58
    +59
    +60
    +61
    +62
    +63
    +64
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  proctime as Proctime()
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSourceTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'jdbc-order',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +-- Create an address dimension table
    +create table area_info (
    +    area_id string, 
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string, 
    +    area_street_name string, 
    +    region_name string 
    +) WITH (
    +  'connector' = 'jdbc',
    +  'url' = 'jdbc:mysql://JDBCAddress:JDBCPort/flink',--flink is the MySQL database where the area_info table locates.
    +  'table-name' = 'area_info',
    +  'username' = 'JDBCUserName',
    +  'password' = 'JDBCPassWord'
    +);
    +
    +-- Generate a wide table based on the address dimension table containing detailed order information.
    +create table order_detail(
    +    order_id string,
    +    order_channel string,
    +    order_time string,
    +    pay_amount double,
    +    real_pay double,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string,
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string,
    +    area_street_name string,
    +    region_name string
    +) with (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaSinkTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'format' = 'json'
    +);
    +
    +insert into order_detail
    +    select orders.order_id, orders.order_channel, orders.order_time, orders.pay_amount, orders.real_pay, orders.pay_time, orders.user_id, orders.user_name,
    +           area.area_id, area.area_province_name, area.area_city_name, area.area_county_name,
    +           area.area_street_name, area.region_name  from orders 
    +           left join area_info for system_time as of orders.proctime as area on orders.area_id = area.area_id;
    +
    + +
    +
    +
  6. Connect to the Kafka cluster and insert the following test data into the source topic in Kafka:
    {"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
  7. Connect to the Kafka cluster and read data from the sink topic of Kafka.
    {"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106","area_province_name":"a1","area_city_name":"b1","area_county_name":"c2","area_street_name":"d2","region_name":"e1"}
    +
    +{"order_id":"202103251202020001","order_channel":"miniAppShop","order_time":"2021-03-25 12:02:02","pay_amount":60.0,"real_pay":60.0,"pay_time":"2021-03-25 12:03:00","user_id":"0002","user_name":"Bob","area_id":"330110","area_province_name":"a1","area_city_name":"b1","area_county_name":"c4","area_street_name":"d4","region_name":"e1"}
    +
    +{"order_id":"202103251505050001","order_channel":"qqShop","order_time":"2021-03-25 15:05:05","pay_amount":500.0,"real_pay":400.0,"pay_time":"2021-03-25 15:10:00","user_id":"0003","user_name":"Cindy","area_id":"330108","area_province_name":"a1","area_city_name":"b1","area_county_name":"c3","area_street_name":"d3","region_name":"e1"}
    +
+
+

FAQs

None

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0406.html b/docs/dli/sqlreference/dli_08_0406.html new file mode 100644 index 00000000..2c9597af --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0406.html @@ -0,0 +1,384 @@ + + +

Redis Dimension Table

+

Function

Create a Redis table to connect to source streams for wide table generation.

+
+

Prerequisites

+
+

Precautions

+
+

Syntax

create table redisSource (
+  attr_name attr_type 
+  (',' attr_name attr_type)* 
+  (',' watermark for rowtime_column_name as watermark-strategy_expression)
+  ,PRIMARY KEY (attr_name, ...) NOT ENFORCED
+)
+with (
+  'connector' = 'redis',
+  'host' = ''
+);
+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Data Types

+

Description

+

connector

+

Yes

+

None

+

String

+

Connector type. Set this parameter to redis.

+

host

+

Yes

+

None

+

String

+

Redis connector address

+

port

+

No

+

6379

+

Integer

+

Redis connector port

+

password

+

No

+

None

+

String

+

Redis authentication password

+

namespace

+

No

+

None

+

String

+

Redis key namespace

+

delimiter

+

No

+

:

+

String

+

Delimiter between the Redis key and namespace

+

data-type

+

No

+

hash

+

String

+

Redis data type. Available values are as follows:

+
  • hash
  • list
  • set
  • sorted-set
  • string
+

For details about the constraints, see Constraints on data-type.

+

schema-syntax

+

No

+

fields

+

String

+

Redis schema semantics. Available values are as follows:

+
  • fields: applicable to all data types
  • fields-scores: applicable to sorted set data
  • array: applicable to list, set, and sorted set data
  • array-scores: applicable to sorted set data
  • map: applicable to hash and sorted set data
+

For details about the constraints, see Constraints on schema-syntax.

+

deploy-mode

+

No

+

standalone

+

String

+

Deployment mode of the Redis cluster. The value can be standalone, master-replica, or cluster. The default value is standalone.

+

retry-count

+

Yes

+

5

+

Integer

+

Size of each connection request queue. If the number of connection requests in a queue exceeds the queue size, command calling will cause RedisException. Setting requestQueueSize to a small value will cause exceptions to occur earlier during overload or disconnection. A larger value indicates more time required to reach the boundary, but more requests may be queued and more heap space may be used. The default value is 2147483647.

+

connection-timeout-millis

+

No

+

10000

+

Integer

+

Maximum timeout for connecting to the Redis cluster

+

commands-timeout-millis

+

No

+

2000

+

Integer

+

Maximum time for waiting for a completion response

+

rebalancing-timeout-millis

+

No

+

15000

+

Integer

+

Sleep time when the Redis cluster fails

+

scan-keys-count

+

No

+

1000

+

Integer

+

Number of data records read in each scan

+

default-score

+

No

+

0

+

Double

+

Default score when data-type is sorted-set

+

deserialize-error-policy

+

No

+

fail-job

+

Enum

+

How to process a data parsing failure

+

Available values are as follows:

+
  • fail-job: Fail the job
  • skip-row: Skip the current data.
  • null-field: Set the current data to null.
+

skip-null-values

+

No

+

true

+

Boolean

+

Whether null values will be skipped

+

lookup.async

+

No

+

false

+

Boolean

+

Whether asynchronous I/O will be used when this table is used as a dimension table

+
+
+
+
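The namespace and delimiter options above determine how keys are composed, presumably as the namespace, then the delimiter, then the key value. A minimal sketch of a hash-typed dimension table, assuming keys such as dim:330106 are stored in Redis (the host and password are placeholders):

create table redisDim (
  area_id string,
  area_province_name string,
  region_name string,
  primary key (area_id) not enforced    -- Redis key
) with (
  'connector' = 'redis',
  'host' = 'RedisIP',
  'password' = 'RedisPassword',
  'namespace' = 'dim',                  -- keys are expected as dim:<area_id>
  'delimiter' = ':',                    -- default delimiter
  'data-type' = 'hash'
);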

Example

Read data from a Kafka source table, use a Redis table as the dimension table, and write the wide table information generated by the source and dimension tables to a Kafka result table. The procedure is as follows:

+
  1. Create an enhanced datasource connection in the VPC and subnet where Redis and Kafka are located, and bind the connection to the required Flink elastic resource pool.
  2. Set Redis and Kafka security groups and add inbound rules to allow access from the Flink queue. Test the connectivity using the Redis address. If the test passes, the connection is bound to the queue.
  3. Run the following commands on the Redis client to send data to Redis:
    HMSET 330102  area_province_name a1 area_city_name b1 area_county_name c1 area_street_name d1 region_name e1
    +
    +HMSET 330106  area_province_name a1 area_city_name b1 area_county_name c2 area_street_name d2 region_name e1
    +
    +HMSET 330108  area_province_name a1 area_city_name b1 area_county_name c3 area_street_name d3 region_name e1
    +
    +HMSET 330110  area_province_name a1 area_city_name b1 area_county_name c4 area_street_name d4 region_name e1
    +
  4. Create a Flink OpenSource SQL job. Enter the following job script and submit the job. The job script uses Kafka as the data source and a Redis table as the dimension table. Data is output to a Kafka result table.
    When you create a job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Set the values of the parameters in bold in the following script as needed.
    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time string,
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  proctime as Proctime()
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'kafkaSourceTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +-- Create an address dimension table
    +create table area_info (
    +    area_id string, 
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string, 
    +    area_street_name string, 
    +    region_name string, 
    +    primary key (area_id) not enforced -- Redis key
    +) WITH (
    +  'connector' = 'redis',
    +  'host' = 'RedisIP',
    +  'password' = 'RedisPassword',
    +  'data-type' = 'hash',
    +  'deploy-mode' = 'master-replica'
    +);
    +
    +-- Generate a wide table based on the address dimension table containing detailed order information.
    +create table order_detail(
    +    order_id string,
    +    order_channel string,
    +    order_time string,
    +    pay_amount double,
    +    real_pay double,
    +    pay_time string,
    +    user_id string,
    +    user_name string,
    +    area_id string,
    +    area_province_name string,
    +    area_city_name string,
    +    area_county_name string,
    +    area_street_name string,
    +    region_name string
    +) with (
    +  'connector' = 'kafka',
    +  'topic' = 'kafkaSinkTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'format' = 'json'
    +);
    +
    +insert into order_detail
    +    select orders.order_id, orders.order_channel, orders.order_time, orders.pay_amount, orders.real_pay, orders.pay_time, orders.user_id, orders.user_name,
    +           area.area_id, area.area_province_name, area.area_city_name, area.area_county_name,
    +           area.area_street_name, area.region_name  from orders
    +    left join area_info for system_time as of orders.proctime as area on orders.area_id = area.area_id;
    +
    +
  5. Connect to the Kafka cluster and insert the following test data into the source topic in Kafka:
    {"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103251202020001", "order_channel":"miniAppShop", "order_time":"2021-03-25 12:02:02", "pay_amount":"60.00", "real_pay":"60.00", "pay_time":"2021-03-25 12:03:00", "user_id":"0002", "user_name":"Bob", "area_id":"330110"}
    +
    +{"order_id":"202103251505050001", "order_channel":"qqShop", "order_time":"2021-03-25 15:05:05", "pay_amount":"500.00", "real_pay":"400.00", "pay_time":"2021-03-25 15:10:00", "user_id":"0003", "user_name":"Cindy", "area_id":"330108"}
    +
  6. Connect to the Kafka cluster and read data from the sink topic of Kafka. The result data is as follows:
    {"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106","area_province_name":"a1","area_city_name":"b1","area_county_name":"c2","area_street_name":"d2","region_name":"e1"}
    +
    +{"order_id":"202103251202020001","order_channel":"miniAppShop","order_time":"2021-03-25 12:02:02","pay_amount":60.0,"real_pay":60.0,"pay_time":"2021-03-25 12:03:00","user_id":"0002","user_name":"Bob","area_id":"330110","area_province_name":"a1","area_city_name":"b1","area_county_name":"c4","area_street_name":"d4","region_name":"e1"}
    +
    +{"order_id":"202103251505050001","order_channel":"qqShop","order_time":"2021-03-25 15:05:05","pay_amount":500.0,"real_pay":400.0,"pay_time":"2021-03-25 15:10:00","user_id":"0003","user_name":"Cindy","area_id":"330108","area_province_name":"a1","area_city_name":"b1","area_county_name":"c3","area_street_name":"d3","region_name":"e1"}
    +
    +
+
+

FAQs

If Chinese characters are written to Redis in a Windows environment, an exception occurs during data writing.

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0407.html b/docs/dli/sqlreference/dli_08_0407.html new file mode 100644 index 00000000..60a4bcfd --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0407.html @@ -0,0 +1,29 @@ + + +

Format

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0408.html b/docs/dli/sqlreference/dli_08_0408.html new file mode 100644 index 00000000..ef162c9b --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0408.html @@ -0,0 +1,231 @@ + + +

Avro

+

Function

Flink supports reading and writing Avro data based on an Avro schema. The Avro schema is derived from the table schema.

+
+

Supported Connectors

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 Parameter

Parameter

+

Mandatory

+

Default value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set the value to avro.

+

avro.codec

+

No

+

None

+

String

+

Avro compression codec, used with the file system connector only. The codec is disabled by default. Available values are deflate, snappy, bzip2, and xz. A usage sketch follows this table.

+
+
+
+
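The avro.codec option above applies only when the format is used with the file system connector. The following is a minimal sketch under that assumption; the connector name, OBS path, and codec choice are illustrative placeholders rather than settings documented in this section.

create table avroFileSink (
  order_id string,
  pay_amount double
) with (
  'connector' = 'filesystem',                 -- assumed file system connector
  'path' = 'obs://bucketName/avro-output/',   -- placeholder output path
  'format' = 'avro',
  'avro.codec' = 'snappy'                     -- compress Avro files with snappy
);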

Data Type Mapping

Currently, the Avro schema is derived from the table schema and cannot be explicitly defined. The following table lists the mappings between Flink and Avro types.

+

In addition to the following types, Flink supports reading and writing nullable types. Flink maps nullable types to Avro union(something, null), where something is the Avro type converted from the Flink type.

+

You can refer to Apache Avro 1.11.0 Specification for more information about Avro types.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Data Type Mapping

Flink SQL Type

+

Avro Type

+

Avro Logical Type

+

CHAR / VARCHAR / STRING

+

string

+

-

+

BOOLEAN

+

boolean

+

-

+

BINARY / VARBINARY

+

bytes

+

-

+

DECIMAL

+

fixed

+

decimal

+

TINYINT

+

int

+

-

+

SMALLINT

+

int

+

-

+

INT

+

int

+

-

+

BIGINT

+

long

+

-

+

FLOAT

+

float

+

-

+

DOUBLE

+

double

+

-

+

DATE

+

int

+

date

+

TIME

+

int

+

time-millis

+

TIMESTAMP

+

long

+

timestamp-millis

+

ARRAY

+

array

+

-

+

MAP (keys must be of the string, char, or varchar type.)

+

map

+

-

+

MULTISET (elements must be of the string, char, or varchar type.)

+

map

+

-

+

ROW

+

record

+

-

+
+
+
+

Example

Read data in Avro format from Kafka, deserialize the data, and output it to print.

+
  1. Create a datasource connection for access to the VPC and subnet where Kafka is located, and bind the connection to the queue. Set a security group and inbound rule to allow access to the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue; otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job and select Flink 1.12. Copy the following statement and submit the job:

    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourTopic>',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress1>:<yourKafkaPort>,<yourKafkaAddress2>:<yourKafkaPort>,<yourKafkaAddress3>:<yourKafkaPort>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "avro"
    +);
    +
    +CREATE TABLE printSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select * from kafkaSource;
    +

  3. Insert the following data to Kafka using Avro data serialization:

    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +

  4. Perform the following operations to view the output:

    • Method 1: Locate the job and click More > FlinkUI. Choose Task Managers > Stdout.
    • Method 2: If you allow DLI to save job logs in OBS, view the output in the taskmanager.out file.
      +I(202103241000000001,webShop,2021-03-2410:00:00,100.0,100.0,2021-03-2410:02:03,0001,Alice,330106)
      ++I(202103241606060001,appShop,2021-03-2416:06:06,200.0,180.0,2021-03-2416:10:06,0001,Alice,330106)
      +
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0409.html b/docs/dli/sqlreference/dli_08_0409.html new file mode 100644 index 00000000..64482521 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0409.html @@ -0,0 +1,182 @@ + + +

Canal

+

Function

Canal is a Changelog Data Capture (CDC) tool that can stream changes in real time from MySQL into other systems. Canal provides a unified format schema for changelogs and supports serializing messages using JSON and protobuf (the default format for Canal).

+

Flink supports interpreting Canal JSON messages as INSERT, UPDATE, and DELETE messages in the Flink SQL system. This is useful in many cases, such as:

+ +

Flink also supports encoding INSERT, UPDATE, and DELETE messages in Flink SQL as Canal JSON messages and emitting them to storage such as Kafka. However, Flink currently cannot combine UPDATE_BEFORE and UPDATE_AFTER into a single UPDATE message. Therefore, Flink encodes UPDATE_BEFORE and UPDATE_AFTER as DELETE and INSERT Canal messages.
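A minimal sketch of a Kafka sink that emits changelog records in Canal JSON, so that updates produced by Flink SQL are written as the DELETE and INSERT pairs described above (the topic and broker addresses are placeholders):

create table canalSink (
  id bigint,
  name string,
  weight decimal(10, 2)
) with (
  'connector' = 'kafka',
  'topic' = 'canalSinkTopic',
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
  'format' = 'canal-json'               -- encode INSERT/UPDATE/DELETE as Canal JSON
);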

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to canal-json.

+

canal-json.ignore-parse-errors

+

No

+

false

+

Boolean

+

Whether fields and rows with parse errors will be skipped or failed. The default value is false, indicating that an error will be thrown. Fields are set to null in case of errors.

+

canal-json.timestamp-format.standard

+

No

+

'SQL'

+

String

+

Input and output timestamp formats. Currently supported values are SQL and ISO-8601:

+
  • SQL will parse input timestamp in "yyyy-MM-dd HH:mm:ss.s{precision}" format, for example 2020-12-30 12:13:14.123 and output timestamp in the same format.
  • ISO-8601 will parse input timestamp in "yyyy-MM-ddTHH:mm:ss.s{precision}" format, for example 2020-12-30T12:13:14.123 and output timestamp in the same format.
+

canal-json.map-null-key.mode

+

No

+

'FAIL'

+

String

+

Handling mode when serializing null keys for map data. Available values are as follows:

+
  • FAIL will throw exception when encountering map value with null key.
  • DROP will drop null key entries for map data.
  • LITERAL replaces the empty key value in the map with a string constant. The string literal is defined by canal-json.map-null-key.literal option.
+

canal-json.map-null-key.literal

+

No

+

'null'

+

String

+

String literal to replace null key when canal-json.map-null-key.mode is LITERAL.

+

canal-json.database.include

+

No

+

None

+

+

String

+

+

An optional regular expression that reads only the changelog rows of specific databases by matching the database meta field in Canal records.

+

canal-json.table.include

+

No

+

None

+

String

+

An optional regular expression that reads only the changelog rows of specific tables by matching the table meta field in Canal records. A usage sketch follows this table.

+
+
+
+
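The two include options above are regular expressions matched against the database and table meta fields of each Canal record; rows that do not match are skipped. A minimal sketch of a source that only consumes changes of the products table in the inventory database (the topic and broker addresses are placeholders):

create table canalSource (
  id bigint,
  name string,
  weight decimal(10, 2)
) with (
  'connector' = 'kafka',
  'topic' = 'canalSourceTopic',
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
  'properties.group.id' = 'GroupId',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'canal-json',
  'canal-json.database.include' = 'inventory',  -- only read the changelog of this database
  'canal-json.table.include' = 'products'       -- only read the changelog of this table
);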

Supported Connectors

+
+

Example

Use Kafka to send data and output the data to print.

+
  1. Create a datasource connection for the communication with the VPC and subnet where Kafka is located, and bind the connection to the queue. Set a security group and inbound rule to allow access to the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue; otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job and select Flink 1.12. Copy the following statement and submit the job:

    create table kafkaSource(
    +  id bigint,
    +  name string,
    +  description string,
    +  weight DECIMAL(10, 2)
    +  ) with (
    +    'connector' = 'kafka',
    +    'topic' = '<yourTopic>',
    +    'properties.group.id' = '<yourGroupId>',
    +    'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +    'scan.startup.mode' = 'latest-offset',
    +    'format' = 'canal-json'
    +);
    +create table printSink(
    +  id bigint,
    +  name string,
    +  description string,
    +  weight DECIMAL(10, 2)
    +   ) with (
    +     'connector' = 'print'
    +   );
    +insert into printSink select * from kafkaSource;
    +

  3. Insert the following data to the corresponding topic in Kafka:

    {
    +  "data": [
    +    {
    +      "id": "111",
    +      "name": "scooter",
    +      "description": "Big 2-wheel scooter",
    +      "weight": "5.18"
    +    }
    +  ],
    +  "database": "inventory",
    +  "es": 1589373560000,
    +  "id": 9,
    +  "isDdl": false,
    +  "mysqlType": {
    +    "id": "INTEGER",
    +    "name": "VARCHAR(255)",
    +    "description": "VARCHAR(512)",
    +    "weight": "FLOAT"
    +  },
    +  "old": [
    +    {
    +      "weight": "5.15"
    +    }
    +  ],
    +  "pkNames": [
    +    "id"
    +  ],
    +  "sql": "",
    +  "sqlType": {
    +    "id": 4,
    +    "name": 12,
    +    "description": 12,
    +    "weight": 7
    +  },
    +  "table": "products",
    +  "ts": 1589373560798,
    +  "type": "UPDATE"
    +}
    +

  4. View the output through either of the following methods:

    • Method 1: Locate the job and click More > FlinkUI. Choose Task Managers > Stdout.
    • Method 2: If you allow DLI to save job logs in OBS, view the output in the taskmanager.out file.
    +
    -U(111,scooter,Big2-wheel scooter,5.15)
    ++U(111,scooter,Big2-wheel scooter,5.18)
    +

+

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0410.html b/docs/dli/sqlreference/dli_08_0410.html new file mode 100644 index 00000000..e474a414 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0410.html @@ -0,0 +1,118 @@ + + +

Confluent Avro

+

Function

The Avro Schema Registry (avro-confluent) format allows you to read records that were serialized by the io.confluent.kafka.serializers.KafkaAvroSerializer and to write records that can in turn be read by the io.confluent.kafka.serializers.KafkaAvroDeserializer.

+

When reading (deserializing) a record with this format, the Avro writer schema is fetched from the configured Confluent Schema Registry based on the schema version ID encoded in the record, while the reader schema is inferred from the table schema.

+

When writing (serializing) a record with this format, the Avro schema is inferred from the table schema and used to retrieve a schema ID to be encoded with the data. The lookup is performed in the configured Confluent Schema Registry under the subject specified by avro-confluent.schema-registry.subject.
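The following is a minimal sketch of these format options on a Kafka sink; the registry URL and subject are placeholders, and the same pattern appears in the full example below.

create table confluentAvroSink (
  order_id string,
  pay_amount double
) with (
  'connector' = 'kafka',
  'topic' = 'sinkTopic',
  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort',
  'format' = 'avro-confluent',
  'avro-confluent.schema-registry.url' = 'http://SchemaRegistryIp:8081',
  'avro-confluent.schema-registry.subject' = 'sinkTopic-value'   -- subject under which the schema is registered
);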

+
+

Supported Connectors

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Parameter description

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to avro-confluent.

+

avro-confluent.schema-registry.subject

+

No

+

None

+

String

+

The Confluent Schema Registry subject under which to register the schema used by this format during serialization.

+

By default, kafka and upsert-kafka connectors use <topic_name>-value or <topic_name>-key as the default subject name if this format is used as the value or key format.

+

avro-confluent.schema-registry.url

+

Yes

+

None

+

String

+

URL of the Confluent Schema Registry to fetch/register schemas.

+
+
+
+

Example

1. Read JSON data from the source topic in Kafka and write the data in Confluent Avro format to the sink topic.

+
  1. Create a datasource connection for the communication with the VPC and subnet where Kafka and ECS are located, and bind the connection to the queue. Set a security group and inbound rule to allow access to the queue, and test the connectivity of the queue using the Kafka and ECS IP addresses. For example, locate a general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue; otherwise, the binding fails.
  2. Purchase an ECS cluster, download Confluent 5.5.2 (https://packages.confluent.io/archive/5.5/) and jdk1.8.0_232, and upload them to the ECS cluster. Run the following command to decompress the packages (assume that the decompression directories are confluent-5.5.2 and jdk1.8.0_232):

    tar zxvf confluent-5.5.2-2.11.tar.gz
    +tar zxvf jdk1.8.0_232.tar.gz
    +

  3. Run the following commands to install jdk1.8.0_232 in the current ECS cluster. You can run the pwd command in the jdk1.8.0_232 folder to view the value of yourJdkPath.

    export JAVA_HOME=<yourJdkPath>
    +export PATH=$JAVA_HOME/bin:$PATH
    +export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
    +

  4. Go to the confluent-5.5.2/etc/schema-registry/ directory and modify the following configuration items in the schema-registry.properties file:

    listeners=http://<yourEcsIp>:8081  
    +kafkastore.bootstrap.servers=<yourKafkaAddress1>:<yourKafkaPort>,<yourKafkaAddress2>:<yourKafkaPort>
    +

  5. Switch to the confluent-5.5.2 directory and run the following command to start Confluent:

    bin/schema-registry-start etc/schema-registry/schema-registry.properties
    +

  6. Create a Flink opensource SQL job, select the Flink 1.12 version, and allow DLI to save job logs in OBS. Add the following statement to the job and submit it:

    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,  
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress1>:<yourKafkaPort>,<yourKafkaAddress2>:<yourKafkaPort>',
    +  'topic' = '<yourSourceTopic>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +CREATE TABLE kafkaSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,  
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress1>:<yourKafkaPort>,<yourKafkaAddress2>:<yourKafkaPort>',
    +  'topic' = '<yourSinkTopic>',
    +  'format' = 'avro-confluent',
    +  'avro-confluent.schema-registry.url' = 'http://<yourEcsIp>:8081',
    +  'avro-confluent.schema-registry.subject' = '<yourSubject>'
    +);
    +insert into kafkaSink select * from kafkaSource;
    +

  7. Insert the following data into Kafka:

    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +

  8. Read the data of the sink Kafka topic. You will find that the data has been written and the schema has been saved to the _schema topic of Kafka.
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0411.html b/docs/dli/sqlreference/dli_08_0411.html new file mode 100644 index 00000000..cd93d189 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0411.html @@ -0,0 +1,180 @@ + + +

CSV

+

Function

The CSV format allows you to read and write CSV data based on a CSV schema. Currently, the CSV schema is derived from the table schema.

+
+

Supported Connectors

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set the value to csv.

+

csv.field-delimiter

+

No

+

,

+

String

+

Field delimiter character, which must be a single character. You can use backslash to specify special characters, for example, \t represents the tab character. You can also use unicode to specify them in plain SQL, for example, 'csv.field-delimiter' = '\u0001' represents the 0x01 character.

+

csv.disable-quote-character

+

No

+

false

+

Boolean

+

Whether to disable the quote character for enclosing field values. If you set this parameter to true, csv.quote-character cannot be set.

+

csv.quote-character

+

No

+

''

+

String

+

Quote character for enclosing field values.

+

csv.allow-comments

+

No

+

false

+

Boolean

+

Ignore comment lines that start with #. If you set this parameter to true, make sure to also ignore parse errors to allow empty rows.

+

csv.ignore-parse-errors

+

No

+

false

+

Boolean

+

Whether fields and rows with parse errors will be skipped or failed. The default value is false, indicating that an error will be thrown. Fields are set to null in case of errors.

+

csv.array-element-delimiter

+

No

+

;

+

String

+

Array element delimiter string for separating array and row element values.

+

csv.escape-character

+

No

+

+

None

+

String

+

+

Escape character for escaping values

+

+

csv.null-literal

+

No

+

None

+

String

+

Null literal string that is interpreted as a null value.

+
+
+
+
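A minimal sketch combining several of the options above: a tab-separated Kafka source that skips malformed rows and interprets the literal N/A as NULL (the topic and broker address are placeholders):

create table csvSource (
  order_id string,
  pay_amount double
) with (
  'connector' = 'kafka',
  'topic' = 'csvSourceTopic',
  'properties.bootstrap.servers' = 'KafkaAddress:KafkaPort',
  'properties.group.id' = 'GroupId',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'csv',
  'csv.field-delimiter' = '\t',         -- fields are separated by tab characters
  'csv.ignore-parse-errors' = 'true',   -- skip rows that cannot be parsed
  'csv.null-literal' = 'N/A'            -- interpret N/A as NULL
);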

Example

Use Kafka to send data and output the data to print.

+
  1. Create a datasource connection for the communication with the VPC and subnet where Kafka is located, and bind the connection to the queue. Set a security group and inbound rule to allow access to the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue; otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job. Copy the following statement and submit the job:

    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourSourceTopic>',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "csv"
    +);
    +
    +CREATE TABLE kafkaSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourSinkTopic>',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +  "format" = "csv"
    +);
    +
    +insert into kafkaSink select * from kafkaSource;
    +
    +

  3. Insert the following data into the source Kafka topic:

    202103251505050001,qqShop,2021-03-25 15:05:05,500.00,400.00,2021-03-25 15:10:00,0003,Cindy,330108
    +
    +202103241606060001,appShop,2021-03-24 16:06:06,200.00,180.00,2021-03-24 16:10:06,0001,Alice,330106
    +

  4. Read data from the sink Kafka topic. The result is as follows:

    202103251505050001,qqShop,"2021-03-25 15:05:05",500.0,400.0,"2021-03-25 15:10:00",0003,Cindy,330108
    +
    +202103241606060001,appShop,"2021-03-24 16:06:06",200.0,180.0,"2021-03-24 16:10:06",0001,Alice,330106
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0412.html b/docs/dli/sqlreference/dli_08_0412.html new file mode 100644 index 00000000..3d101b34 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0412.html @@ -0,0 +1,159 @@ + + +

Debezium

+

Function

Debezium is a Changelog Data Capture (CDC) tool that can stream changes in real time from other databases into Kafka. Debezium provides a unified format schema for changelogs and supports serializing messages using JSON.

+

Flink supports interpreting Debezium JSON and Avro messages as INSERT/UPDATE/DELETE messages in the Flink SQL system. This is useful in many cases, such as:

+ +
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to debezium-json.

+

debezium-json.schema-include

+

No

+

false

+

Boolean

+

Whether the Debezium JSON messages contain the schema. When setting up Debezium Kafka Connect, enable the Kafka configuration value.converter.schemas.enable to include the schema in the message.

+

debezium-json.ignore-parse-errors

+

No

+

false

+

Boolean

+

Whether fields and rows with parse errors will be skipped or failed. The default value is false, indicating that an error will be thrown. Fields are set to null in case of errors.

+

debezium-json.timestamp-format.standard

+

No

+

'SQL'

+

String

+

Input and output timestamp formats. Currently supported values are SQL and ISO-8601.

+
  • SQL will parse input timestamp in "yyyy-MM-dd HH:mm:ss.s{precision}" format, for example 2020-12-30 12:13:14.123 and output timestamp in the same format.
  • ISO-8601 will parse input timestamp in "yyyy-MM-ddTHH:mm:ss.s{precision}" format, for example 2020-12-30T12:13:14.123 and output timestamp in the same format.
+

debezium-json.map-null-key.mode

+

No

+

'FAIL'

+

String

+

Handling mode when serializing null keys for map data. Available values are as follows:

+
  • FAIL will throw exception when encountering map value with null key.
  • DROP will drop null key entries for map data.
  • LITERAL replaces the empty key value in the map with a string constant. The string literal is defined by debezium-json.map-null-key.literal option.
+

debezium-json.map-null-key.literal

+

No

+

'null'

+

String

+

String literal to replace null key when debezium-json.map-null-key.mode is LITERAL.

+
+
+
+
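A minimal sketch of a source that consumes Debezium messages containing the schema part (that is, value.converter.schemas.enable was enabled on the Debezium side); the topic and broker address are placeholders:

create table debeziumSource (
  id bigint,
  name string,
  weight decimal(10, 2)
) with (
  'connector' = 'kafka',
  'topic' = 'debeziumTopic',
  'properties.bootstrap.servers' = 'KafkaAddress:KafkaPort',
  'properties.group.id' = 'GroupId',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'debezium-json',
  'debezium-json.schema-include' = 'true'   -- messages include the Debezium schema
);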

Supported Connectors

+
+

Example

Use Kafka to send data and output the data to print.

+
  1. Create a datasource connection for the communication with the VPC and subnet where Kafka is located, and bind the connection to the queue. Set a security group and inbound rule to allow access to the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate a general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue; otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job. Copy the following statement and submit the job:

    create table kafkaSource(
    +  id BIGINT,
    +  name STRING,
    +  description STRING,
    +  weight DECIMAL(10, 2)
    +  ) with (
    +    'connector' = 'kafka',
    +    'topic' = '<yourTopic>',
    +    'properties.group.id' = '<yourGroupId>',
    +    'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +    'scan.startup.mode' = 'latest-offset',
    +    'format' = 'debezium-json'
    +);
    +create table printSink(
    +  id BIGINT,
    +  name STRING,
    +  description STRING,
    +  weight DECIMAL(10, 2)
    +   ) with (
    +     'connector' = 'print'
    +   );
    +insert into printSink select * from kafkaSource;
    +

  3. Insert the following data to the corresponding topic in Kafka:

    {
    +  "before": {
    +    "id": 111,
    +    "name": "scooter",
    +    "description": "Big 2-wheel scooter",
    +    "weight": 5.18
    +  },
    +  "after": {
    +    "id": 111,
    +    "name": "scooter",
    +    "description": "Big 2-wheel scooter",
    +    "weight": 5.15
    +  },
    +  "source": {
    +    "version": "0.9.5.Final",
    +	"connector": "mysql",
    +	"name": "fullfillment",
    +	"server_id" :1,
    +	"ts_sec": 1629607909,
    +	"gtid": "mysql-bin.000001",
    +	"pos": 2238,"row": 0,
    +	"snapshot": false,
    +	"thread": 7,
    +	"db": "inventory",
    +	"table": "test",
    +	"query": null},
    +  "op": "u",
    +  "ts_ms": 1589362330904,
    +  "transaction": null
    +}
    +

  4. View the output through either of the following methods:

    • Method 1: Locate the job and click More > FlinkUI. Choose Task Managers > Stdout.
    • Method 2: If you allow DLI to save job logs in OBS, view the output in the taskmanager.out file.
    +
    -U(111,scooter,Big2-wheel scooter,5.18)
    ++U(111,scooter,Big2-wheel scooter,5.15)
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0413.html b/docs/dli/sqlreference/dli_08_0413.html new file mode 100644 index 00000000..fce41d8f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0413.html @@ -0,0 +1,146 @@ + + +

JSON

+

Function

The JSON format allows you to read and write JSON data based on a JSON schema. Currently, the JSON schema is derived from the table schema.

+
+

Supported Connectors

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to json.

+

json.fail-on-missing-field

+

No

+

false

+

Boolean

+

Whether to fail if a field is missing. The default value is false, indicating that missing fields are set to null and the job does not fail. If set to true, an error is thrown when a field is missing.

+

json.ignore-parse-errors

+

No

+

false

+

Boolean

+

Whether fields and rows with parse errors are skipped or cause a failure. The default value is false, indicating that an error is thrown. If set to true, rows with parse errors are skipped and the affected fields are set to null.

+

json.timestamp-format.standard

+

No

+

'SQL'

+

String

+

Input and output timestamp format for TIMESTAMP and TIMESTAMP WITH LOCAL TIME ZONE.

+

Currently supported values are SQL and ISO-8601:

+
  • SQL will parse input TIMESTAMP values in "yyyy-MM-dd HH:mm:ss.s{precision}" format (for example, 2020-12-30 12:13:14.123), parse TIMESTAMP WITH LOCAL TIME ZONE values in "yyyy-MM-dd HH:mm:ss.s{precision}'Z'" format (for example, 2020-12-30 12:13:14.123Z), and output timestamps in the same format.
  • ISO-8601 will parse input TIMESTAMP values in "yyyy-MM-ddTHH:mm:ss.s{precision}" format (for example, 2020-12-30T12:13:14.123), parse TIMESTAMP WITH LOCAL TIME ZONE values in "yyyy-MM-ddTHH:mm:ss.s{precision}'Z'" format (for example, 2020-12-30T12:13:14.123Z), and output timestamps in the same format.
+

json.map-null-key.mode

+

No

+

'FAIL'

+

String

+

Handling mode when serializing null keys for map data. Available values are as follows:

+
  • FAIL will throw an exception when a map value with a null key is encountered.
  • DROP will drop null-key entries for map data.
  • LITERAL replaces the null key in the map with a string constant. The string literal is defined by the json.map-null-key.literal option.
+

json.map-null-key.literal

+

No

+

'null'

+

String

+

String literal to replace null key when json.map-null-key.mode is LITERAL.

+
+
+
+
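
If the defaults above need to be overridden, the options are set alongside 'format' = 'json' in the WITH clause. The following is a minimal sketch only; the table schema and connection values are placeholders and not part of the original example:

create table jsonSource(
+  id BIGINT,
+  event_time TIMESTAMP(3)
+) with (
+  'connector' = 'kafka',
+  'topic' = '<yourTopic>',
+  'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
+  'format' = 'json',
+  'json.ignore-parse-errors' = 'true',
+  'json.timestamp-format.standard' = 'ISO-8601'
+);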

Example

In this example, data is read from a topic and written to another using a Kafka sink.

+
  1. Create a datasource connection for communication with the VPC and subnet where Kafka is located and bind the connection to the queue. Set an inbound rule for the security group to allow access from the queue, and test the connectivity using the Kafka address. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job, select Flink 1.12, and allow DLI to save job logs in OBS. Use the following statement in the job and submit it:

    CREATE TABLE kafkaSource (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourSourceTopic>',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "json"
    +);
    +
    +CREATE TABLE kafkaSink (
    +  order_id string,
    +  order_channel string,
    +  order_time string, 
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourSinkTopic>',
    +  'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +  "format" = "json"
    +);
    +
    +insert into kafkaSink select * from kafkaSource;
    +

  3. Insert the following data into the source Kafka topic:

    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +

  4. Read data from the sink topic. The result is as follows:

    {"order_id":"202103241000000001","order_channel":"webShop","order_time":"2021-03-24 10:00:00","pay_amount":100.0,"real_pay":100.0,"pay_time":"2021-03-24 10:02:03","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +
    +{"order_id":"202103241606060001","order_channel":"appShop","order_time":"2021-03-24 16:06:06","pay_amount":200.0,"real_pay":180.0,"pay_time":"2021-03-24 16:10:06","user_id":"0001","user_name":"Alice","area_id":"330106"}
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0414.html b/docs/dli/sqlreference/dli_08_0414.html new file mode 100644 index 00000000..2ba09be4 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0414.html @@ -0,0 +1,143 @@ + + +

Maxwell

+

Function

Flink supports interpreting Maxwell JSON messages as INSERT/UPDATE/DELETE messages in the Flink SQL system. This feature is useful in many cases,

+
+

such as:

+ +

Flink also supports encoding the INSERT/UPDATE/DELETE messages in Flink SQL as Maxwell JSON messages and emitting them to external systems such as Kafka. However, Flink currently cannot combine UPDATE_BEFORE and UPDATE_AFTER into a single UPDATE message. Therefore, Flink encodes UPDATE_BEFORE and UPDATE_AFTER as DELETE and INSERT Maxwell messages, respectively.

+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to maxwell-json.

+

maxwell-json.ignore-parse-errors

+

No

+

false

+

Boolean

+

Whether fields and rows with parse errors are skipped or cause a failure. The default value is false, indicating that an error is thrown. If errors are skipped, the affected fields are set to null.

+

maxwell-json.timestamp-format.standard

+

No

+

'SQL'

+

String

+

Input and output timestamp formats. Currently supported values are SQL and ISO-8601:

+

SQL will parse input timestamp in "yyyy-MM-dd HH:mm:ss.s{precision}" format, for example, 2020-12-30 12:13:14.123 and output timestamp in the same format.

+

ISO-8601 will parse input timestamp in "yyyy-MM-ddTHH:mm:ss.s{precision}" format, for example 2020-12-30T12:13:14.123 and output timestamp in the same format.

+

maxwell-json.map-null-key.mode

+

No

+

'FAIL'

+

String

+

Handling mode when serializing null keys for map data. Currently supported values are 'FAIL', 'DROP' and 'LITERAL':

+

FAIL will throw exception when encountering map with null key.

+

DROP will drop null key entries for map data.

+

LITERAL will replace null key with string literal. The string literal is defined by maxwell-json.map-null-key.literal option.

+

maxwell-json.map-null-key.literal

+

No

+

'null'

+

String

+

String literal to replace null key when maxwell-json.map-null-key.mode is LITERAL.

+
+
+
+

Supported Connectors

+
+

Example

Use Kafka to send data and output the data to print.

+
  1. Create a datasource connection for communication with the VPC and subnet where Kafka is located and bind the connection to the queue. Set a security group and an inbound rule to allow access from the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate the general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job and select Flink 1.12. Copy the following statement and submit the job:

    create table kafkaSource(
    +  id bigint,
    +  name string,
    +  description string,
    +  weight DECIMAL(10, 2)  
    +  ) with (
    +    'connector' = 'kafka',
    +    'topic' = '<yourTopic>',
    +    'properties.group.id' = '<yourGroupId>',
    +    'properties.bootstrap.servers' = '<yourKafkaAddress1>:<yourKafkaPort>,<yourKafkaAddress2>:<yourKafkaPort>',
    +    'scan.startup.mode' = 'latest-offset',
    +    'format' = 'maxwell-json'
    +);
    +create table printSink(
    +  id bigint,
    +  name string,
    +  description string,
    +  weight DECIMAL(10, 2)
    +   ) with (
    +     'connector' = 'print'
    +   );
    +insert into printSink select * from kafkaSource;
    +

  3. Insert the following data to the corresponding topic in Kafka:

    {
    +   "database":"test",
    +   "table":"e",
    +   "type":"insert",
    +   "ts":1477053217,
    +   "xid":23396,
    +   "commit":true,
    +   "position":"master.000006:800911",
    +   "server_id":23042,
    +   "thread_id":108,
    +   "primary_key": [1, "2016-10-21 05:33:37.523000"],
    +   "primary_key_columns": ["id", "c"],
    +   "data":{
    +     "id":111,
    +     "name":"scooter",
    +     "description":"Big 2-wheel scooter",
    +     "weight":5.15
    +   },
    +   "old":{
    +     "weight":5.18
    +   }
    +}
    +

  4. View the output through either of the following methods:

    • Method 1: Locate the job and click More > FlinkUI. Choose Task Managers > Stdout.
    • Method 2: If you allow DLI to save job logs in OBS, view the output in the taskmanager.out file.
    +
    +I(111,scooter,Big 2-wheel scooter,5.15)
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0415.html b/docs/dli/sqlreference/dli_08_0415.html new file mode 100644 index 00000000..ca2877f7 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0415.html @@ -0,0 +1,89 @@ + + +

Raw

+

Function

The raw format allows you to read and write raw (byte based) values as a single column.

+

Note: This format encodes null values as null of the byte[] type. This can be a limitation when used with upsert-kafka, because upsert-kafka treats a null value as a tombstone message (DELETE on the key). Therefore, avoid using the raw format as the value.format of the upsert-kafka connector if the field can be null.

+

The raw format is built in; no additional dependencies are required.

+
+

Parameters

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1

Parameter

+

Mandatory

+

Default Value

+

Type

+

Description

+

format

+

Yes

+

None

+

String

+

Format to be used. Set this parameter to raw.

+

raw.charset

+

No

+

UTF-8

+

String

+

Charset to encode the text string.

+

raw.endianness

+

No

+

big-endian

+

String

+

Endianness used to encode the bytes of a numeric value. Valid values are big-endian and little-endian. See general references on endianness for more details.

+
+
+
+

Supported Connectors

+
+

Example

Use Kafka to send data and output the data to print.

+
  1. Create a datasource connection for communication with the VPC and subnet where Kafka is located and bind the connection to the queue. Set a security group and an inbound rule to allow access from the queue, and test the connectivity of the queue using the Kafka IP address. For example, locate the general-purpose queue where the job runs and choose More > Test Address Connectivity in the Operation column. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  2. Create a Flink OpenSource SQL job and select Flink 1.12. Copy the following statement and submit the job:

    create table kafkaSource(
    +  log string
    +  ) with (
    +    'connector' = 'kafka',
    +    'topic' = '<yourTopic>',
    +    'properties.group.id' = '<yourGroupId>',
    +    'properties.bootstrap.servers' = '<yourKafkaAddress>:<yourKafkaPort>',
    +    'scan.startup.mode' = 'latest-offset',
    +    'format' = 'raw'
    +);
    +create table printSink(
    +  log string
    +   ) with (
    +     'connector' = 'print'
    +   );
    +insert into printSink select * from kafkaSource;
    +

  3. Insert the following data to the corresponding topic in Kafka:

    47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0" 200 5316 "https://domain.com/?p=1" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75"
    +

  4. View the output through either of the following methods:

    • Method 1: Locate the job and click More > FlinkUI. Choose Task Managers > Stdout.
    • Method 2: If you allow DLI to save job logs in OBS, view the output in the taskmanager.out file.
    +
    +I(47.29.201.179 - - [28/Feb/2019:13:17:10 +0000] "GET /?p=1 HTTP/2.0"2005316"https://domain.com/?p=1"
    +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36" "2.75")
    +

+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0416.html b/docs/dli/sqlreference/dli_08_0416.html new file mode 100644 index 00000000..833ccd20 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0416.html @@ -0,0 +1,27 @@ + + +

DML Syntax

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0417.html b/docs/dli/sqlreference/dli_08_0417.html new file mode 100644 index 00000000..c635a3be --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0417.html @@ -0,0 +1,197 @@ + + +

SELECT

+

SELECT

Syntax

+
1
+2
+3
+4
+5
+6
SELECT [ ALL | DISTINCT ]
+  { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

SELECT is used to select data from a table.

+

ALL indicates that all results are returned.

+

DISTINCT indicates that duplicate records are removed from the result.

+

Precautions

+ +

Example

+

Select the orders that contain more than three units.

+
1
insert into temp SELECT  * FROM Orders WHERE units > 3; 
+
+ +
+

Insert a group of constant data.

+
1
insert into temp select 'Lily', 'male', 'student', 17;
+
+ +
+
+

WHERE

Syntax

+
1
+2
+3
SELECT   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+
+ +
+

Description

+

The WHERE clause is used to filter the query results based on the specified condition.

+

Precautions

+ +

Example

+

Search for orders that contain more than three and fewer than ten units.

+
1
+2
insert into temp SELECT  * FROM Orders
+  WHERE units > 3 and units < 10; 
+
+ +
+
+

HAVING

Function

+

This clause is used to search for the query results that meet the search condition.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. You can use GROUP BY for grouping and then use HAVING for filtering. Arithmetic operations and aggregate functions are supported in the HAVING clause.

+

Precautions

+

If the filtering condition is subject to the results of GROUP BY, the HAVING clause, rather than the WHERE clause, must be used for search.

+

Example

+

Group the student table according to the name field and search for the records in which the maximum score is higher than 95 in the group.

+
1
+2
+3
insert into temp SELECT name, max(score) FROM student
+  GROUP BY name
+  HAVING max(score) >95;
+
+ +
+
+

Column-Based GROUP BY

Function

+

This clause is used to group a table based on columns.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

Column-based GROUP BY can be categorized into single-column GROUP BY and multi-column GROUP BY.

+ +

Precautions

+

In stream processing, GROUP BY produces update results; the result of a group is updated as new data arrives.

+

Example

+

Group the student table according to the score and name fields and return the grouping results.

+
1
+2
insert into temp SELECT name,score, max(score) FROM student 
+  GROUP BY name,score;
+
+ +
+
+

Expression-Based GROUP BY

Function

+

This clause is used to group streams according to expressions.

+

Syntax

+
1
+2
+3
+4
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+
+ +
+

Description

+

groupItem can contain one or more fields. A field may be wrapped in a string function, but not in an aggregate function.

+

Precautions

+

None

+

Example

+

Use the substring function to obtain the character string from the name field, group the student table according to the obtained character string, and return each sub character string and the number of records.

+
1
+2
insert into temp SELECT substring(name,6),count(name) FROM student
+  GROUP BY substring(name,6);
+
+ +
+
+

Grouping sets, Rollup, Cube

Function

+ +
Syntax
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY groupingItem]
+
+

Description

+

Values of groupingItem can be Grouping sets(columnName [, columnName]*), Rollup(columnName [, columnName]*), or Cube(columnName [, columnName]*).

+

Precautions

+

None

+

Example

+

Return the results generated based on user and product.

+
INSERT INTO temp SELECT SUM(amount)
+FROM Orders
+GROUP BY GROUPING SETS ((user), (product));
+
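
ROLLUP and CUBE follow the same pattern. The sketch below is illustrative only and reuses the Orders table from the example above; ROLLUP aggregates over each prefix of the column list, while CUBE aggregates over every combination of the columns:

INSERT INTO temp SELECT SUM(amount)
+FROM Orders
+GROUP BY ROLLUP (user, product);
+
+INSERT INTO temp SELECT SUM(amount)
+FROM Orders
+GROUP BY CUBE (user, product);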
+

GROUP BY Using HAVING

Function

+

This clause filters a table after grouping it using the HAVING clause.

+

Syntax

+
1
+2
+3
+4
+5
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  [ WHERE booleanExpression ]
+  [ GROUP BY { groupItem [, groupItem ]* } ]
+  [ HAVING booleanExpression ]
+
+ +
+

Description

+

Generally, HAVING and GROUP BY are used together. You can use GROUP BY for grouping and then HAVING for filtering.

+

Precautions

+ +

Example

+

Group the transactions by num, use the HAVING clause to search for the records in which the maximum value derived from multiplying price with amount is higher than 5000, and return the filtered results.

+
1
+2
+3
+4
insert into temp SELECT num, max(price*amount) FROM transactions
+  WHERE time > '2016-06-01'
+  GROUP BY num
+  HAVING max(price*amount)>5000;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0418.html b/docs/dli/sqlreference/dli_08_0418.html new file mode 100644 index 00000000..c49aa952 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0418.html @@ -0,0 +1,56 @@ + + +

Set Operations

+

Union/Union ALL/Intersect/Except

Syntax

+
1
query UNION [ ALL ] | Intersect | Except query
+
+ +
+

Description

+ +

Precautions

+ +

Example

+

Output the distinct records found in either the Orders1 or Orders2 table.

+
1
+2
insert into temp SELECT  * FROM Orders1
+  UNION SELECT  * FROM Orders2;
+
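
INTERSECT and EXCEPT use the same structure as UNION. The following is a sketch based on the same two tables and the syntax listed above; in standard SQL semantics both operators return distinct records, and support may depend on whether the inputs are bounded:

insert into temp SELECT  * FROM Orders1
+  INTERSECT SELECT  * FROM Orders2;
+
+insert into temp SELECT  * FROM Orders1
+  EXCEPT SELECT  * FROM Orders2;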
+ +
+
+

IN

Syntax

+
1
+2
+3
SELECT [ ALL | DISTINCT ]   { * | projectItem [, projectItem ]* }
+  FROM tableExpression
+  WHERE column_name IN (value (, value)* ) | query
+
+ +
+

Description

+

The IN operator allows multiple values to be specified in the WHERE clause. It returns true if the expression exists in the given table subquery.

+

Precautions

+

The subquery table must consist of a single column, and the data type of the column must be the same as that of the expression.

+

Example

+

Return user and amount information of the products in NewProducts of the Orders table.

+
1
+2
+3
+4
+5
insert into temp SELECT user, amount
+FROM Orders
+WHERE product IN (
+    SELECT product FROM NewProducts
+);
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0419.html b/docs/dli/sqlreference/dli_08_0419.html new file mode 100644 index 00000000..9b2041dc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0419.html @@ -0,0 +1,419 @@ + + +

Window

+

GROUP WINDOW

Description

+

Group Window is defined in GROUP BY. One record is generated from each group. Group Window involves the following functions:

+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
// Calculate the SUM every day (event time).
+insert into temp SELECT name,
+    TUMBLE_START(ts, INTERVAL '1' DAY) as wStart,
+    SUM(amount)
+    FROM Orders
+    GROUP BY TUMBLE(ts, INTERVAL '1' DAY), name;
+
+// Calculate the SUM every day (processing time). 
+insert into temp SELECT name, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY TUMBLE(proctime, INTERVAL '1' DAY), name;
+
+// Calculate the SUM over the recent 24 hours every hour (event time).
+insert into temp SELECT product, 
+    SUM(amount) 
+    FROM Orders 
+    GROUP BY HOP(ts, INTERVAL '1' HOUR, INTERVAL '1' DAY), product;
+
+// Calculate the SUM of each session and an inactive interval every 12 hours (event time).
+insert into temp SELECT name, 
+    SESSION_START(ts, INTERVAL '12' HOUR) AS sStart,
+    SESSION_END(ts, INTERVAL '12' HOUR) AS sEnd,
+    SUM(amount)
+    FROM Orders
+    GROUP BY SESSION(ts, INTERVAL '12' HOUR), name;
+
+ +
+
+

TUMBLE WINDOW Extension

Function

+
The extension functions of the DLI tumbling window are as follows:
  • A tumbling window is triggered periodically to reduce latency.

    Before the tumbling window ends, the window can be periodically triggered based on the configured frequency. The compute result from the start to the current time is output, which does not affect the final output. The latest result can be viewed in each period before the window ends.

    +
  • Data accuracy is improved.

You can set a latency for the end of the window. The output of the window is updated according to the configured latency each time a piece of late data arrives.

    +
+
+

Precautions

+ +

Syntax

+
TUMBLE(time_attr, window_interval, period_interval, lateness_interval)
+

Example

+
The current time attribute column is testtime, the window interval is 10 seconds, and the latency is 10 seconds.
TUMBLE(testtime, INTERVAL '10' SECOND, INTERVAL '10' SECOND, INTERVAL '10' SECOND)
+
+
+

Description

+ +
+ + + + + + + + + + + + + + + + + + + +
Table 3 Parameters

Parameter

+

Description

+

Format

+

time_attr

+

Event time or processing time attribute column

+
  • event-time: The type is timestamp(3).
  • processing-time: No need to specify the type.
+

-

+

window_interval

+

Duration of the window

+
  • Format 1: INTERVAL '10' SECOND

    The window interval is 10 seconds. You can change the value as needed.

    +
  • Format 2: INTERVAL '10' MINUTE

    The window interval is 10 minutes. You can change the value as needed.

    +
  • Format 3: INTERVAL '10' DAY

    The window interval is 10 days. You can change the value as needed.

    +
+

period_interval

+

Frequency of periodic triggering within the window range. That is, before the window ends, the output result is updated at an interval specified by period_interval from the time when the window starts. If this parameter is not set, the periodic triggering policy is not used by default.

+

lateness_interval

+

Time to postpone the end of the window. The system continues to collect data that arrives within lateness_interval after the window ends, and the output is updated for each such piece of late data.

+
NOTE:

If the time window is for processing time, lateness_interval does not take effect.

+
+
+
+
Values of period_interval and lateness_interval cannot be negative numbers.
  • If period_interval is set to 0, periodic triggering is disabled for the window.
  • If lateness_interval is set to 0, the latency after the window ends is disabled.
  • If neither of the two parameters is set, both periodic triggering and latency are disabled and only the regular tumbling window functions are available.
  • If only the latency function needs to be used, set period_interval to INTERVAL '0' SECOND.
+
+
+

Auxiliary Functions

+ +
+ + + + + + + + + + +
Table 4 Auxiliary function

Auxiliary Function

+

Description

+

TUMBLE_START(time_attr, window_interval, period_interval, lateness_interval)

+

Returns the timestamp of the inclusive lower bound of the corresponding tumbling window.

+

TUMBLE_END(time_attr, window_interval, period_interval, lateness_interval)

+

Returns the timestamp of the exclusive upper bound of the corresponding tumbling window.

+
+
+

Example

+

1. In this example, Kafka is used as the source table containing order information, and JDBC is used as the result table to count the number of orders settled by each user within 30 seconds. The user ID and window start time are used as the primary key, and the statistics are written to JDBC in real time.

+
  1. Create a datasource connection for communication with the VPC and subnet where MySQL and Kafka are located and bind the connection to the queue. Set an inbound rule for the security group to allow access from the queue, and test the connectivity of the queue using the MySQL and Kafka addresses. If the connection is successful, the datasource is bound to the queue. Otherwise, the binding fails.
  2. Run the following statement to create the order_count table in the MySQL Flink database:

    CREATE TABLE `flink`.`order_count` (
    +	`user_id` VARCHAR(32) NOT NULL,
    +	`window_start` TIMESTAMP NOT NULL,
    +	`window_end` TIMESTAMP NULL,
    +	`total_num` BIGINT UNSIGNED NULL,
    +	PRIMARY KEY (`user_id`, `window_start`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4
    +	COLLATE = utf8mb4_general_ci;
    +

  3. Create a Flink OpenSource SQL job and submit the job. In this example, the window size is 30 seconds, the triggering period is 10 seconds, and the latency is 5 seconds. That is, if the result is updated before the window ends, the intermediate result will be output every 10 seconds. After the watermark is reached and the window ends, the data whose event time is within 5 seconds of the watermark will still be processed and counted in the current window. If the event time exceeds 5 seconds of the watermark, the data will be discarded.

    CREATE TABLE orders (
    +  order_id string,
    +  order_channel string,
    +  order_time timestamp(3),
    +  pay_amount double,
    +  real_pay double,
    +  pay_time string,
    +  user_id string,
    +  user_name string,
    +  area_id string,
    +  watermark for order_time as order_time - INTERVAL '3' SECOND
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = '<yourTopic>',
    +  'properties.bootstrap.servers' = '<yourKafka>:<port>',
    +  'properties.group.id' = '<yourGroupId>',
    +  'scan.startup.mode' = 'latest-offset',
    +  'format' = 'json'
    +);
    +
    +CREATE TABLE jdbcSink (
    +  user_id string,
    +  window_start timestamp(3),
    +  window_end timestamp(3),
    +  total_num BIGINT,
    +  primary key (user_id, window_start) not enforced
    +) WITH (
    +  'connector' = 'jdbc',
    +  'url' = 'jdbc:mysql://<yourMySQL>:3306/flink',
    +  'table-name' = 'order_count',
    +  'username' = '<yourUserName>',
    +  'password' = '<yourPassword>',
    +  'sink.buffer-flush.max-rows' = '1'
    +);
    +
    +insert into jdbcSink select 
    +    user_id,
    +    TUMBLE_START(order_time, INTERVAL '30' SECOND, INTERVAL '10' SECOND, INTERVAL '5' SECOND),
    +    TUMBLE_END(order_time, INTERVAL '30' SECOND, INTERVAL '10' SECOND, INTERVAL '5' SECOND),
    +    COUNT(*) from orders
    +    GROUP BY user_id, TUMBLE(order_time, INTERVAL '30' SECOND, INTERVAL '10' SECOND, INTERVAL '5' SECOND);
    +

  4. Insert data to Kafka. Assume that orders are settled at different time and the order data at 10:00:13 arrives late.

    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241000000002", "order_channel":"webShop", "order_time":"2021-03-24 10:00:20", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241000000003", "order_channel":"webShop", "order_time":"2021-03-24 10:00:33", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241000000004", "order_channel":"webShop", "order_time":"2021-03-24 10:00:13", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +

  5. Run the following statement in the MySQL database to view the output result. The final result is displayed as follows because the periodic output result cannot be collected:

    select * from order_count
    +
    user_id      window_start         window_end        total_num
    +0001      2021-03-24 10:00:00  2021-03-24 10:00:30    3
    +0001      2021-03-24 10:00:30  2021-03-24 10:01:00    1
    +

+

OVER WINDOW

The difference between Over Window and Group Window is that one record is generated from one row in Over Window.

+

Syntax

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
SELECT agg1(attr1) OVER (
+  [PARTITION BY partition_name]
+  ORDER BY proctime|rowtime 
+  ROWS  
+ BETWEEN (UNBOUNDED|rowCOUNT) PRECEDING AND CURRENT ROW) FROM TABLENAME
+
+SELECT agg1(attr1) OVER (
+  [PARTITION BY partition_name]
+  ORDER BY proctime|rowtime 
+  RANGE  
+  BETWEEN (UNBOUNDED|timeInterval) PRECEDING AND CURRENT ROW) FROM TABLENAME
+
+ +
+

Description

+ +
+ + + + + + + + + + + + + + + + +
Table 5 Parameter description

Parameter

+

Description

+

PARTITION BY

+

Indicates the primary key of the specified group. Each group separately performs calculation.

+

ORDER BY

+

Indicates the processing time or event time as the timestamp for data.

+

ROWS

+

Indicates the count window.

+

RANGE

+

Indicates the time window.

+
+
+

Precautions

+ +

Example

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
// Calculate the count and sum from the time the job is enabled to now (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime RANGE UNBOUNDED preceding) as cnt2
+    FROM Orders;
+  
+// Calculate the count and total number of the recent four records (in proctime).
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY proctime ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+// Calculate the count and sum over the last 60 seconds (in event time). Events are processed based on event time, which is the timeattr field in Orders.
+insert into temp SELECT name,
+    count(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt1,
+    sum(amount) OVER (PARTITION BY name ORDER BY timeattr RANGE BETWEEN INTERVAL '60' SECOND PRECEDING AND CURRENT ROW) as cnt2
+    FROM Orders;
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0420.html b/docs/dli/sqlreference/dli_08_0420.html new file mode 100644 index 00000000..46adac89 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0420.html @@ -0,0 +1,102 @@ + + +

JOIN

+

Equi-join

Syntax

+
1
+2
FROM tableExpression INNER | LEFT | RIGHT | FULL JOIN tableExpression
+  ON value11 = value21 [ AND value12 = value22]
+
+ +
+
+

Precautions

+ +

Example

+
SELECT *
+FROM Orders INNER JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders LEFT JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders RIGHT JOIN Product ON Orders.productId = Product.id;
+
+SELECT *
+FROM Orders FULL OUTER JOIN Product ON Orders.productId = Product.id;
+

Time-windowed Join

Function

+

Each piece of data in a stream is joined with data within a specified time range in another stream.

+

Syntax

+
from t1 JOIN t2 ON t1.key = t2.key AND TIMEBOUND_EXPRESSION
+

Description

+

TIMEBOUND_EXPRESSION can be in either of the following formats:

+ +

Precautions

+

A time-windowed join requires at least one equi-join predicate and a join condition that bounds the time of both streams.

+

Such a condition can use two range predicates (<, <=, >=, or >), a BETWEEN predicate, or an equality predicate that compares time attributes of the same type (processing time or event time) from the two input tables.

+

For example, the following predicate is a valid window join condition:

+ +
+

Example

+

Join all orders shipped within 4 hours with their associated shipments.

+
SELECT *
+FROM Orders o, Shipments s
+WHERE o.id = s.orderId AND
+      o.ordertime BETWEEN s.shiptime - INTERVAL '4' HOUR AND s.shiptime;
+

Expanding arrays into a relation

Precautions

+

This clause is used to return a new row for each element in the given array. Unnesting WITH ORDINALITY is not yet supported.

+

Example

+
SELECT users, tag
+FROM Orders CROSS JOIN UNNEST(tags) AS t (tag);
+
+

User-Defined Table Functions

Function

+

This clause is used to join a table with the results of a table function. Each row of the left (outer) table is joined with all rows produced by the corresponding call of the table function.

+

Precautions

+

A left outer join against a lateral table requires a TRUE literal in the ON clause.

+

Example

+

If the table function call returns an empty result, the corresponding row of the left (outer) table is dropped.

+
SELECT users, tag
+FROM Orders, LATERAL TABLE(unnest_udtf(tags)) t AS tag;
+

If a table function call returns an empty result, the corresponding outer row is preserved and the result is padded with null values.

+
SELECT users, tag
+FROM Orders LEFT JOIN LATERAL TABLE(unnest_udtf(tags)) t AS tag ON TRUE;
+
+

Join Temporal Table Function

Function

+

Precautions

+

Currently only inner join and left outer join with temporal tables are supported.

+

Example

+

Assuming Rates is a temporal table function, the join can be expressed in SQL as follows:

+
SELECT
+  o_amount, r_rate
+FROM
+  Orders,
+  LATERAL TABLE (Rates(o_proctime))
+WHERE
+  r_currency = o_currency;
+
+

Join Temporal Tables

Function

+

This clause is used to join the Temporal table.

+

Syntax

+
SELECT column-names
+FROM table1  [AS <alias1>]
+[LEFT] JOIN table2 FOR SYSTEM_TIME AS OF table1.proctime [AS <alias2>]
+ON table1.column-name1 = table2.key-name1
+

Description

+ +

Precautions

+

Only inner and left joins are supported for temporal tables with processing time attributes.

+

Example

+

LatestRates is a dimension table (such as HBase table) that is materialized with the latest rate.

+
SELECT
+  o.amount, o.currency, r.rate, o.amount * r.rate
+FROM
+  Orders AS o
+  JOIN LatestRates FOR SYSTEM_TIME AS OF o.proctime AS r
+  ON r.currency = o.currency;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0421.html b/docs/dli/sqlreference/dli_08_0421.html new file mode 100644 index 00000000..929411c1 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0421.html @@ -0,0 +1,30 @@ + + +

OrderBy & Limit

+

OrderBy

Function

+

This clause is used to sort data in ascending order on a time attribute.

+

Precautions

+

Currently, only sorting by time attribute is supported.

+

Example

+

Sort data in ascending order on the time attribute.

+
SELECT *
+FROM Orders
+ORDER BY orderTime;
+
+

Limit

Function

+

This clause is used to constrain the number of rows returned.

+

Precautions

+

This clause is used in conjunction with ORDER BY to ensure that the results are deterministic.

+

Example

+
SELECT *
+FROM Orders
+ORDER BY orderTime
+LIMIT 3;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0422.html b/docs/dli/sqlreference/dli_08_0422.html new file mode 100644 index 00000000..425bf10d --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0422.html @@ -0,0 +1,32 @@ + + +

Top-N

+

Function

Top-N queries return the N smallest or largest values ordered by the specified columns. Both smallest-value and largest-value sets are considered Top-N queries. Top-N queries are useful when you need to display only the N bottom-most or N top-most records of a batch or streaming table based on a condition.

+
+

Syntax

SELECT [column_list]
+FROM (
+   SELECT [column_list],
+     ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]]
+       ORDER BY col1 [asc|desc][, col2 [asc|desc]...]) AS rownum
+   FROM table_name)
+WHERE rownum <= N [AND conditions]
+
+

Description

+
+

Precautions

+
+

Example

The following example obtains, in real time, the top five products per category with the maximum sales.

+
SELECT * 
+  FROM ( 
+     SELECT *,
+         ROW_NUMBER() OVER (PARTITION BY category ORDER BY sales DESC) as row_num
+     FROM ShopSales)
+  WHERE row_num <= 5;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0423.html b/docs/dli/sqlreference/dli_08_0423.html new file mode 100644 index 00000000..5ae5f1cc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0423.html @@ -0,0 +1,32 @@ + + +

Deduplication

+

Function

Deduplication removes rows that duplicate over a set of columns, keeping only the first one or the last one.

+
+

Syntax

SELECT [column_list]
+FROM (
+   SELECT [column_list],
+     ROW_NUMBER() OVER ([PARTITION BY col1[, col2...]]
+       ORDER BY time_attr [asc|desc]) AS rownum
+   FROM table_name)
+WHERE rownum = 1
+
+

Description

+
+

Precautions

None

+
+

Example

The following example shows how to remove duplicate rows on order_id. proctime is a processing-time attribute.

+
SELECT order_id, user, product, number
+  FROM (
+     SELECT *,
+         ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY proctime ASC) as row_num
+     FROM Orders)
+  WHERE row_num = 1;
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0424.html b/docs/dli/sqlreference/dli_08_0424.html new file mode 100644 index 00000000..1aed98e9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0424.html @@ -0,0 +1,17 @@ + + +

Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0425.html b/docs/dli/sqlreference/dli_08_0425.html new file mode 100644 index 00000000..d1ab4d7e --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0425.html @@ -0,0 +1,181 @@ + + +

User-Defined Functions (UDFs)

+

Overview

DLI supports the following three types of user-defined functions (UDFs):

+
+ +
  • UDFs can only be used in dedicated queues.
  • Currently, Python is not supported for programming UDFs, UDTFs, and UDAFs.
+
+

POM Dependency

<dependency>
+        <groupId>org.apache.flink</groupId>
+        <artifactId>flink-table-common</artifactId>
+        <version>1.10.0</version>
+        <scope>provided</scope>
+</dependency>
+
+

Using UDFs

  1. Encapsulate the implemented UDFs into a JAR package and upload the package to OBS.
  2. In the navigation pane of the DLI management console, choose Data Management > Package Management. On the displayed page, click Create and use the JAR package uploaded to OBS to create a package.
  3. In the left navigation, choose Job Management and click Flink Jobs. Locate the row where the target resides and click Edit in the Operation column to switch to the page where you can edit the job.
  4. Click the Running Parameters tab of your job, select the UDF JAR and click Save.
  5. Add the following statement to the SQL statements to use the functions:
+
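
The statement takes the general form below; the function name and class name are placeholders (see the per-type examples that follow):

CREATE FUNCTION udf_name AS 'com.company.udf.YourUdfClass';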
+

UDF

A regular UDF must extend the ScalarFunction class and implement the eval method. The open and close methods are optional.

+
+

Example code

+
import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.ScalarFunction;
+public class UdfScalarFunction extends ScalarFunction {
+  private int factor = 12;
+  public UdfScalarFunction() {
+    this.factor = 12;
+  }
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  /**
+   * Custom logic
+   * @param s
+   * @return
+   */
+   public int eval(String s) {
+     return s.hashCode() * factor;
+   }
+   /**
+    * Optional
+    */
+   @Override
+   public void close() {}
+}
+

Example

+
1
+2
CREATE FUNCTION udf_test AS 'com.company.udf.UdfScalarFunction';
+INSERT INTO sink_stream select udf_test(attr) FROM source_stream;
+
+ +
+

UDTF

A UDTF must extend the TableFunction class and implement the eval method. The open and close methods are optional. If the UDTF needs to return multiple columns, declare the return type as Tuple or Row. If Row is used, override the getResultType method to declare the types of the returned fields.

+
+

Example code

+
import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.table.functions.FunctionContext;
+import org.apache.flink.table.functions.TableFunction;
+import org.apache.flink.types.Row;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+public class UdfTableFunction extends TableFunction<Row> {
+  private Logger log = LoggerFactory.getLogger(TableFunction.class);
+  /**
+   * (optional) Initialization
+   * @param context
+   */
+  @Override
+  public void open(FunctionContext context) {}
+  public void eval(String str, String split) {
+    for (String s : str.split(split)) {
+      Row row = new Row(2);
+      row.setField(0, s);
+      row.setField(1, s.length());
+      collect(row);
+    }
+  }
+  /**
+   * Declare the type returned by the function
+   * @return
+   */
+  @Override
+  public TypeInformation<Row> getResultType() {
+  return Types.ROW(Types.STRING, Types.INT);
+  }
+  /**
+    * Optional
+   */
+  @Override
+  public void close() {}
+ }
+

Example

+

The UDTF supports CROSS JOIN and LEFT JOIN. When the UDTF is used, the LATERAL and TABLE keywords must be included.

+ +
1
+2
+3
+4
+5
+6
+7
CREATE FUNCTION udtf_test AS 'com.company.udf.TableFunction';
+// CROSS JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream, LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length);
+// LEFT JOIN
+INSERT INTO sink_stream select subValue, length FROM source_stream LEFT JOIN LATERAL
+TABLE(udtf_test(attr, ',')) as T(subValue, length) ON TRUE;
+
+ +
+

UDAF

A UDAF must extend the AggregateFunction class. You need to create an accumulator for storing the intermediate computing result, for example, WeightedAvgAccum in the following example code.

+
+

Example code

+
public class WeightedAvgAccum {
+public long sum = 0;
+public int count = 0;
+}
+

+
import org.apache.flink.table.functions.AggregateFunction;
+import java.util.Iterator;
+/**
+* The first type variable is the type returned by the aggregation function, and the second type variable is of the Accumulator type.
+* Weighted Average user-defined aggregate function.
+*/
+public class UdfAggFunction extends AggregateFunction<Long, WeightedAvgAccum> {
+// Initialize the accumulator.
+  @Override
+  public WeightedAvgAccum createAccumulator() {
+    return new WeightedAvgAccum();
+  }
+// Return the intermediate computing value stored in the accumulator.
+  @Override
+  public Long getValue(WeightedAvgAccum acc) {
+    if (acc.count == 0) {
+       return null;
+    } else {
+      return acc.sum / acc.count;
+ }
+}
+// Update the intermediate computing value according to the input.
+public void accumulate(WeightedAvgAccum acc, long iValue) {
+acc.sum += iValue;
+acc.count += 1;
+}
+// Perform the retraction operation, which is opposite to the accumulate operation.
+public void retract(WeightedAvgAccum acc, long iValue) {
+acc.sum -= iValue;
+acc.count -= 1;
+}
+// Combine multiple accumulator values.
+public void merge(WeightedAvgAccum acc, Iterable<WeightedAvgAccum> it) {
+Iterator<WeightedAvgAccum> iter = it.iterator();
+while (iter.hasNext()) {
+WeightedAvgAccum a = iter.next();
+acc.count += a.count;
+acc.sum += a.sum;
+}
+}
+// Reset the intermediate computing value.
+public void resetAccumulator(WeightedAvgAccum acc) {
+acc.count = 0;
+acc.sum = 0L;
+}
+}
+

Example

+
1
+2
CREATE FUNCTION udaf_test AS 'com.company.udf.UdfAggFunction';
+INSERT INTO sink_stream SELECT udaf_test(attr2) FROM source_stream GROUP BY attr1;
+
+ +
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0426.html b/docs/dli/sqlreference/dli_08_0426.html new file mode 100644 index 00000000..3d792d45 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0426.html @@ -0,0 +1,35 @@ + + +

Built-In Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0427.html b/docs/dli/sqlreference/dli_08_0427.html new file mode 100644 index 00000000..817e2506 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0427.html @@ -0,0 +1,573 @@ + + +

Mathematical Operation Functions

+

Relational Operators

All data types can be compared by using relational operators and the result is returned as a BOOLEAN value.

+

Relational operators are binary operators. The two compared operands must be of the same data type or support implicit conversion.

+

Table 1 lists all relational operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Relational Operators

Operator

+

Returned Data Type

+

Description

+

A = B

+

BOOLEAN

+

If A is equal to B, then TRUE is returned. Otherwise, FALSE is returned. This operator is used for value assignment.

+

A <> B

+

BOOLEAN

+

If A is not equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned. This operator follows the standard SQL syntax.

+

A < B

+

BOOLEAN

+

If A is less than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A <= B

+

BOOLEAN

+

If A is less than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A > B

+

BOOLEAN

+

If A is greater than B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A >= B

+

BOOLEAN

+

If A is greater than or equal to B, then TRUE is returned. Otherwise, FALSE is returned. If A or B is NULL, then NULL is returned.

+

A IS NULL

+

BOOLEAN

+

If A is NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS NOT NULL

+

BOOLEAN

+

If A is not NULL, then TRUE is returned. Otherwise, FALSE is returned.

+

A IS DISTINCT FROM B

+

BOOLEAN

+

If A is not equal to B, TRUE is returned. NULL values are treated as equal; for example, NULL IS DISTINCT FROM NULL returns FALSE.

+

A IS NOT DISTINCT FROM B

+

BOOLEAN

+

If A is equal to B, TRUE is returned. NULL values are treated as equal; for example, NULL IS NOT DISTINCT FROM NULL returns TRUE.

+

A BETWEEN [ASYMMETRIC | SYMMETRIC] B AND C

+

BOOLEAN

+

If A is greater than or equal to B but less than or equal to C, TRUE is returned.

+
  • ASYMMETRIC: indicates that the order of B and C matters (B is the lower bound and C is the upper bound).

    For example, "A BETWEEN ASYMMETRIC B AND C" is equivalent to "A BETWEEN B AND C".

    +
  • SYMMETRIC: indicates that the order of B and C does not matter.

    For example, "A BETWEEN SYMMETRIC B AND C" is equivalent to "(A BETWEEN B AND C) OR (A BETWEEN C AND B)".

    +
+

A NOT BETWEEN B [ASYMMETRIC | SYMMETRIC]AND C

+

BOOLEAN

+

If A is less than B or greater than C, TRUE is returned.

+
  • ASYMMETRIC: indicates that the order of B and C matters (B is the lower bound and C is the upper bound).

    For example, "A NOT BETWEEN ASYMMETRIC B AND C" is equivalent to "A NOT BETWEEN B AND C".

    +
  • SYMMETRIC: indicates that the order of B and C does not matter.

    For example, "A NOT BETWEEN SYMMETRIC B AND C" is equivalent to "(A NOT BETWEEN B AND C) OR (A NOT BETWEEN C AND B)".

    +
+

A LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A matches pattern B, TRUE is returned. The escape character C can be defined as required.

+

A NOT LIKE B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match pattern B, TRUE is returned. The escape character C can be defined as required.

+

A SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A matches regular expression B, TRUE is returned. The escape character C can be defined as required.

+

A NOT SIMILAR TO B [ ESCAPE C ]

+

BOOLEAN

+

If A does not match regular expression B, TRUE is returned. The escape character C can be defined as required.

+

value IN (value [, value]* )

+

BOOLEAN

+

If the value is equal to any value in the list, TRUE is returned.

+

value NOT IN (value [, value]* )

+

BOOLEAN

+

If the value is not equal to any value in the list, TRUE is returned.

+

EXISTS (sub-query)

+

BOOLEAN

+

If sub-query returns at least one row, TRUE is returned.

+

value IN (sub-query)

+

BOOLEAN

+

If value is equal to a row returned by subquery, TRUE is returned.

+

value NOT IN (sub-query)

+

BOOLEAN

+

If value is not equal to a row returned by subquery, TRUE is returned.

+
+
+

Precautions

+ + +
+
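
For illustration only (not part of the original reference), several of the operators above can be combined in a single filter; the table and column names are hypothetical:

insert into temp SELECT * FROM Orders
+  WHERE price BETWEEN 10 AND 100
+    AND product LIKE 'scooter%'
+    AND user_name IS NOT NULL;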

Logical Operators

Common logical operators are AND, OR, and NOT. Their priority order is NOT > AND > OR.

+

Table 2 lists the calculation rules. A and B indicate logical expressions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Logical Operators

Operator

+

Returned Data Type

+

Description

+

A OR B

+

BOOLEAN

+

If A or B is TRUE, TRUE is returned. Three-valued logic is supported.

+

A AND B

+

BOOLEAN

+

If both A and B are TRUE, TRUE is returned. Three-valued logic is supported.

+

NOT A

+

BOOLEAN

+

If A is FALSE, TRUE is returned. If A is TRUE, FALSE is returned. If A is UNKNOWN, UNKNOWN is returned.

+

A IS FALSE

+

BOOLEAN

+

If A is FALSE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT FALSE

+

BOOLEAN

+

If A is not FALSE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS TRUE

+

BOOLEAN

+

If A is TRUE, TRUE is returned. If A is UNKNOWN, FALSE is returned.

+

A IS NOT TRUE

+

BOOLEAN

+

If A is not TRUE, TRUE is returned. If A is UNKNOWN, TRUE is returned.

+

A IS UNKNOWN

+

BOOLEAN

+

If A is UNKNOWN, TRUE is returned.

+

A IS NOT UNKNOWN

+

BOOLEAN

+

If A is not UNKNOWN, TRUE is returned.

+
+
+

Precautions

+

Only data of the Boolean type can be used for calculation using logical operators. Implicit type conversion is not supported.

+
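
As a brief illustration of the NOT > AND > OR precedence noted above (the table and column names are hypothetical, and flag is a BOOLEAN column), the following two filters are equivalent:

insert into temp SELECT * FROM Orders
+  WHERE NOT flag AND units > 3 OR price > 100;
+
+insert into temp SELECT * FROM Orders
+  WHERE ((NOT flag) AND units > 3) OR price > 100;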
+

Arithmetic Operators

Arithmetic operators include binary operators and unary operators, for all of which, the returned results are of the numeric type. Table 3 lists arithmetic operators supported by Flink SQL.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 3 Arithmetic Operators

Operator

+

Returned Data Type

+

Description

+

+ numeric

+

All numeric types

+

Returns numbers.

+

- numeric

+

All numeric types

+

Returns negative numbers.

+

A + B

+

All numeric types

+

A plus B. The result type depends on the operand data types. For example, if a floating-point number is added to an integer, the result is a floating-point number.

+

A - B

+

All numeric types

+

A minus B. The result type is associated with the operation data type.

+

A * B

+

All numeric types

+

Multiply A and B. The result type is associated with the operation data type.

+

A / B

+

All numeric types

+

Divide A by B. The result is a number of the double type (double-precision number).

+

POWER(A, B)

+

All numeric types

+

Returns the value of A raised to the power B.

+

ABS(numeric)

+

All numeric types

+

Returns the absolute value of a specified value.

+

MOD(A, B)

+

All numeric types

+

Returns the remainder (modulus) of A divided by B. A negative value is returned only when A is a negative value.

+

SQRT(A)

+

All numeric types

+

Returns the square root of A.

+

LN(A)

+

All numeric types

+

Returns the natural logarithm of A (base e).

+

LOG10(A)

+

All numeric types

+

Returns the base 10 logarithms of A.

+

LOG2(A)

+

All numeric types

+

Returns the base 2 logarithm of A.

+

LOG(B)

+

LOG(A, B)

+

All numeric types

+

When called with one argument, returns the natural logarithm of B.

+

When called with two arguments, this function returns the logarithm of B to the base A.

+

B must be greater than 0 and A must be greater than 1.

+

EXP(A)

+

All numeric types

+

Returns the value of e raised to the power of A.

+

CEIL(A)

+

CEILING(A)

+

All numeric types

+

Returns the smallest integer that is greater than or equal to A. For example, ceil(21.2) = 22.

+

FLOOR(A)

+

All numeric types

+

Returns the largest integer that is less than or equal to A. For example, floor(21.2) = 21.

+

SIN(A)

+

All numeric types

+

Returns the sine value of A.

+

COS(A)

+

All numeric types

+

Returns the cosine value of A.

+

TAN(A)

+

All numeric types

+

Returns the tangent value of A.

+

COT(A)

+

All numeric types

+

Returns the cotangent value of A.

+

ASIN(A)

+

All numeric types

+

Returns the arc sine value of A.

+

ACOS(A)

+

All numeric types

+

Returns the arc cosine value of A.

+

ATAN(A)

+

All numeric types

+

Returns the arc tangent value of A.

+

ATAN2(A, B)

+

All numeric types

+

Returns the arc tangent of a coordinate (A, B).

+

COSH(A)

+

All numeric types

+

Returns the hyperbolic cosine of A. Return value type is DOUBLE.

+

DEGREES(A)

+

All numeric types

+

Converts the value of A from radians to degrees.

+

RADIANS(A)

+

All numeric types

+

Converts the value of A from degrees to radians.

+

SIGN(A)

+

All numeric types

+

Returns the sign of A. 1 is returned if A is positive. –1 is returned if A is negative. Otherwise, 0 is returned.

+

ROUND(A, d)

+

All numeric types

+

Returns a number rounded to d decimal places for A. For example: round(21.263,2) = 21.26.

+

PI

+

All numeric types

+

Returns the value of pi.

+

E()

+

All numeric types

+

Returns the value of e.

+

RAND()

+

All numeric types

+

Returns a pseudorandom double value in the range [0.0, 1.0)

+

RAND(A)

+

All numeric types

+

Returns a pseudorandom double value in the range [0.0, 1.0) with an initial seed A. Two RAND functions will return identical sequences of numbers if they have the same initial seed.

+

RAND_INTEGER(A)

+

All numeric types

+

Returns a pseudorandom integer value in the range [0, A).

+

RAND_INTEGER(A, B)

+

All numeric types

+

Returns a pseudorandom integer value in the range [0, B) with an initial seed A.

+

UUID()

+

All numeric types

+

Returns a UUID string.

+

BIN(A)

+

All numeric types

+

Returns a string representation of integer A in binary format. Returns NULL if A is NULL.

+

HEX(A)

+

HEX(B)

+

All numeric types

+

Returns a string representation of an integer A value or a string B in hex format. Returns NULL if the argument is NULL.

+

TRUNCATE(A, d)

+

All numeric types

+

Returns A truncated to d decimal places. If d is omitted, the number is truncated to 0 decimal places. Returns NULL if A or d is NULL.

+

Example: truncate (42.345, 2) = 42.340

+

truncate(42.345) = 42.000

+

PI()

+

All numeric types

+

Returns the value of pi.

+
+
+

Precautions

+

Data of the string type is not allowed in arithmetic operations.

+
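
A short sketch combining several of the functions above (the table and column names are hypothetical):

insert into temp SELECT
+  ROUND(pay_amount * 0.9, 2),
+  MOD(units, 4),
+  POWER(2, units),
+  ABS(pay_amount - real_pay)
+FROM Orders;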
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0428.html b/docs/dli/sqlreference/dli_08_0428.html new file mode 100644 index 00000000..91c5d67f --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0428.html @@ -0,0 +1,437 @@ + + +

String Functions

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 String Functions

Function

+

Return Type

+

Description

+

string1 || string2

+

STRING

+

Returns the concatenation of string1 and string2.

+

CHAR_LENGTH(string)

+

CHARACTER_LENGTH(string)

+

INT

+

Returns the number of characters in the string.

+

UPPER(string)

+

STRING

+

Returns the string in uppercase.

+

LOWER(string)

+

STRING

+

Returns the string in lowercase.

+

POSITION(string1 IN string2)

+

INT

+

Returns the position (starting from 1) of the first occurrence of string1 in string2; returns 0 if string1 cannot be found in string2.

+

TRIM([ BOTH | LEADING | TRAILING ] string1 FROM string2)

+

STRING

+

Returns a string that removes leading and/or trailing string1 characters from string2.

+

LTRIM(string)

+

STRING

+

Returns a string that removes the left whitespaces from the specified string.

+

For example, LTRIM(' This is a test String.') returns "This is a test String.".

+

RTRIM(string)

+

STRING

+

Returns a string that removes the right whitespaces from the specified string.

+

For example, RTRIM('This is a test String. ') returns "This is a test String.".

+

REPEAT(string, integer)

+

STRING

+

Returns a string that repeats the base string integer times.

+

For example, REPEAT('This is a test String.', 2) returns "This is a test String.This is a test String.".

+

REGEXP_REPLACE(string1, string2, string3)

+

STRING

+

Returns a string in which all the substrings of string1 that match the regular expression string2 are replaced with string3.

+

For example, REGEXP_REPLACE('foobar', 'oo|ar', '') returns "fb".

+

REGEXP_REPLACE('ab\ab', '\\', 'e') returns "abeab".

+

OVERLAY(string1 PLACING string2 FROM integer1 [ FOR integer2 ])

+

STRING

+

Returns a string that replaces integer2 characters of string1 with string2 starting from position integer1.

+

The default value of integer2 is the length of string2.

+

For example, OVERLAY('This is an old string' PLACING ' new' FROM 10 FOR 5) returns "This is a new string".

+

SUBSTRING(string FROM integer1 [ FOR integer2 ])

+

STRING

+

Returns a substring of the specified string starting from position integer1 with length integer2. If integer2 is not specified, the substring extends from integer1 to the end of the string.

+

REPLACE(string1, string2, string3)

+

STRING

+

Returns a new string in which all non-overlapping occurrences of string2 in string1 are replaced with string3.

+

For example, REPLACE('hello world', 'world', 'flink') returns "hello flink"; REPLACE('ababab', 'abab', 'z') returns "zab".

+

REPLACE('ab\\ab', '\\', 'e') returns "abeab".

+

REGEXP_EXTRACT(string1, string2[, integer])

+

STRING

+

Returns the substring of string1 extracted with the regular expression string2 and the regex match group index integer.

+

Returns NULL, if the parameter is NULL or the regular expression is invalid.

+

For example, REGEXP_EXTRACT('foothebar', 'foo(.*?)(bar)', 2) returns "bar".

+

INITCAP(string)

+

STRING

+

Returns a new form of STRING with the first character of each word converted to uppercase and the remaining characters converted to lowercase.

+

CONCAT(string1, string2,...)

+

STRING

+

Returns a string that concatenates string1, string2, ….

+

For example, CONCAT('AA', 'BB', 'CC') returns "AABBCC".

+

CONCAT_WS(string1, string2, string3,...)

+

STRING

+

Returns a string that concatenates string2, string3, … with a separator string1. The separator is added between the strings to be concatenated. Returns NULL if string1 is NULL. If other arguments are NULL, this function automatically skips NULL arguments.

+

For example, CONCAT_WS('~', 'AA', NULL, 'BB', '', 'CC') returns "AA~BB~~CC".

+

LPAD(string1, integer, string2)

+

STRING

+

Returns a new string from string1 left-padded with string2 to a length of integer characters.

+

If any argument is NULL, NULL is returned.

+

If integer is negative, NULL is returned.

+

If the length of string1 is greater than integer, string1 is shortened to integer characters.

+

For example, LPAD('hi',4,'??') returns "??hi".

+

LPAD('hi',1,'??') returns "h".

+

RPAD(string1, integer, string2)

+

STRING

+

Returns a new string from string1 right-padded with string2 to a length of integer characters.

+

If any argument is NULL, NULL is returned.

+

If integer is negative, NULL is returned.

+

If the length of string1 is greater than integer, string1 is shortened to integer characters.

+

For example, RPAD('hi',4,'??') returns "hi??".

+

RPAD('hi',1,'??') returns "h".

+

FROM_BASE64(string)

+

STRING

+

Returns the base64-decoded result from string.

+

Returns NULL if string is NULL.

+

For example, FROM_BASE64('aGVsbG8gd29ybGQ=') returns "hello world".

+

TO_BASE64(string)

+

STRING

+

Returns the base64-encoded result from string.

+

Returns NULL if string is NULL.

+

For example, TO_BASE64('hello world') returns "aGVsbG8gd29ybGQ=".

+

ASCII(string)

+

INT

+

Returns the numeric value of the first character of string.

+

Returns NULL if string is NULL.

+

For example, ascii('abc') returns 97.

+

ascii(CAST(NULL AS VARCHAR)) returns NULL.

+

CHR(integer)

+

STRING

+

Returns the ASCII character having the binary equivalent to integer.

+

If integer is larger than 255, the modulus of integer divided by 255 is obtained first, and the CHR of the modulus is returned.

+

Returns NULL if integer is NULL.

+

chr(97) returns a.

+

chr(353) returns a.

+

DECODE(binary, string)

+

STRING

+

Decodes the first argument into a String using the provided character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').

+

If either argument is NULL, the result will also be NULL.

+

ENCODE(string1, string2)

+

STRING

+

Encodes the string1 into a BINARY using the provided string2 character set (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').

+

If either argument is NULL, the result will also be NULL.

+

INSTR(string1, string2)

+

INT

+

Returns the position of the first occurrence of string2 in string1.

+

Returns NULL if any argument is NULL.

+

LEFT(string, integer)

+

STRING

+

Returns the leftmost integer characters from the string.

+

Returns an empty string if integer is negative.

+

Returns NULL if any argument is NULL.

+

RIGHT(string, integer)

+

STRING

+

Returns the rightmost integer characters from the string.

+

Returns an empty string if integer is negative.

+

Returns NULL if any argument is NULL.

+

LOCATE(string1, string2[, integer])

+

INT

+

Returns the position of the first occurrence of string1 in string2 after position integer.

+

Returns 0 if not found.

+

The value of integer defaults to 0.

+

Returns NULL if any argument is NULL.

+

PARSE_URL(string1, string2[, string3])

+

STRING

+

Returns the specified part from the URL.

+

Valid values for string2 include 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'AUTHORITY', 'FILE', and 'USERINFO'.

+

Returns NULL if any argument is NULL.

+

If string2 is QUERY, the key in QUERY can be specified as string3.

+

Example:

+

parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'.

+

parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') returns 'v1'.

+

REGEXP(string1, string2)

+

BOOLEAN

+

Performs a regular expression search on the specified string and returns a BOOLEAN value indicating whether the specified match pattern is found. If it is found, TRUE is returned. string1 indicates the specified string, and string2 indicates the regular expression.

+

Returns NULL if any argument is NULL.

+

REVERSE(string)

+

STRING

+

Returns the reversed string.

+

Returns NULL if any argument is NULL.

+
NOTE:

Note that backquotes must be added to this function, for example, `REVERSE`.

+
+

SPLIT_INDEX(string1, string2, integer1)

+

STRING

+

Splits string1 by the delimiter string2 and returns the integer1-th (zero-based) substring of the split strings.

+

Returns NULL if integer1 is negative.

+

Returns NULL if any argument is NULL.

+

STR_TO_MAP(string1[, string2, string3])

+

MAP

+

Returns a map after splitting the string1 into key/value pairs using delimiters.

+

The default value of string2 is ','.

+

The default value of string3 is '='.

+

SUBSTR(string[, integer1[, integer2]])

+

STRING

+

Returns a substring of string starting from position integer1 with length integer2.

+

If integer2 is not specified, the substring extends to the end of the string.

+

JSON_VAL(STRING json_string, STRING json_path)

+

STRING

+

Returns the value of the specified json_path from the json_string. For details about how to use the functions, see JSON_VAL Function.

+
NOTE:

The following rules are listed in descending order of priority.

+
  1. The two arguments json_string and json_path cannot be NULL.
  2. The value of json_string must be a valid JSON string. Otherwise, the function returns NULL.
  3. If json_string is an empty string, the function returns an empty string.
  4. If json_path is an empty string or the path does not exist, the function returns NULL.
+
+
+
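A minimal sketch combining several functions from Table 1 is shown below; kafkaSource and printSink are placeholder table names reused from the job examples elsewhere in this reference.

insert into printSink select
  CONCAT_WS('-', 'a', 'b', 'c'),                    -- "a-b-c"
  LPAD('7', 3, '0'),                                -- "007"
  SPLIT_INDEX('one-two-three', '-', 1),             -- "two" (zero-based index)
  REGEXP_EXTRACT('foothebar', 'foo(.*?)(bar)', 2)   -- "bar"
from kafkaSource;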
+

JSON_VAL Function

+
STRING JSON_VAL(STRING json_string, STRING json_path)
+ +
+ + + + + + + + + + + + + +
Table 2 Parameters

Parameter

+

Data Types

+

Description

+

json_string

+

STRING

+

JSON object to be parsed

+

json_path

+

STRING

+

Path expression for parsing the JSON string. For the supported expressions, see Table 3.

+
+
+ +
+ + + + + + + + + + + + + + + + +
Table 3 Expressions supported

Expression

+

Description

+

$

+

Root node in the path

+

[]

+

Access array elements

+

*

+

Array wildcard

+

.

+

Access child elements

+
+
+ +
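As a minimal sketch of JSON_VAL with the path expressions in Table 3, the statement below parses an illustrative JSON literal; kafkaSource and printSink are placeholder tables.

insert into printSink select
  JSON_VAL('{"store":{"book":[{"title":"a1"},{"title":"a2"}]}}', '$.store.book[0].title'),  -- title of the first book
  JSON_VAL('{"store":{"book":[{"title":"a1"}]}}', '$.missing')                              -- path does not exist, so NULL is returned
from kafkaSource;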
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0429.html b/docs/dli/sqlreference/dli_08_0429.html new file mode 100644 index 00000000..ec843133 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0429.html @@ -0,0 +1,1680 @@ + + +

Temporal Functions

+

Table 1 lists the time functions supported by Flink OpenSource SQL.

+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Temporal Functions

Function

+

Return Type

+

Description

+

DATE string

+

DATE

+

Parse the date string (yyyy-MM-dd) to a SQL date.

+

TIME string

+

TIME

+

Parse the time string (HH:mm:ss[.fff]) to a SQL time.

+

TIMESTAMP string

+

TIMESTAMP

+

Convert the time string into a timestamp. The time string format is yyyy-MM-dd HH:mm:ss[.fff].

+

INTERVAL string range

+

INTERVAL

+

interval indicates the interval. There are two forms:

+
  • yyyy-MM for SQL intervals of months. An interval range might be YEAR or YEAR TO MONTH for intervals of months.
  • dd hh:mm:ss.fff for SQL intervals of milliseconds. An interval range might be DAY, MINUTE, DAY TO HOUR, or DAY TO SECOND.
+

Example:

+

INTERVAL '10 00:00:00.004' DAY TO SECOND indicates that the interval is 10 days and 4 milliseconds.

+

INTERVAL '10' DAY indicates that the interval is 10 days.

+

INTERVAL '2-10' YEAR TO MONTH indicates that the interval is two years and ten months.

+

CURRENT_DATE

+

DATE

+

Return the SQL date of UTC time zone.

+

CURRENT_TIME

+

TIME

+

Return the SQL time of UTC time zone.

+

CURRENT_TIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of UTC time zone.

+

LOCALTIME

+

TIME

+

Return the SQL time of the current time zone.

+

LOCALTIMESTAMP

+

TIMESTAMP

+

Return the SQL timestamp of the current time zone.

+

EXTRACT(timeintervalunit FROM temporal)

+

BIGINT

+

Extracts part of a time point or interval and returns it as a BIGINT.

+

For example, extracting DAY from the date 2006-06-05 returns 5.

+

EXTRACT(DAY FROM DATE '2006-06-05') returns 5.

+

YEAR(date)

+

BIGINT

+

Return the year from SQL date.

+

For example, YEAR(DATE'1994-09-27') returns 1994.

+

QUARTER(date)

+

BIGINT

+

Return the quarter of a year (an integer between 1 and 4) from SQL date.

+

MONTH(date)

+

BIGINT

+

+

Return the month of a year (an integer between 1 and 12) from SQL date.

+

For example, MONTH(DATE '1994-09-27') returns 9.

+

WEEK(date)

+

BIGINT

+

Return the week of a year (an integer between 1 and 53) from SQL date.

+

For example, WEEK(DATE'1994-09-27') returns 39.

+

DAYOFYEAR(date)

+

BIGINT

+

Returns the day of a year (an integer between 1 and 366) from SQL date.

+

For example, DAYOFYEAR(DATE '1994-09-27') is 270.

+

DAYOFMONTH(date)

+

BIGINT

+

Return the day of a month (an integer between 1 and 31) from SQL date.

+

For example, DAYOFMONTH(DATE'1994-09-27') returns 27.

+

DAYOFWEEK(date)

+

BIGINT

+

Return the day of a week (an integer between 1 and 7) from SQL date.

+

Sunday is set to 1.

+

For example, DAYOFWEEK(DATE'1994-09-27') returns 3.

+

HOUR(timestamp)

+

BIGINT

+

Returns the hour of a day (an integer between 0 and 23) from SQL timestamp.

+

For example, HOUR(TIMESTAMP '1994-09-27 13:14:15') returns 13.

+

MINUTE(timestamp)

+

BIGINT

+

Returns the minute of an hour (an integer between 0 and 59) from SQL timestamp.

+

For example, MINUTE(TIMESTAMP '1994-09-27 13:14:15') returns 14.

+

SECOND(timestamp)

+

BIGINT

+

Returns the second of a minute (an integer between 0 and 59) from SQL timestamp.

+

For example, SECOND(TIMESTAMP '1994-09-27 13:14:15') returns 15.

+

FLOOR(timepoint TO timeintervalunit)

+

TIME

+

Round a time point down to the given unit.

+

For example, 12:44:00 is returned from FLOOR(TIME '12:44:31' TO MINUTE).

+

CEIL(timepoint TO timeintervalunit)

+

TIME

+

Round a time point up to the given unit.

+

For example, CEIL(TIME '12:44:31' TO MINUTE) returns 12:45:00.

+

(timepoint1, temporal1) OVERLAPS (timepoint2, temporal2)

+

BOOLEAN

+

Return TRUE if two time intervals defined by (timepoint1, temporal1) and (timepoint2, temporal2) overlap.

+

Example:

+

(TIME '2:55:00', INTERVAL '1' HOUR) OVERLAPS (TIME '3:30:00', INTERVAL '2' HOUR) returns TRUE.

+

(TIME '9:00:00', TIME '10:00:00') OVERLAPS (TIME '10:15:00', INTERVAL '3' HOUR) returns FALSE.

+

DATE_FORMAT(timestamp, string)

+

STRING

+

Convert timestamp to a value of string in the format specified by the date format string.

+

TIMESTAMPADD(timeintervalunit, interval, timepoint)

+

TIMESTAMP/DATE/TIME

+

Returns the date and time obtained by adding interval (in units of timeintervalunit) to timepoint.

+

For example, TIMESTAMPADD(WEEK, 1, DATE '2003-01-02') returns 2003-01-09.

+

TIMESTAMPDIFF(timepointunit, timepoint1, timepoint2)

+

INT

+

Return the (signed) number of timepointunit between timepoint1 and timepoint2.

+

The unit for the interval is given by the first argument, which should be one of the following values: SECOND, MINUTE, HOUR, DAY, MONTH, or YEAR.

+

For example, TIMESTAMPDIFF(DAY, TIMESTAMP '2003-01-02 10:00:00', TIMESTAMP '2003-01-03 10:00:00') returns 1.

+

CONVERT_TZ(string1, string2, string3)

+

TIMESTAMP

+

Convert a datetime string1 from time zone string2 to time zone string3.

+

For example, CONVERT_TZ('1970-01-01 00:00:00', 'UTC', 'America/Los_Angeles') returns '1969-12-31 16:00:00'.

+

FROM_UNIXTIME(numeric[, string])

+

STRING

+

Return a string representation of the numeric argument (in seconds) in the current time zone.

+

The default string format is yyyy-MM-dd HH:mm:ss.

+

For example, FROM_UNIXTIME(44) returns 1970-01-01 09:00:44.

+

UNIX_TIMESTAMP()

+

BIGINT

+

Get current Unix timestamp in seconds.

+

+

UNIX_TIMESTAMP(string1[, string2])

+

BIGINT

+

Convert date time string string1 in format string2 to Unix timestamp (in seconds), using the specified timezone in table config.

+

The default format of string2 is yyyy-MM-dd HH:mm:ss.

+

TO_DATE(string1[, string2])

+

DATE

+

Convert a date string string1 with format string2 to a date.

+

The default format of string2 is yyyy-MM-dd.

+

TO_TIMESTAMP(string1[, string2])

+

TIMESTAMP

+

Converts date time string string1 with format string2 under the 'UTC+0' time zone to a timestamp.

+

The default format of string2 is yyyy-MM-dd HH:mm:ss.

+
+
+
+

+
+
+
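The following minimal sketch strings together a few of the temporal functions from Table 1; kafkaSource and printSink are placeholder tables.

insert into printSink select
  EXTRACT(DAY FROM DATE '2006-06-05'),                  -- 5
  TIMESTAMPADD(WEEK, 1, DATE '2003-01-02'),             -- 2003-01-09
  TIMESTAMPDIFF(DAY, TIMESTAMP '2003-01-02 10:00:00',
                TIMESTAMP '2003-01-03 10:00:00'),       -- 1
  TO_TIMESTAMP('2022-04-02 15:00:00')                   -- parsed under the UTC+0 time zone
from kafkaSource;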
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0430.html b/docs/dli/sqlreference/dli_08_0430.html new file mode 100644 index 00000000..f532ccf9 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0430.html @@ -0,0 +1,77 @@ + + +

Conditional Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Conditional Functions

Conditional Functions

+

Description

+

CASE value

+

WHEN value1_1 [, value1_2 ]* THEN result1

+

[ WHEN value2_1 [, value2_2 ]* THEN result2 ]*

+

[ ELSE resultZ ]

+

END

+

Returns resultX when the value is contained in (valueX_1, valueX_2, …).

+

Only the first matched value is returned.

+

When no value matches, returns resultZ if it is provided and returns NULL otherwise.

+

CASE

+

WHEN condition1 THEN result1

+

[ WHEN condition2 THEN result2 ]*

+

[ ELSE resultZ ]

+

END

+

Returns resultX when the first conditionX is met.

+

Only the first matched value is returned.

+

When no condition is met, returns resultZ if it is provided and returns NULL otherwise.

+

NULLIF(value1, value2)

+

Returns NULL if value1 is equal to value2; returns value1 otherwise.

+

For example, NULLIF(5, 5) returns NULL.

+

NULLIF(5, 0) returns 5.

+

COALESCE(value1, value2 [, value3 ]* )

+

Returns the first value (from left to right) that is not NULL from value1, value2, ….

+

For example, COALESCE(NULL, 5) returns 5.

+

IF(condition, true_value, false_value)

+

Returns the true_value if condition is met, otherwise false_value.

+

For example, IF(5 > 3, 5, 3) returns 5.

+

IS_ALPHA(string)

+

Returns TRUE if all characters in the string are letters, otherwise FALSE.

+

IS_DECIMAL(string)

+

Returns TRUE if string can be parsed to a valid numeric, otherwise FALSE.

+

IS_DIGIT(string)

+

Returns TRUE if all characters in string are digits, otherwise FALSE.

+
+
+
+
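For reference, a minimal sketch that uses several conditional functions together; the column amount and the tables kafkaSource and printSink are placeholders.

insert into printSink select
  CASE WHEN amount > 100 THEN 'high'
       WHEN amount > 10  THEN 'medium'
       ELSE 'low' END,           -- only the first matched condition is used
  NULLIF(amount, 0),             -- NULL when amount is 0, otherwise amount
  COALESCE(amount, 0),           -- first non-NULL value from left to right
  IF(amount > 3, amount, 3)      -- amount if it is greater than 3, otherwise 3
from kafkaSource;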
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0431.html b/docs/dli/sqlreference/dli_08_0431.html new file mode 100644 index 00000000..510077c3 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0431.html @@ -0,0 +1,139 @@ + + +

Type Conversion Functions

+

Syntax

CAST(value AS type)
+
+

Description

This function is used to forcibly convert types.

+
+

Precautions

If the input is NULL, NULL is returned.

+
+

Example 1: Convert the amount value to an integer.

The following example converts the amount value to an integer.

+
insert into temp select cast(amount as INT) from source_stream;
+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 1 Examples of type conversion functions

Function

+

Description

+

Example

+

cast(v1 as string)

+

Converts v1 to a string. The value of v1 can be of the numeric type or of the timestamp, date, or time type.

+

Table T1:

+
| content (INT)           |
+| -------------           |
+| 5                       |
+

Statement:

+
SELECT
+  cast(content as varchar)
+FROM
+  T1;
+

Result:

+
"5"
+

cast (v1 as int)

+

Converts v1 to the int type. The value of v1 can be a number or a character.

+

Table T1:

+
| content  (STRING)           |
+| -------------               |
+| "5"                         |
+

Statement:

+
SELECT
+  cast(content as int)
+FROM
+  T1;
+

Result:

+
5
+

cast(v1 as timestamp)

+

Converts v1 to the timestamp type. The value of v1 can be of the string, date, or time type.

+

Table T1:

+
| content  (STRING)          |
+| -------------              |
+| "2018-01-01 00:00:01"     |
+

Statement:

+
SELECT
+  cast(content as timestamp)
+FROM
+  T1;
+

Result:

+
1514736001000
+

cast(v1 as date)

+

Converts v1 to the date type. The value of v1 can be of the string or timestamp type.

+

Table T1:

+
| content  (TIMESTAMP)     |
+| -------------            |
+| 1514736001000            |
+

Statement:

+
SELECT
+  cast(content as date)
+FROM
+  T1;
+

Result:

+
"2018-01-01"
+
+
+

Flink jobs do not support the conversion of bigint to timestamp using CAST. You can convert it using to_timestamp.
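For example, a minimal sketch of such a conversion, assuming a BIGINT column unix_sec that holds Unix seconds (the column and tables are placeholders):

insert into printSink select
  to_timestamp(from_unixtime(unix_sec))  -- BIGINT seconds -> formatted string -> timestamp
from kafkaSource;
-- Note: FROM_UNIXTIME formats in the current time zone while TO_TIMESTAMP parses under UTC+0 (see Temporal Functions).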

+
+
+

Example 2:

  1. Create a Flink opensource SQL job by referring to Kafka Source Table and Print Result Table, enter the following job running script, and submit the job.
    Note: When creating a job, set Flink Version to 1.12 in the Running Parameters area on the job editing page, select Save Job Log, and set the OBS bucket for saving job logs to facilitate subsequent job log viewing. Change the values of the parameters in bold in the following script according to the actual situation.
    CREATE TABLE kafkaSource (
    +  cast_int_to_string int, 
    +  cast_String_to_int string,
    +  case_string_to_timestamp string, 
    +  case_timestamp_to_date timestamp
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "json"
    +);
    +
    +CREATE TABLE printSink (
    +  cast_int_to_string string, 
    +  cast_String_to_int int, 
    +  case_string_to_timestamp timestamp, 
    +  case_timestamp_to_date date
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink select 
    +  cast(cast_int_to_string as string),
    +  cast(cast_String_to_int as int),
    +  cast(case_string_to_timestamp as timestamp),  
    +  cast(case_timestamp_to_date as date)
    +from kafkaSource;
    +
    +
  2. Connect to the Kafka cluster and send the following test data to the Kafka topic:
    {"cast_int_to_string":"1", "cast_String_to_int": "1", "case_string_to_timestamp": "2022-04-02 15:00:00", "case_timestamp_to_date": "2022-04-02 15:00:00"}
    +
  3. View output.
    • Method 1:
      1. Log in to the DLI management console and choose Job Management > Flink Streaming Jobs.
      2. Locate the row that contains the target Flink job, and choose More > FlinkUI in the Operation column.
      3. On the Flink UI, choose Task Managers, click the task name, and select Stdout to view the job run logs.
      +
    • Method 2: If you select Save Job Log for Running Parameters before submitting the job, perform the following operations:
      1. Log in to the DLI management console and choose Job Management > Flink Streaming Jobs.
      2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the corresponding log based on the job running date.
      3. Go to the folder of the corresponding date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view the result log.
      +
    +
    The query result is as follows:
    +I(1,1,2022-04-02T15:00,2022-04-02)
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0432.html b/docs/dli/sqlreference/dli_08_0432.html new file mode 100644 index 00000000..091c3ab2 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0432.html @@ -0,0 +1,48 @@ + + +

Collection Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + +
Table 1 Collection functions

Collection Functions

+

Description

+

CARDINALITY(array)

+

Returns the number of elements in array.

+

array '[' integer ']'

+

Returns the element at position INT in array. The index starts from 1.

+

ELEMENT(array)

+

Returns the sole element of array (whose cardinality should be one).

+

Returns NULL if array is empty.

+

Throws an exception if array has more than one element.

+

CARDINALITY(map)

+

Returns the number of entries in map.

+

map '[' key ']'

+

Returns the value specified by key value in map.

+
+
+
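A minimal sketch of these collection functions (kafkaSource and printSink are placeholder tables):

insert into printSink select
  CARDINALITY(ARRAY[1, 2, 3]),        -- 3
  ARRAY[1, 2, 3][2],                  -- 2 (the index starts from 1)
  ELEMENT(ARRAY[7]),                  -- 7 (the array must hold exactly one element)
  CARDINALITY(MAP['k1', 'v1']),       -- 1
  MAP['k1', 'v1', 'k2', 'v2']['k2']   -- 'v2'
from kafkaSource;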
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0433.html b/docs/dli/sqlreference/dli_08_0433.html new file mode 100644 index 00000000..373edb04 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0433.html @@ -0,0 +1,38 @@ + + +

Value Construction Functions

+

Description

+
+ + + + + + + + + + + + + +
Table 1 Value construction functions

Value Construction Functions

+

Description

+

ROW(value1, [, value2]*)

+

(value1, [, value2]*)

+

Returns a row created from a list of values (value1, value2,…).

+

ARRAY '[' value1 [, value2 ]* ']'

+

Returns an array created from a list of values (value1, value2, …).

+

MAP '[' key1, value1 [, key2, value2]* ']'

+

Returns a map created from a list of key-value pairs ((value1, value2), (value3, value4), …).

+

The key-value pair is (key1, value1),(key2, value2).

+
+
+
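A minimal sketch of the constructors above; the sink would need columns of the corresponding ROW, ARRAY, and MAP types, and kafkaSource and printSink are placeholders.

insert into printSink select
  ROW('alice', 25),              -- a row with two fields
  ARRAY['a', 'b', 'c'],          -- an array of three strings
  MAP['k1', 'v1', 'k2', 'v2']    -- a map with two key-value pairs
from kafkaSource;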
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0434.html b/docs/dli/sqlreference/dli_08_0434.html new file mode 100644 index 00000000..ebdcaefa --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0434.html @@ -0,0 +1,31 @@ + + +

Value Access Functions

+

Description

+
+ + + + + + + + + + +
Table 1 Value access functions

Function

+

Description

+

tableName.compositeType.field

+

Returns the value of a field from a Flink composite type (e.g., Tuple, POJO) by name.

+

tableName.compositeType.*

+

Returns a flat representation of a Flink composite type (e.g., Tuple, POJO) that converts each of its direct subtypes into a separate field.

+
+
+
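As a minimal sketch, assume a table orders whose column address is a composite (ROW) type with a field named city; all of these names are hypothetical.

insert into printSink select
  orders.address.city   -- the city field of the composite address column
from orders;
-- orders.address.* would instead flatten every direct field of address into separate columns.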
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0435.html b/docs/dli/sqlreference/dli_08_0435.html new file mode 100644 index 00000000..c2181d17 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0435.html @@ -0,0 +1,64 @@ + + +

Hash Functions

+

Description

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Hash functions

Hash Functions

+

Description

+

MD5(string)

+

Returns the MD5 hash of string as a string of 32 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA1(string)

+

Returns the SHA-1 hash of string as a string of 40 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA224(string)

+

Returns the SHA-224 hash of string as a string of 56 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA256(string)

+

Returns the SHA-256 hash of string as a string of 64 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA384(string)

+

Returns the SHA-384 hash of string as a string of 96 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA512(string)

+

Returns the SHA-512 hash of string as a string of 128 hexadecimal digits.

+

Returns NULL if string is NULL.

+

SHA2(string, hashLength)

+

Returns the hash using the SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, or SHA-512).

+

The first argument string is the string to be hashed and the second argument hashLength is the bit length of the result (224, 256, 384, or 512).

+

If either argument is NULL, the result will also be NULL.

+
+
+
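A minimal sketch of the hash functions (kafkaSource and printSink are placeholder tables):

insert into printSink select
  MD5('flink'),         -- 32 hexadecimal digits
  SHA256('flink'),      -- 64 hexadecimal digits
  SHA2('flink', 512)    -- 128 hexadecimal digits, equivalent to SHA512('flink')
from kafkaSource;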
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0436.html b/docs/dli/sqlreference/dli_08_0436.html new file mode 100644 index 00000000..5239b896 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0436.html @@ -0,0 +1,124 @@ + + +

Aggregate Functions

+

An aggregate function performs a calculation operation on a set of input values and returns a value. For example, the COUNT function counts the number of rows retrieved by an SQL statement. Table 1 lists aggregate functions.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 1 Aggregate functions

Function

+

Return Type

+

Description

+

COUNT([ ALL ] expression | DISTINCT expression1 [, expression2]*)

+

BIGINT

+

Returns the number of input rows for which the expression is not NULL. Use DISTINCT for one unique instance of each value.

+

COUNT(*)

+

COUNT(1)

+

BIGINT

+

Returns the number of input rows.

+

AVG([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the average (arithmetic mean) of expression across all input rows.

+

Use DISTINCT for one unique instance of each value.

+

SUM([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sum of expression across all input rows.

+

Use DISTINCT for one unique instance of each value.

+

MAX([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the maximum value of expression across all input rows.

+

MIN([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the minimum value of expression across all input rows.

+

STDDEV_POP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the population standard deviation of expression across all input rows.

+

STDDEV_SAMP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample standard deviation of expression across all input rows.

+

VAR_POP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the population variance (square of the population standard deviation) of expression across all input rows.

+

VAR_SAMP([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample variance (square of the sample standard deviation) of expression across all input rows.

+

COLLECT([ ALL | DISTINCT ] expression)

+

MULTISET

+

Returns a multiset of expression across all input rows.

+

VARIANCE([ ALL | DISTINCT ] expression)

+

DOUBLE

+

Returns the sample variance (square of the sample standard deviation) of expression across all input rows.

+

FIRST_VALUE(expression)

+

Actual type

+

Returns the first value in an ordered set of values.

+

LAST_VALUE(expression)

+

Actual type

+

Returns the last value in an ordered set of values.

+
+
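A minimal sketch of a grouped aggregation; the columns item and amount and the tables kafkaSource and printSink are placeholders.

insert into printSink select
  item,
  COUNT(*),               -- number of input rows per item
  AVG(amount),            -- arithmetic mean of amount
  SUM(DISTINCT amount),   -- sum over unique amount values only
  MAX(amount),
  MIN(amount)
from kafkaSource
group by item;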
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0437.html b/docs/dli/sqlreference/dli_08_0437.html new file mode 100644 index 00000000..506368df --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0437.html @@ -0,0 +1,15 @@ + + +

Table-Valued Functions

+
+
+ + + +
+ diff --git a/docs/dli/sqlreference/dli_08_0438.html b/docs/dli/sqlreference/dli_08_0438.html new file mode 100644 index 00000000..030cb1e0 --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0438.html @@ -0,0 +1,154 @@ + + +

string_split

+

The string_split function splits a target string into substrings based on the specified separator and returns a substring list.

+

Description

string_split(target, separator)
+ +
+ + + + + + + + + + + + + +
Table 1 string_split parameters

Parameter

+

Data Types

+

Description

+

target

+

STRING

+

Target string to be processed

+
NOTE:
  • If target is NULL, an empty line is returned.
  • If target contains two or more consecutive separators, an empty substring is returned.
  • If target does not contain a specified separator, the original string passed to target is returned.
+
+

separator

+

VARCHAR

+

Separator. Currently, only single-character separators are supported.

+
+
+
+

Example

  1. Create a Flink OpenSource SQL job by referring to Kafka Source Table and Print Result Table, enter the following job running script, and submit the job.
    When you create a job, set Flink Version to 1.12 in the Running Parameters tab. Select Save Job Log, and specify the OBS bucket for saving job logs. Change the values of the parameters in bold as needed in the following script.
    CREATE TABLE kafkaSource (
    +  target STRING,  
    +  separator  VARCHAR
    +) WITH (
    +  'connector' = 'kafka',
    +  'topic' = 'KafkaTopic',
    +  'properties.bootstrap.servers' = 'KafkaAddress1:KafkaPort,KafkaAddress2:KafkaPort',
    +  'properties.group.id' = 'GroupId',
    +  'scan.startup.mode' = 'latest-offset',
    +  "format" = "json"
    +);
    +
    +CREATE TABLE printSink (
    +  target STRING,  
    +  item STRING
    +) WITH (
    +  'connector' = 'print'
    +);
    +
    +insert into printSink
    +  select target,
    +  item from 
    +  kafkaSource,
    +  lateral table(string_split(target, separator)) as T(item);
    +
    +
  2. Connect to the Kafka cluster and send the following test data to the Kafka topic:
    {"target":"test-flink","separator":"-"}
    +{"target":"flink","separator":"-"}
    +{"target":"one-two-ww-three","separator":"-"}
    +

    The data is as follows:

    + +
    + + + + + + + + + + + + + +
    Table 2 Test table data

    target (STRING)

    +

    separator (VARCHAR)

    +

    test-flink

    +

    -

    +

    flink

    +

    -

    +

    one-two-ww-three

    +

    -

    +
    +
    +
  3. View output.
    • Method 1:
      1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
      2. Locate the row that contains the target Flink job, and choose More > FlinkUI in the Operation column.
      3. On the Flink UI, choose Task Managers, click the task name, and select Stdout to view job logs.
      +
    • Method 2: If you select Save Job Log on the Running Parameters tab before submitting the job, perform the following operations:
      1. Log in to the DLI console. In the navigation pane, choose Job Management > Flink Jobs.
      2. Click the name of the corresponding Flink job, choose Run Log, click OBS Bucket, and locate the folder of the log you want to view according to the date.
      3. Go to the folder of the date, find the folder whose name contains taskmanager, download the taskmanager.out file, and view result logs.
      +
    +
    The query result is as follows:
    +I(test-flink,test)
    ++I(test-flink,flink)
    ++I(flink,flink)
    ++I(one-two-ww-three,one)
    ++I(one-two-ww-three,two)
    ++I(one-two-ww-three,ww)
    ++I(one-two-ww-three,three)
    +
    +

    The output data is as follows:

    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 3 Result table data

    target (STRING)

    +

    item (STRING)

    +

    test-flink

    +

    test

    +

    test-flink

    +

    flink

    +

    flink

    +

    flink

    +

    one-two-ww-three

    +

    one

    +

    one-two-ww-three

    +

    two

    +

    one-two-ww-three

    +

    ww

    +

    one-two-ww-three

    +

    three

    +
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/sqlreference/dli_08_0450.html b/docs/dli/sqlreference/dli_08_0450.html new file mode 100644 index 00000000..eaa99fdc --- /dev/null +++ b/docs/dli/sqlreference/dli_08_0450.html @@ -0,0 +1,12 @@ + + +

Historical Versions

+

+
+
+ +
+ diff --git a/docs/dli/sqlreference/en-us_image_0000001238321520.png b/docs/dli/sqlreference/en-us_image_0000001238321520.png new file mode 100644 index 00000000..f4a29e38 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0000001238321520.png differ diff --git a/docs/dli/sqlreference/en-us_image_0000001282578329.png b/docs/dli/sqlreference/en-us_image_0000001282578329.png new file mode 100644 index 00000000..df00ebd8 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0000001282578329.png differ diff --git a/docs/dli/sqlreference/en-us_image_0000001282578421.png b/docs/dli/sqlreference/en-us_image_0000001282578421.png new file mode 100644 index 00000000..f4a29e38 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0000001282578421.png differ diff --git a/docs/dli/sqlreference/en-us_image_0000001282841453.png b/docs/dli/sqlreference/en-us_image_0000001282841453.png new file mode 100644 index 00000000..df00ebd8 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0000001282841453.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206796795.png b/docs/dli/sqlreference/en-us_image_0206796795.png new file mode 100644 index 00000000..9d0e1a88 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206796795.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206796804.png b/docs/dli/sqlreference/en-us_image_0206796804.png new file mode 100644 index 00000000..b7f7a5a3 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206796804.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206796813.png b/docs/dli/sqlreference/en-us_image_0206796813.png new file mode 100644 index 00000000..479a4cbc Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206796813.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206796876.png b/docs/dli/sqlreference/en-us_image_0206796876.png new file mode 100644 index 00000000..a13b0bb9 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206796876.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206797025.png b/docs/dli/sqlreference/en-us_image_0206797025.png new file mode 100644 index 00000000..26c5d22a Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206797025.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206797032.png b/docs/dli/sqlreference/en-us_image_0206797032.png new file mode 100644 index 00000000..c266b5ba Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206797032.png differ diff --git a/docs/dli/sqlreference/en-us_image_0206797033.png b/docs/dli/sqlreference/en-us_image_0206797033.png new file mode 100644 index 00000000..4ad6fc00 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0206797033.png differ diff --git a/docs/dli/sqlreference/en-us_image_0223994226.png b/docs/dli/sqlreference/en-us_image_0223994226.png new file mode 100644 index 00000000..c412d5f6 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0223994226.png differ diff --git a/docs/dli/sqlreference/en-us_image_0223994227.png b/docs/dli/sqlreference/en-us_image_0223994227.png new file mode 100644 index 00000000..2171ee06 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0223994227.png differ diff --git a/docs/dli/sqlreference/en-us_image_0223994228.png b/docs/dli/sqlreference/en-us_image_0223994228.png new file mode 100644 index 00000000..b735a450 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0223994228.png differ diff --git a/docs/dli/sqlreference/en-us_image_0223994229.png 
b/docs/dli/sqlreference/en-us_image_0223994229.png new file mode 100644 index 00000000..63108244 Binary files /dev/null and b/docs/dli/sqlreference/en-us_image_0223994229.png differ diff --git a/docs/dli/sqlreference/public_sys-resources/caution_3.0-en-us.png b/docs/dli/sqlreference/public_sys-resources/caution_3.0-en-us.png new file mode 100644 index 00000000..60f60762 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/caution_3.0-en-us.png differ diff --git a/docs/dli/sqlreference/public_sys-resources/danger_3.0-en-us.png b/docs/dli/sqlreference/public_sys-resources/danger_3.0-en-us.png new file mode 100644 index 00000000..47a9c723 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/danger_3.0-en-us.png differ diff --git a/docs/dli/sqlreference/public_sys-resources/delta.gif b/docs/dli/sqlreference/public_sys-resources/delta.gif new file mode 100644 index 00000000..0d1b1f67 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/delta.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/deltaend.gif b/docs/dli/sqlreference/public_sys-resources/deltaend.gif new file mode 100644 index 00000000..cc7da0fc Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/deltaend.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-arrowdn.gif b/docs/dli/sqlreference/public_sys-resources/icon-arrowdn.gif new file mode 100644 index 00000000..37942803 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-arrowdn.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-arrowrt.gif b/docs/dli/sqlreference/public_sys-resources/icon-arrowrt.gif new file mode 100644 index 00000000..6aaaa11c Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-arrowrt.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-caution.gif b/docs/dli/sqlreference/public_sys-resources/icon-caution.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-caution.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-danger.gif b/docs/dli/sqlreference/public_sys-resources/icon-danger.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-danger.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-huawei.gif b/docs/dli/sqlreference/public_sys-resources/icon-huawei.gif new file mode 100644 index 00000000..a31d60f8 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-huawei.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-note.gif b/docs/dli/sqlreference/public_sys-resources/icon-note.gif new file mode 100644 index 00000000..31be2b03 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-note.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-notice.gif b/docs/dli/sqlreference/public_sys-resources/icon-notice.gif new file mode 100644 index 00000000..40907065 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-notice.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-tip.gif b/docs/dli/sqlreference/public_sys-resources/icon-tip.gif new file mode 100644 index 00000000..c47bae05 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-tip.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/icon-warning.gif 
b/docs/dli/sqlreference/public_sys-resources/icon-warning.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/icon-warning.gif differ diff --git a/docs/dli/sqlreference/public_sys-resources/note_3.0-en-us.png b/docs/dli/sqlreference/public_sys-resources/note_3.0-en-us.png new file mode 100644 index 00000000..57a0e1f5 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/note_3.0-en-us.png differ diff --git a/docs/dli/sqlreference/public_sys-resources/notice_3.0-en-us.png b/docs/dli/sqlreference/public_sys-resources/notice_3.0-en-us.png new file mode 100644 index 00000000..fa4b6499 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/notice_3.0-en-us.png differ diff --git a/docs/dli/sqlreference/public_sys-resources/warning_3.0-en-us.png b/docs/dli/sqlreference/public_sys-resources/warning_3.0-en-us.png new file mode 100644 index 00000000..def5c356 Binary files /dev/null and b/docs/dli/sqlreference/public_sys-resources/warning_3.0-en-us.png differ