diff --git a/docs/dli/dev/ALL_META.TXT.json b/docs/dli/dev/ALL_META.TXT.json new file mode 100644 index 00000000..43e7a943 --- /dev/null +++ b/docs/dli/dev/ALL_META.TXT.json @@ -0,0 +1,1304 @@ +[ + { + "dockw":"Developer Guide" + }, + { + "uri":"dli_09_0120.html", + "node_id":"dli_09_0120.xml", + "product_code":"dli", + "code":"1", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"SQL Jobs", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli" + } + ], + "title":"SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_05_0044.html", + "node_id":"dli_05_0044.xml", + "product_code":"dli", + "code":"2", + "des":"DLI allows you to use data stored on OBS. You can create OBS tables on DLI to access and process data in your OBS bucket.This section describes how to create an OBS table", + "doc_type":"devg", + "kw":"Using Spark SQL Jobs to Analyze OBS Data,SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Using Spark SQL Jobs to Analyze OBS Data", + "githuburl":"" + }, + { + "uri":"dli_09_0171.html", + "node_id":"dli_09_0171.xml", + "product_code":"dli", + "code":"3", + "des":"DLI allows you to use Hive user-defined functions (UDFs) to query data. UDFs take effect only on a single row of data and are applicable to inserting and deleting a singl", + "doc_type":"devg", + "kw":"Calling UDFs in Spark SQL Jobs,SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Calling UDFs in Spark SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_09_0204.html", + "node_id":"dli_09_0204.xml", + "product_code":"dli", + "code":"4", + "des":"You can use Hive User-Defined Table-Generating Functions (UDTF) to customize table-valued functions. Hive UDTFs are used for the one-in-multiple-out data operations. UDTF", + "doc_type":"devg", + "kw":"Calling UDTFs in Spark SQL Jobs,SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Calling UDTFs in Spark SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_05_0062.html", + "node_id":"dli_05_0062.xml", + "product_code":"dli", + "code":"5", + "des":"DLI allows you to use a Hive User Defined Aggregation Function (UDAF) to process multiple rows of data. Hive UDAF is usually used together with groupBy. It is equivalent ", + "doc_type":"devg", + "kw":"Calling UDAFs in Spark SQL Jobs,SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Calling UDAFs in Spark SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_09_0123.html", + "node_id":"dli_09_0123.xml", + "product_code":"dli", + "code":"6", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Submitting a Spark SQL Job Using JDBC", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Submitting a Spark SQL Job Using JDBC", + "githuburl":"" + }, + { + "uri":"dli_09_0124.html", + "node_id":"dli_09_0124.xml", + "product_code":"dli", + "code":"7", + "des":"On DLI, you can connect to the server for data query in the Internet environment. In this case, you need to first obtain the connection information, including the endpoin", + "doc_type":"devg", + "kw":"Obtaining the Server Connection Address,Submitting a Spark SQL Job Using JDBC,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Obtaining the Server Connection Address", + "githuburl":"" + }, + { + "uri":"dli_09_0125.html", + "node_id":"dli_09_0125.xml", + "product_code":"dli", + "code":"8", + "des":"To connect to DLI, JDBC is utilized. You can obtain the JDBC installation package from Maven or download the JDBC driver file from the DLI management console.JDBC driver ", + "doc_type":"devg", + "kw":"Downloading the JDBC Driver Package,Submitting a Spark SQL Job Using JDBC,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Downloading the JDBC Driver Package", + "githuburl":"" + }, + { + "uri":"dli_09_0121.html", + "node_id":"dli_09_0121.xml", + "product_code":"dli", + "code":"9", + "des":"You need to be authenticated when using JDBC to create DLI driver connections.Currently, the JDBC supports authentication using the Access Key/Secret Key (AK/SK) or token", + "doc_type":"devg", + "kw":"Performing Authentication,Submitting a Spark SQL Job Using JDBC,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Performing Authentication", + "githuburl":"" + }, + { + "uri":"dli_09_0127.html", + "node_id":"dli_09_0127.xml", + "product_code":"dli", + "code":"10", + "des":"In Linux or Windows, you can connect to the DLI server using JDBC.Jobs submitted to DLI using JDBC are executed on the Spark engine.Once JDBC 2.X has undergone function r", + "doc_type":"devg", + "kw":"Submitting a Job Using JDBC,Submitting a Spark SQL Job Using JDBC,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Submitting a Job Using JDBC", + "githuburl":"" + }, + { + "uri":"dli_09_0129.html", + "node_id":"dli_09_0129.xml", + "product_code":"dli", + "code":"11", + "des":"Relational Database Service (RDS) is a cloud-based web service that is reliable, scalable, easy to manage, and immediately ready for use. 
It can be deployed in single-nod", + "doc_type":"devg", + "kw":"Introduction to RDS,Submitting a Spark SQL Job Using JDBC,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Introduction to RDS", + "githuburl":"" + }, + { + "uri":"dli_09_0006.html", + "node_id":"dli_09_0006.xml", + "product_code":"dli", + "code":"12", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Flink OpenSource SQL Jobs", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Flink OpenSource SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_09_0009.html", + "node_id":"dli_09_0009.xml", + "product_code":"dli", + "code":"13", + "des":"This guide provides reference for Flink 1.12 only.In this example, we aim to query information about top three most-clicked offerings in each hour from a set of real-time", + "doc_type":"devg", + "kw":"Reading Data from Kafka and Writing Data to RDS,Flink OpenSource SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Reading Data from Kafka and Writing Data to RDS", + "githuburl":"" + }, + { + "uri":"dli_09_0010.html", + "node_id":"dli_09_0010.xml", + "product_code":"dli", + "code":"14", + "des":"This guide provides reference for Flink 1.12 only.This example analyzes real-time vehicle driving data and collects statistics on data results that meet specific conditio", + "doc_type":"devg", + "kw":"Reading Data from Kafka and Writing Data to GaussDB(DWS),Flink OpenSource SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Reading Data from Kafka and Writing Data to GaussDB(DWS)", + "githuburl":"" + }, + { + "uri":"dli_09_0011.html", + "node_id":"dli_09_0011.xml", + "product_code":"dli", + "code":"15", + "des":"This guide provides reference for Flink 1.12 only.This example analyzes offering purchase data and collects statistics on data results that meet specific conditions. The ", + "doc_type":"devg", + "kw":"Reading Data from Kafka and Writing Data to Elasticsearch,Flink OpenSource SQL Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Reading Data from Kafka and Writing Data to Elasticsearch", + "githuburl":"" + }, + { + "uri":"dli_09_0012.html", + "node_id":"dli_09_0012.xml", + "product_code":"dli", + "code":"16", + "des":"This guide provides reference for Flink 1.12 only.Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. 
Duri", + "doc_type":"devg", + "kw":"Reading Data from MySQL CDC and Writing Data to GaussDB(DWS),Flink OpenSource SQL Jobs,Developer Gui", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Reading Data from MySQL CDC and Writing Data to GaussDB(DWS)", + "githuburl":"" + }, + { + "uri":"dli_09_0013.html", + "node_id":"dli_09_0013.xml", + "product_code":"dli", + "code":"17", + "des":"This guide provides reference for Flink 1.12 only.Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. Duri", + "doc_type":"devg", + "kw":"Reading Data from PostgreSQL CDC and Writing Data to GaussDB(DWS),Flink OpenSource SQL Jobs,Develope", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Reading Data from PostgreSQL CDC and Writing Data to GaussDB(DWS)", + "githuburl":"" + }, + { + "uri":"dli_09_0207.html", + "node_id":"dli_09_0207.xml", + "product_code":"dli", + "code":"18", + "des":"If you need to configure high reliability for a Flink application, you can set the parameters when creating your Flink jobs.Create an SMN topic and add an email address o", + "doc_type":"devg", + "kw":"Configuring High-Reliability Flink Jobs (Automatic Restart upon Exceptions),Flink OpenSource SQL Job", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Configuring High-Reliability Flink Jobs (Automatic Restart upon Exceptions)", + "githuburl":"" + }, + { + "uri":"dli_09_0202.html", + "node_id":"dli_09_0202.xml", + "product_code":"dli", + "code":"19", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Flink Jar Jobs", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Flink Jar Jobs", + "githuburl":"" + }, + { + "uri":"dli_09_0162.html", + "node_id":"dli_09_0162.xml", + "product_code":"dli", + "code":"20", + "des":"Built on Flink and Spark, the stream ecosystem is fully compatible with the open-source Flink, Storm, and Spark APIs. 
It is enhanced in features and improved in performan", + "doc_type":"devg", + "kw":"Stream Ecosystem,Flink Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Stream Ecosystem", + "githuburl":"" + }, + { + "uri":"dli_09_0150.html", + "node_id":"dli_09_0150.xml", + "product_code":"dli", + "code":"21", + "des":"You can perform secondary development based on Flink APIs to build your own Jar packages and submit them to the DLI queues to interact with data sources such as MRS Kafka", + "doc_type":"devg", + "kw":"Flink Jar Job Examples,Flink Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Flink Jar Job Examples", + "githuburl":"" + }, + { + "uri":"dli_09_0191.html", + "node_id":"dli_09_0191.xml", + "product_code":"dli", + "code":"22", + "des":"DLI allows you to use a custom JAR package to run Flink jobs and write data to OBS. This section describes how to write processed Kafka data to OBS. You need to modify th", + "doc_type":"devg", + "kw":"Writing Data to OBS Using Flink Jar,Flink Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Writing Data to OBS Using Flink Jar", + "githuburl":"" + }, + { + "uri":"dli_09_0203.html", + "node_id":"dli_09_0203.xml", + "product_code":"dli", + "code":"23", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Spark Jar Jobs", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Spark Jar Jobs", + "githuburl":"" + }, + { + "uri":"dli_09_0205.html", + "node_id":"dli_09_0205.xml", + "product_code":"dli", + "code":"24", + "des":"DLI is fully compatible with open-source Apache Spark and allows you to import, query, analyze, and process job data by programming. This section describes how to write a", + "doc_type":"devg", + "kw":"Using Spark Jar Jobs to Read and Query OBS Data,Spark Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Using Spark Jar Jobs to Read and Query OBS Data", + "githuburl":"" + }, + { + "uri":"dli_09_0176.html", + "node_id":"dli_09_0176.xml", + "product_code":"dli", + "code":"25", + "des":"DLI allows you to develop a program to create Spark jobs for operations related to databases, DLI or OBS tables, and table data. This example demonstrates how to develop ", + "doc_type":"devg", + "kw":"Using the Spark Job to Access DLI Metadata,Spark Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Using the Spark Job to Access DLI Metadata", + "githuburl":"" + }, + { + "uri":"dli_09_0122.html", + "node_id":"dli_09_0122.xml", + "product_code":"dli", + "code":"26", + "des":"DLI Spark-submit is a command line tool used to submit Spark jobs to the DLI server. 
This tool provides command lines compatible with open-source Spark.Getting authorized", + "doc_type":"devg", + "kw":"Using Spark-submit to Submit a Spark Jar Job,Spark Jar Jobs,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Using Spark-submit to Submit a Spark Jar Job", + "githuburl":"" + }, + { + "uri":"dli_09_0019.html", + "node_id":"dli_09_0019.xml", + "product_code":"dli", + "code":"27", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Using Spark Jobs to Access Data Sources of Datasource Connections", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Using Spark Jobs to Access Data Sources of Datasource Connections", + "githuburl":"" + }, + { + "uri":"dli_09_0020.html", + "node_id":"dli_09_0020.xml", + "product_code":"dli", + "code":"28", + "des":"DLI supports the native Spark DataSource capability and other extended capabilities. You can use SQL statements or Spark jobs to access other data storage services and im", + "doc_type":"devg", + "kw":"Overview,Using Spark Jobs to Access Data Sources of Datasource Connections,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_09_0089.html", + "node_id":"dli_09_0089.xml", + "product_code":"dli", + "code":"29", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to CSS", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Connecting to CSS", + "githuburl":"" + }, + { + "uri":"dli_09_0189.html", + "node_id":"dli_09_0189.xml", + "product_code":"dli", + "code":"30", + "des":"The Elasticsearch 6.5.4 and later versions provided by CSS provides the security settings. 
Once the function is enabled, CSS provides identity authentication, authorizati", + "doc_type":"devg", + "kw":"CSS Security Cluster Configuration,Connecting to CSS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"CSS Security Cluster Configuration", + "githuburl":"" + }, + { + "uri":"dli_09_0061.html", + "node_id":"dli_09_0061.xml", + "product_code":"dli", + "code":"31", + "des":"A datasource connection has been created on the DLI management console.Development descriptionConstructing dependency information and creating a Spark sessionImport depen", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to CSS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0090.html", + "node_id":"dli_09_0090.xml", + "product_code":"dli", + "code":"32", + "des":"A datasource connection has been created on the DLI management console.Development descriptionCode implementationImport dependency packages.from __future__ import print_f", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to CSS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0190.html", + "node_id":"dli_09_0190.xml", + "product_code":"dli", + "code":"33", + "des":"A datasource connection has been created on the DLI management console.Development descriptionCode implementationConstructing dependency information and creating a Spark ", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to CSS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0086.html", + "node_id":"dli_09_0086.xml", + "product_code":"dli", + "code":"34", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to GaussDB(DWS)", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Connecting to GaussDB(DWS)", + "githuburl":"" + }, + { + "uri":"dli_09_0069.html", + "node_id":"dli_09_0069.xml", + "product_code":"dli", + "code":"35", + "des":"This section provides Scala example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been create", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to GaussDB(DWS),Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0087.html", + "node_id":"dli_09_0087.xml", + "product_code":"dli", + "code":"36", + "des":"This section provides PySpark example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been crea", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to GaussDB(DWS),Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0199.html", + "node_id":"dli_09_0199.xml", + "product_code":"dli", + "code":"37", + "des":"This section provides Java example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been created", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to GaussDB(DWS),Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0077.html", + "node_id":"dli_09_0077.xml", + "product_code":"dli", + "code":"38", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to HBase", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Connecting to HBase", + "githuburl":"" + }, + { + "uri":"dli_09_0196.html", + "node_id":"dli_09_0196.xml", + "product_code":"dli", + "code":"39", + "des":"Create a datasource connection on the DLI management console.Add the /etc/hosts information of MRS cluster nodes to the host file of the DLI queue.For details, see sectio", + "doc_type":"devg", + "kw":"MRS Configuration,Connecting to HBase,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"MRS Configuration", + "githuburl":"" + }, + { + "uri":"dli_09_0063.html", + "node_id":"dli_09_0063.xml", + "product_code":"dli", + "code":"40", + "des":"The CloudTable HBase and MRS HBase can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-coded ", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to HBase,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0078.html", + "node_id":"dli_09_0078.xml", + "product_code":"dli", + "code":"41", + "des":"The CloudTable HBase and MRS HBase can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-coded ", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to HBase,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0197.html", + "node_id":"dli_09_0197.xml", + "product_code":"dli", + "code":"42", + "des":"This example applies only to MRS HBase.PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext pa", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to HBase,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0198.html", + "node_id":"dli_09_0198.xml", + "product_code":"dli", + "code":"43", + "des":"SymptomThe Spark job fails to be executed, and the job log indicates that the Java server connection or container fails to be started.The Spark job fails to be executed, ", + "doc_type":"devg", + "kw":"Troubleshooting,Connecting to HBase,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Troubleshooting", + "githuburl":"" + }, + { + "uri":"dli_09_0080.html", + "node_id":"dli_09_0080.xml", + "product_code":"dli", + "code":"44", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD 
services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to OpenTSDB", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Connecting to OpenTSDB", + "githuburl":"" + }, + { + "uri":"dli_09_0065.html", + "node_id":"dli_09_0065.xml", + "product_code":"dli", + "code":"45", + "des":"The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to OpenTSDB,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0081.html", + "node_id":"dli_09_0081.xml", + "product_code":"dli", + "code":"46", + "des":"The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to OpenTSDB,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0193.html", + "node_id":"dli_09_0193.xml", + "product_code":"dli", + "code":"47", + "des":"This example applies only to MRS OpenTSDB.PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to OpenTSDB,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0195.html", + "node_id":"dli_09_0195.xml", + "product_code":"dli", + "code":"48", + "des":"SymptomA Spark job fails to be executed and \"No respond\" is displayed in the job log.A Spark job fails to be executed and \"No respond\" is displayed in the job log.Solutio", + "doc_type":"devg", + "kw":"Troubleshooting,Connecting to OpenTSDB,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Troubleshooting", + "githuburl":"" + }, + { + "uri":"dli_09_0083.html", + "node_id":"dli_09_0083.xml", + "product_code":"dli", + "code":"49", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to RDS", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Connecting to RDS", + "githuburl":"" + }, + { + "uri":"dli_09_0067.html", + "node_id":"dli_09_0067.xml", + "product_code":"dli", + "code":"50", + "des":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to RDS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0084.html", + "node_id":"dli_09_0084.xml", + "product_code":"dli", + "code":"51", + "des":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to RDS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0187.html", + "node_id":"dli_09_0187.xml", + "product_code":"dli", + "code":"52", + "des":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to RDS,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0093.html", + "node_id":"dli_09_0093.xml", + "product_code":"dli", + "code":"53", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to Redis", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Connecting to Redis", + "githuburl":"" + }, + { + "uri":"dli_09_0094.html", + "node_id":"dli_09_0094.xml", + "product_code":"dli", + "code":"54", + "des":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to Redis,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0097.html", + "node_id":"dli_09_0097.xml", + "product_code":"dli", + "code":"55", + "des":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to Redis,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0100.html", + "node_id":"dli_09_0100.xml", + "product_code":"dli", + "code":"56", + "des":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to Redis,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0188.html", + "node_id":"dli_09_0188.xml", + "product_code":"dli", + "code":"57", + "des":"SymptomAfter the code is directly copied to the .py file, unexpected characters may exist after the backslashes (\\).After the code is directly copied to the .py file, une", + "doc_type":"devg", + "kw":"Troubleshooting,Connecting to Redis,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "opensource":"true", + "IsBot":"Yes", + "IsMulti":"No", + "doc_type":"devg", + "product_code":"dli" + } + ], + "title":"Troubleshooting", + "githuburl":"" + }, + { + "uri":"dli_09_0113.html", + "node_id":"dli_09_0113.xml", + "product_code":"dli", + "code":"58", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Connecting to Mongo", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Connecting to Mongo", + "githuburl":"" + }, + { + "uri":"dli_09_0114.html", + "node_id":"dli_09_0114.xml", + "product_code":"dli", + "code":"59", + "des":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.An enhanced datasource connection has been created on the ", + "doc_type":"devg", + "kw":"Scala Example Code,Connecting to Mongo,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Scala Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0117.html", + "node_id":"dli_09_0117.xml", + "product_code":"dli", + "code":"60", + "des":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.PrerequisitesAn enhanced datasource connection has been cr", + "doc_type":"devg", + "kw":"PySpark Example Code,Connecting to Mongo,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"PySpark Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_0110.html", + "node_id":"dli_09_0110.xml", + "product_code":"dli", + "code":"61", + "des":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.PrerequisitesAn enhanced datasource connection has been cr", + "doc_type":"devg", + "kw":"Java Example Code,Connecting to Mongo,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli", + "IsMulti":"No", + "IsBot":"Yes" + } + ], + "title":"Java Example Code", + "githuburl":"" + }, + { + "uri":"dli_09_00001.html", + "node_id":"dli_09_00001.xml", + "product_code":"dli", + "code":"62", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"devg", + "kw":"Change History,Developer Guide", + "search_title":"", + "metedata":[ + { + "documenttype":"devg", + "prodname":"dli" + } + ], + "title":"Change History", + "githuburl":"" + } +] \ No newline at end of file diff --git a/docs/dli/dev/CLASS.TXT.json b/docs/dli/dev/CLASS.TXT.json new file mode 100644 index 00000000..3f293aa1 --- /dev/null +++ b/docs/dli/dev/CLASS.TXT.json @@ -0,0 +1,560 @@ +[ + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"SQL Jobs", + "uri":"dli_09_0120.html", + "doc_type":"devg", + "p_code":"", + "code":"1" + }, + { + "desc":"DLI allows you to use data stored on OBS. 
You can create OBS tables on DLI to access and process data in your OBS bucket.This section describes how to create an OBS table", + "product_code":"dli", + "title":"Using Spark SQL Jobs to Analyze OBS Data", + "uri":"dli_05_0044.html", + "doc_type":"devg", + "p_code":"1", + "code":"2" + }, + { + "desc":"DLI allows you to use Hive user-defined functions (UDFs) to query data. UDFs take effect only on a single row of data and are applicable to inserting and deleting a singl", + "product_code":"dli", + "title":"Calling UDFs in Spark SQL Jobs", + "uri":"dli_09_0171.html", + "doc_type":"devg", + "p_code":"1", + "code":"3" + }, + { + "desc":"You can use Hive User-Defined Table-Generating Functions (UDTF) to customize table-valued functions. Hive UDTFs are used for the one-in-multiple-out data operations. UDTF", + "product_code":"dli", + "title":"Calling UDTFs in Spark SQL Jobs", + "uri":"dli_09_0204.html", + "doc_type":"devg", + "p_code":"1", + "code":"4" + }, + { + "desc":"DLI allows you to use a Hive User Defined Aggregation Function (UDAF) to process multiple rows of data. Hive UDAF is usually used together with groupBy. It is equivalent ", + "product_code":"dli", + "title":"Calling UDAFs in Spark SQL Jobs", + "uri":"dli_05_0062.html", + "doc_type":"devg", + "p_code":"1", + "code":"5" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Submitting a Spark SQL Job Using JDBC", + "uri":"dli_09_0123.html", + "doc_type":"devg", + "p_code":"1", + "code":"6" + }, + { + "desc":"On DLI, you can connect to the server for data query in the Internet environment. In this case, you need to first obtain the connection information, including the endpoin", + "product_code":"dli", + "title":"Obtaining the Server Connection Address", + "uri":"dli_09_0124.html", + "doc_type":"devg", + "p_code":"6", + "code":"7" + }, + { + "desc":"To connect to DLI, JDBC is utilized. You can obtain the JDBC installation package from Maven or download the JDBC driver file from the DLI management console.JDBC driver ", + "product_code":"dli", + "title":"Downloading the JDBC Driver Package", + "uri":"dli_09_0125.html", + "doc_type":"devg", + "p_code":"6", + "code":"8" + }, + { + "desc":"You need to be authenticated when using JDBC to create DLI driver connections.Currently, the JDBC supports authentication using the Access Key/Secret Key (AK/SK) or token", + "product_code":"dli", + "title":"Performing Authentication", + "uri":"dli_09_0121.html", + "doc_type":"devg", + "p_code":"6", + "code":"9" + }, + { + "desc":"In Linux or Windows, you can connect to the DLI server using JDBC.Jobs submitted to DLI using JDBC are executed on the Spark engine.Once JDBC 2.X has undergone function r", + "product_code":"dli", + "title":"Submitting a Job Using JDBC", + "uri":"dli_09_0127.html", + "doc_type":"devg", + "p_code":"6", + "code":"10" + }, + { + "desc":"Relational Database Service (RDS) is a cloud-based web service that is reliable, scalable, easy to manage, and immediately ready for use. It can be deployed in single-nod", + "product_code":"dli", + "title":"Introduction to RDS", + "uri":"dli_09_0129.html", + "doc_type":"devg", + "p_code":"6", + "code":"11" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink OpenSource SQL Jobs", + "uri":"dli_09_0006.html", + "doc_type":"devg", + "p_code":"", + "code":"12" + }, + { + "desc":"This guide provides reference for Flink 1.12 only.In this example, we aim to query information about top three most-clicked offerings in each hour from a set of real-time", + "product_code":"dli", + "title":"Reading Data from Kafka and Writing Data to RDS", + "uri":"dli_09_0009.html", + "doc_type":"devg", + "p_code":"12", + "code":"13" + }, + { + "desc":"This guide provides reference for Flink 1.12 only.This example analyzes real-time vehicle driving data and collects statistics on data results that meet specific conditio", + "product_code":"dli", + "title":"Reading Data from Kafka and Writing Data to GaussDB(DWS)", + "uri":"dli_09_0010.html", + "doc_type":"devg", + "p_code":"12", + "code":"14" + }, + { + "desc":"This guide provides reference for Flink 1.12 only.This example analyzes offering purchase data and collects statistics on data results that meet specific conditions. The ", + "product_code":"dli", + "title":"Reading Data from Kafka and Writing Data to Elasticsearch", + "uri":"dli_09_0011.html", + "doc_type":"devg", + "p_code":"12", + "code":"15" + }, + { + "desc":"This guide provides reference for Flink 1.12 only.Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. Duri", + "product_code":"dli", + "title":"Reading Data from MySQL CDC and Writing Data to GaussDB(DWS)", + "uri":"dli_09_0012.html", + "doc_type":"devg", + "p_code":"12", + "code":"16" + }, + { + "desc":"This guide provides reference for Flink 1.12 only.Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. Duri", + "product_code":"dli", + "title":"Reading Data from PostgreSQL CDC and Writing Data to GaussDB(DWS)", + "uri":"dli_09_0013.html", + "doc_type":"devg", + "p_code":"12", + "code":"17" + }, + { + "desc":"If you need to configure high reliability for a Flink application, you can set the parameters when creating your Flink jobs.Create an SMN topic and add an email address o", + "product_code":"dli", + "title":"Configuring High-Reliability Flink Jobs (Automatic Restart upon Exceptions)", + "uri":"dli_09_0207.html", + "doc_type":"devg", + "p_code":"12", + "code":"18" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink Jar Jobs", + "uri":"dli_09_0202.html", + "doc_type":"devg", + "p_code":"", + "code":"19" + }, + { + "desc":"Built on Flink and Spark, the stream ecosystem is fully compatible with the open-source Flink, Storm, and Spark APIs. 
It is enhanced in features and improved in performan", + "product_code":"dli", + "title":"Stream Ecosystem", + "uri":"dli_09_0162.html", + "doc_type":"devg", + "p_code":"19", + "code":"20" + }, + { + "desc":"You can perform secondary development based on Flink APIs to build your own Jar packages and submit them to the DLI queues to interact with data sources such as MRS Kafka", + "product_code":"dli", + "title":"Flink Jar Job Examples", + "uri":"dli_09_0150.html", + "doc_type":"devg", + "p_code":"19", + "code":"21" + }, + { + "desc":"DLI allows you to use a custom JAR package to run Flink jobs and write data to OBS. This section describes how to write processed Kafka data to OBS. You need to modify th", + "product_code":"dli", + "title":"Writing Data to OBS Using Flink Jar", + "uri":"dli_09_0191.html", + "doc_type":"devg", + "p_code":"19", + "code":"22" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Spark Jar Jobs", + "uri":"dli_09_0203.html", + "doc_type":"devg", + "p_code":"", + "code":"23" + }, + { + "desc":"DLI is fully compatible with open-source Apache Spark and allows you to import, query, analyze, and process job data by programming. This section describes how to write a", + "product_code":"dli", + "title":"Using Spark Jar Jobs to Read and Query OBS Data", + "uri":"dli_09_0205.html", + "doc_type":"devg", + "p_code":"23", + "code":"24" + }, + { + "desc":"DLI allows you to develop a program to create Spark jobs for operations related to databases, DLI or OBS tables, and table data. This example demonstrates how to develop ", + "product_code":"dli", + "title":"Using the Spark Job to Access DLI Metadata", + "uri":"dli_09_0176.html", + "doc_type":"devg", + "p_code":"23", + "code":"25" + }, + { + "desc":"DLI Spark-submit is a command line tool used to submit Spark jobs to the DLI server. This tool provides command lines compatible with open-source Spark.Getting authorized", + "product_code":"dli", + "title":"Using Spark-submit to Submit a Spark Jar Job", + "uri":"dli_09_0122.html", + "doc_type":"devg", + "p_code":"23", + "code":"26" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Using Spark Jobs to Access Data Sources of Datasource Connections", + "uri":"dli_09_0019.html", + "doc_type":"devg", + "p_code":"23", + "code":"27" + }, + { + "desc":"DLI supports the native Spark DataSource capability and other extended capabilities. You can use SQL statements or Spark jobs to access other data storage services and im", + "product_code":"dli", + "title":"Overview", + "uri":"dli_09_0020.html", + "doc_type":"devg", + "p_code":"27", + "code":"28" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to CSS", + "uri":"dli_09_0089.html", + "doc_type":"devg", + "p_code":"27", + "code":"29" + }, + { + "desc":"The Elasticsearch 6.5.4 and later versions provided by CSS provides the security settings. Once the function is enabled, CSS provides identity authentication, authorizati", + "product_code":"dli", + "title":"CSS Security Cluster Configuration", + "uri":"dli_09_0189.html", + "doc_type":"devg", + "p_code":"29", + "code":"30" + }, + { + "desc":"A datasource connection has been created on the DLI management console.Development descriptionConstructing dependency information and creating a Spark sessionImport depen", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0061.html", + "doc_type":"devg", + "p_code":"29", + "code":"31" + }, + { + "desc":"A datasource connection has been created on the DLI management console.Development descriptionCode implementationImport dependency packages.from __future__ import print_f", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0090.html", + "doc_type":"devg", + "p_code":"29", + "code":"32" + }, + { + "desc":"A datasource connection has been created on the DLI management console.Development descriptionCode implementationConstructing dependency information and creating a Spark ", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0190.html", + "doc_type":"devg", + "p_code":"29", + "code":"33" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to GaussDB(DWS)", + "uri":"dli_09_0086.html", + "doc_type":"devg", + "p_code":"27", + "code":"34" + }, + { + "desc":"This section provides Scala example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been create", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0069.html", + "doc_type":"devg", + "p_code":"34", + "code":"35" + }, + { + "desc":"This section provides PySpark example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been crea", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0087.html", + "doc_type":"devg", + "p_code":"34", + "code":"36" + }, + { + "desc":"This section provides Java example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.A datasource connection has been created", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0199.html", + "doc_type":"devg", + "p_code":"34", + "code":"37" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to HBase", + "uri":"dli_09_0077.html", + "doc_type":"devg", + "p_code":"27", + "code":"38" + }, + { + "desc":"Create a datasource connection on the DLI management console.Add the /etc/hosts information of MRS cluster nodes to the host file of the DLI queue.For details, see sectio", + "product_code":"dli", + "title":"MRS Configuration", + "uri":"dli_09_0196.html", + "doc_type":"devg", + "p_code":"38", + "code":"39" + }, + { + "desc":"The CloudTable HBase and MRS HBase can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-coded ", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0063.html", + "doc_type":"devg", + "p_code":"38", + "code":"40" + }, + { + "desc":"The CloudTable HBase and MRS HBase can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-coded ", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0078.html", + "doc_type":"devg", + "p_code":"38", + "code":"41" + }, + { + "desc":"This example applies only to MRS HBase.PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext pa", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0197.html", + "doc_type":"devg", + "p_code":"38", + "code":"42" + }, + { + "desc":"SymptomThe Spark job fails to be executed, and the job log indicates that the Java server connection or container fails to be started.The Spark job fails to be executed, ", + "product_code":"dli", + "title":"Troubleshooting", + "uri":"dli_09_0198.html", + "doc_type":"devg", + "p_code":"38", + "code":"43" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to OpenTSDB", + "uri":"dli_09_0080.html", + "doc_type":"devg", + "p_code":"27", + "code":"44" + }, + { + "desc":"The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0065.html", + "doc_type":"devg", + "p_code":"44", + "code":"45" + }, + { + "desc":"The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.PrerequisitesA datasource connection has been created on the DLI management console.Hard-", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0081.html", + "doc_type":"devg", + "p_code":"44", + "code":"46" + }, + { + "desc":"This example applies only to MRS OpenTSDB.PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0193.html", + "doc_type":"devg", + "p_code":"44", + "code":"47" + }, + { + "desc":"SymptomA Spark job fails to be executed and \"No respond\" is displayed in the job log.A Spark job fails to be executed and \"No respond\" is displayed in the job log.Solutio", + "product_code":"dli", + "title":"Troubleshooting", + "uri":"dli_09_0195.html", + "doc_type":"devg", + "p_code":"44", + "code":"48" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to RDS", + "uri":"dli_09_0083.html", + "doc_type":"devg", + "p_code":"27", + "code":"49" + }, + { + "desc":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0067.html", + "doc_type":"devg", + "p_code":"49", + "code":"50" + }, + { + "desc":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0084.html", + "doc_type":"devg", + "p_code":"49", + "code":"51" + }, + { + "desc":"PrerequisitesA datasource connection has been created and bound to a queue on the DLI management console.Hard-coded or plaintext passwords pose significant security risks", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0187.html", + "doc_type":"devg", + "p_code":"49", + "code":"52" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to Redis", + "uri":"dli_09_0093.html", + "doc_type":"devg", + "p_code":"27", + "code":"53" + }, + { + "desc":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0094.html", + "doc_type":"devg", + "p_code":"53", + "code":"54" + }, + { + "desc":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0097.html", + "doc_type":"devg", + "p_code":"53", + "code":"55" + }, + { + "desc":"Redis supports only enhanced datasource connections.PrerequisitesAn enhanced datasource connection has been created on the DLI management console and bound to a queue in ", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0100.html", + "doc_type":"devg", + "p_code":"53", + "code":"56" + }, + { + "desc":"SymptomAfter the code is directly copied to the .py file, unexpected characters may exist after the backslashes (\\).After the code is directly copied to the .py file, une", + "product_code":"dli", + "title":"Troubleshooting", + "uri":"dli_09_0188.html", + "doc_type":"devg", + "p_code":"53", + "code":"57" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Connecting to Mongo", + "uri":"dli_09_0113.html", + "doc_type":"devg", + "p_code":"27", + "code":"58" + }, + { + "desc":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.An enhanced datasource connection has been created on the ", + "product_code":"dli", + "title":"Scala Example Code", + "uri":"dli_09_0114.html", + "doc_type":"devg", + "p_code":"58", + "code":"59" + }, + { + "desc":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.PrerequisitesAn enhanced datasource connection has been cr", + "product_code":"dli", + "title":"PySpark Example Code", + "uri":"dli_09_0117.html", + "doc_type":"devg", + "p_code":"58", + "code":"60" + }, + { + "desc":"Mongo can be connected only through enhanced datasource connections.DDS is compatible with the MongoDB protocol.PrerequisitesAn enhanced datasource connection has been cr", + "product_code":"dli", + "title":"Java Example Code", + "uri":"dli_09_0110.html", + "doc_type":"devg", + "p_code":"58", + "code":"61" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Change History", + "uri":"dli_09_00001.html", + "doc_type":"devg", + "p_code":"", + "code":"62" + } +] \ No newline at end of file diff --git a/docs/dli/dev/PARAMETERS.txt b/docs/dli/dev/PARAMETERS.txt new file mode 100644 index 00000000..6da8d5f0 --- /dev/null +++ b/docs/dli/dev/PARAMETERS.txt @@ -0,0 +1,3 @@ +version="" +language="en-us" +type="" \ No newline at end of file diff --git a/docs/dli/dev/dli_05_0044.html b/docs/dli/dev/dli_05_0044.html new file mode 100644 index 00000000..0fc4b9d7 --- /dev/null +++ b/docs/dli/dev/dli_05_0044.html @@ -0,0 +1,175 @@ + + +

Using Spark SQL Jobs to Analyze OBS Data

+

DLI allows you to use data stored on OBS. You can create OBS tables on DLI to access and process data in your OBS bucket.

+

This section describes how to create an OBS table on DLI, import data to the table, and insert and query table data.

+

Prerequisites

+
+

Preparations

Creating a Database on DLI

+
  1. Log in to the DLI management console and click SQL Editor. On the displayed page, set Engine to spark and Queue to the created SQL queue.
  2. Enter the following statement in the SQL editing window to create the testdb database.
    create database testdb;
    +
+

The following operations in this section must be performed for the testdb database.

+
+

DataSource and Hive Syntax for Creating an OBS Table on DLI

The main differences between DataSource syntax and Hive syntax lie in the supported data storage formats and the maximum number of partitions. For the key differences between creating OBS tables with the two types of syntax, refer to Table 1.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Syntax differences

Syntax

+

Data Types

+

Partitioning

+

Number of Partitions

+

DataSource

+

ORC, PARQUET, JSON, CSV, and AVRO

+

You need to specify the partitioning column in both CREATE TABLE and PARTITIONED BY statements. For details, see Creating a Single-Partition OBS Table Using DataSource Syntax.

+

A maximum of 7,000 partitions can be created in a single table.

+

Hive

+

TEXTFILE, AVRO, ORC, SEQUENCEFILE, RCFILE, and PARQUET

+

Do not specify the partitioning column in the CREATE TABLE statement. Specify the column name and data type in the PARTITIONED BY statement. For details, see Creating an OBS Table Using Hive Syntax.

+

A maximum of 100,000 partitions can be created in a single table.

+
+
+
+

Creating an OBS Table Using the DataSource Syntax

The following describes how to create an OBS table for CSV files. The methods of creating OBS tables for other file formats are similar.

+ +
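
A minimal sketch of the DataSource DDL is shown below. It assumes CSV data stored under a hypothetical OBS path (obs://dli-test-obs01/data); adjust the path, table names, and columns to match your own data. The single-partition variant shows that, with DataSource syntax, the partitioning column id is declared in both the column list and the PARTITIONED BY clause.

+
CREATE TABLE testcsvtable (name STRING, id INT)
+USING csv
+OPTIONS (path 'obs://dli-test-obs01/data');
+
+-- Sketch of the single-partition variant: id appears in the column list and in PARTITIONED BY.
+CREATE TABLE testcsvparttable (name STRING, id INT)
+USING csv
+OPTIONS (path 'obs://dli-test-obs01/datapart')
+PARTITIONED BY (id);
+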
+

Creating an OBS Table Using Hive Syntax

The following describes how to create an OBS table for TEXTFILE files. The methods of creating OBS tables for other file formats are similar.

+ +
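
A minimal sketch of the Hive DDL is shown below. It assumes TEXTFILE data stored under a hypothetical OBS path (obs://dli-test-obs01/hivedata); adjust the path, table names, and columns to match your own data. The partitioned variant shows that, with Hive syntax, the partitioning column dt is declared only in the PARTITIONED BY clause, not in the column list.

+
CREATE TABLE hiveobstable (name STRING, id INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+STORED AS TEXTFILE
+LOCATION 'obs://dli-test-obs01/hivedata';
+
+-- Sketch of the partitioned variant: dt is declared only in PARTITIONED BY.
+CREATE TABLE hiveobsparttable (name STRING, id INT)
+PARTITIONED BY (dt STRING)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+STORED AS TEXTFILE
+LOCATION 'obs://dli-test-obs01/hivedatapart';
+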
+

FAQs

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_05_0062.html b/docs/dli/dev/dli_05_0062.html new file mode 100644 index 00000000..3c820318 --- /dev/null +++ b/docs/dli/dev/dli_05_0062.html @@ -0,0 +1,250 @@ + + +

Calling UDAFs in Spark SQL Jobs

+

Scenario

DLI allows you to use a Hive User Defined Aggregation Function (UDAF) to process multiple rows of data. Hive UDAF is usually used together with groupBy. It is equivalent to SUM() and AVG() commonly used in SQL and is also an aggregation function.
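
For example, once the UDAF in this section has been created, it can be called together with GROUP BY in the same way as a built-in aggregate function. The following sketch assumes a hypothetical table employee_info with columns dept_id and salary:

+
SELECT dept_id, AvgFilterUDAFDemo(salary) AS agg_value
+FROM employee_info
+GROUP BY dept_id;
+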

+
+

Constraints

+
+

Environment Preparations

Before you start, set up the development environment.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Development environment

Item

+

Description

+

OS

+

Windows 7 or later

+

JDK

+

JDK 1.8 (Java downloads).

+

IntelliJ IDEA

+

IntelliJ IDEA is used for application development. The version of the tool must be 2019.1 or later.

+

Maven

+

Basic configuration of the development environment. For details about how to get started, see Downloading Apache Maven and Installing Apache Maven. Maven is used for project management throughout the lifecycle of software development.

+
+
+
+

Development Process

The following figure shows the process of developing a UDAF.

+
Figure 1 Development process
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Process description

No.

+

Phase

+

Software Portal

+

Description

+

1

+

Create a Maven project and configure the POM file.

+

IntelliJ IDEA

+

Compile the UDAF function code by referring to the Procedure description.

+

2

+

Editing UDAF code

+

3

+

Debug, compile, and pack the code into a Jar package.

+

4

+

Upload the Jar package to OBS.

+

OBS console

+

Upload the UDAF Jar file to an OBS path.

+

5

+

Create a DLI package.

+

DLI console

+

Select the UDAF Jar file that has been uploaded to OBS for management.

+

6

+

Create a UDAF on DLI.

+

DLI console

+

Create a UDAF on the SQL job management page of the DLI console.

+

7

+

Verify and use the UDAF.

+

DLI console

+

Use the UDAF in your DLI job.

+
+
+
+

Procedure

  1. Create a Maven project and configure the POM file. This step uses IntelliJ IDEA 2020.2 as an example.
    1. Start IntelliJ IDEA and choose File > New > Project.
      Figure 2 Creating a project
      +
    2. Choose Maven, set Project SDK to 1.8, and click Next.
      Figure 3 Configuring the project SDK
      +
    3. Specify the project name and the project path, and click Create. On the displayed page, click Finish.
      Figure 4 Setting project information
      +
    4. Add the following content to the pom.xml file.
      <dependencies> 
      +         <dependency> 
      +             <groupId>org.apache.hive</groupId> 
      +             <artifactId>hive-exec</artifactId> 
      +             <version>1.2.1</version> 
      +         </dependency> 
      + </dependencies>
      +
      Figure 5 Adding configurations to the POM file
      +
    5. Choose src > main and right-click the java folder. Choose New > Package to create a package and a class file.
      Set Package as required. In this example, set Package to com.dli.demo.
      Figure 6 Creating a package
      +
      +

      Create a Java Class file in the package path. In this example, the Java Class file is AvgFilterUDAFDemo.

      +
      Figure 7 Creating a class
      +
    +
  2. Write UDAF code. Pay attention to the following requirements when you implement the UDAF:
    • The function class must inherit from the org.apache.hadoop.hive.ql.exec.UDAF class, and the Evaluator class must implement the org.apache.hadoop.hive.ql.exec.UDAFEvaluator interface.
    • The Evaluator class must implement the init, iterate, terminatePartial, merge, and terminate functions of UDAFEvaluator.
      • The init function overrides the init function of the UDAFEvaluator interface.
      • The iterate function receives input parameters for internal iteration.
      • The terminatePartial function has no parameter. It returns the data obtained after the iterate traversal is complete. terminatePartial is similar to Hadoop Combiner.
      • The merge function receives the return values of terminatePartial.
      • The terminate function returns the aggregated result.
      +

      For details about how to implement the UDAF, see the following sample code:

      +
      package com.dli.demo;
      + 
      +import org.apache.hadoop.hive.ql.exec.UDAF;
      +import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
      + 
      +/***
      + * @jdk jdk1.8.0
      + * @version 1.0
      + ***/
      +public class AvgFilterUDAFDemo extends UDAF {
      + 
      +    /**
      +     * Defines the static inner class PartialResult, which stores the intermediate aggregation result.
      +     */
      +    public static class PartialResult
      +    {
      +        public Long sum;
      +    }
      + 
      +    public static class VarianceEvaluator implements UDAFEvaluator {
      + 
      +        // Initializes the PartialResult object.
      +        private AvgFilterUDAFDemo.PartialResult partial;
      + 
      +        // Declares a VarianceEvaluator constructor that has no parameters.
      +        public VarianceEvaluator(){
      + 
      +            this.partial = new AvgFilterUDAFDemo.PartialResult();
      + 
      +            init();
      +        }
      + 
      +        /**
      +         * Initializes the UDAF, which is similar to a constructor.
      +         */
      +        @Override
      +        public void init() {
      + 
      +            // Sets the initial value of sum.
      +            this.partial.sum = 0L;
      +        }
      + 
      +        /**
      +         * Receives input parameters for internal iteration.
      +         * @param x
      +         * @return
      +         */
      +        public void iterate(Long x) {
      +            if (x == null) {
      +                return;
      +            }
      +            this.partial.sum = this.partial.sum | x;
      +        }
      + 
      +        /**
      +         * Returns the data obtained after the iterate traversal is complete.
      +         * terminatePartial is similar to Hadoop Combiner.
      +         * @return
      +         */
      +        public AvgFilterUDAFDemo.PartialResult terminatePartial()
      +        {
      +            return this.partial;
      +        }
      + 
      +        /**
      +         * Receives the return values of terminatePartial and merges the data.
      +         * @param
      +         * @return
      +         */
      +        public void merge(AvgFilterUDAFDemo.PartialResult pr)
      +        {
      +            if (pr == null) {
      +                return;
      +            }
      +            this.partial.sum = this.partial.sum | pr.sum;
      +        }
      + 
      +        /**
      +         * Returns the aggregated result.
      +         * @return
      +         */
      +        public Long terminate()
      +        {
      +            if (this.partial.sum == null) {
      +                return 0L;
      +            }
      +            return this.partial.sum;
      +        }
      +    }
      +}
      +
    +
  3. Use IntelliJ IDEA to compile the code and pack it into the JAR package.
    1. Click Maven in the tool bar on the right, and click clean and compile to compile the code.

      After the compilation is successful, click package.

      +
      Figure 8 Exporting the Jar file
      +
    2. The generated JAR package is stored in the target directory. In this example, MyUDAF-1.0-SNAPSHOT.jar is stored in D:\DLITest\MyUDAF\target.
    +
  4. Log in to the OBS console and upload the file to the OBS path.

    The region of the OBS bucket to which the Jar package is uploaded must be the same as the region of the DLI queue. Cross-region operations are not allowed.

    +
    +
  5. (Optional) Upload the file to DLI for package management.
    1. Log in to the DLI management console and choose Data Management > Package Management.
    2. On the Package Management page, click Create in the upper right corner.
    3. In the Create Package dialog, set the following parameters:
      • Type: Select JAR.
      • OBS Path: Specify the OBS path for storing the package.
      • Set Group and Group Name as required for package identification and management.
      +
    4. Click OK.
    +
  6. Create the UDAF on DLI.
    1. Log in to the DLI management console and create a SQL queue and a database.
    2. Log in to the DLI console and choose SQL Editor. Set Engine to spark, and select the created SQL queue and database.
    3. In the SQL editing area, run the following statement to create a UDAF and click Execute.

      If the reloading function is enabled for the UDAF, use the CREATE OR REPLACE FUNCTION statement (the second statement below) instead of CREATE FUNCTION.

      +
      +
      CREATE FUNCTION AvgFilterUDAFDemo AS 'com.dli.demo.AvgFilterUDAFDemo' using jar 'obs://dli-test-obs01/MyUDAF-1.0-SNAPSHOT.jar';
      +

      Or

      +
      CREATE OR REPLACE FUNCTION AvgFilterUDAFDemo AS 'com.dli.demo.AvgFilterUDAFDemo' using jar 'obs://dli-test-obs01/MyUDAF-1.0-SNAPSHOT.jar';
      +
    +
  7. Restart the original SQL queue for the added function to take effect.
    1. Log in to the DLI management console and choose Resources > Queue Management from the navigation pane. In the Operation column of the SQL queue, click Restart.
    2. In the Restart dialog box, click OK.
    +
  8. Use the UDAF.

    Use the UDAF created in Step 6 in the query statement:

    +
    select AvgFilterUDAFDemo(real_stock_rate) AS show_rate FROM dw_ad_estimate_real_stock_rate limit 1000;
    +
  9. (Optional) Delete the UDAF.

    If the UDAF is no longer used, run the following statement to delete it:

    +
    Drop FUNCTION AvgFilterUDAFDemo;
    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_00001.html b/docs/dli/dev/dli_09_00001.html new file mode 100644 index 00000000..0acb82e0 --- /dev/null +++ b/docs/dli/dev/dli_09_00001.html @@ -0,0 +1,32 @@ + + +

Change History

+
+
+ + + + + + + + + + + + + +

Release Date

+

What's New

+

2024-04-30

+

Modified the following section:

+

In Connecting to Mongo, modified "mongo" in the sample table name and clarified that DDS is compatible with the MongoDB protocol.

+

2024-02-27

+

Modified the following section:

+

Modified the description of the url parameter in the sample code in Scala Example Code.

+

2024-01-05

+

This issue is the first official release.

+
+
+
+ diff --git a/docs/dli/dev/dli_09_0006.html b/docs/dli/dev/dli_09_0006.html new file mode 100644 index 00000000..bbb94176 --- /dev/null +++ b/docs/dli/dev/dli_09_0006.html @@ -0,0 +1,22 @@ + + +

Flink OpenSource SQL Jobs

+

+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0009.html b/docs/dli/dev/dli_09_0009.html new file mode 100644 index 00000000..2aec6f25 --- /dev/null +++ b/docs/dli/dev/dli_09_0009.html @@ -0,0 +1,143 @@ + + +

Reading Data from Kafka and Writing Data to RDS

+

This guide provides reference for Flink 1.12 only.

+
+

Description

This example queries the top three most-clicked offerings in each hour from a set of real-time click data. The offerings' real-time click data is sent to Kafka as the input source, and the analysis result of the Kafka data is then output to RDS.

+

For example, enter the following sample data:

+
{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:01:00", "product_id":"0002", "product_name":"name1"}
+{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 08:02:00", "product_id":"0002", "product_name":"name1"}
+{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 08:06:00", "product_id":"0004", "product_name":"name2"}
+{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:10:00", "product_id":"0003", "product_name":"name3"}
+{"user_id":"0003", "user_name":"Cindy", "event_time":"2021-03-24 08:15:00", "product_id":"0005", "product_name":"name4"}
+{"user_id":"0003", "user_name":"Cindy", "event_time":"2021-03-24 08:16:00", "product_id":"0005", "product_name":"name4"}
+{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:56:00", "product_id":"0004", "product_name":"name2"}
+{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 09:05:00", "product_id":"0005", "product_name":"name4"} 
+{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 09:10:00", "product_id":"0006", "product_name":"name5"}
+{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 09:13:00", "product_id":"0006", "product_name":"name5"}
+
Expected output:
2021-03-24 08:00:00 - 2021-03-24 08:59:59,0002,name1,2
+2021-03-24 08:00:00 - 2021-03-24 08:59:59,0004,name2,2
+2021-03-24 08:00:00 - 2021-03-24 08:59:59,0005,name4,2
+2021-03-24 09:00:00 - 2021-03-24 09:59:59,0006,name5,2
+2021-03-24 09:00:00 - 2021-03-24 09:59:59,0005,name4,1
+
+
+

Prerequisites

  1. You have created a DMS for Kafka instance.

    When you create the instance, do not enable Kafka SASL_SSL.

    +
    +
  2. You have created an RDS for MySQL DB instance.

    In this example, the RDS for MySQL database version is 8.0.

    +
+
+

Overall Development Process

Overall Process
Figure 1 Job development process
+
+

Step 1: Create a Queue

+

Step 2: Create a Kafka Topic

+

Step 3: Create an RDS Database and Table

+

Step 4: Create an Enhanced Datasource Connection

+

Step 5: Run a Job

+

Step 6: Send Data and Query Results

+
+

Step 1: Create a Queue

  1. Log in to the DLI console. In the navigation pane on the left, choose Resources > Queue Management.
  2. On the displayed page, click Buy Queue in the upper right corner.
  3. On the Buy Queue page, set queue parameters as follows:
    • Billing Mode: .
    • Region and Project: Retain the default values.
    • Name: Enter a queue name.

      The queue name can contain only digits, letters, and underscores (_), but cannot contain only digits or start with an underscore (_). The name must contain 1 to 128 characters.

      +

      The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters.

      +
      +
    • Type: Select For general purpose. Select the Dedicated Resource Mode.
    • AZ Mode and Specifications: Retain the default values.
    • Enterprise Project: Select default.
    • Advanced Settings: Select Custom.
    • CIDR Block: Specify the queue network segment. For example, 10.0.0.0/16.

      The CIDR block of a queue cannot overlap with the CIDR blocks of DMS Kafka and RDS for MySQL DB instances. Otherwise, datasource connections will fail to be created.

      +
      +
    • Set other parameters as required.
    +
  4. Click Buy. Confirm the configuration and click Submit.
+
+

Step 2: Create a Kafka Topic

  1. On the Kafka management console, click an instance name on the DMS for Kafka page. Basic information of the Kafka instance is displayed.
  2. Choose Topics. On the displayed page, click Create Topic. Configure the following parameters:
    • Topic Name: For this example, enter testkafkatopic.
    • Partitions: Set the value to 1.
    • Replicas: Set the value to 1.
    +

    Retain default values for other parameters.

    +
+
+

Step 3: Create an RDS Database and Table

  1. Log in to the RDS console. On the displayed page, locate the target MySQL DB instance and choose More > Log In in the Operation column.
  2. In the displayed login dialog box, enter the username and password and click Log In.
  3. On the Databases page, click Create Database. In the displayed dialog box, enter testrdsdb as the database name and retain the default values of the other parameters. Then, click OK.
  4. In the Operation column of the row containing the created database, click SQL Window and enter the following statement to create a table:
    CREATE TABLE clicktop (
    +	`range_time` VARCHAR(64) NOT NULL,
    +	`product_id` VARCHAR(32) NOT NULL,
    +	`product_name` VARCHAR(32),
    +	`event_count` VARCHAR(32),
    +	PRIMARY KEY (`range_time`,`product_id`)
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4;
    +
+
+

Step 4: Create an Enhanced Datasource Connection

+
+

Step 5: Run a Job

  1. On the DLI management console, choose Job Management > Flink Jobs. On the Flink Jobs page, click Create Job.
  2. In the Create Job dialog box, set Type to Flink OpenSource SQL and Name to FlinkKafkaRds. Click OK.
  3. On the job editing page, set the following parameters and retain the default values of other parameters.
    • Queue: Select the queue created in Step 1: Create a Queue.
    • Flink Version: Select 1.12.
    • Save Job Log: Enable this function.
    • OBS Bucket: Select an OBS bucket for storing job logs and grant access permissions of the OBS bucket as prompted.
    • Enable Checkpointing: Enable this function.
    • Enter a SQL statement in the editing pane. The following is an example. Modify the parameters in bold as you need.

      In this example, the syntax version of Flink OpenSource SQL is 1.12. The data source is Kafka, and the result data is written to RDS.

      +
      +
      create table click_product(
      +    user_id string, --ID of the user
      +    user_name string, --Username
      +    event_time string, --Click time
      +    product_id string, --Offering ID
      +    product_name string --Offering name
      +) with (
      +    "connector" = "kafka",
      +    "properties.bootstrap.servers" = "10.128.0.120:9092,10.128.0.89:9092,10.128.0.83:9092",-- Internal network address and port number of the Kafka instance
      +    "properties.group.id" = "click",
      +    "topic" = "testkafkatopic",--Name of the created Kafka topic
      +    "format" = "json",
      +    "scan.startup.mode" = "latest-offset"
      +);
      +
      +--Result table
      +create table top_product (
      +    range_time string, --Calculated time range
      +    product_id string, --Offering ID
      +    product_name string, --Offering name
      +    event_count bigint, --Number of clicks
      +    primary key (range_time, product_id) not enforced
      +) with (
      +    "connector" = "jdbc",
      +    "url" = "jdbc:mysql://192.168.12.148:3306/testrdsdb",--testrdsdb indicates the name of the created RDS database. Replace the IP address and port number with those of the RDS DB instance.
      +    "table-name" = "clicktop",
      +    "pwd_auth_name"="xxxxx", -- Name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
      +    "sink.buffer-flush.max-rows" = "1000",
      +    "sink.buffer-flush.interval" = "1s"
      +);
      +
      +create view current_event_view
      +as
      +    select product_id, product_name, count(1) as click_count, concat(substring(event_time, 1, 13), ":00:00") as min_event_time, concat(substring(event_time, 1, 13), ":59:59") as max_event_time
      +    from click_product group by substring (event_time, 1, 13), product_id, product_name;
      +
      +insert into top_product
      +    select
      +        concat(min_event_time, " - ", max_event_time) as range_time,
      +        product_id,
      +        product_name,
      +        click_count
      +    from (
      +        select *,
      +        row_number() over (partition by min_event_time order by click_count desc) as row_num
      +        from current_event_view
      +    )
      +    where row_num <= 3
      +
    +
  4. Click Check Semantic and ensure that the SQL statement passes the check. Click Save. Click Start, confirm the job parameters, and click Start Now to execute the job. Wait until the job status changes to Running.
+
+

Step 6: Send Data and Query Results

  1. Use the Kafka client to send data to topics created in Step 2: Create a Kafka Topic to simulate real-time data streams.

    The sample data is as follows:

    +
    {"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:01:00", "product_id":"0002", "product_name":"name1"}
    +{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 08:02:00", "product_id":"0002", "product_name":"name1"}
    +{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 08:06:00", "product_id":"0004", "product_name":"name2"}
    +{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:10:00", "product_id":"0003", "product_name":"name3"}
    +{"user_id":"0003", "user_name":"Cindy", "event_time":"2021-03-24 08:15:00", "product_id":"0005", "product_name":"name4"}
    +{"user_id":"0003", "user_name":"Cindy", "event_time":"2021-03-24 08:16:00", "product_id":"0005", "product_name":"name4"}
    +{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 08:56:00", "product_id":"0004", "product_name":"name2"}
    +{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 09:05:00", "product_id":"0005", "product_name":"name4"} 
    +{"user_id":"0001", "user_name":"Alice", "event_time":"2021-03-24 09:10:00", "product_id":"0006", "product_name":"name5"}
    +{"user_id":"0002", "user_name":"Bob", "event_time":"2021-03-24 09:13:00", "product_id":"0006", "product_name":"name5"}
    +
  2. Log in to the RDS console and click the name of the RDS DB instance. On the displayed page, click the name of the created database, for example, testrdsdb, and click Query SQL Statements in the Operation column of the row containing the clicktop table.
    select * from `clicktop`;
    +
  3. On the displayed page, click Execute SQL. Check whether data has been written into the RDS table.
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0010.html b/docs/dli/dev/dli_09_0010.html new file mode 100644 index 00000000..aa89ad70 --- /dev/null +++ b/docs/dli/dev/dli_09_0010.html @@ -0,0 +1,124 @@ + + +

Reading Data from Kafka and Writing Data to GaussDB(DWS)

+

This guide provides reference for Flink 1.12 only.

+
+

Description

This example analyzes real-time vehicle driving data and collects statistics on data results that meet specific conditions. The real-time vehicle driving data is stored in the Kafka source table, and then the analysis result is output to GaussDB(DWS).

+

For example, enter the following sample data:

+
{"car_id":"3027", "car_owner":"lilei", "car_age":"7", "average_speed":"76", "total_miles":"15000"}
+{"car_id":"3028", "car_owner":"hanmeimei", "car_age":"6", "average_speed":"92", "total_miles":"17000"}
+{"car_id":"3029", "car_owner":"Ann", "car_age":"10", "average_speed":"81", "total_miles":"230000"}
+
The expected output is the vehicles that meet both the average_speed <= 90 and total_miles <= 200,000 conditions.
{"car_id":"3027", "car_owner":"lilei", "car_age":"7", "average_speed":"76", "total_miles":"15000"}
+
+
+

Prerequisites

  1. You have created a DMS for Kafka instance.

    When you create the instance, do not enable Kafka SASL_SSL.

    +
    +
  2. You have created a GaussDB(DWS) instance.
+
+

Overall Development Process

Overall Process
Figure 1 Job development process
+
+

Step 1: Create a Queue

+

Step 2: Create a Kafka Topic

+

Step 3: Create a GaussDB(DWS) Database and Table

+

Step 4: Create an Enhanced Datasource Connection

+

Step 5: Run a Job

+

Step 6: Send Data and Query Results

+
+

Step 1: Create a Queue

  1. Log in to the DLI console. In the navigation pane on the left, choose Resources > Queue Management.
  2. On the displayed page, click Buy Queue in the upper right corner.
  3. On the Buy Queue page, set queue parameters as follows:
    • Billing Mode: .
    • Region and Project: Retain the default values.
    • Name: Enter a queue name.

      The queue name can contain only digits, letters, and underscores (_), but cannot contain only digits or start with an underscore (_). The name must contain 1 to 128 characters.

      +

      The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters.

      +
      +
    • Type: Select For general purpose. Select the Dedicated Resource Mode.
    • AZ Mode and Specifications: Retain the default values.
    • Enterprise Project: Select default.
    • Advanced Settings: Select Custom.
    • CIDR Block: Specify the queue network segment. For example, 10.0.0.0/16.

      The CIDR block of a queue cannot overlap with the CIDR blocks of DMS Kafka and RDS for MySQL DB instances. Otherwise, datasource connections will fail to be created.

      +
      +
    • Set other parameters as required.
    +
  4. Click Buy. Confirm the configuration and click Submit.
+
+

Step 2: Create a Kafka Topic

  1. On the Kafka management console, click an instance name on the DMS for Kafka page. Basic information of the Kafka instance is displayed.
  2. Choose Topics in the navigation pane on the left. On the displayed page, click Create Topic. Configure the following parameters:
    • Topic Name: For this example, enter testkafkatopic.
    • Partitions: Set the value to 1.
    • Replicas: Set the value to 1.
    +

    Retain default values for other parameters.

    +
+
+

Step 3: Create a GaussDB(DWS) Database and Table

  1. .
  2. Connect to the default database gaussdb of a GaussDB(DWS) cluster.
    gsql -d gaussdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
    • gaussdb: Default database of the GaussDB(DWS) cluster
    • Connection address of the DWS cluster: If a public network address is used for connection, set this parameter to the public network IP address or domain name. If a private network address is used for connection, set this parameter to the private network IP address or domain name. If an ELB is used for connection, set this parameter to the ELB address.
    • dbadmin: Default administrator username used during cluster creation
    • password: Default password of the administrator
    +
  3. Run the following command to create the testdwsdb database:
    CREATE DATABASE testdwsdb;
    +
  4. Run the following command to exit the gaussdb database and connect to testdwsdb:
    \q
    +gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  5. Run the following commands to create a table:
    create schema test;
    +set current_schema= test;
    +drop table if exists qualified_cars;
    +CREATE TABLE qualified_cars
    +(
    +    car_id VARCHAR, 
    +    car_owner VARCHAR,
    +    car_age INTEGER ,
    +    average_speed FLOAT8,
    +    total_miles FLOAT8
    +);
    +
+
+

Step 4: Create an Enhanced Datasource Connection

+
+

Step 5: Run a Job

  1. On the DLI management console, choose Job Management > Flink Jobs. On the Flink Jobs page, click Create Job.
  2. In the Create Job dialog box, set Type to Flink OpenSource SQL and Name to FlinkKafkaDWS. Click OK.
  3. On the job editing page, set the following parameters and retain the default values of other parameters.
    • Queue: Select the queue created in Step 1: Create a Queue.
    • Flink Version: Select 1.12.
    • Save Job Log: Enable this function.
    • OBS Bucket: Select an OBS bucket for storing job logs and grant access permissions of the OBS bucket as prompted.
    • Enable Checkpointing: Enable this function.
    • Enter a SQL statement in the editing pane. The following is an example. Modify the parameters in bold as you need.

      In this example, the syntax version of Flink OpenSource SQL is 1.12. The data source is Kafka, and the result data is written to GaussDB(DWS).

      +
      +
      create table car_infos(
      +  car_id STRING,
      +  car_owner STRING,
      +  car_age INT,
      +  average_speed DOUBLE,
      +  total_miles DOUBLE
      +) with (
      +    "connector" = "kafka",
      +    "properties.bootstrap.servers" = "10.128.0.120:9092,10.128.0.89:9092,10.128.0.83:9092",-- Internal network address and port number of the Kafka instance
      +    "properties.group.id" = "click",
      +    "topic" = "testkafkatopic",--Created Kafka topic
      +    "format" = "json",
      +    "scan.startup.mode" = "latest-offset"
      +);
      +
      +create table qualified_cars (
      +  car_id STRING,
      +  car_owner STRING,
      +  car_age INT,
      +  average_speed DOUBLE,
      +  total_miles DOUBLE
      +)
      +WITH (
      +  'connector' = 'gaussdb',
      +  'driver' = 'com.gauss200.jdbc.Driver',
      +  'url'='jdbc:gaussdb://192.168.168.16:8000/testdwsdb', ---192.168.168.16:8000 indicates the internal IP address and port of the GaussDB(DWS) instance. testdwsdb indicates the name of the created GaussDB(DWS) database.
      +  'table-name' = 'test\".\"qualified_cars', ---test indicates the schema of the created GaussDB(DWS) table, and qualified_cars indicates the GaussDB(DWS) table name.
      +  'pwd_auth_name'= 'xxxxx', -- Name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
      +  'write.mode' = 'insert'
      +);
      +
      +/** Output information about qualified vehicles **/
      +INSERT INTO qualified_cars
      +SELECT *
      +FROM car_infos
      +where average_speed <= 90 and total_miles <= 200000;
      +
    +
  4. Click Check Semantic and ensure that the SQL statement passes the check. Click Save. Click Start, confirm the job parameters, and click Start Now to execute the job. Wait until the job status changes to Running.
+
+

Step 6: Send Data and Query Results

  1. Use the Kafka client to send data to topics created in Step 2: Create a Kafka Topic to simulate real-time data streams.

    The sample data is as follows:

    +
    {"car_id":"3027", "car_owner":"lilei", "car_age":"7", "average_speed":"76", "total_miles":"15000"}
    +{"car_id":"3028", "car_owner":"hanmeimei", "car_age":"6", "average_speed":"92", "total_miles":"17000"}
    +{"car_id":"3029", "car_owner":"Ann", "car_age":"10", "average_speed":"81", "total_miles":"230000"}
    +
  2. Connect to the created GaussDB(DWS) cluster.
  3. Connect to the created testdwsdb database of the GaussDB(DWS) cluster.
    gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  4. Run the following statement to query GaussDB(DWS) table data:
    select * from test.qualified_cars;
    +
    The query result is as follows:
    car_id  car_owner  car_age  average_speed  total_miles
    +3027      lilei     7           76.0       15000.0
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0011.html b/docs/dli/dev/dli_09_0011.html new file mode 100644 index 00000000..13fb2fa5 --- /dev/null +++ b/docs/dli/dev/dli_09_0011.html @@ -0,0 +1,202 @@ + + +

Reading Data from Kafka and Writing Data to Elasticsearch

+

This guide provides reference for Flink 1.12 only.

+
+

Description

This example analyzes offering purchase data and collects statistics on data results that meet specific conditions. The offering purchase data is stored in the Kafka source table, and then the analysis result is output to Elasticsearch.

+

For example, enter the following sample data:

+
{"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
+
+{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0002", "user_name":"Jason", "area_id":"330106"}
+

DLI reads data from Kafka and writes the data to Elasticsearch. You can view the result in Kibana of the Elasticsearch cluster.

+
+

Prerequisites

  1. You have created a DMS for Kafka instance.

    When you create a DMS Kafka instance, do not enable Kafka SASL_SSL.

    +
    +
  2. You have created a CSS Elasticsearch cluster.

    In this example, the version of the created CSS cluster is 7.6.2, and security mode is disabled for the cluster.

    +
+
+

Overall Process

Figure 1 shows the overall development process.
Figure 1 Job development process
+
+

Step 1: Create a Queue

+

Step 2: Create a Kafka Topic

+

Step 3: Create an Elasticsearch Index

+

Step 4: Create an Enhanced Datasource Connection

+

Step 5: Run a Job

+

Step 6: Send Data and Query Results

+
+

Step 1: Create a Queue

  1. Log in to the DLI console. In the navigation pane on the left, choose Resources > Queue Management.
  2. On the displayed page, click Buy Queue in the upper right corner.
  3. On the Buy Queue page, set queue parameters as follows:
    • Billing Mode: .
    • Region and Project: Retain the default values.
    • Name: Enter a queue name.

      The queue name can contain only digits, letters, and underscores (_), but cannot contain only digits or start with an underscore (_). The name must contain 1 to 128 characters.

      +

      The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters.

      +
      +
    • Type: Select For general purpose. Select the Dedicated Resource Mode.
    • AZ Mode and Specifications: Retain the default values.
    • Enterprise Project: Select default.
    • Advanced Settings: Select Custom.
    • CIDR Block: Specify the queue network segment. For example, 10.0.0.0/16.

      The CIDR block of a queue cannot overlap with the CIDR blocks of DMS Kafka and RDS for MySQL DB instances. Otherwise, datasource connections will fail to be created.

      +
      +
    • Set other parameters as required.
    +
  4. Click Buy. Confirm the configuration and click Submit.
+
+

Step 2: Create a Kafka Topic

  1. On the Kafka management console, click an instance name on the DMS for Kafka page. Basic information of the Kafka instance is displayed.
  2. Choose Topics in the navigation pane on the left. On the displayed page, click Create Topic. Configure the following parameters:
    • Topic Name: For this example, enter testkafkatopic.
    • Partitions: Set the value to 1.
    • Replicas: Set the value to 1.
    +

    Retain default values for other parameters.

    +
+
+

Step 3: Create an Elasticsearch Index

  1. Log in to the CSS management console and choose Clusters > Elasticsearch from the navigation pane on the left.
  2. On the Clusters page, click Access Kibana in the Operation column of the created CSS cluster.
  3. On the displayed page, choose Dev Tools in the navigation pane on the left. The Console page is displayed.
  4. On the displayed page, run the following command to create index shoporders:
    PUT /shoporders
    +{
    +  "settings": {
    +    "number_of_shards": 1
    +  },
    +	"mappings": {
    +	  "properties": {
    +	    "order_id": {
    +	      "type": "text"
    +	    },
    +	    "order_channel": {
    +	      "type": "text"
    +	    },
    +	    "order_time": {
    +	      "type": "text"
    +	    },
    +	    "pay_amount": {
    +	      "type": "double"
    +	    },
    +	    "real_pay": {
    +	      "type": "double"
    +	    },
    +	    "pay_time": {
    +	      "type": "text"
    +	    },
    +	    "user_id": {
    +	      "type": "text"
    +	    },
    +	    "user_name": {
    +	      "type": "text"
    +	    },
    +	    "area_id": {
    +	      "type": "text"
    +	    }
    +	  }
    +	}
    +}
    +
+
+

Step 4: Create an Enhanced Datasource Connection

+
+

Step 5: Run a Job

  1. On the DLI management console, choose Job Management > Flink Jobs. On the Flink Jobs page, click Create Job.
  2. In the Create Job dialog box, set Type to Flink OpenSource SQL and Name to FlinkKafkaES. Click OK.
  3. On the job editing page, set the following parameters and retain the default values of other parameters.
    • Queue: Select the queue created in Step 1: Create a Queue.
    • Flink Version: Select 1.12.
    • Save Job Log: Enable this function.
    • OBS Bucket: Select an OBS bucket for storing job logs and grant access permissions of the OBS bucket as prompted.
    • Enable Checkpointing: Enable this function.
    • Enter a SQL statement in the editing pane. The following is an example. Modify the parameters in bold as you need.

      In this example, the syntax version of Flink OpenSource SQL is 1.12. The data source is Kafka, and the result data is written to Elasticsearch.

      +
      +
    • Create a Kafka source table and connect DLI to the Kafka data source.
      CREATE TABLE kafkaSource (
      +  order_id string,
      +  order_channel string,
      +  order_time string,
      +  pay_amount double,
      +  real_pay double,
      +  pay_time string,
      +  user_id string,
      +  user_name string,
      +  area_id string
      +) with (
      +  "connector" = "kafka",
      +  "properties.bootstrap.servers" = "10.128.0.120:9092,10.128.0.89:9092,10.128.0.83:9092",-- Internal network address and port number of the Kafka instance
      +  "properties.group.id" = "click",
      +  "topic" = "testkafkatopic",--Created Kafka topic
      +  "format" = "json",
      +  "scan.startup.mode" = "latest-offset"
      +);
      +
    • Create an Elasticsearch result table to display the data analyzed by DLI.
      CREATE TABLE elasticsearchSink (
      +  order_id string,
      +  order_channel string,
      +  order_time string,
      +  pay_amount double,
      +  real_pay double,
      +  pay_time string,
      +  user_id string,
      +  user_name string,
      +  area_id string
      +) WITH (
      +  'connector' = 'elasticsearch-7',
      +  'hosts' = '192.168.168.125:9200', --Private IP address and port of the CSS cluster
      +  'index' = 'shoporders' --Created Elasticsearch index
      +);
      +--Write Kafka data to Elasticsearch indexes
      +insert into
      +  elasticsearchSink
      +select
      +  *
      +from
      +  kafkaSource;
      +
    +
  4. Click Check Semantic and ensure that the SQL statement passes the check. Click Save. Click Start, confirm the job parameters, and click Start Now to execute the job. Wait until the job status changes to Running.
+
+

Step 6: Send Data and Query Results

  1. Send data to Kafka.

    Use the Kafka client to send data to topics created in Step 2: Create a Kafka Topic to simulate real-time data streams.

    +

    The sample data is as follows:

    +
    {"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}
    +
    +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0002", "user_name":"Jason", "area_id":"330106"}
    +
  2. View the data processing result on Elasticsearch.
    After the message is sent to Kafka, run the following statement in Kibana for the CSS cluster and check the result:
    GET shoporders/_search
    +
    +
    The query result is as follows:
    {
    +  "took" : 0,
    +  "timed_out" : false,
    +  "_shards" : {
    +    "total" : 1,
    +    "successful" : 1,
    +    "skipped" : 0,
    +    "failed" : 0
    +  },
    +  "hits" : {
    +    "total" : {
    +      "value" : 2,
    +      "relation" : "eq"
    +    },
    +    "max_score" : 1.0,
    +    "hits" : [
    +      {
    +        "_index" : "shoporders",
    +        "_type" : "_doc",
    +        "_id" : "6fswzIAByVjqg3_qAyM1",
    +        "_score" : 1.0,
    +        "_source" : {
    +          "order_id" : "202103241000000001",
    +          "order_channel" : "webShop",
    +          "order_time" : "2021-03-24 10:00:00",
    +          "pay_amount" : 100.0,
    +          "real_pay" : 100.0,
    +          "pay_time" : "2021-03-24 10:02:03",
    +          "user_id" : "0001",
    +          "user_name" : "Alice",
    +          "area_id" : "330106"
    +        }
    +      },
    +      {
    +        "_index" : "shoporders",
    +        "_type" : "_doc",
    +        "_id" : "6vs1zIAByVjqg3_qyyPp",
    +        "_score" : 1.0,
    +        "_source" : {
    +          "order_id" : "202103241606060001",
    +          "order_channel" : "appShop",
    +          "order_time" : "2021-03-24 16:06:06",
    +          "pay_amount" : 200.0,
    +          "real_pay" : 180.0,
    +          "pay_time" : "2021-03-24 16:10:06",
    +          "user_id" : "0002",
    +          "user_name" : "Jason",
    +          "area_id" : "330106"
    +        }
    +      }
    +    ]
    +  }
    +}
    +
    +

    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0012.html b/docs/dli/dev/dli_09_0012.html new file mode 100644 index 00000000..a0581f9e --- /dev/null +++ b/docs/dli/dev/dli_09_0012.html @@ -0,0 +1,129 @@ + + +

Reading Data from MySQL CDC and Writing Data to GaussDB(DWS)

+

This guide provides reference for Flink 1.12 only.

+
+

Description

Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. During data synchronization, CDC can also process the data, for example, by grouping (GROUP BY) or joining multiple tables (JOIN).

+

This example creates a MySQL CDC source table to monitor MySQL data changes and insert the changed data into a GaussDB(DWS) database.

+
+

Prerequisites

  1. You have created an RDS for MySQL instance. In this example, the RDS for MySQL database version is 8.0.
  2. You have created a GaussDB(DWS) cluster.
+
+

Overall Process

Figure 1 shows the overall development process.
Figure 1 Job development process
+
+

Step 1: Create a Queue

+

Step 2: Create an RDS MySQL Database and Table

+

Step 3: Create a GaussDB(DWS) Database and Table

+

Step 4: Create an Enhanced Datasource Connection

+

Step 5: Run a Job

+

Step 6: Send Data and Query Results

+
+

Step 1: Create a Queue

  1. Log in to the DLI console. In the navigation pane on the left, choose Resources > Queue Management.
  2. On the displayed page, click Buy Queue in the upper right corner.
  3. On the Buy Queue page, set queue parameters as follows:
    • Billing Mode: .
    • Region and Project: Retain the default values.
    • Name: Enter a queue name.

      The queue name can contain only digits, letters, and underscores (_), but cannot contain only digits or start with an underscore (_). The name must contain 1 to 128 characters.

      +

      The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters.

      +
      +
    • Type: Select For general purpose. Select the Dedicated Resource Mode.
    • AZ Mode and Specifications: Retain the default values.
    • Enterprise Project: Select default.
    • Advanced Settings: Select Custom.
    • CIDR Block: Specify the queue network segment. For example, 10.0.0.0/16.

      The CIDR block of a queue cannot overlap with the CIDR blocks of DMS Kafka and RDS for MySQL DB instances. Otherwise, datasource connections will fail to be created.

      +
      +
    • Set other parameters as required.
    +
  4. Click Buy. Confirm the configuration and click Submit.
+
+

Step 2: Create an RDS MySQL Database and Table

  1. Log in to the RDS console. On the displayed page, locate the target MySQL DB instance and choose More > Log In in the Operation column.
  2. In the displayed login dialog box, enter the username and password and click Log In.
  3. On the Databases page, click Create Database. In the displayed dialog box, enter testrdsdb as the database name and retain the default values of the other parameters. Then, click OK.
  4. In the Operation column of the row containing the created database, click SQL Window and enter the following statement to create a table:
    CREATE TABLE mysqlcdc (
    +	`order_id` VARCHAR(64) NOT NULL,
    +	`order_channel` VARCHAR(32) NOT NULL,
    +	`order_time` VARCHAR(32),
    +	`pay_amount` DOUBLE,
    +	`real_pay` DOUBLE,
    +	`pay_time` VARCHAR(32),
    +	`user_id` VARCHAR(32),
    +	`user_name` VARCHAR(32),
    +	`area_id` VARCHAR(32)
    +
    +)	ENGINE = InnoDB
    +	DEFAULT CHARACTER SET = utf8mb4;
    +
+
+

Step 3: Create a GaussDB(DWS) Database and Table

+
  1. Connect to the created GaussDB(DWS) cluster.
  2. Connect to the default database gaussdb of a GaussDB(DWS) cluster.
    gsql -d gaussdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
    • gaussdb: Default database of the GaussDB(DWS) cluster
    • Connection address of the DWS cluster: If a public network address is used for connection, set this parameter to the public network IP address or domain name. If a private network address is used for connection, set this parameter to the private network IP address or domain name. If an ELB is used for connection, set this parameter to the ELB address.
    • dbadmin: Default administrator username used during cluster creation
    • -W: Default password of the administrator
    +
  3. Run the following command to create the testdwsdb database:
    CREATE DATABASE testdwsdb;
    +
  4. Run the following command to exit the gaussdb database and connect to testdwsdb:
    \q
    +gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  5. Run the following commands to create a table:
    create schema test;
    +set current_schema= test;
    +drop table if exists dwsresult;
    +CREATE TABLE dwsresult
    +(
    +    order_channel VARCHAR,
    +    pay_amount FLOAT8,
    +    real_pay FLOAT8
    +);
    +
+
+

Step 4: Create an Enhanced Datasource Connection

+
+

Step 5: Run a Job

  1. On the DLI management console, choose Job Management > Flink Jobs. On the Flink Jobs page, click Create Job.
  2. In the Create Job dialog box, set Type to Flink OpenSource SQL and Name to FlinkCDCMySQLDWS. Click OK.
  3. On the job editing page, set the following parameters and retain the default values of other parameters.
    • Queue: Select the queue created in Step 1: Create a Queue.
    • Flink Version: Select 1.12.
    • Save Job Log: Enable this function.
    • OBS Bucket: Select an OBS bucket for storing job logs and grant access permissions of the OBS bucket as prompted.
    • Enable Checkpointing: Enable this function.
    • Enter a SQL statement in the editing pane. The following is an example. Modify the parameters in bold as you need.

      In this example, the syntax version of Flink OpenSource SQL is 1.12. The data source is MySQL CDC, and the result data is written to GaussDB(DWS).

      +
      +
      create table mysqlCdcSource(
      +  order_id string,
      +  order_channel string,
      +  order_time string,
      +  pay_amount double,
      +  real_pay double,
      +  pay_time string,
      +  user_id string,
      +  user_name string,
      +  area_id STRING
      +) with (
      +  'connector' = 'mysql-cdc',
      +  'hostname' = '192.168.12.148',--IP address of the RDS MySQL instance
      +  'port'= '3306',--Port number of the RDS MySQL instance
      +  'pwd_auth_name'= 'xxxxx', -- Name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
      +  'database-name' = 'testrdsdb',--Database name of the RDS MySQL instance
      +  'table-name' = 'mysqlcdc'--Name of the target table in the database
      +);
      +
      +create table dwsSink(
      +  order_channel string,
      +  pay_amount double,
      +  real_pay double,
      +  primary key(order_channel) not enforced
      +) with (
      +  'connector' = 'gaussdb',
      +  'driver' = 'com.gauss200.jdbc.Driver',
      +  'url'='jdbc:gaussdb://192.168.168.16:8000/testdwsdb', ---192.168.168.16:8000 indicates the internal IP address and port of the GaussDB(DWS) instance. testdwsdb indicates the name of the created GaussDB(DWS) database.
      +  'table-name' = 'test\".\"dwsresult', ---test indicates the schema of the created GaussDB(DWS) table, and dwsresult indicates the GaussDB(DWS) table name.
      +  'pwd_auth_name'= 'xxxxx', -- Name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
      +  'write.mode' = 'insert'
      +);
      +
      +insert into dwsSink select order_channel, sum(pay_amount),sum(real_pay) from mysqlCdcSource group by order_channel;
      +
    +
  4. Click Check Semantic and ensure that the SQL statement passes the check. Click Save. Click Start, confirm the job parameters, and click Start Now to execute the job. Wait until the job status changes to Running.
+
+

Step 6: Send Data and Query Results

  1. Log in to the RDS console. On the displayed page, locate the target MySQL DB instance and choose More > Log In in the Operation column.
  2. In the displayed login dialog box, enter the username and password and click Log In.
  3. In the Operation column of the row containing the created database, click SQL Window and enter the following statement to insert data into the table:
    insert into mysqlcdc values
    +('202103241000000001','webShop','2021-03-24 10:00:00','100.00','100.00','2021-03-24 10:02:03','0001','Alice','330106'),
    +('202103241206060001','appShop','2021-03-24 12:06:06','200.00','180.00','2021-03-24 16:10:06','0002','Jason','330106'),
    +('202103241403000001','webShop','2021-03-24 14:03:00','300.00','100.00','2021-03-24 10:02:03','0003','Lily','330106'),
    +('202103241636060001','appShop','2021-03-24 16:36:06','200.00','150.00','2021-03-24 16:10:06','0001','Henry','330106');
    +
  4. .
  5. Connect to the created testdwsdb database of the GaussDB(DWS) cluster.
    gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  6. Run the following statements to query table data:
    select * from test.dwsresult;
    +
    The query result is as follows:
    order_channel  pay_amount  real_pay 
    +appShop         400.0       330.0
    +webShop         400.0       200.0
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0013.html b/docs/dli/dev/dli_09_0013.html new file mode 100644 index 00000000..92f1250f --- /dev/null +++ b/docs/dli/dev/dli_09_0013.html @@ -0,0 +1,245 @@ + + +

Reading Data from PostgreSQL CDC and Writing Data to GaussDB(DWS)

+

This guide provides reference for Flink 1.12 only.

+
+

Description

Change Data Capture (CDC) can synchronize incremental changes from the source database to one or more destinations. During data synchronization, CDC can also process the data, for example, by grouping (GROUP BY) or joining multiple tables (JOIN).

+

This example creates a PostgreSQL CDC source table to monitor PostgreSQL data changes and insert the changed data into a GaussDB(DWS) database.

+
+

Prerequisites

  1. You have created an RDS for PostgreSQL DB instance. In this example, the RDS for PostgreSQL database version is 11.

    The version of the RDS PostgreSQL database cannot be earlier than 11.

    +
    +
  2. You have created a GaussDB(DWS) instance.
+
+

Overall Process

Figure 1 shows the overall development process.
Figure 1 Job development process
+
+

Step 1: Create a Queue

+

Step 2: Create an RDS PostgreSQL Database and Table

+

Step 3: Create a GaussDB(DWS) Database and Table

+

Step 4: Create an Enhanced Datasource Connection

+

Step 5: Run a Job

+

Step 6: Send Data and Query Results

+
+

Step 1: Create a Queue

  1. Log in to the DLI console. In the navigation pane on the left, choose Resources > Queue Management.
  2. On the displayed page, click Buy Queue in the upper right corner.
  3. On the Buy Queue page, set queue parameters as follows:
    • Billing Mode: .
    • Region and Project: Retain the default values.
    • Name: Enter a queue name.

      The queue name can contain only digits, letters, and underscores (_), but cannot contain only digits or start with an underscore (_). The name must contain 1 to 128 characters.

      +

      The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters.

      +
      +
    • Type: Select For general purpose. Select the Dedicated Resource Mode.
    • AZ Mode and Specifications: Retain the default values.
    • Enterprise Project: Select default.
    • Advanced Settings: Select Custom.
    • CIDR Block: Specify the queue network segment. For example, 10.0.0.0/16.

      The CIDR block of a queue cannot overlap with the CIDR blocks of DMS Kafka and RDS for MySQL DB instances. Otherwise, datasource connections will fail to be created.

      +
      +
    • Set other parameters as required.
    +
  4. Click Buy. Confirm the configuration and click Submit.
+
+

Step 2: Create an RDS PostgreSQL Database and Table

  1. Log in to the RDS console. On the displayed page, locate the target PostgreSQL DB instance and choose More > Log In in the Operation column.
  2. In the login dialog box displayed, enter the username and password and click Log In.
  3. Create a database named testrdsdb.
  4. Create a schema named test for the testrdsdb database.
  5. Choose SQL Operations > SQL Query. On the page displayed, create an RDS for PostgreSQL table.
    create table test.cdc_order(
    +  order_id VARCHAR,
    +  order_channel VARCHAR,
    +  order_time VARCHAR,
    +  pay_amount FLOAT8,
    +  real_pay FLOAT8,
    +  pay_time VARCHAR,
    +  user_id VARCHAR,
    +  user_name VARCHAR,
    +  area_id VARCHAR,
    +  primary key(order_id));
    +
    Run the following statement in the PostgreSQL instance so that update and delete operations record the full row image required by CDC:
    ALTER TABLE test.cdc_order REPLICA IDENTITY FULL;
    +
    +
+
+

Step 3: Create a GaussDB(DWS) Database and Table

  1. Connect to the created GaussDB(DWS) cluster.
  2. Connect to the default database gaussdb of a GaussDB(DWS) cluster.
    gsql -d gaussdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
    • gaussdb: Default database of the GaussDB(DWS) cluster
    • Connection address of the GaussDB(DWS) cluster: If a public network address is used for connection, set this parameter to the public network IP address or domain name. If a private network address is used for connection, set this parameter to the private network IP address or domain name. If an ELB is used for connection, set this parameter to the ELB address.
    • dbadmin: Default administrator username used during cluster creation
    • -W: Password of the administrator set during cluster creation
    +
  3. Run the following command to create the testdwsdb database:
    CREATE DATABASE testdwsdb;
    +
  4. Run the following command to exit the gaussdb database and connect to testdwsdb:
    \q
    +gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  5. Run the following commands to create a table:
    create schema test;
    +set current_schema= test;
    +drop table if exists dws_order;
    +CREATE TABLE dws_order
    +(
    +  order_id VARCHAR,
    +  order_channel VARCHAR,
    +  order_time VARCHAR,
    +  pay_amount FLOAT8,
    +  real_pay FLOAT8,
    +  pay_time VARCHAR,
    +  user_id VARCHAR,
    +  user_name VARCHAR,
    +  area_id VARCHAR
    +);
    +
+
+

Step 4: Create an Enhanced Datasource Connection

+
+

Step 5: Run a Job

  1. On the DLI management console, choose Job Management > Flink Jobs. On the Flink Jobs page, click Create Job.
  2. In the Create Job dialog box, set Type to Flink OpenSource SQL and Name to FlinkCDCPostgreDWS. Click OK.
  3. On the job editing page, set the following parameters and retain the default values of other parameters.
    • Queue: Select the queue created in Step 1: Create a Queue.
    • Flink Version: Select 1.12.
    • Save Job Log: Enable this function.
    • OBS Bucket: Select an OBS bucket for storing job logs and grant access permissions of the OBS bucket as prompted.
    • Enable Checkpointing: Enable this function.
    • Enter a SQL statement in the editing pane. The following is an example. Modify the parameters in bold as you need.

      In this example, the syntax version of Flink OpenSource SQL is 1.12. The data source is PostgreSQL CDC, and the result data is written to GaussDB(DWS).

      +
      +
      +
      Table 1 Job running parameters

      Parameter

      +

      Description

      +

      Queue

      +

      A shared queue is selected by default. You can select a CCE queue with dedicated resources and configure the following parameters:

      +

      UDF Jar: UDF Jar file. Before selecting such a file, upload the corresponding JAR file to the OBS bucket and choose Data Management > Package Management to create a package. For details, see .

      +

      In SQL, you can call a UDF that is inserted into a JAR file.

      +
      NOTE:

      When creating a job, a sub-user can only select the queue that has been allocated to the user.

      +

      If the remaining capacity of the selected queue cannot meet the job requirements, the system automatically scales up the capacity and you will be billed based on the increased capacity. When a queue is idle, the system automatically scales in its capacity.

      +
      +

      CUs

      +

      Sum of the number of compute units and job manager CUs of DLI. One CU equals 1 vCPU and 4 GB.

      +

      The value is the number of CUs required for job running and cannot exceed the number of CUs in the bound queue.

      +

      Job Manager CUs

      +

      Number of CUs of the management unit.

      +

      Parallelism

      +

      Maximum number of Flink OpenSource SQL jobs that can run at the same time.

      +
      NOTE:

      This value cannot be greater than four times the compute units (number of CUs minus the number of JobManager CUs).

      +
      +

      Task Manager Configuration

      +

      Whether to set Task Manager resource parameters.

      +

      If this option is selected, you need to set the following parameters:

      +
      • CU(s) per TM: Number of resources occupied by each Task Manager.
      • Slot(s) per TM: Number of slots contained in each Task Manager.
      +

      OBS Bucket

      +

      OBS bucket to store job logs and checkpoint information. If the selected OBS bucket is not authorized, click Authorize.

      +

      Save Job Log

      +

      Whether to save job run logs to OBS. The logs are saved in Bucket name/jobs/logs/Directory starting with the job ID.

      +
      CAUTION:

      You are advised to configure this parameter. Otherwise, no run log is generated after the job is executed. If the job fails, the run log cannot be obtained for fault locating.

      +
      +

      If this option is selected, you need to set the following parameters:

      +
      OBS Bucket: Select an OBS bucket to store user job logs. If the selected OBS bucket is not authorized, click Authorize.
      NOTE:

      If Enable Checkpointing and Save Job Log are both selected, you only need to authorize OBS once.

      +
      +
      +

      Alarm Generation upon Job Exception

      +

      Whether to notify users of any job exceptions, such as running exceptions or arrears, via SMS or email.

      +

      If this option is selected, you need to set the following parameters:

      +

      SMN Topic

      +

      Select a user-defined SMN topic. For details about how to create a custom SMN topic, see "Creating a Topic" in Simple Message Notification User Guide.

      +

      Enable Checkpointing

      +

      Whether to enable job snapshots. If this function is enabled, jobs can be restored based on checkpoints.

      +
      If this option is selected, you need to set the following parameters:
      • Checkpoint Interval: interval for creating checkpoints, in seconds. The value ranges from 1 to 999999, and the default value is 30.
      • Checkpoint Mode: checkpointing mode, which can be set to either of the following values:
        • At least once: Events are processed at least once.
        • Exactly once: Events are processed only once.
        +
      +
      • OBS Bucket: Select an OBS bucket to store your checkpoints. If the selected OBS bucket is not authorized, click Authorize.
        Checkpoints are saved in Bucket name/jobs/checkpoint/Directory starting with the job ID.
        NOTE:

        If Enable Checkpointing and Save Job Log are both selected, you only need to authorize OBS once.

        +
        +
        +
      +
      +

      Auto Restart upon Exception

      +

      Whether to enable automatic restart. If this function is enabled, jobs will be automatically restarted and restored when exceptions occur.

      +

      If this option is selected, you need to set the following parameters:

      +
      • Max. Retry Attempts: maximum number of retries upon an exception. The unit is times/hour.
        • Unlimited: The number of retries is unlimited.
        • Limited: The number of retries is user-defined.
        +
      • Restore Job from Checkpoint: This parameter is available only when Enable Checkpointing is selected.
      +

      Idle State Retention Time

      +

      How long the state of a key is retained without being updated before it is removed in GroupBy or Window. The default value is 1 hour.

      +

      Dirty Data Policy

      +

      Policy for processing dirty data. The following policies are supported: Ignore, Trigger a job exception, and Save.

      +

      If you set this field to Save, Dirty Data Dump Address must be set. Click the address box to select the OBS path for storing dirty data.

      +
      +
      +
      create table PostgreCdcSource(
      +  order_id string,
      +  order_channel string,
      +  order_time string,
      +  pay_amount double,
      +  real_pay double,
      +  pay_time string,
      +  user_id string,
      +  user_name string,
      +  area_id STRING,
      +  primary key (order_id) not enforced
      +) with (
      +  'connector' = 'postgres-cdc',
      +  'hostname' = '192.168.15.153',  -- IP address of the PostgreSQL instance
      +  'port' = '5432',  -- Port number of the PostgreSQL instance
      +  'pwd_auth_name' = 'xxxxx',  -- Name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
      +  'database-name' = 'testrdsdb',  -- Database name of the PostgreSQL instance
      +  'schema-name' = 'test',  -- Schema in the PostgreSQL database
      +  'table-name' = 'cdc_order'  -- Table name in the PostgreSQL database
      +);
      +
      +create table dwsSink(
      +  order_id string,
      +  order_channel string,
      +  order_time string,
      +  pay_amount double,
      +  real_pay double,
      +  pay_time string,
      +  user_id string,
      +  user_name string,
      +  area_id STRING,  
      +  primary key(order_id) not enforced
      +) with (
      +  'connector' = 'gaussdb',
      +  'driver' = 'com.gauss200.jdbc.Driver',
      +  'url' = 'jdbc:gaussdb://192.168.168.16:8000/testdwsdb',  -- 192.168.168.16:8000 indicates the internal IP address and port of the GaussDB(DWS) instance. testdwsdb indicates the name of the created GaussDB(DWS) database.
      +  'table-name' = 'test\".\"dws_order',  -- test indicates the schema of the created GaussDB(DWS) table, and dws_order indicates the GaussDB(DWS) table name.
      +  'username' = 'xxxxx',--Username of the GaussDB(DWS) instance
      +  'password' = 'xxxxx',--Password of the GaussDB(DWS) instance
      +  'write.mode' = 'insert'
      +);
      +
      +insert into dwsSink select * from PostgreCdcSource where pay_amount > 100;
      +
      +
      +
    +
  4. Click Check Semantic and ensure that the SQL statement passes the check. Click Save. Click Start, confirm the job parameters, and click Start Now to execute the job. Wait until the job status changes to Running.
+
+

Step 6: Send Data and Query Results

  1. Log in to the RDS console. On the displayed page, locate the target PostgreSQL DB instance and choose More > Log In in the Operation column.
  2. In the displayed login dialog box, enter the username and password and click Log In.
  3. In the Operation column of the row containing the created database, click SQL Window and enter the following statement to insert data into the table created in Step 2:
    insert into test.cdc_order values
    +('202103241000000001','webShop','2021-03-24 10:00:00','50.00','100.00','2021-03-24 10:02:03','0001','Alice','330106'),
    +('202103251606060001','appShop','2021-03-24 12:06:06','200.00','180.00','2021-03-24 16:10:06','0002','Jason','330106'),
    +('202103261000000001','webShop','2021-03-24 14:03:00','300.00','100.00','2021-03-24 10:02:03','0003','Lily','330106'),
    +('202103271606060001','appShop','2021-03-24 16:36:06','99.00','150.00','2021-03-24 16:10:06','0001','Henry','330106');
    +
  4. Connect to the created GaussDB(DWS) cluster.
  5. Connect to the testdwsdb database of the GaussDB(DWS) cluster.
    gsql -d testdwsdb -h Connection address of the GaussDB(DWS) cluster -U dbadmin -p 8000 -W password -r
    +
  6. Run the following statements to query table data:
    select * from test.dws_order;
    +
    The query result is as follows:
    order_id                   order_channel     order_time             pay_amount  real_pay  pay_time              user_id  user_name  area_id
    +202103251606060001         appShop         2021-03-24 12:06:06       200.0      180.0   2021-03-24 16:10:06      0002      Jason     330106
    +202103261000000001         webShop         2021-03-24 14:03:00       300.0      100.0   2021-03-24 10:02:03      0003      Lily      330106
    +
    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0019.html b/docs/dli/dev/dli_09_0019.html new file mode 100644 index 00000000..d231d1ae --- /dev/null +++ b/docs/dli/dev/dli_09_0019.html @@ -0,0 +1,29 @@ + + +

Using Spark Jobs to Access Data Sources of Datasource Connections

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0020.html b/docs/dli/dev/dli_09_0020.html new file mode 100644 index 00000000..8896741d --- /dev/null +++ b/docs/dli/dev/dli_09_0020.html @@ -0,0 +1,13 @@ + + +

Overview

+

DLI supports the native Spark DataSource capability and other extended capabilities. You can use SQL statements or Spark jobs to access other data storage services and import, query, analyze, and process data. Currently, DLI supports the following datasource access services: CloudTable, Cloud Search Service (CSS), Distributed Cache Service (DCS), Document Database Service (DDS), GaussDB(DWS), MapReduce Service (MRS), and Relational Database Service (RDS). To use the datasource capability of DLI, you need to create a datasource connection first.

+

When you use Spark jobs to access other data sources, you can use Scala, PySpark, or Java to program functions for the jobs.

+ +
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0061.html b/docs/dli/dev/dli_09_0061.html new file mode 100644 index 00000000..f90f8e73 --- /dev/null +++ b/docs/dli/dev/dli_09_0061.html @@ -0,0 +1,759 @@ + + +

Scala Example Code

+

Prerequisites

A datasource connection has been created on the DLI management console.

+
+

CSS Non-Security Cluster

+
+

CSS Security Cluster

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0063.html b/docs/dli/dev/dli_09_0063.html new file mode 100644 index 00000000..32100982 --- /dev/null +++ b/docs/dli/dev/dli_09_0063.html @@ -0,0 +1,450 @@ + + +

Scala Example Code

+

Development Description

The CloudTable HBase and MRS HBase can be connected to DLI as data sources.

+ +
+

Accessing a Data Source Using a SQL API

  1. Insert data.
    1
    sparkSession.sql("insert into test_hbase values('12345','abc','guiyang',false,null,3,23,2.3,2.34)")
    +
    + +
    +
  2. Query data.
    1
    sparkSession.sql("select * from test_hbase").show ()
    +
    + +
    +
+
+

Accessing a Data Source Using a DataFrame API

  1. Construct a schema.
     1
    + 2
    + 3
    + 4
    + 5
    + 6
    + 7
    + 8
    + 9
    +10
    val attrId = new StructField("id",StringType)
    +val location = new StructField("location",StringType)
    +val city = new StructField("city",StringType)
    +val booleanf = new StructField("booleanf",BooleanType)
    +val shortf = new StructField("shortf",ShortType)
    +val intf = new StructField("intf",IntegerType)
    +val longf = new StructField("longf",LongType)
    +val floatf = new StructField("floatf",FloatType)
    +val doublef = new StructField("doublef",DoubleType)
    +val attrs = Array(attrId, location,city,booleanf,shortf,intf,longf,floatf,doublef)
    +
    + +
    +
  2. Construct data based on the schema type.
    1
    +2
    val mutableRow: Seq[Any] = Seq("12345","abc","city1",false,null,3,23,2.3,2.34)
    +val rddData: RDD[Row] = sparkSession.sparkContext.parallelize(Array(Row.fromSeq(mutableRow)), 1)
    +
    + +
    +
  3. Import data to HBase.
    1
    sparkSession.createDataFrame(rddData, new StructType(attrs)).write.insertInto("test_hbase")
    +
    + +
    +
  4. Read data from HBase.
    1
    +2
    +3
    +4
    +5
    +6
    +7
    +8
    val map = new mutable.HashMap[String, String]()
    +map("TableName") = "table_DupRowkey1"
    +map("RowKey") = "id:5,location:6,city:7"
    +map("Cols") = "booleanf:CF1.booleanf,shortf:CF1.shortf,intf:CF1.intf,longf:CF1.longf,floatf:CF1.floatf,doublef:CF1.doublef"
    +map("ZKHost") = "cloudtable-cf82-zk3-pa6HnHpf.cloudtable.com:2181," +
    +                "cloudtable-cf82-zk2-weBkIrjI.cloudtable.com:2181," +
    +                "cloudtable-cf82-zk1-WY09px9l.cloudtable.com:2181"
    +sparkSession.read.schema(new StructType(attrs)).format("hbase").options(map.toMap).load().show()
    +
    + +
    +
+
+

Submitting a Spark Job

  1. Generate a JAR package based on the code and upload the package to DLI.

    +

    +
  2. (Optional) Add the krb5.conf and user.keytab files to other dependency files of the job when creating a Spark job in an MRS cluster with Kerberos authentication enabled. Skip this step if Kerberos authentication is not enabled for the cluster.
  3. In the Spark job editor, select the corresponding dependency module and execute the Spark job.

    +
    • If the Spark version is 2.3.2 (will be offline soon) or 2.4.5, set Module to sys.datasource.hbase when you submit a job.
    • If the Spark version is 3.1.1, you do not need to select a module. Set Spark parameters (--conf).

      spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/hbase/*

      +

      spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/hbase/*

      +
    +
    +
    +
+
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0065.html b/docs/dli/dev/dli_09_0065.html new file mode 100644 index 00000000..251a1d22 --- /dev/null +++ b/docs/dli/dev/dli_09_0065.html @@ -0,0 +1,274 @@ + + +

Scala Example Code

+

Development Description

The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0067.html b/docs/dli/dev/dli_09_0067.html new file mode 100644 index 00000000..c7905695 --- /dev/null +++ b/docs/dli/dev/dli_09_0067.html @@ -0,0 +1,577 @@ + + +

Scala Example Code

+

Development Description

+
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0069.html b/docs/dli/dev/dli_09_0069.html new file mode 100644 index 00000000..1804e177 --- /dev/null +++ b/docs/dli/dev/dli_09_0069.html @@ -0,0 +1,515 @@ + + +

Scala Example Code

+

Scenario

This section provides Scala example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.

+

A datasource connection has been created and bound to a queue on the DLI management console.

+

Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.

+
+
+

Preparations

Constructing dependency information and creating a Spark session
  1. Import dependencies

    Involved Maven dependency

    +
    1
    +2
    +3
    +4
    +5
    <dependency>
    +  <groupId>org.apache.spark</groupId>
    +  <artifactId>spark-sql_2.11</artifactId>
    +  <version>2.3.2</version>
    +</dependency>
    +
    + +
    +
    Import dependency packages.
    1
    +2
    +3
    import java.util.Properties
    +import org.apache.spark.sql.{Row,SparkSession}
    +import org.apache.spark.sql.SaveMode
    +
    + +
    +
    +
  2. Create a session.
    1
    val sparkSession = SparkSession.builder().getOrCreate()
    +
    + +
    +
+
+
+

Accessing a Data Source Using a SQL API

  1. Create a table to connect to a GaussDB(DWS) data source.
    1
    +2
    +3
    +4
    +5
    +6
    +7
    +8
    sparkSession.sql(
    +  """CREATE TABLE IF NOT EXISTS dli_to_dws USING JDBC OPTIONS (
    +     'url'='jdbc:postgresql://to-dws-1174404209-cA37siB6.datasource.com:8000/postgres',
    +     'dbtable'='customer',
    +     'user'='dbadmin',
    +     'passwdauth'='######')"""
    +)
    +// passwdauth: name of the datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for the job.
    +
    + +
    + +
    Table 1 Parameters for creating a table

    Parameter

    +

    Description

    +

    url

    +

    To obtain a GaussDB(DWS) IP address, you need to create a datasource connection first. Refer to Data Lake Insight User Guide for more information.

    +

    After an enhanced datasource connection is created, you can use the JDBC connection string (intranet) provided by GaussDB(DWS) or the intranet IP address and port number to connect to GaussDB(DWS). The format is protocol header://internal IP address:internal network port number/database name, for example: jdbc:postgresql://192.168.0.77:8000/postgres. For details about how to obtain the value, see GaussDB(DWS) cluster information.

    +
    NOTE:

    The GaussDB(DWS) IP address is in the following format: protocol header://IP address:port number/database name

    +

    Example:

    +

    jdbc:postgresql://to-dws-1174405119-ihlUr78j.datasource.com:8000/postgres

    +

    If you want to connect to a database created in GaussDB(DWS), change postgres to the corresponding database name in this connection.

    +
    +

    passwdauth

    +

    Name of datasource authentication of the password type created on DLI. If datasource authentication is used, you do not need to set the username and password for jobs.

    +

    dbtable

    +

    Tables in the PostgreSQL database.

    +

    partitionColumn

    +

    Name of the numeric column used to partition the table when data is read concurrently.

    +
    NOTE:
    • The partitionColumn, lowerBound, upperBound, and numPartitions parameters must be set at the same time.
    • To improve the concurrent read performance, you are advised to use auto-increment columns.
    +
    +

    lowerBound

    +

    Minimum value of a column specified by partitionColumn. The value is contained in the returned result.

    +

    upperBound

    +

    Maximum value of a column specified by partitionColumn. The value is not contained in the returned result.

    +

    numPartitions

    +

    Number of concurrent read operations.

    +
    NOTE:

    When data is read, the range between lowerBound and upperBound is evenly divided among the tasks. Example:

    +

    'partitionColumn'='id',

    +

    'lowerBound'='0',

    +

    'upperBound'='100',

    +

    'numPartitions'='2'

    +

    Two concurrent tasks are started in DLI: one reads the rows whose id is greater than or equal to 0 and less than 50, and the other reads the rows whose id is greater than or equal to 50 and less than 100. A table creation statement that combines these options is sketched at the end of this section.

    +
    +

    fetchsize

    +

    Number of data records obtained in each batch during data reading. The default value is 1000. A larger value improves performance but occupies more memory and, if set too large, may cause memory overflow.

    +

    batchsize

    +

    Number of data records written in each batch. The default value is 1000. A larger value improves performance but occupies more memory and, if set too large, may cause memory overflow.

    +

    truncate

    +

    Indicates whether to clear the table without deleting the original table when overwrite is executed. The options are as follows:

    +
    • true
    • false
    +

    The default value is false, indicating that the original table is deleted and then a new table is created when the overwrite operation is performed.

    +

    isolationLevel

    +

    Transaction isolation level. The options are as follows:

    +
    • NONE
    • READ_UNCOMMITTED
    • READ_COMMITTED
    • REPEATABLE_READ
    • SERIALIZABLE
    +

    The default value is READ_UNCOMMITTED.

    +
    +
    +
  2. Insert data
    1
    sparkSession.sql("insert into dli_to_dws values(1, 'John',24),(2, 'Bob',32)")
    +
    + +
    +
  3. Query data
    1
    +2
    val dataFrame = sparkSession.sql("select * from dli_to_dws")
    +dataFrame.show()
    +
    + +
    +

    Before data is inserted:

    +

    +

    Response:

    +

    +
  4. Delete the datasource connection table.
    1
    sparkSession.sql("drop table dli_to_dws")
    +
    + +
    +
+
+
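As noted in Table 1, the partitionColumn, lowerBound, upperBound, and numPartitions options can be combined to read data concurrently. The following is a minimal sketch only; the table name dli_to_dws_partitioned, the connection URL, the authentication name, and the bounds are placeholder values based on the examples in Table 1.

    sparkSession.sql(
      """CREATE TABLE IF NOT EXISTS dli_to_dws_partitioned USING JDBC OPTIONS (
        |  'url'='jdbc:postgresql://<GaussDB(DWS) internal IP>:8000/postgres',
        |  'dbtable'='customer',
        |  'passwdauth'='######',
        |  'partitionColumn'='id',
        |  'lowerBound'='0',
        |  'upperBound'='100',
        |  'numPartitions'='2')""".stripMargin)
    sparkSession.sql("select * from dli_to_dws_partitioned").show()

With these values, DLI starts two concurrent read tasks: one for id values from 0 up to but not including 50, and the other for id values from 50 up to but not including 100.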

Accessing a Data Source Using a DataFrame API

  1. Set connection parameters.
    1
    +2
    +3
    +4
    val url = "jdbc:postgresql://to-dws-1174405057-EA1Kgo8H.datasource.com:8000/postgres"
    +val username = "dbadmin"
    +val password = "######"
    +val dbtable = "customer"
    +
    + +
    +
  2. Create a DataFrame, add data, and rename fields
    1
    +2
    +3
    +4
    var dataFrame_1 = sparkSession.createDataFrame(List((8, "Jack_1", 18)))
    +val df = dataFrame_1.withColumnRenamed("_1", "id")
    +                    .withColumnRenamed("_2", "name")
    +                    .withColumnRenamed("_3", "age")
    +
    + +
    +
  3. Import data to GaussDB(DWS).
    1
    +2
    +3
    +4
    +5
    +6
    +7
    df.write.format("jdbc")
    +  .option("url", url)
    +  .option("dbtable", dbtable)
    +  .option("user", username)
    +  .option("password", password)
    +  .mode(SaveMode.Append)
    +  .save()
    +
    + +
    +

    The options of SaveMode can be one of the following:

    +
    • ErrorIfExists: If the data already exists, the system throws an exception.
    • Overwrite: If the data already exists, the original data will be overwritten.
    • Append: If the data already exists, the system saves the new data.
    • Ignore: If the data already exists, no operation is required. This is similar to the SQL statement CREATE TABLE IF NOT EXISTS.
    A sketch that combines Overwrite with the truncate option is provided at the end of this section.
    +
    +
    +
  4. Read data from GaussDB(DWS).
    • Method 1: read.format()
      1
      +2
      +3
      +4
      +5
      +6
      val jdbcDF = sparkSession.read.format("jdbc")
      +                 .option("url", url)
      +                 .option("dbtable", dbtable)
      +                 .option("user", username)
      +                 .option("password", password)
      +                 .load()
      +
      + +
      +
    • Method 2: read.jdbc()
      1
      +2
      +3
      +4
      val properties = new Properties()
      + properties.put("user", username)
      + properties.put("password", password)
      + val jdbcDF2 = sparkSession.read.jdbc(url, dbtable, properties)
      +
      + +
      +
    +

    Before data is inserted:

    +

    +

    Response:

    +

    +

    The DataFrame read by the read.format() or read.jdbc() method is registered as a temporary table. Then, you can use SQL statements to query data.

    +
    1
    +2
    jdbcDF.registerTempTable("customer_test")
    + sparkSession.sql("select * from customer_test where id = 1").show()
    +
    + +
    +

    Query results

    +

    +
+
+
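If existing data needs to be replaced rather than appended, SaveMode.Overwrite can be combined with the truncate option described in Table 1 so that the target table is emptied instead of being dropped and recreated. The following is a minimal sketch that reuses the url, dbtable, username, and password values defined above:

    df.write.format("jdbc")
      .option("url", url)
      .option("dbtable", dbtable)
      .option("user", username)
      .option("password", password)
      .option("truncate", "true")
      .mode(SaveMode.Overwrite)
      .save()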

DataFrame-Related Operations

The data created by the createDataFrame() method and the data queried by the read.format() method and the read.jdbc() method are all DataFrame objects. You can directly query a single record. (In Accessing a Data Source Using a DataFrame API, the DataFrame data is registered as a temporary table.)

+ +
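For example, the jdbcDF object created in the previous section can be queried directly with DataFrame operations, without registering a temporary table first. A minimal sketch, assuming the id, name, and age columns used in this section:

    // Query a single record by its id
    jdbcDF.filter("id = 1").show()
    // Project only the columns of interest
    jdbcDF.select("name", "age").where("id = 1").show()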
+

Submitting a Job

  1. Generate a JAR file based on the code and upload the file to DLI.

    +

    +
  2. In the Spark job editor, select the corresponding dependency module and execute the Spark job.

    +

    +
+
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0077.html b/docs/dli/dev/dli_09_0077.html new file mode 100644 index 00000000..0be5a04e --- /dev/null +++ b/docs/dli/dev/dli_09_0077.html @@ -0,0 +1,23 @@ + + +

Connecting to HBase

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0078.html b/docs/dli/dev/dli_09_0078.html new file mode 100644 index 00000000..03093374 --- /dev/null +++ b/docs/dli/dev/dli_09_0078.html @@ -0,0 +1,299 @@ + + +

PySpark Example Code

+

Development Description

The CloudTable HBase and MRS HBase can be connected to DLI as data sources.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0080.html b/docs/dli/dev/dli_09_0080.html new file mode 100644 index 00000000..b7c36e21 --- /dev/null +++ b/docs/dli/dev/dli_09_0080.html @@ -0,0 +1,21 @@ + + +

Connecting to OpenTSDB

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0081.html b/docs/dli/dev/dli_09_0081.html new file mode 100644 index 00000000..c14fa840 --- /dev/null +++ b/docs/dli/dev/dli_09_0081.html @@ -0,0 +1,167 @@ + + +

PySpark Example Code

+

Development Description

The CloudTable OpenTSDB and MRS OpenTSDB can be connected to DLI as data sources.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0083.html b/docs/dli/dev/dli_09_0083.html new file mode 100644 index 00000000..71f36578 --- /dev/null +++ b/docs/dli/dev/dli_09_0083.html @@ -0,0 +1,19 @@ + + +

Connecting to RDS

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0084.html b/docs/dli/dev/dli_09_0084.html new file mode 100644 index 00000000..df951d2e --- /dev/null +++ b/docs/dli/dev/dli_09_0084.html @@ -0,0 +1,220 @@ + + +

PySpark Example Code

+

Development Description

+
+

Complete Example Code

If you copy the following sample code directly into a .py file, unexpected characters may appear after the backslashes (\). Delete any spaces or indentation that follow the backslashes (\) before running the code.

+
+ +
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0086.html b/docs/dli/dev/dli_09_0086.html new file mode 100644 index 00000000..4d6a9e47 --- /dev/null +++ b/docs/dli/dev/dli_09_0086.html @@ -0,0 +1,19 @@ + + +

Connecting to GaussDB(DWS)

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0087.html b/docs/dli/dev/dli_09_0087.html new file mode 100644 index 00000000..8199d396 --- /dev/null +++ b/docs/dli/dev/dli_09_0087.html @@ -0,0 +1,298 @@ + + +

PySpark Example Code

+

Scenario

This section provides PySpark example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.

+

A datasource connection has been created and bound to a queue on the DLI management console.

+

Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.

+
+
+

Preparations

  1. Import dependency packages.
    1
    +2
    +3
    from __future__ import print_function
    +from pyspark.sql.types import StructType, StructField, IntegerType, StringType
    +from pyspark.sql import SparkSession
    +
    + +
    +
  2. Create a session.
    1
    sparkSession = SparkSession.builder.appName("datasource-dws").getOrCreate()
    +
    + +
    +
+
+

Accessing a Data Source Using a DataFrame API

  1. Set connection parameters.
    1
    +2
    +3
    +4
    +5
    url = "jdbc:postgresql://to-dws-1174404951-W8W4cW8I.datasource.com:8000/postgres"
    +dbtable = "customer"
    +user = "dbadmin"
    +password = "######"
    +driver = "org.postgresql.Driver"
    +
    + +
    +
  2. Set data.
    1
    dataList = sparkSession.sparkContext.parallelize([(1, "Katie", 19)])
    +
    + +
    +
  3. Configure the schema.
    1
    +2
    +3
    schema = StructType([StructField("id", IntegerType(), False),\
    +                     StructField("name", StringType(), False),\
    +                     StructField("age", IntegerType(), False)])
    +
    + +
    +
  4. Create a DataFrame.
    1
    dataFrame = sparkSession.createDataFrame(dataList, schema)
    +
    + +
    +
  5. Save the data to GaussDB(DWS).
    1
    +2
    +3
    +4
    +5
    +6
    +7
    +8
    +9
    dataFrame.write \
    +    .format("jdbc") \
    +    .option("url", url) \
    +    .option("dbtable", dbtable) \
    +    .option("user", user) \
    +    .option("password", password) \
    +    .option("driver", driver) \
    +    .mode("Overwrite") \
    +    .save()
    +
    + +
    +

    The options of mode can be one of the following:

    +
    • ErrorIfExists: If the data already exists, the system throws an exception.
    • Overwrite: If the data already exists, the original data will be overwritten.
    • Append: If the data already exists, the system saves the new data.
    • Ignore: If the data already exists, no operation is required. This is similar to the SQL statement CREATE TABLE IF NOT EXISTS.
    +
    +
  6. Read data from GaussDB(DWS).
    1
    +2
    +3
    +4
    +5
    +6
    +7
    +8
    +9
    jdbcDF = sparkSession.read \
    +    .format("jdbc") \
    +    .option("url", url) \
    +    .option("dbtable", dbtable) \
    +    .option("user", user) \
    +    .option("password", password) \
    +    .option("driver", driver) \
    +    .load()
    +jdbcDF.show()
    +
    + +
    +
  7. View the operation result.

    +
+
+

Accessing a Data Source Using a SQL API

  1. Create a table to connect to a GaussDB(DWS) data source.
    1
    +2
    +3
    +4
    +5
    +6
    +7
    sparkSession.sql(
    +    "CREATE TABLE IF NOT EXISTS dli_to_dws USING JDBC OPTIONS (\
    +    'url'='jdbc:postgresql://to-dws-1174404951-W8W4cW8I.datasource.com:8000/postgres',\
    +    'dbtable'='customer',\
    +    'user'='dbadmin',\
    +    'password'='######',\
    +    'driver'='org.postgresql.Driver')")
    +
    + +
    +

    For details about table creation parameters, see Table 1.

    +
    +
  2. Insert data.
    1
    sparkSession.sql("insert into dli_to_dws values(2,'John',24)")
    +
    + +
    +
  3. Query data.
    1
    jdbcDF = sparkSession.sql("select * from dli_to_dws").show()
    +
    + +
    +
  4. View the operation result.

    +
+
+

Submitting a Spark Job

  1. Upload the Python code file to DLI.
  2. In the Spark job editor, select the corresponding dependency module and execute the Spark job.
    • If the Spark version is 2.3.2 (will be offline soon) or 2.4.5, set Module to sys.datasource.dws when you submit a job.
    • If the Spark version is 3.1.1, you do not need to select a module. Set Spark parameters (--conf).

      spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/dws/*

      +

      spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/dws/*

      +
    +
    +
+
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0089.html b/docs/dli/dev/dli_09_0089.html new file mode 100644 index 00000000..16562b9a --- /dev/null +++ b/docs/dli/dev/dli_09_0089.html @@ -0,0 +1,21 @@ + + +

Connecting to CSS

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0090.html b/docs/dli/dev/dli_09_0090.html new file mode 100644 index 00000000..e9bc9d94 --- /dev/null +++ b/docs/dli/dev/dli_09_0090.html @@ -0,0 +1,546 @@ + + +

PySpark Example Code

+

Prerequisites

A datasource connection has been created on the DLI management console.

+
+

CSS Non-Security Cluster

+
+

CSS Security Cluster

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0093.html b/docs/dli/dev/dli_09_0093.html new file mode 100644 index 00000000..6ffe961c --- /dev/null +++ b/docs/dli/dev/dli_09_0093.html @@ -0,0 +1,21 @@ + + +

Connecting to Redis

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0094.html b/docs/dli/dev/dli_09_0094.html new file mode 100644 index 00000000..517d40e3 --- /dev/null +++ b/docs/dli/dev/dli_09_0094.html @@ -0,0 +1,603 @@ + + +

Scala Example Code

+

Development Description

Redis supports only enhanced datasource connections.

+ +
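Before the complete listing, the following is a minimal Scala sketch of the same connection pattern used in the Java example later in this guide. The host, port, and password are placeholder values and should come from your own Redis instance and a secure configuration rather than being hard-coded.

    import org.apache.spark.sql.SparkSession

    val sparkSession = SparkSession.builder()
      .appName("datasource-redis")
      .config("spark.redis.host", "192.168.4.199")
      .config("spark.redis.port", "6379")
      .config("spark.redis.auth", "******")
      .getOrCreate()

    // Write a small DataFrame to Redis, using the id column as the key
    val df = sparkSession.createDataFrame(Seq((1, "Ann", 18), (2, "lisi", 21)))
      .toDF("id", "name", "age")
    df.write.format("redis")
      .option("table", "person")
      .option("key.column", "id")
      .mode("overwrite")
      .save()

    // Read the data back from Redis
    sparkSession.read.format("redis")
      .option("table", "person")
      .option("key.column", "id")
      .load()
      .show()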
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0097.html b/docs/dli/dev/dli_09_0097.html new file mode 100644 index 00000000..62c9a063 --- /dev/null +++ b/docs/dli/dev/dli_09_0097.html @@ -0,0 +1,217 @@ + + +

PySpark Example Code

+

Development Description

Redis supports only enhanced datasource connections.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0100.html b/docs/dli/dev/dli_09_0100.html new file mode 100644 index 00000000..e15596e3 --- /dev/null +++ b/docs/dli/dev/dli_09_0100.html @@ -0,0 +1,157 @@ + + +

Java Example Code

+

Development Description

Redis supports only enhanced datasource connections.

+ +
+

Complete Example Code

import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SaveMode;
+
+public class Test_Redis_DataFrame {
+  public static void main(String[] args) {
+    // Create the Spark configuration and set the Redis connection options
+    SparkConf sparkConf = new SparkConf();
+    sparkConf.setAppName("datasource-redis")
+             .set("spark.redis.host", "192.168.4.199")
+             .set("spark.redis.port", "6379")
+             .set("spark.redis.auth", "******")
+             .set("spark.driver.allowMultipleContexts","true");
+    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
+    SQLContext sqlContext = new SQLContext(javaSparkContext);
+
+    //Read RDD in JSON format to create DataFrame
+    JavaRDD<String> javaRDD = javaSparkContext.parallelize(Arrays.asList(
+            "{\"id\":\"1\",\"name\":\"Ann\",\"age\":\"18\"}",
+            "{\"id\":\"2\",\"name\":\"lisi\",\"age\":\"21\"}"));
+    Dataset dataFrame = sqlContext.read().json(javaRDD);
+
+    Map<String, String> map = new HashMap<>();
+    map.put("table","person");
+    map.put("key.column","id");
+    dataFrame.write().format("redis").options(map).mode(SaveMode.Overwrite).save();      
+    sqlContext.read().format("redis").options(map).load().show();
+
+  }
+}
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0110.html b/docs/dli/dev/dli_09_0110.html new file mode 100644 index 00000000..3c9d220d --- /dev/null +++ b/docs/dli/dev/dli_09_0110.html @@ -0,0 +1,202 @@ + + +

Java Example Code

+

Development Description

Mongo can be connected only through enhanced datasource connections.

+

DDS is compatible with the MongoDB protocol.

+
+ +
+

Complete Example Code

import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.SaveMode;
+import java.util.Arrays;
+
+public class TestMongoSparkSql {
+  public static void main(String[] args) {
+    SparkContext sparkContext = new SparkContext(new SparkConf().setAppName("datasource-mongo"));
+    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
+    SQLContext sqlContext = new SQLContext(javaSparkContext);
+
+//    // Read json file as DataFrame, read csv / parquet file, same as json file distribution
+//    DataFrame dataFrame = sqlContext.read().format("json").load("filepath");
+
+    // Read RDD in JSON format to create DataFrame
+    JavaRDD<String> javaRDD = javaSparkContext.parallelize(Arrays.asList("{\"id\":\"5\",\"name\":\"Ann\",\"age\":\"23\"}"));
+    Dataset<Row> dataFrame = sqlContext.read().json(javaRDD);
+
+    String url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin";
+    String uri = "mongodb://username:pwd@host:8635/db";
+    String user = "rwuser";
+    String database = "test";
+    String collection = "test";
+    String password = "######";
+
+    dataFrame.write().format("mongo")
+            .option("url",url)
+            .option("uri",uri)
+            .option("database",database) 
+            .option("collection",collection)
+            .option("user",user)
+            .option("password",password)
+            .mode(SaveMode.Overwrite) 
+            .save();
+
+    sqlContext.read().format("mongo")
+            .option("url",url)
+            .option("uri",uri)
+            .option("database",database)
+            .option("collection",collection)
+            .option("user",user)
+            .option("password",password)
+            .load().show();
+    sparkContext.stop();
+    javaSparkContext.close();
+  }
+}
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0113.html b/docs/dli/dev/dli_09_0113.html new file mode 100644 index 00000000..40ee6f3c --- /dev/null +++ b/docs/dli/dev/dli_09_0113.html @@ -0,0 +1,19 @@ + + +

Connecting to Mongo

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0114.html b/docs/dli/dev/dli_09_0114.html new file mode 100644 index 00000000..e3525adf --- /dev/null +++ b/docs/dli/dev/dli_09_0114.html @@ -0,0 +1,263 @@ + + +

Scala Example Code

+

Development Description

Mongo can be connected only through enhanced datasource connections.

+

DDS is compatible with the MongoDB protocol.

+
+

An enhanced datasource connection has been created on the DLI management console and bound to a queue in packages.

+

Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.

+
+ +
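Before the complete listing, the following is a minimal Scala sketch of the write and read pattern shown in the Java example earlier in this guide. The connection values are placeholders, and the password should be supplied through datasource authentication or an encrypted configuration rather than hard-coded.

    import org.apache.spark.sql.{SaveMode, SparkSession}

    val sparkSession = SparkSession.builder().appName("datasource-mongo").getOrCreate()

    // Placeholder connection information for the DDS (MongoDB-compatible) instance
    val url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin"
    val uri = "mongodb://username:pwd@host:8635/db"
    val user = "rwuser"
    val database = "test"
    val collection = "test"
    val password = "######"

    val df = sparkSession.createDataFrame(Seq((5, "Ann", 23))).toDF("id", "name", "age")

    // Write the DataFrame to the DDS collection
    df.write.format("mongo")
      .option("url", url)
      .option("uri", uri)
      .option("database", database)
      .option("collection", collection)
      .option("user", user)
      .option("password", password)
      .mode(SaveMode.Overwrite)
      .save()

    // Read the collection back and display it
    sparkSession.read.format("mongo")
      .option("url", url)
      .option("uri", uri)
      .option("database", database)
      .option("collection", collection)
      .option("user", user)
      .option("password", password)
      .load()
      .show()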
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0117.html b/docs/dli/dev/dli_09_0117.html new file mode 100644 index 00000000..7701989e --- /dev/null +++ b/docs/dli/dev/dli_09_0117.html @@ -0,0 +1,200 @@ + + +

PySpark Example Code

+

Development Description

Mongo can be connected only through enhanced datasource connections.

+

DDS is compatible with the MongoDB protocol.

+
+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0120.html b/docs/dli/dev/dli_09_0120.html new file mode 100644 index 00000000..7a1e9c02 --- /dev/null +++ b/docs/dli/dev/dli_09_0120.html @@ -0,0 +1,19 @@ + + +

SQL Jobs

+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0121.html b/docs/dli/dev/dli_09_0121.html new file mode 100644 index 00000000..1daecd83 --- /dev/null +++ b/docs/dli/dev/dli_09_0121.html @@ -0,0 +1,46 @@ + + +

Performing Authentication

+

Scenario

You need to be authenticated when using JDBC to create DLI driver connections.

+
+

Procedure

Currently, the JDBC supports authentication using the Access Key/Secret Key (AK/SK) or token.

+ +
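The following is a minimal sketch of an AK/SK-based connection, shown in Scala. It assumes the driver class, URL format, and attribute names described in Submitting a Job Using JDBC; the endpoint, project ID, and queue name are placeholders, the keys are read from environment variables rather than hard-coded, and additional attributes may be required depending on your environment.

    import java.sql.DriverManager
    import java.util.Properties

    // Load the DLI JDBC driver
    Class.forName("com.dli.jdbc.DliDriver")

    // URL format: jdbc:dli://<endPoint>/<projectId>; both values are placeholders here
    val url = "jdbc:dli://<endPoint>/<projectId>"

    val info = new Properties()
    info.setProperty("authenticationmode", "aksk")
    info.setProperty("accesskey", sys.env("DLI_AK"))   // AK read from an environment variable
    info.setProperty("secretkey", sys.env("DLI_SK"))   // SK read from an environment variable
    info.setProperty("servicename", "dli")
    info.setProperty("queuename", "<queue name>")

    val conn = DriverManager.getConnection(url, info)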
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0122.html b/docs/dli/dev/dli_09_0122.html new file mode 100644 index 00000000..2c20cf4b --- /dev/null +++ b/docs/dli/dev/dli_09_0122.html @@ -0,0 +1,209 @@ + + +

Using Spark-submit to Submit a Spark Jar Job

+

Introduction to DLI Spark-submit

DLI Spark-submit is a command line tool used to submit Spark jobs to the DLI server. This tool provides command lines compatible with open-source Spark.

+
+

Preparations

  1. Getting authorized.

    DLI uses the Identity and Access Management (IAM) to implement fine-grained permissions for your enterprise-level tenants. IAM provides identity authentication, permissions management, and access control, helping you securely access your resources.

    +

    With IAM, you can use your account to create IAM users for your employees, and assign permissions to the users to control their access to specific resource types.

    +

    Currently, roles (coarse-grained authorization) and policies (fine-grained authorization) are supported.

    +
  2. Create a queue. Choose Resources > Queue Management. On the page displayed, click Buy Queue in the upper right corner. On the Buy Queue page displayed, select For general purpose for Type, that is, the compute resources of the Spark job.

    If the user who creates the queue is not an administrator, the queue can be used only after being authorized by the administrator. For details about how to assign permissions, see .

    +
    +
+
+

Downloading the DLI Client Tool

You can download the DLI client tool from the DLI management console.

+
  1. Log in to the DLI management console.
  2. Obtain the SDK download address from the administrator.
  3. On the DLI SDK DOWNLOAD page, click dli-clientkit-<version> to download the DLI client tool.

    The Beeline client is named dli-clientkit-<version>-bin.tar.gz, which can be used in Linux and depends on JDK 1.8 or later.

    +
    +

+
+

Configuring DLI Spark-submit

Ensure that you have installed JDK 1.8 or later and configured environment variables on the computer where spark-submit is installed. You are advised to use spark-submit on the computer running Linux.

+
+
  1. Download and decompress dli-clientkit-<version>-bin.tar.gz. In this step, set version to the actual version.
  2. Go to the directory where dli-clientkit-<version>-bin.tar.gz is decompressed. In the directory, there are three subdirectories bin, conf, and lib, which respectively store the execution scripts, configuration files, and dependency packages related to Spark-submit.
  3. Go to the conf directory and modify the configuration items in the client.properties file. For details about the configuration items, see Table 1.

    +

    Table 1 DLI client parameters

    Item

    +

    Mandatory

    +

    Default Value

    +

    Description

    +

    dliEndPont

    +

    No

    +

    -

    +

    Domain name of DLI

    +

    If you leave this parameter empty, the program determines the domain name based on the region.

    +

    obsEndPoint

    +

    Yes

    +

    -

    +

    OBS service domain name.

    +

    Obtain the OBS domain name from the administrator.

    +

    bucketName

    +

    Yes

    +

    -

    +

    Name of a bucket on OBS. This bucket is used to store JAR files, Python program files, and configuration files used in Spark programs.

    +

    obsPath

    +

    Yes

    +

    dli-spark-submit-resources

    +

    Directory for storing JAR files, Python program files, and configuration files on OBS. The directory is in the bucket specified by Bucket Name. If the directory does not exist, the program automatically creates it.

    +

    localFilePath

    +

    Yes

    +

    -

    +

    The local directory for storing JAR files, Python program files, and configuration files used in Spark programs.

    +

    The program automatically uploads the files on which Spark depends to the OBS path and loads them to the resource package on the DLI server.

    +

    ak

    +

    Yes

    +

    -

    +

    User's Access Key (AK)

    +

    sk

    +

    Yes

    +

    -

    +

    User's Secret Key (SK)

    +

    projectId

    +

    Yes

    +

    -

    +

    Project ID used by a user to access DLI.

    +

    region

    +

    Yes

    +

    -

    +

    Region of interconnected DLI.

    +
    +
    +

    Modify the configuration items in the spark-defaults.conf file based on the Spark application requirements. The configuration items are compatible with the open-source Spark configuration items. For details, see the open-source Spark configuration item description.

    +

+

Using Spark-submit to Submit a Spark Job

  1. Go to the bin directory of the tool file, run the spark-submit command, and carry related parameters.

    The command format is as follows:
    spark-submit [options] <app jar | python file> [app arguments]
    +
    +
    +
    Table 2 DLI Spark-submit parameters

    Parameter

    +

    Value

    +

    Description

    +

    --class

    +

    <CLASS_NAME>

    +

    Name of the main class of the submitted Java or Scala application.

    +

    --conf

    +

    <PROP=VALUE>

    +

    Spark program parameters can be configured in the spark-defaults.conf file in the conf directory. If both the command and the configuration file are configured, the parameter value specified in the command is preferentially used.

    +
    NOTE:

    If there are multiple --conf parameters, use the format --conf key1=value1 --conf key2=value2.

    +
    +

    --jars

    +

    <JARS>

    +

    Name of the JAR file on which the Spark application depends. Use commas (,) to separate multiple names. The JAR file must be stored in the local path specified by localFilePath in the client.properties file in advance.

    +

    --name

    +

    <NAME>

    +

    Name of a Spark application.

    +

    --queue

    +

    <QUEUE_NAME>

    +

    Name of the Spark queue on the DLI server. Jobs are submitted to the queue for execution.

    +

    --py-files

    +

    <PY_FILES>

    +

    Name of the Python program file on which the Spark application depends. Use commas (,) to separate multiple file names. The Python program file must be saved in the local path specified by localFilePath in the client.properties file in advance.

    +

    -s,--skip-upload-resources

    +

    <all | app | deps>

    +

    Specifies whether to skip uploading the JAR files, Python program files, and configuration files to OBS and loading them to the resource list on the DLI server. If the related resource files have already been loaded to the DLI resource list, the upload can be skipped.

    +

    If this parameter is not specified, all resource files in the command are uploaded and loaded to DLI by default.

    +
    • all: Skips the upload and loading of all resource files.
    • app: Skips the upload and loading of Spark application files.
    • deps: Skips the upload and loading of all dependency files.
    +

    -h,--help

    +

    -

    +

    Displays command help information.

    +
    +
    +
    +
    Command example:
    ./spark-submit --name <name> --queue <queue_name> --class org.apache.spark.examples.SparkPi spark-examples_2.11-2.1.0.luxor.jar 10
    +./spark-submit --name <name> --queue <queue_name> word_count.py
    +

    To use the DLI queue rather than the existing Spark environment, use ./spark-submit instead of spark-submit.

    +
    +
    +

+

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0123.html b/docs/dli/dev/dli_09_0123.html new file mode 100644 index 00000000..9e370f59 --- /dev/null +++ b/docs/dli/dev/dli_09_0123.html @@ -0,0 +1,23 @@ + + +

Submitting a Spark SQL Job Using JDBC

+
+
+ + + +
+ diff --git a/docs/dli/dev/dli_09_0124.html b/docs/dli/dev/dli_09_0124.html new file mode 100644 index 00000000..057e3c49 --- /dev/null +++ b/docs/dli/dev/dli_09_0124.html @@ -0,0 +1,15 @@ + + +

Obtaining the Server Connection Address

+

Scenario

On DLI, you can connect to the server for data query over the Internet. To do so, you need to first obtain the connection information, including the endpoint and project ID, by following the procedure below.

+
+

Procedure

The format of the address for connecting to DLI is jdbc:dli://<endPoint>/<projectId>. Therefore, you need to obtain the endpoint and project ID.

+

Obtain the DLI endpoint from the administrator. To obtain the project ID, log in to the cloud, click your username, and choose My Credentials from the shortcut menu. The project ID is displayed on the My Credentials page.

+
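Once both values are available, the connection address can be assembled as shown in the following sketch; the values are placeholders only.

    // Substitute the endpoint and project ID obtained above
    val endPoint = "<endPoint>"
    val projectId = "<projectId>"
    val connectionAddress = s"jdbc:dli://$endPoint/$projectId"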
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0125.html b/docs/dli/dev/dli_09_0125.html new file mode 100644 index 00000000..22376c13 --- /dev/null +++ b/docs/dli/dev/dli_09_0125.html @@ -0,0 +1,18 @@ + + +

Downloading the JDBC Driver Package

+

Scenario

To connect to DLI, JDBC is utilized. You can obtain the JDBC installation package from Maven or download the JDBC driver file from the DLI management console.

+
+

Downloading the JDBC Driver File from the DLI Management Console

  1. Log in to the DLI management console.
  2. Ask the administrator for the download link to the SDK driver package.
  3. On the DLI SDK DOWNLOAD page, select a driver and download it.

    • JDBC driver package

      For example, click dli-jdbc-1.2.5 to download the JDBC driver package of version 1.2.5.

      +

      The JDBC driver package is named dli-jdbc-<version>.zip. It can be used in all versions of all platforms (such as Linux and Windows) and depends on JDK 1.7 or later.

      +
      +
    +

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0127.html b/docs/dli/dev/dli_09_0127.html new file mode 100644 index 00000000..ac72b703 --- /dev/null +++ b/docs/dli/dev/dli_09_0127.html @@ -0,0 +1,463 @@ + + +

Submitting a Job Using JDBC

+

Scenario

In Linux or Windows, you can connect to the DLI server using JDBC.

+
  • Jobs submitted to DLI using JDBC are executed on the Spark engine.
  • Once JDBC 2.X has undergone function reconstruction, query results can only be accessed from DLI job buckets. To utilize this feature, certain conditions must be met:
    • On the DLI management console, choose Global Configuration > Project to configure the job bucket.
    • Submit a service ticket to request the whitelisting of the feature that allows writing query results to buckets.
    +
+
+
DLI supports 13 data types. Each type can be mapped to a JDBC type. If JDBC is used to connect to the server, you must use the mapped Java type. Table 1 describes the mapping relationships. +
Table 1 Data type mapping

DLI Data Type

+

JDBC Type

+

Java Type

+

INT

+

INTEGER

+

java.lang.Integer

+

STRING

+

VARCHAR

+

java.lang.String

+

FLOAT

+

FLOAT

+

java.lang.Float

+

DOUBLE

+

DOUBLE

+

java.lang.Double

+

DECIMAL

+

DECIMAL

+

java.math.BigDecimal

+

BOOLEAN

+

BOOLEAN

+

java.lang.Boolean

+

SMALLINT/SHORT

+

SMALLINT

+

java.lang.Short

+

TINYINT

+

TINYINT

+

java.lang.Short

+

BIGINT/LONG

+

BIGINT

+

java.lang.Long

+

TIMESTAMP

+

TIMESTAMP

+

java.sql.Timestamp

+

CHAR

+

CHAR

+

java.lang.Character

+

VARCHAR

+

VARCHAR

+

java.lang.String

+

DATE

+

DATE

+

java.sql.Date

+
+
+
+
+
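As an illustration of this mapping, the following Scala sketch reads columns of several DLI types through the corresponding java.sql.ResultSet getters. The column names are hypothetical, and rs is a ResultSet obtained as shown in the procedure below.

    import java.sql.ResultSet

    def readRow(rs: ResultSet): Unit = {
      val intCol  = rs.getInt("int_col")            // column declared as DLI INT (java.lang.Integer)
      val strCol  = rs.getString("string_col")      // DLI STRING (java.lang.String)
      val decCol  = rs.getBigDecimal("decimal_col") // DLI DECIMAL (java.math.BigDecimal)
      val tsCol   = rs.getTimestamp("ts_col")       // DLI TIMESTAMP (java.sql.Timestamp)
      val dateCol = rs.getDate("date_col")          // DLI DATE (java.sql.Date)
      println(s"$intCol, $strCol, $decCol, $tsCol, $dateCol")
    }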

Prerequisites

Before using JDBC, perform the following operations:

+
  1. Getting authorized.

    DLI uses the Identity and Access Management (IAM) to implement fine-grained permissions for your enterprise-level tenants. IAM provides identity authentication, permissions management, and access control, helping you securely access your resources.

    +

    With IAM, you can use your account to create IAM users for your employees, and assign permissions to the users to control their access to specific resource types.

    +

    Currently, roles (coarse-grained authorization) and policies (fine-grained authorization) are supported.

    +
  2. Create a queue. Choose Resources > Queue Management. On the page displayed, click Buy Queue in the upper right corner. On the Buy Queue page displayed, select For general purpose for Type, that is, the compute resources of the Spark job.

    If the user who creates the queue is not an administrator, the queue can be used only after being authorized by the administrator. For details about how to assign permissions, see .

    +
    +
+
+

Procedure

  1. Install JDK 1.7 or later on the computer where JDBC is installed, and configure environment variables.
  2. Obtain the DLI JDBC driver package dli-jdbc-<version>.zip by referring to Downloading the JDBC Driver Package. Decompress the package to obtain dli-jdbc-<version>-jar-with-dependencies.jar.
  3. On the computer using JDBC, add dli-jdbc-<version>-jar-with-dependencies.jar to the classpath of the Java project.
  4. DLI JDBC provides two authentication modes, namely, token and AK/SK, to connect to DLI. For details about how to obtain the token and AK/SK, see Performing Authentication.
  5. Run the Class.forName() command to load the DLI JDBC driver.

    Class.forName("com.dli.jdbc.DliDriver");

    +

  6. Call the GetConnection method of DriverManager to create a connection.

    Connection conn = DriverManager.getConnection(String url, Properties info);

    +
    JDBC configuration items are passed using the URL. For details, see Table 2. JDBC configuration items can be separated by semicolons (;) in the URL, or you can dynamically set the attribute items using the Info object. For details, see Table 3. +
    Table 2 Database connection parameters

    Parameter

    +

    Description

    +

    url

    +

    The URL format is as follows:

    +

    jdbc:dli://<endPoint>/<projectId>?<key1>=<val1>;<key2>=<val2>...

    +
    • EndPoint indicates the DLI domain name. ProjectId indicates the project ID.

      To obtain the endpoint corresponding to DLI, contact the administrator for the region and endpoint information. To obtain the project ID, log in to the cloud, hover over your account name, and choose My Credentials from the shortcut menu.

      +
    • Other configuration items are listed after ? in the form of key=value. The configuration items are separated by semicolons (;). They can also be passed using the Info object.
    +

    Info

    +

    The Info object passes user-defined configuration items. If Info does not pass any attribute item, you can set it to null. The format is as follows: info.setProperty ("Attribute item", "Attribute value").

    +
    +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 3 Attribute items

    Item

    +

    Mandatory

    +

    Default Value

    +

    Description

    +

    queuename

    +

    Yes

    +

    -

    +

    Queue name of DLI.

    +

    databasename

    +

    No

    +

    -

    +

    Name of a database.

    +

    authenticationmode

    +

    No

    +

    token

    +

    Authentication mode. Currently, token- and AK/SK-based authentication modes are supported.

    +

    accesskey

    +

    Yes

    +

    -

    +

    AK/SK authentication key. For details about how to obtain the key, see Performing Authentication.

    +

    secretkey

    +

    Yes

    +

    -

    +

    AK/SK authentication key. For details about how to obtain the key, see Performing Authentication.

    +

    servicename

    +

    This parameter must be configured if authenticationmode is set to aksk.

    +

    -

    +

    Indicates the service name, that is, dli.

    +

    token

    +

    This parameter must be configured if authenticationmode is set to token.

    +

    -

    +

    Token authentication. For details about the authentication mode, see Performing Authentication.

    +

    charset

    +

    No

    +

    UTF-8

    +

    JDBC encoding mode.

    +

    usehttpproxy

    +

    No

    +

    false

    +

    Whether to use the access proxy.

    +

    proxyhost

    +

    This parameter must be configured if usehttpproxy is set to true.

    +

    -

    +

    Access proxy host.

    +

    proxyport

    +

    This parameter must be configured if usehttpproxy is set to true.

    +

    -

    +

    Access proxy port.

    +

    dli.sql.checkNoResultQuery

    +

    No

    +

    false

    +

    Whether to allow invoking the executeQuery API to execute statements (for example, DDL) that do not return results.

    +
    • false: The executeQuery API can be invoked for such statements.
    • true: The executeQuery API cannot be invoked for such statements.
    +

    jobtimeout

    +

    No

    +

    300

    +

    Timeout interval for job submission, in seconds.

    +

    iam.endpoint

    +

    No. By default, the value is automatically combined based on regionName.

    +

    -

    +

    -

    +

    obs.endpoint

    +

    No. By default, the value is automatically combined based on regionName.

    +

    -

    +

    -

    +

    directfetchthreshold

    +

    No

    +

    1000

    +

    Check whether the number of returned results exceeds the threshold based on service requirements.

    +

    The default threshold is 1000.

    +
    +
    +
    +

  7. Create a Statement object, set related parameters, and submit Spark SQL to DLI.

    Statement statement = conn.createStatement();

    +

    statement.execute("SET dli.sql.spark.sql.forcePartitionPredicatesOnPartitionedTable.enabled=true");

    +

    statement.execute("select * from tb1");

    +

  8. Obtain the result.

    ResultSet rs = statement.getResultSet();

    +

  9. Display the result.

    while (rs.next()) {
    +    int a = rs.getInt(1);
    +    int b = rs.getInt(2);
    +}
    +

  10. Close the connection.

    conn.close();

    +

+
+

Example

  • Hard-coded or plaintext AK and SK pose significant security risks. To ensure security, encrypt your AK and SK, store them in configuration files or environment variables, and decrypt them when needed.
  • In this example, the AK and SK stored in the environment variables are used. Specify the environment variables System.getenv("AK") and System.getenv("SK") in the local environment first.
+
+
import java.sql.*;
+import java.util.Properties;
+
+public class DLIJdbcDriverExample {
+
+    public static void main(String[] args) throws ClassNotFoundException, SQLException {
+        Connection conn = null;
+        try {
+            Class.forName("com.dli.jdbc.DliDriver");
+            String url = "jdbc:dli://<endpoint>/<projectId>?databasename=db1;queuename=testqueue";
+            Properties info = new Properties();
+            info.setProperty("authenticationmode", "aksk");
+            info.setProperty("regionname", "<real region name>");
+            info.setProperty("accesskey", "<System.getenv("AK")>");
+            info.setProperty("secretkey", "<System.getenv("SK")>")
+            conn = DriverManager.getConnection(url, info);
+            Statement statement = conn.createStatement();
+            statement.execute("select * from tb1");
+            ResultSet rs = statement.getResultSet();
+            int line = 0;
+            while (rs.next()) {
+                line ++;
+                int a = rs.getInt(1);
+                int b = rs.getInt(2);
+                System.out.println("Line:" + line + ":" + a + "," + b);
+            }
+            statement.execute("SET dli.sql.spark.sql.forcePartitionPredicatesOnPartitionedTable.enabled=true");
+            statement.execute("describe tb1");
+            ResultSet rs1 = statement.getResultSet();
+            line = 0;
+            while (rs1.next()) {
+                line ++;
+                String a = rs1.getString(1);
+                String b = rs1.getString(2);
+                System.out.println("Line:" + line + ":" + a + "," + b);
+            }
+        } catch (SQLException ex) {
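+            // Handle the SQLException as required, for example, log the error details.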
+        } finally {
+            if (conn != null) {
+                conn.close();
+            }
+        }
+    }
+}
+
+

Enabling JDBC Requery

If the JDBC requery function is enabled, the system automatically requeries when the query operation fails.

+
  • To avoid repeated data insertion, non-query statements do not support requery.
  • This function is available in the JDBC driver package of 1.1.5 or later. To use this function, obtain the latest JDBC driver package.
+
+

To enable the requery function, add the attributes listed in Table 4 to the Info parameter.

+ +
+ + + + + + + + + + + + + + + + + + + + + +
Table 4 Requery parameter description

Item

+

Mandatory

+

Default Value

+

Description

+

USE_RETRY_KEY

+

Yes

+

false

+

Whether to enable the requery function. If this parameter is set to true, the requery function is enabled.

+

RETRY_TIMES_KEY

+

Yes

+

3000

+

Requery interval, in milliseconds. Setting this parameter to 30000 ms is recommended.

+

RETRY_INTERVALS_KEY

+

Yes

+

3

+

Number of requery attempts. Set this parameter to a value from 3 to 5.

+
+
+

Set JDBC parameters, enable the requery function, and create a link. The following is an example:

+
import com.xxx.dli.jdbs.utils.ConnectionResource; // Import ConnectionResource. Change the package path as needed.
+import java.sql.*;
+import java.util.Properties;
+
+public class DLIJdbcDriverExample {
+
+    private static final String X_AUTH_TOKEN_VALUE = "<realtoken>";
+    public static void main(String[] args) throws ClassNotFoundException, SQLException {
+        Connection conn = null;
+        try {
+            Class.forName("com.dli.jdbc.DliDriver");
+            String url = "jdbc:dli://<endpoint>/<projectId>?databasename=db1;queuename=testqueue";
+            Properties info = new Properties();
+            info.setProperty("token", X_AUTH_TOKEN_VALUE);
+            info.setProperty(ConnectionResource.USE_RETRY_KEY, "true"); // Enable the requery function.
+            info.setProperty(ConnectionResource.RETRY_TIMES_KEY, "30000"); // Requery interval, in ms.
+            info.setProperty(ConnectionResource.RETRY_INTERVALS_KEY, "5"); // Number of requery attempts.
+            conn = DriverManager.getConnection(url, info);
+            Statement statement = conn.createStatement();
+            statement.execute("select * from tb1");
+            ResultSet rs = statement.getResultSet();
+            int line = 0;
+            while (rs.next()) {
+                line ++;
+                int a = rs.getInt(1);
+                int b = rs.getInt(2);
+                System.out.println("Line:" + line + ":" + a + "," + b);
+            }
+            statement.execute("describe tb1");
+            ResultSet rs1 = statement.getResultSet();
+            line = 0;
+            while (rs1.next()) {
+                line ++;
+                String a = rs1.getString(1);
+                String b = rs1.getString(2);
+                System.out.println("Line:" + line + ":" + a + "," + b);
+            }
+        } catch (SQLException ex) {
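+            // Handle the SQLException as required, for example, log the error details.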
+        } finally {
+            if (conn != null) {
+                conn.close();
+            }
+        }
+    }
+}
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0129.html b/docs/dli/dev/dli_09_0129.html new file mode 100644 index 00000000..6e8d38da --- /dev/null +++ b/docs/dli/dev/dli_09_0129.html @@ -0,0 +1,14 @@ + + +

Introduction to RDS

+

Basic Concepts

Relational Database Service (RDS) is a cloud-based web service that is reliable, scalable, easy to manage, and immediately ready for use. It can be deployed in single-node, active/standby, or cluster mode.

+
+

Features

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0150.html b/docs/dli/dev/dli_09_0150.html new file mode 100644 index 00000000..c45599eb --- /dev/null +++ b/docs/dli/dev/dli_09_0150.html @@ -0,0 +1,180 @@ + + +

Flink Jar Job Examples

+

Overview

You can perform secondary development based on Flink APIs to build your own Jar packages and submit them to the DLI queues to interact with data sources such as MRS Kafka, HBase, Hive, HDFS, GaussDB(DWS), and DCS.

+

This section describes how to interact with MRS through a custom job.

+
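As a rough orientation before the detailed setup, the following minimal sketch shows the typical shape of a Flink Jar main class that consumes an MRS Kafka topic and prints the processed records. It is an illustration only, not the example package referenced later: the broker address, topic, and consumer group are placeholders, and the Kerberos login configuration (the user.keytab and krb5.conf files described below) is omitted.

import java.util.Properties;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class MrsKafkaSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka connection properties; all values are placeholders.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "<kafka-broker-host>:<port>");
        props.setProperty("group.id", "dli-flink-demo");
        // For a Kerberos-enabled MRS cluster; the JAAS/keytab login setup is omitted here.
        props.setProperty("security.protocol", "SASL_PLAINTEXT");
        props.setProperty("sasl.kerberos.service.name", "kafka");

        FlinkKafkaConsumer<String> source =
                new FlinkKafkaConsumer<>("<topic>", new SimpleStringSchema(), props);

        env.addSource(source)
           .map(new MapFunction<String, String>() {
               @Override
               public String map(String value) {
                   return "processed: " + value; // Replace with your own processing logic.
               }
           })
           .print();

        env.execute("dli-flink-jar-sketch");
    }
}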
+

Environment Preparations

  1. Log in to the MRS management console and create an MRS cluster. During the creation, enable Kerberos Authentication and select Kafka, HBase, and HDFS. For details about how to create an MRS cluster, see "Buying a Custom Cluster" in .
  2. Enable the UDP/TCP port in the security group rule. For details, see "Adding a Security Group Rule" in .
  3. Log in to MRS Manager.
    1. Create a machine-machine account. Ensure that you have the hdfs_admin and hbase_admin permissions. Download the user authentication credentials, including the user.keytab and krb5.conf files.

      The .keytab file of a human-machine account becomes invalid when the user password expires. Use a machine-machine account for configuration.

      +
      +
    2. Click Services, download the client, and click OK.
    3. Download the configuration files from the MRS node, including hbase-site.xml and hiveclient.properties.
    +
  4. Create a dedicated DLI queue.
  5. Set up an enhanced datasource connection between the DLI dedicated queue and the MRS cluster and configure security group rules based on the site requirements.

    For details about how to create an enhanced datasource connection, see Enhanced Datasource Connections in the Data Lake Insight User Guide.

    +

    For details about how to configure security group rules, see "Security Group" in Virtual Private Cloud User Guide.

    +
  6. Obtain the IP address and domain name mapping of all nodes in the MRS cluster, and configure the host mapping in the host information of the DLI cross-source connection.

    For details about how to add an IP-domain mapping, see Modifying the Host Information in the Data Lake Insight User Guide.

    +

    If the Kafka server listens on the port using hostname, you need to add the mapping between the hostname and IP address of the Kafka Broker node to the DLI queue. Contact the Kafka service deployment personnel to obtain the hostname and IP address of the Kafka Broker node.

    +
    +
+
+

Prerequisites

+
+

How to Use

  1. In the left navigation pane of the DLI management console, choose Job Management > Flink Jobs. The Flink Jobs page is displayed.
  2. In the upper right corner of the Flink Jobs page, click Create Job.
  3. Configure job parameters.

    +

    + + + + + + + + + + + + + +
    Table 1 Job parameters

    Parameter

    +

    Description

    +

    Type

    +

    Select Flink Jar.

    +

    Name

    +

    Job name, which contains 1 to 57 characters and consists of only letters, digits, hyphens (-), and underscores (_).

    +
    NOTE:

    The job name must be globally unique.

    +
    +

    Description

    +

    Description of the job, which contains 0 to 512 characters.

    +
    +
    +

  4. Click OK to enter the Edit page.
  5. Select a queue. A Flink Jar job can run only on a dedicated general-purpose queue.

    • A Flink Jar job can run only on a pre-created dedicated queue.
    • If no dedicated queue is available in the Queue drop-down list, create a dedicated queue and bind it to the current user.
    +
    +

  6. Upload the JAR package.

    The Flink version must be the same as that specified in the JAR package. +
    + + + + + + + + + + + + + + + + + + + + + + +
    Table 2 Description

    Parameter

    +

    Description

    +

    Application

    +

    User-defined package. Before selecting a package, upload the corresponding JAR package to the OBS bucket and create a package on the Data Management > Package Management page.

    +

    Main Class

    +

    Name of the main class of the JAR package to be loaded, for example, KafkaMessageStreaming.

    +
    • Default: The value is specified based on the Manifest file in the JAR package.
    • Manually assign: You must enter the class name and confirm the class arguments (separate arguments with spaces).
    +
    NOTE:

    When a class belongs to a package, the package path must be carried, for example, packagePath.KafkaMessageStreaming.

    +
    +

    Class Arguments

    +

    List of arguments of a specified class. The arguments are separated by spaces.

    +

    JAR Package Dependencies

    +

    User-defined dependencies. Before selecting a package, upload the corresponding JAR package to the OBS bucket and create a JAR package on the Data Management > Package Management page.

    +

    Other Dependencies

    +

    User-defined dependency files. Before selecting a file, upload the corresponding file to the OBS bucket and create a package of any type on the Data Management > Package Management page.

    +

    You can add the following content to the application to access the corresponding dependency file: fileName indicates the name of the file to be accessed, and ClassName indicates the name of the class that needs to access the file.

    +
    ClassName.class.getClassLoader().getResource("userData/fileName")
    +

    Flink Version

    +

    Before selecting a Flink version, you need to select the queue to which the Flink version belongs. Currently, the following versions are supported: 1.10.

    +
    +
    +
    +

  7. Configure job parameters.

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 3 Parameter description

    Parameter

    +

    Description

    +

    CUs

    +

    One CU has one vCPU and 4 GB memory. The number of CUs ranges from 2 to 400.

    +

    Job Manager CUs

    +

    Set the number of CUs on a management unit. The value ranges from 1 to 4. The default value is 1.

    +

    Parallelism

    +

    Maximum number of parallel operators in a job.

    +
    NOTE:
    • The value must be less than or equal to four times the number of compute units (the number of CUs minus the number of Job Manager CUs).
    • You are advised to set this parameter to a value greater than that configured in the code. Otherwise, job submission may fail.
    +
    +

    Task Manager Configuration

    +

    Whether to set Task Manager resource parameters.

    +

    If this option is selected, you need to set the following parameters:

    +
    • CU(s) per TM: Number of resources occupied by each Task Manager.
    • Slot(s) per TM: Number of slots contained in each Task Manager.
    +

    Save Job Log

    +

    Whether to save the job running logs to OBS.

    +

    If this option is selected, you need to set the following parameters:

    +

    OBS Bucket: Select an OBS bucket to store user job logs. If the selected OBS bucket is not authorized, click Authorize.

    +

    Alarm Generation upon Job Exception

    +

    Whether to report job exceptions, for example, abnormal job running or exceptions due to an insufficient balance, to users via SMS or email.

    +

    If this option is selected, you need to set the following parameters:

    +

    SMN Topic

    +

    Select a user-defined SMN topic. For details about how to customize SMN topics, see "Creating a Topic" in the Simple Message Notification User Guide.

    +

    Auto Restart upon Exception

    +

    Whether to enable automatic restart. If this function is enabled, jobs will be automatically restarted and restored when exceptions occur.

    +

    If this option is selected, you need to set the following parameters:

    +
    • Max. Retry Attempts: maximum number of retries per hour when an exception occurs.
      • Unlimited: The number of retries is unlimited.
      • Limited: The number of retries is user-defined.
      +
    • Restore Job from Checkpoint: Restore the job from the latest checkpoint.

      If you select this parameter, you also need to set Checkpoint Path.

      +

      Checkpoint Path: Select the checkpoint saving path. The value must be the same as the checkpoint path you set in the application package. Note that the checkpoint path for each job must be unique. Otherwise, the checkpoint cannot be obtained.

      +
    +
    +
    +

  8. Click Save on the upper right of the page.
  9. Click Start on the upper right side of the page. On the displayed Start Flink Job page, confirm the job specifications, and click Start Now to start the job.

    After the job is started, the system automatically switches to the Flink Jobs page, and the created job is displayed in the job list. You can view the job status in the Status column. After a job is successfully submitted, the job status will change from Submitting to Running. After the execution is complete, the message Completed is displayed.

    +

    If the job status is Submission failed or Running exception, the job submission failed or the job did not execute successfully. In this case, you can move the cursor over the status icon in the Status column of the job list to view the error details. You can click the copy button to copy these details. After handling the fault based on the provided information, resubmit the job.

    +

    Other buttons are as follows:

    +

    Save As: Save the created job as a new job.

    +
    +

+
+

Related Operations

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0162.html b/docs/dli/dev/dli_09_0162.html new file mode 100644 index 00000000..3ecee099 --- /dev/null +++ b/docs/dli/dev/dli_09_0162.html @@ -0,0 +1,20 @@ + + +

Stream Ecosystem

+

Overview

Built on Flink and Spark, the stream ecosystem is fully compatible with the open-source Flink, Storm, and Spark APIs. It is enhanced in features and improved in performance to provide easy-to-use DLI with low latency and high throughput.

+
The DLI stream ecosystem includes the cloud service ecosystem, open source ecosystem, and custom stream ecosystem.
  • Cloud service ecosystems

    DLI can be interconnected with other services by using Stream SQLs. You can directly use SQL statements to read and write data from various cloud services, such as Data Ingestion Service (DIS), Object Storage Service (OBS), CloudTable Service (CloudTable), MapReduce Service (MRS), Relational Database Service (RDS), Simple Message Notification (SMN), and Distributed Cache Service (DCS).

    +
  • Open-source ecosystems

    After connections to other VPCs are established through VPC peering connections, you can access all data sources and output targets (such as Kafka, HBase, and Elasticsearch) supported by Flink and Spark in your dedicated DLI queues.

    +
  • Custom stream ecosystems

    You can compile code to obtain data from the desired cloud ecosystem or open-source ecosystem as the input data of Flink jobs.

    +
+
+
+

Supported Data Formats

DLI Flink jobs support the following data formats:

+

Avro, Avro_merge, BLOB, CSV, EMAIL, JSON, ORC, Parquet, and XML.

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0171.html b/docs/dli/dev/dli_09_0171.html new file mode 100644 index 00000000..0dfcc1e0 --- /dev/null +++ b/docs/dli/dev/dli_09_0171.html @@ -0,0 +1,153 @@ + + +

Calling UDFs in Spark SQL Jobs

+

Scenario

DLI allows you to use Hive user-defined functions (UDFs) to query data. UDFs take effect only on a single row of data and are applicable to inserting and deleting a single data record.

+
+

Constraints

+
+

Environment Preparations

Before you start, set up the development environment.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Development environment

Item

+

Description

+

OS

+

Windows 7 or later

+

JDK

+

JDK 1.8.

+

IntelliJ IDEA

+

This tool is used for application development. The version of the tool must be 2019.1 or other compatible versions.

+

Maven

+

Basic configurations of the development environment. Maven is used for project management throughout the lifecycle of software development.

+
+
+
+

Development Process

The process of developing a UDF is as follows:
Figure 1 Development process
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Process description

No.

+

Phase

+

Software Portal

+

Description

+

1

+

Create a Maven project and configure the POM file.

+

IntelliJ IDEA

+

+

Write UDF code by referring to the steps in Procedure.

+

+

2

+

Write UDF code.

+

3

+

Debug, compile, and pack the code into a Jar package.

+

4

+

Upload the Jar package to OBS.

+

OBS console

+

Upload the UDF Jar file to an OBS directory.

+

5

+

Create the UDF on DLI.

+

DLI console

+

Create a UDF on the SQL job management page of the DLI console.

+

6

+

Verify and use the UDF on DLI.

+

DLI console

+

Use the UDF in your DLI job.

+
+
+
+
+

Procedure

  1. Create a Maven project and configure the POM file. This step uses IntelliJ IDEA 2020.2 as an example.
    1. Start IntelliJ IDEA and choose File > New > Project.
      Figure 2 Creating a project
      +
    2. Choose Maven, set Project SDK to 1.8, and click Next.
      Figure 3 Choosing Maven
      +
    3. Set the project name, configure the storage path, and click Finish.
      Figure 4 Creating a project
      +
    4. Add the following content to the pom.xml file.
      <dependencies>
      +        <dependency>
      +            <groupId>org.apache.hive</groupId>
      +            <artifactId>hive-exec</artifactId>
      +            <version>1.2.1</version>
      +        </dependency>
      +</dependencies>
      +
      Figure 5 Adding configurations to the POM file
      +
      +
    5. Choose src > main and right-click the java folder. Choose New > Package to create a package and a class file.
      Figure 6 Creating a package and a class file
      +

      Set the package name as you need. Then, press Enter.

      +

      Create a Java Class file in the package path. In this example, the Java Class file is SumUdfDemo.

      +
    +
  2. Write UDF code.
    1. The UDF must inherit org.apache.hadoop.hive.ql.exec.UDF.
    2. You must implement the evaluate function, which can be overloaded.
    +

    For details about how to implement the UDF, see the following sample code:

    +
    package com.demo;
    +
    +import org.apache.hadoop.hive.ql.exec.UDF;
    +
    +public class SumUdfDemo extends UDF {
    +    public int evaluate(int a, int b) {
    +        return a + b;
    +    }
    +}
    +
  3. Use IntelliJ IDEA to compile the code and pack it into the JAR package.
    1. Click Maven in the tool bar on the right, and click clean and compile to compile the code.

      After the compilation is successful, click package.

      +

      The generated JAR package is stored in the target directory. In this example, MyUDF-1.0-SNAPSHOT.jar is stored in D:\DLITest\MyUDF\target.

      +
    +
  4. Log in to the OBS console and upload the file to the OBS path.

    The region of the OBS bucket to which the Jar package is uploaded must be the same as the region of the DLI queue. Cross-region operations are not allowed.

    +
    +
  5. (Optional) Upload the file to DLI for package management.
    1. Log in to the DLI management console and choose Data Management > Package Management.
    2. On the Package Management page, click Create in the upper right corner.
    3. In the Create Package dialog, set the following parameters:
      1. Type: Select JAR.
      2. OBS Path: Specify the OBS path for storing the package.
      3. Set Group and Group Name as required for package identification and management.
      +
    4. Click OK.
    +
  6. Create the UDF on DLI.
    1. Log in to the DLI console, choose SQL Editor. Set Engine to spark, and select the created SQL queue and database.
    2. In the SQL editing area, run the following statement to create a UDF and click Execute.
      CREATE FUNCTION TestSumUDF AS 'com.demo.SumUdfDemo' using jar 'obs://dli-test-obs01/MyUDF-1.0-SNAPSHOT.jar';
      +
    +
  7. Restart the original SQL queue for the added function to take effect.
    1. Log in to the DLI console and choose Queue Management from the navigation pane. In the Operation column of the SQL queue job, click Restart.
    2. In the Restart dialog box, click OK.
    +
  8. Call the UDF.

    Use the UDF created in 6 in the SELECT statement as follows:

    +
    select TestSumUDF(1,2);
    +
  9. (Optional) Delete the UDF.

    If the UDF is no longer used, run the following statement to delete it:

    +
    Drop FUNCTION TestSumUDF;
    +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0176.html b/docs/dli/dev/dli_09_0176.html new file mode 100644 index 00000000..0e3f8dc7 --- /dev/null +++ b/docs/dli/dev/dli_09_0176.html @@ -0,0 +1,372 @@ + + +

Using the Spark Job to Access DLI Metadata

+

Scenario

DLI allows you to develop a program to create Spark jobs for operations related to databases, DLI or OBS tables, and table data. This example demonstrates how to develop a job by writing a Java program, and use a Spark job to create a database and table and insert table data.

+
+

Constraints

+
+

Environment Preparations

Before developing a Spark job to access DLI metadata, set up a development environment that meets the following requirements.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Development environment

Item

+

Description

+

OS

+

Windows 7 or later

+

JDK

+

JDK 1.8.

+

IntelliJ IDEA

+

This tool is used for application development. The version of the tool must be 2019.1 or other compatible versions.

+

Maven

+

Basic configurations of the development environment. Maven is used for project management throughout the lifecycle of software development.

+
+
+
+

Development Process

The following figure shows the process for developing a Spark job to access DLI metadata.
Figure 1 Development process
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Process description

No.

+

Phase

+

Software Portal

+

Description

+

1

+

Create a queue for general use.

+

DLI console

+

The DLI queue is created for running your job.

+

2

+

Configure the OBS file.

+

OBS console

+
  • To create an OBS table, you need to upload the file to the OBS bucket.
  • Configure the path for storing DLI metadata. This folder is used to store DLI metadata in spark.sql.warehouse.dir.
+

3

+

Create a Maven project and configure the POM file.

+

IntelliJ IDEA

+

+

Write a program to create a DLI or OBS table by referring to the sample code.

+

+

4

+

Write code.

+

5

+

Debug, compile, and pack the code into a Jar package.

+

6

+

Upload the Jar package to OBS and DLI.

+

OBS console

+

You can upload the generated Spark Jar package to an OBS directory and DLI program package.

+

7

+

Create a Spark JAR job.

+

DLI console

+

The Spark Jar job is created and submitted on the DLI console.

+

8

+

Check execution result of the job.

+

DLI console

+

You can view the job running status and run logs.

+
+
+
+
+

Step 1: Create a Queue for General Purpose

If you submit a Spark job for the first time, you need to create a queue first. For example, create a queue, name it sparktest, and set Queue Usage to For general purpose.
  1. In the navigation pane of the DLI management console, choose Queue Management.
  2. In the upper right corner of the Queue Management page, click Create Queue to create a queue.
  3. Create a queue, name it sparktest, and set the queue usage to for general purpose. For details about how to create a queue, see Creating a Queue.
  4. Click Create Now to create a queue.
+
+
+

Step 2: Configure the OBS Bucket File

  1. To create an OBS table, upload data to the OBS bucket directory.
    Use the following sample data to create the testdata.csv file and upload it to an OBS bucket.
    12,Michael
    +27,Andy
    +30,Justin
    +
    +
  2. Log in to the OBS Console. In the Bucket page, click the name of the created OBS bucket. In this example, the bucket name is dli-test-obs01. The overview page is displayed.
  3. In the navigation pane on the left, choose Objects. Click Upload Object to upload the testdata.csv file to the root directory of the OBS bucket.
  4. In the root directory of the OBS bucket, click Create Folder to create a folder and name it warehousepath. This folder is used to store DLI metadata in spark.sql.warehouse.dir.
+
+

Step 3: Create a Maven Project and Configure the POM Dependency

This step uses IntelliJ IDEA 2020.2 as an example.
  1. Start IntelliJ IDEA and choose File > New > Project.
    Figure 2 Creating a project
    +
  2. Choose Maven, set Project SDK to 1.8, and click Next.
    Figure 3 Selecting an SDK
    +
  3. Set the project name, configure the storage path, and click Finish.
    Figure 4 Creating a project
    +

    In this example, the Maven project name is SparkJarMetadata, and the project storage path is D:\DLITest\SparkJarMetadata.

    +
  4. Add the following content to the pom.xml file.
    <dependencies>
    +        <dependency>
    +            <groupId>org.apache.spark</groupId>
    +            <artifactId>spark-sql_2.11</artifactId>
    +            <version>2.3.2</version>
    +        </dependency>
    +</dependencies>
    +
    Figure 5 Modifying the pom.xml file
    +
    +
  5. Choose src > main and right-click the java folder. Choose New > Package to create a package and a class file.
    Figure 6 Creating a package
    +

    Set the package name as you need. In this example, set Package to com.dli.demo and press Enter.

    +

    Create a Java Class file in the package path. In this example, the Java Class file is DliCatalogTest.

    +
+
+
+

Step 4: Write Code

Write the DliCatalogTest program to create a database, DLI table, and OBS table.

+

For the sample code, see Java Example Code.

+
  1. Import the dependency.
    import org.apache.spark.sql.SparkSession;
    +
  2. Create a SparkSession instance.

    When you create a SparkSession, you need to specify spark.sql.session.state.builder, spark.sql.catalog.class, and spark.sql.extensions parameters as configured in the following example.

    +
    • Spark 2.x and 3.1.x
      SparkSession spark = SparkSession
      +                .builder()
      +                .config("spark.sql.session.state.builder", "org.apache.spark.sql.hive.UQueryHiveACLSessionStateBuilder")
      +                .config("spark.sql.catalog.class", "org.apache.spark.sql.hive.UQueryHiveACLExternalCatalog")
      +                 .config("spark.sql.extensions","org.apache.spark.sql.DliSparkExtension")
      +                .appName("java_spark_demo")
      +                .getOrCreate();
      +
    • Spark 3.3.x
      SparkSession spark = SparkSession
      +           .builder()           
      +           .config("spark.sql.session.state.builder", "org.apache.spark.sql.hive.DliLakeHouseBuilder")           
      +           .config("spark.sql.catalog.class", "org.apache.spark.sql.hive.DliLakeHouseCatalog")           
      +           .appName("java_spark_demo")           
      +           .getOrCreate();   
      +
    +
  3. Create a database.
    The following sample code shows how to create a database named test_sparkapp.
    spark.sql("create database if not exists test_sparkapp").collect();
    +
    +
  4. Create a DLI table and insert test data.
    spark.sql("drop table if exists test_sparkapp.dli_testtable").collect();
    +spark.sql("create table test_sparkapp.dli_testtable(id INT, name STRING)").collect();
    +spark.sql("insert into test_sparkapp.dli_testtable VALUES (123,'jason')").collect();
    +spark.sql("insert into test_sparkapp.dli_testtable VALUES (456,'merry')").collect();
    +
  5. Create an OBS Table. Replace the OBS path in the following example with the path you set in Step 2: Configure the OBS Bucket File.
    spark.sql("drop table if exists test_sparkapp.dli_testobstable").collect();
    +spark.sql("create table test_sparkapp.dli_testobstable(age INT, name STRING) using csv options (path 'obs://dli-test-obs01/testdata.csv')").collect();
    +
  6. Close the Spark session.
    spark.stop();
    +
+
+

Step 5: Debug, Compile, and Pack the Code into a Jar Package.

  1. Double-click Maven in the tool bar on the right, and double-click clean and compile to compile the code.

    After the compilation is successful, double-click package.

    +

    The generated JAR package is stored in the target directory. In this example, SparkJarMetadata-1.0-SNAPSHOT.jar is stored in D:\DLITest\SparkJarMetadata\target.

    +
+
+

Step 6: Upload the JAR Package to OBS and DLI

  1. Log in to the OBS console and upload the SparkJarMetadata-1.0-SNAPSHOT.jar file to the OBS path.
  2. Upload the file to DLI for package management.
    1. Log in to the DLI management console and choose Data Management > Package Management.
    2. On the Package Management page, click Create in the upper right corner.
    3. In the Create Package dialog, set the following parameters:
      1. Type: Select JAR.
      2. OBS Path: Specify the OBS path for storing the package.
      3. Set Group and Group Name as required for package identification and management.
      +
    4. Click OK.
    +
+
+

Step 7: Create a Spark Jar Job

  1. Log in to the DLI console. In the navigation pane, choose Job Management > Spark Jobs.
  2. On the Spark Jobs page, click Create Job.
  3. On the displayed page, configure the following parameters: +
    + + + + + + + + + + + + + + + + + + + + + + + + + +
    Table 3 Spark Jar job parameters

    Parameter

    +

    Value

    +

    Queue

    +

    Select the DLI queue created for general purpose. For example, select the queue sparktest created in Step 1: Create a Queue for General Purpose.

    +

    Spark Version

    +

    Select a supported Spark version from the drop-down list. The latest version is recommended.

    +

    Job Name (--name)

    +

    Name of a custom Spark Jar job. For example, SparkTestMeta.

    +

    Application

    +

    Select the package uploaded to DLI in Step 6: Upload the JAR Package to OBS and DLI. For example, select SparkJarMetadata-1.0-SNAPSHOT.jar.

    +

    Main Class (--class)

    +

    The format is program package name + class name.

    +

    Spark Arguments (--conf)

    +

    spark.dli.metaAccess.enable=true

    +

    spark.sql.warehouse.dir=obs://dli-test-obs01/warehousepath

    +
    NOTE:

    Set spark.sql.warehouse.dir to the OBS path that is specified in Step 2: Configure the OBS Bucket File.

    +
    +

    Access Metadata

    +

    Select Yes.

    +
    +
    +

    Retain default values for other parameters.

    +
  4. Click Execute to submit the Spark Jar job. On the Job management page, view the running status.

    +
+
+

Step 8: View Job Execution Result

  1. On the Job management page, view the running status. The initial status is Starting.
  2. If the job is successfully executed, the job status is Finished. Perform the following operations to view the created database and table:
    1. On the DLI console, choose SQL Editor in the left navigation pane. The created database test_sparkapp is displayed in the database list.
    2. Double-click the database name to view the created DLI and OBS tables in the database.
    3. Double-click dli_testtable and click Execute to query data in the table.
    4. Comment out the statement for querying the DLI table, double-click the OBS table dli_testobstable, and click Execute to query the OBS table data.
    +
  3. If the job fails, the job status is Failed. Click More in the Operation column and select Driver Logs to view the running log.

    After the fault is rectified, click Edit in the Operation column of the job, modify job parameters, and click Execute to run the job again.

    +
+
+

Follow-up Guide

+
+

Java Example Code

This example uses Java for coding. The complete sample code is as follows:

+
package com.dli.demo;
+
+import org.apache.spark.sql.SparkSession;
+
+public class DliCatalogTest {
+    public static void main(String[] args) {
+
+        SparkSession spark = SparkSession
+                .builder()
+                .config("spark.sql.session.state.builder", "org.apache.spark.sql.hive.UQueryHiveACLSessionStateBuilder")
+                .config("spark.sql.catalog.class", "org.apache.spark.sql.hive.UQueryHiveACLExternalCatalog")
+                .config("spark.sql.extensions","org.apache.spark.sql.DliSparkExtension")
+                .appName("java_spark_demo")
+                .getOrCreate();
+
+        spark.sql("create database if not exists test_sparkapp").collect();
+        spark.sql("drop table if exists test_sparkapp.dli_testtable").collect();
+        spark.sql("create table test_sparkapp.dli_testtable(id INT, name STRING)").collect();
+        spark.sql("insert into test_sparkapp.dli_testtable VALUES (123,'jason')").collect();
+        spark.sql("insert into test_sparkapp.dli_testtable VALUES (456,'merry')").collect();
+
+        spark.sql("drop table if exists test_sparkapp.dli_testobstable").collect();
+        spark.sql("create table test_sparkapp.dli_testobstable(age INT, name STRING) using csv options (path 'obs://dli-test-obs01/testdata.csv')").collect();
+
+
+        spark.stop();
+
+    }
+}
+
+

Scala Example Code

import scala.util.Try
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+
+object DliCatalogTest {
+  def main(args: Array[String]): Unit = {
+    val sql = args(0)
+    val runDdl = Try(args(1).toBoolean).getOrElse(true)
+    System.out.println(s"sql is $sql, runDdl is $runDdl")
+
+    val sparkConf = new SparkConf(true)
+    sparkConf
+      .set("spark.sql.session.state.builder", "org.apache.spark.sql.hive.UQueryHiveACLSessionStateBuilder")
+      .set("spark.sql.catalog.class", "org.apache.spark.sql.hive.UQueryHiveACLExternalCatalog")
+    sparkConf.setAppName("dlicatalogtester")
+
+    val spark = SparkSession.builder
+      .config(sparkConf)
+      .enableHiveSupport()
+      .config("spark.sql.extensions", "org.apache.spark.sql.DliSparkExtension")
+      .appName("SparkTest")
+      .getOrCreate()
+
+    System.out.println("catalog is " + spark.sessionState.catalog.toString)
+    if (runDdl) {
+      val df = spark.sql(sql).collect()
+    } else {
+      spark.sql(sql).show()
+    }
+
+    spark.close()
+  }
+}
+
+

Example Python Code

#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    url = sys.argv[1]
+    creatTbl = "CREATE TABLE test_sparkapp.dli_rds USING JDBC OPTIONS ('url'='jdbc:mysql://%s'," \
+              "'driver'='com.mysql.jdbc.Driver','dbtable'='test.test'," \
+              " 'passwdauth' = 'DatasourceRDSTest_pwd','encryption' = 'true')" % url
+
+    spark = SparkSession \
+        .builder \
+        .enableHiveSupport() \
+        .config("spark.sql.session.state.builder", "org.apache.spark.sql.hive.UQueryHiveACLSessionStateBuilder") \
+        .config("spark.sql.catalog.class", "org.apache.spark.sql.hive.UQueryHiveACLExternalCatalog") \
+        .config("spark.sql.extensions", "org.apache.spark.sql.DliSparkExtension") \
+        .appName("python Spark test catalog") \
+        .getOrCreate()
+
+    spark.sql("CREATE database if not exists test_sparkapp").collect()
+    spark.sql("drop table if exists test_sparkapp.dli_rds").collect()
+    spark.sql(creatTbl).collect()
+    spark.sql("select * from test_sparkapp.dli_rds").show()
+    spark.sql("insert into table test_sparkapp.dli_rds select 12,'aaa'").collect()
+    spark.sql("select * from test_sparkapp.dli_rds").show()
+    spark.sql("insert overwrite table test_sparkapp.dli_rds select 1111,'asasasa'").collect()
+    spark.sql("select * from test_sparkapp.dli_rds").show()
+    spark.sql("drop table test_sparkapp.dli_rds").collect()
+    spark.stop()
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0187.html b/docs/dli/dev/dli_09_0187.html new file mode 100644 index 00000000..848c2ff6 --- /dev/null +++ b/docs/dli/dev/dli_09_0187.html @@ -0,0 +1,124 @@ + + +

Java Example Code

+

Development Description

+
+

Complete Example Code

Connecting to data sources through SQL APIs

+
import org.apache.spark.sql.SparkSession;
+ 
+public class java_rds {
+ 
+    public static void main(String[] args) {
+        SparkSession sparkSession = SparkSession.builder().appName("datasource-rds").getOrCreate();
+ 
+        // Create a data table for DLI-associated RDS
+        sparkSession.sql("CREATE TABLE IF NOT EXISTS dli_to_rds USING JDBC OPTIONS ('url'='jdbc:mysql://192.168.6.150:3306','dbtable'='test.customer','user'='root','password'='**','driver'='com.mysql.jdbc.Driver')");
+ 
+        //*****************************SQL model***********************************
+        //Insert data into the DLI data table
+        sparkSession.sql("insert into dli_to_rds values(3,'Liu',21),(4,'Joey',34)");
+ 
+        //Read data from DLI data table
+        sparkSession.sql("select * from dli_to_rds");
+ 
+        //drop table
+        sparkSession.sql("drop table dli_to_rds");
+ 
+        sparkSession.close();
+    }
+}
+
+ +
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0188.html b/docs/dli/dev/dli_09_0188.html new file mode 100644 index 00000000..f5705a4d --- /dev/null +++ b/docs/dli/dev/dli_09_0188.html @@ -0,0 +1,14 @@ + + +

Troubleshooting

+

Problem 1

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0189.html b/docs/dli/dev/dli_09_0189.html new file mode 100644 index 00000000..f662db92 --- /dev/null +++ b/docs/dli/dev/dli_09_0189.html @@ -0,0 +1,33 @@ + + +

CSS Security Cluster Configuration

+

Preparations

Elasticsearch 6.5.4 and later versions provided by CSS support security settings. Once the security mode is enabled, CSS provides identity authentication, authorization, and encryption for users. Before connecting DLI to a CSS security cluster, complete the following preparations.
  1. Select CSS Elasticsearch 6.5.4 or a later cluster version, create a CSS security cluster, and download the security cluster certificate (CloudSearchService.cer).
    1. Log in to the CSS management console, click Clusters, and select the cluster for which you want to create a datasource connection.
    2. Click Download Certificate next to Security Mode to download the security certificate.
    +
  2. Use keytool to generate the keystore and truststore files.
    1. Security certificate CloudSearchService.cer of the security cluster is required when you use keytool to generate the keystore and truststore files. You can set other keytool parameters as required.
      1. Open the cmd command window and run the following command to generate a keystore file that contains a private key:
        keytool -genkeypair -alias certificatekey -keyalg RSA -keystore transport-keystore.jks
        +
      2. After the keystore and truststore files are generated using keytool, you can view the transport-keystore.jks file in the folder. Run the following command to verify the keystore file and certificate information:
        keytool -list -v -keystore transport-keystore.jks
        +

        After you enter the correct keystore password, the corresponding information is displayed.

        +
      3. Run the following commands to create the truststore.jks file and verify it:
        keytool -import -alias certificatekey -file CloudSearchService.cer  -keystore truststore.jks
        +keytool -list -v -keystore truststore.jks
        +
      +
    2. Upload the generated keystore and truststore files to an OBS bucket.
    +
+
+
+

CSS Security Cluster Parameter Configuration

For details about the parameters, see Table 1. This part describes the precautions for configuring the connection parameters of the CSS security cluster.

+
.option("es.net.http.auth.user", "admin") .option("es.net.http.auth.pass", "***")
+

The parameters are the identity authentication account and password, which are also the account and password for logging in to Kibana.

+
.option("es.net.ssl", "true")
+ +
.option("es.net.ssl.keystore.location", "obs://Bucket name/path/transport-keystore.jks")
+.option("es.net.ssl.keystore.pass", "***")
+

Set the location of the keystore.jks file and the key for accessing the file. Place the keystore.jks file generated in Preparations in the OBS bucket, and then enter the AK, SK, and location of the keystore.jks file. Enter the key for accessing the file in es.net.ssl.keystore.pass.

+
.option("es.net.ssl.truststore.location", "obs://Bucket name/path/truststore.jks")
+.option("es.net.ssl.truststore.pass", "***")
+

The parameters in the truststore.jks file are basically the same as those in the keystore.jks file. You can refer to the preceding procedure to set parameters.

+
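Putting these options together, a read from the security cluster can be assembled roughly as follows. This is only a sketch under stated assumptions: the format name "css" and the resource option follow the typical DLI CSS datasource read pattern and should be verified against the Java example code for CSS; node addresses, OBS paths, and credentials are placeholders.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class CssSecureReadSketch {
    public static void main(String[] args) {
        SparkSession sparkSession = SparkSession.builder().appName("datasource-css-secure").getOrCreate();

        // Read an index from the CSS security cluster; all option values are placeholders.
        Dataset<Row> ds = sparkSession.read()
                .format("css")                                   // assumed DLI CSS datasource format name
                .option("resource", "/mytest")                   // assumed index/type path option
                .option("es.nodes", "<css-node-ip>:9200")
                .option("es.net.http.auth.user", "admin")
                .option("es.net.http.auth.pass", "***")
                .option("es.net.ssl", "true")
                .option("es.net.ssl.keystore.location", "obs://<bucket>/path/transport-keystore.jks")
                .option("es.net.ssl.keystore.pass", "***")
                .option("es.net.ssl.truststore.location", "obs://<bucket>/path/truststore.jks")
                .option("es.net.ssl.truststore.pass", "***")
                .load();

        ds.show();
        sparkSession.close();
    }
}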
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0190.html b/docs/dli/dev/dli_09_0190.html new file mode 100644 index 00000000..045d4f12 --- /dev/null +++ b/docs/dli/dev/dli_09_0190.html @@ -0,0 +1,277 @@ + + +

Java Example Code

+

Prerequisites

A datasource connection has been created on the DLI management console.

+
+

CSS Non-Security Cluster

+
+

CSS Security Cluster

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0191.html b/docs/dli/dev/dli_09_0191.html new file mode 100644 index 00000000..f34989ba --- /dev/null +++ b/docs/dli/dev/dli_09_0191.html @@ -0,0 +1,311 @@ + + +

Writing Data to OBS Using Flink Jar

+

Overview

DLI allows you to use a custom JAR package to run Flink jobs and write data to OBS. This section describes how to write processed Kafka data to OBS. You need to modify the parameters in the example Java code based on site requirements.

+
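The complete code is provided in Java Example Code below. For orientation only, the following minimal sketch (an illustration, not the official example) shows one way such a job can be shaped with Flink 1.10 APIs: a Kafka source feeding a StreamingFileSink that writes row-format text files to an OBS path. The broker address, topic, and bucket path are placeholders, and checkpointing must be enabled so that the sink can commit finished files.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class KafkaToObsSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpointing is required so that StreamingFileSink can commit finished files.
        env.enableCheckpointing(60000);

        // Kafka connection properties; all values are placeholders.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "<kafka-broker-host>:9092");
        props.setProperty("group.id", "dli-obs-demo");

        FlinkKafkaConsumer<String> source =
                new FlinkKafkaConsumer<>("<topic>", new SimpleStringSchema(), props);

        // Write each record as a line of text to the OBS bucket path.
        StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(new Path("obs://<bucket>/flink-output"), new SimpleStringEncoder<String>("UTF-8"))
                .build();

        env.addSource(source).addSink(sink);
        env.execute("kafka-to-obs-sketch");
    }
}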
+

Environment Preparations

Development tools such as IntelliJ IDEA, as well as the JDK and Maven, have been installed and configured.

+
  • For details about how to configure the pom.xml file of the Maven project, see "POM file configurations" in Java Example Code.
  • Ensure that you can access the public network in the local compilation environment.
+
+
+

Constraints

+
+

Java Example Code

+
+

Compiling and Running the Application

After the application is developed, upload the JAR package to DLI by referring to Flink Jar Job Examples and check whether related data exists in the OBS path.

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0193.html b/docs/dli/dev/dli_09_0193.html new file mode 100644 index 00000000..aaeef07a --- /dev/null +++ b/docs/dli/dev/dli_09_0193.html @@ -0,0 +1,113 @@ + + +

Java Example Code

+

Development Description

This example applies only to MRS OpenTSDB.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0195.html b/docs/dli/dev/dli_09_0195.html new file mode 100644 index 00000000..436d6a2f --- /dev/null +++ b/docs/dli/dev/dli_09_0195.html @@ -0,0 +1,15 @@ + + +

Troubleshooting

+

A Spark Job Fails to Be Executed and "No respond" Is Displayed in the Job Log

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0196.html b/docs/dli/dev/dli_09_0196.html new file mode 100644 index 00000000..326eb23d --- /dev/null +++ b/docs/dli/dev/dli_09_0196.html @@ -0,0 +1,22 @@ + + +

MRS Configuration

+

Configuring MRS Host Information in DLI Datasource Connection

  1. Create a datasource connection on the DLI management console.
  2. Add the /etc/hosts information of MRS cluster nodes to the host file of the DLI queue.

    +

    For details, see section "Modifying the Host Information" in the Data Lake Insight User Guide.

    +
+
+

Completing Configurations for Enabling Kerberos Authentication

  1. Create a cluster with Kerberos authentication enabled by referring to section "Creating a Security Cluster and Logging In to MRS Manager" in . Add a user and grant permissions to the user by referring to section "Creating Roles and Users".
  2. Use the user created in 1 for login authentication. For details, see . A human-machine user must change the password upon the first login.
  3. Log in to Manager and choose System. In the navigation pane on the left, choose Permission > User, locate the row containing the new user, click More, and select Download Authentication Credential. Save the downloaded package and decompress it to obtain the keytab and krb5.conf files.
+
+

Creating an MRS HBase Table

Before creating an MRS HBase table to be associated with the DLI table, ensure that the HBase table exists. The following example commands show how to check for and create an MRS HBase table:

+
  1. Remotely log in to the ECS and use the HBase Shell command to view table information. In this command, hbtest indicates the name of the table to be queried.
    describe 'hbtest'
    +
  2. (Optional) If the HBase table does not exist, run the following command to create one:
    create 'hbtest', 'info', 'detail'
    +

    In this command, hbtest indicates the table name, and other parameters indicate the column family names.

    +
  3. Configure the connection information for the association table. TableName must match the name of the HBase table, as shown in the sketch after this list.
+
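For illustration, the association table can then be created from a Spark job roughly as follows. This is a sketch only: the ZooKeeper address and column mappings are placeholders, and the option names (ZKHost, TableName, RowKey, Cols) follow DLI's HBase datasource syntax and should be verified against the MRS HBase example code in this guide.

import org.apache.spark.sql.SparkSession;

public class HBaseAssociationSketch {
    public static void main(String[] args) {
        SparkSession sparkSession = SparkSession.builder().appName("datasource-hbase").getOrCreate();

        // Associate a DLI table with the MRS HBase table 'hbtest' created above.
        // 'RowKey' and 'Cols' map the HBase row key and column families to DLI columns.
        sparkSession.sql("CREATE TABLE testhbase USING HBASE OPTIONS ("
                + "'ZKHost' = '<zookeeper-host>:2181',"
                + "'TableName' = 'hbtest',"
                + "'RowKey' = 'id:5',"
                + "'Cols' = 'value:info.value,detailValue:detail.value')");

        sparkSession.sql("SELECT * FROM testhbase").show();
        sparkSession.close();
    }
}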
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0197.html b/docs/dli/dev/dli_09_0197.html new file mode 100644 index 00000000..aad426c9 --- /dev/null +++ b/docs/dli/dev/dli_09_0197.html @@ -0,0 +1,270 @@ + + +

Java Example Code

+

Development Description

This example applies only to MRS HBase.

+ +
+

Complete Example Code

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0198.html b/docs/dli/dev/dli_09_0198.html new file mode 100644 index 00000000..f329a73c --- /dev/null +++ b/docs/dli/dev/dli_09_0198.html @@ -0,0 +1,18 @@ + + +

Troubleshooting

+

Problem 1

+
+

Problem 2

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0199.html b/docs/dli/dev/dli_09_0199.html new file mode 100644 index 00000000..2af44992 --- /dev/null +++ b/docs/dli/dev/dli_09_0199.html @@ -0,0 +1,85 @@ + + +

Java Example Code

+

Scenario

This section provides Java example code that demonstrates how to use a Spark job to access data from the GaussDB(DWS) data source.

+

A datasource connection has been created and bound to a queue on the DLI management console.

+

Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.

+
+
+

Preparations

  1. Import dependencies.
    • Maven dependency involved
      <dependency>
      +  <groupId>org.apache.spark</groupId>
      +  <artifactId>spark-sql_2.11</artifactId>
      +  <version>2.3.2</version>
      +</dependency>
      +
      + +
      +
    • Import dependency packages.
      import org.apache.spark.sql.SparkSession;
      +
      + +
      +
    +
  2. Create a session.
    SparkSession sparkSession = SparkSession.builder().appName("datasource-dws").getOrCreate();
    +
    + +
    +
+
+

Accessing a Data Source Through a SQL API

  1. Create a table to connect to a GaussDB(DWS) data source and set connection parameters.
    sparkSession.sql("CREATE TABLE IF NOT EXISTS dli_to_dws USING JDBC OPTIONS ('url'='jdbc:postgresql://10.0.0.233:8000/postgres','dbtable'='test','user'='dbadmin','password'='**')");
    +
    + +
    +
  2. Insert data.
    sparkSession.sql("insert into dli_to_dws values(3,'L'),(4,'X')");
    +
    + +
    +
  3. Query data.
    sparkSession.sql("select * from dli_to_dws").show();
    +
    + +
    +
+
+

Submitting a Spark Job

  1. Generate a JAR package based on the code file and upload the package to DLI.

    +

    +
  2. In the Spark job editor, select the corresponding dependency module and execute the Spark job.

    +
    • If the Spark version is 2.3.2 (will be offline soon) or 2.4.5, set Module to sys.datasource.dws when you submit a job.
    • If the Spark version is 3.1.1, you do not need to select a module. Configure Spark parameters (--conf).

      spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/dws/*

      +

      spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/dws/*

      +
    +
    +
    +
+
+

Complete Example Code

Accessing GaussDB(DWS) tables through SQL APIs

+
import org.apache.spark.sql.SparkSession;
+ 
+public class java_dws {
+    public static void main(String[] args) {
+        SparkSession sparkSession = SparkSession.builder().appName("datasource-dws").getOrCreate();
+ 
+        sparkSession.sql("CREATE TABLE IF NOT EXISTS dli_to_dws USING JDBC OPTIONS ('url'='jdbc:postgresql://10.0.0.233:8000/postgres','dbtable'='test','user'='dbadmin','password'='**')");
+ 
+        //*****************************SQL model***********************************
+        //Insert data into the DLI data table
+        sparkSession.sql("insert into dli_to_dws values(3,'Liu'),(4,'Xie')");
+ 
+        //Read data from DLI data table
+        sparkSession.sql("select * from dli_to_dws").show();
+ 
+        //drop table
+        sparkSession.sql("drop table dli_to_dws");
+ 
+        sparkSession.close();
+    }
+}
+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0202.html b/docs/dli/dev/dli_09_0202.html new file mode 100644 index 00000000..10fe980d --- /dev/null +++ b/docs/dli/dev/dli_09_0202.html @@ -0,0 +1,15 @@ + + +

Flink Jar Jobs

+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0203.html b/docs/dli/dev/dli_09_0203.html new file mode 100644 index 00000000..30e7a2ed --- /dev/null +++ b/docs/dli/dev/dli_09_0203.html @@ -0,0 +1,21 @@ + + + +

Spark Jar Jobs

+ +

+
+ +
+ +
+ diff --git a/docs/dli/dev/dli_09_0204.html b/docs/dli/dev/dli_09_0204.html new file mode 100644 index 00000000..6f2440a3 --- /dev/null +++ b/docs/dli/dev/dli_09_0204.html @@ -0,0 +1,232 @@ + + +

Calling UDTFs in Spark SQL Jobs

+

Scenario

You can use Hive User-Defined Table-Generating Functions (UDTF) to customize table-valued functions. Hive UDTFs are used for one-in-multiple-out data operations: a UDTF reads one row of data and outputs multiple rows.

+
+

Constraints

+
+

Environment Preparations

Before you start, set up the development environment.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Development environment

Item

+

Description

+

OS

+

Windows 7 or later

+

JDK

+

JDK 1.8.

+

IntelliJ IDEA

+

This tool is used for application development. The version of the tool must be 2019.1 or other compatible versions.

+

Maven

+

Basic configurations of the development environment. Maven is used for project management throughout the lifecycle of software development.

+
+
+
+

Development Process

The process of developing a UDTF is as follows:
Figure 1 Development process
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Process description

No.

+

Phase

+

Software Portal

+

Description

+

1

+

Create a Maven project and configure the POM file.

+

IntelliJ IDEA

+

+

Write UDTF code by referring to the steps in Procedure.

+

+

2

+

Write UDTF code.

+

3

+

Debug, compile, and pack the code into a Jar package.

+

4

+

Upload the Jar package to OBS.

+

OBS console

+

Upload the UDTF Jar file to an OBS directory.

+

5

+

Create the UDTF on DLI.

+

DLI console

+

Create a UDTF on the SQL job management page of the DLI console.

+

6

+

Verify and use the UDTF on DLI.

+

DLI console

+

Use the UDTF in your DLI job.

+
+
+
+
+

Procedure

  1. Create a Maven project and configure the POM file. This step uses IntelliJ IDEA 2020.2 as an example.
    1. Start IntelliJ IDEA and choose File > New > Project.
      Figure 2 Creating a project
      +
    2. Choose Maven, set Project SDK to 1.8, and click Next.
      Figure 3 Choosing Maven
      +
    3. Set the project name, configure the storage path, and click Finish.
      Figure 4 Creating a project
      +
    4. Add the following content to the pom.xml file.
      <dependencies>
      +        <dependency>
      +            <groupId>org.apache.hive</groupId>
      +            <artifactId>hive-exec</artifactId>
      +            <version>1.2.1</version>
      +        </dependency>
      +</dependencies>
      +
      Figure 5 Adding configurations to the POM file
      +
      +
    5. Choose src > main and right-click the java folder. Choose New > Package to create a package and a class file.
      Figure 6 Creating a package and a class file
      +

      Set the package name as you need. Then, press Enter.

      +

      Create a Java Class file in the package path. In this example, the Java Class file is UDTFSplit.

      +
    +
  2. Write UDTF code. For sample code, see Sample Code.

    The UDTF class must extend org.apache.hadoop.hive.ql.udf.generic.GenericUDTF and implement the initialize, process, and close methods.

    +
    1. The initialize method is called first. It returns information about the rows the UDTF produces, such as the number of output fields and their types.
    2. The process method handles the data. Each call to forward() inside process generates one output row.
      To output multiple columns, put the values in an array and pass the array to forward().
      public void process(Object[] args) throws HiveException {
      +        // TODO Auto-generated method stub
      +        if(args.length == 0){
      +            return;
      +        }
      +        String input = args[0].toString();
      +        if(StringUtils.isEmpty(input)){
      +            return;
      +        }
      +        String[] test = input.split(";");
      +        for (int i = 0; i < test.length; i++) {
      +            try {
      +                String[] result = test[i].split(":");
      +                forward(result);
      +            } catch (Exception e) {
      +                continue;
      +            }
      +        }
      +
      +    }
      +
      +
    3. The close method is called last to release any resources that need to be cleaned up.
    +
  3. Use IntelliJ IDEA to compile the code and package it into a JAR file.
    1. Click Maven in the tool bar on the right, and click clean and compile to compile the code.

      After the compilation is successful, click package.

      +

      The generated JAR package is stored in the target directory. In this example, MyUDTF-1.0-SNAPSHOT.jar is stored in D:\MyUDTF\target.

      +
    +
  4. Log in to the OBS console and upload the file to the OBS path.

    The region of the OBS bucket to which the Jar package is uploaded must be the same as the region of the DLI queue. Cross-region operations are not allowed.

    +
    +
  5. (Optional) Upload the file to DLI for package management.
    1. Log in to the DLI management console and choose Data Management > Package Management.
    2. On the Package Management page, click Create in the upper right corner.
    3. In the Create Package dialog, set the following parameters:
      1. Type: Select JAR.
      2. OBS Path: Specify the OBS path for storing the package.
      3. Set Group and Group Name as required for package identification and management.
      +
    4. Click OK.
    +
  6. Create the UDTF on DLI.
    1. Log in to the DLI console and choose SQL Editor. Set Engine to spark, and select the SQL queue and database you created.
    2. In the SQL editing area, enter the following statement, which references the OBS path of the uploaded JAR file, to create the UDTF, and click Execute.
      CREATE FUNCTION mytestsplit AS 'com.demo.UDTFSplit' using jar 'obs://dli-test-obs01/MyUDTF-1.0-SNAPSHOT.jar';
      +
    +
  7. Restart the original SQL queue for the added function to take effect.
    1. Log in to the DLI management console and choose Resources > Queue Management from the navigation pane. In the Operation column of the SQL queue, click Restart.
    2. In the Restart dialog box, click OK.
    +
  8. Verify and use the UDTF on DLI.

    Use the UDTF created in 6 in the SELECT statement as follows:

    +
    select mytestsplit('abc:123\;efd:567\;utf:890');
    +

    +
  9. (Optional) Delete the UDTF.

    If this function is no longer used, run the following statement to delete the function:

    +
    Drop FUNCTION mytestsplit;
    +
+
+

Sample Code

The complete UDTFSplit.java code is as follows:

+
import java.util.ArrayList;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+public class UDTFSplit extends GenericUDTF {
+
+    @Override
+    public void close() throws HiveException {
+        // TODO Auto-generated method stub
+
+    }
+
+    @Override
+    public void process(Object[] args) throws HiveException {
+        // TODO Auto-generated method stub
+        if(args.length == 0){
+            return;
+        }
+        String input = args[0].toString();
+        if(StringUtils.isEmpty(input)){
+            return;
+        }
+        String[] test = input.split(";");
+        for (int i = 0; i < test.length; i++) {
+            try {
+                String[] result = test[i].split(":");
+                forward(result);
+            } catch (Exception e) {
+                continue;
+            }
+        }
+
+    }
+
+    @Override
+    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
+        if (args.length != 1) {
+            throw new UDFArgumentLengthException("ExplodeMap takes only one argument");
+        }
+        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
+        }
+
+        ArrayList<String> fieldNames = new ArrayList<String>();
+        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+        fieldNames.add("col1");
+        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+        fieldNames.add("col2");
+        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+
+        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+    }
+
+}
+
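For reference, the function created in the Procedure can also be exercised programmatically, in the same style as the other samples in this guide. The following is a minimal sketch only and is not part of the original example; it assumes the JAR has already been uploaded to obs://dli-test-obs01/MyUDTF-1.0-SNAPSHOT.jar and that the code runs as a DLI Spark job with access to that path.

import org.apache.spark.sql.SparkSession;

public class UDTFSplitDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("udtf-split-demo").getOrCreate();

        // Register the UDTF from the uploaded JAR (same statement as in step 6 of the Procedure).
        spark.sql("CREATE FUNCTION mytestsplit AS 'com.demo.UDTFSplit' "
                + "using jar 'obs://dli-test-obs01/MyUDTF-1.0-SNAPSHOT.jar'");

        // One input string fans out into multiple (col1, col2) rows. The \; escaping used in
        // the SQL Editor example should not be needed when the statement is passed through spark.sql.
        spark.sql("select mytestsplit('abc:123;efd:567;utf:890')").show();

        spark.stop();
    }
}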
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0205.html b/docs/dli/dev/dli_09_0205.html new file mode 100644 index 00000000..125cffdc --- /dev/null +++ b/docs/dli/dev/dli_09_0205.html @@ -0,0 +1,292 @@ + + +

Using Spark Jar Jobs to Read and Query OBS Data

+

Scenario

DLI is fully compatible with open-source Apache Spark and allows you to import, query, analyze, and process job data by programming. This section describes how to write a Spark program to read and query OBS data, compile and package the code, and submit it to a Spark Jar job.

+
+

Environment Preparations

Before you start, set up the development environment.

+ +
+ + + + + + + + + + + + + + + + +
Table 1 Spark Jar job development environment

Item

+

Description

+

OS

+

Windows 7 or later

+

JDK

+

JDK 1.8.

+

IntelliJ IDEA

+

This tool is used for application development. Use version 2019.1 or a compatible version.

+

Maven

+

Basic configuration of the development environment. Maven is used for project management throughout the software development lifecycle.

+
+
+
+

Development Process

The following figure shows the process of developing a Spark Jar job.
Figure 1 Development process
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table 2 Process description

No.

+

Phase

+

Software Portal

+

Description

+

1

+

Create a queue for general use.

+

DLI console

+

The DLI queue is created for running your job.

+

2

+

Upload data to an OBS bucket.

+

OBS console

+

The test data needs to be uploaded to your OBS bucket.

+

3

+

Create a Maven project and configure the POM file.

+

IntelliJ IDEA

+

+

Write your code by referring to the sample code for reading data from OBS.

+

+

4

+

Write code.

+

5

+

Debug, compile, and package the code into a JAR file.

+

6

+

Upload the Jar package to OBS and DLI.

+

OBS console

+

You can upload the generated Spark JAR file to an OBS directory and add it to DLI package management.

+

7

+

Create a Spark Jar Job.

+

DLI console

+

The Spark Jar job is created and submitted on the DLI console.

+

8

+

Check the execution result of the job.

+

DLI console

+

You can view the job running status and run logs.

+
+
+
+
+

Step 1: Create a Queue for General Purpose

If you submit a Spark job for the first time, you need to create a queue first. For example, create a queue, name it sparktest, and set Queue Usage to For general purpose.
  1. In the navigation pane of the DLI management console, choose Queue Management.
  2. In the upper right corner of the Queue Management page, click Create Queue to create a queue.
  3. Create a queue named sparktest and set Queue Usage to For general purpose. For details about how to create a queue, see Creating a Queue.
  4. Click Create Now to create a queue.
+
+
+

Step 2: Upload Data to OBS

  1. Create the people.json file containing the following content:
    {"name":"Michael"}
    +{"name":"Andy", "age":30}
    +{"name":"Justin", "age":19}
    +
  2. Log in to the OBS console. On the Buckets page, click the name of the OBS bucket you created. In this example, the bucket name is dli-test-obs01. The overview page is displayed.
  3. In the navigation pane on the left, choose Objects. Click Upload Object to upload the file to the root directory of the OBS bucket.
  4. In the root directory of the OBS bucket, click Create Folder to create a folder and name it result.
  5. Click the result folder, click Create Folder on the displayed page to create a folder and name it parquet.
+
+

Step 3: Create a Maven Project and Configure the pom Dependency

This step uses IntelliJ IDEA 2020.2 as an example.
  1. Start IntelliJ IDEA and choose File > New > Project.
    Figure 2 Creating a project
    +
  2. Choose Maven, set Project SDK to 1.8, and click Next.
    Figure 3 Creating a project
    +
  3. Set the project name, configure the storage path, and click Finish.
    Figure 4 Creating a project
    +

    In this example, the Maven project name is SparkJarObs, and the project storage path is D:\DLITest\SparkJarObs.

    +
  4. Add the following content to the pom.xml file.
    <dependencies>
    +        <dependency>
    +            <groupId>org.apache.spark</groupId>
    +            <artifactId>spark-sql_2.11</artifactId>
    +            <version>2.3.2</version>
    +        </dependency>
    +</dependencies>
    +
    Figure 5 Modifying the pom.xml file
    +
    +
  5. Choose src > main and right-click the java folder. Choose New > Package to create a package and a class file.
    Figure 6 Creating a package
    +

    Set the package name as you need. Then, press Enter.

    +

    Create a Java Class file in the package path. In this example, the Java Class file is SparkDemoObs.

    +
+
+
+

Step 4: Write Code

Write the SparkDemoObs program to read the people.json file from the OBS bucket, create the temporary table people, and query the data.

+

For the sample code, see Sample Code.

+
  1. Import dependencies.
    import org.apache.spark.sql.Dataset;
    +import org.apache.spark.sql.Row;
    +import org.apache.spark.sql.SaveMode;
    +import org.apache.spark.sql.SparkSession;
    +
    +import static org.apache.spark.sql.functions.col;
    +
  2. Create Spark session spark using the AK and SK of the current account.
    SparkSession spark = SparkSession
    +                .builder()
    +                .config("spark.hadoop.fs.obs.access.key", "xxx")
    +                .config("spark.hadoop.fs.obs.secret.key", "yyy")
    +                .appName("java_spark_demo")
    +                .getOrCreate();
    +
    • Replace xxx of "spark.hadoop.fs.obs.access.key" with the AK of the account.
    • Replace yyy of "spark.hadoop.fs.obs.secret.key" with the SK of the account.
    +
  3. Read the people.json file from the OBS bucket.
    dli-test-obs01 is the name of the sample OBS bucket. Replace it with the actual OBS bucket name.
    Dataset<Row> df = spark.read().json("obs://dli-test-obs01/people.json");
    +df.printSchema();
    +
    +
  4. Create temporary table people to read data.
    df.createOrReplaceTempView("people");
    +
  5. Query data in the people table.
    Dataset<Row> sqlDF = spark.sql("SELECT * FROM people");
    +sqlDF.show();
    +
  6. Export people table data in Parquet format to the result/parquet directory of the OBS bucket.
    sqlDF.write().mode(SaveMode.Overwrite).parquet("obs://dli-test-obs01/result/parquet");
    +spark.read().parquet("obs://dli-test-obs01/result/parquet").show();
    +
  7. Stop the Spark session.
    spark.stop();
    +
+
+

Step 5: Debug, Compile, and Package the Code into a JAR File

  1. Double-click Maven in the tool bar on the right, and double-click clean and compile to compile the code.

    After the compilation is successful, double-click package.

    +

    The generated JAR package is stored in the target directory. In this example, SparkJarObs-1.0-SNAPSHOT.jar is stored in D:\DLITest\SparkJarObs\target.

    +
+
+

Step 6: Upload the JAR Package to OBS and DLI

+
+

Step 7: Create a Spark Jar Job

  1. Log in to the DLI console. In the navigation pane, choose Job Management > Spark Jobs.
  2. On the Spark Jobs page, click Create Job.
  3. On the displayed page, configure the following parameters:
    • Queue: Select the created queue. For example, select the queue sparktest created in Step 1: Create a Queue for General Purpose.
    • Spark Version: Select a supported Spark version from the drop-down list. The latest version is recommended.
    • Job Name (--name): Name of the Spark Jar job. For example, SparkTestObs.
    • Application: Select the package uploaded in Step 6: Upload the JAR Package to OBS and DLI. For example, select SparkJarObs-1.0-SNAPSHOT.jar.
    • Main Class (--class): The format is program package name + class name.
    +

    You do not need to set other parameters.

    +
  4. Click Execute to submit the Spark Jar job. On the Job management page, view the running status.
+
+

Step 8: View Job Execution Result

  1. On the Job management page, view the running status. The initial status is Starting.
  2. If the job is successfully executed, the job status is Finished. Click More in the Operation column and select Driver Logs to view the running log.
    Figure 7 Driver logs
    +
  3. If the job is successfully executed, go to the result/parquet directory in the OBS bucket to view the generated parquet file.
  4. If the job fails to be executed, choose More > Driver Logs in the Operation column to view the detailed error information.
    For example, the following figure shows the error reported when the package path was omitted from the main class name during Spark Jar job creation.
    Figure 8 Error information
    +
    +

    In the Operation column, click Edit, change the value of Main Class to com.SparkDemoObs, and click Execute to run the job again.

    +
+
+

Sample Code

Hard-coded or plaintext access.key and secret.key pose significant security risks. To ensure security, encrypt your AK and SK, store them in configuration files or environment variables, and decrypt them when needed.

+
+
package com.dli.demo;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+
+import static org.apache.spark.sql.functions.col;
+
+public class SparkDemoObs {
+    public static void main(String[] args) {
+        SparkSession spark = SparkSession
+                .builder()
+                .config("spark.hadoop.fs.obs.access.key", "xxx")
+                .config("spark.hadoop.fs.obs.secret.key", "yyy")
+                .appName("java_spark_demo")
+                .getOrCreate();
+        // can also be used --conf to set the ak sk when submit the app
+
+        // test json data:
+        // {"name":"Michael"}
+        // {"name":"Andy", "age":30}
+        // {"name":"Justin", "age":19}
+        Dataset<Row> df = spark.read().json("obs://dli-test-obs01/people.json");
+        df.printSchema();
+        // root
+        // |-- age: long (nullable = true)
+        // |-- name: string (nullable = true)
+
+        // Displays the content of the DataFrame to stdout
+        df.show();
+        // +----+-------+
+        // | age|   name|
+        // +----+-------+
+        // |null|Michael|
+        // |  30|   Andy|
+        // |  19| Justin|
+        // +----+-------+
+
+        // Select only the "name" column
+        df.select("name").show();
+        // +-------+
+        // |   name|
+        // +-------+
+        // |Michael|
+        // |   Andy|
+        // | Justin|
+        // +-------+
+
+        // Select people older than 21
+        df.filter(col("age").gt(21)).show();
+        // +---+----+
+        // |age|name|
+        // +---+----+
+        // | 30|Andy|
+        // +---+----+
+
+        // Count people by age
+        df.groupBy("age").count().show();
+        // +----+-----+
+        // | age|count|
+        // +----+-----+
+        // |  19|    1|
+        // |null|    1|
+        // |  30|    1|
+        // +----+-----+
+
+        // Register the DataFrame as a SQL temporary view
+        df.createOrReplaceTempView("people");
+
+        Dataset<Row> sqlDF = spark.sql("SELECT * FROM people");
+        sqlDF.show();
+        // +----+-------+
+        // | age|   name|
+        // +----+-------+
+        // |null|Michael|
+        // |  30|   Andy|
+        // |  19| Justin|
+        // +----+-------+
+
+        sqlDF.write().mode(SaveMode.Overwrite).parquet("obs://dli-test-obs01/result/parquet");
+        spark.read().parquet("obs://dli-test-obs01/result/parquet").show();
+
+        spark.stop();
+    }
+}
+
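As a minimal illustration of the security note above (not part of the original sample), the AK and SK can be read from environment variables instead of being hard-coded. The variable names OBS_ACCESS_KEY and OBS_SECRET_KEY below are hypothetical placeholders; set them in your job environment before running.

import org.apache.spark.sql.SparkSession;

public class SparkDemoObsEnv {
    public static void main(String[] args) {
        // Hypothetical environment variables; keeping the AK/SK out of the source
        // means they never end up in the packaged JAR.
        String ak = System.getenv("OBS_ACCESS_KEY");
        String sk = System.getenv("OBS_SECRET_KEY");

        SparkSession spark = SparkSession
                .builder()
                .config("spark.hadoop.fs.obs.access.key", ak)
                .config("spark.hadoop.fs.obs.secret.key", sk)
                .appName("java_spark_demo")
                .getOrCreate();

        spark.read().json("obs://dli-test-obs01/people.json").show();
        spark.stop();
    }
}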

+
+
+
+ +
+ diff --git a/docs/dli/dev/dli_09_0207.html b/docs/dli/dev/dli_09_0207.html new file mode 100644 index 00000000..a6f01869 --- /dev/null +++ b/docs/dli/dev/dli_09_0207.html @@ -0,0 +1,24 @@ + + +

Configuring High-Reliability Flink Jobs (Automatic Restart upon Exceptions)

+

Scenario

If you need to configure high reliability for a Flink application, you can set the parameters when creating your Flink jobs.

+
+

Procedure

  1. Create an SMN topic and add an email address or mobile number to subscribe to the topic. You will receive a subscription notification by email or SMS message. Click the confirmation link to complete the subscription.
  2. Log in to the DLI console, create a Flink SQL job, write SQL statements for the job, and configure running parameters. In this example, key parameters are described. Set other parameters based on your requirements. For details about how to create a Flink SQL job, see .

    The reliability configuration of a Flink Jar job is the same as that of a Flink SQL job and is not described separately in this section.

    +
    +
    1. Set CUs, Job Manager CUs, and Max Concurrent Jobs based on the following formula:

      Total number of CUs = Number of manager CUs + (Total number of concurrent operators / Number of slots of a TaskManager) x Number of TaskManager CUs

      +

      For example, with a total of 9 CUs (1 manager CU) and a maximum of 16 concurrent jobs, the number of compute-specific CUs is 8.

      +

      If you do not configure the TaskManager specifications, a TaskManager occupies 1 CU by default and the number of slots is not set. To ensure high reliability, set the number of slots per TaskManager to 2 based on the preceding formula.

      +

      Set the maximum number of concurrent jobs to twice the number of CUs. (A worked example of this sizing follows the procedure.)

      +
    2. Select Save Job Log and select an OBS bucket. If you are not authorized to access the bucket, click Authorize. This allows job logs to be saved to your OBS bucket. If a job fails, the logs can be used to locate the fault.
    3. Select Alarm Generation upon Job Exception and select the SMN topic created in 1. This allows DLI to send notifications to your email address or phone when a job exception occurs, so you are informed of exceptions in time.
    4. Select Enable Checkpointing and set the checkpoint interval and mode as needed. This function ensures that a failed Flink task can be restored from the latest checkpoint.
      • Checkpoint interval indicates the interval between two triggers. Checkpointing hurts real-time computing performance. To minimize the performance loss, you need to allow for the recovery duration when configuring the interval. It is recommended that the checkpoint interval be greater than the checkpointing duration. The recommended value is 5 minutes.
      • The Exactly once mode ensures that each piece of data is consumed only once, and the At least once mode ensures that each piece of data is consumed at least once. Select a mode as you need.
      +
      +
    5. Select Auto Restart upon Exception and Restore Job from Checkpoint, and set the number of retry attempts as needed.
    6. Configure Dirty Data Policy. You can select Ignore, Trigger a job exception, or Save based on your service requirements.
    7. Select a queue, and then submit and run the job.
    +
  3. Log in to the Cloud Eye console. In the navigation pane on the left, choose Cloud Service Monitoring > Data Lake Insight. Locate the target Flink job and click Create Alarm Rule.

    DLI provides various monitoring metrics for Flink jobs. You can define alarm rules as required using different monitoring metrics for fine-grained job monitoring.

    +
+
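The following is a small illustrative check of the sizing formula in 2.1 and is not part of the original procedure. It reproduces the 9-CU example: 1 Job Manager CU plus 16 concurrent operators spread across TaskManagers with 2 slots each, at 1 CU per TaskManager.

public class FlinkCuSizing {
    public static void main(String[] args) {
        int managerCUs = 1;            // Job Manager CUs
        int concurrentOperators = 16;  // total operator parallelism
        int slotsPerTaskManager = 2;   // slots configured for each TaskManager
        int cusPerTaskManager = 1;     // default TaskManager size

        // Total CUs = manager CUs + (concurrent operators / slots per TaskManager) x TaskManager CUs
        int totalCUs = managerCUs
                + (concurrentOperators / slotsPerTaskManager) * cusPerTaskManager;

        System.out.println("Total CUs required: " + totalCUs); // prints 9
    }
}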
+
+
+ +
+ diff --git a/docs/dli/dev/en-us_image_0000001102485176.png b/docs/dli/dev/en-us_image_0000001102485176.png new file mode 100644 index 00000000..cc45c153 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001102485176.png differ diff --git a/docs/dli/dev/en-us_image_0000001129442286.png b/docs/dli/dev/en-us_image_0000001129442286.png new file mode 100644 index 00000000..ae402fa7 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001129442286.png differ diff --git a/docs/dli/dev/en-us_image_0000001200075414.png b/docs/dli/dev/en-us_image_0000001200075414.png new file mode 100644 index 00000000..c0b04bc1 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001200075414.png differ diff --git a/docs/dli/dev/en-us_image_0000001200327862.png b/docs/dli/dev/en-us_image_0000001200327862.png new file mode 100644 index 00000000..3a7c3e99 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001200327862.png differ diff --git a/docs/dli/dev/en-us_image_0000001200329970.png b/docs/dli/dev/en-us_image_0000001200329970.png new file mode 100644 index 00000000..3d5736d4 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001200329970.png differ diff --git a/docs/dli/dev/en-us_image_0000001208012082.png b/docs/dli/dev/en-us_image_0000001208012082.png new file mode 100644 index 00000000..93998812 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001208012082.png differ diff --git a/docs/dli/dev/en-us_image_0000001208518262.png b/docs/dli/dev/en-us_image_0000001208518262.png new file mode 100644 index 00000000..4279c271 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001208518262.png differ diff --git a/docs/dli/dev/en-us_image_0000001245010109.png b/docs/dli/dev/en-us_image_0000001245010109.png new file mode 100644 index 00000000..7df0c4fe Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245010109.png differ diff --git a/docs/dli/dev/en-us_image_0000001245011273.png b/docs/dli/dev/en-us_image_0000001245011273.png new file mode 100644 index 00000000..7bd0d251 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245011273.png differ diff --git a/docs/dli/dev/en-us_image_0000001245210469.png b/docs/dli/dev/en-us_image_0000001245210469.png new file mode 100644 index 00000000..664ecb88 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245210469.png differ diff --git a/docs/dli/dev/en-us_image_0000001245448995.png b/docs/dli/dev/en-us_image_0000001245448995.png new file mode 100644 index 00000000..4279c271 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245448995.png differ diff --git a/docs/dli/dev/en-us_image_0000001245542509.png b/docs/dli/dev/en-us_image_0000001245542509.png new file mode 100644 index 00000000..4279c271 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245542509.png differ diff --git a/docs/dli/dev/en-us_image_0000001245542693.png b/docs/dli/dev/en-us_image_0000001245542693.png new file mode 100644 index 00000000..ce1fde62 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245542693.png differ diff --git a/docs/dli/dev/en-us_image_0000001245649477.png b/docs/dli/dev/en-us_image_0000001245649477.png new file mode 100644 index 00000000..a603508f Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245649477.png differ diff --git a/docs/dli/dev/en-us_image_0000001245651049.png b/docs/dli/dev/en-us_image_0000001245651049.png new file mode 100644 index 00000000..08b59201 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245651049.png differ diff --git 
a/docs/dli/dev/en-us_image_0000001245660555.png b/docs/dli/dev/en-us_image_0000001245660555.png new file mode 100644 index 00000000..7df0c4fe Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001245660555.png differ diff --git a/docs/dli/dev/en-us_image_0000001251907299.png b/docs/dli/dev/en-us_image_0000001251907299.png new file mode 100644 index 00000000..ca4fa338 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001251907299.png differ diff --git a/docs/dli/dev/en-us_image_0000001251908699.png b/docs/dli/dev/en-us_image_0000001251908699.png new file mode 100644 index 00000000..d937e683 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001251908699.png differ diff --git a/docs/dli/dev/en-us_image_0000001252053711.png b/docs/dli/dev/en-us_image_0000001252053711.png new file mode 100644 index 00000000..21b794f8 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001252053711.png differ diff --git a/docs/dli/dev/en-us_image_0000001252187705.png b/docs/dli/dev/en-us_image_0000001252187705.png new file mode 100644 index 00000000..4279c271 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001252187705.png differ diff --git a/docs/dli/dev/en-us_image_0000001252854995.png b/docs/dli/dev/en-us_image_0000001252854995.png new file mode 100644 index 00000000..ca5a4f88 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001252854995.png differ diff --git a/docs/dli/dev/en-us_image_0000001269022192.png b/docs/dli/dev/en-us_image_0000001269022192.png new file mode 100644 index 00000000..63949f06 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001269022192.png differ diff --git a/docs/dli/dev/en-us_image_0000001318102237.png b/docs/dli/dev/en-us_image_0000001318102237.png new file mode 100644 index 00000000..6f198a9d Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001318102237.png differ diff --git a/docs/dli/dev/en-us_image_0000001318262121.png b/docs/dli/dev/en-us_image_0000001318262121.png new file mode 100644 index 00000000..8937fc55 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001318262121.png differ diff --git a/docs/dli/dev/en-us_image_0000001318422061.png b/docs/dli/dev/en-us_image_0000001318422061.png new file mode 100644 index 00000000..20211602 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001318422061.png differ diff --git a/docs/dli/dev/en-us_image_0000001318542105.png b/docs/dli/dev/en-us_image_0000001318542105.png new file mode 100644 index 00000000..c47a3176 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001318542105.png differ diff --git a/docs/dli/dev/en-us_image_0000001487114864.png b/docs/dli/dev/en-us_image_0000001487114864.png new file mode 100644 index 00000000..20dfb6a4 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001487114864.png differ diff --git a/docs/dli/dev/en-us_image_0000001487274748.png b/docs/dli/dev/en-us_image_0000001487274748.png new file mode 100644 index 00000000..f261df2c Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001487274748.png differ diff --git a/docs/dli/dev/en-us_image_0000001487434660.png b/docs/dli/dev/en-us_image_0000001487434660.png new file mode 100644 index 00000000..d6e4e925 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001487434660.png differ diff --git a/docs/dli/dev/en-us_image_0000001487594580.png b/docs/dli/dev/en-us_image_0000001487594580.png new file mode 100644 index 00000000..4a2a6978 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001487594580.png differ diff --git 
a/docs/dli/dev/en-us_image_0000001538354737.png b/docs/dli/dev/en-us_image_0000001538354737.png new file mode 100644 index 00000000..5b732006 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001538354737.png differ diff --git a/docs/dli/dev/en-us_image_0000001538394645.png b/docs/dli/dev/en-us_image_0000001538394645.png new file mode 100644 index 00000000..504159c4 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001538394645.png differ diff --git a/docs/dli/dev/en-us_image_0000001538394649.png b/docs/dli/dev/en-us_image_0000001538394649.png new file mode 100644 index 00000000..37977cda Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001538394649.png differ diff --git a/docs/dli/dev/en-us_image_0000001538514573.png b/docs/dli/dev/en-us_image_0000001538514573.png new file mode 100644 index 00000000..b358232e Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001538514573.png differ diff --git a/docs/dli/dev/en-us_image_0000001618446021.png b/docs/dli/dev/en-us_image_0000001618446021.png new file mode 100644 index 00000000..e665a110 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001618446021.png differ diff --git a/docs/dli/dev/en-us_image_0000001637398494.png b/docs/dli/dev/en-us_image_0000001637398494.png new file mode 100644 index 00000000..497b8faf Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001637398494.png differ diff --git a/docs/dli/dev/en-us_image_0000001637399398.png b/docs/dli/dev/en-us_image_0000001637399398.png new file mode 100644 index 00000000..0d41d993 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001637399398.png differ diff --git a/docs/dli/dev/en-us_image_0000001637557382.png b/docs/dli/dev/en-us_image_0000001637557382.png new file mode 100644 index 00000000..7df0c4fe Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001637557382.png differ diff --git a/docs/dli/dev/en-us_image_0000001685690365.png b/docs/dli/dev/en-us_image_0000001685690365.png new file mode 100644 index 00000000..9e88776a Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001685690365.png differ diff --git a/docs/dli/dev/en-us_image_0000001685849073.png b/docs/dli/dev/en-us_image_0000001685849073.png new file mode 100644 index 00000000..7df0c4fe Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001685849073.png differ diff --git a/docs/dli/dev/en-us_image_0000001685850245.png b/docs/dli/dev/en-us_image_0000001685850245.png new file mode 100644 index 00000000..6c8397c5 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001685850245.png differ diff --git a/docs/dli/dev/en-us_image_0000001686339805.png b/docs/dli/dev/en-us_image_0000001686339805.png new file mode 100644 index 00000000..2bf77e43 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001686339805.png differ diff --git a/docs/dli/dev/en-us_image_0000001709848312.png b/docs/dli/dev/en-us_image_0000001709848312.png new file mode 100644 index 00000000..38453275 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001709848312.png differ diff --git a/docs/dli/dev/en-us_image_0000001709848328.png b/docs/dli/dev/en-us_image_0000001709848328.png new file mode 100644 index 00000000..cd784455 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001709848328.png differ diff --git a/docs/dli/dev/en-us_image_0000001709994304.png b/docs/dli/dev/en-us_image_0000001709994304.png new file mode 100644 index 00000000..3fffa261 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001709994304.png differ diff --git 
a/docs/dli/dev/en-us_image_0000001710007784.png b/docs/dli/dev/en-us_image_0000001710007784.png new file mode 100644 index 00000000..e7d54aac Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001710007784.png differ diff --git a/docs/dli/dev/en-us_image_0000001710007804.png b/docs/dli/dev/en-us_image_0000001710007804.png new file mode 100644 index 00000000..fee3b9a2 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001710007804.png differ diff --git a/docs/dli/dev/en-us_image_0000001757793769.png b/docs/dli/dev/en-us_image_0000001757793769.png new file mode 100644 index 00000000..d0c17f7e Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757793769.png differ diff --git a/docs/dli/dev/en-us_image_0000001757807269.png b/docs/dli/dev/en-us_image_0000001757807269.png new file mode 100644 index 00000000..cd8c139f Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757807269.png differ diff --git a/docs/dli/dev/en-us_image_0000001757807293.png b/docs/dli/dev/en-us_image_0000001757807293.png new file mode 100644 index 00000000..ec584b07 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757807293.png differ diff --git a/docs/dli/dev/en-us_image_0000001757887441.png b/docs/dli/dev/en-us_image_0000001757887441.png new file mode 100644 index 00000000..ccdfb17c Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757887441.png differ diff --git a/docs/dli/dev/en-us_image_0000001757887457.png b/docs/dli/dev/en-us_image_0000001757887457.png new file mode 100644 index 00000000..69c61500 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757887457.png differ diff --git a/docs/dli/dev/en-us_image_0000001757887477.png b/docs/dli/dev/en-us_image_0000001757887477.png new file mode 100644 index 00000000..7b92f2f0 Binary files /dev/null and b/docs/dli/dev/en-us_image_0000001757887477.png differ diff --git a/docs/dli/dev/en-us_image_0223996997.png b/docs/dli/dev/en-us_image_0223996997.png new file mode 100644 index 00000000..af1a6ca2 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223996997.png differ diff --git a/docs/dli/dev/en-us_image_0223996999.png b/docs/dli/dev/en-us_image_0223996999.png new file mode 100644 index 00000000..6cb8363f Binary files /dev/null and b/docs/dli/dev/en-us_image_0223996999.png differ diff --git a/docs/dli/dev/en-us_image_0223997003.png b/docs/dli/dev/en-us_image_0223997003.png new file mode 100644 index 00000000..b7619ad9 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997003.png differ diff --git a/docs/dli/dev/en-us_image_0223997004.png b/docs/dli/dev/en-us_image_0223997004.png new file mode 100644 index 00000000..7f599a4f Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997004.png differ diff --git a/docs/dli/dev/en-us_image_0223997302.png b/docs/dli/dev/en-us_image_0223997302.png new file mode 100644 index 00000000..0a51ef00 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997302.png differ diff --git a/docs/dli/dev/en-us_image_0223997303.png b/docs/dli/dev/en-us_image_0223997303.png new file mode 100644 index 00000000..39c48d54 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997303.png differ diff --git a/docs/dli/dev/en-us_image_0223997304.png b/docs/dli/dev/en-us_image_0223997304.png new file mode 100644 index 00000000..c483fcbb Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997304.png differ diff --git a/docs/dli/dev/en-us_image_0223997305.png b/docs/dli/dev/en-us_image_0223997305.png new file mode 100644 index 00000000..5596e334 Binary files /dev/null 
and b/docs/dli/dev/en-us_image_0223997305.png differ diff --git a/docs/dli/dev/en-us_image_0223997308.png b/docs/dli/dev/en-us_image_0223997308.png new file mode 100644 index 00000000..0a15b5a2 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997308.png differ diff --git a/docs/dli/dev/en-us_image_0223997410.png b/docs/dli/dev/en-us_image_0223997410.png new file mode 100644 index 00000000..b7619ad9 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997410.png differ diff --git a/docs/dli/dev/en-us_image_0223997411.png b/docs/dli/dev/en-us_image_0223997411.png new file mode 100644 index 00000000..7f599a4f Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997411.png differ diff --git a/docs/dli/dev/en-us_image_0223997412.png b/docs/dli/dev/en-us_image_0223997412.png new file mode 100644 index 00000000..ccdfb17c Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997412.png differ diff --git a/docs/dli/dev/en-us_image_0223997413.png b/docs/dli/dev/en-us_image_0223997413.png new file mode 100644 index 00000000..e7d54aac Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997413.png differ diff --git a/docs/dli/dev/en-us_image_0223997414.png b/docs/dli/dev/en-us_image_0223997414.png new file mode 100644 index 00000000..cd8c139f Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997414.png differ diff --git a/docs/dli/dev/en-us_image_0223997415.png b/docs/dli/dev/en-us_image_0223997415.png new file mode 100644 index 00000000..38453275 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997415.png differ diff --git a/docs/dli/dev/en-us_image_0223997416.png b/docs/dli/dev/en-us_image_0223997416.png new file mode 100644 index 00000000..69c61500 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997416.png differ diff --git a/docs/dli/dev/en-us_image_0223997417.png b/docs/dli/dev/en-us_image_0223997417.png new file mode 100644 index 00000000..fee3b9a2 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997417.png differ diff --git a/docs/dli/dev/en-us_image_0223997418.png b/docs/dli/dev/en-us_image_0223997418.png new file mode 100644 index 00000000..ec584b07 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997418.png differ diff --git a/docs/dli/dev/en-us_image_0223997419.png b/docs/dli/dev/en-us_image_0223997419.png new file mode 100644 index 00000000..cd784455 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997419.png differ diff --git a/docs/dli/dev/en-us_image_0223997420.png b/docs/dli/dev/en-us_image_0223997420.png new file mode 100644 index 00000000..7b92f2f0 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997420.png differ diff --git a/docs/dli/dev/en-us_image_0223997424.png b/docs/dli/dev/en-us_image_0223997424.png new file mode 100644 index 00000000..832cbe24 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997424.png differ diff --git a/docs/dli/dev/en-us_image_0223997425.png b/docs/dli/dev/en-us_image_0223997425.png new file mode 100644 index 00000000..9817d202 Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997425.png differ diff --git a/docs/dli/dev/en-us_image_0223997787.png b/docs/dli/dev/en-us_image_0223997787.png new file mode 100644 index 00000000..6cb8363f Binary files /dev/null and b/docs/dli/dev/en-us_image_0223997787.png differ diff --git a/docs/dli/dev/en-us_image_0266325813.png b/docs/dli/dev/en-us_image_0266325813.png new file mode 100644 index 00000000..0a51ef00 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266325813.png differ diff --git 
a/docs/dli/dev/en-us_image_0266325814.png b/docs/dli/dev/en-us_image_0266325814.png new file mode 100644 index 00000000..39c48d54 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266325814.png differ diff --git a/docs/dli/dev/en-us_image_0266325815.png b/docs/dli/dev/en-us_image_0266325815.png new file mode 100644 index 00000000..c483fcbb Binary files /dev/null and b/docs/dli/dev/en-us_image_0266325815.png differ diff --git a/docs/dli/dev/en-us_image_0266325816.png b/docs/dli/dev/en-us_image_0266325816.png new file mode 100644 index 00000000..5596e334 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266325816.png differ diff --git a/docs/dli/dev/en-us_image_0266332985.png b/docs/dli/dev/en-us_image_0266332985.png new file mode 100644 index 00000000..22e3d349 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266332985.png differ diff --git a/docs/dli/dev/en-us_image_0266332986.png b/docs/dli/dev/en-us_image_0266332986.png new file mode 100644 index 00000000..22e3d349 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266332986.png differ diff --git a/docs/dli/dev/en-us_image_0266332987.png b/docs/dli/dev/en-us_image_0266332987.png new file mode 100644 index 00000000..0a15b5a2 Binary files /dev/null and b/docs/dli/dev/en-us_image_0266332987.png differ diff --git a/docs/dli/dev/public_sys-resources/caution_3.0-en-us.png b/docs/dli/dev/public_sys-resources/caution_3.0-en-us.png new file mode 100644 index 00000000..60f60762 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/caution_3.0-en-us.png differ diff --git a/docs/dli/dev/public_sys-resources/danger_3.0-en-us.png b/docs/dli/dev/public_sys-resources/danger_3.0-en-us.png new file mode 100644 index 00000000..47a9c723 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/danger_3.0-en-us.png differ diff --git a/docs/dli/dev/public_sys-resources/delta.gif b/docs/dli/dev/public_sys-resources/delta.gif new file mode 100644 index 00000000..0d1b1f67 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/delta.gif differ diff --git a/docs/dli/dev/public_sys-resources/deltaend.gif b/docs/dli/dev/public_sys-resources/deltaend.gif new file mode 100644 index 00000000..cc7da0fc Binary files /dev/null and b/docs/dli/dev/public_sys-resources/deltaend.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-arrowdn.gif b/docs/dli/dev/public_sys-resources/icon-arrowdn.gif new file mode 100644 index 00000000..37942803 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-arrowdn.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-arrowrt.gif b/docs/dli/dev/public_sys-resources/icon-arrowrt.gif new file mode 100644 index 00000000..6aaaa11c Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-arrowrt.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-caution.gif b/docs/dli/dev/public_sys-resources/icon-caution.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-caution.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-danger.gif b/docs/dli/dev/public_sys-resources/icon-danger.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-danger.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-huawei.gif b/docs/dli/dev/public_sys-resources/icon-huawei.gif new file mode 100644 index 00000000..a31d60f8 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-huawei.gif differ diff --git 
a/docs/dli/dev/public_sys-resources/icon-note.gif b/docs/dli/dev/public_sys-resources/icon-note.gif new file mode 100644 index 00000000..31be2b03 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-note.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-notice.gif b/docs/dli/dev/public_sys-resources/icon-notice.gif new file mode 100644 index 00000000..40907065 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-notice.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-tip.gif b/docs/dli/dev/public_sys-resources/icon-tip.gif new file mode 100644 index 00000000..c47bae05 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-tip.gif differ diff --git a/docs/dli/dev/public_sys-resources/icon-warning.gif b/docs/dli/dev/public_sys-resources/icon-warning.gif new file mode 100644 index 00000000..079c79b2 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/icon-warning.gif differ diff --git a/docs/dli/dev/public_sys-resources/note_3.0-en-us.png b/docs/dli/dev/public_sys-resources/note_3.0-en-us.png new file mode 100644 index 00000000..57a0e1f5 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/note_3.0-en-us.png differ diff --git a/docs/dli/dev/public_sys-resources/notice_3.0-en-us.png b/docs/dli/dev/public_sys-resources/notice_3.0-en-us.png new file mode 100644 index 00000000..fa4b6499 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/notice_3.0-en-us.png differ diff --git a/docs/dli/dev/public_sys-resources/warning_3.0-en-us.png b/docs/dli/dev/public_sys-resources/warning_3.0-en-us.png new file mode 100644 index 00000000..def5c356 Binary files /dev/null and b/docs/dli/dev/public_sys-resources/warning_3.0-en-us.png differ