diff --git a/docs/dli/umn/ALL_META.TXT.json b/docs/dli/umn/ALL_META.TXT.json new file mode 100644 index 00000000..70359215 --- /dev/null +++ b/docs/dli/umn/ALL_META.TXT.json @@ -0,0 +1,2422 @@ +[ + { + "uri":"dli_01_0538.html", + "product_code":"dli", + "code":"1", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Service Overview", + "title":"Service Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0378.html", + "product_code":"dli", + "code":"2", + "des":"Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark and Apache Flink ecosystems. It frees you from managing an", + "doc_type":"usermanual", + "kw":"DLI Introduction,Service Overview,User Guide", + "title":"DLI Introduction", + "githuburl":"" + }, + { + "uri":"dli_07_0007.html", + "product_code":"dli", + "code":"3", + "des":"You do not need a background in big data to use DLI for data analysis. You only need to know SQL, and you are good to go. The SQL syntax is fully compatible with the stan", + "doc_type":"usermanual", + "kw":"Advantages,Service Overview,User Guide", + "title":"Advantages", + "githuburl":"" + }, + { + "uri":"dli_07_0002.html", + "product_code":"dli", + "code":"4", + "des":"DLI is applicable to large-scale log analysis, federated analysis of heterogeneous data sources, and big data ETL processing.Gaming operation data analysisDifferent depar", + "doc_type":"usermanual", + "kw":"Application Scenarios,Service Overview,User Guide", + "title":"Application Scenarios", + "githuburl":"" + }, + { + "uri":"dli_07_0005.html", + "product_code":"dli", + "code":"5", + "des":"When using DLI, you may need to make adjustments according to the following restrictions:Recommended browsers for logging in to DLI:Google Chrome 43.0 or laterMozilla Fir", + "doc_type":"usermanual", + "kw":"Constraints and Limitations on Using DLI,Service Overview,User Guide", + "title":"Constraints and Limitations on Using DLI", + "githuburl":"" + }, + { + "uri":"dli_07_0006.html", + "product_code":"dli", + "code":"6", + "des":"If you need to assign different permissions to employees in your enterprise to access your DLI resources, IAM is a good choice for fine-grained permissions management. IA", + "doc_type":"usermanual", + "kw":"Permissions Management,Service Overview,User Guide", + "title":"Permissions Management", + "githuburl":"" + }, + { + "uri":"dli_07_0003.html", + "product_code":"dli", + "code":"7", + "des":"DLI allows multiple organizations, departments, or applications to share resources. A logical entity, also called a tenant, is provided to use diverse resources and servi", + "doc_type":"usermanual", + "kw":"Basic Concepts,Service Overview,User Guide", + "title":"Basic Concepts", + "githuburl":"" + }, + { + "uri":"dli_01_0220.html", + "product_code":"dli", + "code":"8", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Getting Started", + "title":"Getting Started", + "githuburl":"" + }, + { + "uri":"dli_01_0002.html", + "product_code":"dli", + "code":"9", + "des":"You can use DLI to submit a Spark SQL job to query data. The general procedure is as follows:Step 1: Logging in to the Cloud PlatformStep 2: Uploading Data to OBSStep 3: ", + "doc_type":"usermanual", + "kw":"Creating and Submitting a Spark SQL Job,Getting Started,User Guide", + "title":"Creating and Submitting a Spark SQL Job", + "githuburl":"" + }, + { + "uri":"dli_01_0512.html", + "product_code":"dli", + "code":"10", + "des":"To facilitate SQL operations, DLI allows you to customize query templates or save the SQL statements in use as templates. After templates are saved, you do not need to wr", + "doc_type":"usermanual", + "kw":"Developing and Submitting a Spark SQL Job Using the TPC-H Sample Template,Getting Started,User Guide", + "title":"Developing and Submitting a Spark SQL Job Using the TPC-H Sample Template", + "githuburl":"" + }, + { + "uri":"dli_01_0375.html", + "product_code":"dli", + "code":"11", + "des":"You can use DLI to submit Spark jobs for real-time computing. The general procedure is as follows:Step 1: Logging in to the CloudStep 2: Uploading Data to OBSStep 3: Logg", + "doc_type":"usermanual", + "kw":"Creating and Submitting a Spark Jar Job,Getting Started,User Guide", + "title":"Creating and Submitting a Spark Jar Job", + "githuburl":"" + }, + { + "uri":"dli_01_0531.html", + "product_code":"dli", + "code":"12", + "des":"This section describes how to submit a Flink OpenSource SQL job on the DLI console for real-time computing. The general procedure is as follows:Step 1: Log In to the Clou", + "doc_type":"usermanual", + "kw":"Creating and Submitting a Flink OpenSource SQL Job,Getting Started,User Guide", + "title":"Creating and Submitting a Flink OpenSource SQL Job", + "githuburl":"" + }, + { + "uri":"dli_01_0377.html", + "product_code":"dli", + "code":"13", + "des":"Log in to DLI and go to the Overview page.The following table describes the functional areas of the Overview page.", + "doc_type":"usermanual", + "kw":"DLI Console Overview,User Guide", + "title":"DLI Console Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0320.html", + "product_code":"dli", + "code":"14", + "des":"You can use SQL statements in the SQL job editor to execute data query. DLI supports SQL 2003 and complies with Spark SQL.On the Overview page, click SQL Editor in the na", + "doc_type":"usermanual", + "kw":"SQL Editor,User Guide", + "title":"SQL Editor", + "githuburl":"" + }, + { + "uri":"dli_01_0001.html", + "product_code":"dli", + "code":"15", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Job Management", + "title":"Job Management", + "githuburl":"" + }, + { + "uri":"dli_01_0017.html", + "product_code":"dli", + "code":"16", + "des":"SQL jobs allow you to execute SQL statements entered in the SQL job editing window, import data, and export data.SQL job management provides the following functions:Searc", + "doc_type":"usermanual", + "kw":"SQL Job Management,Job Management,User Guide", + "title":"SQL Job Management", + "githuburl":"" + }, + { + "uri":"dli_01_0389.html", + "product_code":"dli", + "code":"17", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Flink Job Management", + "title":"Flink Job Management", + "githuburl":"" + }, + { + "uri":"dli_01_0403.html", + "product_code":"dli", + "code":"18", + "des":"On the Job Management page of Flink jobs, you can submit a Flink job. Currently, the following job types are supported:Flink SQL uses SQL statements to define jobs and ca", + "doc_type":"usermanual", + "kw":"Overview,Flink Job Management,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0479.html", + "product_code":"dli", + "code":"19", + "des":"You can isolate Flink jobs allocated to different users by setting permissions to ensure data query performance.The administrator and job creator have all permissions, wh", + "doc_type":"usermanual", + "kw":"Managing Flink Job Permissions,Flink Job Management,User Guide", + "title":"Managing Flink Job Permissions", + "githuburl":"" + }, + { + "uri":"dli_01_0454.html", + "product_code":"dli", + "code":"20", + "des":"To create a Flink job, you need to enter the data source and data output channel, that is, source and sink. To use another service as the source or sink stream, you need ", + "doc_type":"usermanual", + "kw":"Preparing Flink Job Data,Flink Job Management,User Guide", + "title":"Preparing Flink Job Data", + "githuburl":"" + }, + { + "uri":"dli_01_0498.html", + "product_code":"dli", + "code":"21", + "des":"This section describes how to create a Flink OpenSource SQL job. DLI Flink OpenSource SQL jobs are fully compatible with the syntax of Flink 1.10 and 1.12 provided by the", + "doc_type":"usermanual", + "kw":"(Recommended) Creating a Flink OpenSource SQL Job,Flink Job Management,User Guide", + "title":"(Recommended) Creating a Flink OpenSource SQL Job", + "githuburl":"" + }, + { + "uri":"dli_01_0455.html", + "product_code":"dli", + "code":"22", + "des":"This section describes how to create a Flink SQL job. You can use Flink SQLs to develop jobs to meet your service requirements. Using SQL statements simplifies logic impl", + "doc_type":"usermanual", + "kw":"Creating a Flink SQL job,Flink Job Management,User Guide", + "title":"Creating a Flink SQL job", + "githuburl":"" + }, + { + "uri":"dli_01_0457.html", + "product_code":"dli", + "code":"23", + "des":"This section describes how to create a Flink Jar job. 
You can perform secondary development based on Flink APIs, build your own JAR file, and submit the JAR file to DLI q", + "doc_type":"usermanual", + "kw":"Creating a Flink Jar Job,Flink Job Management,User Guide", + "title":"Creating a Flink Jar Job", + "githuburl":"" + }, + { + "uri":"dli_01_0458.html", + "product_code":"dli", + "code":"24", + "des":"The job debugging function helps you check the logic correctness of your compiled SQL statements before running a job.Currently, only Flink SQL jobs support this function", + "doc_type":"usermanual", + "kw":"Debugging a Flink Job,Flink Job Management,User Guide", + "title":"Debugging a Flink Job", + "githuburl":"" + }, + { + "uri":"dli_01_0461.html", + "product_code":"dli", + "code":"25", + "des":"After a job is created, you can perform operations on the job as required.Editing a JobStarting a JobStopping a JobDeleting a JobExporting a JobImporting a JobModifying N", + "doc_type":"usermanual", + "kw":"Performing Operations on a Flink Job,Flink Job Management,User Guide", + "title":"Performing Operations on a Flink Job", + "githuburl":"" + }, + { + "uri":"dli_01_0462.html", + "product_code":"dli", + "code":"26", + "des":"After creating a job, you can view the job details to learn about the following information:Viewing Job DetailsChecking the Job Monitoring InformationViewing the Task Lis", + "doc_type":"usermanual", + "kw":"Flink Job Details,Flink Job Management,User Guide", + "title":"Flink Job Details", + "githuburl":"" + }, + { + "uri":"dli_01_0465.html", + "product_code":"dli", + "code":"27", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Spark Job Management", + "title":"Spark Job Management", + "githuburl":"" + }, + { + "uri":"dli_01_0385.html", + "product_code":"dli", + "code":"28", + "des":"Based on the open-source Spark, DLI optimizes performance and reconstructs services to be compatible with the Apache Spark ecosystem and interfaces, and executes batch pr", + "doc_type":"usermanual", + "kw":"Spark Job Management,Spark Job Management,User Guide", + "title":"Spark Job Management", + "githuburl":"" + }, + { + "uri":"dli_01_0384.html", + "product_code":"dli", + "code":"29", + "des":"DLI provides fully-managed Spark computing services by allowing you to execute Spark jobs.On the Overview page, click Create Job in the upper right corner of the Spark Jo", + "doc_type":"usermanual", + "kw":"Creating a Spark Job,Spark Job Management,User Guide", + "title":"Creating a Spark Job", + "githuburl":"" + }, + { + "uri":"dli_01_0012.html", + "product_code":"dli", + "code":"30", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Queue Management", + "title":"Queue Management", + "githuburl":"" + }, + { + "uri":"dli_01_0402.html", + "product_code":"dli", + "code":"31", + "des":"Queues in DLI are computing resources, which are the basis for using DLI. 
All executed jobs require computing resources.Currently, DLI provides two types of queues, For S", + "doc_type":"usermanual", + "kw":"Overview,Queue Management,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0015.html", + "product_code":"dli", + "code":"32", + "des":"You can isolate queues allocated to different users by setting permissions to ensure data query performance.The administrator and queue owner have all permissions, which ", + "doc_type":"usermanual", + "kw":"Queue Permission Management,Queue Management,User Guide", + "title":"Queue Permission Management", + "githuburl":"" + }, + { + "uri":"dli_01_0363.html", + "product_code":"dli", + "code":"33", + "des":"Before executing a job, you need to create a queue.If you use a sub-account to create a queue for the first time, log in to the DLI management console using the main acco", + "doc_type":"usermanual", + "kw":"Creating a Queue,Queue Management,User Guide", + "title":"Creating a Queue", + "githuburl":"" + }, + { + "uri":"dli_01_0016.html", + "product_code":"dli", + "code":"34", + "des":"You can delete a queue based on actual conditions.This operation will fail if there are jobs in the Submitting or Running state on this queue.Deleting a queue does not ca", + "doc_type":"usermanual", + "kw":"Deleting a Queue,Queue Management,User Guide", + "title":"Deleting a Queue", + "githuburl":"" + }, + { + "uri":"dli_01_0443.html", + "product_code":"dli", + "code":"35", + "des":"If the CIDR block of the DLI queue conflicts with that of the user data source, you can change the CIDR block of the queue.If the queue whose CIDR block is to be modified", + "doc_type":"usermanual", + "kw":"Modifying the CIDR Block,Queue Management,User Guide", + "title":"Modifying the CIDR Block", + "githuburl":"" + }, + { + "uri":"dli_01_0487.html", + "product_code":"dli", + "code":"36", + "des":"Elastic scaling can be performed for a newly created queue only when there were jobs running in this queue.If Status of queue xxx is assigning, which is not available is ", + "doc_type":"usermanual", + "kw":"Elastic Scaling,Queue Management,User Guide", + "title":"Elastic Scaling", + "githuburl":"" + }, + { + "uri":"dli_01_0488.html", + "product_code":"dli", + "code":"37", + "des":"When services are busy, you might need to use more compute resources to process services in a period. After this period, you do not require the same amount of resources. ", + "doc_type":"usermanual", + "kw":"Scheduling CU Changes,Queue Management,User Guide", + "title":"Scheduling CU Changes", + "githuburl":"" + }, + { + "uri":"dli_01_0489.html", + "product_code":"dli", + "code":"38", + "des":"It can be used to test the connectivity between the DLI queue and the peer IP address specified by the user in common scenarios, or the connectivity between the DLI queue", + "doc_type":"usermanual", + "kw":"Testing Address Connectivity,Queue Management,User Guide", + "title":"Testing Address Connectivity", + "githuburl":"" + }, + { + "uri":"dli_01_0421.html", + "product_code":"dli", + "code":"39", + "des":"Once you have created a message notification topic, you can Add subscription of the topic on the Topic Management page of the Simple Message Notification service. 
You can", + "doc_type":"usermanual", + "kw":"Creating a Message Notification Topic,Queue Management,User Guide", + "title":"Creating a Message Notification Topic", + "githuburl":"" + }, + { + "uri":"dli_01_0004.html", + "product_code":"dli", + "code":"40", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Data Management", + "title":"Data Management", + "githuburl":"" + }, + { + "uri":"dli_01_0390.html", + "product_code":"dli", + "code":"41", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Databases and Tables", + "title":"Databases and Tables", + "githuburl":"" + }, + { + "uri":"dli_01_0228.html", + "product_code":"dli", + "code":"42", + "des":"DLI database and table management provide the following functions:Database Permission ManagementTable Permission ManagementCreating a Database or a TableDeleting a Databa", + "doc_type":"usermanual", + "kw":"Overview,Databases and Tables,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0447.html", + "product_code":"dli", + "code":"43", + "des":"You can isolate databases allocated to different users by setting permissions to ensure data query performance.The administrator and database owner have all permissions, ", + "doc_type":"usermanual", + "kw":"Database Permission Management,Databases and Tables,User Guide", + "title":"Database Permission Management", + "githuburl":"" + }, + { + "uri":"dli_01_0448.html", + "product_code":"dli", + "code":"44", + "des":"You can isolate databases allocated to different users by setting permissions to ensure data query performance.The administrator and database owner have all permissions, ", + "doc_type":"usermanual", + "kw":"Table Permission Management,Databases and Tables,User Guide", + "title":"Table Permission Management", + "githuburl":"" + }, + { + "uri":"dli_01_0005.html", + "product_code":"dli", + "code":"45", + "des":"A database, built on the computer storage device, is a data warehouse where data is organized, stored, and managed based on its structure.The table is an important part o", + "doc_type":"usermanual", + "kw":"Creating a Database or a Table,Databases and Tables,User Guide", + "title":"Creating a Database or a Table", + "githuburl":"" + }, + { + "uri":"dli_01_0011.html", + "product_code":"dli", + "code":"46", + "des":"You can delete unnecessary databases and tables based on actual conditions.You are not allowed to delete databases or tables that are being used for running jobs.The admi", + "doc_type":"usermanual", + "kw":"Deleting a Database or a Table,Databases and Tables,User Guide", + "title":"Deleting a Database or a Table", + "githuburl":"" + }, + { + "uri":"dli_01_0376.html", + "product_code":"dli", + "code":"47", + "des":"During actual use, developers create databases and tables and submit them to test personnel for testing. 
After the test is complete, the databases and tables are transfer", + "doc_type":"usermanual", + "kw":"Modifying the Owners of Databases and Tables,Databases and Tables,User Guide", + "title":"Modifying the Owners of Databases and Tables", + "githuburl":"" + }, + { + "uri":"dli_01_0253.html", + "product_code":"dli", + "code":"48", + "des":"You can import data from OBS to a table created in DLI.Only one path can be specified during data import. The path cannot contain commas (,).To import data in CSV format ", + "doc_type":"usermanual", + "kw":"Importing Data to the Table,Databases and Tables,User Guide", + "title":"Importing Data to the Table", + "githuburl":"" + }, + { + "uri":"dli_01_0010.html", + "product_code":"dli", + "code":"49", + "des":"You can export data from a DLI table to OBS. During the export, a folder is created in OBS or the content in the existing folder is overwritten.The exported file can be i", + "doc_type":"usermanual", + "kw":"Exporting Data from DLI to OBS,Databases and Tables,User Guide", + "title":"Exporting Data from DLI to OBS", + "githuburl":"" + }, + { + "uri":"dli_01_0008.html", + "product_code":"dli", + "code":"50", + "des":"Metadata is used to define data types. It describes information about the data, including the source, size, format, and other data features. In database fields, metadata ", + "doc_type":"usermanual", + "kw":"Viewing Metadata,Databases and Tables,User Guide", + "title":"Viewing Metadata", + "githuburl":"" + }, + { + "uri":"dli_01_0007.html", + "product_code":"dli", + "code":"51", + "des":"The Preview page displays the first 10 records in the table.You can preview data on either the Data Management page or the SQL Editor page.To preview data on the Data Man", + "doc_type":"usermanual", + "kw":"Previewing Data,Databases and Tables,User Guide", + "title":"Previewing Data", + "githuburl":"" + }, + { + "uri":"dli_01_0366.html", + "product_code":"dli", + "code":"52", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Package Management", + "title":"Package Management", + "githuburl":"" + }, + { + "uri":"dli_01_0407.html", + "product_code":"dli", + "code":"53", + "des":"Package management provides the following functions:Managing Package PermissionsCreating a PackageDeleting a PackageYou can delete program packages in batches.You can del", + "doc_type":"usermanual", + "kw":"Overview,Package Management,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0477.html", + "product_code":"dli", + "code":"54", + "des":"You can isolate package groups or packages allocated to different users by setting permissions to ensure data query performance.The administrator and the owner of a packa", + "doc_type":"usermanual", + "kw":"Managing Permissions on Packages and Package Groups,Package Management,User Guide", + "title":"Managing Permissions on Packages and Package Groups", + "githuburl":"" + }, + { + "uri":"dli_01_0367.html", + "product_code":"dli", + "code":"55", + "des":"DLI allows you to submit program packages in batches to the general-use queue for running.If you need to update a package, you can use the same package or file to upload ", + "doc_type":"usermanual", + "kw":"Creating a Package,Package Management,User Guide", + "title":"Creating a Package", + "githuburl":"" + }, + { + "uri":"dli_01_0369.html", + "product_code":"dli", + "code":"56", + "des":"You can delete a package based on actual conditions.On the left of the management console, choose Data Management > Package Management.Click Delete in the Operation colum", + "doc_type":"usermanual", + "kw":"Deleting a Package,Package Management,User Guide", + "title":"Deleting a Package", + "githuburl":"" + }, + { + "uri":"dli_01_0478.html", + "product_code":"dli", + "code":"57", + "des":"To change the owner of a package, click More > Modify Owner in the Operation column of a package on the Package Management page.If the package has been grouped, you can m", + "doc_type":"usermanual", + "kw":"Modifying the Owner,Package Management,User Guide", + "title":"Modifying the Owner", + "githuburl":"" + }, + { + "uri":"dli_01_0397.html", + "product_code":"dli", + "code":"58", + "des":"DLI built-in dependencies are provided by the platform by default. In case of conflicts, you do not need to upload them when packaging JAR packages of Spark or Flink Jar ", + "doc_type":"usermanual", + "kw":"Built-in Dependencies,Package Management,User Guide", + "title":"Built-in Dependencies", + "githuburl":"" + }, + { + "uri":"dli_01_0379.html", + "product_code":"dli", + "code":"59", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Job Templates", + "title":"Job Templates", + "githuburl":"" + }, + { + "uri":"dli_01_0021.html", + "product_code":"dli", + "code":"60", + "des":"To facilitate SQL operation execution, DLI allows you to customize query templates or save the SQL statements in use as templates. 
After templates are saved, you do not n", + "doc_type":"usermanual", + "kw":"SQL Template Management,Job Templates,User Guide", + "title":"SQL Template Management", + "githuburl":"" + }, + { + "uri":"dli_01_0464.html", + "product_code":"dli", + "code":"61", + "des":"Flink templates include sample templates and custom templates. You can modify an existing sample template to meet the actual job logic requirements and save time for edit", + "doc_type":"usermanual", + "kw":"Flink Template Management,Job Templates,User Guide", + "title":"Flink Template Management", + "githuburl":"" + }, + { + "uri":"dli_01_05110.html", + "product_code":"dli", + "code":"62", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Appendix", + "title":"Appendix", + "githuburl":"" + }, + { + "uri":"dli_01_05111.html", + "product_code":"dli", + "code":"63", + "des":"TPC-H is a test set developed by the Transaction Processing Performance Council (TPC) to simulate decision-making support applications. It is widely used in academia and ", + "doc_type":"usermanual", + "kw":"TPC-H Sample Data in the SQL Template,Appendix,User Guide", + "title":"TPC-H Sample Data in the SQL Template", + "githuburl":"" + }, + { + "uri":"dli_01_0422.html", + "product_code":"dli", + "code":"64", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Datasource Connections", + "title":"Datasource Connections", + "githuburl":"" + }, + { + "uri":"dli_01_0410.html", + "product_code":"dli", + "code":"65", + "des":"DLI supports the datasource capability of the native Spark and extends it. With DLI datasource connection, you can access other data storage services through SQL statemen", + "doc_type":"usermanual", + "kw":"Datasource Connection and Cross-Source Analysis,Datasource Connections,User Guide", + "title":"Datasource Connection and Cross-Source Analysis", + "githuburl":"" + }, + { + "uri":"dli_01_0426.html", + "product_code":"dli", + "code":"66", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Enhanced Datasource Connections", + "title":"Enhanced Datasource Connections", + "githuburl":"" + }, + { + "uri":"dli_01_0003.html", + "product_code":"dli", + "code":"67", + "des":"The enhanced datasource connection uses VPC peering at the bottom layer to directly connect the VPC network between the DLI cluster and the destination datasource. 
Data i", + "doc_type":"usermanual", + "kw":"Overview,Enhanced Datasource Connections,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0006.html", + "product_code":"dli", + "code":"68", + "des":"The following describes how to create a datasource HBase connection for MRS.Only enhanced datasource connection to MRS HBase is supported.Apply for a cluster in MRS.If a ", + "doc_type":"usermanual", + "kw":"Creating, Querying, and Deleting an Enhanced Datasource Connection,Enhanced Datasource Connections,U", + "title":"Creating, Querying, and Deleting an Enhanced Datasource Connection", + "githuburl":"" + }, + { + "uri":"dli_01_0009.html", + "product_code":"dli", + "code":"69", + "des":"The CIDR block of the DLI queue that is bound with a datasource connection cannot overlap with that of the data source.The default queue cannot be bound with a connection", + "doc_type":"usermanual", + "kw":"Binding and Unbinding a Queue,Enhanced Datasource Connections,User Guide", + "title":"Binding and Unbinding a Queue", + "githuburl":"" + }, + { + "uri":"dli_01_0013.html", + "product_code":"dli", + "code":"70", + "des":"Method 1: Copy hosts information in /etc/hosts of an MRS node.cat /etc/hostsMethod 2: Log in to FusionInsight Manager to obtain MRS hosts information.The host information", + "doc_type":"usermanual", + "kw":"Modifying Host Information,Enhanced Datasource Connections,User Guide", + "title":"Modifying Host Information", + "githuburl":"" + }, + { + "uri":"dli_01_0014.html", + "product_code":"dli", + "code":"71", + "des":"After an enhanced datasource connection is created and bound to a queue, the system automatically configures route information. You can also add a custom route for the qu", + "doc_type":"usermanual", + "kw":"Custom Route Information,Enhanced Datasource Connections,User Guide", + "title":"Custom Route Information", + "githuburl":"" + }, + { + "uri":"dli_01_0018.html", + "product_code":"dli", + "code":"72", + "des":"You can grant or revoke permissions for enhanced datasource connections of other projects.AuthorizationLog in to the DLI management console, choose Datasource Connections", + "doc_type":"usermanual", + "kw":"Enhanced Datasource Connection Permission Management,Enhanced Datasource Connections,User Guide", + "title":"Enhanced Datasource Connection Permission Management", + "githuburl":"" + }, + { + "uri":"dli_01_0480.html", + "product_code":"dli", + "code":"73", + "des":"You can isolate datasource connections allocated to different users by setting permissions to ensure data query performance.The administrator and datasource connection ow", + "doc_type":"usermanual", + "kw":"Managing Datasource Connection Permissions,Datasource Connections,User Guide", + "title":"Managing Datasource Connection Permissions", + "githuburl":"" + }, + { + "uri":"dli_01_0427.html", + "product_code":"dli", + "code":"74", + "des":"Datasource authentication is used to manage authentication information for accessing CSS and MRS security environments and encrypt passwords for accessing DWS, RDS, DDS, ", + "doc_type":"usermanual", + "kw":"Creating and Managing Datasource Authentication,Datasource Connections,User Guide", + "title":"Creating and Managing Datasource Authentication", + "githuburl":"" + }, + { + "uri":"dli_01_0485.html", + "product_code":"dli", + "code":"75", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Global Configuration", + "title":"Global Configuration", + "githuburl":"" + }, + { + "uri":"dli_01_0476.html", + "product_code":"dli", + "code":"76", + "des":"Global variables can be used to simplify complex parameters. For example, long and difficult variables can be replaced to improve the readability of SQL statements.In the", + "doc_type":"usermanual", + "kw":"Global Variables,Global Configuration,User Guide", + "title":"Global Variables", + "githuburl":"" + }, + { + "uri":"dli_01_0486.html", + "product_code":"dli", + "code":"77", + "des":"Only the tenant account or a subaccount of user group admin can authorize access.After entering the DLI management console, you are advised to set agency permissions to e", + "doc_type":"usermanual", + "kw":"Service Authorization,Global Configuration,User Guide", + "title":"Service Authorization", + "githuburl":"" + }, + { + "uri":"dli_01_0408.html", + "product_code":"dli", + "code":"78", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Permissions Management", + "title":"Permissions Management", + "githuburl":"" + }, + { + "uri":"dli_01_0440.html", + "product_code":"dli", + "code":"79", + "des":"DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM", + "doc_type":"usermanual", + "kw":"Overview,Permissions Management,User Guide", + "title":"Overview", + "githuburl":"" + }, + { + "uri":"dli_01_0418.html", + "product_code":"dli", + "code":"80", + "des":"You can use Identity and Access Management (IAM) to implement fine-grained permissions control on DLI resources. For details, see Overview.If your cloud account does not ", + "doc_type":"usermanual", + "kw":"Creating an IAM User and Granting Permissions,Permissions Management,User Guide", + "title":"Creating an IAM User and Granting Permissions", + "githuburl":"" + }, + { + "uri":"dli_01_0451.html", + "product_code":"dli", + "code":"81", + "des":"Custom policies can be created as a supplement to the system policies of DLI. You can add actions to custom policies. For the actions supported for custom policies, see \"", + "doc_type":"usermanual", + "kw":"Creating a Custom Policy,Permissions Management,User Guide", + "title":"Creating a Custom Policy", + "githuburl":"" + }, + { + "uri":"dli_01_0417.html", + "product_code":"dli", + "code":"82", + "des":"A resource is an object that exists within a service. You can select DLI resources by specifying their paths.", + "doc_type":"usermanual", + "kw":"DLI Resources,Permissions Management,User Guide", + "title":"DLI Resources", + "githuburl":"" + }, + { + "uri":"dli_01_0475.html", + "product_code":"dli", + "code":"83", + "des":"Request conditions are useful in determining when a custom policy takes effect. A request condition consists of a condition key and operator. 
Condition keys are either gl", + "doc_type":"usermanual", + "kw":"DLI Request Conditions,Permissions Management,User Guide", + "title":"DLI Request Conditions", + "githuburl":"" + }, + { + "uri":"dli_01_0441.html", + "product_code":"dli", + "code":"84", + "des":"Table 1 lists the common operations supported by each system policy of DLI. Choose proper system policies according to this table. For details about the SQL statement per", + "doc_type":"usermanual", + "kw":"Common Operations Supported by DLI System Policy,Permissions Management,User Guide", + "title":"Common Operations Supported by DLI System Policy", + "githuburl":"" + }, + { + "uri":"dli_01_0539.html", + "product_code":"dli", + "code":"85", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"FAQs", + "title":"FAQs", + "githuburl":"" + }, + { + "uri":"dli_03_0037.html", + "product_code":"dli", + "code":"86", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Flink Jobs", + "title":"Flink Jobs", + "githuburl":"" + }, + { + "uri":"dli_03_0083.html", + "product_code":"dli", + "code":"87", + "des":"DLI Flink jobs support the following data formats:Avro, Avro_merge, BLOB, CSV, EMAIL, JSON, ORC, Parquet, and XML.DLI Flink jobs support data from the following data sour", + "doc_type":"usermanual", + "kw":"What Data Formats and Data Sources Are Supported by DLI Flink Jobs?,Flink Jobs,User Guide", + "title":"What Data Formats and Data Sources Are Supported by DLI Flink Jobs?", + "githuburl":"" + }, + { + "uri":"dli_03_0139.html", + "product_code":"dli", + "code":"88", + "des":"A sub-user can view queues but cannot view Flink jobs. You can authorize the sub-user using DLI or IAM.Authorization on DLILog in to the DLI console using a tenant accoun", + "doc_type":"usermanual", + "kw":"How Do I Authorize a Subuser to View Flink Jobs?,Flink Jobs,User Guide", + "title":"How Do I Authorize a Subuser to View Flink Jobs?", + "githuburl":"" + }, + { + "uri":"dli_03_0090.html", + "product_code":"dli", + "code":"89", + "des":"DLI Flink jobs are highly available. You can enable the automatic restart function to automatically restart your jobs after short-time faults of peripheral services are r", + "doc_type":"usermanual", + "kw":"How Do I Set Auto Restart upon Exception for a Flink Job?,Flink Jobs,User Guide", + "title":"How Do I Set Auto Restart upon Exception for a Flink Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0099.html", + "product_code":"dli", + "code":"90", + "des":"When you create a Flink SQL job or Flink Jar job, you can select Save Job Log on the job editing page to save job running logs to OBS.To set the OBS bucket for storing th", + "doc_type":"usermanual", + "kw":"How Do I Save Flink Job Logs?,Flink Jobs,User Guide", + "title":"How Do I Save Flink Job Logs?", + "githuburl":"" + }, + { + "uri":"dli_03_0043.html", + "product_code":"dli", + "code":"91", + "des":"DLI can output Flink job results to DIS. You can view the results in DIS. 
For details, see Retrieving Data from DIS in the Data Ingestion Service User Guide.DLI can outpu", + "doc_type":"usermanual", + "kw":"How Can I Check Flink Job Results?,Flink Jobs,User Guide", + "title":"How Can I Check Flink Job Results?", + "githuburl":"" + }, + { + "uri":"dli_03_0160.html", + "product_code":"dli", + "code":"92", + "des":"Choose Job Management > Flink Jobs. In the Operation column of the target job, choose More > Permissions. When a new user is authorized, No such user. userName:xxxx. is d", + "doc_type":"usermanual", + "kw":"What Should I Do if \"No such user. userName:xxxx.\" Is Displayed on the Flink Job Management Page Whe", + "title":"What Should I Do if \"No such user. userName:xxxx.\" Is Displayed on the Flink Job Management Page When I Grant Permission to a User?", + "githuburl":"" + }, + { + "uri":"dli_03_0180.html", + "product_code":"dli", + "code":"93", + "des":"Checkpoint was enabled when a Flink job is created, and the OBS bucket for storing checkpoints was specified. After a Flink job is manually stopped, no message is display", + "doc_type":"usermanual", + "kw":"How Do I Know Which Checkpoint the Flink Job I Stopped Will Be Restored to When I Start the Job Agai", + "title":"How Do I Know Which Checkpoint the Flink Job I Stopped Will Be Restored to When I Start the Job Again?", + "githuburl":"" + }, + { + "uri":"dli_03_0130.html", + "product_code":"dli", + "code":"94", + "des":"The consumption capability of a Flink SQL job depends on the data source transmission, queue size, and job parameter settings. The peak consumption is 10 Mbit/s.", + "doc_type":"usermanual", + "kw":"How Much Data Can Be Processed in a Day by a Flink SQL Job?,Flink Jobs,User Guide", + "title":"How Much Data Can Be Processed in a Day by a Flink SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0061.html", + "product_code":"dli", + "code":"95", + "des":"The temp stream in Flink SQL is similar to a subquery. It is a logical stream used to simplify the SQL logic and does not generate data storage. Therefore, there is no ne", + "doc_type":"usermanual", + "kw":"Does Data in the Temporary Stream of Flink SQL Need to Be Cleared Periodically? How Do I Clear the D", + "title":"Does Data in the Temporary Stream of Flink SQL Need to Be Cleared Periodically? How Do I Clear the Data?", + "githuburl":"" + }, + { + "uri":"dli_03_0138.html", + "product_code":"dli", + "code":"96", + "des":"SymptomWhen you create a Flink SQL job and configure the parameters, you select an OBS bucket you have created. The system displays a message indicating that the OBS buck", + "doc_type":"usermanual", + "kw":"What Should I Do if a Message Is Displayed Indicating that the OBS Bucket Is Not Authorized When I S", + "title":"What Should I Do if a Message Is Displayed Indicating that the OBS Bucket Is Not Authorized When I Select an OBS Bucket for a Flink SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0089.html", + "product_code":"dli", + "code":"97", + "des":"When using a Flink SQL job, you need to create an OBS partition table for subsequent batch processing.In the following example, the day field is used as the partition fie", + "doc_type":"usermanual", + "kw":"How Do I Map an OBS Table to a Partitioned DLI Table?,Flink Jobs,User Guide", + "title":"How Do I Map an OBS Table to a Partitioned DLI Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0120.html", + "product_code":"dli", + "code":"98", + "des":"SymptomYou used Flink 1.10 to run a Flink Opensource SQL job. 
You set the number of Kafka partitions for the job a small value at the beginning and need to increase the n", + "doc_type":"usermanual", + "kw":"How Do I Change the Number of Kafka Partitions of a Flink SQL Job Without Stopping It?,Flink Jobs,Us", + "title":"How Do I Change the Number of Kafka Partitions of a Flink SQL Job Without Stopping It?", + "githuburl":"" + }, + { + "uri":"dli_03_0075.html", + "product_code":"dli", + "code":"99", + "des":"In this example, the day field is used as the partition field with the parquet encoding format (only the parquet format is supported currently) to dump car_info data to O", + "doc_type":"usermanual", + "kw":"How Can I Map an OBS Table to a DLI Partition Table?,Flink Jobs,User Guide", + "title":"How Can I Map an OBS Table to a DLI Partition Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0167.html", + "product_code":"dli", + "code":"100", + "des":"When I run the creation statement with an EL expression in the table name in a Flink SQL job, the following error message is displayed:DLI.0005: AnalysisException: t_user", + "doc_type":"usermanual", + "kw":"What Should I Do If Error Message DLI.0005 Is Displayed When I Use an EL Expression to Create a Tabl", + "title":"What Should I Do If Error Message DLI.0005 Is Displayed When I Use an EL Expression to Create a Table in a Flink SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0168.html", + "product_code":"dli", + "code":"101", + "des":"After data is written to OBS through the Flink job output stream, data cannot be queried from the DLI table created in the OBS file path.For example, use the following Fl", + "doc_type":"usermanual", + "kw":"What Should I Do If No Data Is Found in the DLI Table Created Using the OBS File Path That Stores th", + "title":"What Should I Do If No Data Is Found in the DLI Table Created Using the OBS File Path That Stores the Output of a Flink job?", + "githuburl":"" + }, + { + "uri":"dli_03_0174.html", + "product_code":"dli", + "code":"102", + "des":"After a Flink SQL job is submitted on DLI, the job fails to be executed. The following error information is displayed in the job log:connect to DIS failed java.lang.Illeg", + "doc_type":"usermanual", + "kw":"What Should I Do If a Flink SQL Job Fails to Be Executed, and \"connect to DIS failed java.lang.Illeg", + "title":"What Should I Do If a Flink SQL Job Fails to Be Executed, and \"connect to DIS failed java.lang.IllegalArgumentException: Access key cannot be null\" Is Recorded in the Log?", + "githuburl":"" + }, + { + "uri":"dli_03_0176.html", + "product_code":"dli", + "code":"103", + "des":"Semantic verification for a Flink SQL job (reading DIS data) fails. The following information is displayed when the job fails:Get dis channel xxxinfo failed. 
error info: ", + "doc_type":"usermanual", + "kw":"What Should I Do If \"Not authorized\" Is Reported When a Flink SQL Job Reads DIS Data?,Flink Jobs,Use", + "title":"What Should I Do If \"Not authorized\" Is Reported When a Flink SQL Job Reads DIS Data?", + "githuburl":"" + }, + { + "uri":"dli_03_0232.html", + "product_code":"dli", + "code":"104", + "des":"After a Flink SQL job consumed Kafka and sank data to the Elasticsearch cluster, the job was successfully executed, but no data is available.Possible causes are as follow", + "doc_type":"usermanual", + "kw":"Data Writing Fails After a Flink SQL Job Consumed Kafka and Sank Data to the Elasticsearch Cluster,F", + "title":"Data Writing Fails After a Flink SQL Job Consumed Kafka and Sank Data to the Elasticsearch Cluster", + "githuburl":"" + }, + { + "uri":"dli_03_0038.html", + "product_code":"dli", + "code":"105", + "des":"The procedure is as follows:Add the following code to the JAR package code of the Flink Jar job:// Configure the pom file on which the StreamExecutionEnvironment depends.", + "doc_type":"usermanual", + "kw":"How Do I Configure Checkpoints for Flink Jar Jobs and Save the Checkpoints to OBS?,Flink Jobs,User G", + "title":"How Do I Configure Checkpoints for Flink Jar Jobs and Save the Checkpoints to OBS?", + "githuburl":"" + }, + { + "uri":"dli_03_0044.html", + "product_code":"dli", + "code":"106", + "des":"Configuration files can be uploaded for user-defined jobs (JAR).Upload the configuration file to DLI through Package Management.In the Other Dependencies area of the Flin", + "doc_type":"usermanual", + "kw":"Does a Flink JAR Job Support Configuration File Upload? How Do I Upload a Configuration File?,Flink ", + "title":"Does a Flink JAR Job Support Configuration File Upload? How Do I Upload a Configuration File?", + "githuburl":"" + }, + { + "uri":"dli_03_0119.html", + "product_code":"dli", + "code":"107", + "des":"The dependency of your Flink job conflicts with a built-in dependency of the DLI Flink platform. As a result, the job submission fails.Delete your JAR package that is the", + "doc_type":"usermanual", + "kw":"What Should I Do If Job Submission Fails Due to Flink JAR Package Conflict?,Flink Jobs,User Guide", + "title":"What Should I Do If Job Submission Fails Due to Flink JAR Package Conflict?", + "githuburl":"" + }, + { + "uri":"dli_03_0161.html", + "product_code":"dli", + "code":"108", + "des":"When a Flink Jar job is submitted to access GaussDB(DWS), an error message is displayed indicating that the job fails to be started. The job log contains the following er", + "doc_type":"usermanual", + "kw":"What Should I Do If GaussDB(DWS) Fails to Be Started When a Flink Jar Job Accesses GaussDB(DWS), and", + "title":"What Should I Do If GaussDB(DWS) Fails to Be Started When a Flink Jar Job Accesses GaussDB(DWS), and a Message Indicating Too Many Client Connections is Displayed?", + "githuburl":"" + }, + { + "uri":"dli_03_0165.html", + "product_code":"dli", + "code":"109", + "des":"An exception occurred when a Flink Jar job is running. 
The following error information is displayed in the job log:org.apache.flink.shaded.curator.org.apache.curator.Conn", + "doc_type":"usermanual", + "kw":"What Should I Do If Error Message \"Authentication failed\" Is Displayed When a Flink Jar Job Is Runni", + "title":"What Should I Do If Error Message \"Authentication failed\" Is Displayed When a Flink Jar Job Is Running?", + "githuburl":"" + }, + { + "uri":"dli_03_0233.html", + "product_code":"dli", + "code":"110", + "des":"The storage path of the Flink Jar job checkpoints was set to an OBS bucket. The job failed to be submitted, and an error message indicating an invalid OBS bucket name was", + "doc_type":"usermanual", + "kw":"What Should I Do If Invalid OBS Bucket Name Is Reported After a Flink Job Submission Failed?,Flink J", + "title":"What Should I Do If Invalid OBS Bucket Name Is Reported After a Flink Job Submission Failed?", + "githuburl":"" + }, + { + "uri":"dli_03_0234.html", + "product_code":"dli", + "code":"111", + "des":"Flink Job submission failed. The exception information is as follows:Flink JAR packages conflicted. The submitted Flink JAR package conflicted with the HDFS JAR package o", + "doc_type":"usermanual", + "kw":"Flink Job Submission Fails Due to Hadoop JAR Package Conflicts,Flink Jobs,User Guide", + "title":"Flink Job Submission Fails Due to Hadoop JAR Package Conflicts", + "githuburl":"" + }, + { + "uri":"dli_03_0266.html", + "product_code":"dli", + "code":"112", + "des":"You can use Flink Jar to connect to Kafka with SASL SSL authentication enabled.", + "doc_type":"usermanual", + "kw":"How Do I Connect a Flink Jar Job to SASL_SSL?,Flink Jobs,User Guide", + "title":"How Do I Connect a Flink Jar Job to SASL_SSL?", + "githuburl":"" + }, + { + "uri":"dli_03_0106.html", + "product_code":"dli", + "code":"113", + "des":"Data Stacking in a Consumer GroupThe accumulated data of a consumer group can be calculated by the following formula: Total amount of data to be consumed by the consumer ", + "doc_type":"usermanual", + "kw":"How Do I Optimize Performance of a Flink Job?,Flink Jobs,User Guide", + "title":"How Do I Optimize Performance of a Flink Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0048.html", + "product_code":"dli", + "code":"114", + "des":"Add the following SQL statements to the Flink job:", + "doc_type":"usermanual", + "kw":"How Do I Write Data to Different Elasticsearch Clusters in a Flink Job?,Flink Jobs,User Guide", + "title":"How Do I Write Data to Different Elasticsearch Clusters in a Flink Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0096.html", + "product_code":"dli", + "code":"115", + "des":"The DLI Flink checkpoint/savepoint mechanism is complete and reliable. 
You can use this mechanism to prevent data loss when a job is manually restarted or restarted due t", + "doc_type":"usermanual", + "kw":"How Do I Prevent Data Loss After Flink Job Restart?,Flink Jobs,User Guide", + "title":"How Do I Prevent Data Loss After Flink Job Restart?", + "githuburl":"" + }, + { + "uri":"dli_03_0103.html", + "product_code":"dli", + "code":"116", + "des":"On the Flink job management page, hover the cursor on the status of the job that fails to be submitted to view the brief information about the failure.The possible causes", + "doc_type":"usermanual", + "kw":"How Do I Locate a Flink Job Submission Error?,Flink Jobs,User Guide", + "title":"How Do I Locate a Flink Job Submission Error?", + "githuburl":"" + }, + { + "uri":"dli_03_0105.html", + "product_code":"dli", + "code":"117", + "des":"On the Flink job management, click Edit in the Operation column of the target job. On the displayed page, check whether Save Job Log in the Running Parameters tab is enab", + "doc_type":"usermanual", + "kw":"How Do I Locate a Flink Job Running Error?,Flink Jobs,User Guide", + "title":"How Do I Locate a Flink Job Running Error?", + "githuburl":"" + }, + { + "uri":"dli_03_0136.html", + "product_code":"dli", + "code":"118", + "des":"Check the following operations:Adjusting or adding optimization parameters or the number of concurrent threads of a job, or modifying Flink SQL statements or a Flink Jar ", + "doc_type":"usermanual", + "kw":"How Do I Know Whether a Flink Job Can Be Restored from a Checkpoint After Being Restarted?,Flink Job", + "title":"How Do I Know Whether a Flink Job Can Be Restored from a Checkpoint After Being Restarted?", + "githuburl":"" + }, + { + "uri":"dli_03_0040.html", + "product_code":"dli", + "code":"119", + "des":"To rectify this fault, perform the following steps:Log in to the DIS management console. In the navigation pane, choose Stream Management. View the Flink job SQL statemen", + "doc_type":"usermanual", + "kw":"What Should I Do If the DIS Stream Does Not Exist During the Semantic Check?,Flink Jobs,User Guide", + "title":"What Should I Do If the DIS Stream Does Not Exist During the Semantic Check?", + "githuburl":"" + }, + { + "uri":"dli_03_0045.html", + "product_code":"dli", + "code":"120", + "des":"If the OBS bucket selected for a job is not authorized, perform the following steps:Select Enable Checkpointing or Save Job Log.Specify OBS Bucket.Select Authorize OBS.", + "doc_type":"usermanual", + "kw":"What Should I Do If the OBS Bucket Selected for a Job Is Not Authorized?,Flink Jobs,User Guide", + "title":"What Should I Do If the OBS Bucket Selected for a Job Is Not Authorized?", + "githuburl":"" + }, + { + "uri":"dli_03_0064.html", + "product_code":"dli", + "code":"121", + "des":"Mode for storing generated job logs when a DLI Flink job fails to be submitted or executed. The options are as follows:If the submission fails, a submission log is genera", + "doc_type":"usermanual", + "kw":"What Should I Do If Logs Are Not Written to the OBS Bucket After a DLI Flink Job Fails to Be Submitt", + "title":"What Should I Do If Logs Are Not Written to the OBS Bucket After a DLI Flink Job Fails to Be Submitted for Running?", + "githuburl":"" + }, + { + "uri":"dli_03_0121.html", + "product_code":"dli", + "code":"122", + "des":"SymptomYou used Flink 1.10 to run a Flink Opensource SQL job. 
The job failed after the following error was reported when Flink Sink wrote data to Kafka.Caused by: org.apa", + "doc_type":"usermanual", + "kw":"How Do I Configure Connection Retries for Kafka Sink If It Is Disconnected?,Flink Jobs,User Guide", + "title":"How Do I Configure Connection Retries for Kafka Sink If It Is Disconnected?", + "githuburl":"" + }, + { + "uri":"dli_03_0235.html", + "product_code":"dli", + "code":"123", + "des":"The Flink/Spark UI was displayed with incomplete information.When a pay-per-use queue is used to run a job, the system releases the cluster and takes about 10 minutes to ", + "doc_type":"usermanual", + "kw":"What Should I Do If the Flink/Spark UI Page Cannot Be Displayed Properly?,Flink Jobs,User Guide", + "title":"What Should I Do If the Flink/Spark UI Page Cannot Be Displayed Properly?", + "githuburl":"" + }, + { + "uri":"dli_03_0236.html", + "product_code":"dli", + "code":"124", + "des":"JobManager and TaskManager heartbeats timed out. As a result, the Flink job is abnormal.Check whether the network is intermittently disconnected and whether the cluster l", + "doc_type":"usermanual", + "kw":"What Should I Do If JobManager and TaskManager Heartbeats Timed Out and the Flink Job Is Abnormal?,Fl", + "title":"What Should I Do If JobManager and TaskManager Heartbeats Timed Out and the Flink Job Is Abnormal?", + "githuburl":"" + }, + { + "uri":"dli_03_0265.html", + "product_code":"dli", + "code":"125", + "des":"Test address connectivity.If the network is unreachable, rectify the network connection first. Ensure that the network connection between the DLI queue and the external d", + "doc_type":"usermanual", + "kw":"What Can I Do If \"Timeout expired while fetching topic metadata\" Is Repeatedly Reported in Flink Job", + "title":"What Can I Do If \"Timeout expired while fetching topic metadata\" Is Repeatedly Reported in Flink JobManager Logs?", + "githuburl":"" + }, + { + "uri":"dli_03_0020.html", + "product_code":"dli", + "code":"126", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Problems Related to SQL Jobs", + "title":"Problems Related to SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_03_0200.html", + "product_code":"dli", + "code":"127", + "des":"A temporary table is used to store intermediate results. When a transaction or session ends, the data in the temporary table can be automatically deleted. For example, in", + "doc_type":"usermanual", + "kw":"SQL Jobs,Problems Related to SQL Jobs,User Guide", + "title":"SQL Jobs", + "githuburl":"" + }, + { + "uri":"dli_03_0086.html", + "product_code":"dli", + "code":"128", + "des":"If a large number of small files are generated during SQL execution, job execution and table query will take a long time. In this case, you should merge small files.Set t", + "doc_type":"usermanual", + "kw":"How Do I Merge Small Files?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Merge Small Files?", + "githuburl":"" + }, + { + "uri":"dli_03_0092.html", + "product_code":"dli", + "code":"129", + "des":"When creating an OBS table, you must specify a table path in the database. 
The path format is as follows: obs://xxx/database name/table name.If the specified path is akdc", + "doc_type":"usermanual", + "kw":"How Do I Specify an OBS Path When Creating an OBS Table?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Specify an OBS Path When Creating an OBS Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0108.html", + "product_code":"dli", + "code":"130", + "des":"DLI allows you to associate JSON data in an OBS bucket to create tables in asynchronous mode.The statement for creating the table is as follows:", + "doc_type":"usermanual", + "kw":"How Do I Create a Table Using JSON Data in an OBS Bucket?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Create a Table Using JSON Data in an OBS Bucket?", + "githuburl":"" + }, + { + "uri":"dli_03_0087.html", + "product_code":"dli", + "code":"131", + "des":"You can use the where condition statement in the select statement to filter data. For example:", + "doc_type":"usermanual", + "kw":"How Do I Set Local Variables in SQL Statements?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Set Local Variables in SQL Statements?", + "githuburl":"" + }, + { + "uri":"dli_03_0069.html", + "product_code":"dli", + "code":"132", + "des":"The correct method for using the count function to perform aggregation is as follows:OrIf an incorrect method is used, an error will be reported.", + "doc_type":"usermanual", + "kw":"How Can I Use the count Function to Perform Aggregation?,Problems Related to SQL Jobs,User Guide", + "title":"How Can I Use the count Function to Perform Aggregation?", + "githuburl":"" + }, + { + "uri":"dli_03_0072.html", + "product_code":"dli", + "code":"133", + "des":"You can use the cross-region replication function of OBS. The procedure is as follows:Export the DLI table data in region 1 to the user-defined OBS bucket. For details, s", + "doc_type":"usermanual", + "kw":"How Do I Synchronize DLI Table Data from One Region to Another?,Problems Related to SQL Jobs,User Gu", + "title":"How Do I Synchronize DLI Table Data from One Region to Another?", + "githuburl":"" + }, + { + "uri":"dli_03_0191.html", + "product_code":"dli", + "code":"134", + "des":"Currently, DLI does not allow you to insert table data into specific fields. To insert table data, you must insert data of all table fields at a time.", + "doc_type":"usermanual", + "kw":"How Do I Insert Table Data into Specific Fields of a Table Using a SQL Job?,Problems Related to SQL ", + "title":"How Do I Insert Table Data into Specific Fields of a Table Using a SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0014.html", + "product_code":"dli", + "code":"135", + "des":"Create an OBS directory with a unique name. Alternatively, you can manually delete the existing OBS directory and submit the job again. 
However, exercise caution when del", + "doc_type":"usermanual", + "kw":"What Should I Do If Error \"path obs://xxx already exists\" Is Reported When Data Is Exported to OBS?,", + "title":"What Should I Do If Error \"path obs://xxx already exists\" Is Reported When Data Is Exported to OBS?", + "githuburl":"" + }, + { + "uri":"dli_03_0066.html", + "product_code":"dli", + "code":"136", + "des":"This message indicates that the two tables to be joined contain the same column, but the owner of the column is not specified when the command is executed.For example, ta", + "doc_type":"usermanual", + "kw":"What Should I Do If \"SQL_ANALYSIS_ERROR: Reference 't.id' is ambiguous, could be: t.id, t.id.;\" Is D", + "title":"What Should I Do If \"SQL_ANALYSIS_ERROR: Reference 't.id' is ambiguous, could be: t.id, t.id.;\" Is Displayed When Two Tables Are Joined", + "githuburl":"" + }, + { + "uri":"dli_03_0071.html", + "product_code":"dli", + "code":"137", + "des":"Check whether the account is in arrears. If yes, recharge the account.If the error persists, log out and log in again.", + "doc_type":"usermanual", + "kw":"What Should I Do If the System Notify Me that I Have No Permission to Execute the Query Statement Be", + "title":"What Should I Do If the System Notify Me that I Have No Permission to Execute the Query Statement Because the Current Account Was Restricted?", + "githuburl":"" + }, + { + "uri":"dli_03_0145.html", + "product_code":"dli", + "code":"138", + "des":"Cause AnalysisWhen you query the partitioned table XX.YYY, the partition column is not specified in the search criteria.A partitioned table can be queried only when the q", + "doc_type":"usermanual", + "kw":"What Should I Do If Error Message \"There should be at least one partition pruning predicate on parti", + "title":"What Should I Do If Error Message \"There should be at least one partition pruning predicate on partitioned table XX.YYY\" Is Displayed When a Query Statement Is Executed?", + "githuburl":"" + }, + { + "uri":"dli_03_0169.html", + "product_code":"dli", + "code":"139", + "des":"The following error message is displayed when the LOAD DATA command is executed by a Spark SQL job to import data to a DLI table:In some cases ,the following error messag", + "doc_type":"usermanual", + "kw":"What Should I Do If Error Message IllegalArgumentException: Buffer size too small. size is Displayed", + "title":"What Should I Do If Error Message IllegalArgumentException: Buffer size too small. size is Displayed When Data Is Loaded to an OBS Foreign Table", + "githuburl":"" + }, + { + "uri":"dli_03_0189.html", + "product_code":"dli", + "code":"140", + "des":"An error is reported during SQL job execution:Please contact DLI service. DLI.0002: FileNotFoundException: getFileStatus on obs://xxx: status [404]Check whether there is ", + "doc_type":"usermanual", + "kw":"What Should I Do If \"DLI.0002 FileNotFoundException\" Is Reported During SQL Job Running,Problems Rel", + "title":"What Should I Do If \"DLI.0002 FileNotFoundException\" Is Reported During SQL Job Running", + "githuburl":"" + }, + { + "uri":"dli_03_0046.html", + "product_code":"dli", + "code":"141", + "des":"Currently, DLI supports the Hive syntax for creating tables of the TEXTFILE, SEQUENCEFILE, RCFILE, ORC, AVRO, and PARQUET file types. 
If the file format specified for cre", + "doc_type":"usermanual", + "kw":"What Should I Do If A Schema Parsing Error Is Reported When I Create a Hive Table Using CTAS,Problem", + "title":"What Should I Do If A Schema Parsing Error Is Reported When I Create a Hive Table Using CTAS", + "githuburl":"" + }, + { + "uri":"dli_03_0173.html", + "product_code":"dli", + "code":"142", + "des":"When you run a DLI SQL script on DataArts Studio, the log shows that the statements fail to be executed. The error information is as follows:DLI.0999: RuntimeException: o", + "doc_type":"usermanual", + "kw":"What Should I Do If \"org.apache.hadoop.fs.obs.OBSIOException\" Is Reported If DLI SQL Scripts Are Exe", + "title":"What Should I Do If \"org.apache.hadoop.fs.obs.OBSIOException\" Is Reported If DLI SQL Scripts Are Executed on DataArts Studio?", + "githuburl":"" + }, + { + "uri":"dli_03_0172.html", + "product_code":"dli", + "code":"143", + "des":"After the migration job is submitted, the following error information is displayed in the log:org.apache.sqoop.common.SqoopException:UQUERY_CONNECTOR_0001:Invoke DLI serv", + "doc_type":"usermanual", + "kw":"What Should I Do If \"UQUERY_CONNECTOR_0001:Invoke DLI service api failed\" Is Reported in the Job Log", + "title":"What Should I Do If \"UQUERY_CONNECTOR_0001:Invoke DLI service api failed\" Is Reported in the Job Log When I Use CDM to Migrate Data to DLI?", + "githuburl":"" + }, + { + "uri":"dli_03_0207.html", + "product_code":"dli", + "code":"144", + "des":"Error message \"File not Found\" is displayed when a SQL job is accessed.Generally, the file cannot be found due to a read/write conflict. Check whether a job is overwritin", + "doc_type":"usermanual", + "kw":"What Should I Do If \"File not Found\" Is Displayed When I Access a SQL Job?,Problems Related to SQL J", + "title":"What Should I Do If \"File not Found\" Is Displayed When I Access a SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0208.html", + "product_code":"dli", + "code":"145", + "des":"Error message \"DLI.0003: AccessControlException XXX\" is reported when a SQL job is accessed.View the OBS bucket in the AccessControlException and check whether you are us", + "doc_type":"usermanual", + "kw":"What Should I Do If \"DLI.0003: AccessControlException XXX\" Is Reported When I Access a SQL Job?,Prob", + "title":"What Should I Do If \"DLI.0003: AccessControlException XXX\" Is Reported When I Access a SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0209.html", + "product_code":"dli", + "code":"146", + "des":"Error message \"DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists on {{bucket name}}: status [403]\" is reported when a SQL job is Accessed.Th", + "doc_type":"usermanual", + "kw":"What Should I Do If \"DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists", + "title":"What Should I Do If \"DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists on {{bucket name}}: status [403]\" Is Reported When I Access a SQL Job?", + "githuburl":"" + }, + { + "uri":"dli_03_0210.html", + "product_code":"dli", + "code":"147", + "des":"Error message \"The current account does not have permission to perform this operation,the current account was restricted.\" is reported during SQL statement execution.Chec", + "doc_type":"usermanual", + "kw":"What Should I Do If \"The current account does not have permission to perform this operation,the curr", + "title":"What Should I Do If \"The current account does not have permission to 
perform this operation,the current account was restricted. Restricted for no budget\" Is Reported During SQL Statement Execution? Restricted for no budget.", + "githuburl":"" + }, + { + "uri":"dli_03_0196.html", + "product_code":"dli", + "code":"148", + "des":"If the job runs slowly, perform the following steps to find the causes and rectify the fault:Check whether the problem is caused by FullGC.Log in to the DLI console. In t", + "doc_type":"usermanual", + "kw":"How Do I Troubleshoot Slow SQL Jobs?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Troubleshoot Slow SQL Jobs?", + "githuburl":"" + }, + { + "uri":"dli_03_0091.html", + "product_code":"dli", + "code":"149", + "des":"You can view SQL job logs for routine O&M.Obtain the ID of the DLI job executed on the DataArts Studio console.Job IDOn the DLI console, choose Job Management > SQL Jobs.", + "doc_type":"usermanual", + "kw":"How Do I View DLI SQL Logs?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I View DLI SQL Logs?", + "githuburl":"" + }, + { + "uri":"dli_03_0116.html", + "product_code":"dli", + "code":"150", + "des":"You can view the job execution records when a job is running.Log in to the DLI management console.In the navigation pane on the left, choose Job Management > SQL Jobs.Ent", + "doc_type":"usermanual", + "kw":"How Do I View SQL Execution Records?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I View SQL Execution Records?", + "githuburl":"" + }, + { + "uri":"dli_03_0093.html", + "product_code":"dli", + "code":"151", + "des":"If the execution of an SQL statement takes a long time, you need to access the Spark UI to check the execution status.If data skew occurs, the running time of a stage exc", + "doc_type":"usermanual", + "kw":"How Do I Eliminate Data Skew by Configuring AE Parameters?,Problems Related to SQL Jobs,User Guide", + "title":"How Do I Eliminate Data Skew by Configuring AE Parameters?", + "githuburl":"" + }, + { + "uri":"dli_03_0184.html", + "product_code":"dli", + "code":"152", + "des":"A DLI table exists but cannot be queried on the DLI console.If a table exists but cannot be queried, there is a high probability that the current user does not have the p", + "doc_type":"usermanual", + "kw":"What Can I Do If a Table Cannot Be Queried on the DLI Console?,Problems Related to SQL Jobs,User Gui", + "title":"What Can I Do If a Table Cannot Be Queried on the DLI Console?", + "githuburl":"" + }, + { + "uri":"dli_03_0013.html", + "product_code":"dli", + "code":"153", + "des":"A high compression ratio of OBS tables in the Parquet or ORC format (for example, a compression ratio of 5 or higher compared with text compression) will lead to large da", + "doc_type":"usermanual", + "kw":"The Compression Ratio of OBS Tables Is Too High,Problems Related to SQL Jobs,User Guide", + "title":"The Compression Ratio of OBS Tables Is Too High", + "githuburl":"" + }, + { + "uri":"dli_03_0009.html", + "product_code":"dli", + "code":"154", + "des":"DLI supports only UTF-8-encoded texts. 
Ensure that data is encoded using UTF-8 during table creation and import.", + "doc_type":"usermanual", + "kw":"How Can I Avoid Garbled Characters Caused by Inconsistent Character Codes?,Problems Related to SQL J", + "title":"How Can I Avoid Garbled Characters Caused by Inconsistent Character Codes?", + "githuburl":"" + }, + { + "uri":"dli_03_0175.html", + "product_code":"dli", + "code":"155", + "des":"User A created the testTable table in a database through a SQL job and granted user B the permission to insert and delete table data. User A deleted the testTable table a", + "doc_type":"usermanual", + "kw":"Do I Need to Grant Table Permissions to a User and Project After I Delete a Table and Create One wit", + "title":"Do I Need to Grant Table Permissions to a User and Project After I Delete a Table and Create One with the Same Name?", + "githuburl":"" + }, + { + "uri":"dli_03_0177.html", + "product_code":"dli", + "code":"156", + "des":"A CSV file is imported to a DLI partitioned table, but the imported file data does not contain the data in the partitioning column. The partitioning column needs to be sp", + "doc_type":"usermanual", + "kw":"What Should I Do If Table Data Fails to Be Queried After Data Is Imported to a DLI Partitioned Table", + "title":"What Should I Do If Table Data Fails to Be Queried After Data Is Imported to a DLI Partitioned Table Because the File to Be Imported Does Not Contain Data in the Partitioning Column?", + "githuburl":"" + }, + { + "uri":"dli_03_0181.html", + "product_code":"dli", + "code":"157", + "des":"When an OBS foreign table is created, a field in the specified OBS file contains a carriage return line feed (CRLF) character. As a result, the data is incorrect.The stat", + "doc_type":"usermanual", + "kw":"How Do I Fix the Data Error Caused by CRLF Characters in a Field of the OBS File Used to Create an E", + "title":"How Do I Fix the Data Error Caused by CRLF Characters in a Field of the OBS File Used to Create an External OBS Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0182.html", + "product_code":"dli", + "code":"158", + "des":"A SQL job contains join operations. After the job is submitted, it is stuck in the Running state and no result is returned.When a Spark SQL job has join operations on sma", + "doc_type":"usermanual", + "kw":"What Should I Do If a SQL Job That Has Join Operations Stays in the Running State?,Problems Related ", + "title":"What Should I Do If a SQL Job That Has Join Operations Stays in the Running State?", + "githuburl":"" + }, + { + "uri":"dli_03_0187.html", + "product_code":"dli", + "code":"159", + "des":"The on clause was not added to the SQL statement for joining tables. As a result, the Cartesian product query occurs due to multi-table association, and the queue resourc", + "doc_type":"usermanual", + "kw":"The on Clause Is Not Added When Tables Are Joined. Cartesian Product Query Causes High Resource Usag", + "title":"The on Clause Is Not Added When Tables Are Joined. Cartesian Product Query Causes High Resource Usage of the Queue, and the Job Fails to Be Executed", + "githuburl":"" + }, + { + "uri":"dli_03_0190.html", + "product_code":"dli", + "code":"160", + "des":"Partition data is manually uploaded to a partition of an OBS table. 
However, the data cannot be queried using DLI SQL editor.After manually adding partition data, you nee", "doc_type":"usermanual", "kw":"What Should I Do If Manually Added Data in a Partition of an OBS Table Cannot Be Queried?,Problems R", "title":"What Should I Do If Manually Added Data in a Partition of an OBS Table Cannot Be Queried?", "githuburl":"" }, { "uri":"dli_03_0212.html", "product_code":"dli", "code":"161", "des":"To dynamically overwrite the specified partitioned data in the DataSource table, set dli.sql.dynamicPartitionOverwrite.enabled to true and then run the insert overwrite s", "doc_type":"usermanual", "kw":"Why Is All Data Overwritten When insert overwrite Is Used to Overwrite Partitioned Table?,Problems R", "title":"Why Is All Data Overwritten When insert overwrite Is Used to Overwrite Partitioned Table?", "githuburl":"" }, { "uri":"dli_03_0213.html", "product_code":"dli", "code":"162", "des":"The possible causes and solutions are as follows:After you purchase a DLI queue and submit a SQL job for the first time, wait for 5 to 10 minutes. After the cluster is st", "doc_type":"usermanual", "kw":"Why Is a SQL Job Stuck in the Submitting State?,Problems Related to SQL Jobs,User Guide", "title":"Why Is a SQL Job Stuck in the Submitting State?", "githuburl":"" }, { "uri":"dli_03_0214.html", "product_code":"dli", "code":"163", "des":"Spark does not have the datetime type and uses the TIMESTAMP type instead.You can use a function to convert data types.The following is an example.select cast(create_date", "doc_type":"usermanual", "kw":"Why Is the create_date Field in the RDS Table a Timestamp in the DLI query result?,Problems Relat", "title":"Why Is the create_date Field in the RDS Table a Timestamp in the DLI query result?", "githuburl":"" }, { "uri":"dli_03_0215.html", "product_code":"dli", "code":"164", "des":"If the table name is changed immediately after SQL statements are executed, the data size of the table may be incorrect.If you need to change the table name, change it 5 ", "doc_type":"usermanual", "kw":"What Can I Do If datasize Cannot Be Changed After the Table Name Is Changed in a Finished SQL Job?,P", "title":"What Can I Do If datasize Cannot Be Changed After the Table Name Is Changed in a Finished SQL Job?", "githuburl":"" }, { "uri":"dli_03_0231.html", "product_code":"dli", "code":"165", "des":"When DLI is used to insert data into an OBS temporary table, only part of data is imported.Possible causes are as follows:The amount of data read during job execution is ", "doc_type":"usermanual", "kw":"Why Does the Data Volume Change When Data Is Imported from DLI to OBS?,Problems Related to SQL Jobs,U", "title":"Why Does the Data Volume Change When Data Is Imported from DLI to OBS?", "githuburl":"" }, { "uri":"dli_03_0021.html", "product_code":"dli", "code":"166", "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", "doc_type":"usermanual", "kw":"Problems Related to Spark Jobs", "title":"Problems Related to Spark Jobs", "githuburl":"" }, { "uri":"dli_03_0201.html", "product_code":"dli", "code":"167", "des":"DLI Spark does not support job scheduling. 
You can use other services, such as DataArts Studio, or use APIs or SDKs to customize job schedule.The Spark SQL syntax does no", + "doc_type":"usermanual", + "kw":"Spark Jobs,Problems Related to Spark Jobs,User Guide", + "title":"Spark Jobs", + "githuburl":"" + }, + { + "uri":"dli_03_0107.html", + "product_code":"dli", + "code":"168", + "des":"To use Spark to write data into a DLI table, configure the following parameters:fs.obs.access.keyfs.obs.secret.keyfs.obs.implfs.obs.endpointThe following is an example:", + "doc_type":"usermanual", + "kw":"How Do I Use Spark to Write Data into a DLI Table?,Problems Related to Spark Jobs,User Guide", + "title":"How Do I Use Spark to Write Data into a DLI Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0017.html", + "product_code":"dli", + "code":"169", + "des":"If the AK and SK are obtained, set the parameters as follows:Create SparkContext using codeval sc: SparkContext = new SparkContext()\nsc.hadoopConfiguration.set(\"fs.obs.ac", + "doc_type":"usermanual", + "kw":"How Do I Set the AK/SK for a Queue to Operate an OBS Table?,Problems Related to Spark Jobs,User Guid", + "title":"How Do I Set the AK/SK for a Queue to Operate an OBS Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0102.html", + "product_code":"dli", + "code":"170", + "des":"Log in to the DLI console. In the navigation pane, choose Job Management > Spark Jobs. In the job list, locate the target job and click next to Job ID to view the parame", + "doc_type":"usermanual", + "kw":"How Do I View the Resource Usage of DLI Spark Jobs?,Problems Related to Spark Jobs,User Guide", + "title":"How Do I View the Resource Usage of DLI Spark Jobs?", + "githuburl":"" + }, + { + "uri":"dli_03_0076.html", + "product_code":"dli", + "code":"171", + "des":"If the pymysql module is missing, check whether the corresponding EGG package exists. If the package does not exist, upload the pyFile package on the Package Management p", + "doc_type":"usermanual", + "kw":"How Do I Use Python Scripts to Access the MySQL Database If the pymysql Module Is Missing from the S", + "title":"How Do I Use Python Scripts to Access the MySQL Database If the pymysql Module Is Missing from the Spark Job Results Stored in MySQL?", + "githuburl":"" + }, + { + "uri":"dli_03_0082.html", + "product_code":"dli", + "code":"172", + "des":"DLI natively supports PySpark.For most cases, Python is preferred for data analysis, and PySpark is the best choice for big data analysis. 
Generally, JVM programs are pac", "doc_type":"usermanual", "kw":"How Do I Run a Complex PySpark Program in DLI?,Problems Related to Spark Jobs,User Guide", "title":"How Do I Run a Complex PySpark Program in DLI?", "githuburl":"" }, { "uri":"dli_03_0127.html", "product_code":"dli", "code":"173", "des":"You can use DLI Spark jobs to access data in the MySQL database using either of the following methods:Solution 1: Purchase a pay-per-use queue, create an enhanced datasou", "doc_type":"usermanual", "kw":"How Does a Spark Job Access a MySQL Database?,Problems Related to Spark Jobs,User Guide", "title":"How Does a Spark Job Access a MySQL Database?", "githuburl":"" }, { "uri":"dli_03_0068.html", "product_code":"dli", "code":"174", "des":"When shuffle statements, such as GROUP BY and JOIN, are executed in Spark jobs, data skew occurs, which slows down the job execution.To solve this problem, you can config", "doc_type":"usermanual", "kw":"How Do I Use JDBC to Set the spark.sql.shuffle.partitions Parameter to Improve the Task Concurrency?", "title":"How Do I Use JDBC to Set the spark.sql.shuffle.partitions Parameter to Improve the Task Concurrency?", "githuburl":"" }, { "uri":"dli_03_0118.html", "product_code":"dli", "code":"175", "des":"You can use SparkFiles to read the file submitted using --file from a local path: SparkFiles.get(\"Name of the uploaded file\").The file path in the Driver is different fro", "doc_type":"usermanual", "kw":"How Do I Read Uploaded Files for a Spark Jar Job?,Problems Related to Spark Jobs,User Guide", "title":"How Do I Read Uploaded Files for a Spark Jar Job?", "githuburl":"" }, { "uri":"dli_03_0023.html", "product_code":"dli", "code":"176", "des":"Spark 2.3 has changed the behavior of the internal interface Logging. If the user code directly inherits Logging and an earlier Spark version is used during comp", "doc_type":"usermanual", "kw":"What Can I Do When Receiving java.lang.AbstractMethodError in the Spark Job?,Problems Related to Spa", "title":"What Can I Do When Receiving java.lang.AbstractMethodError in the Spark Job?", "githuburl":"" }, { "uri":"dli_03_0156.html", "product_code":"dli", "code":"177", "des":"The following error is reported when a Spark job accesses OBS data:Set the AK/SK to enable Spark jobs to access OBS data. For details, see How Do I Set the AK/SK for a Qu", "doc_type":"usermanual", "kw":"What Should I Do If Errors \"ResponseCode: 403\" and \"ResponseStatus: Forbidden\" Are Reported When a S", "title":"What Should I Do If Errors \"ResponseCode: 403\" and \"ResponseStatus: Forbidden\" Are Reported When a Spark Job Accesses OBS Data", "githuburl":"" }, { "uri":"dli_03_0164.html", "product_code":"dli", "code":"178", "des":"Check whether the OBS bucket is used to store DLI logs on the Global Configuration > Job Configurations page. 
The job log bucket cannot be used for other purpose.", + "doc_type":"usermanual", + "kw":"Why is Error \"verifyBucketExists on XXXX: status [403]\" Is Reported When I Run a Spark Job with a Us", + "title":"Why is Error \"verifyBucketExists on XXXX: status [403]\" Is Reported When I Run a Spark Job with a User that has the Permission to Access the OBS Bucket?", + "githuburl":"" + }, + { + "uri":"dli_03_0157.html", + "product_code":"dli", + "code":"179", + "des":"When a Spark job accesses a large amount of data, for example, accessing data in a GaussDB(DWS) database, you are advised to set the number of concurrent tasks and enable", + "doc_type":"usermanual", + "kw":"What Should I Do if a Running Timeout Error Is Reported When a Spark Job Runs a Large Amount of Data", + "title":"What Should I Do if a Running Timeout Error Is Reported When a Spark Job Runs a Large Amount of Data?", + "githuburl":"" + }, + { + "uri":"dli_03_0188.html", + "product_code":"dli", + "code":"180", + "des":"Spark jobs cannot access SFTP. Upload the files you want to access to OBS and then you can analyze the data using Spark jobs.", + "doc_type":"usermanual", + "kw":"What Should I Do If a Spark job Used to Access Files in SFTP Fails and the Log Shows that the File D", + "title":"What Should I Do If a Spark job Used to Access Files in SFTP Fails and the Log Shows that the File Directory Is Abnormal?", + "githuburl":"" + }, + { + "uri":"dli_03_0192.html", + "product_code":"dli", + "code":"181", + "des":"When a Spark job is running, an error message is displayed, indicating that the user does not have the database permission. The error information is as follows:org.apache", + "doc_type":"usermanual", + "kw":"What Should I Do If a Job Fails Because the Job User Does Not Have the Database and Table Permission", + "title":"What Should I Do If a Job Fails Because the Job User Does Not Have the Database and Table Permission?", + "githuburl":"" + }, + { + "uri":"dli_03_0077.html", + "product_code":"dli", + "code":"182", + "des":"If the specified Python environment cannot be found after a Python3 package is added, the current compute cluster environment is Python2 by default.You can set spark.yarn", + "doc_type":"usermanual", + "kw":"What Can I Do If the Specified Python Environment Cannot Be Found After a Python Package Is Added?,P", + "title":"What Can I Do If the Specified Python Environment Cannot Be Found After a Python Package Is Added?", + "githuburl":"" + }, + { + "uri":"dli_03_0220.html", + "product_code":"dli", + "code":"183", + "des":"The remaining CUs in the queue may be insufficient. As a result, the job cannot be submitted.To view the remaining CUs of a queue, perform the following steps:Check the C", + "doc_type":"usermanual", + "kw":"Why Is a Spark Jar Job Stuck in the Submitting State?,Problems Related to Spark Jobs,User Guide", + "title":"Why Is a Spark Jar Job Stuck in the Submitting State?", + "githuburl":"" + }, + { + "uri":"dli_03_0001.html", + "product_code":"dli", + "code":"184", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Product Consultation", + "title":"Product Consultation", + "githuburl":"" + }, + { + "uri":"dli_03_0002.html", + "product_code":"dli", + "code":"185", + "des":"Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark, and Flink ecosystems. It frees you from managing any serv", + "doc_type":"usermanual", + "kw":"What Is DLI?,Product Consultation,User Guide", + "title":"What Is DLI?", + "githuburl":"" + }, + { + "uri":"dli_03_0025.html", + "product_code":"dli", + "code":"186", + "des":"DLI supports the following data formats:ParquetCSVORCJsonAvro", + "doc_type":"usermanual", + "kw":"Which Data Formats Does DLI Support?,Product Consultation,User Guide", + "title":"Which Data Formats Does DLI Support?", + "githuburl":"" + }, + { + "uri":"dli_03_0115.html", + "product_code":"dli", + "code":"187", + "des":"The Spark component of DLI is a fully managed service. You can only use the DLI Spark through its APIs. .The Spark component of MRS is built on the VM in an MRS cluster. ", + "doc_type":"usermanual", + "kw":"What Are the Differences Between MRS Spark and DLI Spark?,Product Consultation,User Guide", + "title":"What Are the Differences Between MRS Spark and DLI Spark?", + "githuburl":"" + }, + { + "uri":"dli_03_0029.html", + "product_code":"dli", + "code":"188", + "des":"DLI data can be stored in either of the following:OBS: Data used by SQL jobs, Spark jobs, and Flink jobs can be stored in OBS, reducing storage costs.DLI: The column-base", + "doc_type":"usermanual", + "kw":"Where Can DLI Data Be Stored?,Product Consultation,User Guide", + "title":"Where Can DLI Data Be Stored?", + "githuburl":"" + }, + { + "uri":"dli_03_0117.html", + "product_code":"dli", + "code":"189", + "des":"DLI tables store data within the DLI service, and you do not need to know the data storage path.OBS tables store data in your OBS buckets, and you need to manage the sour", + "doc_type":"usermanual", + "kw":"What Are the Differences Between DLI Tables and OBS Tables?,Product Consultation,User Guide", + "title":"What Are the Differences Between DLI Tables and OBS Tables?", + "githuburl":"" + }, + { + "uri":"dli_03_0010.html", + "product_code":"dli", + "code":"190", + "des":"Currently, DLI supports analysis only on the data uploaded to the cloud. In scenarios where regular (for example, on a per day basis) one-off analysis on incremental data", + "doc_type":"usermanual", + "kw":"How Can I Use DLI If Data Is Not Uploaded to OBS?,Product Consultation,User Guide", + "title":"How Can I Use DLI If Data Is Not Uploaded to OBS?", + "githuburl":"" + }, + { + "uri":"dli_03_0129.html", + "product_code":"dli", + "code":"191", + "des":"Data in the OBS bucket shared by IAM users under the same account can be imported. 
You cannot import data from an OBS bucket shared by other IAM accounts.", "doc_type":"usermanual", "kw":"Can I Import OBS Bucket Data Shared by Other Tenants into DLI?,Product Consultation,User Guide", "title":"Can I Import OBS Bucket Data Shared by Other Tenants into DLI?", "githuburl":"" }, { "uri":"dli_03_0264.html", "product_code":"dli", "code":"192", "des":"Log in to the management console.Click in the upper left corner and select Region and Project.Click (the My Quotas icon) in the upper right corner.The Service Quota pag", "doc_type":"usermanual", "kw":"What Should I Do If the System Failed to Create a Database and {\"error_code\":\"DLI.1028\";\"error_msg\":", "title":"What Should I Do If the System Failed to Create a Database and {\"error_code\":\"DLI.1028\";\"error_msg\":\"Already reached the maximum quota of databases:XXX\" Is Displayed Indicating that the Quota Is Insufficient?", "githuburl":"" }, { "uri":"dli_03_0263.html", "product_code":"dli", "code":"193", "des":"No, a global variable can only be used by the user who created it. Global variables can be used to simplify complex parameters. For example, long and difficult variables ", "doc_type":"usermanual", "kw":"Can a Member Account Use Global Variables Created by Other Member Accounts?,Product Consultation,Use", "title":"Can a Member Account Use Global Variables Created by Other Member Accounts?", "githuburl":"" }, { "uri":"dli_03_0126.html", "product_code":"dli", "code":"194", "des":"You are advised to perform the following operations to run a large number of DLI jobs:Group the DLI jobs by type, and run each group on a queue.Alternatively, create IAM", "doc_type":"usermanual", "kw":"How Do I Manage Tens of Thousands of Jobs Running on DLI?,Product Consultation,User Guide", "title":"How Do I Manage Tens of Thousands of Jobs Running on DLI?", "githuburl":"" }, { "uri":"dli_03_0162.html", "product_code":"dli", "code":"195", "des":"The field names of tables that have been created cannot be changed.You can create a table, define new table fields, and migrate data from the old table to the new one.", "doc_type":"usermanual", "kw":"How Do I Change the Name of a Field in a Created Table?,Product Consultation,User Guide", "title":"How Do I Change the Name of a Field in a Created Table?", "githuburl":"" }, { "uri":"dli_03_0260.html", "product_code":"dli", "code":"196", "des":"No. The spark.acls.enable configuration item is not used in DLI. The Apache Spark command injection vulnerability (CVE-2022-33891) does not exist in DLI.", "doc_type":"usermanual", "kw":"Does DLI Have the Apache Spark Command Injection Vulnerability (CVE-2022-33891)?,Product Consultatio", "title":"Does DLI Have the Apache Spark Command Injection Vulnerability (CVE-2022-33891)?", "githuburl":"" }, { "uri":"dli_03_0053.html", "product_code":"dli", "code":"197", "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Quota", + "title":"Quota", + "githuburl":"" + }, + { + "uri":"dli_03_0031.html", + "product_code":"dli", + "code":"198", + "des":"Log in to the management console.Click in the upper left corner and select Region and Project.Click (the My Quotas icon) in the upper right corner.The Service Quota pag", + "doc_type":"usermanual", + "kw":"How Do I View My Quotas?,Quota,User Guide", + "title":"How Do I View My Quotas?", + "githuburl":"" + }, + { + "uri":"dli_03_0032.html", + "product_code":"dli", + "code":"199", + "des":"The system does not support online quota adjustment. To increase a resource quota, dial the hotline or send an email to the customer service. We will process your applica", + "doc_type":"usermanual", + "kw":"How Do I Increase a Quota?,Quota,User Guide", + "title":"How Do I Increase a Quota?", + "githuburl":"" + }, + { + "uri":"dli_03_0054.html", + "product_code":"dli", + "code":"200", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Permission", + "title":"Permission", + "githuburl":"" + }, + { + "uri":"dli_03_0100.html", + "product_code":"dli", + "code":"201", + "des":"DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM", + "doc_type":"usermanual", + "kw":"How Do I Manage Fine-Grained DLI Permissions?,Permission,User Guide", + "title":"How Do I Manage Fine-Grained DLI Permissions?", + "githuburl":"" + }, + { + "uri":"dli_03_0008.html", + "product_code":"dli", + "code":"202", + "des":"You cannot perform permission-related operations on the partition column of a partition table. However, when you grant the permission of any non-partition column in a par", + "doc_type":"usermanual", + "kw":"What Is Column Permission Granting of a DLI Partition Table?,Permission,User Guide", + "title":"What Is Column Permission Granting of a DLI Partition Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0101.html", + "product_code":"dli", + "code":"203", + "des":"You can set actions and resources of different levels for various scenarios.Define an action.The format is Service name:Resource type:Action. You can use wildcard *. The ", + "doc_type":"usermanual", + "kw":"How Do I Create a Custom Policy?,Permission,User Guide", + "title":"How Do I Create a Custom Policy?", + "githuburl":"" + }, + { + "uri":"dli_03_0225.html", + "product_code":"dli", + "code":"204", + "des":"You can isolate queues allocated to different users by setting permissions to ensure data query performance.", + "doc_type":"usermanual", + "kw":"How Do I Manage Queue Permissions?,Permission,User Guide", + "title":"How Do I Manage Queue Permissions?", + "githuburl":"" + }, + { + "uri":"dli_03_0140.html", + "product_code":"dli", + "code":"205", + "des":"When you submit a job, a message is displayed indicating that the job fails to be submitted due to insufficient permission caused by arrears. 
In this case, you need to ch", "doc_type":"usermanual", "kw":"What Should I Do When My Account Does Not Have Permission Due To Arrears?,Permission,User Guide", "title":"What Should I Do When My Account Does Not Have Permission Due To Arrears?", "githuburl":"" }, { "uri":"dli_03_0195.html", "product_code":"dli", "code":"206", "des":"When the user updates an existing program package, the following error information is displayed:\"error_code\"*CLI.0003\",\"error_msg\":\"Permission denied for resource 'resourc", "doc_type":"usermanual", "kw":"What Should I Do If the Permission Is Required to Update a Program Package?,Permission,User Guide", "title":"What Should I Do If the Permission Is Required to Update a Program Package?", "githuburl":"" }, { "uri":"dli_03_0227.html", "product_code":"dli", "code":"207", "des":"When the SQL query statement is executed, the system displays a message indicating that the user does not have the permission to query resources.Error information: DLI.00", "doc_type":"usermanual", "kw":"What Should I Do If \"DLI.0003: Permission denied for resource...\" Is Reported When I Run a SQL State", "title":"What Should I Do If \"DLI.0003: Permission denied for resource...\" Is Reported When I Run a SQL Statement?", "githuburl":"" }, { "uri":"dli_03_0228.html", "product_code":"dli", "code":"208", "des":"The table permission has been granted and verified. However, after a period of time, an error is reported indicating that the table query fails.There are two possible rea", "doc_type":"usermanual", "kw":"What Should I Do If the Table Permission Has Been Granted But the Table Still Cannot Be Queried?,Per", "title":"What Should I Do If the Table Permission Has Been Granted But the Table Still Cannot Be Queried?", "githuburl":"" }, { "uri":"dli_03_0049.html", "product_code":"dli", "code":"209", "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", "doc_type":"usermanual", "kw":"Queue", "title":"Queue", "githuburl":"" }, { "uri":"dli_03_0109.html", "product_code":"dli", "code":"210", "des":"Currently, you are not allowed to modify the description of a created queue. You can add the description when purchasing the queue.", "doc_type":"usermanual", "kw":"Can the Description of a DLI Queue Be Modified?,Queue,User Guide", "title":"Can the Description of a DLI Queue Be Modified?", "githuburl":"" }, { "uri":"dli_03_0166.html", "product_code":"dli", "code":"211", "des":"Deleting a queue does not cause table data loss in your database.", "doc_type":"usermanual", "kw":"Will Table Data in My Database Be Lost If I Delete a Queue?,Queue,User Guide", "title":"Will Table Data in My Database Be Lost If I Delete a Queue?", "githuburl":"" }, { "uri":"dli_03_0170.html", "product_code":"dli", "code":"212", "des":"You need to develop a mechanism to retry failed jobs. 
When a faulty queue is recovered, your application tries to submit the failed jobs to the queue again.", + "doc_type":"usermanual", + "kw":"How Does DLI Ensure the Reliability of Spark Jobs When a Queue Is Abnormal?,Queue,User Guide", + "title":"How Does DLI Ensure the Reliability of Spark Jobs When a Queue Is Abnormal?", + "githuburl":"" + }, + { + "uri":"dli_03_0098.html", + "product_code":"dli", + "code":"213", + "des":"DLI allows you to subscribe to an SMN topic for failed jobs.Log in to the DLI console.In the navigation pane on the left, choose Queue Management.On the Queue Management ", + "doc_type":"usermanual", + "kw":"How Do I Monitor Queue Exceptions?,Queue,User Guide", + "title":"How Do I Monitor Queue Exceptions?", + "githuburl":"" + }, + { + "uri":"dli_03_0095.html", + "product_code":"dli", + "code":"214", + "des":"To check the running status of the DLI queue and determine whether to run more jobs on that queue, you need to check the queue load.Search for Cloud Eye on the console.In", + "doc_type":"usermanual", + "kw":"How Do I View DLI Queue Load?,Queue,User Guide", + "title":"How Do I View DLI Queue Load?", + "githuburl":"" + }, + { + "uri":"dli_03_0183.html", + "product_code":"dli", + "code":"215", + "des":"You need to check the large number of jobs in the Submitting and Running states on the queue.Use Cloud Eye to view jobs in different states on the queue. The procedure is", + "doc_type":"usermanual", + "kw":"How Do I Determine Whether There Are Too Many Jobs in the Current Queue?,Queue,User Guide", + "title":"How Do I Determine Whether There Are Too Many Jobs in the Current Queue?", + "githuburl":"" + }, + { + "uri":"dli_03_0065.html", + "product_code":"dli", + "code":"216", + "des":"Currently, DLI provides two types of queues, For SQL and For general use. SQL queues are used to run SQL jobs. General-use queues are compatible with Spark queues of earl", + "doc_type":"usermanual", + "kw":"How Do I Switch an Earlier-Version Spark Queue to a General-Purpose Queue?,Queue,User Guide", + "title":"How Do I Switch an Earlier-Version Spark Queue to a General-Purpose Queue?", + "githuburl":"" + }, + { + "uri":"dli_03_0193.html", + "product_code":"dli", + "code":"217", + "des":"DLI queues do not use resources or bandwidth when no job is running. In this case, the running status of DLI queues is not displayed on CES.", + "doc_type":"usermanual", + "kw":"Why Cannot I View the Resource Running Status of DLI Queues on Cloud Eye?,Queue,User Guide", + "title":"Why Cannot I View the Resource Running Status of DLI Queues on Cloud Eye?", + "githuburl":"" + }, + { + "uri":"dli_03_0088.html", + "product_code":"dli", + "code":"218", + "des":"In DLI, 64 CU = 64 cores and 256 GB memory.In a Spark job, if the driver occupies 4 cores and 16 GB memory, the executor can occupy 60 cores and 240 GB memory.", + "doc_type":"usermanual", + "kw":"How Do I Allocate Queue Resources for Running Spark Jobs If I Have Purchased 64 CUs?,Queue,User Guid", + "title":"How Do I Allocate Queue Resources for Running Spark Jobs If I Have Purchased 64 CUs?", + "githuburl":"" + }, + { + "uri":"dli_03_0159.html", + "product_code":"dli", + "code":"219", + "des":"Queue plans create failed. The plan xxx target cu is out of quota is displayed when you create a scheduled scaling task.The CU quota of the current account is insufficien", + "doc_type":"usermanual", + "kw":"What Should I Do If \"Queue plans create failed. 
The plan xxx target cu is out of quota\" Is Displayed", + "title":"What Should I Do If \"Queue plans create failed. The plan xxx target cu is out of quota\" Is Displayed When I Schedule CU Changes?", + "githuburl":"" + }, + { + "uri":"dli_03_0171.html", + "product_code":"dli", + "code":"220", + "des":"After a SQL job was submitted to the default queue, the job runs abnormally. The job log reported that the execution timed out. The exception logs are as follows:[ERROR] ", + "doc_type":"usermanual", + "kw":"What Should I Do If SQL Statements Fails to be Executed on the Default Queue, and a Timeout Exceptio", + "title":"What Should I Do If SQL Statements Fails to be Executed on the Default Queue, and a Timeout Exception is Reported?", + "githuburl":"" + }, + { + "uri":"dli_03_0022.html", + "product_code":"dli", + "code":"221", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Datasource Connections", + "title":"Datasource Connections", + "githuburl":"" + }, + { + "uri":"dli_03_0128.html", + "product_code":"dli", + "code":"222", + "des":"You need to create a VPC peering connection to enable network connectivity. Take MRS as an example. If DLI and MRS clusters are in the same VPC, and the security group is", + "doc_type":"usermanual", + "kw":"Why Do I Need to Create a VPC Peering Connection for an Enhanced Datasource Connection?,Datasource C", + "title":"Why Do I Need to Create a VPC Peering Connection for an Enhanced Datasource Connection?", + "githuburl":"" + }, + { + "uri":"dli_03_0237.html", + "product_code":"dli", + "code":"223", + "des":"An enhanced datasource connection failed to pass the network connectivity test. Datasource connection cannot be bound to a queue. The following error information is displ", + "doc_type":"usermanual", + "kw":"Failed to Bind a Queue to an Enhanced Datasource Connection,Datasource Connections,User Guide", + "title":"Failed to Bind a Queue to an Enhanced Datasource Connection", + "githuburl":"" + }, + { + "uri":"dli_03_0238.html", + "product_code":"dli", + "code":"224", + "des":"The outbound rule had been configured for the security group of the queue associated with the enhanced datasource connection. The datasource authentication used a passwor", + "doc_type":"usermanual", + "kw":"DLI Failed to Connect to GaussDB(DWS) Through an Enhanced Datasource Connection,Datasource Connectio", + "title":"DLI Failed to Connect to GaussDB(DWS) Through an Enhanced Datasource Connection", + "githuburl":"" + }, + { + "uri":"dli_03_0179.html", + "product_code":"dli", + "code":"225", + "des":"A datasource connection is created and bound to a queue. 
The connectivity test fails and the following error information is displayed:failed to connect to specified addre", + "doc_type":"usermanual", + "kw":"What Can I Do If the Datasource Connection is Created But the Network Connectivity Test Fails?,Datas", + "title":"What Can I Do If the Datasource Connection is Created But the Network Connectivity Test Fails?", + "githuburl":"" + }, + { + "uri":"dli_03_0186.html", + "product_code":"dli", + "code":"226", + "des":"Configuring the Connection Between a DLI Queue and a Data Source in a Private NetworkIf your DLI job needs to connect to a data source, for example, MRS, RDS, CSS, Kafka,", + "doc_type":"usermanual", + "kw":"How Do I Configure the Network Between a DLI Queue and a Data Source?,Datasource Connections,User Gu", + "title":"How Do I Configure the Network Between a DLI Queue and a Data Source?", + "githuburl":"" + }, + { + "uri":"dli_03_0257.html", + "product_code":"dli", + "code":"227", + "des":"The possible causes and solutions are as follows:If you have created a queue, do not bind it to a datasource connection immediately. Wait for 5 to 10 minutes. After the c", + "doc_type":"usermanual", + "kw":"What Can I Do If a Datasource Connection Is Stuck in Creating State When I Try to Bind a Queue to It", + "title":"What Can I Do If a Datasource Connection Is Stuck in Creating State When I Try to Bind a Queue to It?", + "githuburl":"" + }, + { + "uri":"dli_03_0258.html", + "product_code":"dli", + "code":"228", + "des":"Enhanced datasource connections support only yearly/monthly and pay-per-use queues.You can bind a datasource connection only when you select Dedicated Resource Mode when ", + "doc_type":"usermanual", + "kw":"How Do I Bind a Datasource Connection to a Pay-per-Use Queue?,Datasource Connections,User Guide", + "title":"How Do I Bind a Datasource Connection to a Pay-per-Use Queue?", + "githuburl":"" + }, + { + "uri":"dli_03_0259.html", + "product_code":"dli", + "code":"229", + "des":"DLI enhanced datasource connection uses VPC peering to directly connect the VPC networks of the desired data sources for point-to-point data exchanges.", + "doc_type":"usermanual", + "kw":"How Do I Connect DLI to Data Sources?,Datasource Connections,User Guide", + "title":"How Do I Connect DLI to Data Sources?", + "githuburl":"" + }, + { + "uri":"dli_03_0011.html", + "product_code":"dli", + "code":"230", + "des":"To perform query on data stored on services rather than DLI, perform the following steps:Assume that the data to be queried is stored on multiple services (for example, O", + "doc_type":"usermanual", + "kw":"How Can I Perform Query on Data Stored on Services Rather Than DLI?,Datasource Connections,User Guid", + "title":"How Can I Perform Query on Data Stored on Services Rather Than DLI?", + "githuburl":"" + }, + { + "uri":"dli_03_0085.html", + "product_code":"dli", + "code":"231", + "des":"Connect VPCs in different regions.Create an enhanced datasource connection on DLI and bind it to a queue.Add a DLI route.", + "doc_type":"usermanual", + "kw":"How Can I Access Data Across Regions?,Datasource Connections,User Guide", + "title":"How Can I Access Data Across Regions?", + "githuburl":"" + }, + { + "uri":"dli_03_0028.html", + "product_code":"dli", + "code":"232", + "des":"When data is inserted into DLI, set the ID field to NULL.", + "doc_type":"usermanual", + "kw":"How Do I Set the Auto-increment Primary Key or Other Fields That Are Automatically Filled in the RDS", + "title":"How Do I Set the Auto-increment Primary Key or Other Fields That Are 
Automatically Filled in the RDS Table When Creating a DLI and Associating It with the RDS Table?", + "githuburl":"" + }, + { + "uri":"dli_03_0047.html", + "product_code":"dli", + "code":"233", + "des":"Possible CausesThe network connectivity is abnormal. Check whether the security group is correctly selected and whether the VPC is correctly configured.The network connec", + "doc_type":"usermanual", + "kw":"Why Is the Error Message \"communication link failure\" Displayed When I Use a Newly Activated Datasou", + "title":"Why Is the Error Message \"communication link failure\" Displayed When I Use a Newly Activated Datasource Connection?", + "githuburl":"" + }, + { + "uri":"dli_03_0080.html", + "product_code":"dli", + "code":"234", + "des":"The cluster host information is not added to the datasource connection. As a result, the KRB authentication fails, the connection times out, and no error is recorded in l", + "doc_type":"usermanual", + "kw":"Connection Times Out During MRS HBase Datasource Connection, and No Error Is Recorded in Logs,Dataso", + "title":"Connection Times Out During MRS HBase Datasource Connection, and No Error Is Recorded in Logs", + "githuburl":"" + }, + { + "uri":"dli_03_0111.html", + "product_code":"dli", + "code":"235", + "des":"When you create a VPC peering connection for the datasource connection, the following error information is displayed:Before you create a datasource connection, check whet", + "doc_type":"usermanual", + "kw":"What Should I Do If the Subnet Cannot Be Found When I Create a DLI Datasource Connection?,Datasource", + "title":"What Should I Do If the Subnet Cannot Be Found When I Create a DLI Datasource Connection?", + "githuburl":"" + }, + { + "uri":"dli_03_0239.html", + "product_code":"dli", + "code":"236", + "des":"A cross-source RDS table was created in the DataArts Studio, and the insert overwrite statement was executed to write data into RDS. DLI.0999: BatchUpdateException: Incor", + "doc_type":"usermanual", + "kw":"Error Message \"Incorrect string value\" Is Displayed When insert overwrite Is Executed on a Cross-Sou", + "title":"Error Message \"Incorrect string value\" Is Displayed When insert overwrite Is Executed on a Cross-Source RDS Table", + "githuburl":"" + }, + { + "uri":"dli_03_0250.html", + "product_code":"dli", + "code":"237", + "des":"The system failed to create a cross-source RDS table, and null pointer error was reported.The following table creation statement was used:The RDS database is in a PostGre", + "doc_type":"usermanual", + "kw":"Null Pointer Error Is Displayed When the System Creates a Cross-Source RDS Table,Datasource Connecti", + "title":"Null Pointer Error Is Displayed When the System Creates a Cross-Source RDS Table", + "githuburl":"" + }, + { + "uri":"dli_03_0251.html", + "product_code":"dli", + "code":"238", + "des":"The system failed to execute insert overwrite on the cross-source GaussDB(DWS) table, and org.postgresql.util.PSQLException: ERROR: tuple concurrently updated was display", + "doc_type":"usermanual", + "kw":"Error Message \"org.postgresql.util.PSQLException: ERROR: tuple concurrently updated\" Is Displayed Wh", + "title":"Error Message \"org.postgresql.util.PSQLException: ERROR: tuple concurrently updated\" Is Displayed When the System Executes insert overwrite on a Cross-Source GaussDB(DWS) Table", + "githuburl":"" + }, + { + "uri":"dli_03_0252.html", + "product_code":"dli", + "code":"239", + "des":"A cross-source table was used to import data to a CloudTable HBase table. 
This HBase table contains a column family and a rowkey for 100 million simulating data records. ", + "doc_type":"usermanual", + "kw":"RegionTooBusyException Is Reported When Data Is Imported to a CloudTable HBase Table Through a Cross", + "title":"RegionTooBusyException Is Reported When Data Is Imported to a CloudTable HBase Table Through a Cross-Source Table", + "githuburl":"" + }, + { + "uri":"dli_03_0253.html", + "product_code":"dli", + "code":"240", + "des":"A table was created on GaussDB(DWS) and then a datasource connection was created on DLI to read and write data. An error message was displayed during data writing, indica", + "doc_type":"usermanual", + "kw":"A Null Value Is Written Into a Non-Null Field When a DLI Datasource Connection Is Used to Connect to", + "title":"A Null Value Is Written Into a Non-Null Field When a DLI Datasource Connection Is Used to Connect to a GaussDB(DWS) Table", + "githuburl":"" + }, + { + "uri":"dli_03_0254.html", + "product_code":"dli", + "code":"241", + "des":"A cross-source GaussDB(DWS) table and the datasource connection were created in DLI, and the schema of the source table in GaussDB(DWS) were updated. During the job execu", + "doc_type":"usermanual", + "kw":"An Insert Operation Failed After the Schema of the GaussDB(DWS) Source Table Is Updated,Datasource C", + "title":"An Insert Operation Failed After the Schema of the GaussDB(DWS) Source Table Is Updated", + "githuburl":"" + }, + { + "uri":"dli_01_00006.html", + "product_code":"dli", + "code":"242", + "des":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "doc_type":"usermanual", + "kw":"Change History,User Guide", + "title":"Change History", + "githuburl":"" + } +] \ No newline at end of file diff --git a/docs/dli/umn/CLASS.TXT.json b/docs/dli/umn/CLASS.TXT.json new file mode 100644 index 00000000..c1a7318b --- /dev/null +++ b/docs/dli/umn/CLASS.TXT.json @@ -0,0 +1,2180 @@ +[ + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Service Overview", + "uri":"dli_01_0538.html", + "doc_type":"usermanual", + "p_code":"", + "code":"1" + }, + { + "desc":"Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark and Apache Flink ecosystems. It frees you from managing an", + "product_code":"dli", + "title":"DLI Introduction", + "uri":"dli_01_0378.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"2" + }, + { + "desc":"You do not need a background in big data to use DLI for data analysis. You only need to know SQL, and you are good to go. 
The SQL syntax is fully compatible with the stan", + "product_code":"dli", + "title":"Advantages", + "uri":"dli_07_0007.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"3" + }, + { + "desc":"DLI is applicable to large-scale log analysis, federated analysis of heterogeneous data sources, and big data ETL processing.Gaming operation data analysisDifferent depar", + "product_code":"dli", + "title":"Application Scenarios", + "uri":"dli_07_0002.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"4" + }, + { + "desc":"When using DLI, you may need to make adjustments according to the following restrictions:Recommended browsers for logging in to DLI:Google Chrome 43.0 or laterMozilla Fir", + "product_code":"dli", + "title":"Constraints and Limitations on Using DLI", + "uri":"dli_07_0005.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"5" + }, + { + "desc":"If you need to assign different permissions to employees in your enterprise to access your DLI resources, IAM is a good choice for fine-grained permissions management. IA", + "product_code":"dli", + "title":"Permissions Management", + "uri":"dli_07_0006.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"6" + }, + { + "desc":"DLI allows multiple organizations, departments, or applications to share resources. A logical entity, also called a tenant, is provided to use diverse resources and servi", + "product_code":"dli", + "title":"Basic Concepts", + "uri":"dli_07_0003.html", + "doc_type":"usermanual", + "p_code":"1", + "code":"7" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Getting Started", + "uri":"dli_01_0220.html", + "doc_type":"usermanual", + "p_code":"", + "code":"8" + }, + { + "desc":"You can use DLI to submit a Spark SQL job to query data. The general procedure is as follows:Step 1: Logging in to the Cloud PlatformStep 2: Uploading Data to OBSStep 3: ", + "product_code":"dli", + "title":"Creating and Submitting a Spark SQL Job", + "uri":"dli_01_0002.html", + "doc_type":"usermanual", + "p_code":"8", + "code":"9" + }, + { + "desc":"To facilitate SQL operations, DLI allows you to customize query templates or save the SQL statements in use as templates. After templates are saved, you do not need to wr", + "product_code":"dli", + "title":"Developing and Submitting a Spark SQL Job Using the TPC-H Sample Template", + "uri":"dli_01_0512.html", + "doc_type":"usermanual", + "p_code":"8", + "code":"10" + }, + { + "desc":"You can use DLI to submit Spark jobs for real-time computing. The general procedure is as follows:Step 1: Logging in to the CloudStep 2: Uploading Data to OBSStep 3: Logg", + "product_code":"dli", + "title":"Creating and Submitting a Spark Jar Job", + "uri":"dli_01_0375.html", + "doc_type":"usermanual", + "p_code":"8", + "code":"11" + }, + { + "desc":"This section describes how to submit a Flink OpenSource SQL job on the DLI console for real-time computing. 
The general procedure is as follows:Step 1: Log In to the Clou", + "product_code":"dli", + "title":"Creating and Submitting a Flink OpenSource SQL Job", + "uri":"dli_01_0531.html", + "doc_type":"usermanual", + "p_code":"8", + "code":"12" + }, + { + "desc":"Log in to DLI and go to the Overview page.The following table describes the functional areas of the Overview page.", + "product_code":"dli", + "title":"DLI Console Overview", + "uri":"dli_01_0377.html", + "doc_type":"usermanual", + "p_code":"", + "code":"13" + }, + { + "desc":"You can use SQL statements in the SQL job editor to execute data query. DLI supports SQL 2003 and complies with Spark SQL.On the Overview page, click SQL Editor in the na", + "product_code":"dli", + "title":"SQL Editor", + "uri":"dli_01_0320.html", + "doc_type":"usermanual", + "p_code":"", + "code":"14" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Job Management", + "uri":"dli_01_0001.html", + "doc_type":"usermanual", + "p_code":"", + "code":"15" + }, + { + "desc":"SQL jobs allow you to execute SQL statements entered in the SQL job editing window, import data, and export data.SQL job management provides the following functions:Searc", + "product_code":"dli", + "title":"SQL Job Management", + "uri":"dli_01_0017.html", + "doc_type":"usermanual", + "p_code":"15", + "code":"16" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink Job Management", + "uri":"dli_01_0389.html", + "doc_type":"usermanual", + "p_code":"15", + "code":"17" + }, + { + "desc":"On the Job Management page of Flink jobs, you can submit a Flink job. Currently, the following job types are supported:Flink SQL uses SQL statements to define jobs and ca", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0403.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"18" + }, + { + "desc":"You can isolate Flink jobs allocated to different users by setting permissions to ensure data query performance.The administrator and job creator have all permissions, wh", + "product_code":"dli", + "title":"Managing Flink Job Permissions", + "uri":"dli_01_0479.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"19" + }, + { + "desc":"To create a Flink job, you need to enter the data source and data output channel, that is, source and sink. To use another service as the source or sink stream, you need ", + "product_code":"dli", + "title":"Preparing Flink Job Data", + "uri":"dli_01_0454.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"20" + }, + { + "desc":"This section describes how to create a Flink OpenSource SQL job. DLI Flink OpenSource SQL jobs are fully compatible with the syntax of Flink 1.10 and 1.12 provided by the", + "product_code":"dli", + "title":"(Recommended) Creating a Flink OpenSource SQL Job", + "uri":"dli_01_0498.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"21" + }, + { + "desc":"This section describes how to create a Flink SQL job. You can use Flink SQLs to develop jobs to meet your service requirements. 
Using SQL statements simplifies logic impl", + "product_code":"dli", + "title":"Creating a Flink SQL job", + "uri":"dli_01_0455.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"22" + }, + { + "desc":"This section describes how to create a Flink Jar job. You can perform secondary development based on Flink APIs, build your own JAR file, and submit the JAR file to DLI q", + "product_code":"dli", + "title":"Creating a Flink Jar Job", + "uri":"dli_01_0457.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"23" + }, + { + "desc":"The job debugging function helps you check the logic correctness of your compiled SQL statements before running a job.Currently, only Flink SQL jobs support this function", + "product_code":"dli", + "title":"Debugging a Flink Job", + "uri":"dli_01_0458.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"24" + }, + { + "desc":"After a job is created, you can perform operations on the job as required.Editing a JobStarting a JobStopping a JobDeleting a JobExporting a JobImporting a JobModifying N", + "product_code":"dli", + "title":"Performing Operations on a Flink Job", + "uri":"dli_01_0461.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"25" + }, + { + "desc":"After creating a job, you can view the job details to learn about the following information:Viewing Job DetailsChecking the Job Monitoring InformationViewing the Task Lis", + "product_code":"dli", + "title":"Flink Job Details", + "uri":"dli_01_0462.html", + "doc_type":"usermanual", + "p_code":"17", + "code":"26" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Spark Job Management", + "uri":"dli_01_0465.html", + "doc_type":"usermanual", + "p_code":"15", + "code":"27" + }, + { + "desc":"Based on the open-source Spark, DLI optimizes performance and reconstructs services to be compatible with the Apache Spark ecosystem and interfaces, and executes batch pr", + "product_code":"dli", + "title":"Spark Job Management", + "uri":"dli_01_0385.html", + "doc_type":"usermanual", + "p_code":"27", + "code":"28" + }, + { + "desc":"DLI provides fully-managed Spark computing services by allowing you to execute Spark jobs.On the Overview page, click Create Job in the upper right corner of the Spark Jo", + "product_code":"dli", + "title":"Creating a Spark Job", + "uri":"dli_01_0384.html", + "doc_type":"usermanual", + "p_code":"27", + "code":"29" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Queue Management", + "uri":"dli_01_0012.html", + "doc_type":"usermanual", + "p_code":"", + "code":"30" + }, + { + "desc":"Queues in DLI are computing resources, which are the basis for using DLI. 
All executed jobs require computing resources.Currently, DLI provides two types of queues, For S", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0402.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"31" + }, + { + "desc":"You can isolate queues allocated to different users by setting permissions to ensure data query performance.The administrator and queue owner have all permissions, which ", + "product_code":"dli", + "title":"Queue Permission Management", + "uri":"dli_01_0015.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"32" + }, + { + "desc":"Before executing a job, you need to create a queue.If you use a sub-account to create a queue for the first time, log in to the DLI management console using the main acco", + "product_code":"dli", + "title":"Creating a Queue", + "uri":"dli_01_0363.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"33" + }, + { + "desc":"You can delete a queue based on actual conditions.This operation will fail if there are jobs in the Submitting or Running state on this queue.Deleting a queue does not ca", + "product_code":"dli", + "title":"Deleting a Queue", + "uri":"dli_01_0016.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"34" + }, + { + "desc":"If the CIDR block of the DLI queue conflicts with that of the user data source, you can change the CIDR block of the queue.If the queue whose CIDR block is to be modified", + "product_code":"dli", + "title":"Modifying the CIDR Block", + "uri":"dli_01_0443.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"35" + }, + { + "desc":"Elastic scaling can be performed for a newly created queue only when there were jobs running in this queue.If Status of queue xxx is assigning, which is not available is ", + "product_code":"dli", + "title":"Elastic Scaling", + "uri":"dli_01_0487.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"36" + }, + { + "desc":"When services are busy, you might need to use more compute resources to process services in a period. After this period, you do not require the same amount of resources. ", + "product_code":"dli", + "title":"Scheduling CU Changes", + "uri":"dli_01_0488.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"37" + }, + { + "desc":"It can be used to test the connectivity between the DLI queue and the peer IP address specified by the user in common scenarios, or the connectivity between the DLI queue", + "product_code":"dli", + "title":"Testing Address Connectivity", + "uri":"dli_01_0489.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"38" + }, + { + "desc":"Once you have created a message notification topic, you can Add subscription of the topic on the Topic Management page of the Simple Message Notification service. You can", + "product_code":"dli", + "title":"Creating a Message Notification Topic", + "uri":"dli_01_0421.html", + "doc_type":"usermanual", + "p_code":"30", + "code":"39" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Data Management", + "uri":"dli_01_0004.html", + "doc_type":"usermanual", + "p_code":"", + "code":"40" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Databases and Tables", + "uri":"dli_01_0390.html", + "doc_type":"usermanual", + "p_code":"40", + "code":"41" + }, + { + "desc":"DLI database and table management provide the following functions:Database Permission ManagementTable Permission ManagementCreating a Database or a TableDeleting a Databa", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0228.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"42" + }, + { + "desc":"You can isolate databases allocated to different users by setting permissions to ensure data query performance.The administrator and database owner have all permissions, ", + "product_code":"dli", + "title":"Database Permission Management", + "uri":"dli_01_0447.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"43" + }, + { + "desc":"You can isolate databases allocated to different users by setting permissions to ensure data query performance.The administrator and database owner have all permissions, ", + "product_code":"dli", + "title":"Table Permission Management", + "uri":"dli_01_0448.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"44" + }, + { + "desc":"A database, built on the computer storage device, is a data warehouse where data is organized, stored, and managed based on its structure.The table is an important part o", + "product_code":"dli", + "title":"Creating a Database or a Table", + "uri":"dli_01_0005.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"45" + }, + { + "desc":"You can delete unnecessary databases and tables based on actual conditions.You are not allowed to delete databases or tables that are being used for running jobs.The admi", + "product_code":"dli", + "title":"Deleting a Database or a Table", + "uri":"dli_01_0011.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"46" + }, + { + "desc":"During actual use, developers create databases and tables and submit them to test personnel for testing. After the test is complete, the databases and tables are transfer", + "product_code":"dli", + "title":"Modifying the Owners of Databases and Tables", + "uri":"dli_01_0376.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"47" + }, + { + "desc":"You can import data from OBS to a table created in DLI.Only one path can be specified during data import. The path cannot contain commas (,).To import data in CSV format ", + "product_code":"dli", + "title":"Importing Data to the Table", + "uri":"dli_01_0253.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"48" + }, + { + "desc":"You can export data from a DLI table to OBS. During the export, a folder is created in OBS or the content in the existing folder is overwritten.The exported file can be i", + "product_code":"dli", + "title":"Exporting Data from DLI to OBS", + "uri":"dli_01_0010.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"49" + }, + { + "desc":"Metadata is used to define data types. It describes information about the data, including the source, size, format, and other data features. 
In database fields, metadata ", + "product_code":"dli", + "title":"Viewing Metadata", + "uri":"dli_01_0008.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"50" + }, + { + "desc":"The Preview page displays the first 10 records in the table.You can preview data on either the Data Management page or the SQL Editor page.To preview data on the Data Man", + "product_code":"dli", + "title":"Previewing Data", + "uri":"dli_01_0007.html", + "doc_type":"usermanual", + "p_code":"41", + "code":"51" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Package Management", + "uri":"dli_01_0366.html", + "doc_type":"usermanual", + "p_code":"40", + "code":"52" + }, + { + "desc":"Package management provides the following functions:Managing Package PermissionsCreating a PackageDeleting a PackageYou can delete program packages in batches.You can del", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0407.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"53" + }, + { + "desc":"You can isolate package groups or packages allocated to different users by setting permissions to ensure data query performance.The administrator and the owner of a packa", + "product_code":"dli", + "title":"Managing Permissions on Packages and Package Groups", + "uri":"dli_01_0477.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"54" + }, + { + "desc":"DLI allows you to submit program packages in batches to the general-use queue for running.If you need to update a package, you can use the same package or file to upload ", + "product_code":"dli", + "title":"Creating a Package", + "uri":"dli_01_0367.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"55" + }, + { + "desc":"You can delete a package based on actual conditions.On the left of the management console, choose Data Management > Package Management.Click Delete in the Operation colum", + "product_code":"dli", + "title":"Deleting a Package", + "uri":"dli_01_0369.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"56" + }, + { + "desc":"To change the owner of a package, click More > Modify Owner in the Operation column of a package on the Package Management page.If the package has been grouped, you can m", + "product_code":"dli", + "title":"Modifying the Owner", + "uri":"dli_01_0478.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"57" + }, + { + "desc":"DLI built-in dependencies are provided by the platform by default. In case of conflicts, you do not need to upload them when packaging JAR packages of Spark or Flink Jar ", + "product_code":"dli", + "title":"Built-in Dependencies", + "uri":"dli_01_0397.html", + "doc_type":"usermanual", + "p_code":"52", + "code":"58" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Job Templates", + "uri":"dli_01_0379.html", + "doc_type":"usermanual", + "p_code":"", + "code":"59" + }, + { + "desc":"To facilitate SQL operation execution, DLI allows you to customize query templates or save the SQL statements in use as templates. 
After templates are saved, you do not n", + "product_code":"dli", + "title":"SQL Template Management", + "uri":"dli_01_0021.html", + "doc_type":"usermanual", + "p_code":"59", + "code":"60" + }, + { + "desc":"Flink templates include sample templates and custom templates. You can modify an existing sample template to meet the actual job logic requirements and save time for edit", + "product_code":"dli", + "title":"Flink Template Management", + "uri":"dli_01_0464.html", + "doc_type":"usermanual", + "p_code":"59", + "code":"61" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Appendix", + "uri":"dli_01_05110.html", + "doc_type":"usermanual", + "p_code":"59", + "code":"62" + }, + { + "desc":"TPC-H is a test set developed by the Transaction Processing Performance Council (TPC) to simulate decision-making support applications. It is widely used in academia and ", + "product_code":"dli", + "title":"TPC-H Sample Data in the SQL Template", + "uri":"dli_01_05111.html", + "doc_type":"usermanual", + "p_code":"62", + "code":"63" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Datasource Connections", + "uri":"dli_01_0422.html", + "doc_type":"usermanual", + "p_code":"", + "code":"64" + }, + { + "desc":"DLI supports the datasource capability of the native Spark and extends it. With DLI datasource connection, you can access other data storage services through SQL statemen", + "product_code":"dli", + "title":"Datasource Connection and Cross-Source Analysis", + "uri":"dli_01_0410.html", + "doc_type":"usermanual", + "p_code":"64", + "code":"65" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Enhanced Datasource Connections", + "uri":"dli_01_0426.html", + "doc_type":"usermanual", + "p_code":"64", + "code":"66" + }, + { + "desc":"The enhanced datasource connection uses VPC peering at the bottom layer to directly connect the VPC network between the DLI cluster and the destination datasource. 
Data i", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0003.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"67" + }, + { + "desc":"The following describes how to create a datasource HBase connection for MRS.Only enhanced datasource connection to MRS HBase is supported.Apply for a cluster in MRS.If a ", + "product_code":"dli", + "title":"Creating, Querying, and Deleting an Enhanced Datasource Connection", + "uri":"dli_01_0006.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"68" + }, + { + "desc":"The CIDR block of the DLI queue that is bound with a datasource connection cannot overlap with that of the data source.The default queue cannot be bound with a connection", + "product_code":"dli", + "title":"Binding and Unbinding a Queue", + "uri":"dli_01_0009.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"69" + }, + { + "desc":"Method 1: Copy hosts information in /etc/hosts of an MRS node.cat /etc/hostsMethod 2: Log in to FusionInsight Manager to obtain MRS hosts information.The host information", + "product_code":"dli", + "title":"Modifying Host Information", + "uri":"dli_01_0013.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"70" + }, + { + "desc":"After an enhanced datasource connection is created and bound to a queue, the system automatically configures route information. You can also add a custom route for the qu", + "product_code":"dli", + "title":"Custom Route Information", + "uri":"dli_01_0014.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"71" + }, + { + "desc":"You can grant or revoke permissions for enhanced datasource connections of other projects.AuthorizationLog in to the DLI management console, choose Datasource Connections", + "product_code":"dli", + "title":"Enhanced Datasource Connection Permission Management", + "uri":"dli_01_0018.html", + "doc_type":"usermanual", + "p_code":"66", + "code":"72" + }, + { + "desc":"You can isolate datasource connections allocated to different users by setting permissions to ensure data query performance.The administrator and datasource connection ow", + "product_code":"dli", + "title":"Managing Datasource Connection Permissions", + "uri":"dli_01_0480.html", + "doc_type":"usermanual", + "p_code":"64", + "code":"73" + }, + { + "desc":"Datasource authentication is used to manage authentication information for accessing CSS and MRS security environments and encrypt passwords for accessing DWS, RDS, DDS, ", + "product_code":"dli", + "title":"Creating and Managing Datasource Authentication", + "uri":"dli_01_0427.html", + "doc_type":"usermanual", + "p_code":"64", + "code":"74" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Global Configuration", + "uri":"dli_01_0485.html", + "doc_type":"usermanual", + "p_code":"", + "code":"75" + }, + { + "desc":"Global variables can be used to simplify complex parameters. 
For example, long and difficult variables can be replaced to improve the readability of SQL statements.In the", + "product_code":"dli", + "title":"Global Variables", + "uri":"dli_01_0476.html", + "doc_type":"usermanual", + "p_code":"75", + "code":"76" + }, + { + "desc":"Only the tenant account or a subaccount of user group admin can authorize access.After entering the DLI management console, you are advised to set agency permissions to e", + "product_code":"dli", + "title":"Service Authorization", + "uri":"dli_01_0486.html", + "doc_type":"usermanual", + "p_code":"75", + "code":"77" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Permissions Management", + "uri":"dli_01_0408.html", + "doc_type":"usermanual", + "p_code":"", + "code":"78" + }, + { + "desc":"DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM", + "product_code":"dli", + "title":"Overview", + "uri":"dli_01_0440.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"79" + }, + { + "desc":"You can use Identity and Access Management (IAM) to implement fine-grained permissions control on DLI resources. For details, see Overview.If your cloud account does not ", + "product_code":"dli", + "title":"Creating an IAM User and Granting Permissions", + "uri":"dli_01_0418.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"80" + }, + { + "desc":"Custom policies can be created as a supplement to the system policies of DLI. You can add actions to custom policies. For the actions supported for custom policies, see \"", + "product_code":"dli", + "title":"Creating a Custom Policy", + "uri":"dli_01_0451.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"81" + }, + { + "desc":"A resource is an object that exists within a service. You can select DLI resources by specifying their paths.", + "product_code":"dli", + "title":"DLI Resources", + "uri":"dli_01_0417.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"82" + }, + { + "desc":"Request conditions are useful in determining when a custom policy takes effect. A request condition consists of a condition key and operator. Condition keys are either gl", + "product_code":"dli", + "title":"DLI Request Conditions", + "uri":"dli_01_0475.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"83" + }, + { + "desc":"Table 1 lists the common operations supported by each system policy of DLI. Choose proper system policies according to this table. For details about the SQL statement per", + "product_code":"dli", + "title":"Common Operations Supported by DLI System Policy", + "uri":"dli_01_0441.html", + "doc_type":"usermanual", + "p_code":"78", + "code":"84" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"FAQs", + "uri":"dli_01_0539.html", + "doc_type":"usermanual", + "p_code":"", + "code":"85" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Flink Jobs", + "uri":"dli_03_0037.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"86" + }, + { + "desc":"DLI Flink jobs support the following data formats:Avro, Avro_merge, BLOB, CSV, EMAIL, JSON, ORC, Parquet, and XML.DLI Flink jobs support data from the following data sour", + "product_code":"dli", + "title":"What Data Formats and Data Sources Are Supported by DLI Flink Jobs?", + "uri":"dli_03_0083.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"87" + }, + { + "desc":"A sub-user can view queues but cannot view Flink jobs. You can authorize the sub-user using DLI or IAM.Authorization on DLILog in to the DLI console using a tenant accoun", + "product_code":"dli", + "title":"How Do I Authorize a Subuser to View Flink Jobs?", + "uri":"dli_03_0139.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"88" + }, + { + "desc":"DLI Flink jobs are highly available. You can enable the automatic restart function to automatically restart your jobs after short-time faults of peripheral services are r", + "product_code":"dli", + "title":"How Do I Set Auto Restart upon Exception for a Flink Job?", + "uri":"dli_03_0090.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"89" + }, + { + "desc":"When you create a Flink SQL job or Flink Jar job, you can select Save Job Log on the job editing page to save job running logs to OBS.To set the OBS bucket for storing th", + "product_code":"dli", + "title":"How Do I Save Flink Job Logs?", + "uri":"dli_03_0099.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"90" + }, + { + "desc":"DLI can output Flink job results to DIS. You can view the results in DIS. For details, see Retrieving Data from DIS in the Data Ingestion Service User Guide.DLI can outpu", + "product_code":"dli", + "title":"How Can I Check Flink Job Results?", + "uri":"dli_03_0043.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"91" + }, + { + "desc":"Choose Job Management > Flink Jobs. In the Operation column of the target job, choose More > Permissions. When a new user is authorized, No such user. userName:xxxx. is d", + "product_code":"dli", + "title":"What Should I Do if \"No such user. userName:xxxx.\" Is Displayed on the Flink Job Management Page When I Grant Permission to a User?", + "uri":"dli_03_0160.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"92" + }, + { + "desc":"Checkpoint was enabled when a Flink job is created, and the OBS bucket for storing checkpoints was specified. After a Flink job is manually stopped, no message is display", + "product_code":"dli", + "title":"How Do I Know Which Checkpoint the Flink Job I Stopped Will Be Restored to When I Start the Job Again?", + "uri":"dli_03_0180.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"93" + }, + { + "desc":"The consumption capability of a Flink SQL job depends on the data source transmission, queue size, and job parameter settings. The peak consumption is 10 Mbit/s.", + "product_code":"dli", + "title":"How Much Data Can Be Processed in a Day by a Flink SQL Job?", + "uri":"dli_03_0130.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"94" + }, + { + "desc":"The temp stream in Flink SQL is similar to a subquery. It is a logical stream used to simplify the SQL logic and does not generate data storage. 
Therefore, there is no ne", + "product_code":"dli", + "title":"Does Data in the Temporary Stream of Flink SQL Need to Be Cleared Periodically? How Do I Clear the Data?", + "uri":"dli_03_0061.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"95" + }, + { + "desc":"SymptomWhen you create a Flink SQL job and configure the parameters, you select an OBS bucket you have created. The system displays a message indicating that the OBS buck", + "product_code":"dli", + "title":"What Should I Do if a Message Is Displayed Indicating that the OBS Bucket Is Not Authorized When I Select an OBS Bucket for a Flink SQL Job?", + "uri":"dli_03_0138.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"96" + }, + { + "desc":"When using a Flink SQL job, you need to create an OBS partition table for subsequent batch processing.In the following example, the day field is used as the partition fie", + "product_code":"dli", + "title":"How Do I Map an OBS Table to a Partitioned DLI Table?", + "uri":"dli_03_0089.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"97" + }, + { + "desc":"SymptomYou used Flink 1.10 to run a Flink Opensource SQL job. You set the number of Kafka partitions for the job a small value at the beginning and need to increase the n", + "product_code":"dli", + "title":"How Do I Change the Number of Kafka Partitions of a Flink SQL Job Without Stopping It?", + "uri":"dli_03_0120.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"98" + }, + { + "desc":"In this example, the day field is used as the partition field with the parquet encoding format (only the parquet format is supported currently) to dump car_info data to O", + "product_code":"dli", + "title":"How Can I Map an OBS Table to a DLI Partition Table?", + "uri":"dli_03_0075.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"99" + }, + { + "desc":"When I run the creation statement with an EL expression in the table name in a Flink SQL job, the following error message is displayed:DLI.0005: AnalysisException: t_user", + "product_code":"dli", + "title":"What Should I Do If Error Message DLI.0005 Is Displayed When I Use an EL Expression to Create a Table in a Flink SQL Job?", + "uri":"dli_03_0167.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"100" + }, + { + "desc":"After data is written to OBS through the Flink job output stream, data cannot be queried from the DLI table created in the OBS file path.For example, use the following Fl", + "product_code":"dli", + "title":"What Should I Do If No Data Is Found in the DLI Table Created Using the OBS File Path That Stores the Output of a Flink job?", + "uri":"dli_03_0168.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"101" + }, + { + "desc":"After a Flink SQL job is submitted on DLI, the job fails to be executed. The following error information is displayed in the job log:connect to DIS failed java.lang.Illeg", + "product_code":"dli", + "title":"What Should I Do If a Flink SQL Job Fails to Be Executed, and \"connect to DIS failed java.lang.IllegalArgumentException: Access key cannot be null\" Is Recorded in the Log?", + "uri":"dli_03_0174.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"102" + }, + { + "desc":"Semantic verification for a Flink SQL job (reading DIS data) fails. The following information is displayed when the job fails:Get dis channel xxxinfo failed. 
error info: ", + "product_code":"dli", + "title":"What Should I Do If \"Not authorized\" Is Reported When a Flink SQL Job Reads DIS Data?", + "uri":"dli_03_0176.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"103" + }, + { + "desc":"After a Flink SQL job consumed Kafka and sank data to the Elasticsearch cluster, the job was successfully executed, but no data is available.Possible causes are as follow", + "product_code":"dli", + "title":"Data Writing Fails After a Flink SQL Job Consumed Kafka and Sank Data to the Elasticsearch Cluster", + "uri":"dli_03_0232.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"104" + }, + { + "desc":"The procedure is as follows:Add the following code to the JAR package code of the Flink Jar job:// Configure the pom file on which the StreamExecutionEnvironment depends.", + "product_code":"dli", + "title":"How Do I Configure Checkpoints for Flink Jar Jobs and Save the Checkpoints to OBS?", + "uri":"dli_03_0038.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"105" + }, + { + "desc":"Configuration files can be uploaded for user-defined jobs (JAR).Upload the configuration file to DLI through Package Management.In the Other Dependencies area of the Flin", + "product_code":"dli", + "title":"Does a Flink JAR Job Support Configuration File Upload? How Do I Upload a Configuration File?", + "uri":"dli_03_0044.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"106" + }, + { + "desc":"The dependency of your Flink job conflicts with a built-in dependency of the DLI Flink platform. As a result, the job submission fails.Delete your JAR package that is the", + "product_code":"dli", + "title":"What Should I Do If Job Submission Fails Due to Flink JAR Package Conflict?", + "uri":"dli_03_0119.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"107" + }, + { + "desc":"When a Flink Jar job is submitted to access GaussDB(DWS), an error message is displayed indicating that the job fails to be started. The job log contains the following er", + "product_code":"dli", + "title":"What Should I Do If GaussDB(DWS) Fails to Be Started When a Flink Jar Job Accesses GaussDB(DWS), and a Message Indicating Too Many Client Connections is Displayed?", + "uri":"dli_03_0161.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"108" + }, + { + "desc":"An exception occurred when a Flink Jar job is running. The following error information is displayed in the job log:org.apache.flink.shaded.curator.org.apache.curator.Conn", + "product_code":"dli", + "title":"What Should I Do If Error Message \"Authentication failed\" Is Displayed When a Flink Jar Job Is Running?", + "uri":"dli_03_0165.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"109" + }, + { + "desc":"The storage path of the Flink Jar job checkpoints was set to an OBS bucket. The job failed to be submitted, and an error message indicating an invalid OBS bucket name was", + "product_code":"dli", + "title":"What Should I Do If Invalid OBS Bucket Name Is Reported After a Flink Job Submission Failed?", + "uri":"dli_03_0233.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"110" + }, + { + "desc":"Flink Job submission failed. The exception information is as follows:Flink JAR packages conflicted. 
The submitted Flink JAR package conflicted with the HDFS JAR package o", + "product_code":"dli", + "title":"Flink Job Submission Fails Due to Hadoop JAR Package Conflicts", + "uri":"dli_03_0234.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"111" + }, + { + "desc":"You can use Flink Jar to connect to Kafka with SASL SSL authentication enabled.", + "product_code":"dli", + "title":"How Do I Connect a Flink jar Job to SASL_SSL?", + "uri":"dli_03_0266.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"112" + }, + { + "desc":"Data Stacking in a Consumer GroupThe accumulated data of a consumer group can be calculated by the following formula: Total amount of data to be consumed by the consumer ", + "product_code":"dli", + "title":"How Do I Optimize Performance of a Flink Job?", + "uri":"dli_03_0106.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"113" + }, + { + "desc":"Add the following SQL statements to the Flink job:", + "product_code":"dli", + "title":"How Do I Write Data to Different Elasticsearch Clusters in a Flink Job?", + "uri":"dli_03_0048.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"114" + }, + { + "desc":"The DLI Flink checkpoint/savepoint mechanism is complete and reliable. You can use this mechanism to prevent data loss when a job is manually restarted or restarted due t", + "product_code":"dli", + "title":"How Do I Prevent Data Loss After Flink Job Restart?", + "uri":"dli_03_0096.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"115" + }, + { + "desc":"On the Flink job management page, hover the cursor on the status of the job that fails to be submitted to view the brief information about the failure.The possible causes", + "product_code":"dli", + "title":"How Do I Locate a Flink Job Submission Error?", + "uri":"dli_03_0103.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"116" + }, + { + "desc":"On the Flink job management, click Edit in the Operation column of the target job. On the displayed page, check whether Save Job Log in the Running Parameters tab is enab", + "product_code":"dli", + "title":"How Do I Locate a Flink Job Running Error?", + "uri":"dli_03_0105.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"117" + }, + { + "desc":"Check the following operations:Adjusting or adding optimization parameters or the number of concurrent threads of a job, or modifying Flink SQL statements or a Flink Jar ", + "product_code":"dli", + "title":"How Do I Know Whether a Flink Job Can Be Restored from a Checkpoint After Being Restarted?", + "uri":"dli_03_0136.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"118" + }, + { + "desc":"To rectify this fault, perform the following steps:Log in to the DIS management console. In the navigation pane, choose Stream Management. View the Flink job SQL statemen", + "product_code":"dli", + "title":"What Should I Do If the DIS Stream Does Not Exist During the Semantic Check?", + "uri":"dli_03_0040.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"119" + }, + { + "desc":"If the OBS bucket selected for a job is not authorized, perform the following steps:Select Enable Checkpointing or Save Job Log.Specify OBS Bucket.Select Authorize OBS.", + "product_code":"dli", + "title":"What Should I Do If the OBS Bucket Selected for a Job Is Not Authorized?", + "uri":"dli_03_0045.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"120" + }, + { + "desc":"Mode for storing generated job logs when a DLI Flink job fails to be submitted or executed. 
The options are as follows:If the submission fails, a submission log is genera", + "product_code":"dli", + "title":"What Should I Do If Logs Are Not Written to the OBS Bucket After a DLI Flink Job Fails to Be Submitted for Running?", + "uri":"dli_03_0064.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"121" + }, + { + "desc":"SymptomYou used Flink 1.10 to run a Flink Opensource SQL job. The job failed after the following error was reported when Flink Sink wrote data to Kafka.Caused by: org.apa", + "product_code":"dli", + "title":"How Do I Configure Connection Retries for Kafka Sink If it is Disconnected?", + "uri":"dli_03_0121.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"122" + }, + { + "desc":"The Flink/Spark UI was displayed with incomplete information.When a pay-per-use queue is used to run a job, the system releases the cluster and takes about 10 minutes to ", + "product_code":"dli", + "title":"What Should I Do If the Flink/Spark UI Page Cannot Be Displayed Properly?", + "uri":"dli_03_0235.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"123" + }, + { + "desc":"JobManager and TaskManager heartbeats timed out. As a result, the Flink job is abnormal.Check whether the network is intermittently disconnected and whether the cluster l", + "product_code":"dli", + "title":"Wha should I Do If JobManager and TaskManager Heartbeats Timed Out and the Flink Job Is Abnormal?", + "uri":"dli_03_0236.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"124" + }, + { + "desc":"Test address connectivity.If the network is unreachable, rectify the network connection first. Ensure that the network connection between the DLI queue and the external d", + "product_code":"dli", + "title":"What Can I Do If \"Timeout expired while fetching topic metadata\" Is Repeatedly Reported in Flink JobManager Logs?", + "uri":"dli_03_0265.html", + "doc_type":"usermanual", + "p_code":"86", + "code":"125" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Problems Related to SQL Jobs", + "uri":"dli_03_0020.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"126" + }, + { + "desc":"A temporary table is used to store intermediate results. When a transaction or session ends, the data in the temporary table can be automatically deleted. For example, in", + "product_code":"dli", + "title":"SQL Jobs", + "uri":"dli_03_0200.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"127" + }, + { + "desc":"If a large number of small files are generated during SQL execution, job execution and table query will take a long time. In this case, you should merge small files.Set t", + "product_code":"dli", + "title":"How Do I Merge Small Files?", + "uri":"dli_03_0086.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"128" + }, + { + "desc":"When creating an OBS table, you must specify a table path in the database. 
The path format is as follows: obs://xxx/database name/table name.If the specified path is akdc", + "product_code":"dli", + "title":"How Do I Specify an OBS Path When Creating an OBS Table?", + "uri":"dli_03_0092.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"129" + }, + { + "desc":"DLI allows you to associate JSON data in an OBS bucket to create tables in asynchronous mode.The statement for creating the table is as follows:", + "product_code":"dli", + "title":"How Do I Create a Table Using JSON Data in an OBS Bucket?", + "uri":"dli_03_0108.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"130" + }, + { + "desc":"You can use the where condition statement in the select statement to filter data. For example:", + "product_code":"dli", + "title":"How Do I Set Local Variables in SQL Statements?", + "uri":"dli_03_0087.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"131" + }, + { + "desc":"The correct method for using the count function to perform aggregation is as follows:OrIf an incorrect method is used, an error will be reported.", + "product_code":"dli", + "title":"How Can I Use the count Function to Perform Aggregation?", + "uri":"dli_03_0069.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"132" + }, + { + "desc":"You can use the cross-region replication function of OBS. The procedure is as follows:Export the DLI table data in region 1 to the user-defined OBS bucket. For details, s", + "product_code":"dli", + "title":"How Do I Synchronize DLI Table Data from One Region to Another?", + "uri":"dli_03_0072.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"133" + }, + { + "desc":"Currently, DLI does not allow you to insert table data into specific fields. To insert table data, you must insert data of all table fields at a time.", + "product_code":"dli", + "title":"How Do I Insert Table Data into Specific Fields of a Table Using a SQL Job?", + "uri":"dli_03_0191.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"134" + }, + { + "desc":"Create an OBS directory with a unique name. Alternatively, you can manually delete the existing OBS directory and submit the job again. However, exercise caution when del", + "product_code":"dli", + "title":"What Should I Do If Error \"path obs://xxx already exists\" Is Reported When Data Is Exported to OBS?", + "uri":"dli_03_0014.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"135" + }, + { + "desc":"This message indicates that the two tables to be joined contain the same column, but the owner of the column is not specified when the command is executed.For example, ta", + "product_code":"dli", + "title":"What Should I Do If \"SQL_ANALYSIS_ERROR: Reference 't.id' is ambiguous, could be: t.id, t.id.;\" Is Displayed When Two Tables Are Joined", + "uri":"dli_03_0066.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"136" + }, + { + "desc":"Check whether the account is in arrears. 
If yes, recharge the account.If the error persists, log out and log in again.", + "product_code":"dli", + "title":"What Should I Do If the System Notify Me that I Have No Permission to Execute the Query Statement Because the Current Account Was Restricted?", + "uri":"dli_03_0071.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"137" + }, + { + "desc":"Cause AnalysisWhen you query the partitioned table XX.YYY, the partition column is not specified in the search criteria.A partitioned table can be queried only when the q", + "product_code":"dli", + "title":"What Should I Do If Error Message \"There should be at least one partition pruning predicate on partitioned table XX.YYY\" Is Displayed When a Query Statement Is Executed?", + "uri":"dli_03_0145.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"138" + }, + { + "desc":"The following error message is displayed when the LOAD DATA command is executed by a Spark SQL job to import data to a DLI table:In some cases ,the following error messag", + "product_code":"dli", + "title":"What Should I Do If Error Message IllegalArgumentException: Buffer size too small. size is Displayed When Data Is Loaded to an OBS Foreign Table", + "uri":"dli_03_0169.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"139" + }, + { + "desc":"An error is reported during SQL job execution:Please contact DLI service. DLI.0002: FileNotFoundException: getFileStatus on obs://xxx: status [404]Check whether there is ", + "product_code":"dli", + "title":"What Should I Do If \"DLI.0002 FileNotFoundException\" Is Reported During SQL Job Running", + "uri":"dli_03_0189.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"140" + }, + { + "desc":"Currently, DLI supports the Hive syntax for creating tables of the TEXTFILE, SEQUENCEFILE, RCFILE, ORC, AVRO, and PARQUET file types. If the file format specified for cre", + "product_code":"dli", + "title":"What Should I Do If A Schema Parsing Error Is Reported When I Create a Hive Table Using CTAS", + "uri":"dli_03_0046.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"141" + }, + { + "desc":"When you run a DLI SQL script on DataArts Studio, the log shows that the statements fail to be executed. The error information is as follows:DLI.0999: RuntimeException: o", + "product_code":"dli", + "title":"What Should I Do If \"org.apache.hadoop.fs.obs.OBSIOException\" Is Reported If DLI SQL Scripts Are Executed on DataArts Studio?", + "uri":"dli_03_0173.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"142" + }, + { + "desc":"After the migration job is submitted, the following error information is displayed in the log:org.apache.sqoop.common.SqoopException:UQUERY_CONNECTOR_0001:Invoke DLI serv", + "product_code":"dli", + "title":"What Should I Do If \"UQUERY_CONNECTOR_0001:Invoke DLI service api failed\" Is Reported in the Job Log When I Use CDM to Migrate Data to DLI?", + "uri":"dli_03_0172.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"143" + }, + { + "desc":"Error message \"File not Found\" is displayed when a SQL job is accessed.Generally, the file cannot be found due to a read/write conflict. 
Check whether a job is overwritin", + "product_code":"dli", + "title":"What Should I Do If \"File not Found\" Is Displayed When I Access a SQL Job?", + "uri":"dli_03_0207.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"144" + }, + { + "desc":"Error message \"DLI.0003: AccessControlException XXX\" is reported when a SQL job is accessed.View the OBS bucket in the AccessControlException and check whether you are us", + "product_code":"dli", + "title":"What Should I Do If \"DLI.0003: AccessControlException XXX\" Is Reported When I Access a SQL Job?", + "uri":"dli_03_0208.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"145" + }, + { + "desc":"Error message \"DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists on {{bucket name}}: status [403]\" is reported when a SQL job is Accessed.Th", + "product_code":"dli", + "title":"What Should I Do If \"DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists on {{bucket name}}: status [403]\" Is Reported When I Access a SQL Job?", + "uri":"dli_03_0209.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"146" + }, + { + "desc":"Error message \"The current account does not have permission to perform this operation,the current account was restricted.\" is reported during SQL statement execution.Chec", + "product_code":"dli", + "title":"What Should I Do If \"The current account does not have permission to perform this operation,the current account was restricted. Restricted for no budget\" Is Reported During SQL Statement Execution? Restricted for no budget.", + "uri":"dli_03_0210.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"147" + }, + { + "desc":"If the job runs slowly, perform the following steps to find the causes and rectify the fault:Check whether the problem is caused by FullGC.Log in to the DLI console. 
In t", + "product_code":"dli", + "title":"How Do I Troubleshoot Slow SQL Jobs?", + "uri":"dli_03_0196.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"148" + }, + { + "desc":"You can view SQL job logs for routine O&M.Obtain the ID of the DLI job executed on the DataArts Studio console.Job IDOn the DLI console, choose Job Management > SQL Jobs.", + "product_code":"dli", + "title":"How Do I View DLI SQL Logs?", + "uri":"dli_03_0091.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"149" + }, + { + "desc":"You can view the job execution records when a job is running.Log in to the DLI management console.In the navigation pane on the left, choose Job Management > SQL Jobs.Ent", + "product_code":"dli", + "title":"How Do I View SQL Execution Records?", + "uri":"dli_03_0116.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"150" + }, + { + "desc":"If the execution of an SQL statement takes a long time, you need to access the Spark UI to check the execution status.If data skew occurs, the running time of a stage exc", + "product_code":"dli", + "title":"How Do I Eliminate Data Skew by Configuring AE Parameters?", + "uri":"dli_03_0093.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"151" + }, + { + "desc":"A DLI table exists but cannot be queried on the DLI console.If a table exists but cannot be queried, there is a high probability that the current user does not have the p", + "product_code":"dli", + "title":"What Can I Do If a Table Cannot Be Queried on the DLI Console?", + "uri":"dli_03_0184.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"152" + }, + { + "desc":"A high compression ratio of OBS tables in the Parquet or ORC format (for example, a compression ratio of 5 or higher compared with text compression) will lead to large da", + "product_code":"dli", + "title":"The Compression Ratio of OBS Tables Is Too High", + "uri":"dli_03_0013.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"153" + }, + { + "desc":"DLI supports only UTF-8-encoded texts. Ensure that data is encoded using UTF-8 during table creation and import.", + "product_code":"dli", + "title":"How Can I Avoid Garbled Characters Caused by Inconsistent Character Codes?", + "uri":"dli_03_0009.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"154" + }, + { + "desc":"User A created the testTable table in a database through a SQL job and granted user B the permission to insert and delete table data. User A deleted the testTable table a", + "product_code":"dli", + "title":"Do I Need to Grant Table Permissions to a User and Project After I Delete a Table and Create One with the Same Name?", + "uri":"dli_03_0175.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"155" + }, + { + "desc":"A CSV file is imported to a DLI partitioned table, but the imported file data does not contain the data in the partitioning column. The partitioning column needs to be sp", + "product_code":"dli", + "title":"What Should I Do If Table Data Fails to Be Queried After Data Is Imported to a DLI Partitioned Table Because the File to Be Imported Does Not Contain Data in the Partitioning Column?", + "uri":"dli_03_0177.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"156" + }, + { + "desc":"When an OBS foreign table is created, a field in the specified OBS file contains a carriage return line feed (CRLF) character. 
As a result, the data is incorrect.The stat", + "product_code":"dli", + "title":"How Do I Fix the Data Error Caused by CRLF Characters in a Field of the OBS File Used to Create an External OBS Table?", + "uri":"dli_03_0181.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"157" + }, + { + "desc":"A SQL job contains join operations. After the job is submitted, it is stuck in the Running state and no result is returned.When a Spark SQL job has join operations on sma", + "product_code":"dli", + "title":"What Should I Do If a SQL Job That Has Join Operations Stays in the Running State?", + "uri":"dli_03_0182.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"158" + }, + { + "desc":"The on clause was not added to the SQL statement for joining tables. As a result, the Cartesian product query occurs due to multi-table association, and the queue resourc", + "product_code":"dli", + "title":"The on Clause Is Not Added When Tables Are Joined. Cartesian Product Query Causes High Resource Usage of the Queue, and the Job Fails to Be Executed", + "uri":"dli_03_0187.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"159" + }, + { + "desc":"Partition data is manually uploaded to a partition of an OBS table. However, the data cannot be queried using DLI SQL editor.After manually adding partition data, you nee", + "product_code":"dli", + "title":"What Should I Do If Manually Added Data in a Partition of an OBS Table Cannot Be Queried?", + "uri":"dli_03_0190.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"160" + }, + { + "desc":"To dynamically overwrite the specified partitioned data in the DataSource table, set dli.sql.dynamicPartitionOverwrite.enabled to true and then run the insert overwrite s", + "product_code":"dli", + "title":"Why Is All Data Overwritten When insert overwrite Is Used to Overwrite Partitioned Table?", + "uri":"dli_03_0212.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"161" + }, + { + "desc":"The possible causes and solutions are as follows:After you purchase a DLI queue and submit a SQL job for the first time, wait for 5 to 10 minutes. 
After the cluster is st", + "product_code":"dli", + "title":"Why Is a SQL Job Stuck in the Submitting State?", + "uri":"dli_03_0213.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"162" + }, + { + "desc":"Spark does not have the datetime type and uses the TIMESTAMP type instead.You can use a function to convert data types.The following is an example.select cast(create_date", + "product_code":"dli", + "title":"Why Is the create_date Field in the RDS Table Is a Timestamp in the DLI query result?", + "uri":"dli_03_0214.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"163" + }, + { + "desc":"If the table name is changed immediately after SQL statements are executed, the data size of the table may be incorrect.If you need to change the table name, change it 5 ", + "product_code":"dli", + "title":"What Can I Do If datasize Cannot Be Changed After the Table Name Is Changed in a Finished SQL Job?", + "uri":"dli_03_0215.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"164" + }, + { + "desc":"When DLI is used to insert data into an OBS temporary table, only part of data is imported.Possible causes are as follows:The amount of data read during job execution is ", + "product_code":"dli", + "title":"Why Is the Data Volume Changes When Data Is Imported from DLI to OBS?", + "uri":"dli_03_0231.html", + "doc_type":"usermanual", + "p_code":"126", + "code":"165" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Problems Related to Spark Jobs", + "uri":"dli_03_0021.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"166" + }, + { + "desc":"DLI Spark does not support job scheduling. You can use other services, such as DataArts Studio, or use APIs or SDKs to customize job schedule.The Spark SQL syntax does no", + "product_code":"dli", + "title":"Spark Jobs", + "uri":"dli_03_0201.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"167" + }, + { + "desc":"To use Spark to write data into a DLI table, configure the following parameters:fs.obs.access.keyfs.obs.secret.keyfs.obs.implfs.obs.endpointThe following is an example:", + "product_code":"dli", + "title":"How Do I Use Spark to Write Data into a DLI Table?", + "uri":"dli_03_0107.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"168" + }, + { + "desc":"If the AK and SK are obtained, set the parameters as follows:Create SparkContext using codeval sc: SparkContext = new SparkContext()\nsc.hadoopConfiguration.set(\"fs.obs.ac", + "product_code":"dli", + "title":"How Do I Set the AK/SK for a Queue to Operate an OBS Table?", + "uri":"dli_03_0017.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"169" + }, + { + "desc":"Log in to the DLI console. In the navigation pane, choose Job Management > Spark Jobs. In the job list, locate the target job and click next to Job ID to view the parame", + "product_code":"dli", + "title":"How Do I View the Resource Usage of DLI Spark Jobs?", + "uri":"dli_03_0102.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"170" + }, + { + "desc":"If the pymysql module is missing, check whether the corresponding EGG package exists. 
If the package does not exist, upload the pyFile package on the Package Management p", + "product_code":"dli", + "title":"How Do I Use Python Scripts to Access the MySQL Database If the pymysql Module Is Missing from the Spark Job Results Stored in MySQL?", + "uri":"dli_03_0076.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"171" + }, + { + "desc":"DLI natively supports PySpark.For most cases, Python is preferred for data analysis, and PySpark is the best choice for big data analysis. Generally, JVM programs are pac", + "product_code":"dli", + "title":"How Do I Run a Complex PySpark Program in DLI?", + "uri":"dli_03_0082.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"172" + }, + { + "desc":"You can use DLI Spark jobs to access data in the MySQL database using either of the following methods:Solution 1: Purchase a pay-per-use queue, create an enhanced datasou", + "product_code":"dli", + "title":"How Does a Spark Job Access a MySQL Database?", + "uri":"dli_03_0127.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"173" + }, + { + "desc":"When shuffle statements, such as GROUP BY and JOIN, are executed in Spark jobs, data skew occurs, which slows down the job execution.To solve this problem, you can config", + "product_code":"dli", + "title":"How Do I Use JDBC to Set the spark.sql.shuffle.partitions Parameter to Improve the Task Concurrency?", + "uri":"dli_03_0068.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"174" + }, + { + "desc":"You can use SparkFiles to read the file submitted using –-file form a local path: SparkFiles.get(\"Name of the uploaded file\").The file path in the Driver is different fro", + "product_code":"dli", + "title":"How Do I Read Uploaded Files for a Spark Jar Job?", + "uri":"dli_03_0118.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"175" + }, + { + "desc":"The Spark 2.3 has changed the behavior of the internal interface Logging. If the user code directly inherits the Logging and the earlier version Spark is used during comp", + "product_code":"dli", + "title":"What Can I Do When Receiving java.lang.AbstractMethodError in the Spark Job?", + "uri":"dli_03_0023.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"176" + }, + { + "desc":"The following error is reported when a Spark job accesses OBS data:Set the AK/SK to enable Spark jobs to access OBS data. For details, see How Do I Set the AK/SK for a Qu", + "product_code":"dli", + "title":"What Should I Do If Errors \"ResponseCode: 403\" and \"ResponseStatus: Forbidden\" Are Reported When a Spark Job Accesses OBS Data", + "uri":"dli_03_0156.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"177" + }, + { + "desc":"Check whether the OBS bucket is used to store DLI logs on the Global Configuration > Job Configurations page. 
The job log bucket cannot be used for other purpose.", + "product_code":"dli", + "title":"Why is Error \"verifyBucketExists on XXXX: status [403]\" Is Reported When I Run a Spark Job with a User that has the Permission to Access the OBS Bucket?", + "uri":"dli_03_0164.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"178" + }, + { + "desc":"When a Spark job accesses a large amount of data, for example, accessing data in a GaussDB(DWS) database, you are advised to set the number of concurrent tasks and enable", + "product_code":"dli", + "title":"What Should I Do if a Running Timeout Error Is Reported When a Spark Job Runs a Large Amount of Data?", + "uri":"dli_03_0157.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"179" + }, + { + "desc":"Spark jobs cannot access SFTP. Upload the files you want to access to OBS and then you can analyze the data using Spark jobs.", + "product_code":"dli", + "title":"What Should I Do If a Spark job Used to Access Files in SFTP Fails and the Log Shows that the File Directory Is Abnormal?", + "uri":"dli_03_0188.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"180" + }, + { + "desc":"When a Spark job is running, an error message is displayed, indicating that the user does not have the database permission. The error information is as follows:org.apache", + "product_code":"dli", + "title":"What Should I Do If a Job Fails Because the Job User Does Not Have the Database and Table Permission?", + "uri":"dli_03_0192.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"181" + }, + { + "desc":"If the specified Python environment cannot be found after a Python3 package is added, the current compute cluster environment is Python2 by default.You can set spark.yarn", + "product_code":"dli", + "title":"What Can I Do If the Specified Python Environment Cannot Be Found After a Python Package Is Added?", + "uri":"dli_03_0077.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"182" + }, + { + "desc":"The remaining CUs in the queue may be insufficient. As a result, the job cannot be submitted.To view the remaining CUs of a queue, perform the following steps:Check the C", + "product_code":"dli", + "title":"Why Is a Spark Jar Job Stuck in the Submitting State?", + "uri":"dli_03_0220.html", + "doc_type":"usermanual", + "p_code":"166", + "code":"183" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Product Consultation", + "uri":"dli_03_0001.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"184" + }, + { + "desc":"Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark, and Flink ecosystems. It frees you from managing any serv", + "product_code":"dli", + "title":"What Is DLI?", + "uri":"dli_03_0002.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"185" + }, + { + "desc":"DLI supports the following data formats:ParquetCSVORCJsonAvro", + "product_code":"dli", + "title":"Which Data Formats Does DLI Support?", + "uri":"dli_03_0025.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"186" + }, + { + "desc":"The Spark component of DLI is a fully managed service. You can only use the DLI Spark through its APIs. .The Spark component of MRS is built on the VM in an MRS cluster. 
", + "product_code":"dli", + "title":"What Are the Differences Between MRS Spark and DLI Spark?", + "uri":"dli_03_0115.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"187" + }, + { + "desc":"DLI data can be stored in either of the following:OBS: Data used by SQL jobs, Spark jobs, and Flink jobs can be stored in OBS, reducing storage costs.DLI: The column-base", + "product_code":"dli", + "title":"Where Can DLI Data Be Stored?", + "uri":"dli_03_0029.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"188" + }, + { + "desc":"DLI tables store data within the DLI service, and you do not need to know the data storage path.OBS tables store data in your OBS buckets, and you need to manage the sour", + "product_code":"dli", + "title":"What Are the Differences Between DLI Tables and OBS Tables?", + "uri":"dli_03_0117.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"189" + }, + { + "desc":"Currently, DLI supports analysis only on the data uploaded to the cloud. In scenarios where regular (for example, on a per day basis) one-off analysis on incremental data", + "product_code":"dli", + "title":"How Can I Use DLI If Data Is Not Uploaded to OBS?", + "uri":"dli_03_0010.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"190" + }, + { + "desc":"Data in the OBS bucket shared by IAM users under the same account can be imported. You cannot import data in the OBS bucket shared with other IAM account.", + "product_code":"dli", + "title":"Can I Import OBS Bucket Data Shared by Other Tenants into DLI?", + "uri":"dli_03_0129.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"191" + }, + { + "desc":"Log in to the management console.Click in the upper left corner and select Region and Project.Click (the My Quotas icon) in the upper right corner.The Service Quota pag", + "product_code":"dli", + "title":"What Should I do If the System Failed to Create a Database and {\"error_code\":\"DLI.1028\";\"error_msg\":\"Already reached the maximum quota of databases:XXX\" Is Displayed Indicating that the Quota Is Insufficient?", + "uri":"dli_03_0264.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"192" + }, + { + "desc":"No, a global variable can only be used by the user who created it. Global variables can be used to simplify complex parameters. For example, long and difficult variables ", + "product_code":"dli", + "title":"Can a Member Account Use Global Variables Created by Other Member Accounts?", + "uri":"dli_03_0263.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"193" + }, + { + "desc":"If you are suggested to perform following operations to run a large number of DLI jobs:Group the DLI jobs by type, and run each group on a queue.Alternatively, create IAM", + "product_code":"dli", + "title":"How Do I Manage Tens of Thousands of Jobs Running on DLI?", + "uri":"dli_03_0126.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"194" + }, + { + "desc":"The field names of tables that have been created cannot be changed.You can create a table, define new table fields, and migrate data from the old table to the new one.", + "product_code":"dli", + "title":"How Do I Change the Name of a Field in a Created Table?", + "uri":"dli_03_0162.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"195" + }, + { + "desc":"No. The spark.acls.enable configuration item is not used in DLI. 
The Apache Spark command injection vulnerability (CVE-2022-33891) does not exist in DLI.", + "product_code":"dli", + "title":"Does DLI Have the Apache Spark Command Injection Vulnerability (CVE-2022-33891)?", + "uri":"dli_03_0260.html", + "doc_type":"usermanual", + "p_code":"184", + "code":"196" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Quota", + "uri":"dli_03_0053.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"197" + }, + { + "desc":"Log in to the management console.Click in the upper left corner and select Region and Project.Click (the My Quotas icon) in the upper right corner.The Service Quota pag", + "product_code":"dli", + "title":"How Do I View My Quotas?", + "uri":"dli_03_0031.html", + "doc_type":"usermanual", + "p_code":"197", + "code":"198" + }, + { + "desc":"The system does not support online quota adjustment. To increase a resource quota, dial the hotline or send an email to the customer service. We will process your applica", + "product_code":"dli", + "title":"How Do I Increase a Quota?", + "uri":"dli_03_0032.html", + "doc_type":"usermanual", + "p_code":"197", + "code":"199" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Permission", + "uri":"dli_03_0054.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"200" + }, + { + "desc":"DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM", + "product_code":"dli", + "title":"How Do I Manage Fine-Grained DLI Permissions?", + "uri":"dli_03_0100.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"201" + }, + { + "desc":"You cannot perform permission-related operations on the partition column of a partition table. However, when you grant the permission of any non-partition column in a par", + "product_code":"dli", + "title":"What Is Column Permission Granting of a DLI Partition Table?", + "uri":"dli_03_0008.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"202" + }, + { + "desc":"You can set actions and resources of different levels for various scenarios.Define an action.The format is Service name:Resource type:Action. You can use wildcard *. The ", + "product_code":"dli", + "title":"How Do I Create a Custom Policy?", + "uri":"dli_03_0101.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"203" + }, + { + "desc":"You can isolate queues allocated to different users by setting permissions to ensure data query performance.", + "product_code":"dli", + "title":"How Do I Manage Queue Permissions?", + "uri":"dli_03_0225.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"204" + }, + { + "desc":"When you submit a job, a message is displayed indicating that the job fails to be submitted due to insufficient permission caused by arrears. 
In this case, you need to ch", + "product_code":"dli", + "title":"What Should I Do When My Account Does Not Have Permission Due To Arrears?", + "uri":"dli_03_0140.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"205" + }, + { + "desc":"When the user update an existing program package, the following error information is displayed:\"error_code\"*CLI.0003\",\"error_msg\":\"Permission denied for resource 'resourc", + "product_code":"dli", + "title":"What Should I Do If the Permission Is Required to Update a Program Package?", + "uri":"dli_03_0195.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"206" + }, + { + "desc":"When the SQL query statement is executed, the system displays a message indicating that the user does not have the permission to query resources.Error information: DLI.00", + "product_code":"dli", + "title":"What Should I Do If \"DLI.0003: Permission denied for resource...\" Is Reported When I Run a SQL Statement?", + "uri":"dli_03_0227.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"207" + }, + { + "desc":"The table permission has been granted and verified. However, after a period of time, an error is reported indicating that the table query fails.There are two possible rea", + "product_code":"dli", + "title":"What Should I Do If the Table Permission Has Been Granted But the Table Still Cannot Be Queried?", + "uri":"dli_03_0228.html", + "doc_type":"usermanual", + "p_code":"200", + "code":"208" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Queue", + "uri":"dli_03_0049.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"209" + }, + { + "desc":"Currently, you are not allowed to modify the description of a created queue. You can add the description when purchasing the queue.", + "product_code":"dli", + "title":"Does the Description of a DLI Queue Can Be Modified?", + "uri":"dli_03_0109.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"210" + }, + { + "desc":"Deleting a queue does not cause table data loss in your database.", + "product_code":"dli", + "title":"Will Table Data in My Database Be Lost If I Delete a Queue?", + "uri":"dli_03_0166.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"211" + }, + { + "desc":"You need to develop a mechanism to retry failed jobs. 
When a faulty queue is recovered, your application tries to submit the failed jobs to the queue again.", + "product_code":"dli", + "title":"How Does DLI Ensure the Reliability of Spark Jobs When a Queue Is Abnormal?", + "uri":"dli_03_0170.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"212" + }, + { + "desc":"DLI allows you to subscribe to an SMN topic for failed jobs.Log in to the DLI console.In the navigation pane on the left, choose Queue Management.On the Queue Management ", + "product_code":"dli", + "title":"How Do I Monitor Queue Exceptions?", + "uri":"dli_03_0098.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"213" + }, + { + "desc":"To check the running status of the DLI queue and determine whether to run more jobs on that queue, you need to check the queue load.Search for Cloud Eye on the console.In", + "product_code":"dli", + "title":"How Do I View DLI Queue Load?", + "uri":"dli_03_0095.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"214" + }, + { + "desc":"You need to check the large number of jobs in the Submitting and Running states on the queue.Use Cloud Eye to view jobs in different states on the queue. The procedure is", + "product_code":"dli", + "title":"How Do I Determine Whether There Are Too Many Jobs in the Current Queue?", + "uri":"dli_03_0183.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"215" + }, + { + "desc":"Currently, DLI provides two types of queues, For SQL and For general use. SQL queues are used to run SQL jobs. General-use queues are compatible with Spark queues of earl", + "product_code":"dli", + "title":"How Do I Switch an Earlier-Version Spark Queue to a General-Purpose Queue?", + "uri":"dli_03_0065.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"216" + }, + { + "desc":"DLI queues do not use resources or bandwidth when no job is running. In this case, the running status of DLI queues is not displayed on CES.", + "product_code":"dli", + "title":"Why Cannot I View the Resource Running Status of DLI Queues on Cloud Eye?", + "uri":"dli_03_0193.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"217" + }, + { + "desc":"In DLI, 64 CU = 64 cores and 256 GB memory.In a Spark job, if the driver occupies 4 cores and 16 GB memory, the executor can occupy 60 cores and 240 GB memory.", + "product_code":"dli", + "title":"How Do I Allocate Queue Resources for Running Spark Jobs If I Have Purchased 64 CUs?", + "uri":"dli_03_0088.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"218" + }, + { + "desc":"Queue plans create failed. The plan xxx target cu is out of quota is displayed when you create a scheduled scaling task.The CU quota of the current account is insufficien", + "product_code":"dli", + "title":"What Should I Do If \"Queue plans create failed. The plan xxx target cu is out of quota\" Is Displayed When I Schedule CU Changes?", + "uri":"dli_03_0159.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"219" + }, + { + "desc":"After a SQL job was submitted to the default queue, the job runs abnormally. The job log reported that the execution timed out. The exception logs are as follows:[ERROR] ", + "product_code":"dli", + "title":"What Should I Do If SQL Statements Fails to be Executed on the Default Queue, and a Timeout Exception is Reported?", + "uri":"dli_03_0171.html", + "doc_type":"usermanual", + "p_code":"209", + "code":"220" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. 
The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Datasource Connections", + "uri":"dli_03_0022.html", + "doc_type":"usermanual", + "p_code":"85", + "code":"221" + }, + { + "desc":"You need to create a VPC peering connection to enable network connectivity. Take MRS as an example. If DLI and MRS clusters are in the same VPC, and the security group is", + "product_code":"dli", + "title":"Why Do I Need to Create a VPC Peering Connection for an Enhanced Datasource Connection?", + "uri":"dli_03_0128.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"222" + }, + { + "desc":"An enhanced datasource connection failed to pass the network connectivity test. Datasource connection cannot be bound to a queue. The following error information is displ", + "product_code":"dli", + "title":"Failed to Bind a Queue to an Enhanced Datasource Connection", + "uri":"dli_03_0237.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"223" + }, + { + "desc":"The outbound rule had been configured for the security group of the queue associated with the enhanced datasource connection. The datasource authentication used a passwor", + "product_code":"dli", + "title":"DLI Failed to Connect to GaussDB(DWS) Through an Enhanced Datasource Connection", + "uri":"dli_03_0238.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"224" + }, + { + "desc":"A datasource connection is created and bound to a queue. The connectivity test fails and the following error information is displayed:failed to connect to specified addre", + "product_code":"dli", + "title":"What Can I Do If the Datasource Connection is Created But the Network Connectivity Test Fails?", + "uri":"dli_03_0179.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"225" + }, + { + "desc":"Configuring the Connection Between a DLI Queue and a Data Source in a Private NetworkIf your DLI job needs to connect to a data source, for example, MRS, RDS, CSS, Kafka,", + "product_code":"dli", + "title":"How Do I Configure the Network Between a DLI Queue and a Data Source?", + "uri":"dli_03_0186.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"226" + }, + { + "desc":"The possible causes and solutions are as follows:If you have created a queue, do not bind it to a datasource connection immediately. Wait for 5 to 10 minutes. 
After the c", + "product_code":"dli", + "title":"What Can I Do If a Datasource Connection Is Stuck in Creating State When I Try to Bind a Queue to It?", + "uri":"dli_03_0257.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"227" + }, + { + "desc":"Enhanced datasource connections support only yearly/monthly and pay-per-use queues.You can bind a datasource connection only when you select Dedicated Resource Mode when ", + "product_code":"dli", + "title":"How Do I Bind a Datasource Connection to a Pay-per-Use Queue?", + "uri":"dli_03_0258.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"228" + }, + { + "desc":"DLI enhanced datasource connection uses VPC peering to directly connect the VPC networks of the desired data sources for point-to-point data exchanges.", + "product_code":"dli", + "title":"How Do I Connect DLI to Data Sources?", + "uri":"dli_03_0259.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"229" + }, + { + "desc":"To perform query on data stored on services rather than DLI, perform the following steps:Assume that the data to be queried is stored on multiple services (for example, O", + "product_code":"dli", + "title":"How Can I Perform Query on Data Stored on Services Rather Than DLI?", + "uri":"dli_03_0011.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"230" + }, + { + "desc":"Connect VPCs in different regions.Create an enhanced datasource connection on DLI and bind it to a queue.Add a DLI route.", + "product_code":"dli", + "title":"How Can I Access Data Across Regions?", + "uri":"dli_03_0085.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"231" + }, + { + "desc":"When data is inserted into DLI, set the ID field to NULL.", + "product_code":"dli", + "title":"How Do I Set the Auto-increment Primary Key or Other Fields That Are Automatically Filled in the RDS Table When Creating a DLI and Associating It with the RDS Table?", + "uri":"dli_03_0028.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"232" + }, + { + "desc":"Possible CausesThe network connectivity is abnormal. Check whether the security group is correctly selected and whether the VPC is correctly configured.The network connec", + "product_code":"dli", + "title":"Why Is the Error Message \"communication link failure\" Displayed When I Use a Newly Activated Datasource Connection?", + "uri":"dli_03_0047.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"233" + }, + { + "desc":"The cluster host information is not added to the datasource connection. As a result, the KRB authentication fails, the connection times out, and no error is recorded in l", + "product_code":"dli", + "title":"Connection Times Out During MRS HBase Datasource Connection, and No Error Is Recorded in Logs", + "uri":"dli_03_0080.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"234" + }, + { + "desc":"When you create a VPC peering connection for the datasource connection, the following error information is displayed:Before you create a datasource connection, check whet", + "product_code":"dli", + "title":"What Should I Do If the Subnet Cannot Be Found When I Create a DLI Datasource Connection?", + "uri":"dli_03_0111.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"235" + }, + { + "desc":"A cross-source RDS table was created in the DataArts Studio, and the insert overwrite statement was executed to write data into RDS. 
DLI.0999: BatchUpdateException: Incor", + "product_code":"dli", + "title":"Error Message \"Incorrect string value\" Is Displayed When insert overwrite Is Executed on a Cross-Source RDS Table", + "uri":"dli_03_0239.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"236" + }, + { + "desc":"The system failed to create a cross-source RDS table, and null pointer error was reported.The following table creation statement was used:The RDS database is in a PostGre", + "product_code":"dli", + "title":"Null Pointer Error Is Displayed When the System Creates a Cross-Source RDS Table", + "uri":"dli_03_0250.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"237" + }, + { + "desc":"The system failed to execute insert overwrite on the cross-source GaussDB(DWS) table, and org.postgresql.util.PSQLException: ERROR: tuple concurrently updated was display", + "product_code":"dli", + "title":"Error Message \"org.postgresql.util.PSQLException: ERROR: tuple concurrently updated\" Is Displayed When the System Executes insert overwrite on a Cross-Source GaussDB(DWS) Table", + "uri":"dli_03_0251.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"238" + }, + { + "desc":"A cross-source table was used to import data to a CloudTable HBase table. This HBase table contains a column family and a rowkey for 100 million simulating data records. ", + "product_code":"dli", + "title":"RegionTooBusyException Is Reported When Data Is Imported to a CloudTable HBase Table Through a Cross-Source Table", + "uri":"dli_03_0252.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"239" + }, + { + "desc":"A table was created on GaussDB(DWS) and then a datasource connection was created on DLI to read and write data. An error message was displayed during data writing, indica", + "product_code":"dli", + "title":"A Null Value Is Written Into a Non-Null Field When a DLI Datasource Connection Is Used to Connect to a GaussDB(DWS) Table", + "uri":"dli_03_0253.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"240" + }, + { + "desc":"A cross-source GaussDB(DWS) table and the datasource connection were created in DLI, and the schema of the source table in GaussDB(DWS) were updated. During the job execu", + "product_code":"dli", + "title":"An Insert Operation Failed After the Schema of the GaussDB(DWS) Source Table Is Updated", + "uri":"dli_03_0254.html", + "doc_type":"usermanual", + "p_code":"221", + "code":"241" + }, + { + "desc":"HUAWEI CLOUD Help Center presents technical documents to help you quickly get started with HUAWEI CLOUD services. The technical documents include Service Overview, Price Details, Purchase Guide, User Guide, API Reference, Best Practices, FAQs, and Videos.", + "product_code":"dli", + "title":"Change History", + "uri":"dli_01_00006.html", + "doc_type":"usermanual", + "p_code":"", + "code":"242" + } +] \ No newline at end of file diff --git a/docs/dli/umn/PARAMETERS.txt b/docs/dli/umn/PARAMETERS.txt new file mode 100644 index 00000000..6da8d5f0 --- /dev/null +++ b/docs/dli/umn/PARAMETERS.txt @@ -0,0 +1,3 @@ +version="" +language="en-us" +type="" \ No newline at end of file diff --git a/docs/dli/umn/dli_01_00006.html b/docs/dli/umn/dli_01_00006.html new file mode 100644 index 00000000..936c0e4c --- /dev/null +++ b/docs/dli/umn/dli_01_00006.html @@ -0,0 +1,20 @@ + + +
Released On + |
+What's New + |
+
---|---|
2023-01-30 + |
+This issue is the first official release. + |
+
You can use DLI to submit a Spark SQL job to query data. The general procedure is as follows:
+Step 1: Logging in to the Cloud Platform
+ +Step 3: Logging In to the DLI Management Console
+ + + + +The following illustrates how to query OBS data using DLI. Operations to query DLI data are similar.
+DLI allows you to query data stored on OBS. Before querying the data, you need to upload data to OBS.
+12,test+
After the file is uploaded successfully, the file path is obs://obs1/sampledata.csv.
+For more information about OBS operations, see the Object Storage Service Console Operation Guide.
+For more information about the tool, see the OBS Tool Guide.
+You are advised to use an OBS tool, such as OBS Browser+, to upload large files because OBS Console has restrictions on the file size and quantity.
+A queue is the basis for using DLI. Before executing an SQL job, you need to create a queue.
+For details, see Creating a Queue.
+ +Before querying data, create a database, for example, db1.
+The default database is a built-in database. You cannot create the database named default.
+create database db1;+
After database db1 is successfully created, db1 will be displayed in the Database list.
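If you prefer the SQL editor over the console view, you can also confirm that the database exists by listing all databases. This is only an optional verification step:
show databases;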
+When you execute a query on the DLI management console for the first time, you need to read the privacy agreement. You can perform operations only after you agree to the agreement. For later queries, you will not need to read the privacy agreement again.
+After database db1 is created, create a table (for example, table1) containing data in the sample file obs://obs1/sampledata.csv stored on OBS in db1.
+create table table1 (id int, name string) using csv options (path 'obs://obs1/sampledata.csv');+
After the table is created, click the Databases tab and then select db1. The created table table1 is displayed in the Table area.
+After performing the preceding steps, you can start querying data.
+select * from db1.table1 limit 1000;+
After the SQL statement is executed successfully, you can view the query result in View Result under the SQL job editing window.
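As a further example, the same table can be queried with an aggregation. The sketch below simply reuses the name column of the sample table table1 as the grouping column:
select name, count(*) as cnt from db1.table1 group by name;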
+The enhanced datasource connection uses VPC peering at the bottom layer to directly connect the VPC network between the DLI cluster and the destination datasource. Data is exchanged in point-to-point mode. The enhanced datasource connection function supports all cross-source services implemented by DLI, including CloudTable HBase, CloudTableOpenTSDB, MRS OpenTSDB, DWS, RDS, CSS, DCS, and DDS. In addition, UDFs, Spark jobs, and Flink jobs can be used to access self-built data sources.
+ +The enhanced datasource scenario provides the following functions:
+This page displays all enhanced datasource connections. If there are a large number of connections, they are displayed on multiple pages.
+ +Parameter + |
+Description + |
+
---|---|
Connection Name + |
+Name of the created datasource connection. + |
+
Connection Status + |
+Status of a datasource connection. Currently, the console displays only connections in the Active state. + |
+
VPC + |
+VPC used by the destination data source. + |
+
Subnet + |
+Subnet used by the destination data source. + |
+
Created + |
+Time when a connection is created. The connections in the connection list can be displayed according to the creation time in ascending or descending order. + |
+
Operation + |
+
|
+
A database is a repository built on storage devices in which data is organized, stored, and managed based on its structure.
A table is a key component of a database. It consists of rows and columns, where each column serves as a field and each value in a field (column) represents one type of data.
The database provides the framework, while the tables hold the actual data. A database can contain one or more tables.
+You can create databases and tables on the management console or using SQL statements. This section describes how to create a database and a table on the management console.
+A view can be created only by using SQL statements. You cannot create a view on the Create Table page.
++
Parameter + |
+Description + |
+Example + |
+
---|---|---|
Database Name + |
+
NOTE:
+The default database is a built-in database. You cannot create the database named default. + |
+DB01 + |
+
Description + |
+Description of a database. + |
+- + |
+
After a database is created, you can view and select the database for use on the Databases and Tables page or SQL Editor page.
+Before creating a table, ensure that a database has been created.
+Datasource connection tables, such as View tables, HBase (MRS) tables, OpenTSDB (MRS) tables, DWS tables, RDS tables, and CSS tables, cannot be created. You can use SQL to create views and datasource connection tables. For details, see sections Creating a View and Creating a Datasource Connection Table in the Data Lake Insight SQL Syntax Reference.
+Parameter + |
+Description + |
+Example + |
+
---|---|---|
Table Name + |
+
|
+table01 + |
+
Data Location + |
+Data storage location. Currently, DLI and OBS are supported. + |
+OBS + |
+
Description + |
+Description of the table. + |
+- + |
+
Column Type + |
+Available values: Normal or Partition + |
+Normal + |
+
Column + |
+Name of a column in a table. The column name must contain at least one letter and can contain underscores (_). It cannot contain only digits. +You can select Normal or Partition. Partition columns are dedicated to partition tables. User data is partitioned to improve query efficiency. + NOTE:
+The column name is case-insensitive and must be unique. + |
+name + |
+
Type + |
+Data type of a column. This parameter corresponds to Column Name. +
|
+string + |
+
Column Description + |
+Description of a column. + |
+- + |
+
Operation + |
+
|
+- + |
+
Parameter + |
+Description + |
+Example + |
+
---|---|---|
Data Format + |
+DLI supports the following data formats: +
|
+CSV + |
+
Storage Path + |
Enter or select an OBS path. The path can be a folder or a file.
NOTE:
+If you need to import data stored in OBS to the OBS table, set this parameter to the path of a folder. If the table creation path is a file, data fails to be imported. + |
+obs://obs1/sampledata.csv + |
+
Table Header: No/Yes + |
+This parameter is valid only when Data Format is set to CSV. Whether the data source to be imported contains the table header. +Click Advanced Settings and select the check box next to Table Header: No. If the check box is selected, the table header is displayed. If the check box is deselected, no table header is displayed. + |
+- + |
+
User-defined Delimiter + |
+This parameter is valid only when Data Format is set to CSV and you select User-defined Delimiter. +The following delimiters are supported: +
|
+Comma (,) + |
+
User-defined Quotation Character + |
+This parameter is valid only when Data Format is set to CSV and you select User-defined Quotation Character. +The following quotation characters are supported: +
|
+Single quotation mark (') + |
+
User-defined Escape Character + |
+This parameter is valid only when Data Format is set to CSV and you select User-defined Escape Character. +The following escape characters are supported: +
|
+Backslash (\) + |
+
Date Format + |
+This parameter is valid only when Data Format is set to CSV or JSON. +This parameter specifies the format of the date in the table and is valid only Advanced Settings is selected. The default value is yyyy-MM-dd. For definition of characters involved in the date pattern, see Table 3 in the + |
+2000-01-01 + |
+
Timestamp Format + |
+This parameter is valid only when Data Format is set to CSV or JSON. +This parameter specifies the format of the timestamp in the table and is valid only Advanced Settings is selected. The default value is yyyy-MM-dd HH:mm:ss. For definition of characters involved in the time pattern, see Table 3 in the + |
+2000-01-01 09:00:00 + |
+
After a table is created, you can view and select the table for use on the Data Management page or SQL Editor page.
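Tables with a Partition column type can also be created using SQL in the SQL editor. The statement below is only a rough sketch: the table name, columns, and OBS path are invented for illustration, and the exact clauses accepted by DLI should be checked against the Data Lake Insight SQL Syntax Reference:
create table student (name string, score int, classno int) using csv options (path 'obs://obs1/student/') partitioned by (classno);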
+The following describes how to create a datasource HBase connection for MRS.
+Only enhanced datasource connection to MRS HBase is supported.
+If a cluster is available, you do not need to apply for one.
+Enter the Connection Name, select the Bind Queue (optional), VPC, and Subnet, and enter the Host Information (optional). For details about the parameters, see Table 1.
+ + +Parameter + |
+Description + |
+
---|---|
Connection Name + |
+Name of the created datasource connection. +
|
+
Bound Queue + |
+(Optional) Bind a queue that requires datasource connections. + NOTE:
+Before using an enhanced datasource connection, you must bind a queue and ensure that the VPC peering connection is in the Active state. + |
+
VPC + |
+VPC used by the destination data source. + |
+
Subnet + |
+Subnet used by the destination data source. + |
+
Host Information + |
+(Optional) When connecting to the HBase cluster of MRS, enter the host name and IP address of the ZooKeeper instance. Enter one record in each line. The format is as follows: IP address Host name/Domain name. +To obtain the host name and IP address of the MRS cluster, perform the following steps (with MRS3.x as an example): +
NOTE:
+If the MRS cluster has multiple IP addresses, enter any service IP address when creating a datasource connection. + |
+
On the Enhanced tab page, you can enter the keyword of a connection name in the search box to search for the matching connection.
+On the Enhanced tab page, select a connection and click to view its details. The connection ID and host information are displayed.
On the Enhanced tab page, click Delete Connection in the Operation column to delete an unnecessary connection.
+A connection with Connection Status of Creating cannot be deleted.
+The Preview page displays the first 10 records in the table.
+Before using an enhanced datasource connection, you must bind a queue and ensure that the VPC peering connection is in the Active state.
+ + +Parameter + |
+Description + |
+
---|---|
VPC Peering ID + |
+ID of the VPC peering connection created in the cluster to which the queue belongs. + NOTE:
+A VPC peering connection is created for each queue bound to an enhanced datasource connection. The VPC peering connection is used for cross-VPC communication. Ensure that the security group used by the data source allows access from the DLI queue CIDR block, and do not delete the VPC peering connection during the datasource connection. + |
+
Name + |
+Name of a bound queue. + |
+
Connection Status + |
+Datasource connection status. The following three statuses are available: +
NOTE:
+If the connection status is Failed, click |
+
Updated + |
+Time when a connection is updated. The connections in the connection list can be displayed according to the update time in ascending or descending order. + |
+
Operation + |
+Unbind Queue: This operation is used to unbind a datasource connection from a queue. + |
+
If you do not need to use an enhanced datasource connection, you can unbind the queue from it to release resources.
+ + +You can export data from a DLI table to OBS. During the export, a folder is created in OBS or the content in the existing folder is overwritten.
++
Parameter + |
+Description + |
+
---|---|
Databases + |
+Database where the current table is located. + |
+
Table Name + |
+Name of the current table. + |
+
Data Format + |
+Format of the file storing data to be exported. Formats other than JSON will be supported in later versions. + |
+
Queue + |
+Select a queue. + |
+
Compression Format + |
+Compression format of the data to be exported. The following compression formats are supported: +
|
+
Storage Path + |
+
|
+
Export Mode + |
+Storage mode of the data to be exported. +
|
+
Table Header: No/Yes + |
+Whether the data to be exported contains the table header. + |
+
You can delete unnecessary databases and tables based on actual conditions.
+If a database or table is deleted, it cannot be recovered. Exercise caution when performing this operation.
+You cannot delete databases that contain tables. To delete a database containing tables, delete the tables first.
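If you prefer SQL, the same cleanup can be done in the SQL editor. A minimal sketch using the sample names from this guide is shown below; note that the database can be dropped only after its tables have been deleted:
drop table if exists db1.table1;
drop database if exists db1;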
+Method 1: Copy hosts information in /etc/hosts of an MRS node.
+cat /etc/hosts
+Method 2: Log in to FusionInsight Manager to obtain MRS hosts information.
+The host information is in the format of Service IP address Host name. Specify the IP addresses and host names obtained in 2, and separate multiple records by line breaks.
+For example:
+192.168.0.22 node-masterxxx1.com
+192.168.0.23 node-masterxxx2.com
+After an enhanced datasource connection is created and bound to a queue, the system automatically configures route information. You can also add a custom route for the queue to which the enhanced connection is bound.
+On the Enhanced tab page, select a connection and click Manage Route in the Operation column to view the route information of the datasource connection.
+On the Enhanced tab page, select a connection and choose More > Add Route in the Operation column, or click Add Route on the Details page of the connection to add a custom route. In the displayed dialog box, enter the route name and route CIDR block. For details about the parameters, see Table 1.
+ +Parameter + |
+Description + |
+
---|---|
Route Name + |
+Name of a custom route, which is unique in the same enhanced datasource scenario. The name contains 1 to 64 characters, including digits, letters, underscores (_), and hyphens (-). + |
+
IP Address + |
+Custom route CIDR block. The CIDR block of different routes can overlap but cannot be the same. + |
+
You can assign queue permissions to new users, modify permissions for users who have some permissions of a queue, and revoke all permissions of a user on a queue.
+Table 1 describes the related parameters.
+ +Parameter + |
+Description + |
+
---|---|
Username + |
+Name of the authorized user. + NOTE:
+The username is an existing IAM user name and has logged in to the DLI management console. + |
+
Permission Settings + |
+
|
+
If all options under Set Permission are gray, you are not allowed to change permissions on this queue. You can apply to the administrator, queue owner, or other authorized users for queue permission granting and revoking.
+In the user list under Permission Info, select the user whose permission needs to be revoked and click Revoke Permission under Operation. In the Revoke Permission dialog box, click OK. All permissions on this queue are revoked.
+You can delete a queue based on actual conditions.
+If Delete in the Operation column is gray, the current user does not have the permission of deleting the queue. You can apply to the administrator for the permission.
+SQL jobs allow you to execute SQL statements entered in the SQL job editing window, import data, and export data.
+SQL job management provides the following functions:
+On the Overview page of the DLI console, click SQL Jobs to go to the SQL job management page. Alternatively, you can click Job Management > SQL Jobs. The job list displays all SQL jobs. If there are a large number of jobs, they will be displayed on multiple pages. You can switch to the specified page as needed. DLI allows you to view jobs in all statuses. By default, jobs in the job list are displayed in descending order of the job creation time.
+ +Parameter + |
+Description + |
+
---|---|
Queues + |
+Name of the queue to which a job belongs + |
+
Username + |
+Name of the user who executed the job. + |
+
Type + |
+Job type. The following types are supported: +
|
+
Status + |
+Job status. Possible values are as follows: +
|
+
Query + |
+SQL statements for operations such as exporting and creating tables +You can click |
+
Duration + |
+Running duration of a job + |
+
Created + |
+Time when a job is created. Jobs can be displayed in ascending or descending order of the job creation time. + |
+
Operation + |
+
NOTE:
+To export the logs, you need to obtain the permission to create an OBS bucket. +Log archiving and export are not available for synchronization jobs and jobs running on the default queue. + |
+
On the SQL Jobs page, you can search jobs with any of the following operations.
+On the SQL Jobs page, you can click in front of a job record to view details about the job.
The following describes how to load data, create a table, and select a job. The job details vary depending on the job types, status, and configuration options.
+On the SQL Jobs page, you can click Terminate in the Operation column to stop a submitting or running job.
+A maximum of 1000 records can be displayed in the query result on the console. To view more or all data, you can export the data to OBS. The procedure is as follows:
+You can export results on the SQL Jobs page or the SQL Editor page.
+If no column of the numeric type is displayed in the query result, the result cannot be exported.
+Parameter + |
+Description + |
+
---|---|
Data Format + |
+Format of the exported query result file. Formats other than JSON will be supported in later versions. + |
+
Queues + |
+The queue where the jobs are executed. SQL jobs can be executed only in SQL queues. For details about how to create a queue, see Creating a Queue. + |
+
Compression Format + |
+Compression format of the data to be exported. The following options are supported: +
|
+
Storage Path + |
+OBS path to store the result. + NOTE:
+
|
+
Export Mode + |
+Mode for saving the exported query result data. +
|
+
Number of Results + |
+Number of exported query results. If no value is entered or the value is 0, all results are exported. + |
+
Table Header + |
+Whether the data to be exported contains table headers. + |
+
You can grant or revoke permissions for enhanced datasource connections of other projects.
+For example, if project B needs to access the data source of project A, perform the following operations:
+After creating a VPC peering connection and route between the enhanced datasource connection of project A and the queue of project B, you can create a job in the queue of project B to access the data source of project A.
+To facilitate SQL operation execution, DLI allows you to customize query templates or save the SQL statements in use as templates. After templates are saved, you do not need to compile SQL statements. Instead, you can directly perform the SQL operations using the templates.
+SQL templates include sample templates and custom templates. The default sample template contains 22 standard TPC-H query statements, which can meet most TPC-H test requirements. For details, see TPC-H Sample Data in the SQL Template.
+SQL template management provides the following functions:
+In the upper right corner of the SQL Template page, click Set Property to determine whether to display templates by group.
+ +If you select Display by Group, the following display modes are available:
+The current sample template contains 22 standard TPC-H query statements. You can view the template name, description, and statements. For details about TPC-H examples, see TPC-H Sample Data in the SQL Template.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Indicates the template name. +
|
+
Description + |
+Description of the template you create. + |
+
Statement + |
+SQL statement created as the template. + |
+
Operation + |
+Execute: After you click this button, the system switches to the SQL Editor page, where you can modify or directly perform the statement as required. For details, see Executing the Template. + |
+
The existing sample templates apply to the following scenarios:
+The custom template list displays all templates you have created. You can view the template name, description, statements, and more.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Indicates the template name. +
|
+
Description + |
+Description of the template you create. + |
+
Statement + |
+SQL statement created as the template. + |
+
Operation + |
+
|
+
You can create a template on either the Job Templates or the SQL Editor page.
+Enter the template name, SQL statement, and description information. For details, see Table 3.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Indicates the template name. +
|
+
Statement + |
+SQL statement to be saved as a template. + |
+
Description + |
+Description of the template you create. + |
+
Group + |
+
|
+
Group Name + |
+If you select Use existing or Use new, you need to enter the group name. + |
+
Enter the template name, SQL statement, and description information. For details, see Table 3.
+On the SQL Templates page, you can enter the template name keyword in the search box on the upper right corner to search for the desired template.
+Only custom templates can be modified. To modify a template, perform the following steps:
+DLI database and table management provide the following functions:
+The Databases and Tables page displays all created databases. You can view the database information, such as the owner and the number of tables.
+ +Parameter + |
+Description + |
+
---|---|
Database Name + |
+
|
+
Username + |
+Database owner. + |
+
Tables + |
+Number of tables in the database. + |
+
Description + |
+Description of the database specified during database creation. If no description is provided, -- is displayed. + |
+
Enterprise Project + |
+Enterprise project to which the database belongs. An enterprise project facilitates project-level management and grouping of cloud resources and users. +For details about how to set enterprise projects, see . + |
+
Operation + |
+
|
+
From the Data Management page, click the database name or Tables in the Operation column to switch to the Table Management page.
+The displayed page lists all tables created in the current database. You can view the table type, data storage location, and other information. Tables are listed in chronological order by default, with the most recently created tables displayed at the top.
+ +Parameter + |
+Description + |
+
---|---|
Table Name + |
+
|
+
Table Type + |
+Table type. Available options are as follows: +
|
+
Owner + |
+User who creates the table. + |
+
Storage Location + |
+DLI, OBS, View, CloudTable, and CSS data location + |
+
Size + |
+Size of the data in the table. The value is displayed only for tables of the Managed type. For tables of other types, -- is displayed. + |
+
Data Source Path + |
+
|
+
Created + |
+Time when the table is created. + |
+
Last Accessed + |
+Last time when an operation was performed on the table. + |
+
Operation + |
+
|
+
You can import data from OBS to a table created in DLI.
+The data to be imported has been stored on OBS.
++
Parameter + |
+Description + |
+Example + |
+
---|---|---|
Databases + |
+Database where the current table is located. + |
+- + |
+
Table Name + |
+Name of the current table. + |
+- + |
+
Queues + |
+Queue where the imported data will be used + |
+- + |
+
File Format + |
+Format of the data source file to be imported. The CSV, Parquet, ORC, JSON, and Avro formats are supported. Encoding format. Only UTF-8 is supported. + |
+CSV + |
+
Path + |
+You can directly enter a path or click
NOTE:
+The path can be a file or folder. + |
+obs://DLI/sampledata.csv + |
+
Table Header: No/Yes + |
+This parameter is valid only when File Format is set to CSV. Whether the data source to be imported contains the table header. +Click Advanced Settings and select the check box next to Table Header: No. If the check box is selected, the table header is displayed. If the check box is deselected, no table header is displayed. + |
+- + |
+
User-defined Delimiter + |
+This parameter is valid only when File Format is set to CSV and you select User-defined Delimiter. +The following delimiters are supported: +
|
+Default value: comma (,) + |
+
User-defined Quotation Character + |
+This parameter is valid only when File Format is set to CSV and User-defined Quotation Character is selected. +The following quotation characters are supported: +
|
+Default value: double quotation marks (") + |
+
User-defined Escape Character + |
+This parameter is valid only when File Format is set to CSV and you select User-defined Escape Character. +The following escape characters are supported: +
|
+Default value: backslash (\) + |
+
Date Format + |
+This parameter is valid only when File Format is set to CSV or JSON. +This parameter specifies the format of the date in the table and is valid only Advanced Settings is selected. The default value is yyyy-MM-dd. + |
+2000-01-01 + |
+
Timestamp Format + |
+This parameter is valid only when File Format is set to CSV or JSON. +This parameter specifies the format of the timestamp in the table and is valid only Advanced Settings is selected. The default value is yyyy-MM-dd HH:mm:ss. For definition of characters involved in the time pattern, see Table 3 in the + |
+2000-01-01 09:00:00 + |
+
Error Records Path + |
+This parameter is valid only when File Format is set to CSV or JSON. +The parameter specifies the error data is stored in the corresponding OBS path and is valid only Advanced Settings is selected. + |
+obs://DLI/ + |
+
Currently, only the first 10 records are displayed.
+You can use SQL statements in the SQL job editor to execute data query. DLI supports SQL 2003 and complies with Spark SQL.
+On the Overview page, click SQL Editor in the navigation pane on the left or Create Job in the upper right corner of the SQL Jobs pane. The SQL Editor page is displayed.
+A message is displayed, indicating that a temporary DLI data bucket will be created. The created bucket is used to store temporary data generated by DLI, such as job logs. You cannot view job logs if you choose not to create it. You can to periodically delete objects in a bucket or transit objects between different storage classes. The bucket name is set by default.
+If you do not need to create a DLI temporary data bucket and do not want to view this message, select Do not show again and click Cancel.
+The SQL Editor page is composed of the following parts:
+No. + |
+Tab/Button + |
+Name + |
+Description + |
+
---|---|---|---|
1 + |
+Databases + |
+Displays all the existing databases and tables in these databases. +
|
+|
2 + |
+Queues + |
+Displays existing queues. + |
+|
3 + |
+Templates + |
+Click the drop-down button to view 22 built-in standard TPC-H query templates and custom templates. + |
+|
4 + |
+Create + |
+Click the button on the related page to create a queue, database, or table. For details, see Creating a Queue and Creating a Database or a Table. + |
+|
5 + |
+Refresh + |
+Click the button to refresh the existing queue, database, and table lists. + |
+|
6 + |
+Search + |
+Enter a keyword to search for a database and table. + |
+
No. + |
+Button + |
+Description + |
+
---|---|---|
2 + |
+Queues + |
+Select a queue from the drop-down list box. If no queue is available, the default queue is displayed. Refer to Creating a Queue and create a queue. +SQL jobs can be executed only on SQL queues. + |
+
3 + |
+Database + |
+Select a database from the drop-down list box. If no database is available, the default database is displayed. For details about how to create a database, see section Creating a Database or a Table. + NOTE:
+If you specify the database in the SQL statements, the database you choose from the drop-down list will not be used. + |
+
4 + |
+Execute + |
+Click this button to run the SQL statements in the job editing window. + |
+
5 + |
+Format + |
+Click this button to format the SQL statements. + |
+
6 + |
+Syntax Reference + |
+Click this button to view the Data Lake Insight SQL Syntax Reference. + |
+
7 + |
+Settings + |
+Add parameters and tags. +Parameter Settings: Set parameters in key/value format for SQL jobs. +Tags: Set tags in key/value format for SQL jobs. + |
+
8 + |
+More + |
+The drop-down list includes the following options: +
|
+
9 + |
+SQL statement editing area + |
+Area for you to write and edit SQL statements. + |
+
10 + |
+Cursor position + |
+The line and column where the cursor is in the job editing area. + |
+
11 + |
+Shortcuts + |
+For details, see Table 3. + |
+
Shortcut | Description
---|---
Ctrl+Enter | Execute SQL statements. You can run SQL statements by pressing Ctrl+R or Ctrl+Enter on the keyboard.
Ctrl+F | Search for SQL statements. You can press Ctrl+F to search for a required SQL statement.
Shift+Alt+F | Format SQL statements. You can press Shift+Alt+F to format an SQL statement.
Ctrl+Q | Syntax verification. You can press Ctrl+Q to verify the syntax of SQL statements.
F11 | Full screen. You can press F11 to display the SQL editor window in full screen. Press F11 again to exit full screen.
The Executed Queries (Last Day) and View Result tab pages are displayed in the lower part of the page.
+Area + |
+Description + |
+
---|---|
Executed Queries (Last Day) + |
+Displays the latest daily information about the submitted jobs.
+
|
+
You can filter the execution history in the following ways:
+No. + |
+Button + |
+Description + |
+
---|---|---|
1 + |
+Clear All + |
+Clear the displayed SQL statement query results. + |
+
2 + |
+Display the query result in a graph or table. + |
+|
3 + |
+Exports the query result to OBS. For details, see Exporting Query Results. +A maximum of 1000 records can be displayed in the query result on the console. To view more or all data, you can click Export Result to export the data to OBS. + |
+
1 | SELECT * FROM qw.qw LIMIT 10; + |
Alternatively, you can double-click the table name qw. The query statement is automatically entered in the SQL job editing window.
+To use the SQL statement template, click Templates from the left pane of the SQL editor page. Double-click the required template in the template list, and click Execute or modify it as required before executing the SQL statements.
+Before executing a job, you need to create a queue.
+Parameter + |
+Description + |
+
---|---|
Name + |
+Name of a queue. +
NOTE:
+The queue name is case-insensitive. Uppercase letters will be automatically converted to lowercase letters. + |
+
Type + |
+
|
+
Specifications + |
+Select queue specifications as required. A CU includes one core and 4 GB memory. You can set the total number of CUs on all compute nodes of a queue. DLI automatically allocates the memory and vCPUs for each node. +
|
+
Description + |
+Description of the queue to be created. The description cannot exceed 256 characters. + |
+
Advanced Settings + |
+In the Queue Type area, select Dedicated Resource Mode and then click Advanced Settings.
+
|
+
After a queue is created, you can view and select the queue for use on the Queue Management page.
+It takes 6 to 10 minutes to run a job on a new queue for the first time.
+DLI allows you to submit program packages in batches to the general-use queue for running.
+To update a package, upload a package or file with the same name to the same location (the same group) on DLI to overwrite the original package or file.
+All software packages must be uploaded to OBS for storage in advance.
+Parameter + |
+Description + |
+
---|---|
Package Type + |
+The following package types are supported: +
|
+
Package File Path + |
+Select the OBS path of the corresponding packages. + NOTE:
+
|
+
Group Policy + |
+You can select Use existing group, Use new group, or No grouping. + |
+
Group Name + |
+
NOTE:
+
For details about how to manage permissions on package groups and packages, see Managing Permissions on Packages and Package Groups. + |
+
After a package is created, you can view and select the package for use on the Package Management page.
+You can delete a package based on actual conditions.
+You can use DLI to submit Spark jobs for real-time computing. The general procedure is as follows:
+Step 1: Logging in to the Cloud
+ +Step 3: Logging In to the DLI Management Console
+ + +Step 6: Submitting a Spark Job
+Write a Spark Jar job program, then compile and package it as spark-examples.jar. Perform the following steps to submit the job:
+Before submitting Spark Jar jobs, upload data files to OBS.
+After the file is uploaded successfully, the file path is obs://dli-test-obs01/spark-examples.jar.
+For more information about OBS operations, see the Object Storage Service Console Operation Guide.
+For more information about the tool, see the OBS Tool Guide.
+You are advised to use an OBS tool, such as OBS Browser+, to upload large files because OBS Console has restrictions on the file size and quantity.
+To submit Spark jobs, you need to enter the Spark job creation page first.
+If you log in to the DLI management console for the first time, you need to be authorized to access OBS. For details, see "Service Authorization".
+If you submit a Spark job for the first time, you need to create a queue first. For example, create a queue named sparktest and set Queue Type to General Queue.
+Before submitting a Spark job, you need to create a package, for example, spark-examples.jar.
+You can view and select the package on the Package Management page.
+For details about how to create a package, see "Creating a Package".
+For details about other parameters, see the description of the Spark job editing page in "Creating a Spark Job".
+When you click Execute on the DLI management console for the first time, you need to read the privacy agreement. Once you have agreed to it, you will not receive the privacy agreement message again for subsequent operations.
+In practice, developers create databases and tables and hand them over to test personnel for testing. After the test is complete, the databases and tables are handed over to O&M personnel. In such cases, you can change the owner of the databases and tables to transfer the data to other owners.
+Log in to DLI and go to the Overview page.
+The following table describes the functional areas of the Overview page.
+ +No. + |
+Area + |
+Description + |
+
---|---|---|
1 + |
+Navigation pane + |
+The navigation bar on the DLI management console, including Overview, SQL Editor, Job Management, Queue Management, Data Management, Job Templates, Datasource Connections, and Global Configuration. + |
+
2 + |
+DLI introduction + |
+Data Lake Insight (DLI) is a fully hosted big data processing and analysis service. It frees you from managing any server. DLI supports standard SQL and is compatible with Spark and Flink SQL. It also supports multiple access modes, and is compatible with mainstream data formats. + |
+
3 + |
+Upper right corner + |
+Click Create Queue to create a queue. + |
+
4 + |
+SQL job introduction + |
+Click Create Job in this area to access the SQL Editor page. For details about how to create a package, see SQL Editor. + |
+
5 + |
+Flink job introduction + |
+Click Create Job in this area to go to the Flink Jobs page. For details about how to create a package, see Flink Job Management. + |
+
6 + |
+Spark job introduction + |
+Click Create Job in this area to access the Spark Jobs page. For details about how to create a Spark job, see Creating a Spark Job. + |
+
8 + |
+Queue usage (hours) + |
+This area displays the usage of all queues of the last day, week, or month. + |
+
9 + |
+Selected queue usage (hours) + |
+This area displays the usage of a queue in the last week or month. + |
+
10 + |
+Common links + |
+Including: +
|
+
11 + |
+Get started in four simple steps + |
+
|
+
Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark and Apache Flink ecosystems. It frees you from managing any server.
+DLI supports standard SQL and is compatible with Spark SQL and Flink SQL. It also supports multiple access modes, and is compatible with mainstream data formats. DLI supports SQL statements and Spark applications for heterogeneous data sources, including CloudTable, RDS, GaussDB(DWS), CSS, OBS, custom databases on ECSs, and offline databases.
+You can query and analyze heterogeneous data sources such as RDS and DWS on the cloud using access methods such as the visualized interface, RESTful APIs, JDBC, ODBC, and Beeline. DLI is compatible with mainstream data formats, including CSV, JSON, Parquet, and ORC.
+For details, see .
+DLI is interconnected with OBS for data analysis. In this architecture where storage and compute are decoupled, resources of these two types are charged separately, helping you reduce costs and improve resource utilization.
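As a minimal illustration of this decoupled model (a sketch only: the bucket path, table name, and columns below are hypothetical, and the exact DDL options for your DLI version should be checked in the Data Lake Insight SQL Syntax Reference), a SQL job could create a table over CSV files stored in OBS and query it directly:

CREATE TABLE IF NOT EXISTS user_events (
  user_id   INT,
  user_name STRING,
  event_day DATE
)
USING csv
OPTIONS (path 'obs://my-dli-bucket/events/');   -- hypothetical OBS path

SELECT user_name, count(*) AS event_count
FROM user_events
GROUP BY user_name;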
+You can choose single-AZ or multi-AZ storage when you create an OBS bucket for storing redundant data on the DLI console. The differences between the two storage policies are as follows:
+DLI is a serverless big data query and analysis service. It has the following advantages:
+A web-based service management platform is provided. You can access DLI using the management console or HTTPS-based APIs, or connect to the DLI server through a client such as JDBC or ODBC.
+ + +DLI provides fully-managed Spark computing services by allowing you to execute Spark jobs.
+On the Overview page, click Create Job in the upper right corner of the Spark Jobs tab or click Create Job in the upper right corner of the Spark Jobs page. The Spark job editing page is displayed.
+On the Spark job editing page, a message is displayed, indicating that a temporary DLI data bucket will be created. The created bucket is used to store temporary data generated by DLI, such as job logs and job results. You cannot view job logs if you choose not to create it. The bucket will be created and the default bucket name is used.
+If you do not need to create a DLI temporary data bucket and do not want to receive this message, select Do not show again and click Cancel.
+On the Queue Management page, locate the queue you have created, and choose More > Test Address Connectivity in the Operation column to check whether the network connection between the queue and the data source is normal. For details, see Testing Address Connectivity.
+On the Spark job creation page, the navigation tree on the left contains the Queues and Packages tab pages.
+ + +No. + |
+Tab/Button + |
+Tab/Button Name + |
+Description + |
+
---|---|---|---|
1 + |
+Queues + |
+Display existing queues. + |
+|
2 + |
+Packages + |
+Display existing packages. + |
+|
3 + |
+Templates + |
+Templates + |
+Display job templates. + |
+
4 + |
+Create + |
+Create a queue or a package. + |
+|
5 + |
+Refresh + |
+Refresh the lists of existing queues and packages. + |
+|
6 + |
+Search for packages by name. + |
+On the Packages tab page, enter a package name for search. + |
+
In the job editing window, you can set parameters in Fill Form mode or Write API mode.
+The following uses Fill Form mode as an example. In Write API mode, refer to the Data Lake Insight API Reference for parameter settings.
+Parameter + |
+Description + |
+
---|---|
Job Name + |
+Set a job name. + |
+
Application + |
+Select the package to be executed. The value can be .jar or .py. + |
+
Main Class + |
+Enter the name of the main class. When the application type is .jar, the main class name cannot be empty. + |
+
Application Parameters + |
+User-defined parameters. Separate multiple parameters by Enter. +These parameters can be replaced by global variables. For example, if you create a global variable batch_num on the Global Configuration > Global Variables page, you can use {{batch_num}} to replace a parameter with this variable after the job is submitted. + |
+
Spark Arguments + |
+Enter a parameter in the format of key=value. Press Enter to separate multiple key-value pairs. +These parameters can be replaced by global variables. For example, if you create a global variable custom_class on the Global Configuration > Global Variables page, you can use "spark.sql.catalog"={{custom_class}} to replace a parameter with this variable after the job is submitted. + NOTE:
+The JVM garbage collection algorithm cannot be customized for Spark jobs. + |
+
Job Type + |
+Type of the Spark image used by a job. This parameter is mandatory when you select a CCE queue. The values are as follows: +
|
+
JAR Package Dependencies + |
+JAR file on which the Spark job depends. You can enter the JAR package name or the corresponding OBS path. The format is as follows: obs://Bucket name/Folder name/Package name. + |
+
Python File Dependencies + |
+py-files on which the Spark job depends. You can enter the Python file name or the corresponding OBS path of the Python file. The format is as follows: obs://Bucket name/Folder name/File name. + |
+
Other Dependencies + |
+Other files on which the Spark job depends. You can enter the name of the dependency file or the corresponding OBS path of the dependency file. The format is as follows: obs://Bucket name/Folder name/File name. + |
+
Group Name + |
+If you select a group when creating a package, you can select all the packages and files in the group. For details about how to create a package, see Creating a Package. + |
+
Access Metadata + |
+Whether to access metadata through Spark jobs. + |
+
Retry upon Failure + |
+Indicates whether to retry a failed job. +If you select Yes, you need to set the following parameters: +Maximum Retries: Maximum number of retry times. The maximum value is 100. + |
+
Advanced Settings + |
++ | +
Parameter + |
+Description + |
+
---|---|
Module Name + |
+Dependency modules provided by DLI for executing datasource connection jobs. To access different services, you need to select different modules.
+
|
+
Resource Package + |
+JAR package on which the Spark job depends. + |
+
Parameter + |
+Description + |
+
---|---|
Resource Specifications + |
+Select a resource specification from the drop-down list box. The system provides three resource specifications for you to select. The following configuration items in the resource specifications can be modified: +
If modified, your modified settings of the items are used. + |
+
Executor Memory + |
+Customize the configuration item based on the selected resource specifications. + |
+
Executor Cores + |
+Customize the configuration item based on the selected resource specifications. + |
+
Executors + |
+Customize the configuration item based on the selected resource specifications. + |
+
Driver Cores + |
+Customize the configuration item based on the selected resource specifications. + |
+
Driver Memory + |
+Customize the configuration item based on the selected resource specifications. + |
+
Spark job parameter calculation:
+The cluster management plane and the driver consume some CU resources, so Number of Executors x Number of Executor Cores must be smaller than the number of compute CUs of the queue. For example, on a 16-CU queue, 3 executors with 4 cores each (12 cores in total) leave CUs available for the driver and the management plane.
+DLI is built on open-source Spark, with optimized performance and restructured services. It is compatible with the Apache Spark ecosystem and interfaces and executes batch processing tasks.
+DLI also allows you to use Spark jobs to access DLI metadata.
+Spark job management provides the following functions:
+In addition, you can click Quick Links to view the related details in the User Guide.
+On the Overview page, click Spark Jobs to go to the Spark job management page. Alternatively, you can click Job Management > Spark Jobs. The page displays all Spark jobs. If there are a large number of jobs, they will be displayed on multiple pages. DLI allows you to view jobs in all statuses.
+ +Parameter + |
+Description + |
+
---|---|
Job ID + |
+ID of a submitted Spark job, which is generated by the system by default. + |
+
Name + |
+Name of a submitted Spark job. + |
+
Queues + |
+Queue where the submitted Spark job runs + |
+
Username + |
+Name of the user who executed the Spark job + |
+
Status + |
+Job status. The following values are available: +
|
+
Created + |
+Time when a job is created. Jobs can be displayed in ascending or descending order of the job creation time. + |
+
Last Modified + |
+Time when a job is completed. + |
+
Operation + |
+
|
+
On the Spark Jobs page, click Edit in the Operation column of the job. On the Spark job creation page that is displayed, modify parameters as required and execute the job.
+On the Spark Jobs page, select Status or Queues. The system displays the jobs that meet the filter condition in the job list.
+On the Spark Jobs page, choose More > Terminate Job in the Operation column of the job that you want to stop.
+On the Spark Jobs page, choose More > Export Log in the Operation column of the corresponding job. In the dialog box that is displayed, enter the path of the created OBS bucket and click OK.
+DLI built-in dependencies are provided by the platform by default. In case of conflicts, you do not need to upload them when packaging JAR packages of Spark or Flink Jar jobs.
+Only queues created after December 2020 can use the Flink 1.10 dependencies.
+Queues in DLI are computing resources, which are the basis for using DLI. All executed jobs require computing resources.
+Currently, DLI provides two types of queues, For SQL and For general use. SQL queues are used to run SQL jobs. General-use queues are compatible with Spark queues of earlier versions and are used to run Spark and Flink jobs.
Resource | How to Obtain | Function
---|---|---
Compute resource | Create a queue on the DLI management console. | Used for executing queries.
Storage resource | DLI has a 5 GB quota. | Used for storing data in databases and DLI tables.
Resources of a dedicated queue are not released when the queue is idle. That is, resources are reserved regardless of whether the queue is used. Dedicated queues ensure that resources exist when jobs are submitted.
+DLI allows you to flexibly scale in or out queues on demand. After a queue with specified specifications is created, you can scale it in and out as required.
+To change the queue specifications, see Elastic Scaling.
+Scaling can be performed for a newly created queue only when jobs are running on this queue.
+DLI allows you to schedule tasks for periodic queue scaling. After creating a queue, the scheduled scaling tasks can be executed.
+Scaling can be performed for a newly created queue only when jobs are running on this queue.
+Flink jobs use queues. DLI can automatically trigger scaling for jobs based on the job size.
+Scaling can be performed for a newly created queue only when there are jobs running on this queue.
+Queue Management provides the following functions:
+To receive notifications when a DLI job fails, SMN Administrator permissions are required.
+The queue list displays all queues created by you and the default queue. By default, queues are sorted by creation time, with the most recently created queues displayed at the top.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Name of a queue. + |
+
Type + |
+Queue type. +
|
+
Specifications + |
+Queue size. Unit: CU +CU is the pricing unit of queues. A CU consists of 1 vCPU and 4-GB memory. The computing capabilities of queues vary with queue specifications. The higher the specifications, the stronger the computing capability. + |
+
Actual CUs + |
+Actual size of the current queue. + |
+
Elastic Scaling + |
+Target CU value for scheduled scaling, or the maximum and minimum CU values of the current specifications. + |
+
Username + |
+Queue owner + |
+
Description + |
+Description of a queue specified during queue creation. If no description is provided, -- is displayed. + |
+
Operation + |
+
|
+
On the Job Management page of Flink jobs, you can submit a Flink job. Currently, the following job types are supported:
+Flink job management provides the following functions:
+Agencies are required for DLI to execute Flink jobs. You can set the agency when logging in to the management console for the first time or go to Global Configurations > Service Authorization to modify the agencies.
+The permissions are as follows:
+Due to cloud service cache differences, permission setting operations require about 60 minutes to take effect.
+Due to cloud service cache differences, permission setting operations require about 30 minutes to take effect.
+Due to cloud service cache differences, permission setting operations require about 3 minutes to take effect.
+Due to cloud service cache differences, permission setting operations require about 3 minutes to take effect.
+On the Overview page, click Flink Jobs to go to the Flink job management page. Alternatively, you can choose Job Management > Flink Jobs from the navigation pane on the left. The page displays all Flink jobs. If there are a large number of jobs, they will be displayed on multiple pages. DLI allows you to view jobs in all statuses.
+ +Parameter + |
+Description + |
+
---|---|
ID + |
+ID of a submitted Flink job, which is generated by the system by default. + |
+
Name + |
+Name of the submitted Flink job. + |
+
Type + |
+Type of the submitted Flink job. Including: +
|
+
Status + |
+Job statuses, including: +
|
+
Description + |
+Description of the submitted Flink job. + |
+
Username + |
+Name of the user who submits a job. + |
+
Created + |
+Time when a job is created. + |
+
Started + |
+Time when a Flink job starts to run. + |
+
Duration + |
+Time consumed by job running. + |
+
Operation + |
+
|
+
Package management provides the following functions:
+You can delete program packages in batches.
+Parameter + |
+Description + |
+
---|---|
Group Name + |
+Name of the group to which the package belongs. If the package is not grouped, -- is displayed. + |
+
Package Name + |
+Name of a package. + |
+
Owner + |
+Name of the user who uploads the package. + |
+
Type + |
+Type of a package. The following package types are supported: +
|
+
Status + |
+Status of the package to be created. +
|
+
Created + |
+Time when a package is created. + |
+
Updated + |
+Time when the package is updated. + |
+
Operation + |
+Manage Permissions: Manage user permissions for a package. +Delete: Delete the package. +More: +
|
+
+DLI supports and extends the native Spark datasource capability. With DLI datasource connections, you can access other data storage services through SQL statements, Spark jobs, and Flink jobs to import, query, analyze, and process data in those services.
+Before using DLI to perform cross source analysis, you need to set up a datasource connection to enable the network between data sources.
+The enhanced datasource connection uses VPC peering at the bottom layer to directly connect the VPC network between the DLI queue and the destination datasource. Data is exchanged in point-to-point mode.
+The enhanced datasource connection supports all cross-source services implemented by DLI and implements access to self-built data sources by using UDFs, Spark jobs, and Flink jobs.
+Currently, DLI supports datasource connection to the following data sources: CloudTable HBase, CloudTable OpenTSDB, CSS, DCS Redis, DDS Mongo, DIS, DMS, DWS, MRS HBase, MRS Kafka, MRS OpenTSDB, OBS, RDS MySQL, RDS PostGre, and SMN.
+To use DLI for cross-source analysis, you need to create datasource connections and then develop different jobs to access data sources. Perform the following steps:
+The following describes the basic processes of developing SQL jobs, Spark jobs, and Flink jobs for datasource connection.
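For example, a SQL job can map a table in RDS for MySQL into DLI over an enhanced datasource connection and query it directly. The following is an illustrative sketch only: the JDBC URL, database, table, and credentials are hypothetical, and the exact OPTIONS keys should be verified against the Data Lake Insight SQL Syntax Reference.

CREATE TABLE IF NOT EXISTS dli_rds_orders
USING JDBC OPTIONS (
  'url' = 'jdbc:mysql://192.168.0.10:3306',      -- private address reachable through the datasource connection (hypothetical)
  'dbtable' = 'sales.orders',                    -- hypothetical RDS database and table
  'user' = 'rds_user',
  'password' = '***',
  'driver' = 'com.mysql.jdbc.Driver'
);

SELECT order_id, amount FROM dli_rds_orders LIMIT 10;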
+A resource is an object that exists within a service. You can select DLI resources by specifying their paths.
Resource Type | Resource Names | Path
---|---|---
queue | DLI queue | queues.queuename
database | DLI database | databases.dbname
table | DLI table | databases.dbname.tables.tbname
column | DLI column | databases.dbname.tables.tbname.columns.colname
jobs | DLI Flink job | jobs.flink.jobid
resource | DLI package | resources.resourcename
group | DLI package group | groups.groupname
datasourceauth | DLI cross-source authentication information | datasourceauth.name
You can use Identity and Access Management (IAM) to implement fine-grained permissions control on DLI resources. For details, see Overview.
+If your cloud account does not need individual IAM users, then you may skip over this chapter.
+This section describes how to create an IAM user and grant DLI permissions to the user. Figure 1 shows the procedure.
+Before assigning permissions to user groups, you should learn about system policies and select the policies based on service requirements. For details about system permissions supported by DLI, see DLI System Permissions.
+Create a user group on the IAM console, and assign the DLI ReadOnlyAccess permission to the group.
+Create a user on the IAM console and add the user to the group created in 1.
+Log in to the management console as the newly created user, and verify the user's permissions.
+Once you have created a message notification topic, you can add a subscription to the topic on the Topic Management page of the Simple Message Notification service. You can subscribe in different ways (such as text messages or emails). After the subscription succeeds, job failure notifications will be automatically sent to your subscription endpoints. The operation is as follows:
+Datasource authentication is used to manage authentication information for accessing CSS and MRS security environments and encrypt passwords for accessing DWS, RDS, DDS, and DCS data sources.
+If a cluster is available, you do not need to apply for one.
+Upload the obtained authentication credential file to the user-defined OBS bucket.
+Parameter + |
+Description + |
+
---|---|
Type + |
+Select CSS. + |
+
Authentication Certificate + |
+Name of the datasource authentication information to be created. +
|
+
Username + |
+Username for logging in to the security cluster. + |
+
Password + |
+The password of the security cluster + |
+
Certificate Path + |
+The OBS path for uploading the security certificate + |
+
Parameter + |
+Description + |
+
---|---|
Type + |
+Select Kerberos. + |
+
Authentication Certificate + |
+Name of the datasource authentication information to be created. +
|
+
Username + |
+Username for logging in to the security cluster. + |
+
krb5_conf Path + |
+OBS path to which the krb5.conf file is uploaded + NOTE:
+The renew_lifetime configuration item under [libdefaults] must be removed from krb5.conf. Otherwise, the Message stream modified (41) problem may occur. + |
+
keytab Path + |
+OBS path to which the user.keytab file is uploaded + |
+
Parameter + |
+Description + |
+
---|---|
Type + |
+Select Kafka_SSL. + |
+
Authentication Certificate + |
+Name of the datasource authentication information to be created. +
|
+
Truststore Path + |
+OBS path to which the SSL truststore file is uploaded. + |
+
Truststore Password + |
+Truststore password. The default value is dms@kafka. + |
+
Keystore Path + |
+OBS path to which the SSL keystore file (key and certificate) is uploaded. + |
+
Keystore Password + |
+Keystore (key and certificate) password. + |
+
Key Password + |
+Password of the private key in the keystore file. + |
+
Create datasource authentication for accessing DWS, RDS, DCS, and DDS data sources.
+Currently, database password authentication supports Spark SQL jobs only.
+Parameter + |
+Description + |
+
---|---|
Type + |
+Select Password. + |
+
Authentication Certificate + |
+Name of the datasource authentication information to be created. +
|
+
Username + |
+Username for accessing the datasource + |
+
Password + |
+Password for accessing the datasource + |
+
On the Datasource Authentication tab, you can enter the authentication information name in the search box to search for the matching authentication information. To ensure user information security, the password field is not returned.
+On the Datasource Authentication tab, click Update in the Operation column of the authentication information to be modified. Currently, only the username and password can be updated. If you need to update the certificate, delete the authentication information and create a new one.
+The username and password are optional. If they are not set, the field is not modified.
+On the Datasource Authentication tab, click Delete in the Operation column of the authentication information to be deleted.
+DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM to manage DLI permissions. You can use both the DLI's permission control mechanism and the IAM service for permission management.
+When using DLI on the cloud, enterprise users need to manage DLI resources (queues) used by employees in different departments, including creating, deleting, using, and isolating resources. In addition, data of different departments needs to be managed, including data isolation and sharing.
+DLI uses IAM for refined enterprise-level multi-tenant management. IAM provides identity authentication, permissions management, and access control, helping you securely access your cloud resources.
+With IAM, you can use your cloud account to create IAM users for your employees, and assign permissions to the users to control their access to specific resource types. For example, some software developers in your enterprise need to use DLI resources but must not delete them or perform any high-risk operations. To achieve this result, you can create IAM users for the software developers and grant them only the permissions required for using DLI resources.
+A new user must log in to the console once so that the system records the metadata before the user can use DLI.
+IAM is free of charge. You pay only for the resources you use.
+If your cloud account does not need individual IAM users for permissions management, skip this chapter.
+Table 1 lists all the system-defined roles and policies supported by DLI.
+Role/Policy Name + |
+Description + |
+Category + |
+
---|---|---|
DLI FullAccess + |
+Full permissions for DLI. + |
+System-defined policy + |
+
DLI ReadOnlyAccess + |
+Read-only permissions for DLI. +With read-only permissions, you can use DLI resources and perform operations that do not require fine-grained permissions. For example, create global variables, create packages and package groups, submit jobs to the default queue, create tables in the default database, create datasource connections, and delete datasource connections. + |
+System-defined policy + |
+
Tenant Administrator + |
+Tenant administrator +
|
+System-defined role + |
+
DLI Service Admin + |
+DLI administrator +
|
+System-defined role + |
+
For details, see Creating an IAM User and Granting Permissions.
+Table 2 lists the DLI service permissions. For details about the resources that can be controlled by DLI, see Table 4.
+ +Permission Type + |
+Subtype + |
+Console Operations + |
+SQL Syntax + |
+API Definition + |
+
---|---|---|---|---|
Queue Permissions + |
+Queue management permissions + |
+For details, see Queue Permission Management. + |
+None + |
+For details, see "Granting Users with the Queue Usage Permission" in the Data Lake Insight API Reference. + |
+
Queue usage permission + |
+||||
Data Permissions + |
+Database permissions + |
+For details, see Database Permission Management and Table Permission Management. + |
+For details, see SQL Syntax of Batch Jobs > Data Permissions Management > Data Permissions List in the Data Lake Insight SQL Syntax Reference. + |
+For details, see Permission-related APIs > Granting Users with the Data Usage Permission in the Data Lake Insight API Reference. + |
+
Table permissions + |
+||||
Column permissions + |
+||||
Job Permissions + |
+Flink job permissions + |
+For details, see Managing Flink Job Permissions. + |
+None + |
+For details, see Permission-related APIs > Granting Users with the Data Usage Permission in the Data Lake Insight API Reference. + |
+
Package Permissions + + |
+Package group permissions + |
+For details, see Managing Permissions on Packages and Package Groups. + |
+None + |
+For details, see Permission-related APIs > Granting Users with the Data Usage Permission in the Data Lake Insight API Reference. + |
+
Package permissions + |
+||||
Datasource Connection Permissions + |
+Datasource connection permissions + |
+For details, see Managing Datasource Connection Permissions. + |
+None + |
+For details, see Permission-related APIs > Granting Users with the Data Usage Permission in the Data Lake Insight API Reference. + |
+
An Internet company mainly provides game and music services. DLI is used to analyze user behaviors and assist decision making.
+As shown in Figure 1, the Leader of the Basic Platform Team has applied for a Tenant Administrator account to manage and use cloud services. Since the Big Data Platform Team needs DLI for data analysis, the Leader of the Basic Platform Team adds a subaccount with the permission of DLI Service Admin to manage and use DLI. The Leader of the Basic Platform Team creates a Queue A and assigns it to Data Engineer A to analyze the gaming data. A Queue B is also assigned to Data Engineer B to analyze the music data. Besides granting the queue usage permission, the Leader of the Basic Platform Team grants data (except the database) management and usage permissions to the two engineers.
+Data Engineer A creates a table named gameTable for storing game prop data and a table named userTable for storing game user data. The music service is a new service. To explore potential music users among existing game users, Data Engineer A assigns the query permission on userTable to Data Engineer B. In addition, Data Engineer B creates a table named musicTable for storing music copyright information.
+Table 3 describes the queue and data permissions of Data Engineer A and Data Engineer B.
User | Data Engineer A (game data analysis) | Data Engineer B (music data analysis)
---|---|---
Queues | Queue A (queue usage permission) | Queue B (queue usage permission)
Data (Table) | gameTable (table management and usage permission) | musicTable (table management and usage permission)
Data (Table) | userTable (table management and usage permission) | userTable (table query permission)
The queue usage permission includes job submitting and terminating permissions.
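To illustrate the sharing step in this scenario, the following sketch shows how Data Engineer A might grant Data Engineer B query access to userTable using DLI SQL. The database name gamedb and the user name engineer_b are hypothetical, and the exact GRANT syntax and resource path format should be confirmed in the Data Lake Insight SQL Syntax Reference.

-- Hypothetical sketch: grant only the SELECT privilege on userTable to Data Engineer B.
GRANT SELECT ON databases.gamedb.tables.userTable TO USER engineer_b;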
+Table 1 lists the common operations supported by each system policy of DLI. Choose proper system policies according to this table. For details about the SQL statement permission matrix in DLI in terms of permissions on databases, tables, and roles, see SQL Syntax of Batch Jobs > Data Permissions Management > Data Permissions List in the Data Lake Insight SQL Syntax Reference.
+ +Resources + |
+Operation + |
+Description + |
+DLI FullAccess + |
+DLI ReadOnlyAccess + |
+Tenant Administrator + |
+DLI Service Admin + |
+
---|---|---|---|---|---|---|
Queue + |
+DROP_QUEUE + |
+Deleting a queue + |
+√ + |
+× + |
+√ + |
+√ + |
+
SUBMIT_JOB + |
+Submitting the job + |
+√ + |
+× + |
+√ + |
+√ + |
+|
CANCEL_JOB + |
+Terminating the job + |
+√ + |
+× + |
+√ + |
+√ + |
+|
RESTART + |
+Restarting a queue + |
+√ + |
+× + |
+√ + |
+√ + |
+|
GRANT_PRIVILEGE + |
+Granting permissions to the queue + |
+√ + |
+× + |
+√ + |
+√ + |
+|
REVOKE_PRIVILEGE + |
+Revoking permissions from the queue + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_PRIVILEGES + |
+Viewing the queue permissions of other users + |
+√ + |
+× + |
+√ + |
+√ + |
+|
Database + |
+DROP_DATABASE + |
+Deleting a database + |
+√ + |
+× + |
+√ + |
+√ + |
+
CREATE_TABLE + |
+Creating a table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
CREATE_VIEW + |
+Creating a view + |
+√ + |
+× + |
+√ + |
+√ + |
+|
EXPLAIN + |
+Explaining the SQL statement as an execution plan + |
+√ + |
+× + |
+√ + |
+√ + |
+|
CREATE_ROLE + |
+Creating a role + |
+√ + |
+× + |
+√ + |
+√ + |
+|
DROP_ROLE + |
+Deleting a role + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_ROLES + |
+Displaying a role + |
+√ + |
+× + |
+√ + |
+√ + |
+|
GRANT_ROLE + |
+Binding a role + |
+√ + |
+× + |
+√ + |
+√ + |
+|
REVOKE_ROLE + |
+Unbinding the role + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_USERS + |
+Displaying the binding relationships between all roles and users + |
+√ + |
+× + |
+√ + |
+√ + |
+|
GRANT_PRIVILEGE + |
+Granting permissions to the database + |
+√ + |
+× + |
+√ + |
+√ + |
+|
REVOKE_PRIVILEGE + |
+Revoking permissions to the database + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_PRIVILEGES + |
+Viewing database permissions of other users + |
+√ + |
+× + |
+√ + |
+√ + |
+|
DISPLAY_ALL_TABLES + |
+Displaying tables in the database + |
+√ + |
+√ + |
+√ + |
+√ + |
+|
DISPLAY_DATABASE + |
+Displaying databases + |
+√ + |
+√ + |
+√ + |
+√ + |
+|
CREATE_FUNCTION + |
+Creating a function + |
+√ + |
+× + |
+√ + |
+√ + |
+|
DROP_FUNCTION + |
+Deleting a function + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_FUNCTIONS + |
+Displaying all functions + |
+√ + |
+× + |
+√ + |
+√ + |
+|
DESCRIBE_FUNCTION + |
+Displaying function details + |
+√ + |
+× + |
+√ + |
+√ + |
+|
Table + |
+DROP_TABLE + |
+Deleting a table + |
+√ + |
+× + |
+√ + |
+√ + |
+
SELECT + |
+Querying a table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
INSERT_INTO_TABLE + |
+Inserting + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_ADD_COLUMNS + |
+Adding a column + |
+√ + |
+× + |
+√ + |
+√ + |
+|
INSERT_OVERWRITE_TABLE + |
+Rewriting + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_RENAME + |
+Renaming a table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_ADD_PARTITION + |
+Adding partitions to the partition table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_RENAME_PARTITION + |
+Renaming a table partition + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_DROP_PARTITION + |
+Deleting partitions from a partition table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_PARTITIONS + |
+Displaying all partitions + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_RECOVER_PARTITION + |
+Restoring table partitions + |
+√ + |
+× + |
+√ + |
+√ + |
+|
ALTER_TABLE_SET_LOCATION + |
+Setting the partition path + |
+√ + |
+× + |
+√ + |
+√ + |
+|
GRANT_PRIVILEGE + |
+Granting permissions to the table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
REVOKE_PRIVILEGE + |
+Revoking permissions from the table + |
+√ + |
+× + |
+√ + |
+√ + |
+|
SHOW_PRIVILEGES + |
+Viewing table permissions of other users + |
+√ + |
+× + |
+√ + |
+√ + |
+|
DISPLAY_TABLE + |
+Displaying a table + |
+√ + |
+√ + |
+√ + |
+√ + |
+|
DESCRIBE_TABLE + |
+Displaying table information + |
+√ + |
+× + |
+√ + |
+√ + |
+
If the CIDR block of the DLI queue conflicts with that of the user data source, you can change the CIDR block of the queue.
+If the queue whose CIDR block is to be modified has jobs that are being submitted or running, or the queue has been bound to enhanced datasource connections, the CIDR block cannot be modified.
+For example, user A is granted the permission to delete the testdb database. If the database is deleted and another one with the same name is created, you need to grant user A the deletion permission on the new testdb database again.
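The following minimal sketch (reusing the testdb name from the example above) shows the sequence that invalidates the grant; after the second statement runs, the deletion permission must be granted to user A again.

DROP DATABASE IF EXISTS testdb;   -- the original grant was bound to this database object
CREATE DATABASE testdb;           -- a new object with the same name, without the old grants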
+Permissions can be granted to new users or projects, modified for users or projects with existing permissions, or revoked from a user or project.
+Parameter + |
+Description + |
+
---|---|
Authorization Object + |
+Select User or Project. + |
+
Username/Project Name + |
+
|
+
Non-Inherited Permissions + |
+Select a permission to grant it to the user, or deselect a permission to revoke it. +Non-inherited permissions apply only to the current database. +
|
+
Inherited Permissions + |
+Select a permission to grant it to the user, or deselect a permission to revoke it. +Inherited permissions are applicable to the current database and all its tables. However, only the query permission is applicable to table columns. +The following permissions can be granted to both user and project. +
|
+
For a user or project that has some permissions on the database, you can revoke the existing permissions or grant new ones.
+If the options in Set Permission are gray, the corresponding account does not have the permission to modify the database. You can apply to the administrator, database owner, or other authorized users for granting and revoking permissions of databases.
+In the Project Permission Info list, locate the project for which you want to set permissions and click Set Permission.
+For details about the permissions of database users or projects, see Table 1.
+If a user is an administrator, Revoke Permission is gray, indicating that the user's permission cannot be revoked.
+For example, user A is granted the permission to delete the testTable table. If the table is deleted and another one with the same name is created, you need to grant user A the deletion permission on the new testTable table again.
+Permissions can be granted to new users or projects, modified for users or projects with existing permissions, or revoked from a user or project.
+Parameter + |
+Description + |
+
---|---|
Authorization Object + |
+Select User or Project. + |
+
Username/Project + |
+
|
+
Non-inheritable Permissions + |
+Select a permission to grant it to the user, or deselect a permission to revoke it. +
|
+
Parameter + |
+Description + |
+
---|---|
Authorization Object + |
+Select User or Project. + |
+
Username/Project + |
+
|
+
Non-inheritable Permissions + |
+Select a permission to grant it to the user, or deselect a permission to revoke it.
+
|
+
A view can be created only by using SQL statements. You cannot create a view on the Create Table page.
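For reference, a view is created with a SQL statement such as the hedged sketch below; the view, table, and column names are hypothetical.

CREATE VIEW active_users_view AS
  SELECT user_id, user_name
  FROM user_table
  WHERE status = 'active';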
+Parameter + |
+Description + |
+
---|---|
Authorization Object + |
+Select User or Project. + |
+
Username/Project + |
+
|
+
Non-inheritable Permissions + |
+Select a permission to grant it to the user, or deselect a permission to revoke it.
+
|
+
For a user or project that has some permissions on the database, you can revoke the existing permissions or grant new ones.
+If all options under Set Permission are gray, you are not allowed to change permissions on this table. You can apply to the administrator, table owner, or other authorized users for granting and revoking table permissions.
+In the Project Permission Info list, locate the project for which you want to set permissions and click Set Permission.
+Revoke all permissions of a user or a project.
+In the following cases, Revoke Permission is gray, indicating that the permission of the user cannot be revoked.
+If a project has only inheritable permissions, Revoke Permission is gray, indicating that the permissions of the project cannot be revoked.
+Custom policies can be created as a supplement to the system policies of DLI. You can add actions to custom policies. For the actions supported by custom policies, see "Permissions Policies and Supported Actions" in the Data Lake Insight API Reference.
+You can create custom policies in either of the following two ways:
+This section describes common DLI custom policies.
+The following example assumes that the authorized user has the permission to create tables in all databases in all regions:
+{ + "Version": "1.1", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dli:database:create_table" + ], + "Resource": [ + "dli:*:*:database:*" + ] + } + ] +}+
Version: 1.1 indicates a fine-grained permission policy that defines permissions required to perform operations on specific cloud resources under certain conditions.
+Effect: The value can be Allow or Deny. If both Allow and Deny are found in statements, Deny takes precedence.
+Action: A specific operation on a resource. A maximum of 100 actions are allowed.
+Conditions determine when a policy takes effect. A condition consists of a condition key and operator.
+A condition key is a key in the Condition element of a statement. There are global and service-level condition keys.
+An operator is used together with a condition key to form a complete condition statement. For details, see Table 1.
+IAM provides a set of DLI predefined condition keys. The following table lists the predefined condition keys of DLI.
Condition Key | Operator | Description
---|---|---
g:CurrentTime | Date and time | Time when an authentication request is received. NOTE: The time is expressed in the format defined by ISO 8601, for example, 2012-11-11T23:59:59Z.
g:MFAPresent | Boolean | Whether multi-factor authentication is used during user login
g:UserId | String | ID of the current login user
g:UserName | String | Current login user
g:ProjectName | String | Project that you have logged in to
g:DomainName | String | Domain that you have logged in to
The format is Service name:Region:Domain ID:Resource type:Resource path. The wildcard (*) indicates all options. For details about the resource types and path, see Table 4.
+Example:
+dli:*:*:queue:* indicates all queues.
+You can set actions and resources of different levels based on scenarios.
+The format is Service name:Resource type:Action. The wildcard is *. Example:
Action | Description
---|---
dli:queue:submit_job | Submission operations on a DLI queue
dli:queue:* | All operations on a DLI queue
dli:*:* | All operations on all DLI resource types
For more information about the relationship between operations and system permissions, see Common Operations Supported by DLI System Policy.
+The format is Service name:Region:Domain ID:Resource type:Resource path. The wildcard (*) indicates all resources. The five fields can be flexibly set. Different levels of permission control can be set for resource paths based on scenario requirements. If you need to set all resources of the service, you do not need to specify this field. For details about the definition of Resource, see Table 3. For details about the resource types and paths in Resource, see Table 4.
Resource | Description
---|---
DLI:*:*:table:databases.dbname.tables.* | DLI, any region, any account ID, all table resources of database dbname
DLI:*:*:database:databases.dbname | DLI, any region, any account ID, database dbname
DLI:*:*:queue:queues.* | DLI, any region, any account ID, any queue resource
DLI:*:*:jobs:jobs.flink.1 | DLI, any region, any account ID, the Flink job whose ID is 1
Resource Type | Resource Names | Path
---|---|---
queue | DLI queue | queues.queuename
database | DLI database | databases.dbname
table | DLI table | databases.dbname.tables.tbname
column | DLI column | databases.dbname.tables.tbname.columns.colname
jobs | DLI Flink job | jobs.flink.jobid
resource | DLI package | resources.resourcename
group | DLI package group | groups.groupname
datasourceauth | DLI cross-source authentication information | datasourceauth.name
The authorized user has the permission to create and delete any database, submit jobs for any queue, and delete any table under any account ID in any region of DLI.
+{ + "Version": "1.1", + "Statement": [ + { + "Effect": " Allow", + "Action": [ + "dli:database:create_database", + "dli:database:drop_database", + "dli:queue:submit_job", + "dli:table:drop_table" + ], + "Resource": [ + "dli:*:*:database:*", + "dli:*:*:queue:*", + "dli:*:*:table:*" + ] + } + ] +}+
{ + "Version": "1.1", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dli:database:create_table" + ], + "Resource": [ + "dli:*:*:database:*" + ] + } + ] +}+
{ + "Version": "1.1", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dli:column:select" + ], + "Resource": [ + "dli:*:*:column:databases.db.tables.tb.columns.col" + ] + } + ] +}+
A deny policy must be used in conjunction with other policies to take effect. If the permissions assigned to a user contain both Allow and Deny actions, the Deny actions take precedence over the Allow actions.
+{ + "Version": "1.1", + "Statement": [ + { + "Effect": "Deny", + "Action": [ + "dli:database:create_database", + "dli:database:drop_database", + "dli:queue:submit_job", + "dli:table:drop_table" + ], + "Resource": [ + "dli:*:*:database:*", + "dli:*:*:queue:*", + "dli:*:*:table:*" + ] + } + ] +}+
{ + "Version": "1.1", + "Statement": [ + { + "Effect": "Deny", + "Action": [ + "dli:queue:submit_job" + ], + "Resource": [ + "dli:*:*:queue:queues.demo" + ] + } + ] +}+
To create a Flink job, you need to specify the data source and the data output channel, that is, the source and the sink. To use another service as the source or sink stream, you need to enable that service first.
+Flink jobs support the following data sources and output channels:
+To use DIS as the data source and output channel, you need to enable DIS first.
+For details about how to create a DIS stream, see
+ in the Data Ingestion Service User Guide.After applying for a DIS stream, you can upload local data to DIS to provide data sources for Flink jobs in real time. For details, see
+ in the Data Ingestion Service User Guide.An example is provided as follows:
1,lilei,bmw320i,28
2,hanmeimei,audia4,27
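A Flink OpenSource SQL source table that reads this CSV data from the DIS stream might look like the following sketch. The column names, region, and channel are hypothetical, and the DIS connector option names are assumptions to be checked against the Flink OpenSource SQL syntax reference for your Flink version.

CREATE TABLE car_info (
  user_id    INT,
  user_name  STRING,
  car_brand  STRING,
  user_age   INT
) WITH (
  'connector' = 'dis',        -- assumed DIS connector name
  'region' = 'xx-xxxx-1',     -- hypothetical region
  'channel' = 'csinput',      -- hypothetical DIS stream name
  'format' = 'csv'
);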
To use OBS as the data source, enable OBS first. For details about how to enable OBS, see in the Object Storage Service Console Operation Guide.
+After you enable OBS, upload local files to OBS using the Internet. For detailed operations, see
+ in the Object Storage Service Console Operation Guide.To use RDS as the output channel, create an RDS instance. For details, see Creating a DB Instance in the Relational Database Service User Guide.
+To use SMN as the output channel, create an SMN topic to obtain the URN resource ID and then add topic subscription. For detailed operations, see Getting Started in the Simple Message Notification User Guide.
+If Kafka serves as both the source and sink streams, create an enhanced datasource connection between Flink jobs and Kafka. For details, see Enhanced Datasource Connections.
+If the port of the Kafka server is listened on by the host name, you need to add the mapping between the host name and IP address of the Kafka Broker node to the datasource connection.
+To use CloudTable as the data input and output channel, create a cluster in CloudTable and obtain the cluster ID.
+To use CSS as the data output channel, create a cluster in CSS and obtain the cluster's private network address. For details, see in the Cloud Search Service User Guide.
+To use DCS as the output channel, create a Redis cache instance in DCS and obtain the address used for Flink jobs to connect to the Redis instance. For detailed operations, see in the Distributed Cache Service User Guide.
+This section describes how to create a Flink SQL job. You can use Flink SQLs to develop jobs to meet your service requirements. Using SQL statements simplifies logic implementation. You can edit Flink SQL statements for your job in the DLI SQL editor. This section describes how to use the SQL editor to write Flink SQL statements.
+DLI Flink OpenSource SQL jobs are fully compatible with the syntax of Flink 1.10 and 1.12 provided by the community. In addition, Redis, DWS(GaussDB), and DIS data source types are added based on the community connector.
++
Parameter + |
+Description + |
+
---|---|
Type + |
+Set Type to Flink SQL. You will need to write SQL statements to start the job. + |
+
Name + |
+Name of a job. Enter 1 to 57 characters. Only letters, digits, hyphens (-), and underscores (_) are allowed. + NOTE:
+The job name must be globally unique. + |
+
Description + |
+Description of a job. It can contain up to 512 characters. + |
+
Template Name + |
+You can select a sample template or a custom job template. For details about templates, see Flink Template Management. + |
+
+Enter the SQL statements in the statement editing area. For details about SQL syntax, see the Data Lake Insight SQL Syntax Reference.
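As a minimal example of what could go into the editing area, the following sketch reads JSON events from one Kafka topic and writes a filtered stream to another, using the community Flink 1.12 Kafka connector options that DLI states it is compatible with; the topic names, broker address, and fields are hypothetical, and the legacy Flink SQL dialect uses a different CREATE SOURCE STREAM / CREATE SINK STREAM syntax (see the Data Lake Insight SQL Syntax Reference).

CREATE TABLE order_source (
  order_id     STRING,
  order_amount DOUBLE,
  order_time   TIMESTAMP(3)
) WITH (
  'connector' = 'kafka',
  'topic' = 'order_in',                                   -- hypothetical topic
  'properties.bootstrap.servers' = '192.168.0.20:9092',   -- hypothetical broker address
  'properties.group.id' = 'dli_demo',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'json'
);

CREATE TABLE order_sink (
  order_id     STRING,
  order_amount DOUBLE
) WITH (
  'connector' = 'kafka',
  'topic' = 'order_out',                                  -- hypothetical topic
  'properties.bootstrap.servers' = '192.168.0.20:9092',
  'format' = 'json'
);

INSERT INTO order_sink
SELECT order_id, order_amount
FROM order_source
WHERE order_amount > 100;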
++
Parameter + |
+Description + |
+
---|---|
Queue + |
+A shared queue is selected by default. You can select a custom queue as needed. + NOTE:
+
|
+
UDF Jar + |
+If you selected custom queues, you need to configure this parameter. +You can customize a UDF Jar file. Before you select a JAR file, upload the corresponding JAR package to the OBS bucket and create a package. For details, see Creating a Package. +In SQL, you can call a user-defined function that is inserted into a JAR file. + |
+
CUs + |
+Sum of the number of compute units and job manager CUs of DLI. One CU equals one vCPU and 4 GB. +The configured number of CUs is the number of CUs required for job running and cannot exceed the number of CUs in the bound queue. + |
+
Job Manager CUs + |
+Number of CUs of the management unit. + |
+
Parallelism + |
+Number of tasks that run concurrently for the Flink SQL job. Properly increasing the parallelism improves the overall computing capability of the job. However, the switchover overhead caused by the increase of threads must be considered. + NOTE:
+
|
+
Task Manager Configuration + |
+Whether to set Task Manager resource parameters +If this option is selected, you need to set the following parameters: +
|
+
OBS Bucket + |
+OBS bucket to store job logs and checkpoint information. If the selected OBS bucket is not authorized, click Authorize. + NOTE:
+If both Enable Checkpointing and Save Job Log are selected, you only need to authorize OBS once. + |
+
Save Job Log + |
+Whether to save the job running logs to OBS. The logs are saved in the following path: Bucket name/jobs/logs/Directory starting with the job ID. To go to this path, go to the job list and click the job name. On the Run Log tab page, click the provided OBS link. + CAUTION:
+You are advised to select this parameter. Otherwise, no run log is generated after the job is executed. If the job is abnormal, the run log cannot be obtained for fault locating. +If this option is selected, you need to set the following parameters: +OBS Bucket: Select an OBS bucket to store user job logs. If the selected OBS bucket is not authorized, click Authorize.
+ NOTE:
+If both Enable Checkpointing and Save Job Log are selected, you only need to authorize OBS once. + |
+
Alarm Generation upon Job Exception + |
+Whether to report job exceptions, for example, abnormal job running or exceptions due to an insufficient balance, to users via SMS or email +If this option is selected, you need to set the following parameters: +SMN Topic +Select a user-defined SMN topic. For details about how to customize SMN topics, see Creating a Topic in the Simple Message Notification User Guide. + |
+
Enable Checkpointing + |
+Whether to enable job snapshots. If this function is enabled, jobs can be restored based on the checkpoints. +If this option is selected, you need to set the following parameters:
+
|
+
Auto Restart upon Exception + |
+Whether to enable automatic restart. If this function is enabled, any job that has become abnormal will be automatically restarted. +If this option is selected, you need to set the following parameters: +
|
+
Idle State Retention Time + |
+Defines for how long the state of a key is retained without being updated before it is removed in GroupBy or Window. The default value is 1 hour. + |
+
Dirty Data Policy + |
+Select a policy for processing dirty data. The following policies are supported: Ignore, Trigger a job exception, and Save. + NOTE:
+Save indicates that the dirty data is stored to the OBS bucket selected above. + |
+
Dirty Data Dump Address + |
+Set this parameter when Dirty Data Policy is set to Save. Click the address box to select the OBS path for storing dirty data. + |
+
After the job is started, the system automatically switches to the
+ page, and the created job is displayed in the job list. You can view the job status in the column. After a job is successfully submitted, the job status will change from to . After the execution is complete, the message Completed is displayed.If the job status is to copy error information. After handling the fault based on the provided information, resubmit the job.
Other available buttons are as follows:
+This section describes how to create a Flink Jar job. You can perform secondary development based on Flink APIs, build your own JAR file, and submit the JAR file to DLI queues. DLI is fully compatible with open-source community APIs. To create a custom Flink job, you need to compile and build application JAR files. You must have a certain understanding of Flink secondary development and have high requirements related to stream computing complexity.
+On the Queue Management page, locate the queue you have created, and choose More > Test Address Connectivity in the Operation column to check whether the network connection between the queue and the data source is normal. For details, see Testing Address Connectivity.
+DLI does not support the download function. If you need to modify the uploaded data file, please edit the local file and upload it again.
++
Parameter + |
+Description + |
+
---|---|
Type + |
+Select Flink Jar. + |
+
Name + |
+Name of a job. Enter 1 to 57 characters. Only letters, digits, hyphens (-), and underscores (_) are allowed. + NOTE:
+The job name must be globally unique. + |
+
Description + |
+Description of a job. It can be up to 512 characters long. + |
+
Name + |
+Description + |
+
---|---|
Application + |
+User-defined package. Before selecting a JAR file, upload the corresponding JAR file to the OBS bucket and create a package. For details, see Creating a Package. For details about the built-in dependencies, see Flink 1.7.2 Dependencies and Flink 1.10 Dependencies. + |
+
Main Class + |
+Name of the main class in the JAR file to be loaded, for example, KafkaMessageStreaming. +
NOTE:
+When a class belongs to a package, the main class path must contain the complete package path, for example, packagePath.KafkaMessageStreaming. + |
+
Class Arguments + |
+List of arguments of a specified class. The arguments are separated by spaces. +Flink parameters support replacement of non-sensitive global variables. For example, if you add the global variable windowsize in Global Configuration > Global Variables, you can add the -windowsSize {{windowsize}} parameter for the Flink Jar job. + |
+
JAR Package Dependencies + |
+Select a user-defined package dependency. The dependent program packages are stored in the classpath directory of the cluster. +Before selecting a JAR file, upload the corresponding JAR file to the OBS bucket and create a package from it, selecting JAR as the package type. For details, see Creating a Package. +For details about the built-in dependencies, see Flink 1.7.2 Dependencies and Flink 1.10 Dependencies. + |
+
Other Dependencies + |
+User-defined dependency files. Other dependency files need to be referenced in the code. +Before selecting a dependency file, upload the file to the OBS bucket and create a package from it; the package type is not limited. For details, see Creating a Package. +You can add the following statement to the application to access the corresponding dependency file, where fileName indicates the name of the file to be accessed and ClassName indicates the name of the class that accesses the file (a sketch follows this table). +ClassName.class.getClassLoader().getResource("userData/fileName")+ |
+
Flink Version + |
+Before selecting a Flink version, you need to select the queue to which the Flink version belongs. Currently, the following versions are supported: 1.10. + |
+
Runtime Configuration + |
+User-defined optimization parameters. The parameter format is key=value. +Flink optimization parameters support replacement of non-sensitive global variables. For example, if you create the global variable phase in Global Configuration > Global Variables, you can add the optimization parameter table.optimizer.agg-phase.strategy={{phase}} to the Flink Jar job. + |
+
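+The following is a minimal sketch of how an application might read a file uploaded under Other Dependencies at runtime, as referenced in the table above. The class name WordCountJob and the file name userData/config.properties are hypothetical placeholders used only for illustration:
import java.io.InputStream;
import java.net.URL;
import java.util.Properties;

public class WordCountJob {
    public static void main(String[] args) throws Exception {
        // Locate a file uploaded as an "Other Dependencies" package; it is available
        // on the classpath under the userData/ directory.
        URL resource = WordCountJob.class.getClassLoader()
                .getResource("userData/config.properties");
        Properties props = new Properties();
        if (resource != null) {
            try (InputStream in = resource.openStream()) {
                props.load(in);
            }
        }
        // ... build and execute the Flink job using the loaded configuration ...
    }
}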
+
Name + |
+Description + |
+
---|---|
CUs + |
+One CU has one vCPU and 4-GB memory. The number of CUs ranges from 2 to 400. + |
+
Job Manager CUs + |
+Set the number of CUs on a management unit. The value ranges from 1 to 4. The default value is 1. + |
+
Parallelism + |
+Maximum number of parallel operators in a job. + NOTE:
+
|
+
Task Manager Configuration + |
+Whether to set Task Manager resource parameters +If this option is selected, you need to set the following parameters: +
|
+
Save Job Log + |
+Whether to save the job running logs to the OBS bucket. + CAUTION:
+You are advised to select this parameter. Otherwise, no run log is generated after the job is executed. If the job is abnormal, the run log cannot be obtained for fault locating. +If this option is selected, you need to set the following parameters: +OBS Bucket: Select an OBS bucket to store job logs. If the selected OBS bucket is not authorized, click Authorize. + |
+
Alarm Generation upon Job Exception + |
+Whether to report job exceptions, for example, abnormal job running or exceptions due to an insufficient balance, to users via SMS or email +If this option is selected, you need to set the following parameters: +SMN Topic +Select a user-defined SMN topic. For details about how to customize SMN topics, see Creating a Topic in the Simple Message Notification User Guide. + |
+
Auto Restart upon Exception + |
+Whether to enable automatic restart. If this function is enabled, any job that has become abnormal will be automatically restarted. +If this option is selected, you need to set the following parameters: +
|
+
After the job is started, the system automatically switches to the job management page, and the created job is displayed in the job list. You can view the job status in the Status column. After a job is successfully submitted, its status changes accordingly. After the execution is complete, the message Completed is displayed. If the job becomes abnormal, copy the error information, handle the fault based on the provided information, and then resubmit the job.
Other available buttons are as follows:
+Save As: Save the created job as a new job.
+The job debugging function helps you check the logic correctness of your compiled SQL statements before running a job.
+For a job that is being created, you can debug the job on the editing page.
+If you select this value, prepare OBS data first before using DLI. For details, see Preparing Flink Job Data. OBS data is stored in CSV format, where multiple records are separated by line breaks and different fields in a single record are separated by commas (,). In addition, you need to select a specific object in OBS as the input source data.
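+For reference, an OBS object in this format might look like the following; the field values are hypothetical and only illustrate the record and field separators:
car001,2021-03-24 10:00:00,60.5
car002,2021-03-24 10:00:05,72.0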
+ +If you select this value, compile SQL statements as data sources. In this mode, you need to enter the value of each field in a single record.
+ +After a job is created, you can perform operations on the job as required.
+You can edit a created job, for example, by modifying the SQL statement, job name, job description, or job configurations.
+For details about how to edit a Flink SQL job, see Step 5 to Step 7 in Creating a Flink SQL Job.
+For details about how to edit a user-defined Flink job, see Step 5 to Step 7 in Creating a Flink Jar Job.
+You can start a saved or stopped job.
+Select a job and click Start in the Operation column.
+Alternatively, you can select the row where the job you want to start locates and click Start in the upper left of the job list.
+Select the rows where the jobs you want to start locate and click Start in the upper left of the job list.
+After you click Start, the Start Flink Jobs page is displayed.
+After a job is started, you can view the job execution result in the
+ column.You can stop a job in the Running or Submitting state.
+Locate the row that contains the job to be stopped, click More in the Operation column, and select Stop.
+Alternatively, you can select the row where the job you want to stop locates and click Stop in the upper left of the job list.
+Locate the rows containing the jobs you want to stop and click Stop in the upper left of the job list.
+If you do not need to use a job, perform the following operations to delete it. A deleted job cannot be restored. Therefore, exercise caution when deleting a job.
+Locate the row containing the job you want to delete and click in the Operation column.
+Alternatively, you can select the row containing the job you want to delete and click Delete in the upper left of the job list.
+ +Select the rows containing the jobs you want to delete and click Delete in the upper left of the job list.
+You can export the created Flink jobs to an OBS bucket.
+This mode is useful when you switch to another region, project, or user and need to create a large number of jobs. Instead of creating the jobs again, you can export the original jobs, and then log in to the system in the new region or project, or as the new user, and import them.
+When switching to another project or user, you need to grant permissions to the new project or user. For details, see Managing Flink Job Permissions.
+By default, configurations of all jobs are exported. You can enable the Custom Export function to export configurations of the desired jobs.
+You can import the Flink job configuration file stored in the OBS bucket to the Flink Jobs page of DLI.
+This mode is useful when you switch to another region, project, or user and need to create a large number of jobs. Instead of creating the jobs again, you can export the original jobs, and then log in to the system in the new region or project, or as the new user, and import them.
+If you need to import a self-created job, you are advised to use the job creation function. For details, see Creating a Flink SQL Job or Creating a Flink Jar Job.
+You can change the job name and description as required.
+You can click Trigger Savepoint for jobs in the Running status to save the job status.
+You can import a savepoint to restore the job status. For details about the savepoint, see Checkpointing at the official website of Flink.
+You need to select the OBS path of the savepoint.
+You can select Runtime Configuration to configure job exception alarms and restart options.
+Flink SQL jobs and Flink Jar jobs are supported.
+Parameter + |
+Description + |
+
---|---|
Alarm Generation upon Job Exception + |
+Whether to report job exceptions, for example, abnormal job running or exceptions due to an insufficient balance, to users via SMS or email. +If this option is selected, you need to set the following parameters: +SMN Topic +Select a user-defined SMN topic. For details about how to customize SMN topics, see Creating a Topic in the Simple Message Notification User Guide. + |
+
Auto Restart upon Exception + |
+Whether to enable automatic restart. If this function is enabled, any job that has become abnormal will be automatically restarted. +If this option is selected, you need to set the following parameters: +
|
+
After creating a job, you can view the job details to learn about the following information:
+This section describes how to view job details. After you create and save a job, you can click the job name to view job details, including SQL statements and parameter settings. For a Jar job, you can only view its parameter settings.
+On the Job Details tab, you can view the SQL statements and configured parameters.
+Parameter + |
+Description + |
+
---|---|
Type + |
+Job type, for example, Flink SQL + |
+
Name + |
+Flink job name + |
+
Description + |
+Description of a Flink job + |
+
Status + |
+Running status of a job + |
+
Running Mode + |
+If your job runs on a shared queue, this parameter is Shared. +If your job runs on a custom queue with dedicated resources, this parameter is Exclusive. + |
+
Queue + |
+If the queue where the job runs is a shared queue, the shared queue is displayed. +If the queue where the job runs is a custom queue with dedicated resources, the queue name is displayed. + |
+
UDF Jar + |
+This parameter is displayed when a non-shared queue is selected for the job and UDF Jar is configured. + |
+
Runtime Configuration + |
+Displayed when a user-defined parameter is added to a job + |
+
CUs + |
+Number of CUs configured for a job + |
+
Job Manager CUs + |
+Number of job manager CUs configured for a job. + |
+
Parallelism + |
+Number of tasks that run concurrently in a Flink job + |
+
CU(s) per TM + |
+Number of CUs occupied by each Task Manager configured for a job + |
+
Slot(s) per TM + |
+Number of Task Manager slots configured for a job + |
+
OBS Bucket + |
+OBS bucket name. After Enable Checkpointing and Save Job Log are enabled, checkpoints and job logs are saved in this bucket. + |
+
Save Job Log + |
+Whether the job running logs are saved to OBS + |
+
Alarm Generation upon Job Exception + |
+Whether job exceptions are reported + |
+
SMN Topic + |
+Name of the SMN topic. This parameter is displayed when Alarm Generation upon Job Exception is enabled. + |
+
Auto Restart upon Exception + |
+Whether automatic restart is enabled. + |
+
Max. Retry Attempts + |
+Maximum number of retries upon an exception. Unlimited means the number of retries is not limited. + |
+
Restore Job from Checkpoint + |
+Whether the job can be restored from a checkpoint + |
+
ID + |
+Job ID + |
+
Savepoint + |
+OBS path of the savepoint + |
+
Enable Checkpointing + |
+Whether checkpointing is enabled + |
+
Checkpoint Interval + |
+Interval between storing intermediate job running results to OBS. The unit is second. + |
+
Checkpoint Mode + |
+Checkpoint mode. Available values are as follows: +
|
+
Idle State Retention Time + |
+Defines for how long the state of a key is retained without being updated before it is removed in GroupBy or Window. + |
+
Dirty Data Policy + |
+Policy for processing dirty data. The value is displayed only when there is a dirty data policy. Available values are as follows: +Ignore +Trigger a job exception +Save + |
+
Dirty Data Dump Address + |
+OBS path for storing dirty data when Dirty Data Policy is set to Save. + |
+
Created + |
+Time when a job is created + |
+
Updated + |
+Time when a job was last updated + |
+
You can use Cloud Eye to view details about job data input and output.
+Click Job Monitoring in the upper right corner of the page to switch to the Cloud Eye console.
+The following table describes monitoring metrics related to Flink jobs.
+ +Name + |
+Description + |
+
---|---|
Flink Job Data Read Rate + |
+Displays the data input rate of a Flink job for monitoring and debugging. Unit: record/s. + |
+
Flink Job Data Write Rate + |
+Displays the data output rate of a Flink job for monitoring and debugging. Unit: record/s. + |
+
Flink Job Total Data Read + |
+Displays the total number of data inputs of a Flink job for monitoring and debugging. Unit: records + |
+
Flink Job Total Data Write + |
+Displays the total number of output data records of a Flink job for monitoring and debugging. Unit: records + |
+
Flink Job Byte Read Rate + |
+Displays the number of input bytes per second of a Flink job. Unit: byte/s + |
+
Flink Job Byte Write Rate + |
+Displays the number of output bytes per second of a Flink job. Unit: byte/s + |
+
Flink Job Total Read Byte + |
+Displays the total number of input bytes of a Flink job. Unit: byte + |
+
Flink Job Total Write Byte + |
+Displays the total number of output bytes of a Flink job. Unit: byte + |
+
Flink Job CPU Usage + |
+Displays the CPU usage of Flink jobs. Unit: % + |
+
Flink Job Memory Usage + |
+Displays the memory usage of Flink jobs. Unit: % + |
+
Flink Job Max Operator Latency + |
+Displays the maximum operator delay of a Flink job. The unit is ms. + |
+
Flink Job Maximum Operator Backpressure + |
+Displays the maximum operator backpressure value of a Flink job. A larger value indicates severer backpressure. +0: OK +50: low +100: high + |
+
You can view details about each task running on a job, including the task start time, number of received and transmitted bytes, and running duration.
+If the value is 0, no data is received from the data source.
+Parameter + |
+Description + |
+
---|---|
Name + |
+Name of an operator. + |
+
Duration + |
+Running duration of an operator. + |
+
Max Concurrent Jobs + |
+Number of parallel tasks in an operator. + |
+
Task + |
+Operator tasks are categorized as follows: +
|
+
Status + |
+Status of an operator task. + |
+
Back Pressure Status + |
+Working load status of an operator. Available options are as follows: +
|
+
Delay + |
+Duration from the time when source data starts being processed to the time when data reaches the current operator. The unit is millisecond. + |
+
Sent Records + |
+Number of data records sent by an operator. + |
+
Sent Bytes + |
+Number of bytes sent by an operator. + |
+
Received Bytes + |
+Number of bytes received by an operator. + |
+
Received Records + |
+Number of data records received by an operator. + |
+
Started + |
+Time when an operator starts running. + |
+
Ended + |
+Time when an operator stops running. + |
+
You can view the execution plan to understand the operator stream information about the running job.
+You can view the submission logs to locate the fault.
+You can view the run logs to locate the faults occurring during job running.
+Information about JobManager and TaskManager is updated every minute. Only run logs of the last minute are displayed by default.
+If you select an OBS bucket for saving job logs during the job configuration, you can switch to the OBS bucket and download log files to view more historical logs.
+If the job is not running, information on the Task Manager page cannot be viewed.
+Flink templates include sample templates and custom templates. You can modify an existing sample template to meet the actual job logic requirements and save time for editing SQL statements. You can also customize a job template based on your habits and methods so that you can directly invoke or modify the template in later jobs.
+Flink template management provides the following functions:
+The template list displays existing sample templates for Flink SQL jobs. Table 1 describes the parameters in the template list.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Name of a template, which contains 1 to 64 characters and only letters, digits, hyphens (-), and underscores (_). + |
+
Description + |
+Description of a template. It contains 0 to 512 characters. + |
+
Operation + |
+Create Job: Create a job directly by using the template. After a job is created, the system switches to the Edit page under . + |
+
The existing sample templates apply to the following scenarios:
+The custom template list displays all Jar job templates. Table 1 describes parameters in the custom template list.
+ +Parameter + |
+Description + |
+
---|---|
Name + |
+Name of a template, which contains 1 to 64 characters and only letters, digits, hyphens (-), and underscores (_). + |
+
Description + |
+Description of a template. It contains 0 to 512 characters. + |
+
Created + |
+Time when a template is created. + |
+
Updated + |
+Latest time when a template is modified. + |
+
Operation + |
+
|
+
You can create a template using any of the following methods:
+Parameter + |
+Description + |
+
---|---|
Name + |
+Name of a template, which contains 1 to 64 characters and only letters, digits, hyphens (-), and underscores (_). + NOTE:
+The template name must be unique. + |
+
Description + |
+Description of a template. It contains 0 to 512 characters. + |
+
Parameter + |
+Description + |
+
---|---|
Name + |
+You can modify the template name. + |
+
Description + |
+You can modify the template description. + |
+
Saving Mode + |
+
|
+
SQL statement editing area + |
+In the area, you can enter detailed SQL statements to implement business logic. For details about how to compile SQL statements, see the Data Lake Insight SQL Syntax Reference. + |
+
Save + |
+Save the modifications. + |
+
Create Job + |
+Use the current template to create a job. + |
+
Format + |
+Format SQL statements. After SQL statements are formatted, you need to compile SQL statements again. + |
+
Theme Settings + |
+Change the font size, word wrap, and page style (black or white background). + |
+
You can create jobs based on sample templates or custom templates.
+After creating a custom template, you can modify it as required. The sample template cannot be modified, but you can view the template details.
+You can delete a custom template as required. The sample templates cannot be deleted. Deleted templates cannot be restored. Exercise caution when performing this operation.
+Alternatively, you can delete a template by performing the following operations: In the custom template list, locate the row where the template you want to delete resides, and click More > Delete in the Operation column.
+Request conditions are useful in determining when a custom policy takes effect. A request condition consists of a condition key and operator. Condition keys are either global or service-level and are used in the Condition element of a policy statement. Global condition keys (starting with g:) are available for operations of all services, while service-level condition keys (starting with a service name such as dli) are available only for operations of a specific service. An operator is used together with a condition key to form a complete condition statement.
+IAM provides a set of DLI predefined condition keys. The following table lists the predefined condition keys of DLI.
+ +Condition Key + |
+Operator + |
+Description + |
+
---|---|---|
g:CurrentTime + |
+Date and time + |
+Time when an authentication request is received + NOTE:
+The time is expressed in the format defined by ISO 8601, for example, 2012-11-11T23:59:59Z. + |
+
g:MFAPresent + |
+Boolean + |
+Whether multi-factor authentication is used during user login + |
+
g:UserId + |
+String + |
+ID of the current login user + |
+
g:UserName + |
+String + |
+Current login user + |
+
g:ProjectName + |
+String + |
+Project that you have logged in to + |
+
g:DomainName + |
+String + |
+Domain that you have logged in to + |
+
+Global variables can be used to simplify complex parameters. For example, a long and hard-to-read value can be replaced with a variable to improve the readability of SQL statements.
+Parameter + |
+Description + |
+
---|---|
Variable + |
+Name of the created global variable. + |
+
Sensitive + |
+If the value is sensitive information, such as passwords, set this parameter to Yes. Otherwise, set it to No. + |
+
Value + |
+Global variable value. + |
+
create table {{table_name}} (String1 String, int4 int, varchar1 varchar(10)) + partitioned by (int1 int,int2 int,int3 int)+
On the Global Variables page, click Modify in the Operation column of a variable to modify the variable value.
+Only the user who creates a global variable can modify the variable.
+On the Global Variables page, click Delete in the Operation column of a variable to delete the variable value.
+On the Package Management page, click Manage Permissions in the Operation column of the target package. On the displayed User Permission Info page, you can grant permissions for the package group or package, set and revoke user permissions.
+Click Grant Permission in the upper right corner of the page.
+Parameter + |
+Description + |
+
---|---|
Username + |
+Name of the authorized IAM user. + NOTE:
+The username is the name of an existing IAM user. + |
+
Select the permissions to be granted to the user + |
+
|
+
Parameter + |
+Description + |
+
---|---|
Username + |
+Name of the authorized IAM user. + NOTE:
+The username is the name of an existing IAM user. + |
+
Select the permissions to be granted to the user + |
+
|
+
Click Set Permission in the Operation column of the sub-user to modify the permission of the user. Table 1 and Table 2 list the detailed permission descriptions.
+If the Set Permission button is gray, you do not have the permission to modify the package group or package. You can apply to the administrator, group owner, or other users who have the permissions on granting and revoking permissions of package groups or packages.
+Click Revoke Permission in the Operation column of a sub-user to revoke the user's permissions. After the operation, the sub-user does not have any permission on the package group or package.
+Querying permissions. A group owner can view the created package group and all packages in the group, and can also view package groups on which he or she has all permissions.
+A package group is a unit. If you select a group when creating a package, you can grant only the permissions of the package group to other users.
+Querying permissions. A package owner can view the created packages, and can also view packages on which he or she has all permissions.
+To change the owner of a package, click More > Modify Owner in the Operation column of a package on the Package Management page.
+Parameter + |
+Description + |
+
---|---|
Group Name + |
+
|
+
Name + |
+Name of a package. + |
+
Select Type + |
+
|
+
Username + |
+Name of the package owner. + NOTE:
+The username is the name of an existing IAM user. + |
+
You can assign queue permissions to new users, modify permissions for users who have some permissions of a queue, and revoke all permissions of a user on a queue.
+Table 1 describes the related parameters.
+ +Parameter + |
+Description + |
+
---|---|
Username + |
+Name of the user you want to grant permissions to. + NOTE:
+The username is the name of an existing IAM user. In addition, the user can perform authorization operations only after logging in to the platform. + |
+
Permissions to be granted to the user + |
+
|
+
If all options under Set Permission are gray, you are not allowed to change permissions on this job. You can apply to the administrator, job creator, or other authorized users for job permission granting and revoking.
+In the list under User Permissions for a job, locate the user whose permissions need to be revoked, click Revoke Permission in the Operation column, and click Yes. After this operation, the user does not have any permission on the job.
+If another user grants any permission other than the job viewing permission to a subuser, the job is displayed in the job list, but the details cannot be viewed by the subuser.
+When modifying a job, you must have the permission to update the job as well as the permission on the package group or package that the job uses.
+On the Datasource Authentication tab page, click Manage Permissions in the Operation column of the row that contains the authentication to be modified. On the User Permission Info page that is displayed, you can grant, set, and revoke permissions of the datasource connection.
+Parameter + |
+Description + |
+
---|---|
Username + |
+Name of the authorized IAM user. + NOTE:
+The username is the name of an existing IAM user. + |
+
Select the permissions to be granted to the user + |
+
|
+
Click Set Permission in the Operation column of the sub-user to modify the permission of the user. Table 1 lists the detailed permission descriptions.
+If all options under Set Permission are gray, you are not allowed to change permissions on this datasource connection. You can apply for the granting and revoking permissions from administrators, group owners, and other users who have the permission to grant permissions.
+Click Revoke Permission in the Operation column of a sub-user to revoke the user's permissions. After this operation, the sub-user does not have any permission of the datasource connection.
+Only the tenant account or a subaccount of user group admin can authorize access.
+After entering the DLI management console, you are advised to set agency permissions to ensure that DLI can be used properly.
+If you need to adjust the agency permissions, modify them on the Service Authorization page. For details about the required agency permissions, see Table 1.
+Permission + |
+Details + |
+Remarks + |
+
---|---|---|
Tenant Administrator (global service) + |
+Tenant Administrator permissions are required to access data from OBS to execute Flink jobs on DLI, for example, obtaining OBS/DWS data sources, log dump (including bucket authorization), checkpointing enabling, and job import and export. + |
+Due to cloud service cache differences, permission setting operations require about 60 minutes to take effect. + |
+
DIS Administrator + |
+DIS Administrator permissions are required to use DIS data as the data source of DLI Flink jobs. + |
+Due to cloud service cache differences, permission setting operations require about 30 minutes to take effect. + |
+
VPC Administrator + |
+VPC Administrator permissions are required to use the VPC, subnet, route, VPC peering connection, and port for DLI datasource connections. + |
+Due to cloud service cache differences, permission setting operations require about 3 minutes to take effect. + |
+
SMN Administrator + |
+To receive notifications when a DLI job fails, SMN Administrator permissions are required. + |
+Due to cloud service cache differences, permission setting operations require about 3 minutes to take effect. + |
+
Tenant Administrator (project-level) + |
+Tenant Administrator (Project-level) permission is needed if you use services that can run only with this permission. + |
+Due to cloud service cache differences, permission setting operations require about 3 minutes to take effect. + |
+
+Elastic scaling can be performed on a newly created queue only after jobs have been run on it.
+If the current queue specifications do not meet service requirements, you can increase the number of CUs to scale out the queue.
+Scale-out is time-consuming. After you perform scale-out on the Elastic Scaling page of DLI, wait for about 10 minutes. The duration is related to the CU amount to add. After a period of time, refresh the Queue Management page and check whether values of Specifications and Actual CUs are the same to determine whether the scale-out is successful. Alternatively, on the Job Management page, check the status of the SCALE_QUEUE SQL job. If the job status is Scaling, the queue is being scaled out.
+The procedure is as follows:
+If the current queue specifications exceed your compute requirements, you can reduce the number of CUs to scale in the queue.
+The procedure is as follows:
+When services are busy, you might need to use more compute resources to process services in a period. After this period, you do not require the same amount of resources. If the purchased queue specifications are small, resources may be insufficient during peak hours. If the queue specifications are large, resources may be wasted.
+DLI provides scheduled tasks for elastic scale-in and -out in the preceding scenario. You can set different queue sizes (CUs) at different time or in different periods based on your service period or usage and the existing queue specifications to meet your service requirements and reduce costs.
+The procedure is as follows:
+Parameter + |
+Description + |
+
---|---|
Task Name + |
+Enter the name of the periodic task. +
|
+
Enable Task + |
+Whether to enable periodic elastic scaling. The task is enabled by default. If disabled, the task will not be triggered on time. + |
+
Validity Period + |
+Set the time segment for executing the periodic task, including Date and Time. If there is no time segment restriction, leave this parameter empty, indicating that the task takes effect permanently. If you need to specify the time segment for the task to take effect, set this parameter based on the service requirements. + NOTE:
+
|
+
Actual CUs + |
+Queue specifications before scale-in or scale-out. + |
+
Final CUs + |
+Specifications after the queue is scaled in or out. + NOTE:
+
|
+
Repeat + |
+Time when the scheduled scale-out or scale-in is repeated. Scheduled tasks can be scheduled by week in Repeat.
+
NOTE:
+
|
+
Executed + |
+Time when scheduled scale-out or scale-in is performed +
|
+
After a periodic task is created, you can view the specification change of the current queue and the latest execution time on the page for scheduling CU changes.
+Alternatively, on the Queue Management page, check whether the Specifications change to determine whether the scaling is successful.
+You can also go to the Job Management page and check the status of the SCALE_QUEUE job. If the job status is Scaling, the queue is being scaled in or out.
+If the configured scheduled task does not meet service requirements, you can click Modify in the Operation column of the task on the Schedule CU Changes page to modify the task configuration.
+If you do not need a scheduled task for queue scaling, click Delete in the Operation column of the task on the Schedule CU Changes page. In the displayed Delete Periodic Task dialog box, click Yes.
+The address connectivity test checks the connectivity between a DLI queue and a peer IP address that you specify in common scenarios, or between a DLI queue and the peer IP address bound to a datasource connection in cross-source scenarios. The operation is as follows:
+If the test address is reachable, a message is displayed on the page, indicating that the address is reachable.
+If the test address is unreachable, the system displays a message indicating that the address is unreachable. Check the network configurations and try again. Network configurations include the VPC peering and the datasource connection. Check whether they have been activated.
+This section describes how to create a Flink OpenSource SQL job. DLI Flink OpenSource SQL jobs are fully compatible with the syntax of Flink 1.10 and 1.12 provided by the community. In addition, Redis, DWS(GaussDB), and DIS data source types are added based on the community connector. For details about the syntax and restrictions of Flink SQL DDL, DML, and functions, see Table API & SQL.
+A newly created general-purpose queue is a CCE queue by default. You do not need to submit a service ticket to enable the whitelist.
+On the Queue Management page, locate the queue you have created, and choose More > Test Address Connectivity in the Operation column to check whether the network connection between the queue and the data source is normal. For details, see Testing Address Connectivity.
++
Parameter + |
+Description + |
+
---|---|
Type + |
+Set Type to Flink OpenSource SQL. You will need to start jobs by compiling SQL statements. + |
+
Name + |
+Name of a job. Enter 1 to 57 characters. Only letters, digits, hyphens (-), and underscores (_) are allowed. + NOTE:
+The job name must be globally unique. + |
+
Description + |
+Description of a job. It can be up to 512 characters long. + |
+
Template Name + |
+You can select a sample template or a custom job template. For details about templates, see Flink Template Management. + |
+
+Enter detailed SQL statements in the SQL statement editing area. For details about SQL statements, see the Data Lake Insight Flink OpenSource SQL Syntax Reference.
++
Parameter + |
+Description + |
+
---|---|
Queue + |
+A shared queue is selected by default. Select a CCE queue with dedicated resources and configure the following parameters: +UDF Jar: You can customize a UDF Jar file. Before selecting a JAR file, upload the corresponding JAR file to the OBS bucket and create a package from it. For details, see Creating a Package. +In SQL, you can call a user-defined function defined in the JAR file. + NOTE:
+During job creation, a sub-user can only select a queue that has been allocated to the user. +If the remaining capacity of the selected queue cannot meet the job requirements, the system automatically scales up the capacity and you will be billed based on the increased capacity. When a queue is idle, the system automatically scales in the queue. + |
+
CUs + |
+Sum of the number of compute units and job manager CUs of DLI. One CU equals one vCPU and 4 GB. +The value is the number of CUs required for job running and cannot exceed the number of CUs in the bound queue. + |
+
Job Manager CUs + |
+Number of CUs of the management unit. + |
+
Parallelism + |
+Number of tasks that run concurrently in the Flink OpenSource SQL job + NOTE:
+This value cannot be greater than four times the compute units (number of CUs minus the number of job manager CUs). + |
+
Task Manager Configuration + |
+Whether to set Task Manager resource parameters. +If this option is selected, you need to set the following parameters: +
|
+
OBS Bucket + |
+OBS bucket to store job logs and checkpoint information. If the selected OBS bucket is not authorized, click Authorize. + |
+
Save Job Log + |
+Whether to save the job running logs to OBS. The logs are saved in the following path: Bucket name/jobs/logs/Directory starting with the job ID. + CAUTION:
+You are advised to configure this parameter. Otherwise, no run log is generated after the job is executed. If the job fails, the run log cannot be obtained for fault locating. +If this option is selected, you need to set the following parameters: +OBS Bucket: Select an OBS bucket to store user job logs. If the selected OBS bucket is not authorized, click Authorize.
+ NOTE:
+If both Enable Checkpointing and Save Job Log are selected, you only need to authorize OBS once. + |
+
Alarm Generation upon Job Exception + |
+Whether to report job exceptions, for example, abnormal job running or exceptions due to an insufficient balance, to users via SMS or email. +If this option is selected, you need to set the following parameters: +SMN Topic +Select a user-defined SMN topic. For details about how to customize SMN topics, see Creating a Topic in the Simple Message Notification User Guide. + |
+
Enable Checkpointing + |
+Whether to enable job snapshots. If this function is enabled, jobs can be restored based on the checkpoints. +If this option is selected, you need to set the following parameters:
+
|
+
Auto Restart upon Exception + |
+Whether to enable automatic restart. If this function is enabled, any job that has become abnormal will be automatically restarted. +If this option is selected, you need to set the following parameters: +
|
+
Idle State Retention Time + |
+How long the state of a key is retained without being updated before it is removed in GroupBy or Window. The default value is 1 hour. + |
+
Dirty Data Policy + |
+Policy for processing dirty data. The following policies are supported: Ignore, Trigger a job exception, and Save. +If you set this field to Save, Dirty Data Dump Address must be set. Click the address box to select the OBS path for storing dirty data. + |
+
After the job is started, the system automatically switches to the job management page, and the created job is displayed in the job list. You can view the job status in the Status column. After a job is successfully submitted, its status changes accordingly. After the execution is complete, the message Completed is displayed. If the job becomes abnormal, copy the error information, handle the fault based on the provided information, and then resubmit the job.
Other buttons are as follows:
+On the OpenSource SQL job editing page, click Simplified Stream Graph.
+On the OpenSource SQL job editing page, click Static Stream Graph.
+The Static Stream Graph page also allows you to:
+TPC-H is a test set developed by the Transaction Processing Performance Council (TPC) to simulate decision-support applications. It is widely used in academia and industry to evaluate the performance of decision-support technology. This business test places higher requirements on vendors because it comprehensively evaluates overall business computing capability. With universal business significance, it is widely used in decision-making analysis in industries such as banking credit, credit cards, telecom operations, tax, and tobacco.
+The TPC-H benchmark test is developed from TPC-D (a standard specified by TPC in 1994 and used as the test benchmark for decision-support systems). TPC-H implements a 3NF data warehouse that contains eight basic relations, with data volumes ranging from 1 GB to 3 TB. The TPC-H benchmark test includes 22 queries (Q1 to Q22), and the main evaluation indicator is the response time of each query (from submission to result return). The unit of the TPC-H benchmark test is queries per hour (QphH@size): H indicates the average number of complex queries per hour, and size indicates the size of the database, which reflects the query processing capability of the system. Because it is modeled on an actual production and operation environment, TPC-H can evaluate key performance parameters that other tests cannot. In short, the TPC-H standard meets the test requirements of data warehouses and motivates vendors and research institutes to stretch the limits of this technology.
+In this example, DLI directly queries the TPC-H dataset on OBS. DLI has generated a standard TPC-H-2.18 dataset of 100 MB which is uploaded to the tpch folder on OBS. The read-only permission is granted to you to facilitate query operations.
+The TPC-H test consists of three sub-tests: the data loading test, the Power test, and the Throughput test. Data loading refers to the process of setting up the test database, and the loading test measures the data loading capability of the DBMS. The first sub-test, the data loading test, measures the data loading time and is time-consuming. The second is the Power test, also called the raw query test. After data loading is complete, the database is in its initial state with no other operations performed; in particular, no data has been cached in the buffer. The Power test requires that the 22 queries be executed once in sequence while a pair of RF1 and RF2 update operations is executed at the same time. The third is the Throughput test, the core and most complex sub-test, which is closest to the actual application environment. With multiple query streams and a pair of RF1 and RF2 update flows, the Throughput test places greater pressure on the SUT than the Power test does.
+The basic data in the test is related to the execution time (the time of each data loading step, each query execution, and each update execution), based on which you can calculate the data loading time, Power@Size, Throughput@Size, qphH@Size and $/QphH@Size.
+Power@Size is the result of the Power test, which is defined as the reciprocal of the geometric average value of the query time and change time. The formula is as follows:
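+The original figure with the formula is not reproduced here; as a reference, the standard TPC-H definition of the Power metric is:
\text{Power@Size} = \frac{3600 \times SF}{\left( \prod_{i=1}^{22} QI(i,0) \times \prod_{j=1}^{2} RI(j,0) \right)^{1/24}}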
+Size indicates the data size, and SF is the scale factor of the data. QI(i, 0) indicates the execution time of the i-th query, in seconds. RI(j, 0) is the execution time of the update flow RFj, in seconds.
+Throughput@Size is the Throughput test result, which is defined as the reciprocal of the average value of all query execution time. The formula is as follows:
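+Again as a reference to the standard TPC-H definition, with S denoting the number of query streams and T_s the total elapsed time of the Throughput test in seconds:
\text{Throughput@Size} = \frac{S \times 22 \times 3600}{T_s} \times SF
The composite metric is then \text{QphH@Size} = \sqrt{\text{Power@Size} \times \text{Throughput@Size}}.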
+You can use the built-in TPC-H test suite of DLI to perform interactive query without uploading data.
+When a sub-account uses the TPC-H test suite, the main account needs to grant the sub-account the OBS access permission and the permission to view the main account table. If the master account has not logged in to DLI, the sub-account needs to have the permissions to create databases and tables in addition to the preceding permissions.
+For details, see SQL Template Management.
+To facilitate SQL operations, DLI allows you to customize query templates or save the SQL statements in use as templates. After templates are saved, you do not need to write SQL statements. You can directly perform the SQL operations using the templates.
+The current system provides various standard TPC-H query statement templates. You can select a template as needed. This example shows how to use a TPC-H template to develop and submit a Spark SQL job.
+Step 1: Logging in to the Cloud
+Step 2: Logging In to the DLI Management Console
+Step 3: Execute the TPC-H Sample Template and View the Result
+For details about the templates, see SQL Template Management.
+You need to perform authorization when accessing the DLI management console for the first time. For details, see Service Authorization.
+This example uses the default queue and database preset in the system as an example. You can also run query statements on a self-created queue and database.
+For details about how to create a queue, see Creating a Queue. For details about how to create a database, see Creating a Database.
+This section describes how to submit a Flink OpenSource SQL job on the DLI console for real-time computing. The general procedure is as follows:
+Step 1: Log In to the Cloud Platform
+Step 2: Prepare a Data Source Channel
+Step 3: Prepare a Data Output Channel
+Step 4: Create an OBS Bucket to Store Output Data
+ +Step 6: Creating an Enhanced Datasource Connection Between DLI and Kafka
+Step 7: Creating an Enhanced Datasource Connection Between DLI and RDS
+Step 8: Creating a Flink OpenSource SQL Job
+You need to create a Flink OpenSource SQL job that has an input stream and an output stream. The input stream reads data from Kafka, and the output stream writes data into RDS.
+DLI Flink jobs allow other services to function as data sources and data output channels. For details, see "Preparing Data".
+In this example, the job name is JobSample, Kafka is the data source, and DIS is enabled for data integration. For details, see "Buying an Instance" in the Distributed Message Service Kafka User Guide.
+For more information, see Managing Kafka Premium Instances in the Distributed Message Service User Guide.
+Retain default values for other parameters.
+To use RDS as the data output channel, create an RDS MySQL instance. For details, see Getting Started with RDS for MySQL.
+CREATE TABLE `flink`.`orders` ( + `order_id` VARCHAR(32) NOT NULL, + `order_channel` VARCHAR(32) NULL, + `order_time` VARCHAR(32) NULL, + `pay_amount` DOUBLE UNSIGNED NOT NULL, + `real_pay` DOUBLE UNSIGNED NULL, + `pay_time` VARCHAR(32) NULL, + `user_id` VARCHAR(32) NULL, + `user_name` VARCHAR(32) NULL, + `area_id` VARCHAR(32) NULL, + PRIMARY KEY (`order_id`) +) ENGINE = InnoDB + DEFAULT CHARACTER SET = utf8mb4 + COLLATE = utf8mb4_general_ci;+
In this example, you need to enable OBS for JobSample to provide DLI Flink jobs with the functions of checkpoint, saving job logs, and commissioning test data.
+For details about how to create a bucket, see Creating a Bucket in the Object Storage Service Console Operation Guide.
+Flink OpenSource SQL jobs cannot run on the default queue. You need to create a queue, for example, Flinktest. For details, see Creating a Queue.
+If this is your first time to log in to the DLI management console, you need to be authorized to access OBS.
+It takes 10 to 15 minutes to bind the queue to a cluster after the queue is created.
+You need to create an enhanced datasource connection for the Flink OpenSource SQL job. For details, see "Creating an Enhanced Datasource Connection".
+For example, if the CIDR block of the queue is 10.0.0.0/16, set Priority to 1, Action to Allow, Protocol to TCP, Type to IPv4, Source to 10.0.0.0/16, and click OK.
+Click OK. Click the name of the created datasource connection to view its status. You can perform subsequent steps only after the connection status changes to Active.
+If the RDS DB instance and Kafka instance are in the same VPC and subnet, skip this step. The enhanced datasource connection created in step 6-2 has connected the subnet.
+Click OK. Click the name of the created datasource connection to view its status. You can perform subsequent steps only after the connection status changes to Active.
+After the data source and data output channel are prepared, you can create a Flink OpenSource SQL job.
+You do not need to set other parameters.
CREATE TABLE kafkaSource (
  order_id string,
  order_channel string,
  order_time string,
  pay_amount double,
  real_pay double,
  pay_time string,
  user_id string,
  user_name string,
  area_id string
) WITH (
  'connector' = 'kafka',
  -- Kafka topic that the job reads from and that the test data is written to. Log in to the Kafka console, click the name of the created Kafka instance, and view the topic name on the Topic Management page.
  'topic' = 'testkafkatopic',
  -- Replace with the internal network addresses and port numbers of your Kafka instance.
  'properties.bootstrap.servers' = '192.168.0.237:9092,192.168.0.252:9092,192.168.0.137:9092',
  'properties.group.id' = 'GroupId',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'json'
);

CREATE TABLE jdbcSink (
  order_id string,
  order_channel string,
  order_time string,
  pay_amount double,
  real_pay double,
  pay_time string,
  user_id string,
  user_name string,
  area_id string
) WITH (
  'connector' = 'jdbc',
  -- Replace rds-dliflink with the name of the RDS database where the orders table was created, and the IP address and port number with those of the RDS for MySQL instance.
  'url' = 'jdbc:mysql://172.16.0.116:3306/rds-dliflink',
  'table-name' = 'orders',
  -- Username of the RDS for MySQL DB instance
  'username' = 'xxxxx',
  -- Password of the RDS for MySQL DB instance
  'password' = 'xxxxx',
  'sink.buffer-flush.max-rows' = '1'
);

insert into jdbcSink select * from kafkaSource;
After the job is started, the system automatically switches to the job management page, and the created job is displayed in the job list. You can view the job status in the Status column. After a job is successfully submitted, its status changes accordingly. If the job becomes abnormal, copy the error information, handle the fault based on the provided information, and then resubmit the job.
{"order_id":"202103241000000001", "order_channel":"webShop", "order_time":"2021-03-24 10:00:00", "pay_amount":"100.00", "real_pay":"100.00", "pay_time":"2021-03-24 10:02:03", "user_id":"0001", "user_name":"Alice", "area_id":"330106"} + +{"order_id":"202103241606060001", "order_channel":"appShop", "order_time":"2021-03-24 16:06:06", "pay_amount":"200.00", "real_pay":"180.00", "pay_time":"2021-03-24 16:10:06", "user_id":"0001", "user_name":"Alice", "area_id":"330106"}+
select * from orders;+
202103241000000001,webShop,2021-03-24 10:00:00,100.0,100.0,2021-03-24 10:02:03,0001,Alice,330106 +202103241606060001,appShop,2021-03-24 16:06:06,200.0,180.0,2021-03-24 16:10:06,0001,Alice,330106+
Data Lake Insight (DLI) is a serverless data processing and analysis service fully compatible with Apache Spark, and Flink ecosystems. It frees you from managing any server. DLI supports standard SQL and is compatible with Spark and Flink SQL. It also supports multiple access modes, and is compatible with mainstream data formats. DLI supports SQL statements and Spark applications for heterogeneous data sources, including CloudTable, RDS, GaussDB(DWS), CSS, OBS, custom databases on ECSs, and offline databases.
+You cannot perform permission-related operations on the partition column of a partition table. However, when you grant the permission of any non-partition column in a partition table to another user, the user gets the permission of the partition column by default. When the user views the permission of the partition table, the permission of the partition column will not be displayed.
+DLI supports only UTF-8-encoded texts. Ensure that data is encoded using UTF-8 during table creation and import.
+Currently, DLI supports analysis only on the data uploaded to the cloud. In scenarios where regular (for example, on a per day basis) one-off analysis on incremental data is conducted for business, you can do as follows: Anonymize data to be analyzed and store anonymized data on OBS temporarily. After analysis is complete, export the analysis report and delete the data temporarily stored on OBS.
+To perform query on data stored on services rather than DLI, perform the following steps:
+A high compression ratio of OBS tables in the Parquet or ORC format (for example, a compression ratio of 5 or higher compared with text compression) will lead to large data volumes to be processed by a single task. In this case, you are advised to set dli.sql.files.maxPartitionBytes to 33554432 (default: 134217728) in the conf field in the submit-job request body to reduce the data to be processed per task.
+Create an OBS directory with a unique name. Alternatively, you can manually delete the existing OBS directory and submit the job again. However, exercise caution when deleting the existing OBS directory because the operation will delete all data in the directory.
+val sc: SparkContext = new SparkContext() +sc.hadoopConfiguration.set("fs.obs.access.key", ak) +sc.hadoopConfiguration.set("fs.obs.secret.key", sk)+
val sparkSession: SparkSession = SparkSession + .builder() + .config("spark.hadoop.fs.obs.access.key", ak) + .config("spark.hadoop.fs.obs.secret.key", sk) + .enableHiveSupport() + .getOrCreate()+
val sc: SparkContext = new SparkContext() +sc.hadoopConfiguration.set("fs.obs.access.key", ak) +sc.hadoopConfiguration.set("fs.obs.secret.key", sk) +sc.hadoopConfiguration.set("fs.obs.session.token", sts)+
val sparkSession: SparkSession = SparkSession + .builder() + .config("spark.hadoop.fs.obs.access.key", ak) + .config("spark.hadoop.fs.obs.secret.key", sk) + .config("spark.hadoop.fs.obs.session.token", sts) + .enableHiveSupport() + .getOrCreate()+
For security purposes, you are advised not to include the AK and SK information in the OBS path. In addition, if a table is created in the OBS directory, the OBS path specified by the Path field cannot contain the AK and SK information.
+Spark 2.3 has changed the behavior of the internal Logging interface. If user code directly inherits Logging and an earlier Spark version is used during compilation, java.lang.AbstractMethodError is reported when the application runs in the Spark 2.3 environment.
+Solutions are as follows:
+<dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>1.7.16</version> +</dependency> +<dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + <version>1.7.16</version> +</dependency> +<dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>1.2.17</version> +</dependency> + +private val logger = LoggerFactory.getLogger(this.getClass) +logger.info("print log with sl4j+log4j")+
DLI supports the following data formats:
+When data is inserted into DLI, set the ID field to NULL.
+DLI data can be stored in either of the following:
+The Service Quota page is displayed.
+If a quota cannot meet service requirements, increase a quota.
+The system does not support online quota adjustment. To increase a resource quota, dial the hotline or send an email to the customer service. We will process your application and inform you of the progress by phone call or email.
+Before dialing the hotline number or sending an email, ensure that the following information has been obtained:
+ + +The procedure is as follows:
+// Configure the pom file on which the StreamExecutionEnvironment depends. +StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); + env.getCheckpointConfig().setCheckpointInterval(40000); + env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + RocksDBStateBackend rocksDbBackend = new RocksDBStateBackend(new FsStateBackend("obs://${userAk}:${userSk}@${obsendpoint}:443/${bucket}/jobs/checkpoint/my_jar"), false); + rocksDbBackend.setOptions(new OptionsFactory() { + @Override + public DBOptions createDBOptions(DBOptions currentOptions) { + return currentOptions + .setMaxLogFileSize(64 * 1024 * 1024) + .setKeepLogFileNum(3); + } + + @Override + public ColumnFamilyOptions createColumnOptions(ColumnFamilyOptions currentOptions) { + return currentOptions; + } + }); + env.setStateBackend(rocksDbBackend);+
The preceding code saves the checkpoint to the ${bucket} bucket in jobs/checkpoint/my_jar path every 40 seconds in EXACTLY_ONCE mode.
+Pay attention to the checkpoint storage path. Generally, the checkpoint is stored in the OBS bucket. The path format is as follows:
+<dependency> + <groupId>org.apache.flink</groupId> + <artifactId>flink-streaming-java_${scala.binary.version}</artifactId> + <version>${flink.version}</version> + <scope>provided</scope> +</dependency>+
The checkpoint path is the same as that you set in JAR package code. The format is as follows:
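+The original figure showing the format is not reproduced here. As an illustration, assuming the OBS bucket and base path used in the preceding code, Flink lays out checkpoints as <base path>/<job ID>/chk-<checkpoint number>, for example:
obs://${bucket}/jobs/checkpoint/my_jar/<job-id>/chk-<checkpoint-id>/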
+ +To rectify this fault, perform the following steps:
+Ensure that the created DIS stream and Flink job are in the same region.
+Configuration files can be uploaded for user-defined jobs (JAR).
+If the OBS bucket selected for a job is not authorized, perform the following steps:
+Currently, DLI supports the Hive syntax for creating tables of the TEXTFILE, SEQUENCEFILE, RCFILE, ORC, AVRO, and PARQUET file types. If the file format specified for creating a table in the CTAS is AVRO and digits are directly used as the input of the query statement (SELECT), for example, if the query is CREATE TABLE tb_avro STORED AS AVRO AS SELECT 1, a schema parsing exception is reported.
+If the column name is not specified, the content after SELECT is used as both the column name and inserted value. The column name of the AVRO table cannot be a digit. Otherwise, an error will be reported, indicating that the schema fails to be parsed.
+Solution: You can use CREATE TABLE tb_avro STORED AS AVRO AS SELECT 1 AS colName to specify the column name or set the storage format to a format other than AVRO.
+The network connectivity is abnormal. Check whether the security group is correctly selected and whether the VPC is correctly configured.
+Example: When you create an RDS datasource connection, the system displays the error message Communication link failure.
+Select a correct Service Type. In this example, select RDS.
+If the error message is still displayed after you create a new datasource connection according to Step 1, check the VPC configuration.
+Add the following SQL statements to the Flink job:
+create source stream ssource(xx); +create sink stream es1(xx) with (xx); +create sink stream es2(xx) with (xx); +insert into es1 select * from ssource; +insert into es2 select * from ssource;+
The temp stream in Flink SQL is similar to a subquery. It is a logical stream used to simplify the SQL logic and does not generate data storage. Therefore, there is no need to clean data.
+Mode for storing generated job logs when a DLI Flink job fails to be submitted or executed. The options are as follows:
+Choose Job Management > Flink Jobs, click the target job name to go to the job details page, and click Run Log to view real-time logs.
+On this basis, the taskmanager.log file rolls as the log file size and time change.
+Currently, DLI provides two types of queues, For SQL and For general use. SQL queues are used to run SQL jobs. General-use queues are compatible with Spark queues of earlier versions and are used to run Spark and Flink jobs.
+You can perform the following steps to convert an old Spark queue to a general purpose queue.
+This message indicates that the two tables to be joined contain the same column, but the owner of the column is not specified when the command is executed.
+For example, tables tb1 and tb2 contain the id field.
+select id from tb1 join tb2;+
Correct command:
+select tb1.id from tb1 join tb2;+
When shuffle statements, such as GROUP BY and JOIN, are executed in Spark jobs, data skew occurs, which slows down the job execution.
+To solve this problem, you can configure spark.sql.shuffle.partitions to improve the concurrency of shuffle read tasks.
+You can use the set clause to configure the dli.sql.shuffle.partitions parameter in JDBC. The statement is as follows:
Statement st = conn.createStatement();
st.execute("set spark.sql.shuffle.partitions=20");
The correct method for using the count function to perform aggregation is as follows:
SELECT
    http_method,
    count(http_method)
FROM
    apigateway
WHERE
    service_id = 'ecs'
GROUP BY http_method
Or
SELECT
    http_method
FROM
    apigateway
WHERE
    service_id = 'ecs'
DISTRIBUTE BY http_method
If an incorrect method is used, an error will be reported.
SELECT
    http_method,
    count(http_method)
FROM
    apigateway
WHERE
    service_id = 'ecs'
DISTRIBUTE BY http_method
Check whether the account is in arrears. If yes, recharge the account.
+If the error persists, log out and log in again.
+You can use the cross-region replication function of OBS. The procedure is as follows:
+In this example, the day field is used as the partition field with the parquet encoding format (only the parquet format is supported currently) to dump car_info data to OBS. For more information, see the "File System Sink Stream" in the Data Lake Insight SQL Syntax Reference.
create sink stream car_infos (
    carId string,
    carOwner string,
    average_speed double,
    day string
) partitioned by (day)
with (
    type = "filesystem",
    file.path = "obs://obs-sink/car_infos",
    encode = "parquet",
    ak = "{{myAk}}",
    sk = "{{mySk}}"
);
Structure of the data storage directory in OBS: obs://obs-sink/car_infos/day=xx/part-x-x.
+After the data is generated, the OBS partition table can be established for subsequent batch processing through the following SQL statements:
create table car_infos (
    carId string,
    carOwner string,
    average_speed double
)
partitioned by (day string)
stored as parquet
location 'obs://obs-sink/car_infos';
alter table car_infos recover partitions;
For details about how to create a datasource connection on the management console, see Data Lake Insight User Guide > Enhanced Datasource Connections.
+For details about how to call an API to create a datasource connection, see Data Lake Insight API Reference > Creating an Enhanced Datasource Connection.
+If the specified Python environment cannot be found after a Python3 package is added, the current compute cluster environment is Python2 by default.
+You can set spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3 in the conf file to specify the Python3 environment for the compute cluster.
+New clusters use the Python3 environment by default.
+The cluster host information is not added to the datasource connection. As a result, the KRB authentication fails, the connection times out, and no error is recorded in logs. Configure the host information and try again.
+On the Enhanced page, select the connection and click Modify Host. In the dialog box that is displayed, enter the host information. The format is Host IP address Host name/Domain name. Multiple records are separated by line breaks.
+For details, see section "Modifying the Host Information" in the Data Lake Insight User Guide.
+DLI natively supports PySpark.
+For most cases, Python is preferred for data analysis, and PySpark is the best choice for big data analysis. Generally, JVM programs are packed into JAR packages and depend on third-party JAR packages. Similarly, Python programs also depend on third-party libraries, especially big data analysis programs related to PySpark-based converged machine learning. Traditionally, the Python library is installed on the execution machine using pip. For a serverless service such as DLI, you do not need to manage or even be aware of the underlying compute resources. In this case, how does DLI ensure that your programs run as expected?
+DLI has built-in algorithm libraries for machine learning in its compute resources. These common algorithm libraries meet the requirements of most users. What if a user's PySpark program depends on a program library that is not provided by the built-in algorithm library? Actually, the dependency of PySpark is specified based on PyFiles. On the DLI Spark job page, you can directly select the Python third-party program library (such as ZIP and EGG) stored on OBS.
+The compressed package of the dependent third-party Python library has structure requirements. For example, if the PySpark program depends on moduleA (import moduleA), the compressed package must meet the following structure requirement:
+That is, the compressed package contains a folder named after the module, which in turn contains the Python files of the corresponding classes. Generally, the downloaded Python library may not meet this requirement, so you need to compress the Python library again. In addition, there is no requirement on the name of the compressed package, so it is recommended that you compress the packages of multiple modules into one package. With this, even a large and complex PySpark program can be configured and run normally.
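For illustration, here is a minimal Python sketch (the module name moduleA, the directory, and the archive name are hypothetical) that packages a dependency so that the module folder sits at the root of the archive, which is the layout described above.

import os
import zipfile

def build_pyfiles_package(module_dir, archive_path):
    """Package module_dir (for example ./moduleA) so the folder sits at the archive root."""
    module_dir = os.path.abspath(module_dir)
    base_dir = os.path.dirname(module_dir)
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(module_dir):
            for name in files:
                full_path = os.path.join(root, name)
                # arcname keeps the top-level folder, for example moduleA/__init__.py
                zf.write(full_path, os.path.relpath(full_path, base_dir))

# Hypothetical usage: build_pyfiles_package("./moduleA", "dependencies.zip")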
+DLI Flink jobs support the following data formats:
+Avro, Avro_merge, BLOB, CSV, EMAIL, JSON, ORC, Parquet, and XML.
+DLI Flink jobs support data from the following data sources:
+CloudTable HBase, CloudTable OpenTSDB, CSS Elasticsearch, DCS, DDS, DIS, DMS, GaussDB(DWS), EdgeHub, MRS HBase, MRS Kafka, open-source Kafka, file systems, OBS, RDS, and SMN
+If a large number of small files are generated during SQL execution, job execution and table query will take a long time. In this case, you should merge small files.
+You can use the where condition statement in the select statement to filter data. For example:
+select * from table where part = '202012'+
In DLI, 64 CU = 64 cores and 256 GB memory.
+In a Spark job, if the driver occupies 4 cores and 16 GB memory, the executor can occupy 60 cores and 240 GB memory.
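As a quick illustration of this arithmetic, the following sketch simply subtracts the driver's share from the 64 CU total used in the example above.

# Resource arithmetic from the example above: 64 CU = 64 cores and 256 GB memory.
total_cores, total_mem_gb = 64, 256
driver_cores, driver_mem_gb = 4, 16

executor_cores = total_cores - driver_cores      # 60 cores left for executors
executor_mem_gb = total_mem_gb - driver_mem_gb   # 240 GB left for executors
print(executor_cores, executor_mem_gb)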
+When using a Flink SQL job, you need to create an OBS partition table for subsequent batch processing.
+In the following example, the day field is used as the partition field with the parquet encoding format (only the parquet format is supported currently) to dump car_info data to OBS.
create sink stream car_infos (
    carId string,
    carOwner string,
    average_speed double,
    day string
) partitioned by (day)
with (
    type = "filesystem",
    file.path = "obs://obs-sink/car_infos",
    encode = "parquet",
    ak = "{{myAk}}",
    sk = "{{mySk}}"
);
Structure of the data storage directory in OBS: obs://obs-sink/car_infos/day=xx/part-x-x.
+After the data is generated, the OBS partition table can be established for subsequent batch processing through the following SQL statements:
create table car_infos (
    carId string,
    carOwner string,
    average_speed double
)
partitioned by (day string)
stored as parquet
location 'obs://obs-sink/car_infos';
alter table car_infos recover partitions;
DLI Flink jobs are highly available. You can enable the automatic restart function to automatically restart your jobs after short-time faults of peripheral services are rectified.
+You can view SQL job logs for routine O&M.
+When creating an OBS table, you must specify a table path in the database. The path format is as follows: obs://xxx/database name/table name.
CREATE TABLE `di_seller_task_activity_30d` (`user_id` STRING COMMENT 'user ID'...) STORED AS parquet
LOCATION 'obs://akc-bigdata/akdc.db/di_seller_task_activity_30d'
CREATE TABLE `di_seller_task_activity_30d` (`user_id` STRING COMMENT 'user ID'...) STORED AS parquet
LOCATION 'obs://akc-bigdata/akdc.db'
If the specified path is akdc.db, data in this path will be cleared when the insert overwrite statement is executed.
+If the execution of an SQL statement takes a long time, you need to access the Spark UI to check the execution status.
+If data skew occurs, the running time of a stage exceeds 20 minutes and only one task is running.
+The strings preceding the colons (:) are the configuration parameters, and the strings following the colons are the values.
spark.sql.enableToString:false
spark.sql.adaptive.join.enabled:true
spark.sql.adaptive.enabled:true
spark.sql.adaptive.skewedJoin.enabled:true
spark.sql.adaptive.enableToString:false
spark.sql.adaptive.skewedPartitionMaxSplits:10
spark.sql.adaptive.skewedPartitionMaxSplits indicates the maximum number of tasks for processing a skewed partition. The default value is 5, and the maximum value is 10. This parameter is optional.
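If the job is submitted as a PySpark program rather than through JDBC, the same settings could be applied on the SparkSession as in the sketch below; whether each DLI-specific key takes effect when set this way at runtime is an assumption, and the values simply mirror the list above.

from pyspark.sql import SparkSession

# Minimal sketch: apply the adaptive/skew-join settings listed above at runtime.
spark = SparkSession.builder.appName("skew-join-tuning").getOrCreate()
spark.conf.set("spark.sql.adaptive.enabled", "true")
spark.conf.set("spark.sql.adaptive.join.enabled", "true")
spark.conf.set("spark.sql.adaptive.skewedJoin.enabled", "true")
spark.conf.set("spark.sql.adaptive.skewedPartitionMaxSplits", "10")
spark.conf.set("spark.sql.enableToString", "false")
spark.conf.set("spark.sql.adaptive.enableToString", "false")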
+To check the running status of the DLI queue and determine whether to run more jobs on that queue, you need to check the queue load.
+The DLI Flink checkpoint/savepoint mechanism is complete and reliable. You can use this mechanism to prevent data loss when a job is manually restarted or restarted due to an exception.
+Flink checkpointing ensures that the internal state data is accurate and consistent. However, for custom Source/Sink or stateful operators, you need to implement the ListCheckpointed API to ensure the reliability of service data.
+DLI allows you to subscribe to an SMN topic for failed jobs.
+When you create a Flink SQL job or Flink Jar job, you can select Save Job Log on the job editing page to save job running logs to OBS.
+To set the OBS bucket for storing the job logs, specify a bucket for OBS Bucket. If the selected OBS bucket is not authorized, click Authorize.
+The logs are saved in the following path: Bucket name/jobs/logs/Directory starting with the job ID. You can customize the bucket name in the path. /jobs/logs/Directory starting with the job ID is a fixed format.
+In the job list, click the job name. In the Run Log tab, click the provided OBS link to go to the path.
+DLI has a comprehensive permission control mechanism and supports fine-grained authentication through Identity and Access Management (IAM). You can create policies in IAM to manage DLI permissions.
+With IAM, you can use your account to create IAM users for your employees, and assign permissions to the users to control their access to specific resource types. For example, some software developers in your enterprise need to use DLI resources but must not delete them or perform any high-risk operations. To achieve this result, you can create IAM users for the software developers and grant them only the permissions required for using DLI resources.
+For a new user, you need to log in for the system to record the metadata before using DLI.
+IAM can be used free of charge. You pay only for the resources in your account.
+If the account has met your requirements, you do not need to create an independent IAM user for permission management. Then you can skip this section. This will not affect other functions of DLI.
+Table 1 lists all the system-defined roles and policies supported by DLI.

Role/Policy Name | Description | Category
---|---|---
DLI FullAccess | Full permissions for DLI. | System-defined policy
DLI ReadOnlyAccess | Read-only permissions for DLI. With read-only permissions, you can use DLI resources and perform operations that do not require fine-grained permissions. For example, create global variables, create packages and package groups, submit jobs to the default queue, create tables in the default database, create datasource connections, and delete datasource connections. | System-defined policy
Tenant Administrator | Tenant administrator | System-defined role
DLI Service Admin | DLI administrator | System-defined role
You can set actions and resources of different levels for various scenarios.
+The format is Service name:Resource type:Action. You can use the wildcard *. The following is an example.

Action | Description
---|---
dli:queue:submit_job | Queue submission
dli:queue:* | All operations on a queue
dli:*:* | All operations on all DLI resource types
+The format is Service name:Region:Domain ID:Resource type:Resource path. The wildcard (*) indicates all resources. The five fields can be flexibly set as needed. The resource path can be set for different levels of permission control based on scenario requirements. If you need to set all resources of the service, you do not need to specify the resource path field. The following is an example.

Resource | Description
---|---
DLI:*:*:table:databases.dbname.tables.* | DLI, any region, any account ID, all table resources of database dbname
DLI:*:*:database:databases.dbname | DLI, any region, any account ID, resource of database dbname
DLI:*:*:queue:queues.* | DLI, any region, any account ID, any queue resource
DLI:*:*:jobs:jobs.flink.1 | DLI, any region, any account ID, Flink job whose ID is 1
Create a policy to authorize users to create and delete any database, submit jobs for any queue, and delete any table under any account ID in any region of DLI.
{
  "Version": "1.1",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "dli:database:create_database",
        "dli:database:drop_database",
        "dli:queue:submit_job",
        "dli:table:drop_table"
      ],
      "Resource": [
        "dli:*:*:database:*",
        "dli:*:*:queue:*",
        "dli:*:*:table:*"
      ]
    }
  ]
}
Log in to the DLI console. In the navigation pane, choose Job Management > Spark Jobs. In the job list, locate the target job and click next to Job ID to view the parameters of the job.
These parameters are displayed only when Advanced Settings are configured when you create the job.
+Perform the following operations to view the number of running CUs occupied by a Spark job in real time:
+On the Spark UI page, click Environment to view Driver and Executor information.
+The possible causes are as follows:
+The accumulated data of a consumer group can be calculated by the following formula: Total amount of data to be consumed by the consumer group = Offset of the latest data – Offset of the data submitted to the consumer group
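As a plain illustration of this formula, the sketch below sums the per-partition difference between the latest offset and the committed offset; all offset values are hypothetical.

# Accumulated data = offset of the latest data - offset committed by the consumer group,
# summed over all partitions (hypothetical per-partition offsets).
latest_offsets = {0: 120000, 1: 118500, 2: 121300}
committed_offsets = {0: 119000, 1: 118500, 2: 120000}

backlog = sum(latest_offsets[p] - committed_offsets[p] for p in latest_offsets)
print(backlog)  # total amount of data still to be consumed by the consumer group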
+If your Flink job is connected to the Kafka premium edition, you can view the consumer group on the Cloud Eye console. To view the messages available for consumption, choose Cloud Service Monitoring > Distributed Message Service from the navigation pane. On the displayed page, select Kafka Premium and click the Consumer Groups tab. Click the Kafka instance name and select the target consumer group.
+Back pressure status reflects the workload status of an operator. It is determined by the ratio of threads blocked in the output buffer to the total TaskManager threads, calculated by periodically sampling the TaskManager thread stack. By default, if the ratio is less than 0.1, the back pressure status is OK. If the ratio ranges from 0.1 to 0.5, the back pressure status is LOW. If the ratio exceeds 0.5, the back pressure status is HIGH.
+Delay indicates the duration from the time when source data starts being processed to the time when data reaches the current operator. The data source periodically sends a LatencyMarker (current timestamp). After receiving the LatencyMarker, the downstream operator subtracts the timestamp from the current time to calculate the duration. You can view the back pressure status and delay of an operator on the Flink UI or in the task list of a job. Generally, high back pressure and delay occur in pairs.
+Due to Flink back pressure, the data source consumption rate can be lower than the production rate when performance of a Flink job is low. As a result, data is stacked in a Kafka consumer group. In this case, you can use back pressure and delay of the operator to find its performance bottleneck.
+In this scenario, the performance bottleneck is the sink and the optimization is specific to the data source. For example, for the JDBC data source, you can adjust the write batch using connector.write.flush.max-rows and JDBC rewriting parameter rewriteBatchedStatements=true to optimize the performance.
+In this scenario, the performance bottleneck is the Vertex2 operator. You can view the description about the function of the operator for further optimization.
+In this scenario, the performance bottleneck is the source, and the performance is mainly affected by the data read speed. In this case, you can increase the number of Kafka partitions and the number of concurrent sources to solve the problem.
+In this scenario, the performance bottleneck is Vertex2 or Vertex3. To find out the specific bottleneck operator, enable inPoolUsage monitoring on the Flink UI page. If the inPoolUsage for operator concurrency is 100% for a long time, the corresponding operator is likely to be the performance bottleneck. In this case, you check the operator for further optimization.
+Top N sorting, window aggregate calculation, and stream-stream join involve a large number of status operations. You can optimize the performance of state operations to improve the overall performance. You can try any of the following optimization methods:
+The data skew problem is solved by Local-Global aggregation, which divides a group aggregation into two stages: local aggregation is done in the upstream first, and then global aggregation in the downstream. To enable local-global aggregation, set the optimization parameter table.optimizer.agg-phase-strategy to TWO_PHASE.
+COUNT(DISTINCT CASE WHEN flag IN ('android', 'iphone')THEN user_id ELSE NULL END) AS app_uv+
Can be changed to:
+COUNT(DISTINCT user_id) FILTER(WHERE flag IN ('android', 'iphone')) AS app_uv+
+The dimension table is joined with the key of each record in the left table. Matching against the cache is performed first; if no match is found, the remotely obtained data is used for matching. The optimization is as follows:
+To use Spark to write data into a DLI table, configure the following parameters:
+The following is an example:
import logging
from operator import add
from pyspark import SparkContext

logging.basicConfig(format='%(message)s', level=logging.INFO)

# Import the local file.
test_file_name = "D://test-data_1.txt"
out_file_name = "D://test-data_result_1"

sc = SparkContext("local", "wordcount app")
sc._jsc.hadoopConfiguration().set("fs.obs.access.key", "myak")
sc._jsc.hadoopConfiguration().set("fs.obs.secret.key", "mysk")
sc._jsc.hadoopConfiguration().set("fs.obs.impl", "org.apache.hadoop.fs.obs.OBSFileSystem")
sc._jsc.hadoopConfiguration().set("fs.obs.endpoint", "myendpoint")

# Read the text file into an RDD.
text_file = sc.textFile(test_file_name)

# Count the words.
counts = text_file.flatMap(lambda line: line.split(" ")).map(lambda word: (word, 1)).reduceByKey(lambda a, b: a + b)
# Write the result.
counts.saveAsTextFile(out_file_name)
DLI allows you to associate JSON data in an OBS bucket to create tables in asynchronous mode.
+The statement for creating the table is as follows:
+create table tb1 using json options(path 'obs://....')+
Currently, you are not allowed to modify the description of a created queue. You can add the description when purchasing the queue.
+When you create a VPC peering connection for the datasource connection, the following error information is displayed:
+Failed to get subnet 2c2bd2ed-7296-4c64-9b60-ca25b5eee8fe. Response code : 404, message : {"code":"VPC.0202","message":"Query resource by id 2c2bd2ed-7296-4c64-9b60-ca25b5eee8fe fail.the subnet could not be found."}+
Before you create a datasource connection, check whether VPC Administrator is selected. If only the global Tenant Administrator is selected, the system cannot find the subnet.
+The Spark component of DLI is a fully managed service. You can only use DLI Spark through its APIs.
+The Spark component of MRS is built on the VM in an MRS cluster. You can develop the Spark component to optimize it as needed and make API calls to use it.
+You can view the job execution records when a job is running.
+You can use SparkFiles to read the file submitted using --file from a local path: SparkFiles.get("Name of the uploaded file").
+The Scala sample code is as follows:
package main.java

import org.apache.spark.SparkFiles
import org.apache.spark.sql.SparkSession

import scala.io.Source

object DliTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("SparkTest")
      .getOrCreate()

    // Driver: obtains the uploaded file.
    println(SparkFiles.get("test"))

    spark.sparkContext.parallelize(Array(1, 2, 3, 4))
      // Executor: obtains the uploaded file.
      .map(_ => println(SparkFiles.get("test")))
      .map(_ => println(Source.fromFile(SparkFiles.get("test")).mkString)).collect()
  }
}
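For reference, a PySpark equivalent of the same pattern might look like the following sketch; the distributed file name test is taken from the example above, and everything else is illustrative.

from pyspark import SparkFiles
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("SparkFilesTest").getOrCreate()

# Driver: obtains the local path of the uploaded file.
print(SparkFiles.get("test"))

def read_uploaded(_):
    # Executor: obtains and reads the uploaded file inside a task.
    with open(SparkFiles.get("test")) as f:
        return f.read()

print(spark.sparkContext.parallelize([1, 2, 3, 4]).map(read_uploaded).collect())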
The dependency of your Flink job conflicts with a built-in dependency of the DLI Flink platform. As a result, the job submission fails.
+Delete your JAR package that is the same as an existing JAR package of the DLI Flink platform.
+You used Flink 1.10 to run a Flink OpenSource SQL job. You set the number of Kafka partitions for the job to a small value at the beginning and need to increase it now.
+Add the following parameters to the SQL statement:
+connector.properties.flink.partition-discovery.interval-millis="3000"+
This statement allows you to increase or decrease the number of Kafka partitions without stopping the Flink job.
+You used Flink 1.10 to run a Flink Opensource SQL job. The job failed after the following error was reported when Flink Sink wrote data to Kafka.
+Caused by: org.apache.kafka.common.errors.NetworkException: The server disconnected before a response was received.+
The CPU usage is too high. As a result, the network is intermittently disconnected.
+Add connector.properties.retries=5 to the SQL statement.
create table kafka_sink (
    car_type string,
    car_name string,
    primary key (union_id) not enforced
) with (
    "connector.type" = "upsert-kafka",
    "connector.version" = "0.11",
    "connector.properties.bootstrap.servers" = "xxxx:9092",
    "connector.topic" = "kafka_car_topic",
    "connector.sink.ignore-retraction" = "true",
    "connector.properties.retries" = "5",
    "format.type" = "json"
);
+You are advised to perform the following operations to run a large number of DLI jobs:
+You can use DLI Spark jobs to access data in the MySQL database using either of the following methods:
+You need to create a VPC peering connection to enable network connectivity. Take MRS as an example. If DLI and MRS clusters are in the same VPC, and the security group is enabled, you do not need a VPC peering connection for communications between MRS and DLI.
+Data in an OBS bucket shared by IAM users under the same account can be imported. You cannot import data in an OBS bucket shared by other accounts.
+The consumption capability of a Flink SQL job depends on the data source transmission, queue size, and job parameter settings. The peak consumption is 10 Mbit/s.
+Check the following operations:
+When you create a Flink SQL job and configure the parameters, you select an OBS bucket you have created. The system displays a message indicating that the OBS bucket is not authorized. After you click Authorize, the system displays a message indicating that an internal error occurred on the server and you need to contact customer service or try again later.
+On the settings page, press F12 to view the error details. The following is an example:
+{"error_msg":"An internal error occurred. {0} Contact customer services or try again later ","error_json_opt":{"error": "Unexpected exception[NoSuchElementException: None.get]"},"error_code":"DLI.10001"}+
+Check whether a DLI agency has been created. If you do not have the permission to create an agency, on the DLI console, choose Global Configuration > Service Authorization, select Tenant Administrator (Global service), and click Update.
+A sub-user can view queues but cannot view Flink jobs. You can authorize the sub-user using DLI or IAM.
+Click OK to create the policy.
+You can also select DLI Service Admin (with all DLI permissions) for the subuser to view Flink jobs.
+When you submit a job, a message is displayed indicating that the job fails to be submitted due to insufficient permission caused by arrears. In this case, you need to check the roles in your token:
+If the two roles described above are in your token, user operations are restricted.
+When you query the partitioned table XX.YYY, the partition column is not specified in the search criteria.
+A partitioned table can be queried only when the query condition contains at least one partition column.
+Query a partitioned table by referring to the following example:
+Assume that partitionedTable is a partitioned table and partitionedColumn is a partition column. The query statement is as follows:
+SELECT * FROM partitionedTable WHERE partitionedColumn = XXX+
The following error is reported when a Spark job accesses OBS data:
+Caused by: com.obs.services.exception.ObsException: Error message:Request Error.OBS servcie Error Message. -- ResponseCode: 403, ResponseStatus: Forbidden+
Set the AK/SK to enable Spark jobs to access OBS data. For details, see How Do I Set the AK/SK for a Queue to Operate an OBS Table?.
+When a Spark job accesses a large amount of data, for example, accessing data in a GaussDB(DWS) database, you are advised to set the number of concurrent tasks and enable multi-task processing.
+The error message "Queue plans create failed. The plan xxx target cu is out of quota" is displayed when you create a scheduled scaling task.
+The CU quota of the current account is insufficient. You need to apply for more quotas.
+Choose Job Management > Flink Jobs. In the Operation column of the target job, choose More > Permissions. When a new user is authorized, No such user. userName:xxxx. is displayed.
+Check whether the username exists and whether the user has logged in to the system before authorization.
+When a Flink Jar job is submitted to access GaussDB(DWS), an error message is displayed indicating that the job fails to be started. The job log contains the following error information:
+FATAL: Already too many clients, active/non-active/reserved: 5/508/3+
The number of GaussDB(DWS) database connections exceeds the upper limit. In the error information, the value of non-active indicates the number of idle connections. For example, if the value of non-active is 508, there are 508 idle connections.
+Perform the following steps to solve the problem:
+SELECT PG_TERMINATE_BACKEND(pid) from pg_stat_activity WHERE state='idle';+
The default value of this parameter is 600 seconds. The value 0 indicates that the timeout limit is disabled. Do not set session_timeout to 0.
+The procedure for setting parameter session_timeout is as follows:
+The field names of tables that have been created cannot be changed.
+You can create a table, define new table fields, and migrate data from the old table to the new one.
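A minimal sketch of this workaround is shown below; the table and column names are hypothetical, and the statements are issued here from a PySpark program although the same SQL can be run as a DLI SQL job.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("rename-field-demo").enableHiveSupport().getOrCreate()

# Create a new table with the desired field names (hypothetical names),
# then copy the data over from the old table.
spark.sql("""
    CREATE TABLE IF NOT EXISTS new_table (user_id STRING, user_name STRING)
    STORED AS parquet
""")
spark.sql("""
    INSERT INTO new_table
    SELECT uid AS user_id, uname AS user_name FROM old_table
""")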
+Check whether the OBS bucket is used to store DLI logs on the Global Configuration > Job Configurations page. The job log bucket cannot be used for other purposes.
+org.apache.flink.shaded.curator.org.apache.curator.ConnectionState - Authentication failed+
Service authorization is not configured for the account on the Global Configuration page. When the account is used to create a datasource connection to access external data, the access fails.
+Deleting a queue does not cause table data loss in your database.
+DLI.0005: AnalysisException: t_user_message_input_#{date_format(date_sub(current_date(), 1), 'yyyymmddhhmmss')} is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _.+
+Replace the number sign (#) in the table name with the dollar sign ($). The format of the EL expression used in DLI should be ${expr}.
+After data is written to OBS through the Flink job output stream, data cannot be queried from the DLI table created in the OBS file path.
+For example, use the following Flink result table to write data to the obs://obs-sink/car_infos path in OBS.
create sink stream car_infos_sink (
    carId string,
    carOwner string,
    average_speed double,
    buyday string
) partitioned by (buyday)
with (
    type = "filesystem",
    file.path = "obs://obs-sink/car_infos",
    encode = "parquet",
    ak = "{{myAk}}",
    sk = "{{mySk}}"
);
create table car_infos (
    carId string,
    carOwner string,
    average_speed double
)
partitioned by (buyday string)
stored as parquet
location 'obs://obs-sink/car_infos';
To enable checkpointing, perform the following steps:
+alter table car_infos recover partitions;+
The following error message is displayed when the LOAD DATA command is executed by a Spark SQL job to import data to a DLI table:
+error.DLI.0001: IllegalArgumentException: Buffer size too small. size = 262144 needed = 2272881+
error.DLI.0999: InvalidProtocolBufferException: EOF in compressed stream footer position: 3 length: 479 range: 0 offset: 3 limit: 479 range 0 = 0 to 479 while trying to read 143805 bytes+
The data volume of the file to be imported is large and the value of spark.sql.shuffle.partitions is too large. As a result, the cache size is insufficient.
+Decrease the spark.sql.shuffle.partitions value. To set this parameter, perform the following steps:
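One common way to apply the setting is a set statement executed before the import; the sketch below shows this from a PySpark program with a hypothetical OBS path and table name (the parameter can equally be set through the JDBC set clause shown earlier in this document).

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("load-data-demo").enableHiveSupport().getOrCreate()

# Lower the shuffle partition count before importing the large file
# (the OBS path and table name below are hypothetical).
spark.sql("set spark.sql.shuffle.partitions=20")
spark.sql("LOAD DATA INPATH 'obs://my-bucket/input/data.csv' INTO TABLE my_table")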
+You need to develop a mechanism to retry failed jobs. When a faulty queue is recovered, your application tries to submit the failed jobs to the queue again.
+[ERROR] Execute DLI SQL failed. Please contact DLI service. +[ERROR] Error message:Execution Timeout+
The default queue is a public preset queue in the system for function trials. When multiple users submit jobs to this queue, traffic control might be triggered. As a result, the jobs fail to be submitted.
+Buy a custom queue for your jobs. The procedure is as follows:
org.apache.sqoop.common.SqoopException: UQUERY_CONNECTOR_0001:Invoke DLI service api failed, failed reason is %s.
    at org.apache.sqoop.connector.uquery.intf.impl.UQueryWriter.close(UQueryWriter.java:42)
    at org.apache.sqoop.connector.uquery.processor.Dataconsumer.run(Dataconsumer.java:217)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
When you create a migration job to DLI on the CDM console, you set Resource Queue to a DLI queue for general purpose. It should be a queue for SQL.
DLI.0999: RuntimeException: org.apache.hadoop.fs.obs.OBSIOException: initializing on obs://xxx.csv: status [-1] - request id
[null] - error code [null] - error message [null] - trace :com.obs.services.exception.ObsException: OBS servcie Error Message. Request Error:
...
Cause by: ObsException: com.obs.services.exception.ObsException: OBSs servcie Error Message. Request Error: java.net.UnknownHostException: xxx: Name or service not known
When you execute a DLI SQL script for the first time, you did not agree to the privacy agreement on the DLI console. As a result, the error is reported when the SQL script is executed on DataArts Studio.
+You only need to agree to the privacy agreement when it is your first time to execute the statements.
+connect to DIS failed java.lang.IllegalArgumentException: Access key cannot be null+
When configuring job running parameters for the Flink SQL job, Save Job Log or Checkpointing is enabled, and an OBS bucket for saving job logs and Checkpoints is configured. However, the IAM user who runs the Flink SQL job does not have the OBS write permission.
+User A created the testTable table in a database through a SQL job and granted user B the permission to insert and delete table data. User A deleted the testTable table and created a new testTable table. If user A wants user B to retain the insert and delete permission, user A needs to grant the permissions to user B again.
+After a table is deleted, the table permissions are not retained. You need to grant permissions to a user or project.
+Operations to grant permissions to a user or project are as follows:
+Get dis channel xxx info failed. error info: Not authorized, please click the overview page to do the authorize action+
Before running a Flink job, the permission to obtain DIS data is not granted to the user.
+A CSV file is imported to a DLI partitioned table, but the imported file data does not contain the data in the partitioning column. The partitioning column needs to be specified for a partitioned table query. As a result, table data cannot be queried.
+When data is imported to a DLI partitioned table, if the file data does not contain the partitioning column, the system specifies __HIVE_DEFAULT_PARTITION__ as the column by default. If a Spark job finds that the partition is empty, null is returned.
+failed to connect to specified address+
The issues here are described in order of how likely they are to occur.
+Troubleshoot the issue by ruling out the causes described here, one by one.
+The port number is required for the connectivity test.
+The following example tests the connectivity between a queue and a specified RDS DB instance. The RDS DB instance uses port 3306.
+The following figure shows how you should specify the IP address.
+When you create an enhanced datasource connection, you need to specify the peer VPC and subnet.
+For example, to test the connectivity between a queue and a specified RDS DB instance, you need to specify the RDS VPC and subnet information.
+The CIDR block of the DLI queue bound with a datasource connection cannot overlap the CIDR block of the data source.
+You can check whether they overlap by viewing the connection logs.
+The CIDR blocks of queue A and queue B conflict. In this example, queue B is bound to an enhanced datasource connection to data source C. Therefore, a message is displayed, indicating that the CIDR block of queue A conflicts with that of data source C. As a result, a new enhanced datasource connection cannot be established.
+Solution: Modify the CIDR block of the queue or create another queue.
+Planning the CIDR blocks for your queues helps you avoid this problem.
+View the connection logs to check whether there is the required permission.
+Figure 1 and Figure 2 show the logs generated when the subnet ID and route ID of the destination cannot be obtained because there is no permission.
+Solution: Grant DLI the VPC Administrator permission and cancel the IAM ReadOnlyAccess authorization.
+Check the routing table of the VPC peering connection corresponding to the enhanced datasource connection. Check whether the CIDR block of the queue overlaps other CIDR blocks in the routing table. If it does, the forwarding may be incorrect.
+Check whether an ACL is configured for the subnet corresponding to the datasource connection and whether the ACL rules restrict network access.
+For example, if you set a CIDR block whose security group rule allows access from a queue and set a network ACL rule to deny access from that CIDR block, the security group rule does not take effect.
+Checkpoint was enabled when a Flink job is created, and the OBS bucket for storing checkpoints was specified. After a Flink job is manually stopped, no message is displayed specifying the checkpoint where the Flink job will be restored if the Flink job is started again.
+The generation mechanism and format of Flink checkpoints are the same as those of savepoints. You can import a savepoint of the job to restore it from the latest checkpoint saved in OBS.
+When an OBS foreign table is created, a field in the specified OBS file contains a carriage return line feed (CRLF) character. As a result, the data is incorrect.
+The statement for creating an OBS foreign table is similar to the following:
+CREATE TABLE test06 (name string, id int, no string) USING csv OPTIONS (path "obs://dli-test-001/test.csv");+
Jordon,88,"aa
bb"
name    id    classno
Jordon  88    aa
bb"     null  null
CREATE TABLE test06 (name string, id int, no string) USING csv OPTIONS (path "obs://dli-test-001/test.csv",multiLine=true);+
A SQL job contains join operations. After the job is submitted, it is stuck in the Running state and no result is returned.
+When a Spark SQL job joins small tables, the small tables are automatically broadcast to all executors to speed up the join. However, this increases the memory consumption of the executors. If the executor memory usage is too high, the job fails to be executed.
+You need to check the large number of jobs in the Submitting and Running states on the queue.
+Use Cloud Eye to view jobs in different states on the queue. The procedure is as follows:
+A DLI table exists but cannot be queried on the DLI console.
+If a table exists but cannot be queried, there is a high probability that the current user does not have the permission to query or operate the table.
+Contact the user who creates the table and obtain the required permissions. To assign permissions, perform the following steps:
+If your DLI job needs to connect to a data source, for example, MRS, RDS, CSS, Kafka, or GaussDB(DWS), you need to enable the network between DLI and the data source.
+An enhanced datasource connection uses VPC peering to directly connect the VPC networks of the desired data sources for point-to-point data exchanges.
+You can configure SNAT rules and add routes to the public network to enable communications between a queue and the Internet.
+The on clause was not added to the SQL statement for joining tables. As a result, the Cartesian product query occurs due to multi-table association, and the queue resources were used up. Job execution fails on the queue.
select
  case
    when to_char(from_unixtime(fs.special_start_time), 'yyyy-mm-dd') < '2018-10-12' and row_number() over(partition by fg.goods_no order by fs.special_start_time asc) = 1 then 1
    when to_char(from_unixtime(fs.special_start_time), 'yyyy-mm-dd') >= '2018-10-12' and fge.is_new = 1 then 1
    else 0 end as is_new
from testdb.table1 fg
left join testdb.table2 fs
left join testdb.table3 fge
where to_char(from_unixtime(fs.special_start_time), 'yyyymmdd') = substr('20220601',1,8)
When you use join to perform multi-table query, you must use the on clause to reduce the data volume.
select
  case
    when to_char(from_unixtime(fs.special_start_time), 'yyyy-mm-dd') < '2018-10-12' and row_number() over(partition by fg.goods_no order by fs.special_start_time asc) = 1 then 1
    when to_char(from_unixtime(fs.special_start_time), 'yyyy-mm-dd') >= '2018-10-12' and fge.is_new = 1 then 1
    else 0 end as is_new
from testdb.table1 fg
left join testdb.table2 fs on fg.col1 = fs.col2
left join testdb.table3 fge on fg.col3 = fge.col4
where to_char(from_unixtime(fs.special_start_time), 'yyyymmdd') = substr('20220601',1,8)
Spark jobs cannot access SFTP. Upload the files you want to access to OBS and then you can analyze the data using Spark jobs.
+Please contact DLI service. DLI.0002: FileNotFoundException: getFileStatus on obs://xxx: status [404]+
Check whether there is another job that has deleted table information.
+DLI does not allow multiple jobs to read and write the same table at the same time. Otherwise, job conflicts may occur and the jobs fail.
+Partition data is manually uploaded to a partition of an OBS table. However, the data cannot be queried using DLI SQL editor.
+MSCK REPAIR TABLE table_name;+
Query the data in the OBS partitioned table.
+Currently, DLI does not allow you to insert table data into specific fields. To insert table data, you must insert data of all table fields at a time.
+org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:Permission denied for resource: databases.xxx,action:SPARK_APP_ACCESS_META)+
You need to assign the database permission to the user who executes the job. The procedure is as follows:
+DLI queues do not use resources or bandwidth when no job is running. In this case, the running status of DLI queues is not displayed on CES.
+"error_code"*CLI.0003","error_msg":"Permission denied for resource 'resources. xxx', User = 'xxx', Action = "UPDATZ_RISCURCL'."+
You need to assign the package permission to the user who executes the job. The procedure is as follows:
+If the job runs slowly, perform the following steps to find the causes and rectify the fault:
+Check whether the problem is caused by FullGC.
+Cause locating and solution
select count(distinct fn) FROM
(select input_file_name() as fn from table name) a
Cause 2: There is a broadcast table.
+Check whether the problem is caused by data skew.
+Cause locating and solution
+Join table lefttbl and table righttbl. num in the lefttbl table is the key value of the join. You can perform group by and count on lefttbl.num.
SELECT * FROM lefttbl a LEFT join righttbl b on a.num = b.int2;
SELECT count(1) as count, num from lefttbl group by lefttbl.num ORDER BY count desc;
A temporary table is used to store intermediate results. When a transaction or session ends, the data in the temporary table can be automatically deleted. For example, in MySQL, you can use create temporary table... to create a temporary table. After a transaction or session ends, the table data is automatically deleted. Does DLI Support This Function?
+Currently, you cannot create temporary tables on DLI. You can create a table by using SQL statements.
+Currently DLI can only be accessed through a browser. You must submit jobs on the console.
+By default, SQL jobs that have been running for more than 12 hours will be canceled to ensure stability of queues.
+You can use the dli.sql.job.timeout parameter (unit: second) to configure the timeout interval.
+Currently, DLI does not support local testing of Spark jobs. You can install the DLI Livy tool and use its interactive sessions to debug Spark jobs.
+Deleting a row of data from an OBS table or DLI table is not allowed.
+DLI Spark does not support job scheduling. You can use other services, such as DataArts Studio, or use APIs or SDKs to customize job scheduling.
+The Spark SQL syntax does not support primary key definition.
+Yes.
+DLI built-in dependencies are provided by the platform by default. In case of conflicts, you do not need to upload them when packaging JAR packages of Spark or Flink Jar jobs.
+No, the packages cannot be downloaded.
+Store the third-party dependency in the /opt/spark/jars directory.
+Error message "File not Found" is displayed when a SQL job is accessed.
+Generally, the file cannot be found due to a read/write conflict. Check whether a job is overwriting the data when the error occurs.
+Error message "DLI.0003: AccessControlException XXX" is reported when a SQL job is accessed.
+View the OBS bucket in the AccessControlException and check whether you are using an account that has the permission to access the bucket.
+Error message "DLI.0001: org.apache.hadoop.security.AccessControlException: verifyBucketExists on {{bucket name}}: status [403]" is reported when a SQL job is Accessed.
+The current account does not have the permission to access the OBS bucket where the foreign table is located. Obtain the OBS permission and perform the query again.
+Error message "The current account does not have permission to perform this operation,the current account was restricted." is reported during SQL statement execution.
+Check whether your account is in arrears. If it is, renew your account and try again.
+If the error persists after renewal, log out and log in again.
+To dynamically overwrite the specified partitioned data in the DataSource table, set dli.sql.dynamicPartitionOverwrite.enabled to true and then run the insert overwrite statement. (The default value of dli.sql.dynamicPartitionOverwrite.enabled is false.)
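As an illustration of this sequence, the sketch below uses hypothetical table, column, and partition names and issues the statements from a PySpark program; the same two statements can be submitted as a SQL job.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("dynamic-overwrite-demo").getOrCreate()

# Enable dynamic partition overwrite for the DataSource table, then overwrite only
# the partitions produced by the query (table, column, and partition names are hypothetical).
spark.sql("set dli.sql.dynamicPartitionOverwrite.enabled=true")
spark.sql("""
    INSERT OVERWRITE TABLE target_table PARTITION (dt)
    SELECT col1, col2, dt FROM source_table WHERE dt = '2021-01-01'
""")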
+The possible causes and solutions are as follows:
+Spark does not have the datetime type and uses the TIMESTAMP type instead.
+You can use a function to convert data types.
+The following is an example.
+select cast(create_date as string), * from table where create_date>'2221-12-01 00:00:00';
+If the table name is changed immediately after SQL statements are executed, the data size of the table may be incorrect.
+If you need to change the table name, change it 5 minutes after the SQL job is complete.
+The remaining CUs in the queue may be insufficient. As a result, the job cannot be submitted.
+To view the remaining CUs of a queue, perform the following steps:
+Log in to the Cloud Eye console. In the navigation pane on the left, choose Cloud Service Monitoring > Data Lake Insight. On the displayed page, locate the desired queue and click View Metric in the Operation column, and check CU Usage (queue) on the displayed page.
+Remaining CUs of a queue = Total CUs of the queue – CU usage.
+If the number of remaining CUs is less than the number of CUs required by the job, the job submission fails. The submission can be successful only after resources are available.
+You can isolate queues allocated to different users by setting permissions to ensure data query performance.
+When the SQL query statement is executed, the system displays a message indicating that the user does not have the permission to query resources.
+Error information: DLI.0003: Permission denied for resource 'databases.dli_test.tables.test.columns.col1', User = '{UserName}', Action = 'SELECT'
+The user does not have the permission to query the table.
+In the navigation pane on the left of the DLI console page, choose Data Management > Databases and Tables, search for the desired database table, view the permission configuration, and grant the table query permission to the user who requires it.
+The table permission has been granted and verified. However, after a period of time, an error is reported indicating that the table query fails.
+There are two possible reasons:
+When DLI is used to insert data into an OBS temporary table, only part of data is imported.
+Possible causes are as follows:
+Run a query statement to check whether the amount of imported data is correct.
+If OBS limits the number of files to be stored, add DISTRIBUTE BY number to the end of the insert statement. For example, if DISTRIBUTE BY 1 is added to the end of the insert statement, multiple files generated by multiple tasks can be inserted into one file.
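For illustration, the sketch below (hypothetical table names, issued here through PySpark) appends DISTRIBUTE BY 1 to the insert so that all rows are funneled through a single task and written to a single file.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("merge-output-files").getOrCreate()

# Hypothetical names: DISTRIBUTE BY 1 sends all rows to one task, so the insert
# produces one output file instead of one file per task.
spark.sql("""
    INSERT INTO obs_temp_table
    SELECT * FROM source_table
    DISTRIBUTE BY 1
""")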
+If you verified the data volume by opening the file in a text editor, note that the text editor may not be able to read all the data.
+Run the query statement to view the amount of data imported into the OBS bucket. The query result indicates that all the data is imported.
+This issue is caused by incorrect verification of the data volume.
+After a Flink SQL job consumed Kafka and sank data to the Elasticsearch cluster, the job was successfully executed, but no data is available.
+Possible causes are as follows:
+The storage path of the Flink Jar job checkpoints was set to an OBS bucket. The job failed to be submitted, and an error message indicating an invalid OBS bucket name was displayed.
+Flink Job submission failed. The exception information is as follows:
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.obs.metrics.OBSAMetricsProvider not found
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2664)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2688)
    ... 31 common frames omitted
Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.obs.metrics.OBSAMetricsProvider not found
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2568)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2662)
    ... 32 common frames omitted
Flink JAR packages conflicted. The submitted Flink JAR package conflicted with the HDFS JAR package of the DLI cluster.
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
</dependency>
Alternatively, use the exclusions tag to exclude the association.
conf.addResource(HBaseUtil.class.getClassLoader().getResourceAsStream("mrs-core-site.xml"), false);
conf.addResource(HBaseUtil.class.getClassLoader().getResourceAsStream("mrs-hdfs-site.xml"), false);
conf.addResource(HBaseUtil.class.getClassLoader().getResourceAsStream("mrs-hbase-site.xml"), false);
The Flink/Spark UI was displayed with incomplete information.
+When a pay-per-use queue is used to run a job, the system releases the cluster and takes about 10 minutes to create a new one. Accessing the Flink UI before completion of the creation will empty the project ID in the cache. As a result, the UI cannot be displayed. The possible cause is that the cluster was not created.
+Change the queue to a dedicated queue so that the cluster will not be released when the queue is idle. Alternatively, submit a job, wait for a while, and then access the Flink UI.
+JobManager and TaskManager heartbeats timed out. As a result, the Flink job is abnormal.
+An enhanced datasource connection failed to pass the network connectivity test. Datasource connection cannot be bound to a queue. The following error information is displayed:
+Failed to get subnet 86ddcf50-233a-449d-9811-cfef2f603213. Response code : 404, message : {"code":"VPC.0202","message":"Query resource by id 86ddcf50-233a-449d-9811-cfef2f603213 fail.the subnet could not be found."}+
VPC Administrator permissions are required to use the VPC, subnet, route, VPC peering connection, and port for DLI datasource connections.
+The binding fails because the user does not have the required VPC permissions.
+On the DLI console, choose Global Configuration > Service Authorization, select the required VPC permission, and click Update.
+The outbound rule had been configured for the security group of the queue associated with the enhanced datasource connection. The datasource authentication used a password. The connection failed and DLI.0999: PSQLException: The connection attempt failed is reported.
+Possible causes are as follows:
+Both the inbound and outbound rules of the security group are configured for the subnets of the DLI queue. Set the source IP address in the inbound direction to 0.0.0.0/0 and port 8000, indicating that any IP address can access port 8000.
+A network ACL is associated and no inbound or outbound rules are configured. As a result, the IP address cannot be accessed.
+A cross-source RDS table was created in the DataArts Studio, and the insert overwrite statement was executed to write data into RDS. DLI.0999: BatchUpdateException: Incorrect string value: '\xF0\x9F\x90\xB3' for column 'robot_name' at row 1 was reported.
+The data to be written contains emojis, which are encoded in the unit of four bytes. MySQL databases use the UTF-8 format, which encodes data in the unit of three bytes by default. In this case, an error occurs when the emoji data is inserted into to the MySQL database.
+Possible causes are as follows:
+Change the character set to utf8mb4.
+ALTER DATABASE DATABASE_NAME DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;+
ALTER TABLE TABLE_NAME DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;+
ALTER TABLE TABLE_NAME CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;+
The system failed to create a cross-source RDS table, and null pointer error was reported.
+The following table creation statement was used:
CREATE TABLE IF NOT EXISTS dli_to_rds
USING JDBC OPTIONS (
    'url'='jdbc:mysql://to-rds-1174405119-oLRHAGE7.datasource.com:3306/postgreDB',
    'driver'='org.postgresql.Driver',
    'dbtable'='pg_schema.test1',
    'passwdauth' = 'xxx',
    'encryption' = 'true');
The RDS database is in a PostGre cluster, and the protocol header in the URL is invalid.
+Change the URL to 'url'='jdbc:postgresql://to-rds-1174405119-oLRHAGE7.datasource.com:3306/postgreDB' and run the creation statement. The cross-source table is successfully created.
+The system failed to execute insert overwrite on the cross-source GaussDB(DWS) table, and org.postgresql.util.PSQLException: ERROR: tuple concurrently updated was displayed.
+Concurrent operations existed in the job. Two insert overwrite operations were executed on the table at the same time.
+One CN was running the following statement:
+TRUNCATE TABLE BI_MONITOR.SAA_OUTBOUND_ORDER_CUST_SUM+
Another CN was running the following command:
+call bi_monitor.pkg_saa_out_bound_monitor_p_saa_outbound_order_cust_sum+
This function deletes and inserts SAA_OUTBOUND_ORDER_CUST_SUM.
+Modify job logic to prevent concurrent insert overwrite operations on the same table.
+A cross-source table was used to import data to a CloudTable HBase table. This HBase table contains a column family and a rowkey for 100 million simulating data records. The data volume is 9.76 GB. The job failed after 10 million data records were imported.
+The rowkey was poorly designed causing a large amount of traffic redirected to single or very few numbers of nodes.
+Distribute data to different RegionServer. Add distribute by rand() to the end of the insert statement.
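A minimal sketch of such an insert is shown below; the table names are hypothetical and the statement is issued here through PySpark.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("hbase-write-spread").getOrCreate()

# Hypothetical names: distribute by rand() spreads rows randomly across tasks so that
# writes are not funneled to a single RegionServer.
spark.sql("""
    INSERT INTO cloudtable_hbase_table
    SELECT * FROM source_table
    DISTRIBUTE BY rand()
""")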
+A table was created on GaussDB(DWS) and then a datasource connection was created on DLI to read and write data. An error message was displayed during data writing, indicating that DLI was writing a null value to a non-null field of the table, and the job failed.
DLI.0999: PSQLException: ERROR: dn_6009_6010: null value in column "ctr" violates not-null constraint
Detail: Failing row contains (400070309, 9.00, 25, null, 2020-09-22, 2020-09-23 04:30:01.741).
Change the precision of the decimal data defined in the target table.
+A cross-source GaussDB(DWS) table and the datasource connection were created in DLI, and the schema of the source table in GaussDB(DWS) were updated. During the job execution, the schema of the source table failed to be updated, and the job failed.
+When the insert operation is executed on the DLI cross-source table, the GaussDB(DWS) source table is deleted and recreated. If the statement for creating the cross-source table is not updated on DLI, the GaussDB(DWS) source table will fail to be updated.
+Create a cross-source table on DLI and add table creation configuration truncate = true to clear table data but not delete the table.
+After the source table is updated, the corresponding cross-source table must be updated too on DLI.
+The possible causes and solutions are as follows:
+Enhanced datasource connections support only yearly/monthly and pay-per-use queues.
+You can bind a datasource connection only when you select Dedicated Resource Mode when purchasing a pay-per-use queue. The resources are dedicated and used on demand (the billing period is equal to the lifecycle of the queue).
+DLI enhanced datasource connection uses VPC peering to directly connect the VPC networks of the desired data sources for point-to-point data exchanges.
+No. The spark.acls.enable configuration item is not used in DLI. The Apache Spark command injection vulnerability (CVE-2022-33891) does not exist in DLI.
+No, a global variable can only be used by the user who created it. Global variables can be used to simplify complex parameters. For example, long and difficult variables can be replaced to improve the readability of SQL statements.
+The restrictions on using global variables are as follows:
+The Service Quota page is displayed.
+If a quota cannot meet service requirements, increase a quota.
+The system does not support online quota adjustment. To increase a resource quota, dial the hotline or send an email to the customer service. We will process your application and inform you of the progress by phone call or email.
+Before dialing the hotline number or sending an email, ensure that the following information has been obtained:
+Log in to the management console using the cloud account, click the username in the upper right corner, select My Credentials from the drop-down list, and obtain the domain name, project name, and project ID on the My Credentials page.
You can use a Flink Jar job to connect to Kafka with SASL_SSL authentication enabled. The sketch below shows the typical Kafka client settings involved.
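The FAQ entry concerns Flink Jar jobs, which would set these keys on a java.util.Properties object passed to the Kafka connector; purely as an illustration, here is a hedged sketch of the same SASL_SSL-related settings expressed as Flink SQL Kafka connector options (properties.* pass-through). Broker addresses, topic, credentials, and the truststore path are placeholders:

```sql
-- Sketch only: endpoints, credentials, and file paths are placeholders.
-- The properties.* options are passed through to the underlying Kafka client.
CREATE TABLE kafka_source (
  user_id STRING,
  event_time TIMESTAMP(3)
) WITH (
  'connector' = 'kafka',
  'topic' = 'demo_topic',
  'properties.bootstrap.servers' = '<broker1>:9093,<broker2>:9093',
  'properties.group.id' = 'demo_group',
  'properties.security.protocol' = 'SASL_SSL',
  'properties.sasl.mechanism' = 'PLAIN',
  'properties.sasl.jaas.config' = 'org.apache.kafka.common.security.plain.PlainLoginModule required username="<user>" password="<password>";',
  'properties.ssl.truststore.location' = '/path/to/client.truststore.jks',
  'properties.ssl.truststore.password' = '<truststore-password>',
  'scan.startup.mode' = 'latest-offset',
  'format' = 'json'
);
```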
+DLI is applicable to large-scale log analysis, federated analysis of heterogeneous data sources, and big data ETL processing.
Different departments of a game company analyze daily new logs via the game data analysis platform to obtain required metrics and make decisions based on the obtained metric data. For example, the operation department obtains metric data such as new players, active players, retention rate, churn rate, and payment rate to learn the current game status and determine follow-up actions. The placement department obtains the channel sources of new players and active players to determine the platforms for placement in the next cycle.
In the face of new competition pressures and changes in travel services, car companies are building IoV cloud platforms and IVI operating systems to connect Internet applications with vehicle use scenarios and complete their digital service transformation. This delivers a better travel experience for vehicle owners, increases the competitiveness of car companies, and promotes sales growth. For example, DLI can be used to collect and analyze daily vehicle metric data (such as batteries, engines, tire pressure, and airbags) and give vehicle owners timely maintenance suggestions.
Carriers typically require petabytes, or even exabytes, of storage for both structured (base station details) and unstructured (messages and communications) data, and they need to access this data with extremely low latency. Efficiently extracting value from this data is a major challenge. DLI provides multi-mode engines such as batch processing and stream processing to break down data silos and perform unified data analysis.
+DLI allows multiple organizations, departments, or applications to share resources. A logical entity, also called a tenant, is provided to use diverse resources and services. A mode involving different tenants is called multi-tenant mode. A tenant corresponds to a company. Multiple sub-users can be created under a tenant and are assigned different permissions.
+A project is a collection of resources accessible to services. In a region, an account can create multiple projects and assign different permissions to different projects. Resources used for different projects are isolated from one another. A project can either be a department or a project team.
+A database is a warehouse where data is organized, stored, and managed based on the data structure. DLI management permissions are granted on a per database basis.
+In DLI, tables and databases are metadata containers that define underlying data. The metadata in the table shows the location of the data and specifies the data structure, such as the column name, data type, and table name. A database is a collection of tables.
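For instance, here is a minimal sketch of creating a database and a table in it from the DLI SQL editor; the database name, table name, and columns are made-up examples:

```sql
-- Sketch only: names and columns are illustrative.
CREATE DATABASE IF NOT EXISTS demo_db;

CREATE TABLE IF NOT EXISTS demo_db.game_login_log (
  player_id  STRING,
  login_time TIMESTAMP,
  channel    STRING
);
```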
+Metadata is used to define data types. It describes information about the data, including the source, size, format, and other data features. In database fields, metadata interprets data content in the data warehouse.
+Queues in DLI are computing resources, which are the basis for using DLI. SQL jobs and Spark jobs performed by users require computing resources.
+Storage resources in DLI are used to store data of databases and DLI tables. To import data to DLI, storage resources must be prepared. The storage resources reflect the volume of data you are allowed to store in DLI.
A SQL job refers to a SQL statement executed in the SQL job editor. It serves as the execution entity for performing operations, such as importing and exporting data, in the SQL job editor.
+Spark jobs are those submitted by users through visualized interfaces and RESTful APIs. Full-stack Spark jobs are allowed, such as Spark Core, DataSet, MLlib, and GraphX jobs.
+The table type indicates the storage location of data.
You can create a table on DLI and associate it with other services to query data from multiple data sources (a sketch follows).
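As an illustration of how the table type reflects where data is stored, here is a hedged sketch of an OBS (external) table whose data stays in an OBS bucket; the bucket path, format, and columns are placeholders, and the exact options are documented in the DLI SQL reference:

```sql
-- Sketch only: the OBS path and schema are placeholders.
-- The table data remains in OBS; DLI stores only the metadata.
CREATE TABLE IF NOT EXISTS demo_db.obs_csv_table (
  id   INT,
  name STRING
)
USING csv
OPTIONS (path 'obs://<bucket-name>/<directory>/');
```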
+The differences between constants and variables are as follows:
+When using DLI, you may need to make adjustments according to the following restrictions:
+If you need to assign different permissions to employees in your enterprise to access your DLI resources, IAM is a good choice for fine-grained permissions management. IAM provides identity authentication, permissions management, and access control, helping you securely access your resources.
+With IAM, you can use your account to create IAM users for your employees, and assign permissions to the users to control their access to specific resource types. For example, some software developers in your enterprise need to use DLI resources but must not delete them or perform any high-risk operations. To achieve this result, you can create IAM users for the software developers and grant them only the permissions required for using DLI resources.
If your account meets your requirements, you do not need to create independent IAM users for permission management and can skip this section. This does not affect other functions of DLI.
+By default, new IAM users do not have permissions assigned. You need to add the users to one or more groups, and attach permissions policies or roles to these groups. The users then inherit permissions from the groups to which they are added. After authorization, the users can perform specified operations on DLI based on the permissions.
DLI is a project-level service deployed and accessed in specific physical regions. To assign DLI permissions to a user group, specify the scope as region-specific projects and select projects for the permissions to take effect. If All projects is selected, the permissions will take effect for the user group in all region-specific projects. When accessing DLI, users need to switch to a region where they have been authorized to use the service.
| Role/Policy Name | Description | Policy Type |
|---|---|---|
| DLI FullAccess | Full permissions for DLI. | System-defined policy |
| DLI ReadOnlyAccess | Read-only permissions for DLI. With read-only permissions, you can use DLI resources and perform operations that do not require fine-grained permissions, for example, creating global variables, creating packages and package groups, submitting jobs to the default queue, creating tables in the default database, creating datasource connections, and deleting datasource connections. | System-defined policy |
| Tenant Administrator | Tenant administrator. | System-defined role |
| DLI Service Admin | DLI administrator. | System-defined role |
Table 2 lists the common SQL operations supported by each system policy of DLI. Choose proper system policies according to this table.
| Resource | Operation | Description | DLI FullAccess | DLI ReadOnlyAccess | Tenant Administrator | DLI Service Admin |
|---|---|---|---|---|---|---|
| Queue | DROP_QUEUE | Deleting a queue | √ | × | √ | √ |
| Queue | SUBMIT_JOB | Submitting a job | √ | × | √ | √ |
| Queue | CANCEL_JOB | Terminating a job | √ | × | √ | √ |
| Queue | RESTART | Restarting a queue | √ | × | √ | √ |
| Queue | GRANT_PRIVILEGE | Granting permissions to a queue | √ | × | √ | √ |
| Queue | REVOKE_PRIVILEGE | Revoking permissions from a queue | √ | × | √ | √ |
| Queue | SHOW_PRIVILEGES | Viewing the queue permissions of other users | √ | × | √ | √ |
| Database | DROP_DATABASE | Deleting a database | √ | × | √ | √ |
| Database | CREATE_TABLE | Creating a table | √ | × | √ | √ |
| Database | CREATE_VIEW | Creating a view | √ | × | √ | √ |
| Database | EXPLAIN | Explaining a SQL statement as an execution plan | √ | × | √ | √ |
| Database | CREATE_ROLE | Creating a role | √ | × | √ | √ |
| Database | DROP_ROLE | Deleting a role | √ | × | √ | √ |
| Database | SHOW_ROLES | Displaying roles | √ | × | √ | √ |
| Database | GRANT_ROLE | Binding a role | √ | × | √ | √ |
| Database | REVOKE_ROLE | Unbinding a role | √ | × | √ | √ |
| Database | SHOW_USERS | Displaying the binding relationships between all roles and users | √ | × | √ | √ |
| Database | GRANT_PRIVILEGE | Granting permissions to a database | √ | × | √ | √ |
| Database | REVOKE_PRIVILEGE | Revoking permissions from a database | √ | × | √ | √ |
| Database | SHOW_PRIVILEGES | Viewing the database permissions of other users | √ | × | √ | √ |
| Database | DISPLAY_ALL_TABLES | Displaying tables in a database | √ | √ | √ | √ |
| Database | DISPLAY_DATABASE | Displaying databases | √ | √ | √ | √ |
| Database | CREATE_FUNCTION | Creating a function | √ | × | √ | √ |
| Database | DROP_FUNCTION | Deleting a function | √ | × | √ | √ |
| Database | SHOW_FUNCTIONS | Displaying all functions | √ | × | √ | √ |
| Database | DESCRIBE_FUNCTION | Displaying function details | √ | × | √ | √ |
| Table | DROP_TABLE | Deleting a table | √ | × | √ | √ |
| Table | SELECT | Querying a table | √ | × | √ | √ |
| Table | INSERT_INTO_TABLE | Inserting data into a table | √ | × | √ | √ |
| Table | ALTER_TABLE_ADD_COLUMNS | Adding a column | √ | × | √ | √ |
| Table | INSERT_OVERWRITE_TABLE | Overwriting data in a table | √ | × | √ | √ |
| Table | ALTER_TABLE_RENAME | Renaming a table | √ | × | √ | √ |
| Table | ALTER_TABLE_ADD_PARTITION | Adding partitions to a partitioned table | √ | × | √ | √ |
| Table | ALTER_TABLE_RENAME_PARTITION | Renaming a table partition | √ | × | √ | √ |
| Table | ALTER_TABLE_DROP_PARTITION | Deleting partitions from a partitioned table | √ | × | √ | √ |
| Table | SHOW_PARTITIONS | Displaying all partitions | √ | × | √ | √ |
| Table | ALTER_TABLE_RECOVER_PARTITION | Restoring table partitions | √ | × | √ | √ |
| Table | ALTER_TABLE_SET_LOCATION | Setting the partition path | √ | × | √ | √ |
| Table | GRANT_PRIVILEGE | Granting permissions to a table | √ | × | √ | √ |
| Table | REVOKE_PRIVILEGE | Revoking permissions from a table | √ | × | √ | √ |
| Table | SHOW_PRIVILEGES | Viewing the table permissions of other users | √ | × | √ | √ |
| Table | DISPLAY_TABLE | Displaying a table | √ | √ | √ | √ |
| Table | DESCRIBE_TABLE | Displaying table information | √ | × | √ | √ |
You do not need a background in big data to use DLI for data analysis. You only need to know SQL, and you are good to go. The SQL syntax is fully compatible with the standard ANSI SQL 2003.
+DLI compute and storage loads are decoupled. This architecture allows you to flexibly configure storage and compute resources on demand, improving resource utilization and reducing costs.
+DLI is fully compatible with Apache Spark and Apache Flink ecosystems and APIs. It is a serverless big data computing and analysis service that integrates real-time, offline, and interactive analysis. Offline applications can be seamlessly migrated to the cloud, reducing the migration workload. DLI provides a highly-scalable framework integrating batch and stream processing, allowing you to handle data analysis requests with ease. With a deeply optimized kernel and architecture, DLI delivers 100-fold performance improvement compared with the MapReduce model. Your analysis is backed by an industry-vetted 99.95% SLA.
+Analyze your data across databases. No migration required. A unified view of your data gives you a comprehensive understanding of your data and helps you innovate faster. There are no restrictions on data formats, cloud data sources, or whether the database is created online or off.
+