A datasource connection has been created on the DLI management console.
<!-- Maven dependency: Spark SQL API for Scala 2.11, Spark 2.3.2. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.3.2</version>
</dependency>
// Spark SQL entry points plus the types needed to declare an explicit schema.
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
// Obtain the shared SparkSession (creates one if none exists).
val sparkSession = SparkSession.builder().getOrCreate()
// Create a DLI table associated with CSS.
// FIX: every entry in OPTIONS must use the 'key' = 'value' form; the original
// snippet omitted '=' after 'es.nodes' and after 'resource', which is invalid syntax.
sparkSession.sql("create table css_table(id int, name string) using css options( " +
  "'es.nodes' = 'to-css-1174404221-Y2bKVIqY.datasource.com:9200', " +
  "'es.nodes.wan.only' = 'true', " +
  "'resource' = '/mytest/css')")
Parameter |
Description |
---|---|
es.nodes |
CSS connection address. You need to create a datasource connection first. If you have created an enhanced datasource connection, use the intranet IP address provided by CSS. The address format is IP1:PORT1,IP2:PORT2. |
resource |
Name of the resource associated with the CSS datasource connection. You can use /index/type to specify the resource location (for easier understanding, the index may be seen as a database and the type as a table). NOTE:
|
pushdown |
Whether to enable the pushdown function of CSS. The default value is true. For tables with a large number of I/O requests, the pushdown function helps reduce I/O pressure when the where condition is specified. 
strict |
Whether the CSS pushdown is strict. The default value is false. The exact match function can reduce more I/O requests than pushdown. |
batch.size.entries |
Maximum number of entries that can be inserted in a batch. The default value is 1000. If the size of a single data record is so large that the number of data records in the bulk storage reaches the upper limit of the data amount in a single batch, the system stops storing data and submits the data based on the batch.size.bytes parameter. |
batch.size.bytes |
Maximum amount of data in a single batch. The default value is 1 MB. If the size of a single data record is so small that the number of data records in the bulk storage reaches the upper limit of the data amount of a single batch, the system stops storing data and submits the data based on the batch.size.entries parameter. |
es.nodes.wan.only |
Whether to access the Elasticsearch node using only the domain name. The default value is false. If the original internal IP address provided by CSS is used as the es.nodes, you do not need to set this parameter or set it to false. |
es.mapping.id |
Document field name that contains the document ID in the Elasticsearch node. NOTE:
|
batch.size.entries and batch.size.bytes limit the number of data records and data volume respectively.
// Insert two sample rows into the DLI table backed by CSS.
sparkSession.sql("insert into css_table values(13, 'John'),(22, 'Bob')")
// Query the table and print the rows to stdout.
val dataFrame = sparkSession.sql("select * from css_table")
dataFrame.show()
Before data is inserted:
Response:
// Remove the DLI table when it is no longer needed.
sparkSession.sql("drop table css_table")
// CSS /index/type resource path and the cluster's cross-origin connection address.
val resource = "/mytest/css"
val nodes = "to-css-1174405013-Ht7O1tYf.datasource.com:9200"
// Explicit schema: two non-nullable columns, id (int) and name (string).
val schema = StructType(Seq(
  StructField("id", IntegerType, false),
  StructField("name", StringType, false)))
// Sample rows to write to CSS.
val rdd = sparkSession.sparkContext.parallelize(Seq(Row(12, "John"), Row(21, "Bob")))
// Build a DataFrame from the RDD and schema, then append it to CSS.
val dataFrame_1 = sparkSession.createDataFrame(rdd, schema)
dataFrame_1.write
  .format("css")
  .option("resource", resource)
  .option("es.nodes", nodes)
  .mode(SaveMode.Append)
  .save()
The value of SaveMode can be one of the following:
// Read the data back from CSS and display it.
val dataFrameR = sparkSession.read
  .format("css")
  .option("resource", resource)
  .option("es.nodes", nodes)
  .load()
dataFrameR.show()
Before data is inserted:
Response:
spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/css/*
spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/css/*
<!-- Maven dependency: Spark SQL API for Scala 2.11, Spark 2.3.2. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.3.2</version>
</dependency>
import org.apache.spark.sql.SparkSession

// Complete example: access a (non-secure) CSS cluster through the SQL model.
object Test_SQL_CSS {
  def main(args: Array[String]): Unit = {
    // Create a SparkSession session.
    val sparkSession = SparkSession.builder().getOrCreate()

    // Create a DLI data table for DLI-associated CSS.
    sparkSession.sql("create table css_table(id long, name string) using css options( 'es.nodes' = 'to-css-1174404217-QG2SwbVV.datasource.com:9200', 'es.nodes.wan.only' = 'true', 'resource' = '/mytest/css')")

    // *****************************SQL model***********************************
    // Insert data into the DLI data table.
    sparkSession.sql("insert into css_table values(13, 'John'),(22, 'Bob')")

    // Read data from the DLI data table.
    val dataFrame = sparkSession.sql("select * from css_table")
    dataFrame.show()

    // Drop the table and release the session.
    sparkSession.sql("drop table css_table")
    sparkSession.close()
  }
}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Complete example: access a (non-secure) CSS cluster through the DataFrame model.
object Test_SQL_CSS {
  def main(args: Array[String]): Unit = {
    // Create a SparkSession session.
    val sparkSession = SparkSession.builder().getOrCreate()

    // *****************************DataFrame model***********************************
    // Set the /index/type of CSS.
    val resource = "/mytest/css"
    // Cross-origin connection address of the CSS cluster.
    val nodes = "to-css-1174405013-Ht7O1tYf.datasource.com:9200"

    // Schema: non-nullable id (int) and name (string) columns.
    val schema = StructType(Seq(StructField("id", IntegerType, false), StructField("name", StringType, false)))
    // Construct sample data.
    val rdd = sparkSession.sparkContext.parallelize(Seq(Row(12, "John"), Row(21, "Bob")))

    // Create a DataFrame from the RDD and schema.
    val dataFrame_1 = sparkSession.createDataFrame(rdd, schema)

    // Write data to CSS.
    dataFrame_1.write.format("css")
      .option("resource", resource)
      .option("es.nodes", nodes)
      .mode(SaveMode.Append)
      .save()

    // Read data back from CSS.
    val dataFrameR = sparkSession.read.format("css").option("resource", resource).option("es.nodes", nodes).load()
    dataFrameR.show()

    // FIX: the original called spardSession.close() — an undefined identifier
    // (typo for sparkSession) that would not compile.
    sparkSession.close()
  }
}
<!-- Maven dependency: Spark SQL API for Scala 2.11, Spark 2.3.2. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.3.2</version>
</dependency>
// Spark SQL entry points plus the types needed to declare an explicit schema.
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
Hard-coded or plaintext AK and SK pose significant security risks. To ensure security, encrypt your AK and SK, store them in configuration files or environment variables, and decrypt them when needed.
val sparkSession = SparkSession.builder().getOrCreate()
// OBS credentials used to fetch the JKS certificate files.
// SECURITY: never hard-code ak/sk — load them decrypted from config or environment variables.
sparkSession.conf.set("fs.obs.access.key", ak)
sparkSession.conf.set("fs.obs.secret.key", sk)
// FIX: variable name typo 'enpoint' -> 'endpoint' (holds the OBS endpoint address).
sparkSession.conf.set("fs.obs.endpoint", endpoint)
// FIX: config key typo — the Hadoop-OBS key is 'fs.obs.connection.ssl.enabled',
// not 'fs.obs.connecton.ssl.enabled'; the misspelled key would be silently ignored.
sparkSession.conf.set("fs.obs.connection.ssl.enabled", "false")
// Create the DLI table for a security CSS cluster (HTTPS + basic auth).
// FIX: added the missing '=' after 'es.nodes' — every OPTIONS entry must be 'key' = 'value'.
sparkSession.sql("create table css_table(id int, name string) using css options( " +
  "'es.nodes' = 'to-css-1174404221-Y2bKVIqY.datasource.com:9200', " +
  "'es.nodes.wan.only' = 'true', " +
  "'resource' = '/mytest/css', " +
  "'es.net.ssl' = 'true', " +
  "'es.net.ssl.keystore.location' = 'obs://Bucket name/path/transport-keystore.jks', " +
  "'es.net.ssl.keystore.pass' = '***', " +
  "'es.net.ssl.truststore.location' = 'obs://Bucket name/path/truststore.jks', " +
  "'es.net.ssl.truststore.pass' = '***', " +
  "'es.net.http.auth.user' = 'admin', " +
  "'es.net.http.auth.pass' = '***')")
Parameter |
Description |
---|---|
es.nodes |
CSS connection address. You need to create a datasource connection first. If you have created an enhanced datasource connection, use the intranet IP address provided by CSS. The address format is IP1:PORT1,IP2:PORT2. |
resource |
Name of the resource associated with the CSS datasource connection. You can use /index/type to specify the resource location (for easier understanding, the index may be seen as a database and the type as a table). NOTE:
1. In Elasticsearch 6.X, a single index supports only one type, and the type name can be customized. 2. In Elasticsearch 7.X, a single index uses _doc as the type name and cannot be customized. To access Elasticsearch 7.X, set this parameter to index. |
pushdown |
Whether to enable the pushdown function of CSS. The default value is true. For tables with a large number of I/O requests, the pushdown function helps reduce I/O pressure when the where condition is specified. 
strict |
Whether the CSS pushdown is strict. The default value is false. The exact match function can reduce more I/O requests than pushdown. |
batch.size.entries |
Maximum number of entries that can be inserted in a batch. The default value is 1000. If the size of a single data record is so large that the number of data records in the bulk storage reaches the upper limit of the data amount in a single batch, the system stops storing data and submits the data based on the batch.size.bytes parameter. |
batch.size.bytes |
Maximum amount of data in a single batch. The default value is 1 MB. If the size of a single data record is so small that the number of data records in the bulk storage reaches the upper limit of the data amount of a single batch, the system stops storing data and submits the data based on the batch.size.entries parameter. |
es.nodes.wan.only |
Whether to access the Elasticsearch node using only the domain name. The default value is false. If the original internal IP address provided by CSS is used as the es.nodes, you do not need to set this parameter or set it to false. |
es.mapping.id |
Document field name that contains the document ID in the Elasticsearch node. NOTE:
|
es.net.ssl |
Whether to connect to the security CSS cluster. The default value is false. |
es.net.ssl.keystore.location |
OBS bucket location of the keystore file generated by the security CSS cluster certificate. |
es.net.ssl.keystore.pass |
Password of the keystore file generated by the security CSS cluster certificate. |
es.net.ssl.truststore.location |
OBS bucket location of the truststore file generated by the security CSS cluster certificate. |
es.net.ssl.truststore.pass |
Password of the truststore file generated by the security CSS cluster certificate. |
es.net.http.auth.user |
Username of the security CSS cluster. |
es.net.http.auth.pass |
Password of the security CSS cluster. |
batch.size.entries and batch.size.bytes limit the number of data records and data volume respectively.
// Insert two sample rows into the DLI table backed by CSS.
sparkSession.sql("insert into css_table values(13, 'John'),(22, 'Bob')")
// Query the table and print the rows to stdout.
val dataFrame = sparkSession.sql("select * from css_table")
dataFrame.show()
Before data is inserted:
Response:
// Remove the DLI table when it is no longer needed.
sparkSession.sql("drop table css_table")
// CSS /index/type resource path and the cluster's cross-origin connection address.
val resource = "/mytest/css"
val nodes = "to-css-1174405013-Ht7O1tYf.datasource.com:9200"
// Explicit schema: two non-nullable columns, id (int) and name (string).
val schema = StructType(Seq(
  StructField("id", IntegerType, false),
  StructField("name", StringType, false)))
// Sample rows to write to CSS.
val rdd = sparkSession.sparkContext.parallelize(Seq(Row(12, "John"), Row(21, "Bob")))
// Build a DataFrame from the RDD and schema, then append it to the security
// CSS cluster over HTTPS with basic auth; keystore/truststore JKS files are read from OBS.
val dataFrame_1 = sparkSession.createDataFrame(rdd, schema)
val secureOptions = Map(
  "es.net.ssl" -> "true",
  "es.net.ssl.keystore.location" -> "obs://Bucket name/path/transport-keystore.jks",
  "es.net.ssl.keystore.pass" -> "***",
  "es.net.ssl.truststore.location" -> "obs://Bucket name/path/truststore.jks",
  "es.net.ssl.truststore.pass" -> "***",
  "es.net.http.auth.user" -> "admin",
  "es.net.http.auth.pass" -> "***")
dataFrame_1.write
  .format("css")
  .option("resource", resource)
  .option("es.nodes", nodes)
  .options(secureOptions)
  .mode(SaveMode.Append)
  .save()
The value of SaveMode can be one of the following:
// Read the data back from the security CSS cluster (HTTPS + basic auth) and display it.
val secureReadOptions = Map(
  "es.net.ssl" -> "true",
  "es.net.ssl.keystore.location" -> "obs://Bucket name/path/transport-keystore.jks",
  "es.net.ssl.keystore.pass" -> "***",
  "es.net.ssl.truststore.location" -> "obs://Bucket name/path/truststore.jks",
  "es.net.ssl.truststore.pass" -> "***",
  "es.net.http.auth.user" -> "admin",
  "es.net.http.auth.pass" -> "***")
val dataFrameR = sparkSession.read
  .format("css")
  .option("resource", resource)
  .option("es.nodes", nodes)
  .options(secureReadOptions)
  .load()
dataFrameR.show()
Before data is inserted:
Response:
<!-- Maven dependency: Spark SQL API for Scala 2.11, Spark 2.3.2. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.3.2</version>
</dependency>
import org.apache.spark.sql.SparkSession

// Complete example: access a security (HTTPS) CSS cluster through the SQL model.
object csshttpstest {
  def main(args: Array[String]): Unit = {
    // Create a SparkSession session.
    val sparkSession = SparkSession.builder().getOrCreate()

    // Create a DLI data table for DLI-associated CSS.
    // NOTE(review): credentials/passwords shown as '**' are placeholders — supply real
    // values from secure configuration, never hard-coded.
    sparkSession.sql("create table css_table(id long, name string) using css options('es.nodes' = '192.168.6.204:9200','es.nodes.wan.only' = 'false','resource' = '/mytest','es.net.ssl'='true','es.net.ssl.keystore.location' = 'obs://xietest1/lzq/keystore.jks','es.net.ssl.keystore.pass' = '**','es.net.ssl.truststore.location'='obs://xietest1/lzq/truststore.jks','es.net.ssl.truststore.pass'='**','es.net.http.auth.user'='admin','es.net.http.auth.pass'='**')")

    // *****************************SQL model***********************************
    // Insert data into the DLI data table.
    sparkSession.sql("insert into css_table values(13, 'John'),(22, 'Bob')")

    // Read data from the DLI data table.
    val dataFrame = sparkSession.sql("select * from css_table")
    dataFrame.show()

    // Drop the table and release the session.
    sparkSession.sql("drop table css_table")
    sparkSession.close()
  }
}
Hard-coded or plaintext AK and SK pose significant security risks. To ensure security, encrypt your AK and SK, store them in configuration files or environment variables, and decrypt them when needed.
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Complete example: access a security (HTTPS) CSS cluster through the DataFrame model.
object Test_SQL_CSS {
  def main(args: Array[String]): Unit = {
    // Create a SparkSession session.
    val sparkSession = SparkSession.builder().getOrCreate()
    // OBS credentials for fetching the JKS certificate files.
    // SECURITY: load ak/sk from encrypted config or environment variables, never hard-coded.
    sparkSession.conf.set("fs.obs.access.key", ak)
    sparkSession.conf.set("fs.obs.secret.key", sk)

    // *****************************DataFrame model***********************************
    // Set the /index/type of CSS.
    val resource = "/mytest/css"
    // Cross-origin connection address of the CSS cluster.
    val nodes = "to-css-1174405013-Ht7O1tYf.datasource.com:9200"

    // Schema: non-nullable id (int) and name (string) columns.
    val schema = StructType(Seq(StructField("id", IntegerType, false), StructField("name", StringType, false)))
    // Construct sample data.
    val rdd = sparkSession.sparkContext.parallelize(Seq(Row(12, "John"), Row(21, "Bob")))

    // Create a DataFrame from the RDD and schema.
    val dataFrame_1 = sparkSession.createDataFrame(rdd, schema)

    // Write data to the security CSS cluster over HTTPS with basic auth.
    dataFrame_1.write
      .format("css")
      .option("resource", resource)
      .option("es.nodes", nodes)
      .option("es.net.ssl", "true")
      .option("es.net.ssl.keystore.location", "obs://Bucket name/path/transport-keystore.jks")
      .option("es.net.ssl.keystore.pass", "***")
      .option("es.net.ssl.truststore.location", "obs://Bucket name/path/truststore.jks")
      .option("es.net.ssl.truststore.pass", "***")
      .option("es.net.http.auth.user", "admin")
      .option("es.net.http.auth.pass", "***")
      .mode(SaveMode.Append)
      .save()

    // Read data back from the security CSS cluster.
    val dataFrameR = sparkSession.read.format("css")
      .option("resource", resource)
      .option("es.nodes", nodes)
      .option("es.net.ssl", "true")
      .option("es.net.ssl.keystore.location", "obs://Bucket name/path/transport-keystore.jks")
      .option("es.net.ssl.keystore.pass", "***")
      .option("es.net.ssl.truststore.location", "obs://Bucket name/path/truststore.jks")
      .option("es.net.ssl.truststore.pass", "***")
      .option("es.net.http.auth.user", "admin")
      .option("es.net.http.auth.pass", "***")
      .load()
    dataFrameR.show()

    // FIX: the original called spardSession.close() — an undefined identifier
    // (typo for sparkSession) that would not compile.
    sparkSession.close()
  }
}