MongoDB can be connected only through an enhanced datasource connection.
DDS (Document Database Service) is compatible with the MongoDB protocol.
An enhanced datasource connection has been created on the DLI management console and bound to a queue.
Hard-coded or plaintext passwords pose significant security risks. To ensure security, encrypt your passwords, store them in configuration files or environment variables, and decrypt them when needed.
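For example, a password can be read from an environment variable at run time rather than written into the source. A minimal sketch, assuming the password was exported as MONGO_PASSWORD (the variable name is illustrative, not a DLI convention):

// Read the password from an environment variable instead of hard-coding it.
// MONGO_PASSWORD is a placeholder name chosen for this example.
String password = System.getenv("MONGO_PASSWORD");
if (password == null || password.isEmpty()) {
    throw new IllegalStateException("MONGO_PASSWORD is not set");
}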
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.3.2</version>
</dependency>
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;
import java.util.Arrays;
SparkContext sparkContext = new SparkContext(new SparkConf().setAppName("datasource-mongo"));
JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
SQLContext sqlContext = new SQLContext(javaSparkContext);
// Create a DataFrame from an RDD of JSON strings
JavaRDD<String> javaRDD = javaSparkContext.parallelize(Arrays.asList("{\"id\":\"5\",\"name\":\"Ann\",\"age\":\"23\"}"));
Dataset<Row> dataFrame = sqlContext.read().json(javaRDD);
String url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin";
String uri = "mongodb://username:pwd@host:8635/db";
String user = "rwuser";
String database = "test";
String collection = "test";
String password = "######";
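The uri value above is a placeholder. As a sketch, a working connection string can be assembled from the other parameters following the standard MongoDB connection-string format (this assembly is illustrative, not DLI-specific syntax):

// Illustrative only: build a MongoDB connection string from the parts above.
String assembledUri = "mongodb://" + user + ":" + password
        + "@192.168.4.62:8635,192.168.5.134:8635/" + database + "?authSource=admin";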
// Write the DataFrame to the MongoDB (DDS) collection
dataFrame.write().format("mongo")
    .option("url", url)
    .option("uri", uri)
    .option("database", database)
    .option("collection", collection)
    .option("user", user)
    .option("password", password)
    .mode(SaveMode.Overwrite)
    .save();
// Read the data back from MongoDB and print it
sqlContext.read().format("mongo")
    .option("url", url)
    .option("uri", uri)
    .option("database", database)
    .option("collection", collection)
    .option("user", user)
    .option("password", password)
    .load().show();
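The loaded result is an ordinary DataFrame and can be processed further. A minimal sketch (the view name is arbitrary, and the column names come from the sample record written above):

Dataset<Row> mongoData = sqlContext.read().format("mongo")
        .option("url", url)
        .option("uri", uri)
        .option("database", database)
        .option("collection", collection)
        .option("user", user)
        .option("password", password)
        .load();
// Register the data as a temporary view and query it with Spark SQL;
// "mongo_test" is a placeholder view name chosen for this example.
mongoData.createOrReplaceTempView("mongo_test");
sqlContext.sql("SELECT id, name FROM mongo_test WHERE age = '23'").show();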
spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/*
spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/*
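These two parameters add the DLI-provided MongoDB connector jars to the driver and executor classpaths; without them the mongo data source cannot be resolved at run time. As an illustrative sketch of supplying them at submission time (the spark-submit invocation and application jar name are assumptions, not DLI-specific syntax):

spark-submit \
  --class TestMongoSparkSql \
  --conf spark.driver.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/* \
  --conf spark.executor.extraClassPath=/usr/share/extension/dli/spark-jar/datasource/mongo/* \
  your-application.jar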
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

import java.util.Arrays;

public class TestMongoSparkSql {
  public static void main(String[] args) {
    SparkContext sparkContext = new SparkContext(new SparkConf().setAppName("datasource-mongo"));
    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
    SQLContext sqlContext = new SQLContext(javaSparkContext);

    // Reading a JSON file into a DataFrame; CSV and Parquet files are read the same way:
    // Dataset<Row> dataFrame = sqlContext.read().format("json").load("filepath");

    // Create a DataFrame from an RDD of JSON strings
    JavaRDD<String> javaRDD = javaSparkContext.parallelize(Arrays.asList("{\"id\":\"5\",\"name\":\"Ann\",\"age\":\"23\"}"));
    Dataset<Row> dataFrame = sqlContext.read().json(javaRDD);

    // Connection parameters
    String url = "192.168.4.62:8635,192.168.5.134:8635/test?authSource=admin";
    String uri = "mongodb://username:pwd@host:8635/db";
    String user = "rwuser";
    String database = "test";
    String collection = "test";
    String password = "######";

    // Write the DataFrame to the MongoDB (DDS) collection
    dataFrame.write().format("mongo")
        .option("url", url)
        .option("uri", uri)
        .option("database", database)
        .option("collection", collection)
        .option("user", user)
        .option("password", password)
        .mode(SaveMode.Overwrite)
        .save();

    // Read the data back from MongoDB and print it
    sqlContext.read().format("mongo")
        .option("url", url)
        .option("uri", uri)
        .option("database", database)
        .option("collection", collection)
        .option("user", user)
        .option("password", password)
        .load().show();

    sparkContext.stop();
    javaSparkContext.close();
  }
}